seo_report 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e43cca8581863cf2f96787c1bc7d5540e1cbbb65
4
+ data.tar.gz: 48da9cc13c59920b22fbfecfc64196dd1d2b1eea
5
+ SHA512:
6
+ metadata.gz: b02574fbbcd9c1b22986f466e500a2b4147d7e2efdb79536f4f6ad3975e42717a9ca5c8400ec9e7080e630ee3c983c7783c215c42cbd60eeb86df94ae39fff54
7
+ data.tar.gz: 922908e117fd759c82e6a12a6fc035a5672f4672b324a0d9527d099bb79815d588feb32b427b2e1355a286cf0452b77be763ae4b0cff9625e0b42041c8c256e7
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.3
4
+ before_install: gem install bundler -v 1.10.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in seo_report.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,165 @@
1
+ GNU LESSER GENERAL PUBLIC LICENSE
2
+ Version 3, 29 June 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+
9
+ This version of the GNU Lesser General Public License incorporates
10
+ the terms and conditions of version 3 of the GNU General Public
11
+ License, supplemented by the additional permissions listed below.
12
+
13
+ 0. Additional Definitions.
14
+
15
+ As used herein, "this License" refers to version 3 of the GNU Lesser
16
+ General Public License, and the "GNU GPL" refers to version 3 of the GNU
17
+ General Public License.
18
+
19
+ "The Library" refers to a covered work governed by this License,
20
+ other than an Application or a Combined Work as defined below.
21
+
22
+ An "Application" is any work that makes use of an interface provided
23
+ by the Library, but which is not otherwise based on the Library.
24
+ Defining a subclass of a class defined by the Library is deemed a mode
25
+ of using an interface provided by the Library.
26
+
27
+ A "Combined Work" is a work produced by combining or linking an
28
+ Application with the Library. The particular version of the Library
29
+ with which the Combined Work was made is also called the "Linked
30
+ Version".
31
+
32
+ The "Minimal Corresponding Source" for a Combined Work means the
33
+ Corresponding Source for the Combined Work, excluding any source code
34
+ for portions of the Combined Work that, considered in isolation, are
35
+ based on the Application, and not on the Linked Version.
36
+
37
+ The "Corresponding Application Code" for a Combined Work means the
38
+ object code and/or source code for the Application, including any data
39
+ and utility programs needed for reproducing the Combined Work from the
40
+ Application, but excluding the System Libraries of the Combined Work.
41
+
42
+ 1. Exception to Section 3 of the GNU GPL.
43
+
44
+ You may convey a covered work under sections 3 and 4 of this License
45
+ without being bound by section 3 of the GNU GPL.
46
+
47
+ 2. Conveying Modified Versions.
48
+
49
+ If you modify a copy of the Library, and, in your modifications, a
50
+ facility refers to a function or data to be supplied by an Application
51
+ that uses the facility (other than as an argument passed when the
52
+ facility is invoked), then you may convey a copy of the modified
53
+ version:
54
+
55
+ a) under this License, provided that you make a good faith effort to
56
+ ensure that, in the event an Application does not supply the
57
+ function or data, the facility still operates, and performs
58
+ whatever part of its purpose remains meaningful, or
59
+
60
+ b) under the GNU GPL, with none of the additional permissions of
61
+ this License applicable to that copy.
62
+
63
+ 3. Object Code Incorporating Material from Library Header Files.
64
+
65
+ The object code form of an Application may incorporate material from
66
+ a header file that is part of the Library. You may convey such object
67
+ code under terms of your choice, provided that, if the incorporated
68
+ material is not limited to numerical parameters, data structure
69
+ layouts and accessors, or small macros, inline functions and templates
70
+ (ten or fewer lines in length), you do both of the following:
71
+
72
+ a) Give prominent notice with each copy of the object code that the
73
+ Library is used in it and that the Library and its use are
74
+ covered by this License.
75
+
76
+ b) Accompany the object code with a copy of the GNU GPL and this license
77
+ document.
78
+
79
+ 4. Combined Works.
80
+
81
+ You may convey a Combined Work under terms of your choice that,
82
+ taken together, effectively do not restrict modification of the
83
+ portions of the Library contained in the Combined Work and reverse
84
+ engineering for debugging such modifications, if you also do each of
85
+ the following:
86
+
87
+ a) Give prominent notice with each copy of the Combined Work that
88
+ the Library is used in it and that the Library and its use are
89
+ covered by this License.
90
+
91
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
92
+ document.
93
+
94
+ c) For a Combined Work that displays copyright notices during
95
+ execution, include the copyright notice for the Library among
96
+ these notices, as well as a reference directing the user to the
97
+ copies of the GNU GPL and this license document.
98
+
99
+ d) Do one of the following:
100
+
101
+ 0) Convey the Minimal Corresponding Source under the terms of this
102
+ License, and the Corresponding Application Code in a form
103
+ suitable for, and under terms that permit, the user to
104
+ recombine or relink the Application with a modified version of
105
+ the Linked Version to produce a modified Combined Work, in the
106
+ manner specified by section 6 of the GNU GPL for conveying
107
+ Corresponding Source.
108
+
109
+ 1) Use a suitable shared library mechanism for linking with the
110
+ Library. A suitable mechanism is one that (a) uses at run time
111
+ a copy of the Library already present on the user's computer
112
+ system, and (b) will operate properly with a modified version
113
+ of the Library that is interface-compatible with the Linked
114
+ Version.
115
+
116
+ e) Provide Installation Information, but only if you would otherwise
117
+ be required to provide such information under section 6 of the
118
+ GNU GPL, and only to the extent that such information is
119
+ necessary to install and execute a modified version of the
120
+ Combined Work produced by recombining or relinking the
121
+ Application with a modified version of the Linked Version. (If
122
+ you use option 4d0, the Installation Information must accompany
123
+ the Minimal Corresponding Source and Corresponding Application
124
+ Code. If you use option 4d1, you must provide the Installation
125
+ Information in the manner specified by section 6 of the GNU GPL
126
+ for conveying Corresponding Source.)
127
+
128
+ 5. Combined Libraries.
129
+
130
+ You may place library facilities that are a work based on the
131
+ Library side by side in a single library together with other library
132
+ facilities that are not Applications and are not covered by this
133
+ License, and convey such a combined library under terms of your
134
+ choice, if you do both of the following:
135
+
136
+ a) Accompany the combined library with a copy of the same work based
137
+ on the Library, uncombined with any other library facilities,
138
+ conveyed under the terms of this License.
139
+
140
+ b) Give prominent notice with the combined library that part of it
141
+ is a work based on the Library, and explaining where to find the
142
+ accompanying uncombined form of the same work.
143
+
144
+ 6. Revised Versions of the GNU Lesser General Public License.
145
+
146
+ The Free Software Foundation may publish revised and/or new versions
147
+ of the GNU Lesser General Public License from time to time. Such new
148
+ versions will be similar in spirit to the present version, but may
149
+ differ in detail to address new problems or concerns.
150
+
151
+ Each version is given a distinguishing version number. If the
152
+ Library as you received it specifies that a certain numbered version
153
+ of the GNU Lesser General Public License "or any later version"
154
+ applies to it, you have the option of following the terms and
155
+ conditions either of that published version or of any later version
156
+ published by the Free Software Foundation. If the Library as you
157
+ received it does not specify a version number of the GNU Lesser
158
+ General Public License, you may choose any version of the GNU Lesser
159
+ General Public License ever published by the Free Software Foundation.
160
+
161
+ If the Library as you received it specifies that a proxy can decide
162
+ whether future versions of the GNU Lesser General Public License shall
163
+ apply, that proxy's public statement of acceptance of any version is
164
+ permanent authorization for you to choose that version for the
165
+ Library.
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Tim Reddehase
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,73 @@
1
+ # SeoReport
2
+
3
+ seo_report is a gem that provides a binary/executable called `seo-report`. With
4
+ this you can get a report of seo-relevant data for pretty much any URL.
5
+ It focuses on these aspects of seo-relevance:
6
+
7
+ * redirects, especially redirect-chains
8
+ * canonical URLs
9
+ * robots meta tags
10
+ * title and description
11
+ * social media data
12
+ * twitter-cards (twitter)
13
+ * open-graph (facebook)
14
+ * microdata (schema.org), *currently only in json output*
15
+
16
+ ## planned functionality
17
+
18
+ * add support for structured data
19
+ * read in rdfa-formatted structured data
20
+ * (support json-ld? (embedded and linked?))
21
+
22
+ ## installation
23
+
24
+ Add this line to your application's Gemfile:
25
+
26
+ ```ruby
27
+ gem 'seo_report'
28
+ ```
29
+
30
+ And then execute:
31
+
32
+ $ bundle
33
+
34
+ Or install it yourself as:
35
+
36
+ $ gem install seo_report
37
+
38
+ ## usage
39
+
40
+ * `seo-report URL`
41
+ will provide you with a human-readable report on the command-line.
42
+ * `seo-report --json URL`
43
+ will provide you with json output, that is parseable by other means (like for
44
+ example [jq](https://stedolan.github.io/jq/)). Be aware that the json
45
+ format is currently not set in stone. It is most likely to change until
46
+ at least version *0.2.0* is reached.
47
+
48
+ ## development
49
+
50
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run
51
+ `rake spec` to run the tests. You can also run `bin/console` for an interactive
52
+ prompt that will allow you to experiment.
53
+
54
+ To install this gem onto your local machine, run `bundle exec rake install`. To
55
+ release a new version, update the version number in `version.rb`, and then run
56
+ `bundle exec rake release`, which will create a git tag for the version, push
57
+ git commits and tags, and push the `.gem` file to
58
+ [rubygems.org](https://rubygems.org).
59
+
60
+ ## contributing
61
+
62
+ Bug reports and pull requests are welcome on GitHub at
63
+ https://github.com/0robustus1/seo_report. If you have any ideas regarding a
64
+ bigger feature change/addition I would recommend creating an issue before
65
+ sitting down and implementing the code for a PR. This increases the chance of it
66
+ being merged, as it allows us to have a discussion beforehand.
67
+
68
+ ## license
69
+
70
+ The gem is available as open source under the terms of the
71
+ [LGPL 3.0 License](http://choosealicense.com/licenses/lgpl-3.0/). A copy of the
72
+ license is part of this repository in the *LICENSE* file.
73
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "seo_report"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ require "pry"
11
+ Pry.start
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
data/exe/seo-report ADDED
@@ -0,0 +1,22 @@
1
#!/usr/bin/env ruby

# Command-line entry point of the seo_report gem.
#
# Usage:
#   seo-report URL         human-readable report on the terminal
#   seo-report --json URL  report as JSON on standard output
#   seo-report --help      print the usage and exit

require "seo_report"
require "getoptlong"

usage = "Usage: seo-report [--json] URL"

options = GetoptLong.new(
  ["--json", GetoptLong::NO_ARGUMENT],
  ["--help", "-h", GetoptLong::NO_ARGUMENT])

# Options without argument are reported with an empty string as value, so the
# presence of the key (not the value) tells whether a flag was given.
opts = {}
options.each { |opt, val| opts[opt] = val }

# --help was declared but never acted upon before; answer it explicitly.
if opts.key?("--help")
  puts usage
  exit
end

# GetoptLong removes the parsed options from ARGV, the first remaining
# argument is the URL to report on.
url = ARGV[0]
if url.nil? || url.empty?
  abort("Please provide a url to seo-report.\nUsage: seo-report URL")
else
  if opts.key?("--json")
    SeoReport::Representation::Json.represent_with_report_for(url)
  else
    SeoReport::Representation::Cli.represent_with_report_for(url)
  end
end
@@ -0,0 +1,17 @@
1
module SeoReport
  module Extractions
    # Extraction of the basic <head> information of an HTML document.
    module Head
      # Collects the text of <title> and the content of the description meta
      # tag(s). The description is passed through `unarray`, so a single tag
      # yields its value and several tags yield a list.
      def extract_head(doc)
        descriptions = doc.xpath('//head/meta[@name="description"]').map do |meta|
          meta.attr("content")
        end
        page_title = doc.xpath('//head/title').text
        {title: page_title, description: unarray(descriptions)}
      end
    end

    SeoReport::Report.register_extraction(:html, Head, :extract_head)
  end
end
@@ -0,0 +1,112 @@
1
module SeoReport::Extractions
  # Extraction of HTML microdata (itemscope/itemtype/itemprop attributes).
  module Microdata
    # Returns {microdata: [...]} with one entry per top-level item scope found
    # below <body>.
    def extract_microdata(document)
      MicrodataExtractor.new(document).extract!
    end

    # Walks the element tree depth-first and builds nested hashes: every
    # itemscope becomes a hash with a :type key (its itemtype) plus one key
    # per itemprop found inside of it.
    class MicrodataExtractor
      attr_reader :document

      def initialize(document)
        @document = document
      end

      def extract!
        body = document.at_xpath('/html/body')
        # A document without <body> (e.g. an empty response) has nothing to
        # walk; report an empty result instead of failing on nil.
        body.children.each { |child| process(child) } if body
        {microdata: result_set}
      end

      protected
      # `level` counts the item scopes currently open plus one; it is 1 as
      # long as the walk is outside of every scope.
      def process(child, level = 1)
        if microdata_element?(child)
          process_microdata(child, level)
        else
          child.children.each { |c| process(c, level) }
        end
      end

      def process_microdata(child, level)
        if microdata_scope?(child)
          push_scope(child)
          child.children.each { |c| process(c, level + 1) }
          scope = wrap_scope!
          store_as_dangling_if_necessary!(child, scope, level)
          result_set << scope if level == 1
        else
          provide_data_for_itemprop(child)
          child.children.each { |c| process(c, level) }
        end
      end

      # A nested scope that is not assigned to its parent through an itemprop
      # would otherwise be lost; it is collected under :@dangling_children of
      # the parent scope instead.
      def store_as_dangling_if_necessary!(element, scope, level)
        itemprop = element["itemprop"]
        is_top_level = level == 1
        is_assigned_with_itemprop = !itemprop.nil? && !itemprop.empty?
        if !is_top_level && !is_assigned_with_itemprop
          (current_scope[:@dangling_children] ||= []) << scope
        end
      end

      # Stores the value of an itemprop element in the current scope. Which
      # attribute (or the text) carries the value depends on the element name.
      def provide_data_for_itemprop(element)
        # Elements that only carry an itemtype have no property name, and an
        # itemprop outside of every itemscope has no scope to be stored in.
        return unless microdata_prop?(element) && current_scope
        data =
          if element.name == "meta"
            element["content"]
          elsif %w(audio embed iframe img source track video).include?(element.name)
            element["src"]
          elsif %w(a area link).include?(element.name)
            element["href"]
          elsif element.name == "object"
            element["data"]
          elsif %w(data meter).include?(element.name)
            element["value"]
          else
            element.text
          end
        current_scope[element["itemprop"]] = data
      end

      # Opens a new scope for an itemscope element and, when the element is
      # itself a property of an enclosing scope, links it into that scope.
      def push_scope(element)
        new_scope = {
          type: element["itemtype"]
        }
        # A top-level itemscope may carry an itemprop without any enclosing
        # scope being open; there is nothing to link it into then.
        if microdata_prop?(element) && current_scope
          current_scope[element["itemprop"]] = new_scope
        end
        scopes.push(new_scope)
      end

      # Closes the innermost scope and returns it.
      def wrap_scope!
        scopes.pop
      end

      # Innermost open scope, nil outside of every scope.
      def current_scope
        scopes.last
      end

      def scopes
        @scopes ||= []
      end

      def result_set
        @result_set ||= []
      end

      def microdata_element?(element)
        %w(itemscope itemtype itemprop).
          any? { |attr| element.has_attribute?(attr) }
      end

      def microdata_scope?(element)
        element.has_attribute?("itemscope")
      end

      def microdata_prop?(element)
        element.has_attribute?("itemprop")
      end
    end

    SeoReport::Report.register_extraction(:html, Microdata, :extract_microdata)
  end
end
@@ -0,0 +1,32 @@
1
module SeoReport
  module Extractions
    # Extraction of the Open Graph (og:*) meta tags from the document head.
    module Opengraph
      # Returns {og: {...}} with one entry per supported property. Each entry
      # is passed through `unarray`: nil when the tag is missing, the plain
      # value for one tag, a list for several tags.
      def extract_og(doc)
        properties = %i(type title description site_name image url)
        og_data = properties.each_with_object({}) do |property, collected|
          contents = doc.xpath(%(//head/meta[@property="og:#{property}"])).
            map { |node| node.attr("content") }
          collected[property] = unarray(contents)
        end
        {og: og_data}
      end
    end

    SeoReport::Report.register_extraction(:html, Opengraph, :extract_og)
  end
end
@@ -0,0 +1,19 @@
1
module SeoReport
  module Extractions
    # Extraction of crawler-related head tags: robots directives and the
    # canonical URL.
    module Seo
      # Returns the content of every robots meta tag under :robots. The value
      # is always a list, it is not passed through `unarray`.
      def extract_robots(doc)
        robots_nodes = doc.xpath('//head/meta[@name="robots"]')
        {robots: robots_nodes.map { |meta| meta.attr("content") }}
      end

      # Returns the href of the canonical link under :canonical (nil when
      # missing, a list when the page declares several).
      def extract_canonical(doc)
        hrefs = doc.xpath('//head/link[@rel="canonical"]').map do |link|
          link.attr("href")
        end
        {canonical: unarray(hrefs)}
      end
    end

    SeoReport::Report.register_extraction(:html, Seo, :extract_robots, :extract_canonical)
  end
end
@@ -0,0 +1,26 @@
1
module SeoReport
  module Extractions
    # Extraction of the twitter-card (twitter:*) meta tags from the head.
    module Twitter
      # Returns {twitter: {...}} with one entry per supported card field. Each
      # entry is passed through `unarray`: nil when the tag is missing, the
      # plain value for one tag, a list for several tags.
      def extract_twitter(doc)
        card_data = %i(card domain title description).each_with_object({}) do |field, collected|
          contents = doc.xpath(%(//head/meta[@name="twitter:#{field}"])).
            map { |node| node.attr("content") }
          collected[field] = unarray(contents)
        end
        {twitter: card_data}
      end
    end

    SeoReport::Report.register_extraction(:html, Twitter, :extract_twitter)
  end
end
@@ -0,0 +1,10 @@
1
# Namespace for the extraction modules; the individual extractions are loaded
# by the require statements of this file.
module SeoReport; module Extractions; end; end
5
+
6
+ require "seo_report/extractions/head"
7
+ require "seo_report/extractions/seo"
8
+ require "seo_report/extractions/twitter"
9
+ require "seo_report/extractions/opengraph"
10
+ require "seo_report/extractions/microdata"
@@ -0,0 +1,70 @@
1
+ require "nokogiri"
2
+
3
module SeoReport
  # Produces the report data for a URL: follows the redirect chain starting at
  # the URL and runs the registered extractions on every HTML (200) response.
  class Report
    attr_reader :start_url, :data

    # Registers extraction methods for a document type.
    #
    # type         - document type the methods apply to (e.g. :html)
    # module_name  - module that defines the methods; it is included into
    #                Report so the methods can call helpers like `unarray`
    # method_names - names of the methods to run; each receives the parsed
    #                document and returns a Hash that is merged into the report
    def self.register_extraction(type, module_name, *method_names)
      include module_name
      extractions[type].concat(method_names)
    end

    # Registry of extraction method names, keyed by document type.
    #
    # The default block creates and stores a separate array per type. A plain
    # `Hash.new([])` would hand out one shared array for every key and never
    # store it, so all types would end up with the same list of methods.
    def self.extractions
      @extractions ||= Hash.new { |registry, type| registry[type] = [] }
    end

    def initialize(start_url)
      @start_url = start_url
    end

    # Performs the requests and stores the resulting report in `data`.
    def produce
      @data = generate_report
    end

    protected
    # Builds {requests: [...]} with one entry per request of the chain, each
    # holding the requested URL, the response code and the response-specific
    # details.
    def generate_report
      chain = RequestChain.new(start_url)
      chain.perform
      {
        requests: chain.request_chain.map do |request|
          {
            request_url: request.url.to_s,
            response_code: request.response.code.to_i
          }.merge(generate_html_report(request))
        end
      }
    end

    # Details for a single response: the extraction results for a 200
    # response, the Location header for a redirect, nothing otherwise.
    def generate_html_report(request)
      if request.response.is_a?(Net::HTTPOK)
        doc = Nokogiri::HTML(request.response.body)
        content_through_extractions({}, doc, type: :html)
      elsif request.response.is_a?(Net::HTTPRedirection)
        {
          location: request.response["Location"],
        }
      else
        {}
      end
    end

    # Runs every extraction registered for `type` on the document and merges
    # the results into `base`; later extractions win on conflicting keys.
    def content_through_extractions(base, document, type: :html)
      self.class.extractions[type].reduce(base) do |current, method_name|
        current.merge(send(method_name, document))
      end
    end

    # Collapses list-like values with at most one element into that element
    # (nil for an empty list); longer lists and other values pass unchanged.
    def unarray(array)
      if array.respond_to?(:length) && array.respond_to?(:first)
        if array.length <= 1
          array.first
        else
          array
        end
      else
        array
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require "forwardable"
2
+
3
module SeoReport::Representation
  # Shared foundation of the output formats: wraps a produced report and
  # leaves the actual rendering (#represent) to the subclasses.
  class Base
    extend Forwardable

    attr_reader :report
    def_delegator :report, :data

    # Produces the report for `url` and renders it with this representation.
    def self.represent_with_report_for(url)
      produced = SeoReport::Report.new(url).tap(&:produce)
      new(produced).represent
    end

    def initialize(report)
      @report = report
    end

    # Renders the report; has to be provided by every subclass.
    def represent
      raise NotImplementedError, "#represent needs to be implemented by a subclass."
    end

    # True when the hash holds at least one non-nil value; a missing (nil)
    # hash counts as empty.
    def any_data_in_hash?(hash)
      (hash || {}).any? { |_key, value| !value.nil? }
    end
  end
end
@@ -0,0 +1,147 @@
1
module SeoReport::Representation
  # Human-readable report for the terminal, highlighted with ANSI colors.
  class Cli < Base
    # Prints the start URL followed by one section per request of the chain.
    def represent
      url = data[:requests].first[:request_url]
      puts "#{white_color('URL: ')}#{url}"
      separator
      data[:requests].each do |request|
        provide_response_data(request)
      end
    end

    protected
    # Dispatches on the response code: full report for 200, redirect line for
    # 3xx, plain status line for everything else.
    def provide_response_data(request)
      code = request[:response_code]
      if code == 200
        provide_html_response(request)
      elsif code >= 300 && code < 400
        provide_redirection_response(request)
      else
        # Error and non-200 success responses used to be skipped silently,
        # leaving the user without any hint why the report is empty.
        provide_status_only_response(request)
      end
    end

    def provide_html_response(request)
      code = request[:response_code]
      separator if multiple_requests?
      puts "#{white_color('Status: ')}#{color_for_code(code)}"
      # A canonical URL that differs from the requested one is flagged in red.
      canonical =
        if request[:canonical] == request[:request_url]
          green_color(request[:canonical], bold: true)
        else
          red_color(request[:canonical], bold: true)
        end
      puts "#{white_color('Canonical: ')}#{canonical}"
      puts "#{white_color('Title: ')}#{request[:title]}"
      puts "#{white_color('Description: ')}#{request[:description]}"
      provide_robots_response(request)
      provide_twitter_response(request)
      provide_opengraph_response(request)
    end

    def provide_redirection_response(request)
      code = request[:response_code]
      # A redirect onto the very same URL is flagged in red.
      location =
        if request[:location] == request[:request_url]
          red_color(request[:location], bold: true)
        else
          request[:location]
        end
      if multiple_requests?
        puts "#{white_color('redirect')} with #{blue_color(code, bold: true)} to #{white_color(location)}"
      else
        puts "#{white_color('Status: ')}#{color_for_code(code)}"
        puts "#{white_color('Location: ')}#{location}"
      end
    end

    # Status line for responses that carry no further report data.
    def provide_status_only_response(request)
      separator if multiple_requests?
      puts "#{white_color('Status: ')}#{color_for_code(request[:response_code])}"
    end

    def provide_robots_response(request)
      # The robots list may contain nil for a meta tag without content
      # attribute; such entries carry no directive and are dropped. The copy
      # keeps the report data itself untouched.
      literal_tags = Array(request[:robots]).compact
      tags = literal_tags.map(&:downcase)
      puts "#{white_color('Robots: ')}"
      no_robots_tag = tags.find { |t| t.match(/.*no(?:index|follow).*/) }
      if !tags.include?("all") && no_robots_tag
        # The restricting tag is printed highlighted, so it is removed from
        # the list of tags that are printed plainly below.
        literal_tags.delete_at(tags.find_index(no_robots_tag))
        puts " - #{red_color(no_robots_tag, bold: true)}"
      elsif !tags.include?("all") && !tags.any? { |t| t.match(/index|follow/) }
        puts " - index, follow (#{white_color('default')})"
      end
      literal_tags.each do |robot_tag|
        puts " - #{robot_tag}"
      end
    end

    def provide_twitter_response(request)
      twitter = request[:twitter]
      if any_data_in_hash?(twitter)
        puts "#{white_color('Twitter-Card: ')}"
        puts " #{white_color('Card: ')}#{twitter[:card]}"
        puts " #{white_color('Domain: ')}#{twitter[:domain]}"
        puts " #{white_color('Title: ')}#{twitter[:title]}"
        puts " #{white_color('Description: ')}#{twitter[:description]}"
      else
        puts "#{white_color('Twitter-Card:')} no matching data present in response"
      end
    end

    def provide_opengraph_response(request)
      opengraph = request[:og]
      if any_data_in_hash?(opengraph)
        puts "#{white_color('OpenGraph (Facebook): ')}"
        puts " #{white_color('type: ')}#{opengraph[:type]}"
        puts " #{white_color('site_name: ')}#{opengraph[:site_name]}"
        puts " #{white_color('Title: ')}#{opengraph[:title]}"
        puts " #{white_color('Description: ')}#{opengraph[:description]}"
        puts " #{white_color('URL: ')}#{opengraph[:url]}"
        puts " #{white_color('image: ')}#{opengraph[:image]}"
      else
        puts "#{white_color('OpenGraph (Facebook):')} no matching data present in response"
      end
    end

    # True when the report covers a redirect chain instead of one request.
    def multiple_requests?
      data[:requests].length > 1
    end

    def separator
      puts "#{white_color('----------------------------------------')}"
    end

    # Prints the text in white without a trailing newline.
    def white(text, io = $stdout)
      io.print(white_color(text))
    end

    def white_color(text, bold: true)
      color_with_code(text, code: 37, bold: bold)
    end

    def green_color(text, bold: false)
      color_with_code(text, code: 32, bold: bold)
    end

    def red_color(text, bold: false)
      color_with_code(text, code: 31, bold: bold)
    end

    def blue_color(text, bold: false)
      color_with_code(text, code: 34, bold: bold)
    end

    # Wraps the text in the ANSI escape sequence for the given color code and
    # resets the formatting afterwards.
    def color_with_code(text, code:, bold: false)
      bold_val = bold ? 1 : 0
      "\033[#{bold_val};#{code}m#{text}\033[0m"
    end

    # Colors a status code by its class: green for 200, white for other 2xx,
    # blue for 3xx, red for 4xx and bold red for everything else.
    def color_for_code(code)
      if code == 200
        green_color(code, bold: true)
      elsif code > 200 && code < 300
        white_color(code, bold: false)
      elsif code >= 300 && code < 400
        blue_color(code, bold: true)
      elsif code >= 400 && code < 500
        red_color(code, bold: false)
      else
        red_color(code, bold: true)
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require "json"
2
+
3
module SeoReport::Representation
  # Machine-readable output format: the report data serialized as JSON.
  class Json < Base
    # Writes the JSON form of the report data to standard output.
    def represent
      serialized = JSON(data)
      puts serialized
    end
  end
end
@@ -0,0 +1,8 @@
1
# Namespace for the output formats; the individual representations are loaded
# by the require statements of this file.
module SeoReport; module Representation; end; end
5
+
6
+ require "seo_report/representation/base"
7
+ require "seo_report/representation/cli"
8
+ require "seo_report/representation/json"
@@ -0,0 +1,39 @@
1
+ require "net/http"
2
+ require "net/https"
3
+
4
module SeoReport
  # A single HTTP GET request together with the response it received.
  class Request
    class Error < ::StandardError; end
    # Raised when request or response are read before #perform was called.
    class NoRequestPerformedYetError < Error; end

    attr_reader :url, :headers

    # url     - target of the request, converted with Kernel#URI
    # headers - request headers handed to Net::HTTP::Get
    def initialize(url, headers = {})
      @headers = headers
      @url = URI(url)
    end

    # Sends the request (over TLS for https URLs), stores and returns the
    # response.
    def perform
      connection = Net::HTTP.new(url.host, url.port).tap do |http|
        http.use_ssl = url.is_a?(URI::HTTPS)
      end
      @response = connection.request(http_request)
    end

    # The Net::HTTP request object; only available once a response exists.
    def request
      raise NoRequestPerformedYetError.new(url) unless @response
      http_request
    end

    # The received response; raises when nothing was performed yet.
    def response
      return @response if @response
      raise NoRequestPerformedYetError.new(url)
    end

    protected
    # Builds the GET request once and reuses it afterwards.
    def http_request
      @http_request = Net::HTTP::Get.new(url.request_uri, headers) unless @http_request
      @http_request
    end
  end
end
@@ -0,0 +1,34 @@
1
module SeoReport
  # Follows HTTP redirects starting at an initial URL and records every
  # request that was performed on the way.
  class RequestChain
    # Upper bound for the number of requests in one chain; protects against
    # redirect loops.
    MAX_CHAIN_LENGTH = 10

    attr_reader :initial_request
    attr_reader :request_chain, :terminal_request

    # initial_url - URL the chain starts at
    # headers     - request headers for the initial request
    def initialize(initial_url, headers = {})
      @initial_request = Request.new(initial_url, headers)
      @request_chain = [@initial_request]
    end

    # Performs the requests one after another until a response does not
    # redirect any further or the chain reaches MAX_CHAIN_LENGTH requests.
    # `terminal_request` is only set when the chain ended on a response that
    # does not redirect.
    def perform
      loop do
        last_request.perform
        target = redirection_target(last_request)
        if target
          break if request_chain.length >= MAX_CHAIN_LENGTH
          request_chain << Request.new(target)
        else
          @terminal_request = request_chain.last
          break
        end
      end
    end

    protected
    def last_request
      request_chain.last
    end

    def has_redirection_response?(request)
      code = request.response.code.to_i
      code >= 300 && code < 400
    end

    # Absolute URL the response of `request` redirects to, or nil when there
    # is nothing to follow.
    #
    # The Location header may be relative (e.g. "/login"); it is resolved
    # against the URL of the redirecting request so the follow-up request
    # keeps scheme and host. A 3xx response without Location (like 304)
    # cannot be followed and ends the chain.
    def redirection_target(request)
      return nil unless has_redirection_response?(request)
      location = request.response["Location"]
      return nil if location.nil? || location.empty?
      URI.join(request.url.to_s, location).to_s
    end
  end
end
@@ -0,0 +1,3 @@
1
module SeoReport
  # Version of the gem; the gemspec reads it for the published version.
  VERSION = '0.1.1'
end
data/lib/seo_report.rb ADDED
@@ -0,0 +1,10 @@
1
+ require "seo_report/version"
2
+
3
# Root namespace of the gem; its parts are loaded by the require statements
# of this file.
module SeoReport; end
5
+
6
+ require "seo_report/request"
7
+ require "seo_report/request_chain"
8
+ require "seo_report/report"
9
+ require "seo_report/extractions"
10
+ require "seo_report/representation"
@@ -0,0 +1,29 @@
1
# coding: utf-8
# Gem specification of seo_report. The version is read from
# lib/seo_report/version.rb, which is why lib/ is put on the load path first.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'seo_report/version'

Gem::Specification.new do |gem|
  gem.name          = "seo_report"
  gem.version       = SeoReport::VERSION
  gem.authors       = ["Tim Reddehase"]
  gem.email         = ["robustus@rightsrestricted.com"]

  gem.summary       = "report seo relevant data for a given url"
  gem.description   = "Get a report with seo relevant data for a given URL, like redirects, canonical, robots, Soc. Med. data and so on."
  gem.homepage      = "https://github.com/0robustus1/seo_report"
  gem.license       = "LGPL-3.0"

  # Package every file tracked by git except tests, specs and features.
  tracked_files     = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |path| path =~ %r{^(test|spec|features)/} }
  gem.bindir        = "exe"
  # Everything below exe/ is installed as an executable.
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_dependency "nokogiri", "~> 1.6.8"
  gem.add_dependency "json", "~> 2.0.1"

  gem.add_development_dependency "bundler", "~> 1.10"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec"
  gem.add_development_dependency "pry"
end
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: seo_report
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Tim Reddehase
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-08-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.6.8
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.6.8
27
+ - !ruby/object:Gem::Dependency
28
+ name: json
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 2.0.1
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 2.0.1
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.10'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.10'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Get a report with seo relevant data for a given URL, like redirects,
98
+ canonical, robots, Soc. Med. data and so on.
99
+ email:
100
+ - robustus@rightsrestricted.com
101
+ executables:
102
+ - seo-report
103
+ extensions: []
104
+ extra_rdoc_files: []
105
+ files:
106
+ - ".gitignore"
107
+ - ".rspec"
108
+ - ".travis.yml"
109
+ - Gemfile
110
+ - LICENSE
111
+ - LICENSE.txt
112
+ - README.md
113
+ - Rakefile
114
+ - bin/console
115
+ - bin/setup
116
+ - exe/seo-report
117
+ - lib/seo_report.rb
118
+ - lib/seo_report/extractions.rb
119
+ - lib/seo_report/extractions/head.rb
120
+ - lib/seo_report/extractions/microdata.rb
121
+ - lib/seo_report/extractions/opengraph.rb
122
+ - lib/seo_report/extractions/seo.rb
123
+ - lib/seo_report/extractions/twitter.rb
124
+ - lib/seo_report/report.rb
125
+ - lib/seo_report/representation.rb
126
+ - lib/seo_report/representation/base.rb
127
+ - lib/seo_report/representation/cli.rb
128
+ - lib/seo_report/representation/json.rb
129
+ - lib/seo_report/request.rb
130
+ - lib/seo_report/request_chain.rb
131
+ - lib/seo_report/version.rb
132
+ - seo_report.gemspec
133
+ homepage: https://github.com/0robustus1/seo_report
134
+ licenses:
135
+ - LGPL-3.0
136
+ metadata: {}
137
+ post_install_message:
138
+ rdoc_options: []
139
+ require_paths:
140
+ - lib
141
+ required_ruby_version: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ requirements: []
152
+ rubyforge_project:
153
+ rubygems_version: 2.4.5.1
154
+ signing_key:
155
+ specification_version: 4
156
+ summary: report seo relevant data for a given url
157
+ test_files: []