sitemap_check 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 51d3f2f99c25a2034868ae1434c94e9adafdb50a
4
+ data.tar.gz: adf5452975f758257bfabf1d87e471bd2c2fb98a
5
+ SHA512:
6
+ metadata.gz: b9736873b07433315af0134f0cd5df00a39d9dc7b316fc2066bf3d00423dc38007e32cbc587320058e1360180d14549f7c9e48585744d263fdcf128461b055d6
7
+ data.tar.gz: 9d6411787e55f4767cf8a10224b04ed4696b3381bfc425a2010c960a2c81512658bde3299b0abf56df580b028a531af6cc09249be44ff547926f63672e15f742
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.2.2
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.2.2
@@ -0,0 +1,3 @@
1
+ # Contributor Code of Conduct
2
+
3
+ Matz is Nice And So We Are Nice
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in sitemap_check.gemspec
4
+ gemspec
data/LICENCE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Reevoo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,11 @@
1
+ # Sitemap Check
2
+
3
+ ## Install
4
+
5
+ `gem install sitemap_check`
6
+
7
+ ## Usage
8
+
9
+ ```bash
10
+ $ CHECK_URL=http://reevoo.com/sitemap_index.xml sitemap_check
11
+ ```
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'reevoocop/rake_task'
3
+ require 'rspec/core/rake_task'
4
+
5
# Register the lint task (:reevoocop) and the spec task (:spec).
ReevooCop::RakeTask.new(:reevoocop)
RSpec::Core::RakeTask.new(:spec)

# The default, release and build tasks all get the same prerequisites:
# the specs first, then the linter.
[:default, :release, :build].each do |entry_point|
  task entry_point => [:spec, :reevoocop]
end
data/bin/sitemap_check ADDED
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'sitemap_check'
4
+ SitemapCheck.check
@@ -0,0 +1,33 @@
1
+ require 'httpclient'
2
+
3
class SitemapCheck
  # A single URL taken from a sitemap, with a cached answer to "does a HEAD
  # request for it succeed?".
  class Page
    # Total number of attempts made when the connection itself fails.
    MAX_TRIES = 5

    # @param url [String] address of the page to check
    # @param http [#head] client used for the request (a new HTTPClient by default)
    # @param holdoff [Numeric] seconds to sleep between connection retries
    def initialize(url, http = HTTPClient.new, holdoff = 1)
      self.url = url
      self.http = http
      self.tries = 0
      self.holdoff = holdoff
    end

    attr_reader :url

    # Sends a HEAD request (following redirects) and reports whether the
    # response answers true to `ok?`.
    #
    # Socket errors and connect timeouts are retried, sleeping `holdoff`
    # seconds in between, until MAX_TRIES attempts have been made; after that,
    # and on a bad response, the page counts as missing.
    #
    # The answer is cached, including a negative one, so the request (and its
    # retries) is performed at most once per Page.
    #
    # @return [Boolean]
    def exists?
      # `@_exists ||= ...` would repeat the request every time the cached
      # answer is false, so check whether the ivar has been assigned instead.
      return @_exists if defined?(@_exists)
      @_exists = http.head(url, follow_redirect: true).ok?
    rescue SocketError, HTTPClient::ConnectTimeoutError
      self.tries += 1
      if tries < MAX_TRIES
        sleep holdoff
        retry
      else
        @_exists = false
      end
    rescue HTTPClient::BadResponseError
      @_exists = false
    end

    protected

    attr_accessor :http, :tries, :holdoff
    attr_writer :url
  end
end
@@ -0,0 +1,86 @@
1
+ require 'httpclient'
2
+ require 'sitemap_check/page'
3
+ require 'nokogiri'
4
+ require 'colorize'
5
+
6
class SitemapCheck
  # One sitemap document, fetched once when the object is built. The document
  # is either a <urlset> listing pages or a <sitemapindex> listing further
  # sitemaps.
  class Sitemap
    # @param url [String] address of the sitemap document
    # @param http [#get] client used to fetch this document; it is also
    #   handed to every nested Sitemap and every Page created from it
    def initialize(url, http = HTTPClient.new)
      self.url = url
      self.checked = 0
      self.http = http
      setup_doc
    end

    attr_reader :url, :checked

    # Recursively expands every sitemap referenced by this document.
    #
    # @return [Array<Sitemap>] all nested sitemaps followed by this one,
    #   de-duplicated by URL
    def sitemaps
      nested = maps.flat_map do |sitemap_url|
        child = Sitemap.new(sitemap_url, http)
        [child] + child.sitemaps
      end
      (nested + [self]).uniq(&:url)
    end

    # Checks every page listed in this sitemap (once; the result is cached).
    #
    # @return [Array<Page>] pages that do not exist, in document order
    def missing_pages
      @_missing ||= find_missing_pages
    end

    # @return [Boolean] whether the sitemap document itself was fetched OK
    def exists? # rubocop:disable Style/TrivialAccessors
      @ok
    end

    protected

    attr_accessor :http, :doc
    attr_writer :url, :checked

    private

    # Number of worker threads, taken from the CONCURENCY environment
    # variable (default 10). ENV values are Strings, so the value is converted
    # explicitly; anything below 1 is raised to 1 so pages are still checked.
    #
    # @raise [ArgumentError] if CONCURENCY is not an integer
    def concurency
      [Integer(ENV.fetch('CONCURENCY', 10)), 1].max
    end

    # Checks all pages with a pool of `concurency` threads and returns the
    # missing ones. Each page is checked exactly once: the Page list is built
    # a single time and the workers record their findings, rather than the
    # pages being rebuilt and re-requested after the threads finish.
    def find_missing_pages
      all_pages = pages
      queue = Queue.new
      all_pages.each { |page| queue.push(page) }
      missing = {}.compare_by_identity
      mutex = Mutex.new
      workers = Array.new(concurency) do
        Thread.new { check_queued_pages(queue, missing, mutex) }
      end
      workers.each(&:join)
      # Report in document order, not in the order the workers finished.
      all_pages.select { |page| missing.key?(page) }
    end

    # Worker loop: takes pages off the queue until it is empty, printing each
    # missing page and recording it in `missing`.
    def check_queued_pages(queue, missing, mutex)
      loop do
        page = queue.pop(true) # non-blocking pop raises ThreadError when empty
        found = page.exists?
        puts " missing: #{page.url}".red unless found
        mutex.synchronize do
          self.checked += 1
          missing[page] = true unless found
        end
      end
    rescue ThreadError # rubocop:disable Lint/HandleExceptions
      # Queue drained: this worker has nothing left to do.
    end

    # Fetches the sitemap and, when the response is OK, parses it and strips
    # XML namespaces so elements can be addressed by their bare names.
    def setup_doc
      response = http.get(url, follow_redirect: true)
      @ok = response.ok?
      return unless @ok
      self.doc = Nokogiri::Slop(response.body)
      doc.remove_namespaces!
    rescue HTTPClient::BadResponseError
      @ok = false
    end

    # @return [Array<Page>] one Page per <url><loc> entry; empty when the
    #   document is not a urlset or could not be fetched
    def pages
      locations('/urlset/url/loc').map { |page_url| Page.new(page_url, http) }
    end

    # @return [Array<String>] URLs of the sitemaps listed in a sitemapindex;
    #   empty when the document is not an index or could not be fetched
    def maps
      locations('/sitemapindex/sitemap/loc')
    end

    # Text of every node matching `path`, with surrounding whitespace removed.
    # An explicit XPath query always yields a node list. The Slop-style
    # accessors (doc.urlset.url) hand back a bare element when exactly one
    # node matches, so mapping over the result stops iterating the entries.
    def locations(path)
      return [] unless doc
      doc.xpath(path).map { |loc| loc.text.strip }
    end
  end
end
@@ -0,0 +1,3 @@
1
class SitemapCheck
  # Release number of the gem. Frozen so the constant cannot be mutated in
  # place by code that reads it.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,71 @@
1
+ require 'colorize'
2
+ require 'sitemap_check/sitemap'
3
+
4
# Command-line runner: expands the sitemap named by the CHECK_URL environment
# variable, reports sitemaps and pages that do not exist, and exits with
# status 1 if anything was missing (0 otherwise).
class SitemapCheck
  # Entry point used by the executable. Turns off stdout buffering so output
  # appears as it is produced, then runs the check.
  def self.check
    $stdout.sync = true
    new.check
  end

  # Fetches and expands the sitemap tree rooted at ENV['CHECK_URL'].
  #
  # @param http [Object] HTTP client handed to the root Sitemap
  # @raise [ArgumentError] if CHECK_URL is unset or blank, instead of passing
  #   nil on to the HTTP client
  def initialize(http = HTTPClient.new)
    check_url = ENV['CHECK_URL'].to_s.strip
    if check_url.empty?
      raise ArgumentError, 'CHECK_URL must be set to the URL of the sitemap to check'
    end

    self.exit_code = 0
    puts "Expanding Sitemaps from #{check_url}"
    self.sitemaps = Sitemap.new(check_url, http).sitemaps
  end

  # Reports unreachable sitemaps, then checks the pages of the reachable
  # ones, and finally terminates the process with the accumulated exit code.
  def check
    check_indexes
    check_pages
    exit exit_code
  end

  protected

  attr_accessor :sitemaps, :exit_code

  private

  # Prints every sitemap that could not be fetched and marks the run failed.
  def check_indexes
    sitemaps.reject(&:exists?).each do |sitemap|
      puts "#{sitemap.url} does not exist".red.bold
      self.exit_code = 1
    end
    puts ''
  end

  def good_sitemaps
    sitemaps.select(&:exists?)
  end

  def check_pages
    good_sitemaps.each { |sitemap| check_pages_in(sitemap) }
  end

  # Checks one sitemap and prints exactly one summary line for it.
  def check_pages_in(sitemap)
    puts "Checking #{sitemap.url}"
    if sitemap.missing_pages.any?
      missing_pages(sitemap)
    elsif sitemap.checked > 0
      a_ok(sitemap)
    else
      # Nothing was checked: the document listed no pages (e.g. an index).
      nothing_doing
    end
    puts ''
  end

  def missing_pages(sitemap)
    self.exit_code = 1
    puts "checked #{sitemap.checked} pages and #{sitemap.missing_pages.count} were missing".red.bold
  end

  def a_ok(sitemap)
    puts "checked #{sitemap.checked} pages and everything was ok".green.bold
  end

  def nothing_doing
    puts 'this sitemap did not contain any pages'.green
  end
end
@@ -0,0 +1,30 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'sitemap_check/version'
5
+
6
# Gem definition: identity, packaged files, and dependencies.
Gem::Specification.new do |gem|
  gem.name     = 'sitemap_check'
  gem.version  = SitemapCheck::VERSION
  gem.authors  = ['Ed Robinson']
  gem.email    = ['ed@reevoo.com']

  gem.summary  = 'Check for broken links in your sitemap'
  gem.homepage = 'https://github.com/reevoo/sitemap_check'
  gem.license  = 'MIT'

  # Package every file tracked by git except the specs; anything under bin/
  # is installed as an executable.
  tracked_files     = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |path| path =~ %r{^spec/} }
  gem.bindir        = 'bin'
  gem.executables   = gem.files.select { |path| path =~ %r{^bin/} }.map { |path| File.basename(path) }
  gem.require_paths = ['lib']

  # Runtime dependencies.
  [
    ['nokogiri', '~> 1.5'],
    ['httpclient', '~> 2.6'],
    ['colorize', '~> 0.7'],
  ].each { |dependency| gem.add_dependency(*dependency) }

  # Development-only dependencies; those without a constraint accept any version.
  [
    ['bundler', '~> 1.9'],
    ['rake', '~> 10.0'],
    ['rspec', '~> 3.1'],
    ['reevoocop'],
    ['pry'],
    ['codeclimate-test-reporter'],
  ].each { |dependency| gem.add_development_dependency(*dependency) }
end
metadata ADDED
@@ -0,0 +1,186 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sitemap_check
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Ed Robinson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: httpclient
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.6'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '2.6'
41
+ - !ruby/object:Gem::Dependency
42
+ name: colorize
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.7'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.9'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.9'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.1'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.1'
97
+ - !ruby/object:Gem::Dependency
98
+ name: reevoocop
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: pry
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: codeclimate-test-reporter
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ description:
140
+ email:
141
+ - ed@reevoo.com
142
+ executables:
143
+ - sitemap_check
144
+ extensions: []
145
+ extra_rdoc_files: []
146
+ files:
147
+ - ".gitignore"
148
+ - ".rspec"
149
+ - ".ruby-version"
150
+ - ".travis.yml"
151
+ - CODE_OF_CONDUCT.md
152
+ - Gemfile
153
+ - LICENCE
154
+ - README.md
155
+ - Rakefile
156
+ - bin/sitemap_check
157
+ - lib/sitemap_check.rb
158
+ - lib/sitemap_check/page.rb
159
+ - lib/sitemap_check/sitemap.rb
160
+ - lib/sitemap_check/version.rb
161
+ - sitemap_check.gemspec
162
+ homepage: https://github.com/reevoo/sitemap_check
163
+ licenses:
164
+ - MIT
165
+ metadata: {}
166
+ post_install_message:
167
+ rdoc_options: []
168
+ require_paths:
169
+ - lib
170
+ required_ruby_version: !ruby/object:Gem::Requirement
171
+ requirements:
172
+ - - ">="
173
+ - !ruby/object:Gem::Version
174
+ version: '0'
175
+ required_rubygems_version: !ruby/object:Gem::Requirement
176
+ requirements:
177
+ - - ">="
178
+ - !ruby/object:Gem::Version
179
+ version: '0'
180
+ requirements: []
181
+ rubyforge_project:
182
+ rubygems_version: 2.4.5
183
+ signing_key:
184
+ specification_version: 4
185
+ summary: Check for broken links in your sitemap
186
+ test_files: []