sitemap_check 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/.travis.yml +3 -0
- data/CODE_OF_CONDUCT.md +3 -0
- data/Gemfile +4 -0
- data/LICENCE +21 -0
- data/README.md +11 -0
- data/Rakefile +10 -0
- data/bin/sitemap_check +4 -0
- data/lib/sitemap_check/page.rb +33 -0
- data/lib/sitemap_check/sitemap.rb +86 -0
- data/lib/sitemap_check/version.rb +3 -0
- data/lib/sitemap_check.rb +71 -0
- data/sitemap_check.gemspec +30 -0
- metadata +186 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 51d3f2f99c25a2034868ae1434c94e9adafdb50a
|
4
|
+
data.tar.gz: adf5452975f758257bfabf1d87e471bd2c2fb98a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b9736873b07433315af0134f0cd5df00a39d9dc7b316fc2066bf3d00423dc38007e32cbc587320058e1360180d14549f7c9e48585744d263fdcf128461b055d6
|
7
|
+
data.tar.gz: 9d6411787e55f4767cf8a10224b04ed4696b3381bfc425a2010c960a2c81512658bde3299b0abf56df580b028a531af6cc09249be44ff547926f63672e15f742
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.2.2
|
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
data/Gemfile
ADDED
data/LICENCE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Reevoo
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'bundler/gem_tasks'
require 'reevoocop/rake_task'
require 'rspec/core/rake_task'

# Register the lint and spec tasks under the names used as prerequisites below.
ReevooCop::RakeTask.new(:reevoocop)
RSpec::Core::RakeTask.new(:spec)

# The default task, as well as the bundler-provided release and build tasks,
# all run the specs and the linter first.
quality_gates = [:spec, :reevoocop]
[:default, :release, :build].each do |task_name|
  task task_name => quality_gates
end
|
data/bin/sitemap_check
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'httpclient'
|
2
|
+
|
3
|
+
class SitemapCheck
  # A single URL listed in a sitemap that can report whether it resolves.
  class Page
    # Total number of HEAD attempts made before a page that keeps failing with
    # a socket or connect-timeout error is reported as missing.
    MAX_TRIES = 5

    # @param url [String] the page location to check
    # @param http [#head] HTTP client used for the request (an HTTPClient by default)
    # @param holdoff [Numeric] seconds to sleep between retries
    def initialize(url, http = HTTPClient.new, holdoff = 1)
      self.url = url
      self.http = http
      self.tries = 0
      self.holdoff = holdoff
    end

    attr_reader :url

    # Issues a HEAD request (following redirects) and reports whether the
    # response was OK. Socket and connect-timeout errors are retried, sleeping
    # `holdoff` seconds in between, until MAX_TRIES attempts have been made;
    # a bad response is reported as missing straight away.
    #
    # The outcome is cached whether it is true or false, so repeated calls
    # never hit the network again.
    #
    # @return [Boolean]
    def exists?
      # `||=` would re-run the request every time the cached answer is false,
      # so check for the ivar explicitly instead.
      return @_exists if defined?(@_exists)
      @_exists = http.head(url, follow_redirect: true).ok?
    rescue SocketError, HTTPClient::ConnectTimeoutError
      self.tries += 1
      if tries < MAX_TRIES
        sleep holdoff
        retry
      else
        @_exists = false
      end
    rescue HTTPClient::BadResponseError
      @_exists = false
    end

    protected

    attr_accessor :http, :tries, :holdoff
    attr_writer :url
  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'httpclient'
|
2
|
+
require 'sitemap_check/page'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'colorize'
|
5
|
+
|
6
|
+
class SitemapCheck
  # A sitemap (or sitemap index) fetched from a URL. It can expand nested
  # sitemaps and report which of its listed pages do not resolve.
  class Sitemap
    # Fetches and parses the sitemap immediately.
    #
    # @param url [String] location of the sitemap
    # @param http [#get] HTTP client, shared with every nested Sitemap and Page
    def initialize(url, http = HTTPClient.new)
      self.url = url
      self.checked = 0
      self.http = http
      setup_doc
    end

    attr_reader :url, :checked

    # Recursively expands a sitemap index.
    #
    # @return [Array<Sitemap>] every nested sitemap followed by this one,
    #   de-duplicated by URL
    def sitemaps
      nested = maps.flat_map do |location|
        child = Sitemap.new(location, http)
        [child] + child.sitemaps
      end
      (nested + [self]).uniq(&:url)
    end

    # Checks every page once (on first call) and memoizes the result.
    #
    # @return [Array<Page>] the pages that did not resolve, in sitemap order
    def missing_pages
      @_missing ||= find_missing_pages
    end

    # @return [Boolean, nil] whether the sitemap itself was fetched successfully
    def exists? # rubocop:disable Style/TrivialAccessors
      @ok
    end

    protected

    attr_accessor :http, :doc
    attr_writer :url, :checked

    private

    # Number of worker threads, read from the CONCURENCY environment variable
    # (default 10). ENV values are Strings, so the value is converted
    # explicitly; a non-numeric setting raises ArgumentError instead of failing
    # later with an obscure NoMethodError. Always at least 1 so that pages are
    # never silently skipped.
    def concurency
      [Integer(ENV.fetch('CONCURENCY', 10)), 1].max
    end

    # Checks all pages using a pool of worker threads and counts them in
    # `checked`.
    def find_missing_pages # rubocop:disable Metrics/AbcSize
      # Build the Page objects exactly once: `pages` creates new instances on
      # every call, so calling it again would repeat every request.
      candidates = pages
      workers = concurency
      queue = Queue.new
      candidates.each { |page| queue.push(page) }
      # One nil per worker acts as a stop marker once the real work is drained.
      workers.times { queue.push(nil) }
      lock = Mutex.new
      missing = []
      Array.new(workers) do
        Thread.new do
          while (page = queue.pop)
            absent = !page.exists?
            puts " missing: #{page.url}".red if absent
            lock.synchronize do
              missing << page if absent
              self.checked += 1
            end
          end
        end
      end.each(&:join)
      # Intersect to report the missing pages in their original sitemap order.
      candidates & missing
    end

    # Downloads the sitemap and parses it, recording in @ok whether the
    # response was OK. `doc` stays nil when the fetch fails.
    def setup_doc
      response = http.get(url, follow_redirect: true)
      return unless (@ok = response.ok?)
      self.doc = Nokogiri::Slop(response.body)
      doc.remove_namespaces!
    rescue HTTPClient::BadResponseError
      @ok = false
    end

    # Text of every node matching the given XPath, or [] when nothing was
    # parsed. XPath always yields a node set, whereas the Slop accessors return
    # a bare element when exactly one child matches, which made single-entry
    # sitemaps look empty.
    def locations(path)
      return [] unless doc
      # Strip so pretty-printed <loc> elements with surrounding whitespace
      # still yield a usable URL.
      doc.xpath(path).map { |loc| loc.text.strip }
    end

    # @return [Array<Page>] a new Page for every <url><loc> entry
    def pages
      locations('/urlset/url/loc').map { |location| Page.new(location, http) }
    end

    # @return [Array<String>] the location of every nested sitemap
    def maps
      locations('/sitemapindex/sitemap/loc')
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'colorize'
|
2
|
+
require 'sitemap_check/sitemap'
|
3
|
+
|
4
|
+
# Command-line runner: expands the sitemap found at ENV['CHECK_URL'], reports
# sitemaps that could not be fetched and pages that do not resolve, then exits
# with status 1 if anything was missing and 0 otherwise.
class SitemapCheck
  # Entry point used by the executable; output is unbuffered so progress shows
  # up as it happens.
  def self.check
    $stdout.sync = true
    new.check
  end

  # @param http [Object] HTTP client handed to the root Sitemap
  def initialize(http = HTTPClient.new)
    root_url = ENV['CHECK_URL']
    self.exit_code = 0
    puts "Expanding Sitemaps from #{root_url}"
    self.sitemaps = Sitemap.new(root_url, http).sitemaps
  end

  # Runs both checks and terminates the process with the resulting status.
  def check
    check_indexes
    check_pages
    exit exit_code
  end

  protected

  attr_accessor :sitemaps, :exit_code

  private

  # Reports every sitemap that could not be fetched.
  def check_indexes
    unreachable = sitemaps.reject(&:exists?)
    self.exit_code = 1 unless unreachable.empty?
    unreachable.each do |sitemap|
      puts "#{sitemap.url} does not exist".red.bold
    end
    puts ''
  end

  # Sitemaps that were fetched successfully.
  def good_sitemaps
    sitemaps.select(&:exists?)
  end

  def check_pages
    good_sitemaps.each(&method(:check_pages_in))
  end

  # Prints the outcome for one sitemap: missing pages, all ok, or empty.
  def check_pages_in(sitemap)
    puts "Checking #{sitemap.url}"
    if sitemap.missing_pages.any?
      missing_pages(sitemap)
    elsif sitemap.checked > 0
      a_ok(sitemap)
    else
      nothing_doing
    end
    puts ''
  end

  def missing_pages(sitemap)
    self.exit_code = 1
    summary = "checked #{sitemap.checked} pages and #{sitemap.missing_pages.count} were missing"
    puts summary.red.bold
  end

  def a_ok(sitemap)
    summary = "checked #{sitemap.checked} pages and everything was ok"
    puts summary.green.bold
  end

  def nothing_doing
    puts 'this sitemap did not contain any pages'.green
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
# Make lib/ loadable so the version constant can be required below.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'sitemap_check/version'

Gem::Specification.new do |gem|
  gem.name = 'sitemap_check'
  gem.version = SitemapCheck::VERSION
  gem.authors = ['Ed Robinson']
  gem.email = ['ed@reevoo.com']

  gem.summary = 'Check for broken links in your sitemap'
  gem.homepage = 'https://github.com/reevoo/sitemap_check'
  gem.license = 'MIT'

  # Package everything tracked by git except the specs.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path.match(/^spec\//) }
  gem.bindir = 'bin'
  # Every file under bin/ is installed as an executable.
  gem.executables = gem.files.grep(/^bin\//).map { |path| File.basename(path) }
  gem.require_paths = ['lib']

  # Runtime dependencies.
  [
    ['nokogiri', '~> 1.5'],
    ['httpclient', '~> 2.6'],
    ['colorize', '~> 0.7']
  ].each { |name, requirement| gem.add_dependency name, requirement }

  # Development dependencies with a version constraint.
  [
    ['bundler', '~> 1.9'],
    ['rake', '~> 10.0'],
    ['rspec', '~> 3.1']
  ].each { |name, requirement| gem.add_development_dependency name, requirement }

  # Development dependencies that accept any version.
  %w(reevoocop pry codeclimate-test-reporter).each do |name|
    gem.add_development_dependency name
  end
end
|
metadata
ADDED
@@ -0,0 +1,186 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sitemap_check
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Ed Robinson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-04-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: httpclient
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.6'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.6'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: colorize
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.7'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.7'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: bundler
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.9'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.9'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '10.0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '10.0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '3.1'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '3.1'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: reevoocop
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: pry
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: codeclimate-test-reporter
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
description:
|
140
|
+
email:
|
141
|
+
- ed@reevoo.com
|
142
|
+
executables:
|
143
|
+
- sitemap_check
|
144
|
+
extensions: []
|
145
|
+
extra_rdoc_files: []
|
146
|
+
files:
|
147
|
+
- ".gitignore"
|
148
|
+
- ".rspec"
|
149
|
+
- ".ruby-version"
|
150
|
+
- ".travis.yml"
|
151
|
+
- CODE_OF_CONDUCT.md
|
152
|
+
- Gemfile
|
153
|
+
- LICENCE
|
154
|
+
- README.md
|
155
|
+
- Rakefile
|
156
|
+
- bin/sitemap_check
|
157
|
+
- lib/sitemap_check.rb
|
158
|
+
- lib/sitemap_check/page.rb
|
159
|
+
- lib/sitemap_check/sitemap.rb
|
160
|
+
- lib/sitemap_check/version.rb
|
161
|
+
- sitemap_check.gemspec
|
162
|
+
homepage: https://github.com/reevoo/sitemap_check
|
163
|
+
licenses:
|
164
|
+
- MIT
|
165
|
+
metadata: {}
|
166
|
+
post_install_message:
|
167
|
+
rdoc_options: []
|
168
|
+
require_paths:
|
169
|
+
- lib
|
170
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
171
|
+
requirements:
|
172
|
+
- - ">="
|
173
|
+
- !ruby/object:Gem::Version
|
174
|
+
version: '0'
|
175
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
176
|
+
requirements:
|
177
|
+
- - ">="
|
178
|
+
- !ruby/object:Gem::Version
|
179
|
+
version: '0'
|
180
|
+
requirements: []
|
181
|
+
rubyforge_project:
|
182
|
+
rubygems_version: 2.4.5
|
183
|
+
signing_key:
|
184
|
+
specification_version: 4
|
185
|
+
summary: Check for broken links in your sitemap
|
186
|
+
test_files: []
|