sitemap_check 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/sitemap_check +1 -1
- data/lib/sitemap_check.rb +5 -3
- data/lib/sitemap_check/page.rb +27 -24
- data/lib/sitemap_check/sitemap.rb +17 -40
- data/lib/sitemap_check/version.rb +1 -1
- data/sitemap_check.gemspec +6 -6
- metadata +15 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2fcf90cc5916a816ef6a0c7f191c7739066f5303
|
4
|
+
data.tar.gz: 632022e800ace98dbf9cbd9e3a3185f042a56756
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de78fed953cfac1403d376f69841d736711fe8c3fe335f971ca712be882ab69930aca8392066e8251075dd610f6f893d9cfe7b7ed854b2df4770c4cd94e024ad
|
7
|
+
data.tar.gz: ffb5a6e6781ebe7d9eea77b42f5766d8e4f9e7cc6348cb482c687c3f2467b79cbcf2a5bd7f74d818fad395a1a6bb67aca742cf26ace822999f3cc6fd1b26bdd1
|
data/bin/sitemap_check
CHANGED
data/lib/sitemap_check.rb
CHANGED
@@ -9,12 +9,13 @@ class SitemapCheck
|
|
9
9
|
new(url).check
|
10
10
|
end
|
11
11
|
|
12
|
-
def initialize(
|
12
|
+
def initialize(check_url)
|
13
13
|
self.start_time = Time.now
|
14
14
|
self.exit_code = 0
|
15
|
-
check_url =
|
15
|
+
check_url = check_url
|
16
16
|
puts "Expanding Sitemaps from #{check_url}"
|
17
|
-
self.sitemaps = Sitemap.new(check_url
|
17
|
+
self.sitemaps = Sitemap.new(check_url).sitemaps
|
18
|
+
Typhoeus::Config.user_agent = "SitemapCheckbot/#{VERSION} (+https://github.com/reevoo/sitemap_check)"
|
18
19
|
end
|
19
20
|
|
20
21
|
def check
|
@@ -65,6 +66,7 @@ class SitemapCheck
|
|
65
66
|
|
66
67
|
def check_pages_in(sitemap)
|
67
68
|
puts "Checking #{sitemap.url}"
|
69
|
+
sitemap.check_pages
|
68
70
|
if sitemap.missing_pages.any?
|
69
71
|
missing_pages(sitemap)
|
70
72
|
else
|
data/lib/sitemap_check/page.rb
CHANGED
@@ -1,35 +1,38 @@
|
|
1
|
-
require "
|
1
|
+
require "typhoeus"
|
2
|
+
require "sitemap_check/logger"
|
3
|
+
require "colorize"
|
2
4
|
|
3
5
|
class SitemapCheck
|
4
6
|
class Page
|
5
|
-
def initialize(url,
|
7
|
+
def initialize(url, logger = Logger.new)
|
6
8
|
self.url = url
|
7
|
-
self.
|
8
|
-
self.
|
9
|
-
|
9
|
+
self.request = Typhoeus::Request.new(self.url, method: :head, followlocation: true)
|
10
|
+
self.logger = logger
|
11
|
+
setup_callbacks
|
10
12
|
end
|
11
13
|
|
12
|
-
attr_reader :url, :error
|
13
|
-
|
14
|
-
def exists?
|
15
|
-
@_exists ||= http.head(url, follow_redirect: true).ok?
|
16
|
-
rescue SocketError, HTTPClient::ConnectTimeoutError, Errno::ETIMEDOUT => e
|
17
|
-
self.tries += 1
|
18
|
-
if tries < 5
|
19
|
-
sleep holdoff
|
20
|
-
retry
|
21
|
-
else
|
22
|
-
self.error = e
|
23
|
-
@_exists = true
|
24
|
-
end
|
25
|
-
rescue HTTPClient::BadResponseError => e
|
26
|
-
self.error = e
|
27
|
-
@_exists = true
|
28
|
-
end
|
14
|
+
attr_reader :url, :request, :exists, :error
|
29
15
|
|
30
16
|
protected
|
31
17
|
|
32
|
-
|
33
|
-
|
18
|
+
attr_writer :url, :request
|
19
|
+
attr_accessor :logger
|
20
|
+
|
21
|
+
def setup_callbacks # rubocop:disable Metrics/AbcSize
|
22
|
+
request.on_complete do |response|
|
23
|
+
if response.success?
|
24
|
+
@exists = true
|
25
|
+
elsif response.timed_out?
|
26
|
+
@exists = true
|
27
|
+
logger.log " warning: request to #{url} timed out".magenta
|
28
|
+
elsif response.code == 404
|
29
|
+
@exists = false
|
30
|
+
logger.log " missing: #{url}".magenta
|
31
|
+
else
|
32
|
+
@error = true
|
33
|
+
logger.log " error: (#{response.code}) while connecting to #{url}".magenta
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
34
37
|
end
|
35
38
|
end
|
data/lib/sitemap_check/sitemap.rb
CHANGED
@@ -1,32 +1,36 @@
|
|
1
|
-
require "
|
1
|
+
require "typhoeus"
|
2
2
|
require "sitemap_check/page"
|
3
3
|
require "sitemap_check/logger"
|
4
4
|
require "nokogiri"
|
5
|
-
require "colorize"
|
6
5
|
|
7
6
|
class SitemapCheck
|
8
7
|
class Sitemap
|
9
|
-
def initialize(url,
|
8
|
+
def initialize(url, logger = Logger.new)
|
10
9
|
self.logger = logger
|
11
10
|
self.url = url
|
12
11
|
self.checked = 0
|
13
|
-
self.
|
14
|
-
self.queue = Queue.new
|
12
|
+
self.hydra = Typhoeus::Hydra.new(max_concurrency: concurency)
|
15
13
|
setup_doc
|
16
14
|
end
|
17
15
|
|
18
|
-
attr_reader :url, :checked
|
16
|
+
attr_reader :url, :checked, :pages
|
17
|
+
|
18
|
+
def check_pages
|
19
|
+
queue_pages
|
20
|
+
hydra.run
|
21
|
+
self.checked = pages.count
|
22
|
+
end
|
19
23
|
|
20
24
|
def sitemaps
|
21
25
|
expanded_sitemaps = maps.map do |sitemap|
|
22
|
-
map = Sitemap.new(sitemap.loc.text
|
26
|
+
map = Sitemap.new(sitemap.loc.text)
|
23
27
|
[map] + map.sitemaps
|
24
28
|
end.flatten
|
25
29
|
(expanded_sitemaps + [self]).uniq(&:url)
|
26
30
|
end
|
27
31
|
|
28
32
|
def missing_pages
|
29
|
-
|
33
|
+
pages.reject(&:exists)
|
30
34
|
end
|
31
35
|
|
32
36
|
def errored_pages
|
@@ -39,7 +43,7 @@ class SitemapCheck
|
|
39
43
|
|
40
44
|
protected
|
41
45
|
|
42
|
-
attr_accessor :
|
46
|
+
attr_accessor :hydra, :doc, :logger
|
43
47
|
attr_writer :url, :checked
|
44
48
|
|
45
49
|
private
|
@@ -48,46 +52,19 @@ class SitemapCheck
|
|
48
52
|
ENV.fetch("CONCURRENCY", "10").to_i
|
49
53
|
end
|
50
54
|
|
51
|
-
def find_missing_pages
|
52
|
-
queue_pages
|
53
|
-
check_pages
|
54
|
-
pages.reject(&:exists?)
|
55
|
-
end
|
56
|
-
|
57
|
-
def check_pages
|
58
|
-
concurency.times.map do
|
59
|
-
Thread.new do
|
60
|
-
begin
|
61
|
-
nil while check_page(queue.pop(true))
|
62
|
-
rescue ThreadError
|
63
|
-
nil
|
64
|
-
end
|
65
|
-
end
|
66
|
-
end.each(&:join)
|
67
|
-
self.checked = pages.count
|
68
|
-
end
|
69
|
-
|
70
|
-
def check_page(page)
|
71
|
-
return unless page
|
72
|
-
logger.log " missing: #{page.url}".red unless page.exists?
|
73
|
-
logger.log " warning: error connecting to #{page.url}".magenta if page.error
|
74
|
-
end
|
75
|
-
|
76
55
|
def queue_pages
|
77
|
-
pages.each { |page| queue
|
56
|
+
pages.each { |page| hydra.queue page.request }
|
78
57
|
end
|
79
58
|
|
80
59
|
def setup_doc
|
81
|
-
response =
|
82
|
-
return unless (@ok = response.
|
60
|
+
response = Typhoeus.get(url, followlocation: true)
|
61
|
+
return unless (@ok = response.success?)
|
83
62
|
self.doc = Nokogiri::Slop(response.body)
|
84
63
|
doc.remove_namespaces!
|
85
|
-
rescue HTTPClient::BadResponseError
|
86
|
-
@ok = false
|
87
64
|
end
|
88
65
|
|
89
66
|
def pages
|
90
|
-
doc.urlset.url.map { |url| Page.new(url.loc.text,
|
67
|
+
@pages ||= doc.urlset.url.map { |url| Page.new(url.loc.text, logger) }
|
91
68
|
rescue NoMethodError
|
92
69
|
[]
|
93
70
|
end
|
data/sitemap_check.gemspec
CHANGED
@@ -18,12 +18,12 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_dependency "nokogiri", "~> 1.
|
22
|
-
spec.add_dependency "
|
23
|
-
spec.add_dependency "colorize", "~> 0.
|
24
|
-
spec.add_development_dependency "bundler", "~> 1.
|
25
|
-
spec.add_development_dependency "rake", "~>
|
26
|
-
spec.add_development_dependency "rspec", "~> 3.
|
21
|
+
spec.add_dependency "nokogiri", "~> 1.7"
|
22
|
+
spec.add_dependency "typhoeus", "~> 1.1"
|
23
|
+
spec.add_dependency "colorize", "~> 0.8"
|
24
|
+
spec.add_development_dependency "bundler", "~> 1.14"
|
25
|
+
spec.add_development_dependency "rake", "~> 12.0"
|
26
|
+
spec.add_development_dependency "rspec", "~> 3.5"
|
27
27
|
spec.add_development_dependency "reevoocop"
|
28
28
|
spec.add_development_dependency "pry"
|
29
29
|
spec.add_development_dependency "codeclimate-test-reporter"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap_check
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ed Robinson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
11
|
+
date: 2017-02-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -16,84 +16,84 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.7'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.7'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: typhoeus
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '1.1'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '1.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: colorize
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0.
|
47
|
+
version: '0.8'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.
|
54
|
+
version: '0.8'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: bundler
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '1.
|
61
|
+
version: '1.14'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '1.
|
68
|
+
version: '1.14'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rake
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
75
|
+
version: '12.0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
82
|
+
version: '12.0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rspec
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '3.
|
89
|
+
version: '3.5'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '3.
|
96
|
+
version: '3.5'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: reevoocop
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|