sitemap_check 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/sitemap_check +1 -1
- data/lib/sitemap_check.rb +5 -3
- data/lib/sitemap_check/page.rb +27 -24
- data/lib/sitemap_check/sitemap.rb +17 -40
- data/lib/sitemap_check/version.rb +1 -1
- data/sitemap_check.gemspec +6 -6
- metadata +15 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2fcf90cc5916a816ef6a0c7f191c7739066f5303
|
4
|
+
data.tar.gz: 632022e800ace98dbf9cbd9e3a3185f042a56756
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: de78fed953cfac1403d376f69841d736711fe8c3fe335f971ca712be882ab69930aca8392066e8251075dd610f6f893d9cfe7b7ed854b2df4770c4cd94e024ad
|
7
|
+
data.tar.gz: ffb5a6e6781ebe7d9eea77b42f5766d8e4f9e7cc6348cb482c687c3f2467b79cbcf2a5bd7f74d818fad395a1a6bb67aca742cf26ace822999f3cc6fd1b26bdd1
|
data/bin/sitemap_check
CHANGED
data/lib/sitemap_check.rb
CHANGED
@@ -9,12 +9,13 @@ class SitemapCheck
|
|
9
9
|
new(url).check
|
10
10
|
end
|
11
11
|
|
12
|
-
def initialize(
|
12
|
+
def initialize(check_url)
|
13
13
|
self.start_time = Time.now
|
14
14
|
self.exit_code = 0
|
15
|
-
check_url =
|
15
|
+
check_url = check_url
|
16
16
|
puts "Expanding Sitemaps from #{check_url}"
|
17
|
-
self.sitemaps = Sitemap.new(check_url
|
17
|
+
self.sitemaps = Sitemap.new(check_url).sitemaps
|
18
|
+
Typhoeus::Config.user_agent = "SitemapCheckbot/#{VERSION} (+https://github.com/reevoo/sitemap_check)"
|
18
19
|
end
|
19
20
|
|
20
21
|
def check
|
@@ -65,6 +66,7 @@ class SitemapCheck
|
|
65
66
|
|
66
67
|
def check_pages_in(sitemap)
|
67
68
|
puts "Checking #{sitemap.url}"
|
69
|
+
sitemap.check_pages
|
68
70
|
if sitemap.missing_pages.any?
|
69
71
|
missing_pages(sitemap)
|
70
72
|
else
|
data/lib/sitemap_check/page.rb
CHANGED
@@ -1,35 +1,38 @@
|
|
1
|
-
require "
|
1
|
+
require "typhoeus"
|
2
|
+
require "sitemap_check/logger"
|
3
|
+
require "colorize"
|
2
4
|
|
3
5
|
class SitemapCheck
|
4
6
|
class Page
|
5
|
-
def initialize(url,
|
7
|
+
def initialize(url, logger = Logger.new)
|
6
8
|
self.url = url
|
7
|
-
self.
|
8
|
-
self.
|
9
|
-
|
9
|
+
self.request = Typhoeus::Request.new(self.url, method: :head, followlocation: true)
|
10
|
+
self.logger = logger
|
11
|
+
setup_callbacks
|
10
12
|
end
|
11
13
|
|
12
|
-
attr_reader :url, :error
|
13
|
-
|
14
|
-
def exists?
|
15
|
-
@_exists ||= http.head(url, follow_redirect: true).ok?
|
16
|
-
rescue SocketError, HTTPClient::ConnectTimeoutError, Errno::ETIMEDOUT => e
|
17
|
-
self.tries += 1
|
18
|
-
if tries < 5
|
19
|
-
sleep holdoff
|
20
|
-
retry
|
21
|
-
else
|
22
|
-
self.error = e
|
23
|
-
@_exists = true
|
24
|
-
end
|
25
|
-
rescue HTTPClient::BadResponseError => e
|
26
|
-
self.error = e
|
27
|
-
@_exists = true
|
28
|
-
end
|
14
|
+
attr_reader :url, :request, :exists, :error
|
29
15
|
|
30
16
|
protected
|
31
17
|
|
32
|
-
|
33
|
-
|
18
|
+
attr_writer :url, :request
|
19
|
+
attr_accessor :logger
|
20
|
+
|
21
|
+
def setup_callbacks # rubocop:disable Metrics/AbcSize
|
22
|
+
request.on_complete do |response|
|
23
|
+
if response.success?
|
24
|
+
@exists = true
|
25
|
+
elsif response.timed_out?
|
26
|
+
@exists = true
|
27
|
+
logger.log " warning: request to #{url} timed out".magenta
|
28
|
+
elsif response.code == 404
|
29
|
+
@exists = false
|
30
|
+
logger.log " missing: #{url}".magenta
|
31
|
+
else
|
32
|
+
@error = true
|
33
|
+
logger.log " error: (#{response.code}) while connecting to #{url}".magenta
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
34
37
|
end
|
35
38
|
end
|
@@ -1,32 +1,36 @@
|
|
1
|
-
require "
|
1
|
+
require "typhoeus"
|
2
2
|
require "sitemap_check/page"
|
3
3
|
require "sitemap_check/logger"
|
4
4
|
require "nokogiri"
|
5
|
-
require "colorize"
|
6
5
|
|
7
6
|
class SitemapCheck
|
8
7
|
class Sitemap
|
9
|
-
def initialize(url,
|
8
|
+
def initialize(url, logger = Logger.new)
|
10
9
|
self.logger = logger
|
11
10
|
self.url = url
|
12
11
|
self.checked = 0
|
13
|
-
self.
|
14
|
-
self.queue = Queue.new
|
12
|
+
self.hydra = Typhoeus::Hydra.new(max_concurrency: concurency)
|
15
13
|
setup_doc
|
16
14
|
end
|
17
15
|
|
18
|
-
attr_reader :url, :checked
|
16
|
+
attr_reader :url, :checked, :pages
|
17
|
+
|
18
|
+
def check_pages
|
19
|
+
queue_pages
|
20
|
+
hydra.run
|
21
|
+
self.checked = pages.count
|
22
|
+
end
|
19
23
|
|
20
24
|
def sitemaps
|
21
25
|
expanded_sitemaps = maps.map do |sitemap|
|
22
|
-
map = Sitemap.new(sitemap.loc.text
|
26
|
+
map = Sitemap.new(sitemap.loc.text)
|
23
27
|
[map] + map.sitemaps
|
24
28
|
end.flatten
|
25
29
|
(expanded_sitemaps + [self]).uniq(&:url)
|
26
30
|
end
|
27
31
|
|
28
32
|
def missing_pages
|
29
|
-
|
33
|
+
pages.reject(&:exists)
|
30
34
|
end
|
31
35
|
|
32
36
|
def errored_pages
|
@@ -39,7 +43,7 @@ class SitemapCheck
|
|
39
43
|
|
40
44
|
protected
|
41
45
|
|
42
|
-
attr_accessor :
|
46
|
+
attr_accessor :hydra, :doc, :logger
|
43
47
|
attr_writer :url, :checked
|
44
48
|
|
45
49
|
private
|
@@ -48,46 +52,19 @@ class SitemapCheck
|
|
48
52
|
ENV.fetch("CONCURRENCY", "10").to_i
|
49
53
|
end
|
50
54
|
|
51
|
-
def find_missing_pages
|
52
|
-
queue_pages
|
53
|
-
check_pages
|
54
|
-
pages.reject(&:exists?)
|
55
|
-
end
|
56
|
-
|
57
|
-
def check_pages
|
58
|
-
concurency.times.map do
|
59
|
-
Thread.new do
|
60
|
-
begin
|
61
|
-
nil while check_page(queue.pop(true))
|
62
|
-
rescue ThreadError
|
63
|
-
nil
|
64
|
-
end
|
65
|
-
end
|
66
|
-
end.each(&:join)
|
67
|
-
self.checked = pages.count
|
68
|
-
end
|
69
|
-
|
70
|
-
def check_page(page)
|
71
|
-
return unless page
|
72
|
-
logger.log " missing: #{page.url}".red unless page.exists?
|
73
|
-
logger.log " warning: error connecting to #{page.url}".magenta if page.error
|
74
|
-
end
|
75
|
-
|
76
55
|
def queue_pages
|
77
|
-
pages.each { |page| queue
|
56
|
+
pages.each { |page| hydra.queue page.request }
|
78
57
|
end
|
79
58
|
|
80
59
|
def setup_doc
|
81
|
-
response =
|
82
|
-
return unless (@ok = response.
|
60
|
+
response = Typhoeus.get(url, followlocation: true)
|
61
|
+
return unless (@ok = response.success?)
|
83
62
|
self.doc = Nokogiri::Slop(response.body)
|
84
63
|
doc.remove_namespaces!
|
85
|
-
rescue HTTPClient::BadResponseError
|
86
|
-
@ok = false
|
87
64
|
end
|
88
65
|
|
89
66
|
def pages
|
90
|
-
doc.urlset.url.map { |url| Page.new(url.loc.text,
|
67
|
+
@pages ||= doc.urlset.url.map { |url| Page.new(url.loc.text, logger) }
|
91
68
|
rescue NoMethodError
|
92
69
|
[]
|
93
70
|
end
|
data/sitemap_check.gemspec
CHANGED
@@ -18,12 +18,12 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_dependency "nokogiri", "~> 1.
|
22
|
-
spec.add_dependency "
|
23
|
-
spec.add_dependency "colorize", "~> 0.
|
24
|
-
spec.add_development_dependency "bundler", "~> 1.
|
25
|
-
spec.add_development_dependency "rake", "~>
|
26
|
-
spec.add_development_dependency "rspec", "~> 3.
|
21
|
+
spec.add_dependency "nokogiri", "~> 1.7"
|
22
|
+
spec.add_dependency "typhoeus", "~> 1.1"
|
23
|
+
spec.add_dependency "colorize", "~> 0.8"
|
24
|
+
spec.add_development_dependency "bundler", "~> 1.14"
|
25
|
+
spec.add_development_dependency "rake", "~> 12.0"
|
26
|
+
spec.add_development_dependency "rspec", "~> 3.5"
|
27
27
|
spec.add_development_dependency "reevoocop"
|
28
28
|
spec.add_development_dependency "pry"
|
29
29
|
spec.add_development_dependency "codeclimate-test-reporter"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sitemap_check
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ed Robinson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-02-
|
11
|
+
date: 2017-02-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -16,84 +16,84 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.7'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.7'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: typhoeus
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '1.1'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '1.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: colorize
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0.
|
47
|
+
version: '0.8'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0.
|
54
|
+
version: '0.8'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: bundler
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '1.
|
61
|
+
version: '1.14'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '1.
|
68
|
+
version: '1.14'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rake
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
75
|
+
version: '12.0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
82
|
+
version: '12.0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: rspec
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '3.
|
89
|
+
version: '3.5'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '3.
|
96
|
+
version: '3.5'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: reevoocop
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|