sitemap_check 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d49c855487981b357e247bd51099795f9e8c1444
4
- data.tar.gz: 8787dcdc878e484471cb5a998a4dce27cf1f9eec
3
+ metadata.gz: 2fcf90cc5916a816ef6a0c7f191c7739066f5303
4
+ data.tar.gz: 632022e800ace98dbf9cbd9e3a3185f042a56756
5
5
  SHA512:
6
- metadata.gz: 1c50fa5f6518a2b6329a4a3e481cf4aa167af4fd1c772d4b3aec6fdf7f0493689c4c131f4eb0c424c3eec735689611b4be0e30d7ca252cd606566cdc394542c4
7
- data.tar.gz: 3325d58fb39689c119f54234e4000489121642046e0f440b1ae8ab2a48ae0200c1bb17e434b3e1252119c0ad61868c3571554121fd359bd3636b747457e6518b
6
+ metadata.gz: de78fed953cfac1403d376f69841d736711fe8c3fe335f971ca712be882ab69930aca8392066e8251075dd610f6f893d9cfe7b7ed854b2df4770c4cd94e024ad
7
+ data.tar.gz: ffb5a6e6781ebe7d9eea77b42f5766d8e4f9e7cc6348cb482c687c3f2467b79cbcf2a5bd7f74d818fad395a1a6bb67aca742cf26ace822999f3cc6fd1b26bdd1
@@ -1,4 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require "sitemap_check"
4
- SitemapCheck.check ARGV[0]
4
+ SitemapCheck.check ENV.fetch("CHECK_URL", ARGV[0])
@@ -9,12 +9,13 @@ class SitemapCheck
9
9
  new(url).check
10
10
  end
11
11
 
12
- def initialize(url = nil, http = HTTPClient.new)
12
+ def initialize(check_url)
13
13
  self.start_time = Time.now
14
14
  self.exit_code = 0
15
- check_url = url || ENV.fetch("CHECK_URL")
15
+ check_url = check_url
16
16
  puts "Expanding Sitemaps from #{check_url}"
17
- self.sitemaps = Sitemap.new(check_url, http).sitemaps
17
+ self.sitemaps = Sitemap.new(check_url).sitemaps
18
+ Typhoeus::Config.user_agent = "SitemapCheckbot/#{VERSION} (+https://github.com/reevoo/sitemap_check)"
18
19
  end
19
20
 
20
21
  def check
@@ -65,6 +66,7 @@ class SitemapCheck
65
66
 
66
67
  def check_pages_in(sitemap)
67
68
  puts "Checking #{sitemap.url}"
69
+ sitemap.check_pages
68
70
  if sitemap.missing_pages.any?
69
71
  missing_pages(sitemap)
70
72
  else
@@ -1,35 +1,38 @@
1
- require "httpclient"
1
+ require "typhoeus"
2
+ require "sitemap_check/logger"
3
+ require "colorize"
2
4
 
3
5
  class SitemapCheck
4
6
  class Page
5
- def initialize(url, http = HTTPClient.new, holdoff = 1)
7
+ def initialize(url, logger = Logger.new)
6
8
  self.url = url
7
- self.http = http
8
- self.tries = 0
9
- self.holdoff = holdoff
9
+ self.request = Typhoeus::Request.new(self.url, method: :head, followlocation: true)
10
+ self.logger = logger
11
+ setup_callbacks
10
12
  end
11
13
 
12
- attr_reader :url, :error
13
-
14
- def exists?
15
- @_exists ||= http.head(url, follow_redirect: true).ok?
16
- rescue SocketError, HTTPClient::ConnectTimeoutError, Errno::ETIMEDOUT => e
17
- self.tries += 1
18
- if tries < 5
19
- sleep holdoff
20
- retry
21
- else
22
- self.error = e
23
- @_exists = true
24
- end
25
- rescue HTTPClient::BadResponseError => e
26
- self.error = e
27
- @_exists = true
28
- end
14
+ attr_reader :url, :request, :exists, :error
29
15
 
30
16
  protected
31
17
 
32
- attr_accessor :http, :tries, :holdoff
33
- attr_writer :url, :error
18
+ attr_writer :url, :request
19
+ attr_accessor :logger
20
+
21
+ def setup_callbacks # rubocop:disable Metrics/AbcSize
22
+ request.on_complete do |response|
23
+ if response.success?
24
+ @exists = true
25
+ elsif response.timed_out?
26
+ @exists = true
27
+ logger.log " warning: request to #{url} timed out".magenta
28
+ elsif response.code == 404
29
+ @exists = false
30
+ logger.log " missing: #{url}".magenta
31
+ else
32
+ @error = true
33
+ logger.log " error: (#{response.code}) while connecting to #{url}".magenta
34
+ end
35
+ end
36
+ end
34
37
  end
35
38
  end
@@ -1,32 +1,36 @@
1
- require "httpclient"
1
+ require "typhoeus"
2
2
  require "sitemap_check/page"
3
3
  require "sitemap_check/logger"
4
4
  require "nokogiri"
5
- require "colorize"
6
5
 
7
6
  class SitemapCheck
8
7
  class Sitemap
9
- def initialize(url, http = HTTPClient.new, logger = Logger.new)
8
+ def initialize(url, logger = Logger.new)
10
9
  self.logger = logger
11
10
  self.url = url
12
11
  self.checked = 0
13
- self.http = http
14
- self.queue = Queue.new
12
+ self.hydra = Typhoeus::Hydra.new(max_concurrency: concurency)
15
13
  setup_doc
16
14
  end
17
15
 
18
- attr_reader :url, :checked
16
+ attr_reader :url, :checked, :pages
17
+
18
+ def check_pages
19
+ queue_pages
20
+ hydra.run
21
+ self.checked = pages.count
22
+ end
19
23
 
20
24
  def sitemaps
21
25
  expanded_sitemaps = maps.map do |sitemap|
22
- map = Sitemap.new(sitemap.loc.text, http)
26
+ map = Sitemap.new(sitemap.loc.text)
23
27
  [map] + map.sitemaps
24
28
  end.flatten
25
29
  (expanded_sitemaps + [self]).uniq(&:url)
26
30
  end
27
31
 
28
32
  def missing_pages
29
- @_misssing ||= find_missing_pages
33
+ pages.reject(&:exists)
30
34
  end
31
35
 
32
36
  def errored_pages
@@ -39,7 +43,7 @@ class SitemapCheck
39
43
 
40
44
  protected
41
45
 
42
- attr_accessor :http, :doc, :logger, :queue
46
+ attr_accessor :hydra, :doc, :logger
43
47
  attr_writer :url, :checked
44
48
 
45
49
  private
@@ -48,46 +52,19 @@ class SitemapCheck
48
52
  ENV.fetch("CONCURRENCY", "10").to_i
49
53
  end
50
54
 
51
- def find_missing_pages
52
- queue_pages
53
- check_pages
54
- pages.reject(&:exists?)
55
- end
56
-
57
- def check_pages
58
- concurency.times.map do
59
- Thread.new do
60
- begin
61
- nil while check_page(queue.pop(true))
62
- rescue ThreadError
63
- nil
64
- end
65
- end
66
- end.each(&:join)
67
- self.checked = pages.count
68
- end
69
-
70
- def check_page(page)
71
- return unless page
72
- logger.log " missing: #{page.url}".red unless page.exists?
73
- logger.log " warning: error connecting to #{page.url}".magenta if page.error
74
- end
75
-
76
55
  def queue_pages
77
- pages.each { |page| queue.push page }
56
+ pages.each { |page| hydra.queue page.request }
78
57
  end
79
58
 
80
59
  def setup_doc
81
- response = http.get(url, follow_redirect: true)
82
- return unless (@ok = response.ok?)
60
+ response = Typhoeus.get(url, followlocation: true)
61
+ return unless (@ok = response.success?)
83
62
  self.doc = Nokogiri::Slop(response.body)
84
63
  doc.remove_namespaces!
85
- rescue HTTPClient::BadResponseError
86
- @ok = false
87
64
  end
88
65
 
89
66
  def pages
90
- doc.urlset.url.map { |url| Page.new(url.loc.text, http) }
67
+ @pages ||= doc.urlset.url.map { |url| Page.new(url.loc.text, logger) }
91
68
  rescue NoMethodError
92
69
  []
93
70
  end
@@ -1,3 +1,3 @@
1
1
  class SitemapCheck
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.7"
3
3
  end
@@ -18,12 +18,12 @@ Gem::Specification.new do |spec|
18
18
  spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_dependency "nokogiri", "~> 1.5"
22
- spec.add_dependency "httpclient", "~> 2.6"
23
- spec.add_dependency "colorize", "~> 0.7"
24
- spec.add_development_dependency "bundler", "~> 1.9"
25
- spec.add_development_dependency "rake", "~> 10.0"
26
- spec.add_development_dependency "rspec", "~> 3.1"
21
+ spec.add_dependency "nokogiri", "~> 1.7"
22
+ spec.add_dependency "typhoeus", "~> 1.1"
23
+ spec.add_dependency "colorize", "~> 0.8"
24
+ spec.add_development_dependency "bundler", "~> 1.14"
25
+ spec.add_development_dependency "rake", "~> 12.0"
26
+ spec.add_development_dependency "rspec", "~> 3.5"
27
27
  spec.add_development_dependency "reevoocop"
28
28
  spec.add_development_dependency "pry"
29
29
  spec.add_development_dependency "codeclimate-test-reporter"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitemap_check
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ed Robinson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-14 00:00:00.000000000 Z
11
+ date: 2017-02-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -16,84 +16,84 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.5'
19
+ version: '1.7'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.5'
26
+ version: '1.7'
27
27
  - !ruby/object:Gem::Dependency
28
- name: httpclient
28
+ name: typhoeus
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.6'
33
+ version: '1.1'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.6'
40
+ version: '1.1'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: colorize
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0.7'
47
+ version: '0.8'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0.7'
54
+ version: '0.8'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: bundler
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.9'
61
+ version: '1.14'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.9'
68
+ version: '1.14'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rake
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '10.0'
75
+ version: '12.0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '10.0'
82
+ version: '12.0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: rspec
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '3.1'
89
+ version: '3.5'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '3.1'
96
+ version: '3.5'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: reevoocop
99
99
  requirement: !ruby/object:Gem::Requirement