sitemap_check 0.1.6 → 0.1.7

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d49c855487981b357e247bd51099795f9e8c1444
4
- data.tar.gz: 8787dcdc878e484471cb5a998a4dce27cf1f9eec
3
+ metadata.gz: 2fcf90cc5916a816ef6a0c7f191c7739066f5303
4
+ data.tar.gz: 632022e800ace98dbf9cbd9e3a3185f042a56756
5
5
  SHA512:
6
- metadata.gz: 1c50fa5f6518a2b6329a4a3e481cf4aa167af4fd1c772d4b3aec6fdf7f0493689c4c131f4eb0c424c3eec735689611b4be0e30d7ca252cd606566cdc394542c4
7
- data.tar.gz: 3325d58fb39689c119f54234e4000489121642046e0f440b1ae8ab2a48ae0200c1bb17e434b3e1252119c0ad61868c3571554121fd359bd3636b747457e6518b
6
+ metadata.gz: de78fed953cfac1403d376f69841d736711fe8c3fe335f971ca712be882ab69930aca8392066e8251075dd610f6f893d9cfe7b7ed854b2df4770c4cd94e024ad
7
+ data.tar.gz: ffb5a6e6781ebe7d9eea77b42f5766d8e4f9e7cc6348cb482c687c3f2467b79cbcf2a5bd7f74d818fad395a1a6bb67aca742cf26ace822999f3cc6fd1b26bdd1
@@ -1,4 +1,4 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require "sitemap_check"
4
- SitemapCheck.check ARGV[0]
4
+ SitemapCheck.check ENV.fetch("CHECK_URL", ARGV[0])
@@ -9,12 +9,13 @@ class SitemapCheck
9
9
  new(url).check
10
10
  end
11
11
 
12
- def initialize(url = nil, http = HTTPClient.new)
12
+ def initialize(check_url)
13
13
  self.start_time = Time.now
14
14
  self.exit_code = 0
15
- check_url = url || ENV.fetch("CHECK_URL")
15
+ check_url = check_url
16
16
  puts "Expanding Sitemaps from #{check_url}"
17
- self.sitemaps = Sitemap.new(check_url, http).sitemaps
17
+ self.sitemaps = Sitemap.new(check_url).sitemaps
18
+ Typhoeus::Config.user_agent = "SitemapCheckbot/#{VERSION} (+https://github.com/reevoo/sitemap_check)"
18
19
  end
19
20
 
20
21
  def check
@@ -65,6 +66,7 @@ class SitemapCheck
65
66
 
66
67
  def check_pages_in(sitemap)
67
68
  puts "Checking #{sitemap.url}"
69
+ sitemap.check_pages
68
70
  if sitemap.missing_pages.any?
69
71
  missing_pages(sitemap)
70
72
  else
@@ -1,35 +1,38 @@
1
- require "httpclient"
1
+ require "typhoeus"
2
+ require "sitemap_check/logger"
3
+ require "colorize"
2
4
 
3
5
  class SitemapCheck
4
6
  class Page
5
- def initialize(url, http = HTTPClient.new, holdoff = 1)
7
+ def initialize(url, logger = Logger.new)
6
8
  self.url = url
7
- self.http = http
8
- self.tries = 0
9
- self.holdoff = holdoff
9
+ self.request = Typhoeus::Request.new(self.url, method: :head, followlocation: true)
10
+ self.logger = logger
11
+ setup_callbacks
10
12
  end
11
13
 
12
- attr_reader :url, :error
13
-
14
- def exists?
15
- @_exists ||= http.head(url, follow_redirect: true).ok?
16
- rescue SocketError, HTTPClient::ConnectTimeoutError, Errno::ETIMEDOUT => e
17
- self.tries += 1
18
- if tries < 5
19
- sleep holdoff
20
- retry
21
- else
22
- self.error = e
23
- @_exists = true
24
- end
25
- rescue HTTPClient::BadResponseError => e
26
- self.error = e
27
- @_exists = true
28
- end
14
+ attr_reader :url, :request, :exists, :error
29
15
 
30
16
  protected
31
17
 
32
- attr_accessor :http, :tries, :holdoff
33
- attr_writer :url, :error
18
+ attr_writer :url, :request
19
+ attr_accessor :logger
20
+
21
+ def setup_callbacks # rubocop:disable Metrics/AbcSize
22
+ request.on_complete do |response|
23
+ if response.success?
24
+ @exists = true
25
+ elsif response.timed_out?
26
+ @exists = true
27
+ logger.log " warning: request to #{url} timed out".magenta
28
+ elsif response.code == 404
29
+ @exists = false
30
+ logger.log " missing: #{url}".magenta
31
+ else
32
+ @error = true
33
+ logger.log " error: (#{response.code}) while connecting to #{url}".magenta
34
+ end
35
+ end
36
+ end
34
37
  end
35
38
  end
@@ -1,32 +1,36 @@
1
- require "httpclient"
1
+ require "typhoeus"
2
2
  require "sitemap_check/page"
3
3
  require "sitemap_check/logger"
4
4
  require "nokogiri"
5
- require "colorize"
6
5
 
7
6
  class SitemapCheck
8
7
  class Sitemap
9
- def initialize(url, http = HTTPClient.new, logger = Logger.new)
8
+ def initialize(url, logger = Logger.new)
10
9
  self.logger = logger
11
10
  self.url = url
12
11
  self.checked = 0
13
- self.http = http
14
- self.queue = Queue.new
12
+ self.hydra = Typhoeus::Hydra.new(max_concurrency: concurency)
15
13
  setup_doc
16
14
  end
17
15
 
18
- attr_reader :url, :checked
16
+ attr_reader :url, :checked, :pages
17
+
18
+ def check_pages
19
+ queue_pages
20
+ hydra.run
21
+ self.checked = pages.count
22
+ end
19
23
 
20
24
  def sitemaps
21
25
  expanded_sitemaps = maps.map do |sitemap|
22
- map = Sitemap.new(sitemap.loc.text, http)
26
+ map = Sitemap.new(sitemap.loc.text)
23
27
  [map] + map.sitemaps
24
28
  end.flatten
25
29
  (expanded_sitemaps + [self]).uniq(&:url)
26
30
  end
27
31
 
28
32
  def missing_pages
29
- @_misssing ||= find_missing_pages
33
+ pages.reject(&:exists)
30
34
  end
31
35
 
32
36
  def errored_pages
@@ -39,7 +43,7 @@ class SitemapCheck
39
43
 
40
44
  protected
41
45
 
42
- attr_accessor :http, :doc, :logger, :queue
46
+ attr_accessor :hydra, :doc, :logger
43
47
  attr_writer :url, :checked
44
48
 
45
49
  private
@@ -48,46 +52,19 @@ class SitemapCheck
48
52
  ENV.fetch("CONCURRENCY", "10").to_i
49
53
  end
50
54
 
51
- def find_missing_pages
52
- queue_pages
53
- check_pages
54
- pages.reject(&:exists?)
55
- end
56
-
57
- def check_pages
58
- concurency.times.map do
59
- Thread.new do
60
- begin
61
- nil while check_page(queue.pop(true))
62
- rescue ThreadError
63
- nil
64
- end
65
- end
66
- end.each(&:join)
67
- self.checked = pages.count
68
- end
69
-
70
- def check_page(page)
71
- return unless page
72
- logger.log " missing: #{page.url}".red unless page.exists?
73
- logger.log " warning: error connecting to #{page.url}".magenta if page.error
74
- end
75
-
76
55
  def queue_pages
77
- pages.each { |page| queue.push page }
56
+ pages.each { |page| hydra.queue page.request }
78
57
  end
79
58
 
80
59
  def setup_doc
81
- response = http.get(url, follow_redirect: true)
82
- return unless (@ok = response.ok?)
60
+ response = Typhoeus.get(url, followlocation: true)
61
+ return unless (@ok = response.success?)
83
62
  self.doc = Nokogiri::Slop(response.body)
84
63
  doc.remove_namespaces!
85
- rescue HTTPClient::BadResponseError
86
- @ok = false
87
64
  end
88
65
 
89
66
  def pages
90
- doc.urlset.url.map { |url| Page.new(url.loc.text, http) }
67
+ @pages ||= doc.urlset.url.map { |url| Page.new(url.loc.text, logger) }
91
68
  rescue NoMethodError
92
69
  []
93
70
  end
@@ -1,3 +1,3 @@
1
1
  class SitemapCheck
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.7"
3
3
  end
@@ -18,12 +18,12 @@ Gem::Specification.new do |spec|
18
18
  spec.executables = spec.files.grep(/^bin\//) { |f| File.basename(f) }
19
19
  spec.require_paths = ["lib"]
20
20
 
21
- spec.add_dependency "nokogiri", "~> 1.5"
22
- spec.add_dependency "httpclient", "~> 2.6"
23
- spec.add_dependency "colorize", "~> 0.7"
24
- spec.add_development_dependency "bundler", "~> 1.9"
25
- spec.add_development_dependency "rake", "~> 10.0"
26
- spec.add_development_dependency "rspec", "~> 3.1"
21
+ spec.add_dependency "nokogiri", "~> 1.7"
22
+ spec.add_dependency "typhoeus", "~> 1.1"
23
+ spec.add_dependency "colorize", "~> 0.8"
24
+ spec.add_development_dependency "bundler", "~> 1.14"
25
+ spec.add_development_dependency "rake", "~> 12.0"
26
+ spec.add_development_dependency "rspec", "~> 3.5"
27
27
  spec.add_development_dependency "reevoocop"
28
28
  spec.add_development_dependency "pry"
29
29
  spec.add_development_dependency "codeclimate-test-reporter"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitemap_check
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ed Robinson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-14 00:00:00.000000000 Z
11
+ date: 2017-02-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -16,84 +16,84 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.5'
19
+ version: '1.7'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.5'
26
+ version: '1.7'
27
27
  - !ruby/object:Gem::Dependency
28
- name: httpclient
28
+ name: typhoeus
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.6'
33
+ version: '1.1'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.6'
40
+ version: '1.1'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: colorize
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0.7'
47
+ version: '0.8'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0.7'
54
+ version: '0.8'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: bundler
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '1.9'
61
+ version: '1.14'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '1.9'
68
+ version: '1.14'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rake
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '10.0'
75
+ version: '12.0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '10.0'
82
+ version: '12.0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: rspec
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '3.1'
89
+ version: '3.5'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '3.1'
96
+ version: '3.5'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: reevoocop
99
99
  requirement: !ruby/object:Gem::Requirement