sitemap_check 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 51d3f2f99c25a2034868ae1434c94e9adafdb50a
4
- data.tar.gz: adf5452975f758257bfabf1d87e471bd2c2fb98a
3
+ metadata.gz: 84dc68b7fb5e1fec3814c8f6e82f94d8cf43b619
4
+ data.tar.gz: 37cefa64e976770366fafe82b89d5d34dc97adce
5
5
  SHA512:
6
- metadata.gz: b9736873b07433315af0134f0cd5df00a39d9dc7b316fc2066bf3d00423dc38007e32cbc587320058e1360180d14549f7c9e48585744d263fdcf128461b055d6
7
- data.tar.gz: 9d6411787e55f4767cf8a10224b04ed4696b3381bfc425a2010c960a2c81512658bde3299b0abf56df580b028a531af6cc09249be44ff547926f63672e15f742
6
+ metadata.gz: a89b4e56d7df0b4cafef3e5844c64afc5510d7dfeccf6db602bcfecd591d1a95acc15370fa10468de214d5965e0bc47e9d066fdd306a6f1827e931b089360bf2
7
+ data.tar.gz: 0cc65ef4219b022f7675b53e10a3a242ac3e723d75ea89b0ad38b4c5650011f70c06828286b3fe123d37929141f6a246946808a2fb4f8dcc02861a63a9c09829
data/README.md CHANGED
@@ -9,3 +9,12 @@
9
9
  ```bash
10
10
  $ CHECK_URL=http://reevoo.com/sitemap_index.xml sitemap_check
11
11
  ```
12
+
13
+ ## Config
14
+
15
+ Config can be set with environment variables
16
+
17
+ variable | default | description
18
+ -------------|---------|-------------
19
+ `CHECK_URL` | `nil` | The url of the sitemap or sitemap index to check
20
+ `CONCURRENCY`| `10` | The number of concurrent threads to use when checking the sitemap
@@ -0,0 +1,16 @@
1
+ class SitemapCheck
2
+ class Logger
3
+ def initialize(stream = $stdout)
4
+ self.stream = stream
5
+ self.mutex = Mutex.new
6
+ end
7
+
8
+ def log(message)
9
+ mutex.synchronize { stream.puts message }
10
+ end
11
+
12
+ protected
13
+
14
+ attr_accessor :stream, :mutex
15
+ end
16
+ end
@@ -13,7 +13,7 @@ class SitemapCheck
13
13
 
14
14
  def exists?
15
15
  @_exists ||= http.head(url, follow_redirect: true).ok?
16
- rescue SocketError, HTTPClient::ConnectTimeoutError
16
+ rescue SocketError, HTTPClient::ConnectTimeoutError, Errno::ETIMEDOUT
17
17
  self.tries += 1
18
18
  if tries < 5
19
19
  sleep holdoff
@@ -1,11 +1,13 @@
1
1
  require 'httpclient'
2
2
  require 'sitemap_check/page'
3
+ require 'sitemap_check/logger'
3
4
  require 'nokogiri'
4
5
  require 'colorize'
5
6
 
6
7
  class SitemapCheck
7
8
  class Sitemap
8
- def initialize(url, http = HTTPClient.new)
9
+ def initialize(url, http = HTTPClient.new, logger = Logger.new)
10
+ self.logger = logger
9
11
  self.url = url
10
12
  self.checked = 0
11
13
  self.http = http
@@ -32,33 +34,32 @@ class SitemapCheck
32
34
 
33
35
  protected
34
36
 
35
- attr_accessor :http, :doc
37
+ attr_accessor :http, :doc, :logger
36
38
  attr_writer :url, :checked
37
39
 
38
40
  private
39
41
 
40
42
  def concurency
41
- ENV.fetch('CONCURENCY', 10)
43
+ ENV.fetch('CONCURRENCY', '10').to_i
42
44
  end
43
45
 
44
46
  def find_missing_pages # rubocop:disable Metrics/AbcSize
45
47
  q = Queue.new
46
- mutex = Mutex.new
47
48
  pages.each { |page| q.push page }
48
49
  concurency.times.map do
49
50
  Thread.new do
50
51
  begin
51
52
  while (page = q.pop(true))
52
53
  unless page.exists?
53
- puts " missing: #{page.url}".red
54
+ logger.log " missing: #{page.url}".red
54
55
  page
55
56
  end
56
- mutex.synchronize { self.checked += 1 }
57
57
  end
58
58
  rescue ThreadError # rubocop:disable Lint/HandleExceptions
59
59
  end
60
60
  end
61
61
  end.each(&:join)
62
+ self.checked = pages.count
62
63
  pages.reject(&:exists?)
63
64
  end
64
65
 
@@ -1,3 +1,3 @@
1
1
  class SitemapCheck
2
- VERSION = '0.1.0'
2
+ VERSION = '0.1.1'
3
3
  end
data/lib/sitemap_check.rb CHANGED
@@ -3,12 +3,14 @@ require 'sitemap_check/sitemap'
3
3
 
4
4
  class SitemapCheck
5
5
 
6
+
6
7
  def self.check
7
8
  $stdout.sync = true
8
9
  new.check
9
10
  end
10
11
 
11
12
  def initialize(http = HTTPClient.new)
13
+ self.start_time = Time.now
12
14
  self.exit_code = 0
13
15
  puts "Expanding Sitemaps from #{ENV['CHECK_URL']}"
14
16
  self.sitemaps = Sitemap.new(ENV['CHECK_URL'], http).sitemaps
@@ -17,18 +19,36 @@ class SitemapCheck
17
19
  def check
18
20
  check_indexes
19
21
  check_pages
22
+ stats
20
23
  exit exit_code
21
24
  end
22
25
 
23
26
  protected
24
27
 
25
- attr_accessor :sitemaps, :exit_code
28
+ attr_accessor :sitemaps, :exit_code, :start_time, :logger
26
29
 
27
30
  private
28
31
 
32
+ def stats
33
+ puts "checked #{sitemaps.count} sitemaps and #{checked_pages} in #{time_taken} seconds"
34
+ puts "thats #{pages_per_second} pages per second"
35
+ end
36
+
37
+ def pages_per_second
38
+ checked_pages / time_taken
39
+ end
40
+
41
+ def time_taken
42
+ Time.now - start_time
43
+ end
44
+
45
+ def checked_pages
46
+ sitemaps.map(&:checked).reduce(&:+)
47
+ end
48
+
29
49
  def check_indexes
30
50
  sitemaps.reject(&:exists?).each do |sitemap|
31
- puts "#{sitemap.url} does not exist".red.bold
51
+ puts " #{sitemap.url} does not exist".red.bold
32
52
  self.exit_code = 1
33
53
  end
34
54
  puts ''
@@ -58,14 +78,14 @@ class SitemapCheck
58
78
 
59
79
  def missing_pages(sitemap)
60
80
  self.exit_code = 1
61
- puts "checked #{sitemap.checked} pages and #{sitemap.missing_pages.count} were missing".red.bold
81
+ puts " checked #{sitemap.checked} pages and #{sitemap.missing_pages.count} were missing".red.bold
62
82
  end
63
83
 
64
84
  def a_ok(sitemap)
65
- puts "checked #{sitemap.checked} pages and everything was ok".green.bold
85
+ puts " checked #{sitemap.checked} pages and everything was ok".green.bold
66
86
  end
67
87
 
68
88
  def nothing_doing
69
- puts 'this sitemap did not contain any pages'.green
89
+ puts ' this sitemap did not contain any pages'.green
70
90
  end
71
91
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitemap_check
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ed Robinson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-17 00:00:00.000000000 Z
11
+ date: 2015-04-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -155,6 +155,7 @@ files:
155
155
  - Rakefile
156
156
  - bin/sitemap_check
157
157
  - lib/sitemap_check.rb
158
+ - lib/sitemap_check/logger.rb
158
159
  - lib/sitemap_check/page.rb
159
160
  - lib/sitemap_check/sitemap.rb
160
161
  - lib/sitemap_check/version.rb