sitemap_check 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 51d3f2f99c25a2034868ae1434c94e9adafdb50a
4
- data.tar.gz: adf5452975f758257bfabf1d87e471bd2c2fb98a
3
+ metadata.gz: 84dc68b7fb5e1fec3814c8f6e82f94d8cf43b619
4
+ data.tar.gz: 37cefa64e976770366fafe82b89d5d34dc97adce
5
5
  SHA512:
6
- metadata.gz: b9736873b07433315af0134f0cd5df00a39d9dc7b316fc2066bf3d00423dc38007e32cbc587320058e1360180d14549f7c9e48585744d263fdcf128461b055d6
7
- data.tar.gz: 9d6411787e55f4767cf8a10224b04ed4696b3381bfc425a2010c960a2c81512658bde3299b0abf56df580b028a531af6cc09249be44ff547926f63672e15f742
6
+ metadata.gz: a89b4e56d7df0b4cafef3e5844c64afc5510d7dfeccf6db602bcfecd591d1a95acc15370fa10468de214d5965e0bc47e9d066fdd306a6f1827e931b089360bf2
7
+ data.tar.gz: 0cc65ef4219b022f7675b53e10a3a242ac3e723d75ea89b0ad38b4c5650011f70c06828286b3fe123d37929141f6a246946808a2fb4f8dcc02861a63a9c09829
data/README.md CHANGED
@@ -9,3 +9,12 @@
9
9
  ```bash
10
10
  $ CHECK_URL=http://reevoo.com/sitemap_index.xml sitemap_check
11
11
  ```
12
+
13
+ ## Config
14
+
15
+ Config can be set with environment variables
16
+
17
+ variable | default | description
18
+ -------------|---------|-------------
19
+ `CHECK_URL` | `nil` | The url of the sitemap or sitemap index to check
20
+ `CONCURRENCY`| `10` | The number of concurrent threads to use when checking the sitemap
@@ -0,0 +1,16 @@
1
+ class SitemapCheck
2
+ class Logger
3
+ def initialize(stream = $stdout)
4
+ self.stream = stream
5
+ self.mutex = Mutex.new
6
+ end
7
+
8
+ def log(message)
9
+ mutex.synchronize { stream.puts message }
10
+ end
11
+
12
+ protected
13
+
14
+ attr_accessor :stream, :mutex
15
+ end
16
+ end
@@ -13,7 +13,7 @@ class SitemapCheck
13
13
 
14
14
  def exists?
15
15
  @_exists ||= http.head(url, follow_redirect: true).ok?
16
- rescue SocketError, HTTPClient::ConnectTimeoutError
16
+ rescue SocketError, HTTPClient::ConnectTimeoutError, Errno::ETIMEDOUT
17
17
  self.tries += 1
18
18
  if tries < 5
19
19
  sleep holdoff
@@ -1,11 +1,13 @@
1
1
  require 'httpclient'
2
2
  require 'sitemap_check/page'
3
+ require 'sitemap_check/logger'
3
4
  require 'nokogiri'
4
5
  require 'colorize'
5
6
 
6
7
  class SitemapCheck
7
8
  class Sitemap
8
- def initialize(url, http = HTTPClient.new)
9
+ def initialize(url, http = HTTPClient.new, logger = Logger.new)
10
+ self.logger = logger
9
11
  self.url = url
10
12
  self.checked = 0
11
13
  self.http = http
@@ -32,33 +34,32 @@ class SitemapCheck
32
34
 
33
35
  protected
34
36
 
35
- attr_accessor :http, :doc
37
+ attr_accessor :http, :doc, :logger
36
38
  attr_writer :url, :checked
37
39
 
38
40
  private
39
41
 
40
42
  def concurency
41
- ENV.fetch('CONCURENCY', 10)
43
+ ENV.fetch('CONCURRENCY', '10').to_i
42
44
  end
43
45
 
44
46
  def find_missing_pages # rubocop:disable Metrics/AbcSize
45
47
  q = Queue.new
46
- mutex = Mutex.new
47
48
  pages.each { |page| q.push page }
48
49
  concurency.times.map do
49
50
  Thread.new do
50
51
  begin
51
52
  while (page = q.pop(true))
52
53
  unless page.exists?
53
- puts " missing: #{page.url}".red
54
+ logger.log " missing: #{page.url}".red
54
55
  page
55
56
  end
56
- mutex.synchronize { self.checked += 1 }
57
57
  end
58
58
  rescue ThreadError # rubocop:disable Lint/HandleExceptions
59
59
  end
60
60
  end
61
61
  end.each(&:join)
62
+ self.checked = pages.count
62
63
  pages.reject(&:exists?)
63
64
  end
64
65
 
@@ -1,3 +1,3 @@
1
1
  class SitemapCheck
2
- VERSION = '0.1.0'
2
+ VERSION = '0.1.1'
3
3
  end
data/lib/sitemap_check.rb CHANGED
@@ -3,12 +3,14 @@ require 'sitemap_check/sitemap'
3
3
 
4
4
  class SitemapCheck
5
5
 
6
+
6
7
  def self.check
7
8
  $stdout.sync = true
8
9
  new.check
9
10
  end
10
11
 
11
12
  def initialize(http = HTTPClient.new)
13
+ self.start_time = Time.now
12
14
  self.exit_code = 0
13
15
  puts "Expanding Sitemaps from #{ENV['CHECK_URL']}"
14
16
  self.sitemaps = Sitemap.new(ENV['CHECK_URL'], http).sitemaps
@@ -17,18 +19,36 @@ class SitemapCheck
17
19
  def check
18
20
  check_indexes
19
21
  check_pages
22
+ stats
20
23
  exit exit_code
21
24
  end
22
25
 
23
26
  protected
24
27
 
25
- attr_accessor :sitemaps, :exit_code
28
+ attr_accessor :sitemaps, :exit_code, :start_time, :logger
26
29
 
27
30
  private
28
31
 
32
+ def stats
33
+ puts "checked #{sitemaps.count} sitemaps and #{checked_pages} in #{time_taken} seconds"
34
+ puts "thats #{pages_per_second} pages per second"
35
+ end
36
+
37
+ def pages_per_second
38
+ checked_pages / time_taken
39
+ end
40
+
41
+ def time_taken
42
+ Time.now - start_time
43
+ end
44
+
45
+ def checked_pages
46
+ sitemaps.map(&:checked).reduce(&:+)
47
+ end
48
+
29
49
  def check_indexes
30
50
  sitemaps.reject(&:exists?).each do |sitemap|
31
- puts "#{sitemap.url} does not exist".red.bold
51
+ puts " #{sitemap.url} does not exist".red.bold
32
52
  self.exit_code = 1
33
53
  end
34
54
  puts ''
@@ -58,14 +78,14 @@ class SitemapCheck
58
78
 
59
79
  def missing_pages(sitemap)
60
80
  self.exit_code = 1
61
- puts "checked #{sitemap.checked} pages and #{sitemap.missing_pages.count} were missing".red.bold
81
+ puts " checked #{sitemap.checked} pages and #{sitemap.missing_pages.count} were missing".red.bold
62
82
  end
63
83
 
64
84
  def a_ok(sitemap)
65
- puts "checked #{sitemap.checked} pages and everything was ok".green.bold
85
+ puts " checked #{sitemap.checked} pages and everything was ok".green.bold
66
86
  end
67
87
 
68
88
  def nothing_doing
69
- puts 'this sitemap did not contain any pages'.green
89
+ puts ' this sitemap did not contain any pages'.green
70
90
  end
71
91
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sitemap_check
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ed Robinson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-17 00:00:00.000000000 Z
11
+ date: 2015-04-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -155,6 +155,7 @@ files:
155
155
  - Rakefile
156
156
  - bin/sitemap_check
157
157
  - lib/sitemap_check.rb
158
+ - lib/sitemap_check/logger.rb
158
159
  - lib/sitemap_check/page.rb
159
160
  - lib/sitemap_check/sitemap.rb
160
161
  - lib/sitemap_check/version.rb