cobweb 0.0.13 → 0.0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  require 'rubygems'
2
+ require 'bundler/setup'
2
3
  require 'uri'
3
4
  require 'resque'
4
5
  require "addressable/uri"
@@ -9,7 +10,7 @@ Dir[File.dirname(__FILE__) + '/*.rb'].each do |file|
9
10
  require [File.dirname(__FILE__), File.basename(file, File.extname(file))].join("/")
10
11
  end
11
12
 
12
- class CobWeb
13
+ class Cobweb
13
14
 
14
15
  ## TASKS
15
16
 
@@ -65,7 +66,7 @@ class CobWeb
65
66
  if redis.get(unique_id) and @options[:cache]
66
67
  puts "Cache hit for #{url}" unless @options[:quiet]
67
68
  content = JSON.parse(redis.get(unique_id)).deep_symbolize_keys
68
- content[:body] = Base64.decode64(content[:body]) unless content[:body].nil? or content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
69
+ content[:body] = Base64.decode64(content[:body])
69
70
 
70
71
  content
71
72
  else
@@ -93,13 +94,25 @@ class CobWeb
93
94
 
94
95
  if @options[:follow_redirects] and response.code.to_i >= 300 and response.code.to_i < 400
95
96
  puts "redirected... " unless @options[:quiet]
97
+
98
+ # get location to redirect to
96
99
  url = absolutize.url(response['location']).to_s
100
+
101
+ # decrement redirect limit
97
102
  redirect_limit = redirect_limit - 1
103
+
104
+ # raise exception if we're being redirected to somewhere we've been redirected to in this content request
105
+ #raise RedirectError("Loop detected in redirect for - #{url}") if content[:redirect_through].include? url
106
+
107
+ # raise exception if redirect limit has reached 0
108
+ raise RedirectError, "Redirect Limit reached" if redirect_limit == 0
109
+
110
+ # get the content from redirect location
98
111
  content = get(url, redirect_limit)
99
112
  content[:url] = uri.to_s
100
113
  content[:redirect_through] = [] if content[:redirect_through].nil?
101
114
  content[:redirect_through].insert(0, url)
102
-
115
+
103
116
  content[:response_time] = Time.now.to_f - request_time
104
117
  else
105
118
  content[:response_time] = Time.now.to_f - request_time
@@ -109,8 +122,9 @@ class CobWeb
109
122
  # create the content container
110
123
  content[:url] = uri.to_s
111
124
  content[:status_code] = response.code.to_i
112
- content[:mime_type] = response.content_type.split(";")[0].strip
113
- if response["Content-Type"].include? ";"
125
+ content[:mime_type] = ""
126
+ content[:mime_type] = response.content_type.split(";")[0].strip unless response.content_type.nil?
127
+ if !response["Content-Type"].nil? && response["Content-Type"].include?(";")
114
128
  charset = response["Content-Type"][response["Content-Type"].index(";")+2..-1] if !response["Content-Type"].nil? and response["Content-Type"].include?(";")
115
129
  charset = charset[charset.index("=")+1..-1] if charset and charset.include?("=")
116
130
  content[:character_set] = charset
@@ -130,10 +144,25 @@ class CobWeb
130
144
  end
131
145
  # add content to cache if required
132
146
  if @options[:cache]
133
- content[:body] = Base64.encode64(content[:body]) unless content[:body].nil? or content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
147
+ content[:body] = Base64.encode64(content[:body])
134
148
  redis.set(unique_id, content.to_json)
135
149
  redis.expire unique_id, @options[:cache].to_i
136
150
  end
151
+ rescue RedirectError => e
152
+ puts "ERROR: #{e.message}"
153
+
154
+ ## generate a blank content
155
+ content = {}
156
+ content[:url] = uri.to_s
157
+ content[:response_time] = Time.now.to_f - request_time
158
+ content[:status_code] = 0
159
+ content[:length] = 0
160
+ content[:body] = ""
161
+ content[:error] = e.message
162
+ content[:mime_type] = "error/dnslookup"
163
+ content[:headers] = {}
164
+ content[:links] = {}
165
+
137
166
  rescue SocketError => e
138
167
  puts "ERROR: #{e.message}"
139
168
 
@@ -273,25 +302,7 @@ class CobWeb
273
302
  end
274
303
  end
275
304
 
276
- ## add symbolize methods to hash
277
- class Hash
278
- def symbolize_keys
279
- keys.each do |key|
280
- if key.instance_of? String
281
- value = self[key]
282
- self.delete(key)
283
- self[key.to_sym] = value
284
- end
285
- end
286
- self
287
- end
288
- def deep_symbolize_keys
289
- symbolize_keys
290
- keys.each do |key|
291
- if self[key].instance_of? Hash
292
- self[key].deep_symbolize_keys
293
- end
294
- end
295
- self
296
- end
297
- end
305
+
306
+
307
+
308
+
@@ -1,13 +1,20 @@
1
+ require 'digest/md5'
2
+ require 'date'
3
+ require 'ap'
4
+
1
5
  class CobwebCrawler
2
6
 
3
7
  def initialize(options={})
4
8
  @options = options
5
9
 
6
10
  @statistic = {}
7
- @queue = []
8
- @crawled = []
9
11
 
10
- @cobweb = CobWeb.new(@options)
12
+ @options[:redis_options] = {:host => "127.0.0.1"} unless @options.has_key? :redis_options
13
+ crawl_id = Digest::MD5.hexdigest(DateTime.now.inspect.to_s)
14
+
15
+ @redis = NamespacedRedis.new(Redis.new(@options[:redis_options]), "cobweb-#{crawl_id}")
16
+
17
+ @cobweb = Cobweb.new(@options)
11
18
  end
12
19
 
13
20
  def crawl(base_url, crawl_options = {}, &block)
@@ -17,104 +24,122 @@ class CobwebCrawler
17
24
 
18
25
  @absolutize = Absolutize.new(@options[:base_url], :output_debug => false, :raise_exceptions => false, :force_escaping => false, :remove_anchors => true)
19
26
 
20
- crawl_counter = @crawled.count
21
-
22
- @queue << base_url
23
-
24
- while !@queue.empty? && (@options[:crawl_limit].to_i == 0 || @options[:crawl_limit].to_i > crawl_counter)
25
-
26
- url = @queue.first
27
- @options[:url] = url
28
- unless @crawled.include?(url) || url =~ /\/(.+?)\/\1\/\1/
29
- begin
30
- content = @cobweb.get(@options[:url])
27
+ @redis.sadd "queued", base_url
28
+ crawl_counter = @redis.scard("crawled").to_i
29
+ queue_counter = @redis.scard("queued").to_i
31
30
 
32
- if @statistic[:average_response_time].nil?
33
- @statistic[:average_response_time] = content[:response_time].to_f
34
- else
35
- @statistic[:average_response_time] = (((@statistic[:average_response_time] * crawl_counter) + content[:response_time].to_f) / (crawl_counter + 1))
36
- end
31
+ while queue_counter>0 && (@options[:crawl_limit].to_i == 0 || @options[:crawl_limit].to_i > crawl_counter)
32
+ thread = Thread.new do
33
+
34
+ url = @redis.spop "queued"
35
+ crawl_counter = @redis.scard("crawled").to_i
36
+ queue_counter = @redis.scard("queued").to_i
37
+
38
+ @options[:url] = url
39
+ unless @redis.sismember("crawled", url.to_s)
40
+ begin
41
+ Stats.update_status("Requesting #{url}...")
42
+ content = @cobweb.get(url)
43
+ Stats.update_status("Processing #{url}...")
44
+
45
+ if @statistic[:average_response_time].nil?
46
+ @statistic[:average_response_time] = content[:response_time].to_f
47
+ else
48
+ @statistic[:average_response_time] = (((@statistic[:average_response_time] * crawl_counter) + content[:response_time].to_f) / (crawl_counter + 1))
49
+ end
37
50
 
38
- @statistic[:maximum_response_time] = content[:response_time] if @statistic[:maximum_response_time].nil? || @statistic[:maximum_response_time] < content[:response_time]
39
- @statistic[:minimum_response_time] = content[:response_time] if @statistic[:minimum_response_time].nil? || @statistic[:minimum_response_time] > content[:response_time]
51
+ @statistic[:maximum_response_time] = content[:response_time] if @statistic[:maximum_response_time].nil? || @statistic[:maximum_response_time] < content[:response_time]
52
+ @statistic[:minimum_response_time] = content[:response_time] if @statistic[:minimum_response_time].nil? || @statistic[:minimum_response_time] > content[:response_time]
40
53
 
41
- if @statistic[:average_length]
42
- @statistic[:average_length] = (((@statistic[:average_length].to_i*crawl_counter) + content[:length].to_i) / (crawl_counter + 1))
43
- else
44
- @statistic[:average_length] = content[:length].to_i
45
- end
54
+ if @statistic[:average_length]
55
+ @statistic[:average_length] = (((@statistic[:average_length].to_i*crawl_counter) + content[:length].to_i) / (crawl_counter + 1))
56
+ else
57
+ @statistic[:average_length] = content[:length].to_i
58
+ end
46
59
 
47
- @statistic[:maximum_length] = content[:length].to_i if @statistic[:maximum_length].nil? || content[:length].to_i > @statistic[:maximum_length].to_i
48
- @statistic[:minimum_length] = content[:length].to_i if @statistic[:minimum_length].nil? || content[:length].to_i < @statistic[:minimum_length].to_i
49
- @statistic[:total_length] = @statistic[:total_length].to_i + content[:length].to_i
50
-
51
- if content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
52
- @statistic[:page_count] = @statistic[:page_count].to_i + 1
53
- @statistic[:page_size] = @statistic[:page_count].to_i + content[:length].to_i
54
- else
55
- @statistic[:asset_count] = @statistic[:asset_count].to_i + 1
56
- @statistic[:asset_size] = @statistic[:asset_count].to_i + content[:length].to_i
57
- end
60
+ @statistic[:maximum_length] = content[:length].to_i if @statistic[:maximum_length].nil? || content[:length].to_i > @statistic[:maximum_length].to_i
61
+ @statistic[:minimum_length] = content[:length].to_i if @statistic[:minimum_length].nil? || content[:length].to_i < @statistic[:minimum_length].to_i
62
+ @statistic[:total_length] = @statistic[:total_length].to_i + content[:length].to_i
58
63
 
59
- mime_counts = {}
60
- if @statistic.has_key? :mime_counts
61
- mime_counts = @statistic[:mime_counts]
62
- if mime_counts.has_key? content[:mime_type]
63
- mime_counts[content[:mime_type]] += 1
64
+ if content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
65
+ @statistic[:page_count] = @statistic[:page_count].to_i + 1
66
+ @statistic[:page_size] = @statistic[:page_size].to_i + content[:length].to_i
64
67
  else
65
- mime_counts[content[:mime_type]] = 1
68
+ @statistic[:asset_count] = @statistic[:asset_count].to_i + 1
69
+ @statistic[:asset_size] = @statistic[:asset_size].to_i + content[:length].to_i
66
70
  end
67
- else
68
- mime_counts = {content[:mime_type] => 1}
69
- end
70
- @statistic[:mime_counts] = mime_counts
71
+
72
+ @statistic[:total_redirects] = 0 if @statistic[:total_redirects].nil?
73
+ @statistic[:total_redirects] += content[:redirect_through].count unless content[:redirect_through].nil?
74
+
75
+ @statistic[:crawl_counter] = crawl_counter
76
+ @statistic[:queue_counter] = queue_counter
77
+
78
+ mime_counts = {}
79
+ if @statistic.has_key? :mime_counts
80
+ mime_counts = @statistic[:mime_counts]
81
+ if mime_counts.has_key? content[:mime_type]
82
+ mime_counts[content[:mime_type]] += 1
83
+ else
84
+ mime_counts[content[:mime_type]] = 1
85
+ end
86
+ else
87
+ mime_counts = {content[:mime_type] => 1}
88
+ end
89
+ @statistic[:mime_counts] = mime_counts
71
90
 
72
- status_counts = {}
91
+ status_counts = {}
73
92
 
74
- if @statistic.has_key? :status_counts
75
- status_counts = @statistic[:status_counts]
76
- if status_counts.has_key? content[:status_code].to_i
77
- status_counts[content[:status_code].to_i] += 1
93
+ if @statistic.has_key? :status_counts
94
+ status_counts = @statistic[:status_counts]
95
+ if status_counts.has_key? content[:status_code].to_i
96
+ status_counts[content[:status_code].to_i] += 1
97
+ else
98
+ status_counts[content[:status_code].to_i] = 1
99
+ end
78
100
  else
79
- status_counts[content[:status_code].to_i] = 1
101
+ status_counts = {content[:status_code].to_i => 1}
80
102
  end
81
- else
82
- status_counts = {content[:status_code].to_i => 1}
83
- end
84
- @statistic[:status_counts] = status_counts
103
+ @statistic[:status_counts] = status_counts
85
104
 
86
- @crawled << url
87
- crawl_counter += 1
88
- @queue.delete(url)
89
- content[:links].keys.map{|key| content[:links][key]}.flatten.each do |link|
90
- unless @crawled.include? link
91
- puts "Checking if #{link} matches #{@options[:base_url]} as internal?" if @options[:debug]
92
- if link.to_s.match(Regexp.new("^#{@options[:base_url]}"))
93
- puts "Matched as #{link} as internal" if @options[:debug]
94
- unless @crawled.include? link.to_s or @queue.include? link.to_s
95
- puts "Added #{link.to_s} to queue" if @options[:debug]
96
- @queue << link.to_s
105
+ @redis.sadd "crawled", url.to_s
106
+ @redis.incr "crawl-counter"
107
+
108
+ content[:links].keys.map{|key| content[:links][key]}.flatten.each do |content_link|
109
+ link = content_link.to_s
110
+ unless @redis.sismember("crawled", link)
111
+ puts "Checking if #{link} matches #{@options[:base_url]} as internal?" if @options[:debug]
112
+ if link.to_s.match(Regexp.new("^#{@options[:base_url]}"))
113
+ puts "Matched as #{link} as internal" if @options[:debug]
114
+ unless @redis.sismember("crawled", link) || @redis.sismember("queued", link)
115
+ puts "Added #{link.to_s} to queue" if @options[:debug]
116
+ @redis.sadd "queued", link
117
+ crawl_counter = @redis.scard("crawled").to_i
118
+ queue_counter = @redis.scard("queued").to_i
119
+ end
97
120
  end
98
121
  end
99
122
  end
100
- end
101
- @queue.uniq!
102
123
 
103
- puts "Crawled: #{crawl_counter} Limit: #{@options[:crawl_limit]} Queued: #{@queue.count}" if @options[:debug]
104
-
105
- yield content if block_given?
124
+ crawl_counter = @redis.scard("crawled").to_i
125
+ queue_counter = @redis.scard("queued").to_i
126
+ Stats.update_statistics(@statistic)
127
+ Stats.update_status("Completed #{url}.")
128
+ puts "Crawled: #{crawl_counter.to_i} Limit: #{@options[:crawl_limit].to_i} Queued: #{queue_counter.to_i}" if @options[:debug]
129
+
130
+ yield content, @statistic if block_given?
106
131
 
107
- rescue => e
108
- puts "!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!"
109
- ap e
110
- @queue.delete(url)
111
-
132
+ rescue => e
133
+ puts "!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!"
134
+ ap e
135
+ ap e.backtrace
136
+ end
137
+ else
138
+ puts "Already crawled #{@options[:url]}" if @options[:debug]
112
139
  end
113
- else
114
- puts "Already crawled #{@options[:url]}" if @options[:debug]
115
140
  end
141
+ thread.join
116
142
  end
117
143
  @statistic
118
144
  end
119
-
120
- end
145
+ end
@@ -40,7 +40,7 @@ class CrawlJob
40
40
  redis.incr "crawl-counter"
41
41
  crawl_counter += 1
42
42
  if crawl_counter <= content_request[:crawl_limit].to_i
43
- content = CobWeb.new(content_request).get(content_request[:url])
43
+ content = Cobweb.new(content_request).get(content_request[:url])
44
44
 
45
45
  ## update statistics
46
46
  if redis.hexists "statistics", "average_response_time"
@@ -0,0 +1,22 @@
1
+ ## add symbolize methods to hash
2
+ class Hash
3
+ def symbolize_keys
4
+ keys.each do |key|
5
+ if key.instance_of? String
6
+ value = self[key]
7
+ self.delete(key)
8
+ self[key.to_sym] = value
9
+ end
10
+ end
11
+ self
12
+ end
13
+ def deep_symbolize_keys
14
+ symbolize_keys
15
+ keys.each do |key|
16
+ if self[key].instance_of? Hash
17
+ self[key].deep_symbolize_keys
18
+ end
19
+ end
20
+ self
21
+ end
22
+ end
@@ -17,8 +17,16 @@ class NamespacedRedis
17
17
  @redis.srem namespaced(key), member
18
18
  end
19
19
 
20
+ def spop(key)
21
+ @redis.spop namespaced(key)
22
+ end
23
+
20
24
  def smembers(key)
21
25
  @redis.smembers namespaced(key)
26
+ end
27
+
28
+ def scard(key)
29
+ @redis.scard namespaced(key)
22
30
  end
23
31
 
24
32
  def get(key)
@@ -29,6 +37,10 @@ class NamespacedRedis
29
37
  @redis.incr namespaced(key)
30
38
  end
31
39
 
40
+ def decr(key)
41
+ @redis.decr namespaced(key)
42
+ end
43
+
32
44
  def exist(key)
33
45
  @redis.exist namespaced(key)
34
46
  end
@@ -0,0 +1,2 @@
1
+ class RedirectError < Exception
2
+ end
@@ -0,0 +1,32 @@
1
+ require 'bundler/setup'
2
+ require 'sinatra'
3
+ require 'haml'
4
+
5
+ class Stats < Sinatra::Base
6
+
7
+ def self.update_statistics(statistics)
8
+ @@statistics = statistics
9
+ end
10
+
11
+ def self.update_status(status)
12
+ @@status = status
13
+ end
14
+
15
+ ap settings.root
16
+ set :views, settings.root + '/../views'
17
+
18
+ get '/' do
19
+ @statistics = @@statistics
20
+ @status = @@status
21
+ haml :statistics
22
+ end
23
+
24
+ end
25
+
26
+ thread = Thread.new do
27
+ Stats.run!
28
+
29
+ ## we need to manually kill the main thread as sinatra traps the interrupts
30
+ Thread.main.kill
31
+ end
32
+
@@ -31,7 +31,7 @@ describe CobwebCrawler do
31
31
 
32
32
  crawler = CobwebCrawler.new({:cache => false, :quiet => false, :debug => false})
33
33
 
34
- statistics = crawler.crawl("http://www.rockwellcottage.com/")
34
+ statistics = crawler.crawl("http://rockwellcottage.heroku.com/")
35
35
 
36
36
  ap statistics
37
37
 
@@ -43,8 +43,9 @@ describe CobwebCrawler do
43
43
 
44
44
  crawler = CobwebCrawler.new({:cache => false, :quiet => false, :debug => false})
45
45
 
46
- statistics = crawler.crawl("http://www.rockwellcottage.com/") do |content|
46
+ statistics = crawler.crawl("http://www.rockwellcottage.com/") do |content, statistics|
47
47
  ap content[:url]
48
+ ap statistics[:average_length]
48
49
  end
49
50
 
50
51
  ap statistics
@@ -1,6 +1,6 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
2
 
3
- describe CobWeb do
3
+ describe Cobweb do
4
4
 
5
5
  before(:each) do
6
6
 
@@ -15,7 +15,7 @@ describe CobWeb do
15
15
  "Server" => "gws",
16
16
  "X-XSS-Protection" => "1; mode=block"}
17
17
 
18
- @cobweb = CobWeb.new :quiet => true, :cache => nil
18
+ @cobweb = Cobweb.new :quiet => true, :cache => nil
19
19
  end
20
20
 
21
21
  describe "with mock" do
@@ -40,7 +40,9 @@ describe CobWeb do
40
40
  @mock_http_client.stub!(:request).with(@mock_http_redirect_request2).and_return(@mock_http_redirect_response2)
41
41
  @mock_http_client.stub!(:read_timeout=).and_return(nil)
42
42
  @mock_http_client.stub!(:open_timeout=).and_return(nil)
43
- @mock_http_client.stub!(:start).and_return(@mock_http_response)
43
+ @mock_http_client.stub!(:start).and_return(@mock_http_response)
44
+ @mock_http_client.stub!(:address).and_return("www.baseurl.com")
45
+ @mock_http_client.stub!(:port).and_return("80 ")
44
46
 
45
47
  @mock_http_response.stub!(:code).and_return(200)
46
48
  @mock_http_response.stub!(:content_type).and_return("text/html")
@@ -69,7 +71,7 @@ describe CobWeb do
69
71
  end
70
72
 
71
73
  it "should generate a cobweb object" do
72
- CobWeb.new.should be_an_instance_of CobWeb
74
+ Cobweb.new.should be_an_instance_of Cobweb
73
75
  end
74
76
 
75
77
  describe "get" do
@@ -130,7 +132,7 @@ describe CobWeb do
130
132
 
131
133
  before(:each) do
132
134
  @base_url = "http://redirect-me.com/redirect.html"
133
- @cobweb = CobWeb.new(:follow_redirects => true, :quiet => true, :cache => nil)
135
+ @cobweb = Cobweb.new(:follow_redirects => true, :quiet => true, :cache => nil)
134
136
  end
135
137
 
136
138
  it "should flow through redirect" #do
@@ -155,8 +157,8 @@ describe CobWeb do
155
157
 
156
158
  #end
157
159
  it "should not follow with redirect disabled" do
158
- @cobweb = CobWeb.new(:follow_redirects => false, :cache => nil)
159
- @mock_http_client.should_receive(:start).and_return(@mock_http_redirect_response)
160
+ @cobweb = Cobweb.new(:follow_redirects => false, :cache => nil)
161
+ @mock_http_client.should_receive(:request).with(@mock_http_redirect_request).and_return(@mock_http_redirect_response)
160
162
 
161
163
  content = @cobweb.get(@base_url)
162
164
  content[:url].should == "http://redirect-me.com/redirect.html"
@@ -0,0 +1,71 @@
1
+ %h1 Cobweb Statistics
2
+
3
+ %h4= @status
4
+
5
+ %table
6
+ %tr
7
+ %th Page Count
8
+ %th Asset Count
9
+ %th Redirect Count
10
+ %tr
11
+ %td= @statistics[:page_count]
12
+ %td= @statistics[:asset_count]
13
+ %td= @statistics[:total_redirects]
14
+
15
+ %table
16
+ %tr
17
+ %th Total Page Size
18
+ %th Total Asset Size
19
+ %tr
20
+ %td= @statistics[:page_size]
21
+ %td= @statistics[:asset_size]
22
+
23
+ %table
24
+ %tr
25
+ %th Crawled
26
+ %th Queued
27
+ %tr
28
+ %td= @statistics[:crawl_counter]
29
+ %td= @statistics[:queue_counter]
30
+
31
+
32
+ %table
33
+ %tr
34
+ %th{:colspan => 2} Response Times
35
+ %tr
36
+ %th Average
37
+ %td= @statistics[:average_response_time]
38
+ %tr
39
+ %th Maximum
40
+ %td= @statistics[:maximum_response_time]
41
+ %tr
42
+ %th Minimum
43
+ %td= @statistics[:minimum_response_time]
44
+
45
+ %table
46
+ %tr
47
+ %th{:colspan => 2} Content Sizes
48
+ %tr
49
+ %th Average
50
+ %td= @statistics[:average_length]
51
+ %tr
52
+ %th Maximum
53
+ %td= @statistics[:maximum_length]
54
+ %tr
55
+ %th Minimum
56
+ %td= @statistics[:minimum_length]
57
+ %tr
58
+ %th Total
59
+ %td= @statistics[:total_length]
60
+
61
+ %table
62
+ - @statistics[:mime_counts].keys.each do |mime_type|
63
+ %tr
64
+ %td= mime_type
65
+ %td= @statistics[:mime_counts][mime_type]
66
+
67
+ %table
68
+ - @statistics[:status_counts].keys.each do |status|
69
+ %tr
70
+ %td= status
71
+ %td= @statistics[:status_counts][status]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cobweb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.17
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-10 00:00:00.000000000 Z
12
+ date: 2012-03-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: resque
16
- requirement: &70125097329480 !ruby/object:Gem::Requirement
16
+ requirement: &70173021660540 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70125097329480
24
+ version_requirements: *70173021660540
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: redis
27
- requirement: &70125097328760 !ruby/object:Gem::Requirement
27
+ requirement: &70173021659920 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70125097328760
35
+ version_requirements: *70173021659920
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: absolutize
38
- requirement: &70125097328280 !ruby/object:Gem::Requirement
38
+ requirement: &70173021659420 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70125097328280
46
+ version_requirements: *70173021659420
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: nokogiri
49
- requirement: &70125097327660 !ruby/object:Gem::Requirement
49
+ requirement: &70173021658760 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70125097327660
57
+ version_requirements: *70173021658760
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: addressable
60
- requirement: &70125097327060 !ruby/object:Gem::Requirement
60
+ requirement: &70173021658080 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70125097327060
68
+ version_requirements: *70173021658080
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
- requirement: &70125097326400 !ruby/object:Gem::Requirement
71
+ requirement: &70173021657320 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,40 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70125097326400
79
+ version_requirements: *70173021657320
80
+ - !ruby/object:Gem::Dependency
81
+ name: awesome_print
82
+ requirement: &70173021654680 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ type: :runtime
89
+ prerelease: false
90
+ version_requirements: *70173021654680
91
+ - !ruby/object:Gem::Dependency
92
+ name: sinatra
93
+ requirement: &70173021654060 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ type: :runtime
100
+ prerelease: false
101
+ version_requirements: *70173021654060
102
+ - !ruby/object:Gem::Dependency
103
+ name: thin
104
+ requirement: &70173021653560 !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ type: :runtime
111
+ prerelease: false
112
+ version_requirements: *70173021653560
80
113
  description:
81
114
  email: stewart@rockwellcottage.com
82
115
  executables: []
@@ -96,7 +129,11 @@ files:
96
129
  - lib/cobweb_process_job.rb
97
130
  - lib/content_link_parser.rb
98
131
  - lib/crawl_job.rb
132
+ - lib/hash.rb
99
133
  - lib/namespaced_redis.rb
134
+ - lib/redirect_error.rb
135
+ - lib/stats.rb
136
+ - views/statistics.haml
100
137
  - README.textile
101
138
  homepage: http://github.com/stewartmckee/cobweb
102
139
  licenses: []
@@ -118,7 +155,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
155
  version: '0'
119
156
  requirements: []
120
157
  rubyforge_project:
121
- rubygems_version: 1.8.11
158
+ rubygems_version: 1.8.10
122
159
  signing_key:
123
160
  specification_version: 3
124
161
  summary: Web Crawler that uses resque background job engine to allow you to cluster