cobweb 0.0.13 → 0.0.17

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,5 @@
1
1
  require 'rubygems'
2
+ require 'bundler/setup'
2
3
  require 'uri'
3
4
  require 'resque'
4
5
  require "addressable/uri"
@@ -9,7 +10,7 @@ Dir[File.dirname(__FILE__) + '/*.rb'].each do |file|
9
10
  require [File.dirname(__FILE__), File.basename(file, File.extname(file))].join("/")
10
11
  end
11
12
 
12
- class CobWeb
13
+ class Cobweb
13
14
 
14
15
  ## TASKS
15
16
 
@@ -65,7 +66,7 @@ class CobWeb
65
66
  if redis.get(unique_id) and @options[:cache]
66
67
  puts "Cache hit for #{url}" unless @options[:quiet]
67
68
  content = JSON.parse(redis.get(unique_id)).deep_symbolize_keys
68
- content[:body] = Base64.decode64(content[:body]) unless content[:body].nil? or content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
69
+ content[:body] = Base64.decode64(content[:body])
69
70
 
70
71
  content
71
72
  else
@@ -93,13 +94,25 @@ class CobWeb
93
94
 
94
95
  if @options[:follow_redirects] and response.code.to_i >= 300 and response.code.to_i < 400
95
96
  puts "redirected... " unless @options[:quiet]
97
+
98
+ # get location to redirect to
96
99
  url = absolutize.url(response['location']).to_s
100
+
101
+ # decrement redirect limit
97
102
  redirect_limit = redirect_limit - 1
103
+
104
+ # raise exception if we're being redirected to somewhere we've been redirected to in this content request
105
+ #raise RedirectError("Loop detected in redirect for - #{url}") if content[:redirect_through].include? url
106
+
107
+ # raise exception if redirect limit has reached 0
108
+ raise RedirectError, "Redirect Limit reached" if redirect_limit == 0
109
+
110
+ # get the content from redirect location
98
111
  content = get(url, redirect_limit)
99
112
  content[:url] = uri.to_s
100
113
  content[:redirect_through] = [] if content[:redirect_through].nil?
101
114
  content[:redirect_through].insert(0, url)
102
-
115
+
103
116
  content[:response_time] = Time.now.to_f - request_time
104
117
  else
105
118
  content[:response_time] = Time.now.to_f - request_time
@@ -109,8 +122,9 @@ class CobWeb
109
122
  # create the content container
110
123
  content[:url] = uri.to_s
111
124
  content[:status_code] = response.code.to_i
112
- content[:mime_type] = response.content_type.split(";")[0].strip
113
- if response["Content-Type"].include? ";"
125
+ content[:mime_type] = ""
126
+ content[:mime_type] = response.content_type.split(";")[0].strip unless response.content_type.nil?
127
+ if !response["Content-Type"].nil? && response["Content-Type"].include?(";")
114
128
  charset = response["Content-Type"][response["Content-Type"].index(";")+2..-1] if !response["Content-Type"].nil? and response["Content-Type"].include?(";")
115
129
  charset = charset[charset.index("=")+1..-1] if charset and charset.include?("=")
116
130
  content[:character_set] = charset
@@ -130,10 +144,25 @@ class CobWeb
130
144
  end
131
145
  # add content to cache if required
132
146
  if @options[:cache]
133
- content[:body] = Base64.encode64(content[:body]) unless content[:body].nil? or content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
147
+ content[:body] = Base64.encode64(content[:body])
134
148
  redis.set(unique_id, content.to_json)
135
149
  redis.expire unique_id, @options[:cache].to_i
136
150
  end
151
+ rescue RedirectError => e
152
+ puts "ERROR: #{e.message}"
153
+
154
+ ## generate a blank content
155
+ content = {}
156
+ content[:url] = uri.to_s
157
+ content[:response_time] = Time.now.to_f - request_time
158
+ content[:status_code] = 0
159
+ content[:length] = 0
160
+ content[:body] = ""
161
+ content[:error] = e.message
162
+ content[:mime_type] = "error/dnslookup"
163
+ content[:headers] = {}
164
+ content[:links] = {}
165
+
137
166
  rescue SocketError => e
138
167
  puts "ERROR: #{e.message}"
139
168
 
@@ -273,25 +302,7 @@ class CobWeb
273
302
  end
274
303
  end
275
304
 
276
- ## add symbolize methods to hash
277
- class Hash
278
- def symbolize_keys
279
- keys.each do |key|
280
- if key.instance_of? String
281
- value = self[key]
282
- self.delete(key)
283
- self[key.to_sym] = value
284
- end
285
- end
286
- self
287
- end
288
- def deep_symbolize_keys
289
- symbolize_keys
290
- keys.each do |key|
291
- if self[key].instance_of? Hash
292
- self[key].deep_symbolize_keys
293
- end
294
- end
295
- self
296
- end
297
- end
305
+
306
+
307
+
308
+
@@ -1,13 +1,20 @@
1
+ require 'digest/md5'
2
+ require 'date'
3
+ require 'ap'
4
+
1
5
  class CobwebCrawler
2
6
 
3
7
  def initialize(options={})
4
8
  @options = options
5
9
 
6
10
  @statistic = {}
7
- @queue = []
8
- @crawled = []
9
11
 
10
- @cobweb = CobWeb.new(@options)
12
+ @options[:redis_options] = {:host => "127.0.0.1"} unless @options.has_key? :redis_options
13
+ crawl_id = Digest::MD5.hexdigest(DateTime.now.inspect.to_s)
14
+
15
+ @redis = NamespacedRedis.new(Redis.new(@options[:redis_options]), "cobweb-#{crawl_id}")
16
+
17
+ @cobweb = Cobweb.new(@options)
11
18
  end
12
19
 
13
20
  def crawl(base_url, crawl_options = {}, &block)
@@ -17,104 +24,122 @@ class CobwebCrawler
17
24
 
18
25
  @absolutize = Absolutize.new(@options[:base_url], :output_debug => false, :raise_exceptions => false, :force_escaping => false, :remove_anchors => true)
19
26
 
20
- crawl_counter = @crawled.count
21
-
22
- @queue << base_url
23
-
24
- while !@queue.empty? && (@options[:crawl_limit].to_i == 0 || @options[:crawl_limit].to_i > crawl_counter)
25
-
26
- url = @queue.first
27
- @options[:url] = url
28
- unless @crawled.include?(url) || url =~ /\/(.+?)\/\1\/\1/
29
- begin
30
- content = @cobweb.get(@options[:url])
27
+ @redis.sadd "queued", base_url
28
+ crawl_counter = @redis.scard("crawled").to_i
29
+ queue_counter = @redis.scard("queued").to_i
31
30
 
32
- if @statistic[:average_response_time].nil?
33
- @statistic[:average_response_time] = content[:response_time].to_f
34
- else
35
- @statistic[:average_response_time] = (((@statistic[:average_response_time] * crawl_counter) + content[:response_time].to_f) / (crawl_counter + 1))
36
- end
31
+ while queue_counter>0 && (@options[:crawl_limit].to_i == 0 || @options[:crawl_limit].to_i > crawl_counter)
32
+ thread = Thread.new do
33
+
34
+ url = @redis.spop "queued"
35
+ crawl_counter = @redis.scard("crawled").to_i
36
+ queue_counter = @redis.scard("queued").to_i
37
+
38
+ @options[:url] = url
39
+ unless @redis.sismember("crawled", url.to_s)
40
+ begin
41
+ Stats.update_status("Requesting #{url}...")
42
+ content = @cobweb.get(url)
43
+ Stats.update_status("Processing #{url}...")
44
+
45
+ if @statistic[:average_response_time].nil?
46
+ @statistic[:average_response_time] = content[:response_time].to_f
47
+ else
48
+ @statistic[:average_response_time] = (((@statistic[:average_response_time] * crawl_counter) + content[:response_time].to_f) / (crawl_counter + 1))
49
+ end
37
50
 
38
- @statistic[:maximum_response_time] = content[:response_time] if @statistic[:maximum_response_time].nil? || @statistic[:maximum_response_time] < content[:response_time]
39
- @statistic[:minimum_response_time] = content[:response_time] if @statistic[:minimum_response_time].nil? || @statistic[:minimum_response_time] > content[:response_time]
51
+ @statistic[:maximum_response_time] = content[:response_time] if @statistic[:maximum_response_time].nil? || @statistic[:maximum_response_time] < content[:response_time]
52
+ @statistic[:minimum_response_time] = content[:response_time] if @statistic[:minimum_response_time].nil? || @statistic[:minimum_response_time] > content[:response_time]
40
53
 
41
- if @statistic[:average_length]
42
- @statistic[:average_length] = (((@statistic[:average_length].to_i*crawl_counter) + content[:length].to_i) / (crawl_counter + 1))
43
- else
44
- @statistic[:average_length] = content[:length].to_i
45
- end
54
+ if @statistic[:average_length]
55
+ @statistic[:average_length] = (((@statistic[:average_length].to_i*crawl_counter) + content[:length].to_i) / (crawl_counter + 1))
56
+ else
57
+ @statistic[:average_length] = content[:length].to_i
58
+ end
46
59
 
47
- @statistic[:maximum_length] = content[:length].to_i if @statistic[:maximum_length].nil? || content[:length].to_i > @statistic[:maximum_length].to_i
48
- @statistic[:minimum_length] = content[:length].to_i if @statistic[:minimum_length].nil? || content[:length].to_i < @statistic[:minimum_length].to_i
49
- @statistic[:total_length] = @statistic[:total_length].to_i + content[:length].to_i
50
-
51
- if content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
52
- @statistic[:page_count] = @statistic[:page_count].to_i + 1
53
- @statistic[:page_size] = @statistic[:page_count].to_i + content[:length].to_i
54
- else
55
- @statistic[:asset_count] = @statistic[:asset_count].to_i + 1
56
- @statistic[:asset_size] = @statistic[:asset_count].to_i + content[:length].to_i
57
- end
60
+ @statistic[:maximum_length] = content[:length].to_i if @statistic[:maximum_length].nil? || content[:length].to_i > @statistic[:maximum_length].to_i
61
+ @statistic[:minimum_length] = content[:length].to_i if @statistic[:minimum_length].nil? || content[:length].to_i < @statistic[:minimum_length].to_i
62
+ @statistic[:total_length] = @statistic[:total_length].to_i + content[:length].to_i
58
63
 
59
- mime_counts = {}
60
- if @statistic.has_key? :mime_counts
61
- mime_counts = @statistic[:mime_counts]
62
- if mime_counts.has_key? content[:mime_type]
63
- mime_counts[content[:mime_type]] += 1
64
+ if content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
65
+ @statistic[:page_count] = @statistic[:page_count].to_i + 1
66
+ @statistic[:page_size] = @statistic[:page_size].to_i + content[:length].to_i
64
67
  else
65
- mime_counts[content[:mime_type]] = 1
68
+ @statistic[:asset_count] = @statistic[:asset_count].to_i + 1
69
+ @statistic[:asset_size] = @statistic[:asset_size].to_i + content[:length].to_i
66
70
  end
67
- else
68
- mime_counts = {content[:mime_type] => 1}
69
- end
70
- @statistic[:mime_counts] = mime_counts
71
+
72
+ @statistic[:total_redirects] = 0 if @statistic[:total_redirects].nil?
73
+ @statistic[:total_redirects] += content[:redirect_through].count unless content[:redirect_through].nil?
74
+
75
+ @statistic[:crawl_counter] = crawl_counter
76
+ @statistic[:queue_counter] = queue_counter
77
+
78
+ mime_counts = {}
79
+ if @statistic.has_key? :mime_counts
80
+ mime_counts = @statistic[:mime_counts]
81
+ if mime_counts.has_key? content[:mime_type]
82
+ mime_counts[content[:mime_type]] += 1
83
+ else
84
+ mime_counts[content[:mime_type]] = 1
85
+ end
86
+ else
87
+ mime_counts = {content[:mime_type] => 1}
88
+ end
89
+ @statistic[:mime_counts] = mime_counts
71
90
 
72
- status_counts = {}
91
+ status_counts = {}
73
92
 
74
- if @statistic.has_key? :status_counts
75
- status_counts = @statistic[:status_counts]
76
- if status_counts.has_key? content[:status_code].to_i
77
- status_counts[content[:status_code].to_i] += 1
93
+ if @statistic.has_key? :status_counts
94
+ status_counts = @statistic[:status_counts]
95
+ if status_counts.has_key? content[:status_code].to_i
96
+ status_counts[content[:status_code].to_i] += 1
97
+ else
98
+ status_counts[content[:status_code].to_i] = 1
99
+ end
78
100
  else
79
- status_counts[content[:status_code].to_i] = 1
101
+ status_counts = {content[:status_code].to_i => 1}
80
102
  end
81
- else
82
- status_counts = {content[:status_code].to_i => 1}
83
- end
84
- @statistic[:status_counts] = status_counts
103
+ @statistic[:status_counts] = status_counts
85
104
 
86
- @crawled << url
87
- crawl_counter += 1
88
- @queue.delete(url)
89
- content[:links].keys.map{|key| content[:links][key]}.flatten.each do |link|
90
- unless @crawled.include? link
91
- puts "Checking if #{link} matches #{@options[:base_url]} as internal?" if @options[:debug]
92
- if link.to_s.match(Regexp.new("^#{@options[:base_url]}"))
93
- puts "Matched as #{link} as internal" if @options[:debug]
94
- unless @crawled.include? link.to_s or @queue.include? link.to_s
95
- puts "Added #{link.to_s} to queue" if @options[:debug]
96
- @queue << link.to_s
105
+ @redis.sadd "crawled", url.to_s
106
+ @redis.incr "crawl-counter"
107
+
108
+ content[:links].keys.map{|key| content[:links][key]}.flatten.each do |content_link|
109
+ link = content_link.to_s
110
+ unless @redis.sismember("crawled", link)
111
+ puts "Checking if #{link} matches #{@options[:base_url]} as internal?" if @options[:debug]
112
+ if link.to_s.match(Regexp.new("^#{@options[:base_url]}"))
113
+ puts "Matched as #{link} as internal" if @options[:debug]
114
+ unless @redis.sismember("crawled", link) || @redis.sismember("queued", link)
115
+ puts "Added #{link.to_s} to queue" if @options[:debug]
116
+ @redis.sadd "queued", link
117
+ crawl_counter = @redis.scard("crawled").to_i
118
+ queue_counter = @redis.scard("queued").to_i
119
+ end
97
120
  end
98
121
  end
99
122
  end
100
- end
101
- @queue.uniq!
102
123
 
103
- puts "Crawled: #{crawl_counter} Limit: #{@options[:crawl_limit]} Queued: #{@queue.count}" if @options[:debug]
104
-
105
- yield content if block_given?
124
+ crawl_counter = @redis.scard("crawled").to_i
125
+ queue_counter = @redis.scard("queued").to_i
126
+ Stats.update_statistics(@statistic)
127
+ Stats.update_status("Completed #{url}.")
128
+ puts "Crawled: #{crawl_counter.to_i} Limit: #{@options[:crawl_limit].to_i} Queued: #{queue_counter.to_i}" if @options[:debug]
129
+
130
+ yield content, @statistic if block_given?
106
131
 
107
- rescue => e
108
- puts "!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!"
109
- ap e
110
- @queue.delete(url)
111
-
132
+ rescue => e
133
+ puts "!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!"
134
+ ap e
135
+ ap e.backtrace
136
+ end
137
+ else
138
+ puts "Already crawled #{@options[:url]}" if @options[:debug]
112
139
  end
113
- else
114
- puts "Already crawled #{@options[:url]}" if @options[:debug]
115
140
  end
141
+ thread.join
116
142
  end
117
143
  @statistic
118
144
  end
119
-
120
- end
145
+ end
@@ -40,7 +40,7 @@ class CrawlJob
40
40
  redis.incr "crawl-counter"
41
41
  crawl_counter += 1
42
42
  if crawl_counter <= content_request[:crawl_limit].to_i
43
- content = CobWeb.new(content_request).get(content_request[:url])
43
+ content = Cobweb.new(content_request).get(content_request[:url])
44
44
 
45
45
  ## update statistics
46
46
  if redis.hexists "statistics", "average_response_time"
@@ -0,0 +1,22 @@
1
+ ## add symbolize methods to hash
2
+ class Hash
3
+ def symbolize_keys
4
+ keys.each do |key|
5
+ if key.instance_of? String
6
+ value = self[key]
7
+ self.delete(key)
8
+ self[key.to_sym] = value
9
+ end
10
+ end
11
+ self
12
+ end
13
+ def deep_symbolize_keys
14
+ symbolize_keys
15
+ keys.each do |key|
16
+ if self[key].instance_of? Hash
17
+ self[key].deep_symbolize_keys
18
+ end
19
+ end
20
+ self
21
+ end
22
+ end
@@ -17,8 +17,16 @@ class NamespacedRedis
17
17
  @redis.srem namespaced(key), member
18
18
  end
19
19
 
20
+ def spop(key)
21
+ @redis.spop namespaced(key)
22
+ end
23
+
20
24
  def smembers(key)
21
25
  @redis.smembers namespaced(key)
26
+ end
27
+
28
+ def scard(key)
29
+ @redis.scard namespaced(key)
22
30
  end
23
31
 
24
32
  def get(key)
@@ -29,6 +37,10 @@ class NamespacedRedis
29
37
  @redis.incr namespaced(key)
30
38
  end
31
39
 
40
+ def decr(key)
41
+ @redis.decr namespaced(key)
42
+ end
43
+
32
44
  def exist(key)
33
45
  @redis.exist namespaced(key)
34
46
  end
@@ -0,0 +1,2 @@
1
+ class RedirectError < Exception
2
+ end
@@ -0,0 +1,32 @@
1
+ require 'bundler/setup'
2
+ require 'sinatra'
3
+ require 'haml'
4
+
5
+ class Stats < Sinatra::Base
6
+
7
+ def self.update_statistics(statistics)
8
+ @@statistics = statistics
9
+ end
10
+
11
+ def self.update_status(status)
12
+ @@status = status
13
+ end
14
+
15
+ ap settings.root
16
+ set :views, settings.root + '/../views'
17
+
18
+ get '/' do
19
+ @statistics = @@statistics
20
+ @status = @@status
21
+ haml :statistics
22
+ end
23
+
24
+ end
25
+
26
+ thread = Thread.new do
27
+ Stats.run!
28
+
29
+ ## we need to manually kill the main thread as sinatra traps the interrupts
30
+ Thread.main.kill
31
+ end
32
+
@@ -31,7 +31,7 @@ describe CobwebCrawler do
31
31
 
32
32
  crawler = CobwebCrawler.new({:cache => false, :quiet => false, :debug => false})
33
33
 
34
- statistics = crawler.crawl("http://www.rockwellcottage.com/")
34
+ statistics = crawler.crawl("http://rockwellcottage.heroku.com/")
35
35
 
36
36
  ap statistics
37
37
 
@@ -43,8 +43,9 @@ describe CobwebCrawler do
43
43
 
44
44
  crawler = CobwebCrawler.new({:cache => false, :quiet => false, :debug => false})
45
45
 
46
- statistics = crawler.crawl("http://www.rockwellcottage.com/") do |content|
46
+ statistics = crawler.crawl("http://www.rockwellcottage.com/") do |content, statistics|
47
47
  ap content[:url]
48
+ ap statistics[:average_length]
48
49
  end
49
50
 
50
51
  ap statistics
@@ -1,6 +1,6 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
2
 
3
- describe CobWeb do
3
+ describe Cobweb do
4
4
 
5
5
  before(:each) do
6
6
 
@@ -15,7 +15,7 @@ describe CobWeb do
15
15
  "Server" => "gws",
16
16
  "X-XSS-Protection" => "1; mode=block"}
17
17
 
18
- @cobweb = CobWeb.new :quiet => true, :cache => nil
18
+ @cobweb = Cobweb.new :quiet => true, :cache => nil
19
19
  end
20
20
 
21
21
  describe "with mock" do
@@ -40,7 +40,9 @@ describe CobWeb do
40
40
  @mock_http_client.stub!(:request).with(@mock_http_redirect_request2).and_return(@mock_http_redirect_response2)
41
41
  @mock_http_client.stub!(:read_timeout=).and_return(nil)
42
42
  @mock_http_client.stub!(:open_timeout=).and_return(nil)
43
- @mock_http_client.stub!(:start).and_return(@mock_http_response)
43
+ @mock_http_client.stub!(:start).and_return(@mock_http_response)
44
+ @mock_http_client.stub!(:address).and_return("www.baseurl.com")
45
+ @mock_http_client.stub!(:port).and_return("80 ")
44
46
 
45
47
  @mock_http_response.stub!(:code).and_return(200)
46
48
  @mock_http_response.stub!(:content_type).and_return("text/html")
@@ -69,7 +71,7 @@ describe CobWeb do
69
71
  end
70
72
 
71
73
  it "should generate a cobweb object" do
72
- CobWeb.new.should be_an_instance_of CobWeb
74
+ Cobweb.new.should be_an_instance_of Cobweb
73
75
  end
74
76
 
75
77
  describe "get" do
@@ -130,7 +132,7 @@ describe CobWeb do
130
132
 
131
133
  before(:each) do
132
134
  @base_url = "http://redirect-me.com/redirect.html"
133
- @cobweb = CobWeb.new(:follow_redirects => true, :quiet => true, :cache => nil)
135
+ @cobweb = Cobweb.new(:follow_redirects => true, :quiet => true, :cache => nil)
134
136
  end
135
137
 
136
138
  it "should flow through redirect" #do
@@ -155,8 +157,8 @@ describe CobWeb do
155
157
 
156
158
  #end
157
159
  it "should not follow with redirect disabled" do
158
- @cobweb = CobWeb.new(:follow_redirects => false, :cache => nil)
159
- @mock_http_client.should_receive(:start).and_return(@mock_http_redirect_response)
160
+ @cobweb = Cobweb.new(:follow_redirects => false, :cache => nil)
161
+ @mock_http_client.should_receive(:request).with(@mock_http_redirect_request).and_return(@mock_http_redirect_response)
160
162
 
161
163
  content = @cobweb.get(@base_url)
162
164
  content[:url].should == "http://redirect-me.com/redirect.html"
@@ -0,0 +1,71 @@
1
+ %h1 Cobweb Statistics
2
+
3
+ %h4= @status
4
+
5
+ %table
6
+ %tr
7
+ %th Page Count
8
+ %th Asset Count
9
+ %th Redirect Count
10
+ %tr
11
+ %td= @statistics[:page_count]
12
+ %td= @statistics[:asset_count]
13
+ %td= @statistics[:total_redirects]
14
+
15
+ %table
16
+ %tr
17
+ %th Total Page Size
18
+ %th Total Asset Size
19
+ %tr
20
+ %td= @statistics[:page_size]
21
+ %td= @statistics[:asset_size]
22
+
23
+ %table
24
+ %tr
25
+ %th Crawled
26
+ %th Queued
27
+ %tr
28
+ %td= @statistics[:crawl_counter]
29
+ %td= @statistics[:queue_counter]
30
+
31
+
32
+ %table
33
+ %tr
34
+ %th{:colspan => 2} Response Times
35
+ %tr
36
+ %th Average
37
+ %td= @statistics[:average_response_time]
38
+ %tr
39
+ %th Maximum
40
+ %td= @statistics[:maximum_response_time]
41
+ %tr
42
+ %th Minimum
43
+ %td= @statistics[:minimum_response_time]
44
+
45
+ %table
46
+ %tr
47
+ %th{:colspan => 2} Content Sizes
48
+ %tr
49
+ %th Average
50
+ %td= @statistics[:average_length]
51
+ %tr
52
+ %th Maximum
53
+ %td= @statistics[:maximum_length]
54
+ %tr
55
+ %th Minimum
56
+ %td= @statistics[:minimum_length]
57
+ %tr
58
+ %th Total
59
+ %td= @statistics[:total_length]
60
+
61
+ %table
62
+ - @statistics[:mime_counts].keys.each do |mime_type|
63
+ %tr
64
+ %td= mime_type
65
+ %td= @statistics[:mime_counts][mime_type]
66
+
67
+ %table
68
+ - @statistics[:status_counts].keys.each do |status|
69
+ %tr
70
+ %td= status
71
+ %td= @statistics[:status_counts][status]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cobweb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.17
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-10 00:00:00.000000000 Z
12
+ date: 2012-03-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: resque
16
- requirement: &70125097329480 !ruby/object:Gem::Requirement
16
+ requirement: &70173021660540 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70125097329480
24
+ version_requirements: *70173021660540
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: redis
27
- requirement: &70125097328760 !ruby/object:Gem::Requirement
27
+ requirement: &70173021659920 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70125097328760
35
+ version_requirements: *70173021659920
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: absolutize
38
- requirement: &70125097328280 !ruby/object:Gem::Requirement
38
+ requirement: &70173021659420 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70125097328280
46
+ version_requirements: *70173021659420
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: nokogiri
49
- requirement: &70125097327660 !ruby/object:Gem::Requirement
49
+ requirement: &70173021658760 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70125097327660
57
+ version_requirements: *70173021658760
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: addressable
60
- requirement: &70125097327060 !ruby/object:Gem::Requirement
60
+ requirement: &70173021658080 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70125097327060
68
+ version_requirements: *70173021658080
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rspec
71
- requirement: &70125097326400 !ruby/object:Gem::Requirement
71
+ requirement: &70173021657320 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,7 +76,40 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70125097326400
79
+ version_requirements: *70173021657320
80
+ - !ruby/object:Gem::Dependency
81
+ name: awesome_print
82
+ requirement: &70173021654680 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
88
+ type: :runtime
89
+ prerelease: false
90
+ version_requirements: *70173021654680
91
+ - !ruby/object:Gem::Dependency
92
+ name: sinatra
93
+ requirement: &70173021654060 !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ type: :runtime
100
+ prerelease: false
101
+ version_requirements: *70173021654060
102
+ - !ruby/object:Gem::Dependency
103
+ name: thin
104
+ requirement: &70173021653560 !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ type: :runtime
111
+ prerelease: false
112
+ version_requirements: *70173021653560
80
113
  description:
81
114
  email: stewart@rockwellcottage.com
82
115
  executables: []
@@ -96,7 +129,11 @@ files:
96
129
  - lib/cobweb_process_job.rb
97
130
  - lib/content_link_parser.rb
98
131
  - lib/crawl_job.rb
132
+ - lib/hash.rb
99
133
  - lib/namespaced_redis.rb
134
+ - lib/redirect_error.rb
135
+ - lib/stats.rb
136
+ - views/statistics.haml
100
137
  - README.textile
101
138
  homepage: http://github.com/stewartmckee/cobweb
102
139
  licenses: []
@@ -118,7 +155,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
155
  version: '0'
119
156
  requirements: []
120
157
  rubyforge_project:
121
- rubygems_version: 1.8.11
158
+ rubygems_version: 1.8.10
122
159
  signing_key:
123
160
  specification_version: 3
124
161
  summary: Web Crawler that uses resque background job engine to allow you to cluster