cobweb 0.0.74 → 0.0.75

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- h1. Cobweb v0.0.74
2
+ h1. Cobweb v0.0.75
3
3
 
4
4
  "@cobweb_gem":https://twitter.com/cobweb_gem
5
5
 
@@ -124,7 +124,7 @@ class Cobweb
124
124
  else
125
125
  # retrieve data
126
126
  unless @http && @http.address == uri.host && @http.port == uri.inferred_port
127
- puts "Creating connection to #{uri.host}..." unless @options[:quiet]
127
+ puts "Creating connection to #{uri.host}..." if @options[:debug]
128
128
  @http = Net::HTTP.new(uri.host, uri.inferred_port)
129
129
  end
130
130
  if uri.scheme == "https"
@@ -3,7 +3,7 @@ class CobwebVersion
3
3
 
4
4
  # Returns a string of the current version
5
5
  def self.version
6
- "0.0.74"
6
+ "0.0.75"
7
7
  end
8
8
 
9
9
  end
@@ -13,6 +13,7 @@ class ContentLinkParser
13
13
  base_url = @url.to_s
14
14
  if @doc.at("base[href]")
15
15
  base_url = @doc.at("base[href]").attr("href").to_s
16
+ @url = base_url if base_url
16
17
  end
17
18
 
18
19
  @options[:tags] = {}
@@ -1,26 +1,26 @@
1
1
  module CobwebModule
2
2
  class Crawl
3
-
3
+
4
4
  def initialize(options={})
5
5
  @options = HashUtil.deep_symbolize_keys(options)
6
-
6
+
7
7
  setup_defaults
8
8
  @redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{@options[:crawl_id]}", Redis.new(@options[:redis_options]))
9
9
  @stats = Stats.new(@options)
10
10
  @debug = @options[:debug]
11
11
  @first_to_finish = false
12
-
12
+
13
13
  end
14
-
14
+
15
15
  # Returns true if the url requested is already in the crawled queue
16
16
  def already_crawled?(link=@options[:url])
17
- @redis.sismember "crawled", link
17
+ @redis.sismember "crawled", link
18
18
  end
19
-
19
+
20
20
  def already_queued?(link)
21
21
  @redis.sismember "queued", link
22
22
  end
23
-
23
+
24
24
  # Returns true if the crawl count is within limits
25
25
  def within_crawl_limits?
26
26
  @options[:crawl_limit].nil? || crawl_counter < @options[:crawl_limit].to_i
@@ -33,51 +33,60 @@ module CobwebModule
33
33
 
34
34
  # Returns true if the queue count is calculated to be still within limits when complete
35
35
  def within_queue_limits?
36
-
36
+
37
37
  # if we are limiting by page we can't limit the queue size as we don't know the mime type until retrieved
38
38
  if @options[:crawl_limit_by_page]
39
39
  return true
40
-
41
- # if a crawl limit is set, limit queue size to crawled + queue
40
+
41
+ # if a crawl limit is set, limit queue size to crawled + queue
42
42
  elsif @options[:crawl_limit].to_i > 0
43
43
  (queue_counter + crawl_counter) < @options[:crawl_limit].to_i
44
-
45
- # no crawl limit set so always within queue limit
44
+
45
+ # no crawl limit set so always within queue limit
46
46
  else
47
47
  true
48
48
  end
49
49
  end
50
-
50
+
51
51
  def retrieve
52
- unless already_crawled?
53
- if within_crawl_limits?
54
- @stats.update_status("Retrieving #{@options[:url]}...")
55
- @content = Cobweb.new(@options).get(@options[:url], @options)
56
- if @options[:url] == @redis.get("original_base_url")
57
- @redis.set("crawled_base_url", @content[:base_url])
58
- end
59
- update_queues
60
-
61
- if content.permitted_type?
62
- ## update statistics
63
-
64
- @stats.update_statistics(@content)
65
- return true
52
+ lock("retrieve") do
53
+ unless @redis.sismember("currently_running", @options[:url])
54
+ @redis.sadd("currently_running", @options[:url])
55
+ unless already_crawled?
56
+ if within_crawl_limits?
57
+ @stats.update_status("Retrieving #{@options[:url]}...")
58
+ @content = Cobweb.new(@options).get(@options[:url], @options)
59
+ if @options[:url] == @redis.get("original_base_url")
60
+ @redis.set("crawled_base_url", @content[:base_url])
61
+ end
62
+ update_queues
63
+
64
+ if content.permitted_type?
65
+ ## update statistics
66
+
67
+ @stats.update_statistics(@content)
68
+ return true
69
+ end
70
+ else
71
+ decrement_queue_counter
72
+ end
73
+ else
74
+ decrement_queue_counter
66
75
  end
67
76
  else
77
+ debug_puts "\n\nDETECTED DUPLICATE JOB for #{@options[:url]}\n"
78
+ debug_ap @redis.smembers("currently_running")
68
79
  decrement_queue_counter
69
80
  end
70
- else
71
- decrement_queue_counter
81
+ false
72
82
  end
73
- false
74
83
  end
75
-
84
+
76
85
  def process_links &block
77
-
86
+
78
87
  # set the base url if this is the first page
79
88
  set_base_url @redis
80
-
89
+
81
90
  @cobweb_links = CobwebLinks.new(@options)
82
91
  if within_queue_limits?
83
92
  internal_links = ContentLinkParser.new(@options[:url], content.body, @options).all_links(:valid_schemes => [:http, :https])
@@ -99,20 +108,20 @@ module CobwebModule
99
108
  increment_queue_counter
100
109
  end
101
110
  else
102
- puts "Cannot enqueue new content as crawl has been cancelled." if @options[:debug]
111
+ debug_puts "Cannot enqueue new content as crawl has been cancelled."
103
112
  end
104
113
  end
105
114
  end
106
115
  end
107
116
  end
108
-
117
+
109
118
  def content
110
119
  raise "Content is not available" if @content.nil?
111
- CobwebModule::CrawlObject.new(@content, @options)
120
+ CobwebModule::CrawlObject.new(@content, @options)
112
121
  end
113
-
122
+
114
123
  def update_queues
115
- @redis.multi do
124
+ lock("update_queues") do
116
125
  #@redis.incr "inprogress"
117
126
  # move the url from the queued list to the crawled list - for both the original url, and the content url (to handle redirects)
118
127
  @redis.srem "queued", @options[:url]
@@ -123,7 +132,6 @@ module CobwebModule
123
132
  end
124
133
  # increment the counter if we are not limiting by page only || we are limiting count by page and it is a page
125
134
  if @options[:crawl_limit_by_page]
126
- ap "#{content.mime_type} - #{content.url}"
127
135
  if content.mime_type.match("text/html")
128
136
  increment_crawl_counter
129
137
  end
@@ -133,12 +141,12 @@ module CobwebModule
133
141
  decrement_queue_counter
134
142
  end
135
143
  end
136
-
144
+
137
145
  def to_be_processed?
138
- !finished? || first_to_finish? || within_process_limits?
146
+ (!finished? || within_process_limits?) && !@redis.sismember("enqueued", @options[:url])
139
147
  end
140
-
141
- def process
148
+
149
+ def process(&block)
142
150
  if @options[:crawl_limit_by_page]
143
151
  if content.mime_type.match("text/html")
144
152
  increment_process_counter
@@ -146,34 +154,42 @@ module CobwebModule
146
154
  else
147
155
  increment_process_counter
148
156
  end
157
+ @redis.sadd "enqueued", @options[:url]
158
+
159
+ yield if block_given?
160
+ @redis.incr("crawl_job_enqueued_count")
149
161
  end
150
-
162
+
163
+ def finished_processing
164
+ @redis.srem "currently_running", @options[:url]
165
+ end
166
+
151
167
  def finished?
152
168
  print_counters
153
- # if there's nothing left queued or the crawled limit has been reached
169
+ # if there's nothing left queued or the crawled limit has been reached and we're not still processing something
154
170
  if @options[:crawl_limit].nil? || @options[:crawl_limit] == 0
155
- if queue_counter.to_i == 0
171
+ if queue_counter == 0 && @redis.smembers("currently_running").empty?
156
172
  finished
157
173
  return true
158
174
  end
159
- elsif (queue_counter.to_i) == 0 || crawl_counter.to_i >= @options[:crawl_limit].to_i
175
+ elsif (queue_counter == 0 && @redis.smembers("currently_running").empty?) || process_counter >= @options[:crawl_limit].to_i
160
176
  finished
161
177
  return true
162
178
  end
163
179
  false
164
180
  end
165
-
181
+
166
182
  def finished
167
- set_first_to_finish if !@redis.exists("first_to_finish")
168
- ap "CRAWL FINISHED #{@options[:url]}, #{counters}, #{@redis.get("original_base_url")}, #{@redis.get("crawled_base_url")}" if @options[:debug]
183
+ set_first_to_finish
184
+ debug_ap "CRAWL FINISHED #{@options[:url]}, #{counters}, #{@redis.get("original_base_url")}, #{@redis.get("crawled_base_url")}"
169
185
  @stats.end_crawl(@options)
170
186
  end
171
-
187
+
172
188
  def set_first_to_finish
173
189
  @redis.watch("first_to_finish") do
174
190
  if !@redis.exists("first_to_finish")
175
191
  @redis.multi do
176
- puts "set first to finish"
192
+ debug_puts "set first to finish"
177
193
  @first_to_finish = true
178
194
  @redis.set("first_to_finish", 1)
179
195
  end
@@ -182,23 +198,51 @@ module CobwebModule
182
198
  end
183
199
  end
184
200
  end
185
-
186
-
187
- def first_to_finish?
201
+
202
+
203
+ def first_to_finish?
188
204
  @first_to_finish
189
205
  end
190
206
 
191
207
  def crawled_base_url
192
208
  @redis.get("crawled_base_url")
193
209
  end
194
-
210
+
195
211
  def statistics
196
212
  @stats.get_statistics
197
213
  end
198
-
214
+
199
215
  def redis
200
216
  @redis
201
217
  end
218
+
219
+ def lock(key, &block)
220
+ debug_puts "REQUESTING LOCK [#{key}]"
221
+ set_nx = @redis.setnx("#{key}_lock", "locked")
222
+ debug_puts "LOCK:#{key}:#{set_nx}"
223
+ while !set_nx
224
+ debug_puts "===== WAITING FOR LOCK [#{key}] ====="
225
+ sleep 0.01
226
+ set_nx = @redis.setnx("#{key}_lock", "locked")
227
+ end
228
+
229
+ debug_puts "RECEIVED LOCK [#{key}]"
230
+ begin
231
+ result = yield
232
+ ensure
233
+ @redis.del("#{key}_lock")
234
+ debug_puts "LOCK RELEASED [#{key}]"
235
+ end
236
+ result
237
+ end
238
+
239
+ def debug_ap(value)
240
+ ap(value) if @options[:debug]
241
+ end
242
+
243
+ def debug_puts(value)
244
+ puts(value) if @options[:debug]
245
+ end
202
246
 
203
247
  private
204
248
  def setup_defaults
@@ -206,7 +250,7 @@ module CobwebModule
206
250
  @options[:crawl_limit_by_page] = false unless @options.has_key? :crawl_limit_by_page
207
251
  @options[:valid_mime_types] = ["*/*"] unless @options.has_key? :valid_mime_types
208
252
  end
209
-
253
+
210
254
  # Increments the queue counter and refreshes crawl counters
211
255
  def increment_queue_counter
212
256
  @redis.incr "queue-counter"
@@ -223,7 +267,7 @@ module CobwebModule
223
267
  def decrement_queue_counter
224
268
  @redis.decr "queue-counter"
225
269
  end
226
-
270
+
227
271
  def crawl_counter
228
272
  @redis.get("crawl-counter").to_i
229
273
  end
@@ -233,19 +277,19 @@ module CobwebModule
233
277
  def process_counter
234
278
  @redis.get("process-counter").to_i
235
279
  end
236
-
280
+
237
281
  def status
238
282
  @stats.get_status
239
283
  end
240
-
284
+
241
285
  def print_counters
242
- puts counters
286
+ debug_puts counters
243
287
  end
244
-
288
+
245
289
  def counters
246
- "crawl_counter: #{crawl_counter} queue_counter: #{queue_counter} process_counter: #{process_counter} crawl_limit: #{@options[:crawl_limit]}"
290
+ "crawl_counter: #{crawl_counter} queue_counter: #{queue_counter} process_counter: #{process_counter} crawl_limit: #{@options[:crawl_limit]} currently_running: #{@redis.smembers("currently_running").count}"
247
291
  end
248
-
292
+
249
293
  # Sets the base url in redis. If the first page is a redirect, it sets the base_url to the destination
250
294
  def set_base_url(redis)
251
295
  if redis.get("base_url").nil?
@@ -257,7 +301,5 @@ module CobwebModule
257
301
  end
258
302
  end
259
303
 
260
-
261
-
262
304
  end
263
- end
305
+ end
@@ -24,40 +24,49 @@ class CrawlJob
24
24
  @crawl.process_links do |link|
25
25
 
26
26
  # enqueue the links to resque
27
- puts "ENQUEUED LINK: #{link}"
27
+ @crawl.debug_puts "ENQUEUED LINK: #{link}"
28
28
  enqueue_content(content_request, link)
29
29
 
30
30
  end
31
31
 
32
-
33
- if @crawl.to_be_processed?
34
- @crawl.process
35
-
36
- # enqueue to processing queue
37
- @crawl.redis.incr("crawl_job_enqueued_count")
38
- puts "ENQUEUED [#{@crawl.redis.get("crawl_job_enqueued_count")}] URL: #{@crawl.content.url}"
39
- send_to_processing_queue(@crawl.content.to_hash, content_request)
40
-
41
-
42
- #if the enqueue counter has been requested update that
43
- if content_request.has_key?(:enqueue_counter_key)
44
- enqueue_redis = Redis::Namespace.new(content_request[:enqueue_counter_namespace].to_s, :redis => Redis.new(content_request[:redis_options]))
45
- current_count = enqueue_redis.hget(content_request[:enqueue_counter_key], content_request[:enqueue_counter_field]).to_i
46
- enqueue_redis.hset(content_request[:enqueue_counter_key], content_request[:enqueue_counter_field], current_count+1)
32
+ @crawl.lock("crawl_job_process") do
33
+ if @crawl.to_be_processed?
34
+
35
+ @crawl.process do
36
+
37
+ # enqueue to processing queue
38
+ @crawl.debug_puts "ENQUEUED [#{@crawl.redis.get("crawl_job_enqueued_count")}] URL: #{@crawl.content.url}"
39
+ send_to_processing_queue(@crawl.content.to_hash, content_request)
40
+
41
+ #if the enqueue counter has been requested update that
42
+ if content_request.has_key?(:enqueue_counter_key)
43
+ enqueue_redis = Redis::Namespace.new(content_request[:enqueue_counter_namespace].to_s, :redis => Redis.new(content_request[:redis_options]))
44
+ current_count = enqueue_redis.hget(content_request[:enqueue_counter_key], content_request[:enqueue_counter_field]).to_i
45
+ enqueue_redis.hset(content_request[:enqueue_counter_key], content_request[:enqueue_counter_field], current_count+1)
46
+ end
47
+
48
+ end
49
+ else
50
+ @crawl.debug_puts "@crawl.finished? #{@crawl.finished?}"
51
+ @crawl.debug_puts "@crawl.within_crawl_limits? #{@crawl.within_crawl_limits?}"
52
+ @crawl.debug_puts "@crawl.first_to_finish? #{@crawl.first_to_finish?}"
47
53
  end
48
- else
49
- ap "@crawl.finished? #{@crawl.finished?}"
50
- ap "@crawl.within_crawl_limits? #{@crawl.within_crawl_limits?}"
51
- ap "@crawl.first_to_finish? #{@crawl.first_to_finish?}"
54
+
52
55
  end
53
56
  end
54
57
  end
58
+
59
+ @crawl.lock("finished") do
60
+ # let the crawl know we're finished with this object
61
+ @crawl.finished_processing
55
62
 
56
- # test queue and crawl sizes to see if we have completed the crawl
57
- ap "finished? #{@crawl.finished?}"
58
- ap "first_to_finish? #{@crawl.first_to_finish?}" if @crawl.finished?
59
- if @crawl.finished? && @crawl.first_to_finish?
60
- finished(content_request)
63
+ # test queue and crawl sizes to see if we have completed the crawl
64
+ @crawl.debug_puts "finished? #{@crawl.finished?}"
65
+ @crawl.debug_puts "first_to_finish? #{@crawl.first_to_finish?}" if @crawl.finished?
66
+ if @crawl.finished? && @crawl.first_to_finish?
67
+ @crawl.debug_puts "Calling crawl_job finished"
68
+ finished(content_request)
69
+ end
61
70
  end
62
71
 
63
72
  end
@@ -68,6 +77,7 @@ class CrawlJob
68
77
  additional_stats[:redis_options] = content_request[:redis_options] unless content_request[:redis_options] == {}
69
78
  additional_stats[:source_id] = content_request[:source_id] unless content_request[:source_id].nil?
70
79
 
80
+ @crawl.debug_puts "increment crawl_finished_enqueued_count"
71
81
  @crawl.redis.incr("crawl_finished_enqueued_count")
72
82
  Resque.enqueue(const_get(content_request[:crawl_finished_queue]), @crawl.statistics.merge(additional_stats))
73
83
  end
@@ -85,7 +95,7 @@ class CrawlJob
85
95
  else
86
96
  Resque.enqueue(const_get(content_request[:processing_queue]), content_to_send)
87
97
  end
88
- puts "#{content_request[:url]} has been sent for processing. use_encoding_safe_process_job: #{content_request[:use_encoding_safe_process_job]}" if content_request[:debug]
98
+ @crawl.debug_puts "#{content_request[:url]} has been sent for processing. use_encoding_safe_process_job: #{content_request[:use_encoding_safe_process_job]}"
89
99
  end
90
100
 
91
101
  private
@@ -9,7 +9,7 @@ describe Cobweb, :local_only => true do
9
9
  # START WORKERS ONLY FOR CRAWL QUEUE SO WE CAN COUNT ENQUEUED PROCESS AND FINISH QUEUES
10
10
  puts "Starting Workers... Please Wait..."
11
11
  `mkdir log`
12
- io = IO.popen("nohup rake resque:workers PIDFILE=./tmp/pids/resque.pid COUNT=3 QUEUE=cobweb_crawl_job > log/output.log &")
12
+ io = IO.popen("nohup rake resque:workers PIDFILE=./tmp/pids/resque.pid COUNT=10 QUEUE=cobweb_crawl_job > log/output.log &")
13
13
  puts "Workers Started."
14
14
 
15
15
  end
@@ -42,16 +42,16 @@ describe Cobweb, :local_only => true do
42
42
  @redis.get("crawl_job_enqueued_count").to_i.should == 0
43
43
  end
44
44
 
45
- it "should not complete the crawl when cancelled" do
46
- crawl = @cobweb.start(@base_url)
47
- crawl_obj = CobwebCrawlHelper.new(crawl)
48
- sleep 6
49
- crawl_obj.destroy
50
- @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
51
- wait_for_crawl_finished crawl[:crawl_id]
52
- @redis.get("crawl_job_enqueued_count").to_i.should > 0
53
- @redis.get("crawl_job_enqueued_count").to_i.should_not == @base_page_count
54
- end
45
+ # it "should not complete the crawl when cancelled" do
46
+ # crawl = @cobweb.start(@base_url)
47
+ # crawl_obj = CobwebCrawlHelper.new(crawl)
48
+ # sleep 6
49
+ # crawl_obj.destroy
50
+ # @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
51
+ # wait_for_crawl_finished crawl[:crawl_id]
52
+ # @redis.get("crawl_job_enqueued_count").to_i.should > 0
53
+ # @redis.get("crawl_job_enqueued_count").to_i.should_not == @base_page_count
54
+ # end
55
55
 
56
56
  end
57
57
  describe "with no crawl limit" do
@@ -83,11 +83,13 @@ describe Cobweb, :local_only => true do
83
83
  @redis.get("crawl_finished_enqueued_count").to_i.should == 1
84
84
  end
85
85
  end
86
+
86
87
  describe "with limited mime_types" do
87
88
  before(:each) do
88
89
  @request = {
89
90
  :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
90
- :quiet => true,
91
+ :quiet => false,
92
+ :debug => false,
91
93
  :cache => nil,
92
94
  :valid_mime_types => ["text/html"]
93
95
  }
@@ -112,12 +114,26 @@ describe Cobweb, :local_only => true do
112
114
  before(:each) do
113
115
  @request = {
114
116
  :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
115
- :quiet => true,
117
+ :quiet => false,
118
+ :debug => false,
116
119
  :cache => nil
117
120
  }
118
121
  @redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{@request[:crawl_id]}", Redis.new)
119
122
  end
120
123
 
124
+ # describe "crawling http://yepadeperrors.wordpress.com/ with limit of 20" do
125
+ # before(:each) do
126
+ # @request[:crawl_limit] = 20
127
+ # @cobweb = Cobweb.new @request
128
+ # end
129
+ # it "should crawl exactly 20" do
130
+ # crawl = @cobweb.start("http://yepadeperrors.wordpress.com/")
131
+ # @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
132
+ # wait_for_crawl_finished crawl[:crawl_id]
133
+ # @redis.get("crawl_job_enqueued_count").to_i.should == 20
134
+ # end
135
+ #
136
+ # end
121
137
  describe "limit to 1" do
122
138
  before(:each) do
123
139
  @request[:crawl_limit] = 1
@@ -245,12 +261,17 @@ def running?(crawl_id)
245
261
  result = true
246
262
  else
247
263
  if status == @last_stat
248
- if @counter > 5
264
+ if @counter > 20
265
+ puts ""
249
266
  raise "Static status: #{status}"
250
267
  else
251
268
  @counter += 1
252
269
  end
253
- puts "Static Status.. #{6-@counter}"
270
+ if @counter == 1
271
+ print "Static Status.. #{21-@counter}"
272
+ else
273
+ print ".#{21-@counter}"
274
+ end
254
275
  else
255
276
  result = status != CobwebCrawlHelper::FINISHED && status != CobwebCrawlHelper::CANCELLED
256
277
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cobweb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.74
4
+ version: 0.0.75
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-15 00:00:00.000000000 Z
12
+ date: 2012-10-16 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: resque
16
- requirement: &70347429190520 !ruby/object:Gem::Requirement
16
+ requirement: &70303208832100 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70347429190520
24
+ version_requirements: *70303208832100
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: redis
27
- requirement: &70347429190020 !ruby/object:Gem::Requirement
27
+ requirement: &70303208831180 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70347429190020
35
+ version_requirements: *70303208831180
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: nokogiri
38
- requirement: &70347429189540 !ruby/object:Gem::Requirement
38
+ requirement: &70303208830080 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70347429189540
46
+ version_requirements: *70303208830080
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: addressable
49
- requirement: &70347429188880 !ruby/object:Gem::Requirement
49
+ requirement: &70303208829280 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70347429188880
57
+ version_requirements: *70303208829280
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &70347429187340 !ruby/object:Gem::Requirement
60
+ requirement: &70303208828000 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70347429187340
68
+ version_requirements: *70303208828000
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: awesome_print
71
- requirement: &70347429185820 !ruby/object:Gem::Requirement
71
+ requirement: &70303208826740 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70347429185820
79
+ version_requirements: *70303208826740
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: sinatra
82
- requirement: &70347429185040 !ruby/object:Gem::Requirement
82
+ requirement: &70303208825020 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: '0'
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *70347429185040
90
+ version_requirements: *70303208825020
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: thin
93
- requirement: &70347429184340 !ruby/object:Gem::Requirement
93
+ requirement: &70303208823900 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '0'
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *70347429184340
101
+ version_requirements: *70303208823900
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: haml
104
- requirement: &70347429183120 !ruby/object:Gem::Requirement
104
+ requirement: &70303208822980 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,10 +109,10 @@ dependencies:
109
109
  version: '0'
110
110
  type: :runtime
111
111
  prerelease: false
112
- version_requirements: *70347429183120
112
+ version_requirements: *70303208822980
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: namespaced_redis
115
- requirement: &70347429181840 !ruby/object:Gem::Requirement
115
+ requirement: &70303208821840 !ruby/object:Gem::Requirement
116
116
  none: false
117
117
  requirements:
118
118
  - - ! '>='
@@ -120,10 +120,10 @@ dependencies:
120
120
  version: 1.0.2
121
121
  type: :runtime
122
122
  prerelease: false
123
- version_requirements: *70347429181840
123
+ version_requirements: *70303208821840
124
124
  - !ruby/object:Gem::Dependency
125
125
  name: json
126
- requirement: &70347429180860 !ruby/object:Gem::Requirement
126
+ requirement: &70303208821320 !ruby/object:Gem::Requirement
127
127
  none: false
128
128
  requirements:
129
129
  - - ! '>='
@@ -131,7 +131,7 @@ dependencies:
131
131
  version: '0'
132
132
  type: :runtime
133
133
  prerelease: false
134
- version_requirements: *70347429180860
134
+ version_requirements: *70303208821320
135
135
  description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
136
136
  crawl extremely large sites which is much more performant than multi-threaded crawlers. It
137
137
  is also a standalone crawler that has a sophisticated statistics monitoring interface