cobweb 1.0.6 → 1.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
 
2
- h1. Cobweb v1.0.6
2
+ h1. Cobweb v1.0.8
3
3
 
4
4
  "@cobweb_gem":https://twitter.com/cobweb_gem
5
5
 
@@ -152,6 +152,10 @@ bc. statistics = CobwebCrawler.new(:cache => 600).crawl("http://www.pepsico.com"
152
152
  end
153
153
  puts "Finished Crawl with #{statistics[:page_count]} pages and #{statistics[:asset_count]} assets."
154
154
 
155
+ There are some specific options for CobwebCrawler in addition to the normal cobweb options:
156
+
157
+ * thread_count - specifies the number of threads used by the crawler, defaults to 1
158
+
155
159
  h3. CobwebCrawlHelper
156
160
 
157
161
  The CobwebCrawlHelper class is a helper class that assists in getting information about a crawl and in performing functions against the crawl.
@@ -127,10 +127,10 @@ class Cobweb
127
127
  content = HashUtil.deep_symbolize_keys(Marshal.load(redis.get(unique_id)))
128
128
  else
129
129
  # retrieve data
130
- unless @http && @http.address == uri.host && @http.port == uri.inferred_port
130
+ #unless @http && @http.address == uri.host && @http.port == uri.inferred_port
131
131
  puts "Creating connection to #{uri.host}..." if @options[:debug]
132
132
  @http = Net::HTTP.new(uri.host, uri.inferred_port)
133
- end
133
+ #end
134
134
  if uri.scheme == "https"
135
135
  @http.use_ssl = true
136
136
  @http.verify_mode = OpenSSL::SSL::VERIFY_NONE
@@ -39,6 +39,7 @@ class CobwebCrawler
39
39
  # Initiates a crawl starting at the base_url and applying the options supplied. Can also take a block that is executed and passed the content hash and the statistics hash.
40
40
  def crawl(base_url, crawl_options = {}, &block)
41
41
  @options[:base_url] = base_url unless @options.has_key? :base_url
42
+ @options[:thread_count] = 1 unless @options.has_key? :thread_count
42
43
 
43
44
  @options[:internal_urls] << base_url if @options[:internal_urls].empty?
44
45
  @redis.sadd("internal_urls", base_url) if @options[:internal_urls].empty?
@@ -46,87 +47,110 @@ class CobwebCrawler
46
47
  @crawl_options = crawl_options
47
48
 
48
49
  @redis.sadd("queued", base_url) unless base_url.nil? || @redis.sismember("crawled", base_url) || @redis.sismember("queued", base_url)
49
- crawl_counter = @redis.scard("crawled").to_i
50
- queue_counter = @redis.scard("queued").to_i
50
+ @crawl_counter = @redis.scard("crawled").to_i
51
+ @queue_counter = @redis.scard("queued").to_i
51
52
 
53
+ @threads = []
52
54
  begin
53
55
  @stats.start_crawl(@options)
54
- while queue_counter>0 && (@options[:crawl_limit].to_i == 0 || @options[:crawl_limit].to_i > crawl_counter)
55
- thread = Thread.new do
56
-
56
+
57
+ @threads << Thread.new do
58
+ Thread.abort_on_exception = true
59
+ spawn_thread(&block)
60
+ end
57
61
 
58
- url = @redis.spop "queued"
59
- queue_counter = 0 if url.nil?
62
+ sleep 5
63
+ while running_thread_count > 0
64
+ if @queue_counter > 0
65
+ (@options[:thread_count]-running_thread_count).times.each do
66
+ @threads << Thread.new do
67
+ Thread.abort_on_exception = true
68
+ spawn_thread(&block)
69
+ end
70
+ end
71
+ end
72
+ sleep 1
73
+ end
74
+
75
+ ensure
76
+ @stats.end_crawl(@options)
77
+ end
78
+ @stats
79
+ end
60
80
 
61
- @options[:url] = url
62
- unless @redis.sismember("crawled", url.to_s)
63
- begin
64
- @stats.update_status("Requesting #{url}...")
65
- content = @cobweb.get(url) unless url.nil?
66
- if content.nil?
67
- queue_counter = queue_counter - 1 #@redis.scard("queued").to_i
68
- else
69
- @stats.update_status("Processing #{url}...")
81
+ def spawn_thread(&block)
82
+ while @queue_counter>0 && (@options[:crawl_limit].to_i == 0 || @options[:crawl_limit].to_i > @crawl_counter)
83
+ url = @redis.spop "queued"
84
+ @queue_counter = 0 if url.nil?
70
85
 
71
- @redis.sadd "crawled", url.to_s
72
- @redis.incr "crawl-counter"
73
-
74
- internal_links = ContentLinkParser.new(url, content[:body]).all_links(:valid_schemes => [:http, :https])
86
+ @options[:url] = url
87
+ unless @redis.sismember("crawled", url.to_s)
88
+ begin
89
+ @stats.update_status("Requesting #{url}...")
90
+ content = @cobweb.get(url) unless url.nil?
91
+ if content.nil?
92
+ @queue_counter = @queue_counter - 1 #@redis.scard("queued").to_i
93
+ else
94
+ @stats.update_status("Processing #{url}...")
95
+
96
+ @redis.sadd "crawled", url.to_s
97
+ @redis.incr "crawl-counter"
98
+
99
+ internal_links = ContentLinkParser.new(url, content[:body]).all_links(:valid_schemes => [:http, :https])
75
100
 
76
- # select the link if its internal (eliminate external before expensive lookups in queued and crawled)
77
- cobweb_links = CobwebLinks.new(@options)
101
+ # select the link if it's internal (eliminate external before expensive lookups in queued and crawled)
102
+ cobweb_links = CobwebLinks.new(@options)
78
103
 
79
- internal_links = internal_links.select{|link| cobweb_links.internal?(link) || (@options[:crawl_linked_external] && cobweb_links.internal?(url.to_s))}
104
+ internal_links = internal_links.select{|link| cobweb_links.internal?(link) || (@options[:crawl_linked_external] && cobweb_links.internal?(url.to_s))}
80
105
 
81
- all_internal_links = internal_links
82
-
83
- # reject the link if we've crawled it or queued it
84
- internal_links.reject!{|link| @redis.sismember("crawled", link)}
85
- internal_links.reject!{|link| @redis.sismember("queued", link)}
86
- internal_links.reject!{|link| link.nil? || link.empty?}
87
-
88
- internal_links.each do |link|
89
- puts "Added #{link.to_s} to queue" if @debug
90
- @redis.sadd "queued", link unless link.nil?
91
- children = @redis.hget("navigation", url)
92
- children = [] if children.nil?
93
- children << link
94
- @redis.hset "navigation", url, children
95
- queue_counter += 1
96
- end
106
+ all_internal_links = internal_links
107
+
108
+ # reject the link if we've crawled it or queued it
109
+ internal_links.reject!{|link| @redis.sismember("crawled", link)}
110
+ internal_links.reject!{|link| @redis.sismember("queued", link)}
111
+ internal_links.reject!{|link| link.nil? || link.empty?}
112
+
113
+ internal_links.each do |link|
114
+ puts "Added #{link.to_s} to queue" if @debug
115
+ @redis.sadd "queued", link unless link.nil?
116
+ children = @redis.hget("navigation", url)
117
+ children = [] if children.nil?
118
+ children << link
119
+ @redis.hset "navigation", url, children
120
+ @queue_counter += 1
121
+ end
97
122
 
98
- if @options[:store_refered_url]
99
- all_internal_links.each do |link|
100
- @redis.sadd("inbound_links_#{Digest::MD5.hexdigest(link)}", url)
101
- end
102
- end
103
-
104
- crawl_counter = @redis.scard("crawled").to_i
105
- queue_counter = @redis.scard("queued").to_i
106
-
107
- @stats.update_statistics(content, crawl_counter, queue_counter)
108
- @stats.update_status("Completed #{url}.")
109
- yield content, @stats.get_statistics if block_given?
123
+ if @options[:store_refered_url]
124
+ all_internal_links.each do |link|
125
+ @redis.sadd("inbound_links_#{Digest::MD5.hexdigest(link)}", url)
110
126
  end
111
- rescue => e
112
- puts "Error loading #{url}: #{e}"
113
- #puts "!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!"
114
- #ap e
115
- #ap e.backtrace
116
- ensure
117
- crawl_counter = @redis.scard("crawled").to_i
118
- queue_counter = @redis.scard("queued").to_i
119
127
  end
120
- else
121
- puts "Already crawled #{@options[:url]}" if @debug
128
+
129
+ @crawl_counter = @redis.scard("crawled").to_i
130
+ @queue_counter = @redis.scard("queued").to_i
131
+
132
+ @stats.update_statistics(content, @crawl_counter, @queue_counter)
133
+ @stats.update_status("Completed #{url}.")
134
+ yield content, @stats.get_statistics if block_given?
122
135
  end
136
+ rescue => e
137
+ puts "Error loading #{url}: #{e}"
138
+ #puts "!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!"
139
+ #ap e
140
+ #ap e.backtrace
141
+ ensure
142
+ @crawl_counter = @redis.scard("crawled").to_i
143
+ @queue_counter = @redis.scard("queued").to_i
123
144
  end
124
- thread.join
145
+ else
146
+ puts "Already crawled #{@options[:url]}" if @debug
125
147
  end
126
- ensure
127
- @stats.end_crawl(@options)
128
148
  end
129
- @stats
149
+ Thread.exit
150
+ end
151
+
152
+ def running_thread_count
153
+ @threads.map{|t| t.status}.select{|status| status=="run" || status == "sleep"}.count
130
154
  end
131
155
 
132
156
  end
@@ -3,7 +3,7 @@ class CobwebVersion
3
3
 
4
4
  # Returns a string of the current version
5
5
  def self.version
6
- "1.0.6"
6
+ "1.0.8"
7
7
  end
8
8
 
9
9
  end
@@ -8,6 +8,7 @@ class Stats
8
8
  def initialize(options)
9
9
  options[:redis_options] = {} unless options.has_key? :redis_options
10
10
  @full_redis = Redis.new(options[:redis_options])
11
+ @lock = Mutex.new
11
12
  @redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{options[:crawl_id]}", :redis => @full_redis)
12
13
  end
13
14
 
@@ -44,105 +45,106 @@ class Stats
44
45
  # Returns statistics hash. update_statistics takes the content hash, extracts statistics from it and updates redis with the data.
45
46
  def update_statistics(content, crawl_counter=@redis.scard("crawled").to_i, queue_counter=@redis.scard("queued").to_i)
46
47
 
47
- @statistics = get_statistics
48
-
49
- if @statistics.has_key? :average_response_time
50
- @statistics[:average_response_time] = (((@redis.hget("statistics", "average_response_time").to_f*crawl_counter) + content[:response_time].to_f) / (crawl_counter + 1))
51
- else
52
- @statistics[:average_response_time] = content[:response_time].to_f
53
- end
54
- @statistics[:maximum_response_time] = content[:response_time].to_f if @statistics[:maximum_response_time].nil? or content[:response_time].to_f > @statistics[:maximum_response_time].to_f
55
- @statistics[:minimum_response_time] = content[:response_time].to_f if @statistics[:minimum_response_time].nil? or content[:response_time].to_f < @statistics[:minimum_response_time].to_f
56
- if @statistics.has_key? :average_length
57
- @statistics[:average_length] = (((@redis.hget("statistics", "average_length").to_i*crawl_counter) + content[:length].to_i) / (crawl_counter + 1))
58
- else
59
- @statistics[:average_length] = content[:length].to_i
60
- end
61
- @statistics[:maximum_length] = content[:length].to_i if @redis.hget("statistics", "maximum_length").nil? or content[:length].to_i > @statistics[:maximum_length].to_i
62
- @statistics[:minimum_length] = content[:length].to_i if @redis.hget("statistics", "minimum_length").nil? or content[:length].to_i < @statistics[:minimum_length].to_i
63
-
64
- if content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
65
- @statistics[:page_count] = @statistics[:page_count].to_i + 1
66
- @statistics[:page_size] = @statistics[:page_size].to_i + content[:length].to_i
67
- increment_time_stat("pages_count")
68
- else
69
- @statistics[:asset_count] = @statistics[:asset_count].to_i + 1
70
- @statistics[:asset_size] = @statistics[:asset_size].to_i + content[:length].to_i
71
- increment_time_stat("assets_count")
72
- end
73
-
74
- total_redirects = @statistics[:total_redirects].to_i
75
- @statistics[:total_redirects] = 0 if total_redirects.nil?
76
- @statistics[:total_redirects] = total_redirects += content[:redirect_through].count unless content[:redirect_through].nil?
48
+ @lock.synchronize {
49
+ @statistics = get_statistics
50
+
51
+ if @statistics.has_key? :average_response_time
52
+ @statistics[:average_response_time] = (((@redis.hget("statistics", "average_response_time").to_f*crawl_counter) + content[:response_time].to_f) / (crawl_counter + 1))
53
+ else
54
+ @statistics[:average_response_time] = content[:response_time].to_f
55
+ end
56
+ @statistics[:maximum_response_time] = content[:response_time].to_f if @statistics[:maximum_response_time].nil? or content[:response_time].to_f > @statistics[:maximum_response_time].to_f
57
+ @statistics[:minimum_response_time] = content[:response_time].to_f if @statistics[:minimum_response_time].nil? or content[:response_time].to_f < @statistics[:minimum_response_time].to_f
58
+ if @statistics.has_key? :average_length
59
+ @statistics[:average_length] = (((@redis.hget("statistics", "average_length").to_i*crawl_counter) + content[:length].to_i) / (crawl_counter + 1))
60
+ else
61
+ @statistics[:average_length] = content[:length].to_i
62
+ end
63
+ @statistics[:maximum_length] = content[:length].to_i if @redis.hget("statistics", "maximum_length").nil? or content[:length].to_i > @statistics[:maximum_length].to_i
64
+ @statistics[:minimum_length] = content[:length].to_i if @redis.hget("statistics", "minimum_length").nil? or content[:length].to_i < @statistics[:minimum_length].to_i
65
+
66
+ if content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
67
+ @statistics[:page_count] = @statistics[:page_count].to_i + 1
68
+ @statistics[:page_size] = @statistics[:page_size].to_i + content[:length].to_i
69
+ increment_time_stat("pages_count")
70
+ else
71
+ @statistics[:asset_count] = @statistics[:asset_count].to_i + 1
72
+ @statistics[:asset_size] = @statistics[:asset_size].to_i + content[:length].to_i
73
+ increment_time_stat("assets_count")
74
+ end
75
+
76
+ total_redirects = @statistics[:total_redirects].to_i
77
+ @statistics[:total_redirects] = 0 if total_redirects.nil?
78
+ @statistics[:total_redirects] = total_redirects += content[:redirect_through].count unless content[:redirect_through].nil?
77
79
 
78
- @statistics[:crawl_counter] = crawl_counter
79
- @statistics[:queue_counter] = queue_counter
80
-
81
- total_length = @statistics[:total_length].to_i
82
- @statistics[:total_length] = total_length + content[:length].to_i
80
+ @statistics[:crawl_counter] = crawl_counter
81
+ @statistics[:queue_counter] = queue_counter
82
+
83
+ total_length = @statistics[:total_length].to_i
84
+ @statistics[:total_length] = total_length + content[:length].to_i
83
85
 
84
- mime_counts = {}
85
- if @statistics.has_key? :mime_counts
86
- mime_counts = @statistics[:mime_counts]
87
- if mime_counts.has_key? content[:mime_type]
88
- mime_counts[content[:mime_type]] += 1
86
+ mime_counts = {}
87
+ if @statistics.has_key? :mime_counts
88
+ mime_counts = @statistics[:mime_counts]
89
+ if mime_counts.has_key? content[:mime_type]
90
+ mime_counts[content[:mime_type]] += 1
91
+ else
92
+ mime_counts[content[:mime_type]] = 1
93
+ end
89
94
  else
90
- mime_counts[content[:mime_type]] = 1
95
+ mime_counts = {content[:mime_type] => 1}
91
96
  end
92
- else
93
- mime_counts = {content[:mime_type] => 1}
94
- end
95
- @statistics[:mime_counts] = mime_counts.to_json
97
+ @statistics[:mime_counts] = mime_counts.to_json
96
98
 
97
- # record mime categories stats
98
- if content[:mime_type].cobweb_starts_with? "text"
99
- increment_time_stat("mime_text_count")
100
- elsif content[:mime_type].cobweb_starts_with? "application"
101
- increment_time_stat("mime_application_count")
102
- elsif content[:mime_type].cobweb_starts_with? "audio"
103
- increment_time_stat("mime_audio_count")
104
- elsif content[:mime_type].cobweb_starts_with? "image"
105
- increment_time_stat("mime_image_count")
106
- elsif content[:mime_type].cobweb_starts_with? "message"
107
- increment_time_stat("mime_message_count")
108
- elsif content[:mime_type].cobweb_starts_with? "model"
109
- increment_time_stat("mime_model_count")
110
- elsif content[:mime_type].cobweb_starts_with? "multipart"
111
- increment_time_stat("mime_multipart_count")
112
- elsif content[:mime_type].cobweb_starts_with? "video"
113
- increment_time_stat("mime_video_count")
114
- end
115
-
116
- status_counts = {}
117
- if @statistics.has_key? :status_counts
118
- status_counts = @statistics[:status_counts]
119
- status_code = content[:status_code].to_i.to_s.to_sym
120
- if status_counts.has_key? status_code
121
- status_counts[status_code] += 1
99
+ # record mime categories stats
100
+ if content[:mime_type].cobweb_starts_with? "text"
101
+ increment_time_stat("mime_text_count")
102
+ elsif content[:mime_type].cobweb_starts_with? "application"
103
+ increment_time_stat("mime_application_count")
104
+ elsif content[:mime_type].cobweb_starts_with? "audio"
105
+ increment_time_stat("mime_audio_count")
106
+ elsif content[:mime_type].cobweb_starts_with? "image"
107
+ increment_time_stat("mime_image_count")
108
+ elsif content[:mime_type].cobweb_starts_with? "message"
109
+ increment_time_stat("mime_message_count")
110
+ elsif content[:mime_type].cobweb_starts_with? "model"
111
+ increment_time_stat("mime_model_count")
112
+ elsif content[:mime_type].cobweb_starts_with? "multipart"
113
+ increment_time_stat("mime_multipart_count")
114
+ elsif content[:mime_type].cobweb_starts_with? "video"
115
+ increment_time_stat("mime_video_count")
116
+ end
117
+
118
+ status_counts = {}
119
+ if @statistics.has_key? :status_counts
120
+ status_counts = @statistics[:status_counts]
121
+ status_code = content[:status_code].to_i.to_s.to_sym
122
+ if status_counts.has_key? status_code
123
+ status_counts[status_code] += 1
124
+ else
125
+ status_counts[status_code] = 1
126
+ end
122
127
  else
123
- status_counts[status_code] = 1
124
- end
125
- else
126
- status_counts = {status_code => 1}
127
- end
128
-
129
- # record statistics by status type
130
- if content[:status_code] >= 200 && content[:status_code] < 300
131
- increment_time_stat("status_200_count")
132
- elsif content[:status_code] >= 400 && content[:status_code] < 500
133
- increment_time_stat("status|_400_count")
134
- elsif content[:status_code] >= 500 && content[:status_code] < 600
135
- increment_time_stat("status|_500_count")
136
- end
137
-
138
- @statistics[:status_counts] = status_counts.to_json
139
-
140
- ## time based statistics
141
- increment_time_stat("minute_totals", "minute", 60)
142
-
143
- redis_command = "@redis.hmset 'statistics', #{@statistics.keys.map{|key| "'#{key}', '#{@statistics[key].to_s.gsub("'","''")}'"}.join(", ")}"
144
- instance_eval redis_command
145
-
128
+ status_counts = {status_code => 1}
129
+ end
130
+
131
+ # record statistics by status type
132
+ if content[:status_code] >= 200 && content[:status_code] < 300
133
+ increment_time_stat("status_200_count")
134
+ elsif content[:status_code] >= 400 && content[:status_code] < 500
135
+ increment_time_stat("status|_400_count")
136
+ elsif content[:status_code] >= 500 && content[:status_code] < 600
137
+ increment_time_stat("status|_500_count")
138
+ end
139
+
140
+ @statistics[:status_counts] = status_counts.to_json
141
+
142
+ ## time based statistics
143
+ increment_time_stat("minute_totals", "minute", 60)
144
+
145
+ redis_command = "@redis.hmset 'statistics', #{@statistics.keys.map{|key| "'#{key}', '#{@statistics[key].to_s.gsub("'","''")}'"}.join(", ")}"
146
+ instance_eval redis_command
147
+ }
146
148
  @statistics
147
149
  end
148
150
 
@@ -12,16 +12,17 @@ RSpec.configure do |config|
12
12
  if ENV["TRAVIS_RUBY_VERSION"] || ENV['CI']
13
13
  config.filter_run_excluding :local_only => true
14
14
  end
15
+
16
+ Thread.new do
17
+ @thin ||= Thin::Server.start("0.0.0.0", 3532, SampleServer.app)
18
+ end
19
+
20
+ # WAIT FOR START TO COMPLETE
21
+ sleep 1
22
+
15
23
 
16
24
  config.before(:all) {
17
25
  # START THIN SERVER TO HOST THE SAMPLE SITE FOR CRAWLING
18
- @thin = nil
19
- Thread.new do
20
- @thin = Thin::Server.start("0.0.0.0", 3532, SampleServer.app)
21
- end
22
-
23
- # WAIT FOR START TO COMPLETE
24
- sleep 1
25
26
  }
26
27
 
27
28
  config.before(:each) {
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cobweb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.6
4
+ version: 1.0.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-11 00:00:00.000000000 Z
12
+ date: 2013-02-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: resque
16
- requirement: &70331050084220 !ruby/object:Gem::Requirement
16
+ requirement: &70128767187740 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70331050084220
24
+ version_requirements: *70128767187740
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: redis
27
- requirement: &70331050081420 !ruby/object:Gem::Requirement
27
+ requirement: &70128767183580 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70331050081420
35
+ version_requirements: *70128767183580
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: nokogiri
38
- requirement: &70331050075780 !ruby/object:Gem::Requirement
38
+ requirement: &70128767182220 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70331050075780
46
+ version_requirements: *70128767182220
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: addressable
49
- requirement: &70331050066140 !ruby/object:Gem::Requirement
49
+ requirement: &70128767175380 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70331050066140
57
+ version_requirements: *70128767175380
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &70331050054340 !ruby/object:Gem::Requirement
60
+ requirement: &70128767172200 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70331050054340
68
+ version_requirements: *70128767172200
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: awesome_print
71
- requirement: &70331050049640 !ruby/object:Gem::Requirement
71
+ requirement: &70128767170580 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70331050049640
79
+ version_requirements: *70128767170580
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: sinatra
82
- requirement: &70331050048160 !ruby/object:Gem::Requirement
82
+ requirement: &70128767167800 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: '0'
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *70331050048160
90
+ version_requirements: *70128767167800
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: thin
93
- requirement: &70331050047400 !ruby/object:Gem::Requirement
93
+ requirement: &70128767161460 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '0'
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *70331050047400
101
+ version_requirements: *70128767161460
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: haml
104
- requirement: &70331050046440 !ruby/object:Gem::Requirement
104
+ requirement: &70128767157640 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,10 +109,10 @@ dependencies:
109
109
  version: '0'
110
110
  type: :runtime
111
111
  prerelease: false
112
- version_requirements: *70331050046440
112
+ version_requirements: *70128767157640
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: namespaced_redis
115
- requirement: &70331050045060 !ruby/object:Gem::Requirement
115
+ requirement: &70128767152300 !ruby/object:Gem::Requirement
116
116
  none: false
117
117
  requirements:
118
118
  - - ! '>='
@@ -120,10 +120,10 @@ dependencies:
120
120
  version: '0'
121
121
  type: :runtime
122
122
  prerelease: false
123
- version_requirements: *70331050045060
123
+ version_requirements: *70128767152300
124
124
  - !ruby/object:Gem::Dependency
125
125
  name: json
126
- requirement: &70331050044240 !ruby/object:Gem::Requirement
126
+ requirement: &70128767148180 !ruby/object:Gem::Requirement
127
127
  none: false
128
128
  requirements:
129
129
  - - ! '>='
@@ -131,7 +131,7 @@ dependencies:
131
131
  version: '0'
132
132
  type: :runtime
133
133
  prerelease: false
134
- version_requirements: *70331050044240
134
+ version_requirements: *70128767148180
135
135
  description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
136
136
  crawl extremely large sites which is much more performant than multi-threaded crawlers. It
137
137
  is also a standalone crawler that has a sophisticated statistics monitoring interface