cobweb 1.0.6 → 1.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- h1. Cobweb v1.0.6
2
+ h1. Cobweb v1.0.8
3
3
 
4
4
  "@cobweb_gem":https://twitter.com/cobweb_gem
5
5
 
@@ -152,6 +152,10 @@ bc. statistics = CobwebCrawler.new(:cache => 600).crawl("http://www.pepsico.com"
152
152
  end
153
153
  puts "Finished Crawl with #{statistics[:page_count]} pages and #{statistics[:asset_count]} assets."
154
154
 
155
+ CobwebCrawler accepts the following options in addition to the normal cobweb options:
156
+
157
+ * thread_count - specifies the number of threads used by the crawler (defaults to 1)
158
+
155
159
  h3. CobwebCrawlHelper
156
160
 
157
161
  The CobwebCrawlHelper class is a helper class to assist in getting information about a crawl and to perform functions against the crawl
@@ -127,10 +127,10 @@ class Cobweb
127
127
  content = HashUtil.deep_symbolize_keys(Marshal.load(redis.get(unique_id)))
128
128
  else
129
129
  # retrieve data
130
- unless @http && @http.address == uri.host && @http.port == uri.inferred_port
130
+ #unless @http && @http.address == uri.host && @http.port == uri.inferred_port
131
131
  puts "Creating connection to #{uri.host}..." if @options[:debug]
132
132
  @http = Net::HTTP.new(uri.host, uri.inferred_port)
133
- end
133
+ #end
134
134
  if uri.scheme == "https"
135
135
  @http.use_ssl = true
136
136
  @http.verify_mode = OpenSSL::SSL::VERIFY_NONE
@@ -39,6 +39,7 @@ class CobwebCrawler
39
39
  # Initiates a crawl starting at the base_url and applying the options supplied. Can also take a block that is executed and passed the content hash and the statistics hash.
40
40
  def crawl(base_url, crawl_options = {}, &block)
41
41
  @options[:base_url] = base_url unless @options.has_key? :base_url
42
+ @options[:thread_count] = 1 unless @options.has_key? :thread_count
42
43
 
43
44
  @options[:internal_urls] << base_url if @options[:internal_urls].empty?
44
45
  @redis.sadd("internal_urls", base_url) if @options[:internal_urls].empty?
@@ -46,87 +47,110 @@ class CobwebCrawler
46
47
  @crawl_options = crawl_options
47
48
 
48
49
  @redis.sadd("queued", base_url) unless base_url.nil? || @redis.sismember("crawled", base_url) || @redis.sismember("queued", base_url)
49
- crawl_counter = @redis.scard("crawled").to_i
50
- queue_counter = @redis.scard("queued").to_i
50
+ @crawl_counter = @redis.scard("crawled").to_i
51
+ @queue_counter = @redis.scard("queued").to_i
51
52
 
53
+ @threads = []
52
54
  begin
53
55
  @stats.start_crawl(@options)
54
- while queue_counter>0 && (@options[:crawl_limit].to_i == 0 || @options[:crawl_limit].to_i > crawl_counter)
55
- thread = Thread.new do
56
-
56
+
57
+ @threads << Thread.new do
58
+ Thread.abort_on_exception = true
59
+ spawn_thread(&block)
60
+ end
57
61
 
58
- url = @redis.spop "queued"
59
- queue_counter = 0 if url.nil?
62
+ sleep 5
63
+ while running_thread_count > 0
64
+ if @queue_counter > 0
65
+ (@options[:thread_count]-running_thread_count).times.each do
66
+ @threads << Thread.new do
67
+ Thread.abort_on_exception = true
68
+ spawn_thread(&block)
69
+ end
70
+ end
71
+ end
72
+ sleep 1
73
+ end
74
+
75
+ ensure
76
+ @stats.end_crawl(@options)
77
+ end
78
+ @stats
79
+ end
60
80
 
61
- @options[:url] = url
62
- unless @redis.sismember("crawled", url.to_s)
63
- begin
64
- @stats.update_status("Requesting #{url}...")
65
- content = @cobweb.get(url) unless url.nil?
66
- if content.nil?
67
- queue_counter = queue_counter - 1 #@redis.scard("queued").to_i
68
- else
69
- @stats.update_status("Processing #{url}...")
81
+ def spawn_thread(&block)
82
+ while @queue_counter>0 && (@options[:crawl_limit].to_i == 0 || @options[:crawl_limit].to_i > @crawl_counter)
83
+ url = @redis.spop "queued"
84
+ @queue_counter = 0 if url.nil?
70
85
 
71
- @redis.sadd "crawled", url.to_s
72
- @redis.incr "crawl-counter"
73
-
74
- internal_links = ContentLinkParser.new(url, content[:body]).all_links(:valid_schemes => [:http, :https])
86
+ @options[:url] = url
87
+ unless @redis.sismember("crawled", url.to_s)
88
+ begin
89
+ @stats.update_status("Requesting #{url}...")
90
+ content = @cobweb.get(url) unless url.nil?
91
+ if content.nil?
92
+ @queue_counter = @queue_counter - 1 #@redis.scard("queued").to_i
93
+ else
94
+ @stats.update_status("Processing #{url}...")
95
+
96
+ @redis.sadd "crawled", url.to_s
97
+ @redis.incr "crawl-counter"
98
+
99
+ internal_links = ContentLinkParser.new(url, content[:body]).all_links(:valid_schemes => [:http, :https])
75
100
 
76
- # select the link if it's internal (eliminate external before expensive lookups in queued and crawled)
77
- cobweb_links = CobwebLinks.new(@options)
101
+ # select the link if it's internal (eliminate external before expensive lookups in queued and crawled)
102
+ cobweb_links = CobwebLinks.new(@options)
78
103
 
79
- internal_links = internal_links.select{|link| cobweb_links.internal?(link) || (@options[:crawl_linked_external] && cobweb_links.internal?(url.to_s))}
104
+ internal_links = internal_links.select{|link| cobweb_links.internal?(link) || (@options[:crawl_linked_external] && cobweb_links.internal?(url.to_s))}
80
105
 
81
- all_internal_links = internal_links
82
-
83
- # reject the link if we've crawled it or queued it
84
- internal_links.reject!{|link| @redis.sismember("crawled", link)}
85
- internal_links.reject!{|link| @redis.sismember("queued", link)}
86
- internal_links.reject!{|link| link.nil? || link.empty?}
87
-
88
- internal_links.each do |link|
89
- puts "Added #{link.to_s} to queue" if @debug
90
- @redis.sadd "queued", link unless link.nil?
91
- children = @redis.hget("navigation", url)
92
- children = [] if children.nil?
93
- children << link
94
- @redis.hset "navigation", url, children
95
- queue_counter += 1
96
- end
106
+ all_internal_links = internal_links
107
+
108
+ # reject the link if we've crawled it or queued it
109
+ internal_links.reject!{|link| @redis.sismember("crawled", link)}
110
+ internal_links.reject!{|link| @redis.sismember("queued", link)}
111
+ internal_links.reject!{|link| link.nil? || link.empty?}
112
+
113
+ internal_links.each do |link|
114
+ puts "Added #{link.to_s} to queue" if @debug
115
+ @redis.sadd "queued", link unless link.nil?
116
+ children = @redis.hget("navigation", url)
117
+ children = [] if children.nil?
118
+ children << link
119
+ @redis.hset "navigation", url, children
120
+ @queue_counter += 1
121
+ end
97
122
 
98
- if @options[:store_refered_url]
99
- all_internal_links.each do |link|
100
- @redis.sadd("inbound_links_#{Digest::MD5.hexdigest(link)}", url)
101
- end
102
- end
103
-
104
- crawl_counter = @redis.scard("crawled").to_i
105
- queue_counter = @redis.scard("queued").to_i
106
-
107
- @stats.update_statistics(content, crawl_counter, queue_counter)
108
- @stats.update_status("Completed #{url}.")
109
- yield content, @stats.get_statistics if block_given?
123
+ if @options[:store_refered_url]
124
+ all_internal_links.each do |link|
125
+ @redis.sadd("inbound_links_#{Digest::MD5.hexdigest(link)}", url)
110
126
  end
111
- rescue => e
112
- puts "Error loading #{url}: #{e}"
113
- #puts "!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!"
114
- #ap e
115
- #ap e.backtrace
116
- ensure
117
- crawl_counter = @redis.scard("crawled").to_i
118
- queue_counter = @redis.scard("queued").to_i
119
127
  end
120
- else
121
- puts "Already crawled #{@options[:url]}" if @debug
128
+
129
+ @crawl_counter = @redis.scard("crawled").to_i
130
+ @queue_counter = @redis.scard("queued").to_i
131
+
132
+ @stats.update_statistics(content, @crawl_counter, @queue_counter)
133
+ @stats.update_status("Completed #{url}.")
134
+ yield content, @stats.get_statistics if block_given?
122
135
  end
136
+ rescue => e
137
+ puts "Error loading #{url}: #{e}"
138
+ #puts "!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!"
139
+ #ap e
140
+ #ap e.backtrace
141
+ ensure
142
+ @crawl_counter = @redis.scard("crawled").to_i
143
+ @queue_counter = @redis.scard("queued").to_i
123
144
  end
124
- thread.join
145
+ else
146
+ puts "Already crawled #{@options[:url]}" if @debug
125
147
  end
126
- ensure
127
- @stats.end_crawl(@options)
128
148
  end
129
- @stats
149
+ Thread.exit
150
+ end
151
+
152
+ def running_thread_count
153
+ @threads.map{|t| t.status}.select{|status| status=="run" || status == "sleep"}.count
130
154
  end
131
155
 
132
156
  end
@@ -3,7 +3,7 @@ class CobwebVersion
3
3
 
4
4
  # Returns a string of the current version
5
5
  def self.version
6
- "1.0.6"
6
+ "1.0.8"
7
7
  end
8
8
 
9
9
  end
@@ -8,6 +8,7 @@ class Stats
8
8
  def initialize(options)
9
9
  options[:redis_options] = {} unless options.has_key? :redis_options
10
10
  @full_redis = Redis.new(options[:redis_options])
11
+ @lock = Mutex.new
11
12
  @redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{options[:crawl_id]}", :redis => @full_redis)
12
13
  end
13
14
 
@@ -44,105 +45,106 @@ class Stats
44
45
  # Returns statistics hash. update_statistics takes the content hash, extracts statistics from it and updates redis with the data.
45
46
  def update_statistics(content, crawl_counter=@redis.scard("crawled").to_i, queue_counter=@redis.scard("queued").to_i)
46
47
 
47
- @statistics = get_statistics
48
-
49
- if @statistics.has_key? :average_response_time
50
- @statistics[:average_response_time] = (((@redis.hget("statistics", "average_response_time").to_f*crawl_counter) + content[:response_time].to_f) / (crawl_counter + 1))
51
- else
52
- @statistics[:average_response_time] = content[:response_time].to_f
53
- end
54
- @statistics[:maximum_response_time] = content[:response_time].to_f if @statistics[:maximum_response_time].nil? or content[:response_time].to_f > @statistics[:maximum_response_time].to_f
55
- @statistics[:minimum_response_time] = content[:response_time].to_f if @statistics[:minimum_response_time].nil? or content[:response_time].to_f < @statistics[:minimum_response_time].to_f
56
- if @statistics.has_key? :average_length
57
- @statistics[:average_length] = (((@redis.hget("statistics", "average_length").to_i*crawl_counter) + content[:length].to_i) / (crawl_counter + 1))
58
- else
59
- @statistics[:average_length] = content[:length].to_i
60
- end
61
- @statistics[:maximum_length] = content[:length].to_i if @redis.hget("statistics", "maximum_length").nil? or content[:length].to_i > @statistics[:maximum_length].to_i
62
- @statistics[:minimum_length] = content[:length].to_i if @redis.hget("statistics", "minimum_length").nil? or content[:length].to_i < @statistics[:minimum_length].to_i
63
-
64
- if content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
65
- @statistics[:page_count] = @statistics[:page_count].to_i + 1
66
- @statistics[:page_size] = @statistics[:page_size].to_i + content[:length].to_i
67
- increment_time_stat("pages_count")
68
- else
69
- @statistics[:asset_count] = @statistics[:asset_count].to_i + 1
70
- @statistics[:asset_size] = @statistics[:asset_size].to_i + content[:length].to_i
71
- increment_time_stat("assets_count")
72
- end
73
-
74
- total_redirects = @statistics[:total_redirects].to_i
75
- @statistics[:total_redirects] = 0 if total_redirects.nil?
76
- @statistics[:total_redirects] = total_redirects += content[:redirect_through].count unless content[:redirect_through].nil?
48
+ @lock.synchronize {
49
+ @statistics = get_statistics
50
+
51
+ if @statistics.has_key? :average_response_time
52
+ @statistics[:average_response_time] = (((@redis.hget("statistics", "average_response_time").to_f*crawl_counter) + content[:response_time].to_f) / (crawl_counter + 1))
53
+ else
54
+ @statistics[:average_response_time] = content[:response_time].to_f
55
+ end
56
+ @statistics[:maximum_response_time] = content[:response_time].to_f if @statistics[:maximum_response_time].nil? or content[:response_time].to_f > @statistics[:maximum_response_time].to_f
57
+ @statistics[:minimum_response_time] = content[:response_time].to_f if @statistics[:minimum_response_time].nil? or content[:response_time].to_f < @statistics[:minimum_response_time].to_f
58
+ if @statistics.has_key? :average_length
59
+ @statistics[:average_length] = (((@redis.hget("statistics", "average_length").to_i*crawl_counter) + content[:length].to_i) / (crawl_counter + 1))
60
+ else
61
+ @statistics[:average_length] = content[:length].to_i
62
+ end
63
+ @statistics[:maximum_length] = content[:length].to_i if @redis.hget("statistics", "maximum_length").nil? or content[:length].to_i > @statistics[:maximum_length].to_i
64
+ @statistics[:minimum_length] = content[:length].to_i if @redis.hget("statistics", "minimum_length").nil? or content[:length].to_i < @statistics[:minimum_length].to_i
65
+
66
+ if content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
67
+ @statistics[:page_count] = @statistics[:page_count].to_i + 1
68
+ @statistics[:page_size] = @statistics[:page_size].to_i + content[:length].to_i
69
+ increment_time_stat("pages_count")
70
+ else
71
+ @statistics[:asset_count] = @statistics[:asset_count].to_i + 1
72
+ @statistics[:asset_size] = @statistics[:asset_size].to_i + content[:length].to_i
73
+ increment_time_stat("assets_count")
74
+ end
75
+
76
+ total_redirects = @statistics[:total_redirects].to_i
77
+ @statistics[:total_redirects] = 0 if total_redirects.nil?
78
+ @statistics[:total_redirects] = total_redirects += content[:redirect_through].count unless content[:redirect_through].nil?
77
79
 
78
- @statistics[:crawl_counter] = crawl_counter
79
- @statistics[:queue_counter] = queue_counter
80
-
81
- total_length = @statistics[:total_length].to_i
82
- @statistics[:total_length] = total_length + content[:length].to_i
80
+ @statistics[:crawl_counter] = crawl_counter
81
+ @statistics[:queue_counter] = queue_counter
82
+
83
+ total_length = @statistics[:total_length].to_i
84
+ @statistics[:total_length] = total_length + content[:length].to_i
83
85
 
84
- mime_counts = {}
85
- if @statistics.has_key? :mime_counts
86
- mime_counts = @statistics[:mime_counts]
87
- if mime_counts.has_key? content[:mime_type]
88
- mime_counts[content[:mime_type]] += 1
86
+ mime_counts = {}
87
+ if @statistics.has_key? :mime_counts
88
+ mime_counts = @statistics[:mime_counts]
89
+ if mime_counts.has_key? content[:mime_type]
90
+ mime_counts[content[:mime_type]] += 1
91
+ else
92
+ mime_counts[content[:mime_type]] = 1
93
+ end
89
94
  else
90
- mime_counts[content[:mime_type]] = 1
95
+ mime_counts = {content[:mime_type] => 1}
91
96
  end
92
- else
93
- mime_counts = {content[:mime_type] => 1}
94
- end
95
- @statistics[:mime_counts] = mime_counts.to_json
97
+ @statistics[:mime_counts] = mime_counts.to_json
96
98
 
97
- # record mime categories stats
98
- if content[:mime_type].cobweb_starts_with? "text"
99
- increment_time_stat("mime_text_count")
100
- elsif content[:mime_type].cobweb_starts_with? "application"
101
- increment_time_stat("mime_application_count")
102
- elsif content[:mime_type].cobweb_starts_with? "audio"
103
- increment_time_stat("mime_audio_count")
104
- elsif content[:mime_type].cobweb_starts_with? "image"
105
- increment_time_stat("mime_image_count")
106
- elsif content[:mime_type].cobweb_starts_with? "message"
107
- increment_time_stat("mime_message_count")
108
- elsif content[:mime_type].cobweb_starts_with? "model"
109
- increment_time_stat("mime_model_count")
110
- elsif content[:mime_type].cobweb_starts_with? "multipart"
111
- increment_time_stat("mime_multipart_count")
112
- elsif content[:mime_type].cobweb_starts_with? "video"
113
- increment_time_stat("mime_video_count")
114
- end
115
-
116
- status_counts = {}
117
- if @statistics.has_key? :status_counts
118
- status_counts = @statistics[:status_counts]
119
- status_code = content[:status_code].to_i.to_s.to_sym
120
- if status_counts.has_key? status_code
121
- status_counts[status_code] += 1
99
+ # record mime categories stats
100
+ if content[:mime_type].cobweb_starts_with? "text"
101
+ increment_time_stat("mime_text_count")
102
+ elsif content[:mime_type].cobweb_starts_with? "application"
103
+ increment_time_stat("mime_application_count")
104
+ elsif content[:mime_type].cobweb_starts_with? "audio"
105
+ increment_time_stat("mime_audio_count")
106
+ elsif content[:mime_type].cobweb_starts_with? "image"
107
+ increment_time_stat("mime_image_count")
108
+ elsif content[:mime_type].cobweb_starts_with? "message"
109
+ increment_time_stat("mime_message_count")
110
+ elsif content[:mime_type].cobweb_starts_with? "model"
111
+ increment_time_stat("mime_model_count")
112
+ elsif content[:mime_type].cobweb_starts_with? "multipart"
113
+ increment_time_stat("mime_multipart_count")
114
+ elsif content[:mime_type].cobweb_starts_with? "video"
115
+ increment_time_stat("mime_video_count")
116
+ end
117
+
118
+ status_counts = {}
119
+ if @statistics.has_key? :status_counts
120
+ status_counts = @statistics[:status_counts]
121
+ status_code = content[:status_code].to_i.to_s.to_sym
122
+ if status_counts.has_key? status_code
123
+ status_counts[status_code] += 1
124
+ else
125
+ status_counts[status_code] = 1
126
+ end
122
127
  else
123
- status_counts[status_code] = 1
124
- end
125
- else
126
- status_counts = {status_code => 1}
127
- end
128
-
129
- # record statistics by status type
130
- if content[:status_code] >= 200 && content[:status_code] < 300
131
- increment_time_stat("status_200_count")
132
- elsif content[:status_code] >= 400 && content[:status_code] < 500
133
- increment_time_stat("status|_400_count")
134
- elsif content[:status_code] >= 500 && content[:status_code] < 600
135
- increment_time_stat("status|_500_count")
136
- end
137
-
138
- @statistics[:status_counts] = status_counts.to_json
139
-
140
- ## time based statistics
141
- increment_time_stat("minute_totals", "minute", 60)
142
-
143
- redis_command = "@redis.hmset 'statistics', #{@statistics.keys.map{|key| "'#{key}', '#{@statistics[key].to_s.gsub("'","''")}'"}.join(", ")}"
144
- instance_eval redis_command
145
-
128
+ status_counts = {status_code => 1}
129
+ end
130
+
131
+ # record statistics by status type
132
+ if content[:status_code] >= 200 && content[:status_code] < 300
133
+ increment_time_stat("status_200_count")
134
+ elsif content[:status_code] >= 400 && content[:status_code] < 500
135
+ increment_time_stat("status|_400_count")
136
+ elsif content[:status_code] >= 500 && content[:status_code] < 600
137
+ increment_time_stat("status|_500_count")
138
+ end
139
+
140
+ @statistics[:status_counts] = status_counts.to_json
141
+
142
+ ## time based statistics
143
+ increment_time_stat("minute_totals", "minute", 60)
144
+
145
+ redis_command = "@redis.hmset 'statistics', #{@statistics.keys.map{|key| "'#{key}', '#{@statistics[key].to_s.gsub("'","''")}'"}.join(", ")}"
146
+ instance_eval redis_command
147
+ }
146
148
  @statistics
147
149
  end
148
150
 
@@ -12,16 +12,17 @@ RSpec.configure do |config|
12
12
  if ENV["TRAVIS_RUBY_VERSION"] || ENV['CI']
13
13
  config.filter_run_excluding :local_only => true
14
14
  end
15
+
16
+ Thread.new do
17
+ @thin ||= Thin::Server.start("0.0.0.0", 3532, SampleServer.app)
18
+ end
19
+
20
+ # WAIT FOR START TO COMPLETE
21
+ sleep 1
22
+
15
23
 
16
24
  config.before(:all) {
17
25
  # START THIN SERVER TO HOST THE SAMPLE SITE FOR CRAWLING
18
- @thin = nil
19
- Thread.new do
20
- @thin = Thin::Server.start("0.0.0.0", 3532, SampleServer.app)
21
- end
22
-
23
- # WAIT FOR START TO COMPLETE
24
- sleep 1
25
26
  }
26
27
 
27
28
  config.before(:each) {
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cobweb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.6
4
+ version: 1.0.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-02-11 00:00:00.000000000 Z
12
+ date: 2013-02-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: resque
16
- requirement: &70331050084220 !ruby/object:Gem::Requirement
16
+ requirement: &70128767187740 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70331050084220
24
+ version_requirements: *70128767187740
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: redis
27
- requirement: &70331050081420 !ruby/object:Gem::Requirement
27
+ requirement: &70128767183580 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70331050081420
35
+ version_requirements: *70128767183580
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: nokogiri
38
- requirement: &70331050075780 !ruby/object:Gem::Requirement
38
+ requirement: &70128767182220 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70331050075780
46
+ version_requirements: *70128767182220
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: addressable
49
- requirement: &70331050066140 !ruby/object:Gem::Requirement
49
+ requirement: &70128767175380 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70331050066140
57
+ version_requirements: *70128767175380
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &70331050054340 !ruby/object:Gem::Requirement
60
+ requirement: &70128767172200 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70331050054340
68
+ version_requirements: *70128767172200
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: awesome_print
71
- requirement: &70331050049640 !ruby/object:Gem::Requirement
71
+ requirement: &70128767170580 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70331050049640
79
+ version_requirements: *70128767170580
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: sinatra
82
- requirement: &70331050048160 !ruby/object:Gem::Requirement
82
+ requirement: &70128767167800 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: '0'
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *70331050048160
90
+ version_requirements: *70128767167800
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: thin
93
- requirement: &70331050047400 !ruby/object:Gem::Requirement
93
+ requirement: &70128767161460 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '0'
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *70331050047400
101
+ version_requirements: *70128767161460
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: haml
104
- requirement: &70331050046440 !ruby/object:Gem::Requirement
104
+ requirement: &70128767157640 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,10 +109,10 @@ dependencies:
109
109
  version: '0'
110
110
  type: :runtime
111
111
  prerelease: false
112
- version_requirements: *70331050046440
112
+ version_requirements: *70128767157640
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: namespaced_redis
115
- requirement: &70331050045060 !ruby/object:Gem::Requirement
115
+ requirement: &70128767152300 !ruby/object:Gem::Requirement
116
116
  none: false
117
117
  requirements:
118
118
  - - ! '>='
@@ -120,10 +120,10 @@ dependencies:
120
120
  version: '0'
121
121
  type: :runtime
122
122
  prerelease: false
123
- version_requirements: *70331050045060
123
+ version_requirements: *70128767152300
124
124
  - !ruby/object:Gem::Dependency
125
125
  name: json
126
- requirement: &70331050044240 !ruby/object:Gem::Requirement
126
+ requirement: &70128767148180 !ruby/object:Gem::Requirement
127
127
  none: false
128
128
  requirements:
129
129
  - - ! '>='
@@ -131,7 +131,7 @@ dependencies:
131
131
  version: '0'
132
132
  type: :runtime
133
133
  prerelease: false
134
- version_requirements: *70331050044240
134
+ version_requirements: *70128767148180
135
135
  description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
136
136
  crawl extremely large sites which is much more performant than multi-threaded crawlers. It
137
137
  is also a standalone crawler that has a sophisticated statistics monitoring interface