cobweb 0.0.65 → 0.0.66

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,6 @@
1
1
 
2
- h1. Cobweb v0.0.65
2
+ h1. Cobweb v0.0.66
3
+
3
4
  !https://secure.travis-ci.org/stewartmckee/cobweb.png?branch=master!
4
5
 
5
6
  h2. Intro
@@ -45,7 +45,7 @@ class Cobweb
45
45
  default_first_page_redirect_internal_to true
46
46
  default_text_mime_types_to ["text/*", "application/xhtml+xml"]
47
47
  default_obey_robots_to false
48
- default_user_agent_to "cobweb"
48
+ default_user_agent_to "cobweb/#{Cobweb.version} (ruby/#{RUBY_VERSION} nokogiri/#{Nokogiri::VERSION})"
49
49
 
50
50
  end
51
51
 
@@ -27,7 +27,7 @@ class CobwebCrawler
27
27
 
28
28
  @stats = Stats.new(@options.merge(:crawl_id => @crawl_id))
29
29
  if @options[:web_statistics]
30
- Server.start
30
+ Server.start(@options)
31
31
  end
32
32
 
33
33
  @cobweb = Cobweb.new(@options)
@@ -3,7 +3,7 @@ class CobwebVersion
3
3
 
4
4
  # Returns a string of the current version
5
5
  def self.version
6
- "0.0.65"
6
+ "0.0.66"
7
7
  end
8
8
 
9
9
  end
@@ -24,12 +24,11 @@ class CrawlJob
24
24
 
25
25
  @debug = content_request[:debug]
26
26
 
27
- decrement_queue_counter
28
-
29
27
  # check we haven't crawled this url before
30
28
  unless @redis.sismember "crawled", content_request[:url]
31
29
  # if there is no limit or we're still under it lets get the url
32
30
  if within_crawl_limits?(content_request[:crawl_limit])
31
+ puts "cbpl: #{content_request[:url]}" if content_request[:crawl_limit_by_page]
33
32
  content = Cobweb.new(content_request).get(content_request[:url], content_request)
34
33
  if content_request[:url] == @redis.get("original_base_url")
35
34
  @redis.set("crawled_base_url", content[:base_url])
@@ -78,7 +77,7 @@ class CrawlJob
78
77
  send_to_processing_queue(content, content_request)
79
78
 
80
79
  #if the enqueue counter has been requested update that
81
- if content_request.has_key? :enqueue_counter_key
80
+ if content_request.has_key?(:enqueue_counter_key)
82
81
  enqueue_redis = NamespacedRedis.new(content_request[:redis_options], content_request[:enqueue_counter_namespace].to_s)
83
82
  current_count = enqueue_redis.hget(content_request[:enqueue_counter_key], content_request[:enqueue_counter_field]).to_i
84
83
  enqueue_redis.hset(content_request[:enqueue_counter_key], content_request[:enqueue_counter_field], current_count+1)
@@ -89,14 +88,16 @@ class CrawlJob
89
88
 
90
89
  # update the queue and crawl counts -- doing this very late in the piece so that the following transaction all occurs at once.
91
90
  # really we should do this with a lock https://github.com/PatrickTulskie/redis-lock
91
+ # stewart: i'm looking at the layout of this, think that there is scope for cleaning up the perform method to be more DRY.
92
92
  if content_request[:crawl_limit_by_page]
93
93
  if content[:mime_type].match("text/html")
94
94
  increment_crawl_counter
95
+ ap "clbp: #{crawl_counter}"
95
96
  end
96
97
  else
97
98
  increment_crawl_counter
98
99
  end
99
- puts "Crawled: #{@crawl_counter} Limit: #{content_request[:crawl_limit]} Queued: #{@queue_counter} In Progress: #{@crawl_started_counter-@crawl_counter}" if @debug
100
+ puts "Crawled: #{crawl_counter} Limit: #{content_request[:crawl_limit]} Queued: #{queue_counter} In Progress: #{crawl_started_counter-crawl_counter}" if @debug
100
101
  end
101
102
  else
102
103
  puts "ignoring #{content_request[:url]} as mime_type is #{content[:mime_type]}" if content_request[:debug]
@@ -110,13 +111,15 @@ class CrawlJob
110
111
  puts "Already crawled #{content_request[:url]}" if content_request[:debug]
111
112
  end
112
113
 
114
+ decrement_queue_counter
115
+ puts content_request[:crawl_limit]
116
+ print_counters
113
117
  # if there's nothing left queued or the crawled limit has been reached
114
- refresh_counters
115
118
  if content_request[:crawl_limit].nil? || content_request[:crawl_limit] == 0
116
- if @queue_counter+@crawl_started_counter-@crawl_counter == 0
119
+ if queue_counter + crawl_started_counter - crawl_counter == 0
117
120
  finished(content_request)
118
121
  end
119
- elsif (@queue_counter +@crawl_started_counter-@crawl_counter)== 0 || @crawl_counter >= content_request[:crawl_limit].to_i
122
+ elsif (queue_counter+crawl_started_counter-crawl_counter)== 0 || crawl_counter >= content_request[:crawl_limit].to_i
120
123
  finished(content_request)
121
124
  end
122
125
 
@@ -167,14 +170,12 @@ class CrawlJob
167
170
 
168
171
  # Returns true if the crawl count is within limits
169
172
  def self.within_crawl_limits?(crawl_limit)
170
- refresh_counters
171
- crawl_limit.nil? or @crawl_started_counter < crawl_limit.to_i
173
+ crawl_limit.nil? or crawl_counter < crawl_limit.to_i
172
174
  end
173
175
 
174
176
  # Returns true if the queue count is calculated to be still within limits when complete
175
177
  def self.within_queue_limits?(crawl_limit)
176
- refresh_counters
177
- (@content_request[:crawl_limit_by_page]&& (crawl_limit.nil? or @crawl_counter < crawl_limit.to_i)) || within_crawl_limits?(crawl_limit) && (crawl_limit.nil? || (@queue_counter + @crawl_counter) < crawl_limit.to_i)
178
+ (@content_request[:crawl_limit_by_page]&& (crawl_limit.nil? or crawl_counter < crawl_limit.to_i)) || within_crawl_limits?(crawl_limit) && (crawl_limit.nil? || (queue_counter + crawl_counter) < crawl_limit.to_i)
178
179
  end
179
180
 
180
181
  # Sets the base url in redis. If the first page is a redirect, it sets the base_url to the destination
@@ -201,27 +202,27 @@ class CrawlJob
201
202
  # Increments the queue counter and refreshes crawl counters
202
203
  def self.increment_queue_counter
203
204
  @redis.incr "queue-counter"
204
- refresh_counters
205
205
  end
206
206
  # Increments the crawl counter and refreshes crawl counters
207
207
  def self.increment_crawl_counter
208
208
  @redis.incr "crawl-counter"
209
- refresh_counters
210
209
  end
211
210
  def self.increment_crawl_started_counter
212
211
  @redis.incr "crawl-started-counter"
213
- refresh_counters
214
212
  end
215
213
  # Decrements the queue counter and refreshes crawl counters
216
214
  def self.decrement_queue_counter
217
215
  @redis.decr "queue-counter"
218
- refresh_counters
219
216
  end
220
- # Refreshes the crawl counters
221
- def self.refresh_counters
222
- @crawl_counter = @redis.get("crawl-counter").to_i
223
- @crawl_started_counter = @redis.get("crawl-started-counter").to_i
224
- @queue_counter = @redis.get("queue-counter").to_i
217
+
218
+ def self.crawl_counter
219
+ @redis.get("crawl-counter").to_i
220
+ end
221
+ def self.crawl_started_counter
222
+ @redis.get("crawl-started-counter").to_i
223
+ end
224
+ def self.queue_counter
225
+ @redis.get("queue-counter").to_i
225
226
  end
226
227
 
227
228
  def self.print_counters
@@ -229,7 +230,7 @@ class CrawlJob
229
230
  end
230
231
 
231
232
  def self.counters
232
- "@crawl_counter: #{@crawl_counter} @crawl_started_counter: #{@crawl_started_counter} @queue_counter: #{@queue_counter}"
233
+ "crawl_counter: #{crawl_counter} crawl_started_counter: #{crawl_started_counter} queue_counter: #{queue_counter}"
233
234
  end
234
235
 
235
236
  end
@@ -10,17 +10,18 @@ class Server < Sinatra::Base
10
10
 
11
11
  # Sinatra Dashboard
12
12
  get '/' do
13
- @full_redis = Redis.new
14
-
13
+ @full_redis = Redis.new(redis_options)
15
14
  @colors = ["#00366f", "#006ba0", "#3F0BDB", "#396CB3"]
16
15
 
17
16
  @crawls = []
18
- @full_redis.smembers("cobweb_crawls").each do |crawl_id|
19
- redis = NamespacedRedis.new({}, "cobweb-#{Cobweb.version}-#{crawl_id}")
17
+ @full_redis.smembers("cobweb_crawls").each do |crawl_id|
18
+ version = cobweb_version(crawl_id)
19
+ redis = NamespacedRedis.new(redis_options, "cobweb-#{version}-#{crawl_id}")
20
20
  stats = HashUtil.deep_symbolize_keys({
21
- :crawl_details => redis.hgetall("crawl_details"),
21
+ :cobweb_version => version,
22
+ :crawl_details => redis.hgetall("crawl_details"),
22
23
  :statistics => redis.hgetall("statistics"),
23
- :minute_totals => redis.hgetall("minute_totals")
24
+ :minute_totals => redis.hgetall("minute_totals"),
24
25
  })
25
26
  @crawls << stats
26
27
  end
@@ -30,7 +31,9 @@ class Server < Sinatra::Base
30
31
 
31
32
  # Sinatra Crawl Detail
32
33
  get '/statistics/:crawl_id' do
33
- redis = NamespacedRedis.new({}, "cobweb-#{Cobweb.version}-#{params[:crawl_id]}")
34
+
35
+ version = cobweb_version(params[:crawl_id])
36
+ redis = NamespacedRedis.new(redis_options, "cobweb-#{version}-#{params[:crawl_id]}")
34
37
 
35
38
  @statistics = HashUtil.deep_symbolize_keys(redis.hgetall("statistics"))
36
39
  if @statistics[:status_counts].nil?
@@ -44,6 +47,7 @@ class Server < Sinatra::Base
44
47
  @statistics[:mime_counts] = JSON.parse(@statistics[:mime_counts])
45
48
  end
46
49
  @crawl = {
50
+ :cobweb_version => version,
47
51
  :statistics => @statistics,
48
52
  :crawl_details => HashUtil.deep_symbolize_keys(redis.hgetall("crawl_details")),
49
53
  :minute_totals => HashUtil.deep_symbolize_keys(redis.hgetall("minute_totals")),
@@ -56,22 +60,49 @@ class Server < Sinatra::Base
56
60
  :pages_count => HashUtil.deep_symbolize_keys(redis.hgetall("pages_count")),
57
61
  :assets_count => HashUtil.deep_symbolize_keys(redis.hgetall("assets_count"))
58
62
  }
59
- ap @crawl
63
+ (1..30).each do |minutes|
64
+ date = (DateTime.now.new_offset(0) - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
65
+ end
66
+
60
67
  haml :statistics
61
68
  end
62
69
 
70
+ def cobweb_version(crawl_id)
71
+ redis = Redis.new(redis_options)
72
+ key = redis.keys("cobweb-*-#{crawl_id}-crawl_details").first
73
+ key =~ /cobweb-(.*?)-(.*?)-crawl_details/
74
+ cobweb_version = $1
75
+ end
76
+
77
+ def redis_options
78
+ Server.cobweb_options[:redis_options]
79
+ end
80
+
63
81
  # Starts the Sinatra server, and kills the processes when shutdown
64
- def self.start
82
+ def self.start(options={})
83
+ @options = options
84
+ @options[:redis_options] = {} unless @options.has_key? :redis_options
85
+ ap @options
65
86
  unless Server.running?
66
- thread = Thread.new do
67
- puts "Starting Sinatra"
87
+ if @options[:run_as_server]
88
+ puts "Starting Sinatra for cobweb v#{Cobweb.version}"
68
89
  Server.run!
69
90
  puts "Stopping crawl..."
70
- ## we need to manually kill the main thread as sinatra traps the interrupts
71
- Thread.main.kill
91
+ else
92
+ thread = Thread.new do
93
+ puts "Starting Sinatra"
94
+ Server.run!
95
+ puts "Stopping crawl..."
96
+ ## we need to manually kill the main thread as sinatra traps the interrupts
97
+ Thread.main.kill
98
+ end
72
99
  end
73
100
  end
74
- end
101
+ end
102
+
103
+ def self.cobweb_options
104
+ @options
105
+ end
75
106
 
76
107
  end
77
108
 
@@ -81,7 +112,6 @@ class Numeric
81
112
  #Returns a human readable format for a number representing a data size
82
113
  def to_human
83
114
  units = %w{B KB MB GB TB}
84
- ap self
85
115
  e = 0
86
116
  e = (Math.log(self)/Math.log(1024)).floor unless self==0
87
117
  s = "%.3f" % (to_f / 1024**e)
@@ -1,4 +1,3 @@
1
-
2
1
  # Stats class is the main statisitics hub for monitoring crawls. Either can be viewed through the Sinatra interface, or returned from the CobwebCrawler.crawl method or block
3
2
  class Stats
4
3
 
@@ -9,7 +9,7 @@ describe Cobweb, :local_only => true do
9
9
  # START WORKERS ONLY FOR CRAWL QUEUE SO WE CAN COUNT ENQUEUED PROCESS AND FINISH QUEUES
10
10
  puts "Starting Workers... Please Wait..."
11
11
  `mkdir log`
12
- io = IO.popen("nohup rake resque:workers PIDFILE=./tmp/pids/resque.pid COUNT=5 QUEUE=cobweb_crawl_job > log/output.log &")
12
+ io = IO.popen("nohup rake resque:workers PIDFILE=./tmp/pids/resque.pid COUNT=1 QUEUE=cobweb_crawl_job > log/output.log &")
13
13
  puts "Workers Started."
14
14
 
15
15
  end
@@ -17,61 +17,58 @@ describe Cobweb, :local_only => true do
17
17
  before(:each) do
18
18
  @base_url = "http://localhost:3532/"
19
19
  @base_page_count = 77
20
-
21
20
  clear_queues
22
21
  end
23
-
22
+
24
23
  describe "with no crawl limit" do
25
24
  before(:each) do
26
- @request = {
27
- :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
28
- :crawl_limit => nil,
29
- :quiet => false,
30
- :debug => false,
31
- :cache => nil
32
- }
33
- @cobweb = Cobweb.new @request
25
+ @request = {
26
+ :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
27
+ :crawl_limit => nil,
28
+ :quiet => false,
29
+ :debug => false,
30
+ :cache => nil
31
+ }
32
+ @cobweb = Cobweb.new @request
34
33
  end
35
-
34
+
36
35
  it "should crawl entire site" do
37
- crawl = @cobweb.start(@base_url)
38
- @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
39
- wait_for_crawl_finished crawl[:crawl_id]
40
- Resque.size("cobweb_process_job").should == @base_page_count
36
+ crawl = @cobweb.start(@base_url)
37
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
38
+ wait_for_crawl_finished crawl[:crawl_id]
39
+ Resque.size("cobweb_process_job").should == @base_page_count
41
40
  end
42
- it "detect crawl finished" do
43
- crawl = @cobweb.start(@base_url)
44
- @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
45
- wait_for_crawl_finished crawl[:crawl_id]
46
- Resque.size("cobweb_finished_job").should == 1
41
+ it "detect crawl finished once" do
42
+ crawl = @cobweb.start(@base_url)
43
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
44
+ wait_for_crawl_finished crawl[:crawl_id]
45
+ Resque.size("cobweb_finished_job").should == 1
47
46
  end
48
47
  end
49
- describe "with limited mime_types" do
50
- before(:each) do
51
- @request = {
52
- :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
53
- :quiet => true,
54
- :cache => nil,
55
- :valid_mime_types => ["text/html"]
56
- }
57
- @cobweb = Cobweb.new @request
58
- end
59
-
60
- it "should only crawl html pages" do
61
- crawl = @cobweb.start(@base_url)
62
- @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
63
- wait_for_crawl_finished crawl[:crawl_id]
64
- Resque.size("cobweb_process_job").should == 8
65
-
66
- mime_types = Resque.peek("cobweb_process_job", 0, 100).map{|job| job["args"][0]["mime_type"]}
67
- mime_types.count.should == 8
68
- mime_types.map{|m| m.should == "text/html"}
69
- mime_types.select{|m| m=="text/html"}.count.should == 8
70
-
71
-
72
- end
73
-
74
- end
48
+ describe "with limited mime_types" do
49
+ before(:each) do
50
+ @request = {
51
+ :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
52
+ :quiet => true,
53
+ :cache => nil,
54
+ :valid_mime_types => ["text/html"]
55
+ }
56
+ @cobweb = Cobweb.new @request
57
+ end
58
+
59
+ it "should only crawl html pages" do
60
+ crawl = @cobweb.start(@base_url)
61
+ @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
62
+ wait_for_crawl_finished crawl[:crawl_id]
63
+ Resque.size("cobweb_process_job").should == 8
64
+
65
+ mime_types = Resque.peek("cobweb_process_job", 0, 100).map{|job| job["args"][0]["mime_type"]}
66
+ mime_types.count.should == 8
67
+ mime_types.map{|m| m.should == "text/html"}
68
+ mime_types.select{|m| m=="text/html"}.count.should == 8
69
+ end
70
+
71
+ end
75
72
  describe "with a crawl limit" do
76
73
  before(:each) do
77
74
  @request = {
@@ -86,7 +83,7 @@ describe Cobweb, :local_only => true do
86
83
  @request[:crawl_limit] = 1
87
84
  @cobweb = Cobweb.new @request
88
85
  end
89
-
86
+
90
87
  it "should not crawl the entire site" do
91
88
  crawl = @cobweb.start(@base_url)
92
89
  @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
@@ -99,13 +96,12 @@ describe Cobweb, :local_only => true do
99
96
  wait_for_crawl_finished crawl[:crawl_id]
100
97
  Resque.size("cobweb_process_job").should == 1
101
98
  end
102
- it "should notify of crawl finished" do
99
+ it "should notify of crawl finished once" do
103
100
  crawl = @cobweb.start(@base_url)
104
101
  @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
105
102
  wait_for_crawl_finished crawl[:crawl_id]
106
103
  Resque.size("cobweb_finished_job").should == 1
107
- end
108
-
104
+ end
109
105
  end
110
106
 
111
107
  describe "for pages only" do
@@ -114,17 +110,17 @@ describe Cobweb, :local_only => true do
114
110
  @request[:crawl_limit] = 5
115
111
  @cobweb = Cobweb.new @request
116
112
  end
117
-
113
+
118
114
  it "should only use html pages towards the crawl limit" do
119
115
  crawl = @cobweb.start(@base_url)
120
116
  @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
121
117
  wait_for_crawl_finished crawl[:crawl_id]
122
118
  mime_types = Resque.peek("cobweb_process_job", 0, 200).map{|job| job["args"][0]["mime_type"]}
123
- mime_types.count.should == 70
119
+ Resque.peek("cobweb_process_job", 0, 200).count.should > 5
124
120
  mime_types.select{|m| m=="text/html"}.count.should == 5
125
121
  end
126
122
  end
127
-
123
+
128
124
  describe "limit to 10" do
129
125
  before(:each) do
130
126
  @request[:crawl_limit] = 10
@@ -137,7 +133,7 @@ describe Cobweb, :local_only => true do
137
133
  wait_for_crawl_finished crawl[:crawl_id]
138
134
  Resque.size("cobweb_process_job").should_not == @base_page_count
139
135
  end
140
- it "should notify of crawl finished" do
136
+ it "should notify of crawl finished once" do
141
137
  crawl = @cobweb.start(@base_url)
142
138
  @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
143
139
  wait_for_crawl_finished crawl[:crawl_id]
@@ -150,7 +146,7 @@ describe Cobweb, :local_only => true do
150
146
  Resque.size("cobweb_process_job").should == 10
151
147
  end
152
148
  end
153
-
149
+
154
150
  describe "limit to 100" do
155
151
  before(:each) do
156
152
  @request[:crawl_limit] = 100
@@ -163,7 +159,7 @@ describe Cobweb, :local_only => true do
163
159
  wait_for_crawl_finished crawl[:crawl_id]
164
160
  Resque.size("cobweb_process_job").should == @base_page_count
165
161
  end
166
- it "should notify of crawl finished" do
162
+ it "should notify of crawl finished once" do
167
163
  crawl = @cobweb.start(@base_url)
168
164
  @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
169
165
  wait_for_crawl_finished crawl[:crawl_id]
@@ -175,12 +171,13 @@ describe Cobweb, :local_only => true do
175
171
  wait_for_crawl_finished crawl[:crawl_id]
176
172
  Resque.size("cobweb_process_job").should_not == 100
177
173
  end
178
- end
174
+ end
179
175
  end
180
176
 
181
177
  after(:all) do
178
+
182
179
  @all_processes = `ps aux | grep resque | grep -v grep | grep -v resque-web | awk '{print $2}'`.split("\n")
183
- command = "kill #{(@all_processes - @existing_processes).join(" ")}"
180
+ command = "kill -9 #{(@all_processes - @existing_processes).join(" ")}"
184
181
  IO.popen(command)
185
182
 
186
183
  clear_queues
@@ -210,6 +207,7 @@ def clear_queues
210
207
 
211
208
  Resque.size("cobweb_process_job").should == 0
212
209
  Resque.size("cobweb_finished_job").should == 0
210
+ Resque.peek("cobweb_process_job", 0, 200).should be_empty
213
211
  end
214
212
 
215
213
 
@@ -40,6 +40,7 @@
40
40
  %thead
41
41
  %tr
42
42
  %th Base URL
43
+ %th Cobweb Version
43
44
  %th Total Pages
44
45
  %th Total Assets
45
46
  %th Queued Objects
@@ -48,6 +49,7 @@
48
49
  %tr
49
50
  %td
50
51
  %a{:href => "/statistics/#{crawl[:crawl_details][:crawl_id]}"}= crawl[:crawl_details][:base_url]
52
+ %td= crawl[:cobweb_version]
51
53
  %td= crawl[:statistics][:page_count]
52
54
  %td= crawl[:statistics][:asset_count]
53
55
  %td= crawl[:statistics][:queue_counter]
@@ -40,7 +40,7 @@
40
40
 
41
41
  %tbody
42
42
  - (1..30).each do |minutes|
43
- - date = (DateTime.now - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
43
+ - date = (DateTime.now.new_offset(0) - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
44
44
  %tr
45
45
  %th= minutes
46
46
  %td= @crawl[:pages_count][date]
@@ -88,7 +88,7 @@
88
88
 
89
89
  %tbody
90
90
  - (1..30).each do |minutes|
91
- - date = (DateTime.now - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
91
+ - date = (DateTime.now.new_offset(0) - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
92
92
  %tr
93
93
  %th= minutes
94
94
  %td= @crawl[:status_200_count][date]
@@ -113,7 +113,7 @@
113
113
 
114
114
  %tbody
115
115
  - (1..30).each do |minutes|
116
- - date = (DateTime.now - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
116
+ - date = (DateTime.now.new_offset(0) - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
117
117
  %tr
118
118
  %th= minutes
119
119
  %td= @crawl[:mime_text_count][date]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cobweb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.65
4
+ version: 0.0.66
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-26 00:00:00.000000000 Z
12
+ date: 2012-09-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: resque
16
- requirement: &70202662972080 !ruby/object:Gem::Requirement
16
+ requirement: &70145280967560 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70202662972080
24
+ version_requirements: *70145280967560
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: redis
27
- requirement: &70202662971540 !ruby/object:Gem::Requirement
27
+ requirement: &70145280966480 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70202662971540
35
+ version_requirements: *70145280966480
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: nokogiri
38
- requirement: &70202662970680 !ruby/object:Gem::Requirement
38
+ requirement: &70145280965880 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *70202662970680
46
+ version_requirements: *70145280965880
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: addressable
49
- requirement: &70202662969640 !ruby/object:Gem::Requirement
49
+ requirement: &70145280964660 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *70202662969640
57
+ version_requirements: *70145280964660
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &70202662968800 !ruby/object:Gem::Requirement
60
+ requirement: &70145280964040 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :runtime
67
67
  prerelease: false
68
- version_requirements: *70202662968800
68
+ version_requirements: *70145280964040
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: awesome_print
71
- requirement: &70202662967880 !ruby/object:Gem::Requirement
71
+ requirement: &70145280963260 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
- version_requirements: *70202662967880
79
+ version_requirements: *70145280963260
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: sinatra
82
- requirement: &70202662966760 !ruby/object:Gem::Requirement
82
+ requirement: &70145280962560 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: '0'
88
88
  type: :runtime
89
89
  prerelease: false
90
- version_requirements: *70202662966760
90
+ version_requirements: *70145280962560
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: thin
93
- requirement: &70202662961760 !ruby/object:Gem::Requirement
93
+ requirement: &70145280961780 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: '0'
99
99
  type: :runtime
100
100
  prerelease: false
101
- version_requirements: *70202662961760
101
+ version_requirements: *70145280961780
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: haml
104
- requirement: &70202662960800 !ruby/object:Gem::Requirement
104
+ requirement: &70145280960840 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,10 +109,10 @@ dependencies:
109
109
  version: '0'
110
110
  type: :runtime
111
111
  prerelease: false
112
- version_requirements: *70202662960800
112
+ version_requirements: *70145280960840
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: namespaced_redis
115
- requirement: &70202662958560 !ruby/object:Gem::Requirement
115
+ requirement: &70145280960100 !ruby/object:Gem::Requirement
116
116
  none: false
117
117
  requirements:
118
118
  - - ! '>='
@@ -120,7 +120,7 @@ dependencies:
120
120
  version: 1.0.2
121
121
  type: :runtime
122
122
  prerelease: false
123
- version_requirements: *70202662958560
123
+ version_requirements: *70145280960100
124
124
  description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
125
125
 crawl extremely large sites which is much more performant than multi-threaded crawlers. It
126
126
  is also a standalone crawler that has a sophisticated statistics monitoring interface