cobweb 0.0.65 → 0.0.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
 
-h1. Cobweb v0.0.65
+h1. Cobweb v0.0.66
+
 !https://secure.travis-ci.org/stewartmckee/cobweb.png?branch=master!
 
 h2. Intro
@@ -45,7 +45,7 @@ class Cobweb
   default_first_page_redirect_internal_to true
   default_text_mime_types_to ["text/*", "application/xhtml+xml"]
   default_obey_robots_to false
-  default_user_agent_to "cobweb"
+  default_user_agent_to "cobweb/#{Cobweb.version} (ruby/#{RUBY_VERSION} nokogiri/#{Nokogiri::VERSION})"
 
 end
 
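Note: the README change above documents the new default user agent, which now embeds the gem, Ruby, and Nokogiri versions. As an illustration (the Ruby and Nokogiri version numbers below are hypothetical; Cobweb.version is "0.0.66" in this release), the interpolated string resolves to something like:

    agent = "cobweb/#{Cobweb.version} (ruby/#{RUBY_VERSION} nokogiri/#{Nokogiri::VERSION})"
    # => e.g. "cobweb/0.0.66 (ruby/1.9.3 nokogiri/1.5.5)"
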
@@ -27,7 +27,7 @@ class CobwebCrawler
 
     @stats = Stats.new(@options.merge(:crawl_id => @crawl_id))
     if @options[:web_statistics]
-      Server.start
+      Server.start(@options)
     end
 
     @cobweb = Cobweb.new(@options)
@@ -3,7 +3,7 @@ class CobwebVersion
 
   # Returns a string of the current version
   def self.version
-    "0.0.65"
+    "0.0.66"
   end
 
 end
@@ -24,12 +24,11 @@ class CrawlJob
 
     @debug = content_request[:debug]
 
-    decrement_queue_counter
-
     # check we haven't crawled this url before
     unless @redis.sismember "crawled", content_request[:url]
       # if there is no limit or we're still under it lets get the url
       if within_crawl_limits?(content_request[:crawl_limit])
+        puts "cbpl: #{content_request[:url]}" if content_request[:crawl_limit_by_page]
         content = Cobweb.new(content_request).get(content_request[:url], content_request)
         if content_request[:url] == @redis.get("original_base_url")
           @redis.set("crawled_base_url", content[:base_url])
@@ -78,7 +77,7 @@ class CrawlJob
         send_to_processing_queue(content, content_request)
 
         #if the enqueue counter has been requested update that
-        if content_request.has_key? :enqueue_counter_key
+        if content_request.has_key?(:enqueue_counter_key)
           enqueue_redis = NamespacedRedis.new(content_request[:redis_options], content_request[:enqueue_counter_namespace].to_s)
           current_count = enqueue_redis.hget(content_request[:enqueue_counter_key], content_request[:enqueue_counter_field]).to_i
           enqueue_redis.hset(content_request[:enqueue_counter_key], content_request[:enqueue_counter_field], current_count+1)
@@ -89,14 +88,16 @@ class CrawlJob
 
         # update the queue and crawl counts -- doing this very late in the piece so that the following transaction all occurs at once.
         # really we should do this with a lock https://github.com/PatrickTulskie/redis-lock
+        # stewart: i'm looking at the layout of this, think that there is scope for cleaning up the perform method to be more DRY.
         if content_request[:crawl_limit_by_page]
           if content[:mime_type].match("text/html")
             increment_crawl_counter
+            ap "clbp: #{crawl_counter}"
           end
         else
           increment_crawl_counter
         end
-        puts "Crawled: #{@crawl_counter} Limit: #{content_request[:crawl_limit]} Queued: #{@queue_counter} In Progress: #{@crawl_started_counter-@crawl_counter}" if @debug
+        puts "Crawled: #{crawl_counter} Limit: #{content_request[:crawl_limit]} Queued: #{queue_counter} In Progress: #{crawl_started_counter-crawl_counter}" if @debug
       end
     else
       puts "ignoring #{content_request[:url]} as mime_type is #{content[:mime_type]}" if content_request[:debug]
@@ -110,13 +111,15 @@ class CrawlJob
       puts "Already crawled #{content_request[:url]}" if content_request[:debug]
     end
 
+    decrement_queue_counter
+    puts content_request[:crawl_limit]
+    print_counters
     # if there's nothing left queued or the crawled limit has been reached
-    refresh_counters
     if content_request[:crawl_limit].nil? || content_request[:crawl_limit] == 0
-      if @queue_counter+@crawl_started_counter-@crawl_counter == 0
+      if queue_counter + crawl_started_counter - crawl_counter == 0
        finished(content_request)
      end
-    elsif (@queue_counter +@crawl_started_counter-@crawl_counter)== 0 || @crawl_counter >= content_request[:crawl_limit].to_i
+    elsif (queue_counter+crawl_started_counter-crawl_counter)== 0 || crawl_counter >= content_request[:crawl_limit].to_i
       finished(content_request)
     end
 
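Note: in the finish check above, crawl_started_counter - crawl_counter is the number of in-flight requests (the same quantity the debug line prints as "In Progress"), so a crawl is complete when queue_counter + crawl_started_counter - crawl_counter reaches zero. A worked example with hypothetical counter values:

    queue_counter         = 0    # nothing left waiting in Redis
    crawl_counter         = 77   # requests fully processed
    crawl_started_counter = 77   # requests handed to workers
    queue_counter + crawl_started_counter - crawl_counter
    # => 0, so finished(content_request) fires
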
@@ -167,14 +170,12 @@ class CrawlJob
 
   # Returns true if the crawl count is within limits
   def self.within_crawl_limits?(crawl_limit)
-    refresh_counters
-    crawl_limit.nil? or @crawl_started_counter < crawl_limit.to_i
+    crawl_limit.nil? or crawl_counter < crawl_limit.to_i
   end
 
   # Returns true if the queue count is calculated to be still within limits when complete
   def self.within_queue_limits?(crawl_limit)
-    refresh_counters
-    (@content_request[:crawl_limit_by_page]&& (crawl_limit.nil? or @crawl_counter < crawl_limit.to_i)) || within_crawl_limits?(crawl_limit) && (crawl_limit.nil? || (@queue_counter + @crawl_counter) < crawl_limit.to_i)
+    (@content_request[:crawl_limit_by_page]&& (crawl_limit.nil? or crawl_counter < crawl_limit.to_i)) || within_crawl_limits?(crawl_limit) && (crawl_limit.nil? || (queue_counter + crawl_counter) < crawl_limit.to_i)
   end
 
   # Sets the base url in redis. If the first page is a redirect, it sets the base_url to the destination
@@ -201,27 +202,27 @@ class CrawlJob
   # Increments the queue counter and refreshes crawl counters
   def self.increment_queue_counter
     @redis.incr "queue-counter"
-    refresh_counters
   end
   # Increments the crawl counter and refreshes crawl counters
   def self.increment_crawl_counter
     @redis.incr "crawl-counter"
-    refresh_counters
   end
   def self.increment_crawl_started_counter
     @redis.incr "crawl-started-counter"
-    refresh_counters
   end
   # Decrements the queue counter and refreshes crawl counters
   def self.decrement_queue_counter
     @redis.decr "queue-counter"
-    refresh_counters
   end
-  # Refreshes the crawl counters
-  def self.refresh_counters
-    @crawl_counter = @redis.get("crawl-counter").to_i
-    @crawl_started_counter = @redis.get("crawl-started-counter").to_i
-    @queue_counter = @redis.get("queue-counter").to_i
+
+  def self.crawl_counter
+    @redis.get("crawl-counter").to_i
+  end
+  def self.crawl_started_counter
+    @redis.get("crawl-started-counter").to_i
+  end
+  def self.queue_counter
+    @redis.get("queue-counter").to_i
   end
 
   def self.print_counters
@@ -229,7 +230,7 @@ class CrawlJob
   end
 
   def self.counters
-    "@crawl_counter: #{@crawl_counter} @crawl_started_counter: #{@crawl_started_counter} @queue_counter: #{@queue_counter}"
+    "crawl_counter: #{crawl_counter} crawl_started_counter: #{crawl_started_counter} queue_counter: #{queue_counter}"
  end
 
 end
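Note: the counter changes above replace cached instance variables (kept in sync by refresh_counters) with reader methods that query Redis on every call, so within_crawl_limits? and the finish checks always see the latest values written by other workers. A minimal standalone sketch of the pattern, assuming a local Redis and the redis gem (this is not the gem's actual class):

    require 'redis'

    class Counters
      @redis = Redis.new  # defaults to localhost:6379

      # Read-through accessor: no cached state, so increments made by
      # concurrent workers are visible immediately on the next call.
      def self.crawl_counter
        @redis.get("crawl-counter").to_i   # nil.to_i => 0 before the first incr
      end

      def self.increment_crawl_counter
        @redis.incr "crawl-counter"
      end
    end
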
@@ -10,17 +10,18 @@ class Server < Sinatra::Base
 
   # Sinatra Dashboard
   get '/' do
-    @full_redis = Redis.new
-
+    @full_redis = Redis.new(redis_options)
     @colors = ["#00366f", "#006ba0", "#3F0BDB", "#396CB3"]
 
     @crawls = []
-    @full_redis.smembers("cobweb_crawls").each do |crawl_id|
-      redis = NamespacedRedis.new({}, "cobweb-#{Cobweb.version}-#{crawl_id}")
+    @full_redis.smembers("cobweb_crawls").each do |crawl_id|
+      version = cobweb_version(crawl_id)
+      redis = NamespacedRedis.new(redis_options, "cobweb-#{version}-#{crawl_id}")
       stats = HashUtil.deep_symbolize_keys({
-        :crawl_details => redis.hgetall("crawl_details"),
+        :cobweb_version => version,
+        :crawl_details => redis.hgetall("crawl_details"),
         :statistics => redis.hgetall("statistics"),
-        :minute_totals => redis.hgetall("minute_totals")
+        :minute_totals => redis.hgetall("minute_totals"),
       })
       @crawls << stats
     end
@@ -30,7 +31,9 @@ class Server < Sinatra::Base
 
   # Sinatra Crawl Detail
   get '/statistics/:crawl_id' do
-    redis = NamespacedRedis.new({}, "cobweb-#{Cobweb.version}-#{params[:crawl_id]}")
+
+    version = cobweb_version(params[:crawl_id])
+    redis = NamespacedRedis.new(redis_options, "cobweb-#{version}-#{params[:crawl_id]}")
 
     @statistics = HashUtil.deep_symbolize_keys(redis.hgetall("statistics"))
     if @statistics[:status_counts].nil?
@@ -44,6 +47,7 @@
       @statistics[:mime_counts] = JSON.parse(@statistics[:mime_counts])
     end
     @crawl = {
+      :cobweb_version => version,
       :statistics => @statistics,
       :crawl_details => HashUtil.deep_symbolize_keys(redis.hgetall("crawl_details")),
       :minute_totals => HashUtil.deep_symbolize_keys(redis.hgetall("minute_totals")),
@@ -56,22 +60,49 @@
       :pages_count => HashUtil.deep_symbolize_keys(redis.hgetall("pages_count")),
       :assets_count => HashUtil.deep_symbolize_keys(redis.hgetall("assets_count"))
     }
-    ap @crawl
+    (1..30).each do |minutes|
+      date = (DateTime.now.new_offset(0) - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
+    end
+
     haml :statistics
   end
 
+  def cobweb_version(crawl_id)
+    redis = Redis.new(redis_options)
+    key = redis.keys("cobweb-*-#{crawl_id}-crawl_details").first
+    key =~ /cobweb-(.*?)-(.*?)-crawl_details/
+    cobweb_version = $1
+  end
+
+  def redis_options
+    Server.cobweb_options[:redis_options]
+  end
+
   # Starts the Sinatra server, and kills the processes when shutdown
-  def self.start
+  def self.start(options={})
+    @options = options
+    @options[:redis_options] = {} unless @options.has_key? :redis_options
+    ap @options
     unless Server.running?
-      thread = Thread.new do
-        puts "Starting Sinatra"
+      if @options[:run_as_server]
+        puts "Starting Sinatra for cobweb v#{Cobweb.version}"
         Server.run!
         puts "Stopping crawl..."
-        ## we need to manually kill the main thread as sinatra traps the interrupts
-        Thread.main.kill
+      else
+        thread = Thread.new do
+          puts "Starting Sinatra"
+          Server.run!
+          puts "Stopping crawl..."
+          ## we need to manually kill the main thread as sinatra traps the interrupts
+          Thread.main.kill
+        end
       end
     end
-  end
+  end
+
+  def self.cobweb_options
+    @options
+  end
 
 end
 
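Note: Server.start now receives the crawl options so the dashboard talks to the same Redis instance as the crawler, and cobweb_version recovers each crawl's gem version from its namespaced key instead of assuming the running Cobweb.version. A sketch of that regex against a hypothetical key (the crawl_id below is a made-up SHA1 hexdigest):

    key = "cobweb-0.0.66-5ba93c9db0cff93f52b521d7420e43f6eda2784f-crawl_details"
    key =~ /cobweb-(.*?)-(.*?)-crawl_details/
    $1  # => "0.0.66" (the version segment of the namespace)
    $2  # => "5ba93c9db0cff93f52b521d7420e43f6eda2784f" (the crawl_id segment)
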
@@ -81,7 +112,6 @@ class Numeric
   #Returns a human readable format for a number representing a data size
   def to_human
     units = %w{B KB MB GB TB}
-    ap self
     e = 0
     e = (Math.log(self)/Math.log(1024)).floor unless self==0
     s = "%.3f" % (to_f / 1024**e)
@@ -1,4 +1,3 @@
-
 # Stats class is the main statisitics hub for monitoring crawls. Either can be viewed through the Sinatra interface, or returned from the CobwebCrawler.crawl method or block
 class Stats
 
@@ -9,7 +9,7 @@ describe Cobweb, :local_only => true do
     # START WORKERS ONLY FOR CRAWL QUEUE SO WE CAN COUNT ENQUEUED PROCESS AND FINISH QUEUES
     puts "Starting Workers... Please Wait..."
     `mkdir log`
-    io = IO.popen("nohup rake resque:workers PIDFILE=./tmp/pids/resque.pid COUNT=5 QUEUE=cobweb_crawl_job > log/output.log &")
+    io = IO.popen("nohup rake resque:workers PIDFILE=./tmp/pids/resque.pid COUNT=1 QUEUE=cobweb_crawl_job > log/output.log &")
     puts "Workers Started."
 
   end
@@ -17,61 +17,58 @@ describe Cobweb, :local_only => true do
   before(:each) do
     @base_url = "http://localhost:3532/"
     @base_page_count = 77
-
     clear_queues
   end
-
+
   describe "with no crawl limit" do
     before(:each) do
-      @request = {
-        :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
-        :crawl_limit => nil,
-        :quiet => false,
-        :debug => false,
-        :cache => nil
-      }
-      @cobweb = Cobweb.new @request
+      @request = {
+        :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
+        :crawl_limit => nil,
+        :quiet => false,
+        :debug => false,
+        :cache => nil
+      }
+      @cobweb = Cobweb.new @request
     end
-
+
     it "should crawl entire site" do
-      crawl = @cobweb.start(@base_url)
-      @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
-      wait_for_crawl_finished crawl[:crawl_id]
-      Resque.size("cobweb_process_job").should == @base_page_count
+      crawl = @cobweb.start(@base_url)
+      @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
+      wait_for_crawl_finished crawl[:crawl_id]
+      Resque.size("cobweb_process_job").should == @base_page_count
     end
-    it "detect crawl finished" do
-      crawl = @cobweb.start(@base_url)
-      @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
-      wait_for_crawl_finished crawl[:crawl_id]
-      Resque.size("cobweb_finished_job").should == 1
+    it "detect crawl finished once" do
+      crawl = @cobweb.start(@base_url)
+      @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
+      wait_for_crawl_finished crawl[:crawl_id]
+      Resque.size("cobweb_finished_job").should == 1
     end
   end
-  describe "with limited mime_types" do
-    before(:each) do
-      @request = {
-        :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
-        :quiet => true,
-        :cache => nil,
-        :valid_mime_types => ["text/html"]
-      }
-      @cobweb = Cobweb.new @request
-    end
-
-    it "should only crawl html pages" do
-      crawl = @cobweb.start(@base_url)
-      @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
-      wait_for_crawl_finished crawl[:crawl_id]
-      Resque.size("cobweb_process_job").should == 8
-
-      mime_types = Resque.peek("cobweb_process_job", 0, 100).map{|job| job["args"][0]["mime_type"]}
-      mime_types.count.should == 8
-      mime_types.map{|m| m.should == "text/html"}
-      mime_types.select{|m| m=="text/html"}.count.should == 8
-
-
-    end
-
-  end
+  describe "with limited mime_types" do
+    before(:each) do
+      @request = {
+        :crawl_id => Digest::SHA1.hexdigest("#{Time.now.to_i}.#{Time.now.usec}"),
+        :quiet => true,
+        :cache => nil,
+        :valid_mime_types => ["text/html"]
+      }
+      @cobweb = Cobweb.new @request
+    end
+
+    it "should only crawl html pages" do
+      crawl = @cobweb.start(@base_url)
+      @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
+      wait_for_crawl_finished crawl[:crawl_id]
+      Resque.size("cobweb_process_job").should == 8
+
+      mime_types = Resque.peek("cobweb_process_job", 0, 100).map{|job| job["args"][0]["mime_type"]}
+      mime_types.count.should == 8
+      mime_types.map{|m| m.should == "text/html"}
+      mime_types.select{|m| m=="text/html"}.count.should == 8
+    end
+
+  end
   describe "with a crawl limit" do
     before(:each) do
       @request = {
@@ -86,7 +83,7 @@ describe Cobweb, :local_only => true do
       @request[:crawl_limit] = 1
       @cobweb = Cobweb.new @request
     end
-
+
     it "should not crawl the entire site" do
       crawl = @cobweb.start(@base_url)
       @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
@@ -99,13 +96,12 @@ describe Cobweb, :local_only => true do
       wait_for_crawl_finished crawl[:crawl_id]
       Resque.size("cobweb_process_job").should == 1
     end
-    it "should notify of crawl finished" do
+    it "should notify of crawl finished once" do
       crawl = @cobweb.start(@base_url)
       @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
       wait_for_crawl_finished crawl[:crawl_id]
       Resque.size("cobweb_finished_job").should == 1
-    end
-
+    end
   end
 
   describe "for pages only" do
@@ -114,17 +110,17 @@ describe Cobweb, :local_only => true do
       @request[:crawl_limit] = 5
       @cobweb = Cobweb.new @request
     end
-
+
     it "should only use html pages towards the crawl limit" do
       crawl = @cobweb.start(@base_url)
       @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
       wait_for_crawl_finished crawl[:crawl_id]
       mime_types = Resque.peek("cobweb_process_job", 0, 200).map{|job| job["args"][0]["mime_type"]}
-      mime_types.count.should == 70
+      Resque.peek("cobweb_process_job", 0, 200).count.should > 5
       mime_types.select{|m| m=="text/html"}.count.should == 5
     end
   end
-
+
   describe "limit to 10" do
     before(:each) do
       @request[:crawl_limit] = 10
@@ -137,7 +133,7 @@ describe Cobweb, :local_only => true do
       wait_for_crawl_finished crawl[:crawl_id]
       Resque.size("cobweb_process_job").should_not == @base_page_count
     end
-    it "should notify of crawl finished" do
+    it "should notify of crawl finished once" do
       crawl = @cobweb.start(@base_url)
       @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
       wait_for_crawl_finished crawl[:crawl_id]
@@ -150,7 +146,7 @@ describe Cobweb, :local_only => true do
       Resque.size("cobweb_process_job").should == 10
     end
   end
-
+
   describe "limit to 100" do
     before(:each) do
       @request[:crawl_limit] = 100
@@ -163,7 +159,7 @@ describe Cobweb, :local_only => true do
       wait_for_crawl_finished crawl[:crawl_id]
       Resque.size("cobweb_process_job").should == @base_page_count
     end
-    it "should notify of crawl finished" do
+    it "should notify of crawl finished once" do
      crawl = @cobweb.start(@base_url)
      @stat = Stats.new({:crawl_id => crawl[:crawl_id]})
      wait_for_crawl_finished crawl[:crawl_id]
@@ -175,12 +171,13 @@ describe Cobweb, :local_only => true do
       wait_for_crawl_finished crawl[:crawl_id]
       Resque.size("cobweb_process_job").should_not == 100
     end
-    end
+    end
   end
 
   after(:all) do
+
     @all_processes = `ps aux | grep resque | grep -v grep | grep -v resque-web | awk '{print $2}'`.split("\n")
-    command = "kill #{(@all_processes - @existing_processes).join(" ")}"
+    command = "kill -9 #{(@all_processes - @existing_processes).join(" ")}"
     IO.popen(command)
 
     clear_queues
@@ -210,6 +207,7 @@ def clear_queues
 
   Resque.size("cobweb_process_job").should == 0
   Resque.size("cobweb_finished_job").should == 0
+  Resque.peek("cobweb_process_job", 0, 200).should be_empty
 end
 
 
@@ -40,6 +40,7 @@
     %thead
       %tr
         %th Base URL
+        %th Cobweb Version
         %th Total Pages
        %th Total Assets
        %th Queued Objects
@@ -48,6 +49,7 @@
       %tr
         %td
           %a{:href => "/statistics/#{crawl[:crawl_details][:crawl_id]}"}= crawl[:crawl_details][:base_url]
+        %td= crawl[:cobweb_version]
         %td= crawl[:statistics][:page_count]
         %td= crawl[:statistics][:asset_count]
         %td= crawl[:statistics][:queue_counter]
@@ -40,7 +40,7 @@
 
   %tbody
     - (1..30).each do |minutes|
-      - date = (DateTime.now - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
+      - date = (DateTime.now.new_offset(0) - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
       %tr
         %th= minutes
         %td= @crawl[:pages_count][date]
@@ -88,7 +88,7 @@
 
   %tbody
     - (1..30).each do |minutes|
-      - date = (DateTime.now - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
+      - date = (DateTime.now.new_offset(0) - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
       %tr
        %th= minutes
        %td= @crawl[:status_200_count][date]
@@ -113,7 +113,7 @@
 
   %tbody
     - (1..30).each do |minutes|
-      - date = (DateTime.now - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
+      - date = (DateTime.now.new_offset(0) - (minutes/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
       %tr
         %th= minutes
         %td= @crawl[:mime_text_count][date]
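Note: the view changes above normalize timestamps with DateTime#new_offset(0) so the per-minute bucket keys are built in UTC and match the keys the crawler writes, regardless of the dashboard server's local timezone. A small sketch (the example times are hypothetical):

    require 'date'

    local = DateTime.now            # e.g. 2012-09-06T14:30:00+01:00
    utc   = local.new_offset(0)     # same instant, shifted to +00:00
    # key for the bucket five minutes ago (1440.0 = minutes per day):
    key = (utc - (5/1440.0)).strftime("%Y-%m-%d %H:%M").to_sym
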
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: cobweb
 version: !ruby/object:Gem::Version
-  version: 0.0.65
+  version: 0.0.66
 prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-08-26 00:00:00.000000000 Z
+date: 2012-09-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: resque
-  requirement: &70202662972080 !ruby/object:Gem::Requirement
+  requirement: &70145280967560 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70202662972080
+  version_requirements: *70145280967560
 - !ruby/object:Gem::Dependency
   name: redis
-  requirement: &70202662971540 !ruby/object:Gem::Requirement
+  requirement: &70145280966480 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70202662971540
+  version_requirements: *70145280966480
 - !ruby/object:Gem::Dependency
   name: nokogiri
-  requirement: &70202662970680 !ruby/object:Gem::Requirement
+  requirement: &70145280965880 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70202662970680
+  version_requirements: *70145280965880
 - !ruby/object:Gem::Dependency
   name: addressable
-  requirement: &70202662969640 !ruby/object:Gem::Requirement
+  requirement: &70145280964660 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70202662969640
+  version_requirements: *70145280964660
 - !ruby/object:Gem::Dependency
   name: rspec
-  requirement: &70202662968800 !ruby/object:Gem::Requirement
+  requirement: &70145280964040 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70202662968800
+  version_requirements: *70145280964040
 - !ruby/object:Gem::Dependency
   name: awesome_print
-  requirement: &70202662967880 !ruby/object:Gem::Requirement
+  requirement: &70145280963260 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70202662967880
+  version_requirements: *70145280963260
 - !ruby/object:Gem::Dependency
   name: sinatra
-  requirement: &70202662966760 !ruby/object:Gem::Requirement
+  requirement: &70145280962560 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70202662966760
+  version_requirements: *70145280962560
 - !ruby/object:Gem::Dependency
   name: thin
-  requirement: &70202662961760 !ruby/object:Gem::Requirement
+  requirement: &70145280961780 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -98,10 +98,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70202662961760
+  version_requirements: *70145280961780
 - !ruby/object:Gem::Dependency
   name: haml
-  requirement: &70202662960800 !ruby/object:Gem::Requirement
+  requirement: &70145280960840 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -109,10 +109,10 @@ dependencies:
         version: '0'
   type: :runtime
   prerelease: false
-  version_requirements: *70202662960800
+  version_requirements: *70145280960840
 - !ruby/object:Gem::Dependency
   name: namespaced_redis
-  requirement: &70202662958560 !ruby/object:Gem::Requirement
+  requirement: &70145280960100 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -120,7 +120,7 @@ dependencies:
         version: 1.0.2
   type: :runtime
   prerelease: false
-  version_requirements: *70202662958560
+  version_requirements: *70145280960100
 description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
   crawl extremely large sites which is much more perofmant than multi-threaded crawlers. It
   is also a standalone crawler that has a sophisticated statistics monitoring interface