cobweb 1.0.20 → 1.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +5 -13
  2. data/lib/cobweb_version.rb +2 -2
  3. data/lib/crawl_helper.rb +22 -22
  4. metadata +203 -203
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- MDE5MzU3NzI2MTRhYzM5NzIwMDZlMTJjMTg5NzNiMzAyMjFkMjcxOQ==
5
- data.tar.gz: !binary |-
6
- MWE3ZTAwYjExZjc4NzU2MDYzOTlhOTQwMTNlNTcyZjNmZTYwNmU3Zg==
2
+ SHA1:
3
+ metadata.gz: f7c3816549392f4fa31701ae65bff51fbe22db89
4
+ data.tar.gz: a8cec8a17ec20f31a85980f75cb790331a6be16d
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- N2U2MDk1MmI3ZTU3OTFmZDI5YjY4YTEyNDNkMTE4MmJjOTFkNTZiYzNhY2Q4
10
- Mjk0ZjM1YThhNzhkMGNjNjJiZTJkNjM1OWQ1MGMzZmVlMDI5MzUyOTU5YTRk
11
- NzEzZjBiZjM2OTUxZTc2NzZjZDIyOWQ4ZmVlYzYyOGViMDIyYzY=
12
- data.tar.gz: !binary |-
13
- ZTJiYmRlNDY0M2FkNTdlN2I0ZjNiODYxOGQyN2MxZGZlMGViMWIxZDA4YmY1
14
- ZDc2ZDU3NDc4ODg1YmExYjFmYjMyY2U0MDU4MGQ0OTJkZjRmNjAyYmQ3NWVl
15
- ODY0ZTE5MGUzNzAzZWFlMzdmZmY1YzNhMmEzNWE1NzVkYzAwZDE=
6
+ metadata.gz: b2b172dd7f45efb8b5eccacad67b35683ee1f0867f8bfc423b5b8a91ed3b3cac22e5b2343a96d4a8bfdfe9426bf4461ced4385ea96e7bc850e80e3de4b0ce976
7
+ data.tar.gz: 2bace4df48372e0253973e7600e8d48ad06ab12a948723a5850620bfcb31efffba2fdaf6f36ae5502c054d8457a1dec9a23675636ce9a4c4474cf5b3086f6697
@@ -1,9 +1,9 @@
1
1
  # CobwebVersion holds the current version of the gem
2
2
  class CobwebVersion
3
-
3
+
4
4
  # Returns a string of the current version
5
5
  def self.version
6
- "1.0.20"
6
+ "1.0.21"
7
7
  end
8
8
 
9
9
  end
data/lib/crawl_helper.rb CHANGED
@@ -1,27 +1,27 @@
1
1
  class CrawlHelper
2
2
 
3
- require "net/https"
3
+ require "net/https"
4
4
  require "uri"
5
5
  require "redis"
6
- require 'namespaced_redis'
7
-
6
+ require "redis-namespace"
7
+
8
8
  def self.crawl_page(content_request)
9
9
  # change all hash keys to symbols
10
10
  content_request = HashUtil.deep_symbolize_keys(content_request)
11
11
  @content_request = content_request
12
-
12
+
13
13
  content_request[:redis_options] = {} unless content_request.has_key? :redis_options
14
14
  content_request[:crawl_limit_by_page] = false unless content_request.has_key? :crawl_limit_by_page
15
15
  content_request[:valid_mime_types] = ["*/*"] unless content_request.has_key? :valid_mime_types
16
16
  content_request[:queue_system] = content_request[:queue_system].to_sym
17
-
17
+
18
18
  @redis = NamespacedRedisConnection.new(content_request[:redis_options], "cobweb-#{Cobweb.version}-#{content_request[:crawl_id]}")
19
19
  @stats = Stats.new(content_request)
20
-
20
+
21
21
  @debug = content_request[:debug]
22
-
22
+
23
23
  decrement_queue_counter
24
-
24
+
25
25
  # check we haven't crawled this url before
26
26
  unless @redis.sismember "crawled", content_request[:url]
27
27
  # if there is no limit or we're still under it lets get the url
@@ -99,12 +99,12 @@ class CrawlHelper
99
99
  else
100
100
  puts "ignoring #{content_request[:url]} as outside of crawl limits." if content_request[:debug]
101
101
  end
102
-
102
+
103
103
  else
104
104
  @redis.srem "queued", content_request[:url]
105
105
  puts "Already crawled #{content_request[:url]}" if content_request[:debug]
106
106
  end
107
-
107
+
108
108
  # if there's nothing left queued or the crawled limit has been reached
109
109
  refresh_counters
110
110
  if content_request[:crawl_limit].nil? || content_request[:crawl_limit] == 0
@@ -114,7 +114,7 @@ class CrawlHelper
114
114
  elsif (@queue_counter +@crawl_started_counter-@crawl_counter)== 0 || @crawl_counter >= content_request[:crawl_limit].to_i
115
115
  finished(content_request)
116
116
  end
117
-
117
+
118
118
  end
119
119
 
120
120
  # Sets the crawl status to 'Crawl Finished' and enqueues the crawl finished job
@@ -123,11 +123,11 @@ class CrawlHelper
123
123
  if @redis.hget("statistics", "current_status")!= "Crawl Finished"
124
124
  ap "CRAWL FINISHED #{content_request[:url]}, #{counters}, #{@redis.get("original_base_url")}, #{@redis.get("crawled_base_url")}" if content_request[:debug]
125
125
  @stats.end_crawl(content_request)
126
-
126
+
127
127
  additional_stats = {:crawl_id => content_request[:crawl_id], :crawled_base_url => @redis.get("crawled_base_url")}
128
128
  additional_stats[:redis_options] = content_request[:redis_options] unless content_request[:redis_options] == {}
129
129
  additional_stats[:source_id] = content_request[:source_id] unless content_request[:source_id].nil?
130
-
130
+
131
131
  if content_request[:queue_system] == :resque
132
132
  Resque.enqueue(const_get(content_request[:crawl_finished_queue]), @stats.get_statistics.merge(additional_stats))
133
133
  elsif content_request[:queue_system] == :sidekiq
@@ -140,7 +140,7 @@ class CrawlHelper
140
140
  # nothing to report here, we're skipping the remaining urls as we're outside of the crawl limit
141
141
  end
142
142
  end
143
-
143
+
144
144
  # Enqueues the content to the processing queue setup in options
145
145
  def self.send_to_processing_queue(content, content_request)
146
146
  content_to_send = content.merge({:internal_urls => content_request[:internal_urls], :redis_options => content_request[:redis_options], :source_id => content_request[:source_id], :crawl_id => content_request[:crawl_id]})
@@ -171,7 +171,7 @@ class CrawlHelper
171
171
  end
172
172
 
173
173
  private
174
-
174
+
175
175
  # Helper method to determine if this content is to be processed or not
176
176
  def self.is_permitted_type(content)
177
177
  @content_request[:valid_mime_types].each do |mime_type|
@@ -179,19 +179,19 @@ class CrawlHelper
179
179
  end
180
180
  false
181
181
  end
182
-
182
+
183
183
  # Returns true if the crawl count is within limits
184
184
  def self.within_crawl_limits?(crawl_limit)
185
185
  refresh_counters
186
186
  crawl_limit.nil? or @crawl_started_counter < crawl_limit.to_i
187
187
  end
188
-
188
+
189
189
  # Returns true if the queue count is calculated to be still within limits when complete
190
190
  def self.within_queue_limits?(crawl_limit)
191
191
  refresh_counters
192
192
  (@content_request[:crawl_limit_by_page]&& (crawl_limit.nil? or @crawl_counter < crawl_limit.to_i)) || within_crawl_limits?(crawl_limit) && (crawl_limit.nil? || (@queue_counter + @crawl_counter) < crawl_limit.to_i)
193
193
  end
194
-
194
+
195
195
  # Sets the base url in redis. If the first page is a redirect, it sets the base_url to the destination
196
196
  def self.set_base_url(redis, content, content_request)
197
197
  if redis.get("base_url").nil?
@@ -202,7 +202,7 @@ class CrawlHelper
202
202
  redis.set("base_url", content[:url])
203
203
  end
204
204
  end
205
-
205
+
206
206
  # Enqueues content to the crawl_job queue
207
207
  def self.enqueue_content(content_request, link)
208
208
  new_request = content_request.clone
@@ -219,7 +219,7 @@ class CrawlHelper
219
219
  @redis.sadd "queued", link
220
220
  increment_queue_counter
221
221
  end
222
-
222
+
223
223
  # Increments the queue counter and refreshes crawl counters
224
224
  def self.increment_queue_counter
225
225
  @redis.incr "queue-counter"
@@ -245,7 +245,7 @@ class CrawlHelper
245
245
  @crawl_started_counter = @redis.get("crawl-started-counter").to_i
246
246
  @queue_counter = @redis.get("queue-counter").to_i
247
247
  end
248
-
248
+
249
249
  def self.print_counters
250
250
  puts counters
251
251
  end
@@ -253,4 +253,4 @@ class CrawlHelper
253
253
  def self.counters
254
254
  "@crawl_counter: #{@crawl_counter} @crawl_started_counter: #{@crawl_started_counter} @queue_counter: #{@queue_counter}"
255
255
  end
256
- end
256
+ end
metadata CHANGED
@@ -1,139 +1,139 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cobweb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.20
4
+ version: 1.0.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stewart McKee
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-23 00:00:00.000000000 Z
11
+ date: 2014-11-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ! '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ! '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: nokogiri
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ! '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ! '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: addressable
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ! '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ! '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: awesome_print
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ! '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ! '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: sinatra
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ! '>='
73
+ - - ">="
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ! '>='
80
+ - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: haml
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ! '>='
87
+ - - ">="
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ! '>='
94
+ - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: redis-namespace
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - ! '>='
101
+ - - ">="
102
102
  - !ruby/object:Gem::Version
103
103
  version: '0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ! '>='
108
+ - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: json
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - ! '>='
115
+ - - ">="
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0'
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - ! '>='
122
+ - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: slop
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
- - - ! '>='
129
+ - - ">="
130
130
  - !ruby/object:Gem::Version
131
131
  version: '0'
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
- - - ! '>='
136
+ - - ">="
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
@@ -146,6 +146,186 @@ extensions: []
146
146
  extra_rdoc_files:
147
147
  - README.textile
148
148
  files:
149
+ - README.textile
150
+ - lib/cobweb.rb
151
+ - lib/cobweb_crawl_helper.rb
152
+ - lib/cobweb_crawler.rb
153
+ - lib/cobweb_dsl.rb
154
+ - lib/cobweb_finished_job.rb
155
+ - lib/cobweb_links.rb
156
+ - lib/cobweb_process_job.rb
157
+ - lib/cobweb_version.rb
158
+ - lib/content_link_parser.rb
159
+ - lib/crawl.rb
160
+ - lib/crawl_finished_worker.rb
161
+ - lib/crawl_helper.rb
162
+ - lib/crawl_job.rb
163
+ - lib/crawl_object.rb
164
+ - lib/crawl_process_worker.rb
165
+ - lib/crawl_worker.rb
166
+ - lib/document.rb
167
+ - lib/encoding_safe_process_job.rb
168
+ - lib/export_command.rb
169
+ - lib/hash_util.rb
170
+ - lib/redirect_error.rb
171
+ - lib/redis_connection.rb
172
+ - lib/report_command.rb
173
+ - lib/robots.rb
174
+ - lib/server.rb
175
+ - lib/sidekiq/cobweb_helper.rb
176
+ - lib/stats.rb
177
+ - lib/string.rb
178
+ - lib/uri_helper.rb
179
+ - public/css/accordion.css
180
+ - public/css/custom.css
181
+ - public/css/datatable.css
182
+ - public/css/datepicker.css
183
+ - public/css/form-buttons.css
184
+ - public/css/forms.css
185
+ - public/css/jquery.fancybox-1.3.4.css
186
+ - public/css/jquery.treeview.css
187
+ - public/css/link-buttons.css
188
+ - public/css/login.css
189
+ - public/css/menu.css
190
+ - public/css/messages.css
191
+ - public/css/modalbox.css
192
+ - public/css/statics.css
193
+ - public/css/style.css
194
+ - public/css/style_text.css
195
+ - public/css/tabs.css
196
+ - public/css/wysiwyg-editor.css
197
+ - public/css/wysiwyg.css
198
+ - public/css/wysiwyg.modal.css
199
+ - public/gfx/back-menu.gif
200
+ - public/gfx/back-submenu.gif
201
+ - public/gfx/background.gif
202
+ - public/gfx/box-hide.png
203
+ - public/gfx/box-search.png
204
+ - public/gfx/box-title.gif
205
+ - public/gfx/code.gif
206
+ - public/gfx/datepicker-arrows.gif
207
+ - public/gfx/fancybox/blank.gif
208
+ - public/gfx/fancybox/fancy_close.png
209
+ - public/gfx/fancybox/fancy_loading.png
210
+ - public/gfx/fancybox/fancy_nav_left.png
211
+ - public/gfx/fancybox/fancy_nav_right.png
212
+ - public/gfx/fancybox/fancy_title_left.png
213
+ - public/gfx/fancybox/fancy_title_main.png
214
+ - public/gfx/fancybox/fancy_title_over.png
215
+ - public/gfx/fancybox/fancy_title_right.png
216
+ - public/gfx/fancybox/fancybox-x.png
217
+ - public/gfx/fancybox/fancybox.png
218
+ - public/gfx/forms/date-next.gif
219
+ - public/gfx/forms/date-prev.gif
220
+ - public/gfx/forms/forms-checkbox.gif
221
+ - public/gfx/forms/forms-date.gif
222
+ - public/gfx/forms/forms-file.gif
223
+ - public/gfx/forms/forms-input-big.gif
224
+ - public/gfx/forms/forms-input-medium.gif
225
+ - public/gfx/forms/forms-input-small.gif
226
+ - public/gfx/forms/forms-input-xl.gif
227
+ - public/gfx/forms/forms-radio.gif
228
+ - public/gfx/forms/forms-selectbox-small.gif
229
+ - public/gfx/forms/forms-selectbox.gif
230
+ - public/gfx/forms/forms-textarea-big.gif
231
+ - public/gfx/forms/forms-textarea-medium.gif
232
+ - public/gfx/forms/forms-textarea-small.gif
233
+ - public/gfx/forms/forms-textarea-xl.gif
234
+ - public/gfx/icon-delete.png
235
+ - public/gfx/icon-edit.png
236
+ - public/gfx/icon-home.gif
237
+ - public/gfx/img-delete.png
238
+ - public/gfx/img-hover.png
239
+ - public/gfx/img-zoom.png
240
+ - public/gfx/jquery.wysiwyg.gif
241
+ - public/gfx/label-icons.gif
242
+ - public/gfx/label.gif
243
+ - public/gfx/li-down.gif
244
+ - public/gfx/li.gif
245
+ - public/gfx/link-button-big.gif
246
+ - public/gfx/link-button-medium.gif
247
+ - public/gfx/link-button.gif
248
+ - public/gfx/loading-2.gif
249
+ - public/gfx/loading.gif
250
+ - public/gfx/logo.png
251
+ - public/gfx/modal-title.gif
252
+ - public/gfx/photos/00.jpg
253
+ - public/gfx/photos/01.jpg
254
+ - public/gfx/photos/01xl.jpg
255
+ - public/gfx/photos/02.jpg
256
+ - public/gfx/photos/02xl.jpg
257
+ - public/gfx/photos/03.jpg
258
+ - public/gfx/photos/03xl.jpg
259
+ - public/gfx/photos/04.jpg
260
+ - public/gfx/photos/04xl.jpg
261
+ - public/gfx/photos/05.jpg
262
+ - public/gfx/photos/05xl.jpg
263
+ - public/gfx/photos/06.jpg
264
+ - public/gfx/photos/06xl.jpg
265
+ - public/gfx/photos/07.jpg
266
+ - public/gfx/photos/07xl.jpg
267
+ - public/gfx/photos/08.jpg
268
+ - public/gfx/photos/08xl.jpg
269
+ - public/gfx/photos/09.jpg
270
+ - public/gfx/photos/09xl.jpg
271
+ - public/gfx/photos/10.jpg
272
+ - public/gfx/photos/10xl.jpg
273
+ - public/gfx/photos/11.jpg
274
+ - public/gfx/photos/11xl.jpg
275
+ - public/gfx/photos/12.jpg
276
+ - public/gfx/photos/12xl.jpg
277
+ - public/gfx/photos/13.jpg
278
+ - public/gfx/photos/13xl.jpg
279
+ - public/gfx/photos/14.jpg
280
+ - public/gfx/photos/14xl.jpg
281
+ - public/gfx/photos/15.jpg
282
+ - public/gfx/photos/15xl.jpg
283
+ - public/gfx/search-button.gif
284
+ - public/gfx/search-input.gif
285
+ - public/gfx/slider-button.gif
286
+ - public/gfx/system-messages.gif
287
+ - public/gfx/table-asc-arrow.gif
288
+ - public/gfx/table-desc-arrow.gif
289
+ - public/gfx/table-first.gif
290
+ - public/gfx/table-last.gif
291
+ - public/gfx/table-next.gif
292
+ - public/gfx/table-number.gif
293
+ - public/gfx/table-prev.gif
294
+ - public/gfx/table-rows.gif
295
+ - public/gfx/table-search.gif
296
+ - public/gfx/table-thead.gif
297
+ - public/gfx/tooltip.gif
298
+ - public/gfx/treeview/ajax-loader.gif
299
+ - public/gfx/treeview/file.gif
300
+ - public/gfx/treeview/folder-closed.gif
301
+ - public/gfx/treeview/folder.gif
302
+ - public/gfx/treeview/minus.gif
303
+ - public/gfx/treeview/plus.gif
304
+ - public/gfx/treeview/treeview-default-line.gif
305
+ - public/gfx/treeview/treeview-default.gif
306
+ - public/js/controls/wysiwyg.image.js
307
+ - public/js/controls/wysiwyg.link.js
308
+ - public/js/controls/wysiwyg.table.js
309
+ - public/js/customInput.jquery.js
310
+ - public/js/excanvas.min.js
311
+ - public/js/hoverIntent.js
312
+ - public/js/inline.js
313
+ - public/js/jquery-1.7.1.min.js
314
+ - public/js/jquery-ui-select.js
315
+ - public/js/jquery-ui-timepicker-addon.js
316
+ - public/js/jquery-ui.js
317
+ - public/js/jquery.dataTables.js
318
+ - public/js/jquery.fancybox-1.3.4.js
319
+ - public/js/jquery.filestyle.mini.js
320
+ - public/js/jquery.flot.js
321
+ - public/js/jquery.flot.resize.min.js
322
+ - public/js/jquery.graphtable-0.2.js
323
+ - public/js/jquery.tipsy.js
324
+ - public/js/jquery.treeview.js
325
+ - public/js/jquery.wysiwyg.js
326
+ - public/js/plugins/wysiwyg.rmFormat.js
327
+ - public/js/superfish.js
328
+ - public/js/supersubs.js
149
329
  - spec/cobweb/cobweb_crawl_helper_spec.rb
150
330
  - spec/cobweb/cobweb_crawl_spec.rb
151
331
  - spec/cobweb/cobweb_crawler_spec.rb
@@ -327,189 +507,9 @@ files:
327
507
  - spec/samples/sample_site/typography.html
328
508
  - spec/spec.opts
329
509
  - spec/spec_helper.rb
330
- - lib/cobweb.rb
331
- - lib/cobweb_crawl_helper.rb
332
- - lib/cobweb_crawler.rb
333
- - lib/cobweb_dsl.rb
334
- - lib/cobweb_finished_job.rb
335
- - lib/cobweb_links.rb
336
- - lib/cobweb_process_job.rb
337
- - lib/cobweb_version.rb
338
- - lib/content_link_parser.rb
339
- - lib/crawl.rb
340
- - lib/crawl_finished_worker.rb
341
- - lib/crawl_helper.rb
342
- - lib/crawl_job.rb
343
- - lib/crawl_object.rb
344
- - lib/crawl_process_worker.rb
345
- - lib/crawl_worker.rb
346
- - lib/document.rb
347
- - lib/encoding_safe_process_job.rb
348
- - lib/export_command.rb
349
- - lib/hash_util.rb
350
- - lib/redirect_error.rb
351
- - lib/redis_connection.rb
352
- - lib/report_command.rb
353
- - lib/robots.rb
354
- - lib/server.rb
355
- - lib/sidekiq/cobweb_helper.rb
356
- - lib/stats.rb
357
- - lib/string.rb
358
- - lib/uri_helper.rb
359
510
  - views/home.haml
360
511
  - views/layout.haml
361
512
  - views/statistics.haml
362
- - public/css/accordion.css
363
- - public/css/custom.css
364
- - public/css/datatable.css
365
- - public/css/datepicker.css
366
- - public/css/form-buttons.css
367
- - public/css/forms.css
368
- - public/css/jquery.fancybox-1.3.4.css
369
- - public/css/jquery.treeview.css
370
- - public/css/link-buttons.css
371
- - public/css/login.css
372
- - public/css/menu.css
373
- - public/css/messages.css
374
- - public/css/modalbox.css
375
- - public/css/statics.css
376
- - public/css/style.css
377
- - public/css/style_text.css
378
- - public/css/tabs.css
379
- - public/css/wysiwyg-editor.css
380
- - public/css/wysiwyg.css
381
- - public/css/wysiwyg.modal.css
382
- - public/gfx/back-menu.gif
383
- - public/gfx/back-submenu.gif
384
- - public/gfx/background.gif
385
- - public/gfx/box-hide.png
386
- - public/gfx/box-search.png
387
- - public/gfx/box-title.gif
388
- - public/gfx/code.gif
389
- - public/gfx/datepicker-arrows.gif
390
- - public/gfx/fancybox/blank.gif
391
- - public/gfx/fancybox/fancy_close.png
392
- - public/gfx/fancybox/fancy_loading.png
393
- - public/gfx/fancybox/fancy_nav_left.png
394
- - public/gfx/fancybox/fancy_nav_right.png
395
- - public/gfx/fancybox/fancy_title_left.png
396
- - public/gfx/fancybox/fancy_title_main.png
397
- - public/gfx/fancybox/fancy_title_over.png
398
- - public/gfx/fancybox/fancy_title_right.png
399
- - public/gfx/fancybox/fancybox-x.png
400
- - public/gfx/fancybox/fancybox.png
401
- - public/gfx/forms/date-next.gif
402
- - public/gfx/forms/date-prev.gif
403
- - public/gfx/forms/forms-checkbox.gif
404
- - public/gfx/forms/forms-date.gif
405
- - public/gfx/forms/forms-file.gif
406
- - public/gfx/forms/forms-input-big.gif
407
- - public/gfx/forms/forms-input-medium.gif
408
- - public/gfx/forms/forms-input-small.gif
409
- - public/gfx/forms/forms-input-xl.gif
410
- - public/gfx/forms/forms-radio.gif
411
- - public/gfx/forms/forms-selectbox-small.gif
412
- - public/gfx/forms/forms-selectbox.gif
413
- - public/gfx/forms/forms-textarea-big.gif
414
- - public/gfx/forms/forms-textarea-medium.gif
415
- - public/gfx/forms/forms-textarea-small.gif
416
- - public/gfx/forms/forms-textarea-xl.gif
417
- - public/gfx/icon-delete.png
418
- - public/gfx/icon-edit.png
419
- - public/gfx/icon-home.gif
420
- - public/gfx/img-delete.png
421
- - public/gfx/img-hover.png
422
- - public/gfx/img-zoom.png
423
- - public/gfx/jquery.wysiwyg.gif
424
- - public/gfx/label-icons.gif
425
- - public/gfx/label.gif
426
- - public/gfx/li-down.gif
427
- - public/gfx/li.gif
428
- - public/gfx/link-button-big.gif
429
- - public/gfx/link-button-medium.gif
430
- - public/gfx/link-button.gif
431
- - public/gfx/loading-2.gif
432
- - public/gfx/loading.gif
433
- - public/gfx/logo.png
434
- - public/gfx/modal-title.gif
435
- - public/gfx/photos/00.jpg
436
- - public/gfx/photos/01.jpg
437
- - public/gfx/photos/01xl.jpg
438
- - public/gfx/photos/02.jpg
439
- - public/gfx/photos/02xl.jpg
440
- - public/gfx/photos/03.jpg
441
- - public/gfx/photos/03xl.jpg
442
- - public/gfx/photos/04.jpg
443
- - public/gfx/photos/04xl.jpg
444
- - public/gfx/photos/05.jpg
445
- - public/gfx/photos/05xl.jpg
446
- - public/gfx/photos/06.jpg
447
- - public/gfx/photos/06xl.jpg
448
- - public/gfx/photos/07.jpg
449
- - public/gfx/photos/07xl.jpg
450
- - public/gfx/photos/08.jpg
451
- - public/gfx/photos/08xl.jpg
452
- - public/gfx/photos/09.jpg
453
- - public/gfx/photos/09xl.jpg
454
- - public/gfx/photos/10.jpg
455
- - public/gfx/photos/10xl.jpg
456
- - public/gfx/photos/11.jpg
457
- - public/gfx/photos/11xl.jpg
458
- - public/gfx/photos/12.jpg
459
- - public/gfx/photos/12xl.jpg
460
- - public/gfx/photos/13.jpg
461
- - public/gfx/photos/13xl.jpg
462
- - public/gfx/photos/14.jpg
463
- - public/gfx/photos/14xl.jpg
464
- - public/gfx/photos/15.jpg
465
- - public/gfx/photos/15xl.jpg
466
- - public/gfx/search-button.gif
467
- - public/gfx/search-input.gif
468
- - public/gfx/slider-button.gif
469
- - public/gfx/system-messages.gif
470
- - public/gfx/table-asc-arrow.gif
471
- - public/gfx/table-desc-arrow.gif
472
- - public/gfx/table-first.gif
473
- - public/gfx/table-last.gif
474
- - public/gfx/table-next.gif
475
- - public/gfx/table-number.gif
476
- - public/gfx/table-prev.gif
477
- - public/gfx/table-rows.gif
478
- - public/gfx/table-search.gif
479
- - public/gfx/table-thead.gif
480
- - public/gfx/tooltip.gif
481
- - public/gfx/treeview/ajax-loader.gif
482
- - public/gfx/treeview/file.gif
483
- - public/gfx/treeview/folder-closed.gif
484
- - public/gfx/treeview/folder.gif
485
- - public/gfx/treeview/minus.gif
486
- - public/gfx/treeview/plus.gif
487
- - public/gfx/treeview/treeview-default-line.gif
488
- - public/gfx/treeview/treeview-default.gif
489
- - public/js/controls/wysiwyg.image.js
490
- - public/js/controls/wysiwyg.link.js
491
- - public/js/controls/wysiwyg.table.js
492
- - public/js/customInput.jquery.js
493
- - public/js/excanvas.min.js
494
- - public/js/hoverIntent.js
495
- - public/js/inline.js
496
- - public/js/jquery-1.7.1.min.js
497
- - public/js/jquery-ui-select.js
498
- - public/js/jquery-ui-timepicker-addon.js
499
- - public/js/jquery-ui.js
500
- - public/js/jquery.dataTables.js
501
- - public/js/jquery.fancybox-1.3.4.js
502
- - public/js/jquery.filestyle.mini.js
503
- - public/js/jquery.flot.js
504
- - public/js/jquery.flot.resize.min.js
505
- - public/js/jquery.graphtable-0.2.js
506
- - public/js/jquery.tipsy.js
507
- - public/js/jquery.treeview.js
508
- - public/js/jquery.wysiwyg.js
509
- - public/js/plugins/wysiwyg.rmFormat.js
510
- - public/js/superfish.js
511
- - public/js/supersubs.js
512
- - README.textile
513
513
  homepage: http://github.com/stewartmckee/cobweb
514
514
  licenses:
515
515
  - MIT
@@ -520,17 +520,17 @@ require_paths:
520
520
  - lib
521
521
  required_ruby_version: !ruby/object:Gem::Requirement
522
522
  requirements:
523
- - - ! '>='
523
+ - - ">="
524
524
  - !ruby/object:Gem::Version
525
525
  version: '0'
526
526
  required_rubygems_version: !ruby/object:Gem::Requirement
527
527
  requirements:
528
- - - ! '>='
528
+ - - ">="
529
529
  - !ruby/object:Gem::Version
530
530
  version: '0'
531
531
  requirements: []
532
532
  rubyforge_project:
533
- rubygems_version: 2.1.11
533
+ rubygems_version: 2.2.2
534
534
  signing_key:
535
535
  specification_version: 4
536
536
  summary: Cobweb is a web crawler that can use resque to cluster crawls to quickly