cobweb 1.0.20 → 1.0.21

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +5 -13
  2. data/lib/cobweb_version.rb +2 -2
  3. data/lib/crawl_helper.rb +22 -22
  4. metadata +203 -203
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- MDE5MzU3NzI2MTRhYzM5NzIwMDZlMTJjMTg5NzNiMzAyMjFkMjcxOQ==
5
- data.tar.gz: !binary |-
6
- MWE3ZTAwYjExZjc4NzU2MDYzOTlhOTQwMTNlNTcyZjNmZTYwNmU3Zg==
2
+ SHA1:
3
+ metadata.gz: f7c3816549392f4fa31701ae65bff51fbe22db89
4
+ data.tar.gz: a8cec8a17ec20f31a85980f75cb790331a6be16d
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- N2U2MDk1MmI3ZTU3OTFmZDI5YjY4YTEyNDNkMTE4MmJjOTFkNTZiYzNhY2Q4
10
- Mjk0ZjM1YThhNzhkMGNjNjJiZTJkNjM1OWQ1MGMzZmVlMDI5MzUyOTU5YTRk
11
- NzEzZjBiZjM2OTUxZTc2NzZjZDIyOWQ4ZmVlYzYyOGViMDIyYzY=
12
- data.tar.gz: !binary |-
13
- ZTJiYmRlNDY0M2FkNTdlN2I0ZjNiODYxOGQyN2MxZGZlMGViMWIxZDA4YmY1
14
- ZDc2ZDU3NDc4ODg1YmExYjFmYjMyY2U0MDU4MGQ0OTJkZjRmNjAyYmQ3NWVl
15
- ODY0ZTE5MGUzNzAzZWFlMzdmZmY1YzNhMmEzNWE1NzVkYzAwZDE=
6
+ metadata.gz: b2b172dd7f45efb8b5eccacad67b35683ee1f0867f8bfc423b5b8a91ed3b3cac22e5b2343a96d4a8bfdfe9426bf4461ced4385ea96e7bc850e80e3de4b0ce976
7
+ data.tar.gz: 2bace4df48372e0253973e7600e8d48ad06ab12a948723a5850620bfcb31efffba2fdaf6f36ae5502c054d8457a1dec9a23675636ce9a4c4474cf5b3086f6697
@@ -1,9 +1,9 @@
1
1
  # CobwebVersion holds the current version of the gem
2
2
  class CobwebVersion
3
-
3
+
4
4
  # Returns a string of the current version
5
5
  def self.version
6
- "1.0.20"
6
+ "1.0.21"
7
7
  end
8
8
 
9
9
  end
data/lib/crawl_helper.rb CHANGED
@@ -1,27 +1,27 @@
1
1
  class CrawlHelper
2
2
 
3
- require "net/https"
3
+ require "net/https"
4
4
  require "uri"
5
5
  require "redis"
6
- require 'namespaced_redis'
7
-
6
+ require "redis-namespace"
7
+
8
8
  def self.crawl_page(content_request)
9
9
  # change all hash keys to symbols
10
10
  content_request = HashUtil.deep_symbolize_keys(content_request)
11
11
  @content_request = content_request
12
-
12
+
13
13
  content_request[:redis_options] = {} unless content_request.has_key? :redis_options
14
14
  content_request[:crawl_limit_by_page] = false unless content_request.has_key? :crawl_limit_by_page
15
15
  content_request[:valid_mime_types] = ["*/*"] unless content_request.has_key? :valid_mime_types
16
16
  content_request[:queue_system] = content_request[:queue_system].to_sym
17
-
17
+
18
18
  @redis = NamespacedRedisConnection.new(content_request[:redis_options], "cobweb-#{Cobweb.version}-#{content_request[:crawl_id]}")
19
19
  @stats = Stats.new(content_request)
20
-
20
+
21
21
  @debug = content_request[:debug]
22
-
22
+
23
23
  decrement_queue_counter
24
-
24
+
25
25
  # check we haven't crawled this url before
26
26
  unless @redis.sismember "crawled", content_request[:url]
27
27
  # if there is no limit or we're still under it lets get the url
@@ -99,12 +99,12 @@ class CrawlHelper
99
99
  else
100
100
  puts "ignoring #{content_request[:url]} as outside of crawl limits." if content_request[:debug]
101
101
  end
102
-
102
+
103
103
  else
104
104
  @redis.srem "queued", content_request[:url]
105
105
  puts "Already crawled #{content_request[:url]}" if content_request[:debug]
106
106
  end
107
-
107
+
108
108
  # if there's nothing left queued or the crawled limit has been reached
109
109
  refresh_counters
110
110
  if content_request[:crawl_limit].nil? || content_request[:crawl_limit] == 0
@@ -114,7 +114,7 @@ class CrawlHelper
114
114
  elsif (@queue_counter +@crawl_started_counter-@crawl_counter)== 0 || @crawl_counter >= content_request[:crawl_limit].to_i
115
115
  finished(content_request)
116
116
  end
117
-
117
+
118
118
  end
119
119
 
120
120
  # Sets the crawl status to 'Crawl Finished' and enqueues the crawl finished job
@@ -123,11 +123,11 @@ class CrawlHelper
123
123
  if @redis.hget("statistics", "current_status")!= "Crawl Finished"
124
124
  ap "CRAWL FINISHED #{content_request[:url]}, #{counters}, #{@redis.get("original_base_url")}, #{@redis.get("crawled_base_url")}" if content_request[:debug]
125
125
  @stats.end_crawl(content_request)
126
-
126
+
127
127
  additional_stats = {:crawl_id => content_request[:crawl_id], :crawled_base_url => @redis.get("crawled_base_url")}
128
128
  additional_stats[:redis_options] = content_request[:redis_options] unless content_request[:redis_options] == {}
129
129
  additional_stats[:source_id] = content_request[:source_id] unless content_request[:source_id].nil?
130
-
130
+
131
131
  if content_request[:queue_system] == :resque
132
132
  Resque.enqueue(const_get(content_request[:crawl_finished_queue]), @stats.get_statistics.merge(additional_stats))
133
133
  elsif content_request[:queue_system] == :sidekiq
@@ -140,7 +140,7 @@ class CrawlHelper
140
140
  # nothing to report here, we're skipping the remaining urls as we're outside of the crawl limit
141
141
  end
142
142
  end
143
-
143
+
144
144
  # Enqueues the content to the processing queue setup in options
145
145
  def self.send_to_processing_queue(content, content_request)
146
146
  content_to_send = content.merge({:internal_urls => content_request[:internal_urls], :redis_options => content_request[:redis_options], :source_id => content_request[:source_id], :crawl_id => content_request[:crawl_id]})
@@ -171,7 +171,7 @@ class CrawlHelper
171
171
  end
172
172
 
173
173
  private
174
-
174
+
175
175
  # Helper method to determine if this content is to be processed or not
176
176
  def self.is_permitted_type(content)
177
177
  @content_request[:valid_mime_types].each do |mime_type|
@@ -179,19 +179,19 @@ class CrawlHelper
179
179
  end
180
180
  false
181
181
  end
182
-
182
+
183
183
  # Returns true if the crawl count is within limits
184
184
  def self.within_crawl_limits?(crawl_limit)
185
185
  refresh_counters
186
186
  crawl_limit.nil? or @crawl_started_counter < crawl_limit.to_i
187
187
  end
188
-
188
+
189
189
  # Returns true if the queue count is calculated to be still within limits when complete
190
190
  def self.within_queue_limits?(crawl_limit)
191
191
  refresh_counters
192
192
  (@content_request[:crawl_limit_by_page]&& (crawl_limit.nil? or @crawl_counter < crawl_limit.to_i)) || within_crawl_limits?(crawl_limit) && (crawl_limit.nil? || (@queue_counter + @crawl_counter) < crawl_limit.to_i)
193
193
  end
194
-
194
+
195
195
  # Sets the base url in redis. If the first page is a redirect, it sets the base_url to the destination
196
196
  def self.set_base_url(redis, content, content_request)
197
197
  if redis.get("base_url").nil?
@@ -202,7 +202,7 @@ class CrawlHelper
202
202
  redis.set("base_url", content[:url])
203
203
  end
204
204
  end
205
-
205
+
206
206
  # Enqueues content to the crawl_job queue
207
207
  def self.enqueue_content(content_request, link)
208
208
  new_request = content_request.clone
@@ -219,7 +219,7 @@ class CrawlHelper
219
219
  @redis.sadd "queued", link
220
220
  increment_queue_counter
221
221
  end
222
-
222
+
223
223
  # Increments the queue counter and refreshes crawl counters
224
224
  def self.increment_queue_counter
225
225
  @redis.incr "queue-counter"
@@ -245,7 +245,7 @@ class CrawlHelper
245
245
  @crawl_started_counter = @redis.get("crawl-started-counter").to_i
246
246
  @queue_counter = @redis.get("queue-counter").to_i
247
247
  end
248
-
248
+
249
249
  def self.print_counters
250
250
  puts counters
251
251
  end
@@ -253,4 +253,4 @@ class CrawlHelper
253
253
  def self.counters
254
254
  "@crawl_counter: #{@crawl_counter} @crawl_started_counter: #{@crawl_started_counter} @queue_counter: #{@queue_counter}"
255
255
  end
256
- end
256
+ end
metadata CHANGED
@@ -1,139 +1,139 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cobweb
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.20
4
+ version: 1.0.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Stewart McKee
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-23 00:00:00.000000000 Z
11
+ date: 2014-11-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: redis
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ! '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ! '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: nokogiri
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ! '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ! '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: addressable
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ! '>='
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
47
  version: '0'
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ! '>='
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: awesome_print
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - ! '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '0'
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - ! '>='
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: sinatra
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ! '>='
73
+ - - ">="
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ! '>='
80
+ - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: haml
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - ! '>='
87
+ - - ">="
88
88
  - !ruby/object:Gem::Version
89
89
  version: '0'
90
90
  type: :runtime
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - ! '>='
94
+ - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: redis-namespace
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - ! '>='
101
+ - - ">="
102
102
  - !ruby/object:Gem::Version
103
103
  version: '0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - ! '>='
108
+ - - ">="
109
109
  - !ruby/object:Gem::Version
110
110
  version: '0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: json
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - ! '>='
115
+ - - ">="
116
116
  - !ruby/object:Gem::Version
117
117
  version: '0'
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - ! '>='
122
+ - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: slop
127
127
  requirement: !ruby/object:Gem::Requirement
128
128
  requirements:
129
- - - ! '>='
129
+ - - ">="
130
130
  - !ruby/object:Gem::Version
131
131
  version: '0'
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
- - - ! '>='
136
+ - - ">="
137
137
  - !ruby/object:Gem::Version
138
138
  version: '0'
139
139
  description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
@@ -146,6 +146,186 @@ extensions: []
146
146
  extra_rdoc_files:
147
147
  - README.textile
148
148
  files:
149
+ - README.textile
150
+ - lib/cobweb.rb
151
+ - lib/cobweb_crawl_helper.rb
152
+ - lib/cobweb_crawler.rb
153
+ - lib/cobweb_dsl.rb
154
+ - lib/cobweb_finished_job.rb
155
+ - lib/cobweb_links.rb
156
+ - lib/cobweb_process_job.rb
157
+ - lib/cobweb_version.rb
158
+ - lib/content_link_parser.rb
159
+ - lib/crawl.rb
160
+ - lib/crawl_finished_worker.rb
161
+ - lib/crawl_helper.rb
162
+ - lib/crawl_job.rb
163
+ - lib/crawl_object.rb
164
+ - lib/crawl_process_worker.rb
165
+ - lib/crawl_worker.rb
166
+ - lib/document.rb
167
+ - lib/encoding_safe_process_job.rb
168
+ - lib/export_command.rb
169
+ - lib/hash_util.rb
170
+ - lib/redirect_error.rb
171
+ - lib/redis_connection.rb
172
+ - lib/report_command.rb
173
+ - lib/robots.rb
174
+ - lib/server.rb
175
+ - lib/sidekiq/cobweb_helper.rb
176
+ - lib/stats.rb
177
+ - lib/string.rb
178
+ - lib/uri_helper.rb
179
+ - public/css/accordion.css
180
+ - public/css/custom.css
181
+ - public/css/datatable.css
182
+ - public/css/datepicker.css
183
+ - public/css/form-buttons.css
184
+ - public/css/forms.css
185
+ - public/css/jquery.fancybox-1.3.4.css
186
+ - public/css/jquery.treeview.css
187
+ - public/css/link-buttons.css
188
+ - public/css/login.css
189
+ - public/css/menu.css
190
+ - public/css/messages.css
191
+ - public/css/modalbox.css
192
+ - public/css/statics.css
193
+ - public/css/style.css
194
+ - public/css/style_text.css
195
+ - public/css/tabs.css
196
+ - public/css/wysiwyg-editor.css
197
+ - public/css/wysiwyg.css
198
+ - public/css/wysiwyg.modal.css
199
+ - public/gfx/back-menu.gif
200
+ - public/gfx/back-submenu.gif
201
+ - public/gfx/background.gif
202
+ - public/gfx/box-hide.png
203
+ - public/gfx/box-search.png
204
+ - public/gfx/box-title.gif
205
+ - public/gfx/code.gif
206
+ - public/gfx/datepicker-arrows.gif
207
+ - public/gfx/fancybox/blank.gif
208
+ - public/gfx/fancybox/fancy_close.png
209
+ - public/gfx/fancybox/fancy_loading.png
210
+ - public/gfx/fancybox/fancy_nav_left.png
211
+ - public/gfx/fancybox/fancy_nav_right.png
212
+ - public/gfx/fancybox/fancy_title_left.png
213
+ - public/gfx/fancybox/fancy_title_main.png
214
+ - public/gfx/fancybox/fancy_title_over.png
215
+ - public/gfx/fancybox/fancy_title_right.png
216
+ - public/gfx/fancybox/fancybox-x.png
217
+ - public/gfx/fancybox/fancybox.png
218
+ - public/gfx/forms/date-next.gif
219
+ - public/gfx/forms/date-prev.gif
220
+ - public/gfx/forms/forms-checkbox.gif
221
+ - public/gfx/forms/forms-date.gif
222
+ - public/gfx/forms/forms-file.gif
223
+ - public/gfx/forms/forms-input-big.gif
224
+ - public/gfx/forms/forms-input-medium.gif
225
+ - public/gfx/forms/forms-input-small.gif
226
+ - public/gfx/forms/forms-input-xl.gif
227
+ - public/gfx/forms/forms-radio.gif
228
+ - public/gfx/forms/forms-selectbox-small.gif
229
+ - public/gfx/forms/forms-selectbox.gif
230
+ - public/gfx/forms/forms-textarea-big.gif
231
+ - public/gfx/forms/forms-textarea-medium.gif
232
+ - public/gfx/forms/forms-textarea-small.gif
233
+ - public/gfx/forms/forms-textarea-xl.gif
234
+ - public/gfx/icon-delete.png
235
+ - public/gfx/icon-edit.png
236
+ - public/gfx/icon-home.gif
237
+ - public/gfx/img-delete.png
238
+ - public/gfx/img-hover.png
239
+ - public/gfx/img-zoom.png
240
+ - public/gfx/jquery.wysiwyg.gif
241
+ - public/gfx/label-icons.gif
242
+ - public/gfx/label.gif
243
+ - public/gfx/li-down.gif
244
+ - public/gfx/li.gif
245
+ - public/gfx/link-button-big.gif
246
+ - public/gfx/link-button-medium.gif
247
+ - public/gfx/link-button.gif
248
+ - public/gfx/loading-2.gif
249
+ - public/gfx/loading.gif
250
+ - public/gfx/logo.png
251
+ - public/gfx/modal-title.gif
252
+ - public/gfx/photos/00.jpg
253
+ - public/gfx/photos/01.jpg
254
+ - public/gfx/photos/01xl.jpg
255
+ - public/gfx/photos/02.jpg
256
+ - public/gfx/photos/02xl.jpg
257
+ - public/gfx/photos/03.jpg
258
+ - public/gfx/photos/03xl.jpg
259
+ - public/gfx/photos/04.jpg
260
+ - public/gfx/photos/04xl.jpg
261
+ - public/gfx/photos/05.jpg
262
+ - public/gfx/photos/05xl.jpg
263
+ - public/gfx/photos/06.jpg
264
+ - public/gfx/photos/06xl.jpg
265
+ - public/gfx/photos/07.jpg
266
+ - public/gfx/photos/07xl.jpg
267
+ - public/gfx/photos/08.jpg
268
+ - public/gfx/photos/08xl.jpg
269
+ - public/gfx/photos/09.jpg
270
+ - public/gfx/photos/09xl.jpg
271
+ - public/gfx/photos/10.jpg
272
+ - public/gfx/photos/10xl.jpg
273
+ - public/gfx/photos/11.jpg
274
+ - public/gfx/photos/11xl.jpg
275
+ - public/gfx/photos/12.jpg
276
+ - public/gfx/photos/12xl.jpg
277
+ - public/gfx/photos/13.jpg
278
+ - public/gfx/photos/13xl.jpg
279
+ - public/gfx/photos/14.jpg
280
+ - public/gfx/photos/14xl.jpg
281
+ - public/gfx/photos/15.jpg
282
+ - public/gfx/photos/15xl.jpg
283
+ - public/gfx/search-button.gif
284
+ - public/gfx/search-input.gif
285
+ - public/gfx/slider-button.gif
286
+ - public/gfx/system-messages.gif
287
+ - public/gfx/table-asc-arrow.gif
288
+ - public/gfx/table-desc-arrow.gif
289
+ - public/gfx/table-first.gif
290
+ - public/gfx/table-last.gif
291
+ - public/gfx/table-next.gif
292
+ - public/gfx/table-number.gif
293
+ - public/gfx/table-prev.gif
294
+ - public/gfx/table-rows.gif
295
+ - public/gfx/table-search.gif
296
+ - public/gfx/table-thead.gif
297
+ - public/gfx/tooltip.gif
298
+ - public/gfx/treeview/ajax-loader.gif
299
+ - public/gfx/treeview/file.gif
300
+ - public/gfx/treeview/folder-closed.gif
301
+ - public/gfx/treeview/folder.gif
302
+ - public/gfx/treeview/minus.gif
303
+ - public/gfx/treeview/plus.gif
304
+ - public/gfx/treeview/treeview-default-line.gif
305
+ - public/gfx/treeview/treeview-default.gif
306
+ - public/js/controls/wysiwyg.image.js
307
+ - public/js/controls/wysiwyg.link.js
308
+ - public/js/controls/wysiwyg.table.js
309
+ - public/js/customInput.jquery.js
310
+ - public/js/excanvas.min.js
311
+ - public/js/hoverIntent.js
312
+ - public/js/inline.js
313
+ - public/js/jquery-1.7.1.min.js
314
+ - public/js/jquery-ui-select.js
315
+ - public/js/jquery-ui-timepicker-addon.js
316
+ - public/js/jquery-ui.js
317
+ - public/js/jquery.dataTables.js
318
+ - public/js/jquery.fancybox-1.3.4.js
319
+ - public/js/jquery.filestyle.mini.js
320
+ - public/js/jquery.flot.js
321
+ - public/js/jquery.flot.resize.min.js
322
+ - public/js/jquery.graphtable-0.2.js
323
+ - public/js/jquery.tipsy.js
324
+ - public/js/jquery.treeview.js
325
+ - public/js/jquery.wysiwyg.js
326
+ - public/js/plugins/wysiwyg.rmFormat.js
327
+ - public/js/superfish.js
328
+ - public/js/supersubs.js
149
329
  - spec/cobweb/cobweb_crawl_helper_spec.rb
150
330
  - spec/cobweb/cobweb_crawl_spec.rb
151
331
  - spec/cobweb/cobweb_crawler_spec.rb
@@ -327,189 +507,9 @@ files:
327
507
  - spec/samples/sample_site/typography.html
328
508
  - spec/spec.opts
329
509
  - spec/spec_helper.rb
330
- - lib/cobweb.rb
331
- - lib/cobweb_crawl_helper.rb
332
- - lib/cobweb_crawler.rb
333
- - lib/cobweb_dsl.rb
334
- - lib/cobweb_finished_job.rb
335
- - lib/cobweb_links.rb
336
- - lib/cobweb_process_job.rb
337
- - lib/cobweb_version.rb
338
- - lib/content_link_parser.rb
339
- - lib/crawl.rb
340
- - lib/crawl_finished_worker.rb
341
- - lib/crawl_helper.rb
342
- - lib/crawl_job.rb
343
- - lib/crawl_object.rb
344
- - lib/crawl_process_worker.rb
345
- - lib/crawl_worker.rb
346
- - lib/document.rb
347
- - lib/encoding_safe_process_job.rb
348
- - lib/export_command.rb
349
- - lib/hash_util.rb
350
- - lib/redirect_error.rb
351
- - lib/redis_connection.rb
352
- - lib/report_command.rb
353
- - lib/robots.rb
354
- - lib/server.rb
355
- - lib/sidekiq/cobweb_helper.rb
356
- - lib/stats.rb
357
- - lib/string.rb
358
- - lib/uri_helper.rb
359
510
  - views/home.haml
360
511
  - views/layout.haml
361
512
  - views/statistics.haml
362
- - public/css/accordion.css
363
- - public/css/custom.css
364
- - public/css/datatable.css
365
- - public/css/datepicker.css
366
- - public/css/form-buttons.css
367
- - public/css/forms.css
368
- - public/css/jquery.fancybox-1.3.4.css
369
- - public/css/jquery.treeview.css
370
- - public/css/link-buttons.css
371
- - public/css/login.css
372
- - public/css/menu.css
373
- - public/css/messages.css
374
- - public/css/modalbox.css
375
- - public/css/statics.css
376
- - public/css/style.css
377
- - public/css/style_text.css
378
- - public/css/tabs.css
379
- - public/css/wysiwyg-editor.css
380
- - public/css/wysiwyg.css
381
- - public/css/wysiwyg.modal.css
382
- - public/gfx/back-menu.gif
383
- - public/gfx/back-submenu.gif
384
- - public/gfx/background.gif
385
- - public/gfx/box-hide.png
386
- - public/gfx/box-search.png
387
- - public/gfx/box-title.gif
388
- - public/gfx/code.gif
389
- - public/gfx/datepicker-arrows.gif
390
- - public/gfx/fancybox/blank.gif
391
- - public/gfx/fancybox/fancy_close.png
392
- - public/gfx/fancybox/fancy_loading.png
393
- - public/gfx/fancybox/fancy_nav_left.png
394
- - public/gfx/fancybox/fancy_nav_right.png
395
- - public/gfx/fancybox/fancy_title_left.png
396
- - public/gfx/fancybox/fancy_title_main.png
397
- - public/gfx/fancybox/fancy_title_over.png
398
- - public/gfx/fancybox/fancy_title_right.png
399
- - public/gfx/fancybox/fancybox-x.png
400
- - public/gfx/fancybox/fancybox.png
401
- - public/gfx/forms/date-next.gif
402
- - public/gfx/forms/date-prev.gif
403
- - public/gfx/forms/forms-checkbox.gif
404
- - public/gfx/forms/forms-date.gif
405
- - public/gfx/forms/forms-file.gif
406
- - public/gfx/forms/forms-input-big.gif
407
- - public/gfx/forms/forms-input-medium.gif
408
- - public/gfx/forms/forms-input-small.gif
409
- - public/gfx/forms/forms-input-xl.gif
410
- - public/gfx/forms/forms-radio.gif
411
- - public/gfx/forms/forms-selectbox-small.gif
412
- - public/gfx/forms/forms-selectbox.gif
413
- - public/gfx/forms/forms-textarea-big.gif
414
- - public/gfx/forms/forms-textarea-medium.gif
415
- - public/gfx/forms/forms-textarea-small.gif
416
- - public/gfx/forms/forms-textarea-xl.gif
417
- - public/gfx/icon-delete.png
418
- - public/gfx/icon-edit.png
419
- - public/gfx/icon-home.gif
420
- - public/gfx/img-delete.png
421
- - public/gfx/img-hover.png
422
- - public/gfx/img-zoom.png
423
- - public/gfx/jquery.wysiwyg.gif
424
- - public/gfx/label-icons.gif
425
- - public/gfx/label.gif
426
- - public/gfx/li-down.gif
427
- - public/gfx/li.gif
428
- - public/gfx/link-button-big.gif
429
- - public/gfx/link-button-medium.gif
430
- - public/gfx/link-button.gif
431
- - public/gfx/loading-2.gif
432
- - public/gfx/loading.gif
433
- - public/gfx/logo.png
434
- - public/gfx/modal-title.gif
435
- - public/gfx/photos/00.jpg
436
- - public/gfx/photos/01.jpg
437
- - public/gfx/photos/01xl.jpg
438
- - public/gfx/photos/02.jpg
439
- - public/gfx/photos/02xl.jpg
440
- - public/gfx/photos/03.jpg
441
- - public/gfx/photos/03xl.jpg
442
- - public/gfx/photos/04.jpg
443
- - public/gfx/photos/04xl.jpg
444
- - public/gfx/photos/05.jpg
445
- - public/gfx/photos/05xl.jpg
446
- - public/gfx/photos/06.jpg
447
- - public/gfx/photos/06xl.jpg
448
- - public/gfx/photos/07.jpg
449
- - public/gfx/photos/07xl.jpg
450
- - public/gfx/photos/08.jpg
451
- - public/gfx/photos/08xl.jpg
452
- - public/gfx/photos/09.jpg
453
- - public/gfx/photos/09xl.jpg
454
- - public/gfx/photos/10.jpg
455
- - public/gfx/photos/10xl.jpg
456
- - public/gfx/photos/11.jpg
457
- - public/gfx/photos/11xl.jpg
458
- - public/gfx/photos/12.jpg
459
- - public/gfx/photos/12xl.jpg
460
- - public/gfx/photos/13.jpg
461
- - public/gfx/photos/13xl.jpg
462
- - public/gfx/photos/14.jpg
463
- - public/gfx/photos/14xl.jpg
464
- - public/gfx/photos/15.jpg
465
- - public/gfx/photos/15xl.jpg
466
- - public/gfx/search-button.gif
467
- - public/gfx/search-input.gif
468
- - public/gfx/slider-button.gif
469
- - public/gfx/system-messages.gif
470
- - public/gfx/table-asc-arrow.gif
471
- - public/gfx/table-desc-arrow.gif
472
- - public/gfx/table-first.gif
473
- - public/gfx/table-last.gif
474
- - public/gfx/table-next.gif
475
- - public/gfx/table-number.gif
476
- - public/gfx/table-prev.gif
477
- - public/gfx/table-rows.gif
478
- - public/gfx/table-search.gif
479
- - public/gfx/table-thead.gif
480
- - public/gfx/tooltip.gif
481
- - public/gfx/treeview/ajax-loader.gif
482
- - public/gfx/treeview/file.gif
483
- - public/gfx/treeview/folder-closed.gif
484
- - public/gfx/treeview/folder.gif
485
- - public/gfx/treeview/minus.gif
486
- - public/gfx/treeview/plus.gif
487
- - public/gfx/treeview/treeview-default-line.gif
488
- - public/gfx/treeview/treeview-default.gif
489
- - public/js/controls/wysiwyg.image.js
490
- - public/js/controls/wysiwyg.link.js
491
- - public/js/controls/wysiwyg.table.js
492
- - public/js/customInput.jquery.js
493
- - public/js/excanvas.min.js
494
- - public/js/hoverIntent.js
495
- - public/js/inline.js
496
- - public/js/jquery-1.7.1.min.js
497
- - public/js/jquery-ui-select.js
498
- - public/js/jquery-ui-timepicker-addon.js
499
- - public/js/jquery-ui.js
500
- - public/js/jquery.dataTables.js
501
- - public/js/jquery.fancybox-1.3.4.js
502
- - public/js/jquery.filestyle.mini.js
503
- - public/js/jquery.flot.js
504
- - public/js/jquery.flot.resize.min.js
505
- - public/js/jquery.graphtable-0.2.js
506
- - public/js/jquery.tipsy.js
507
- - public/js/jquery.treeview.js
508
- - public/js/jquery.wysiwyg.js
509
- - public/js/plugins/wysiwyg.rmFormat.js
510
- - public/js/superfish.js
511
- - public/js/supersubs.js
512
- - README.textile
513
513
  homepage: http://github.com/stewartmckee/cobweb
514
514
  licenses:
515
515
  - MIT
@@ -520,17 +520,17 @@ require_paths:
520
520
  - lib
521
521
  required_ruby_version: !ruby/object:Gem::Requirement
522
522
  requirements:
523
- - - ! '>='
523
+ - - ">="
524
524
  - !ruby/object:Gem::Version
525
525
  version: '0'
526
526
  required_rubygems_version: !ruby/object:Gem::Requirement
527
527
  requirements:
528
- - - ! '>='
528
+ - - ">="
529
529
  - !ruby/object:Gem::Version
530
530
  version: '0'
531
531
  requirements: []
532
532
  rubyforge_project:
533
- rubygems_version: 2.1.11
533
+ rubygems_version: 2.2.2
534
534
  signing_key:
535
535
  specification_version: 4
536
536
  summary: Cobweb is a web crawler that can use resque to cluster crawls to quickly