cobweb 1.0.20 → 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/lib/cobweb_version.rb +2 -2
- data/lib/crawl_helper.rb +22 -22
- metadata +203 -203
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
MWE3ZTAwYjExZjc4NzU2MDYzOTlhOTQwMTNlNTcyZjNmZTYwNmU3Zg==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f7c3816549392f4fa31701ae65bff51fbe22db89
|
4
|
+
data.tar.gz: a8cec8a17ec20f31a85980f75cb790331a6be16d
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
Mjk0ZjM1YThhNzhkMGNjNjJiZTJkNjM1OWQ1MGMzZmVlMDI5MzUyOTU5YTRk
|
11
|
-
NzEzZjBiZjM2OTUxZTc2NzZjZDIyOWQ4ZmVlYzYyOGViMDIyYzY=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
ZTJiYmRlNDY0M2FkNTdlN2I0ZjNiODYxOGQyN2MxZGZlMGViMWIxZDA4YmY1
|
14
|
-
ZDc2ZDU3NDc4ODg1YmExYjFmYjMyY2U0MDU4MGQ0OTJkZjRmNjAyYmQ3NWVl
|
15
|
-
ODY0ZTE5MGUzNzAzZWFlMzdmZmY1YzNhMmEzNWE1NzVkYzAwZDE=
|
6
|
+
metadata.gz: b2b172dd7f45efb8b5eccacad67b35683ee1f0867f8bfc423b5b8a91ed3b3cac22e5b2343a96d4a8bfdfe9426bf4461ced4385ea96e7bc850e80e3de4b0ce976
|
7
|
+
data.tar.gz: 2bace4df48372e0253973e7600e8d48ad06ab12a948723a5850620bfcb31efffba2fdaf6f36ae5502c054d8457a1dec9a23675636ce9a4c4474cf5b3086f6697
|
data/lib/cobweb_version.rb
CHANGED
data/lib/crawl_helper.rb
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
class CrawlHelper
|
2
2
|
|
3
|
-
require "net/https"
|
3
|
+
require "net/https"
|
4
4
|
require "uri"
|
5
5
|
require "redis"
|
6
|
-
require
|
7
|
-
|
6
|
+
require "redis-namespace"
|
7
|
+
|
8
8
|
def self.crawl_page(content_request)
|
9
9
|
# change all hash keys to symbols
|
10
10
|
content_request = HashUtil.deep_symbolize_keys(content_request)
|
11
11
|
@content_request = content_request
|
12
|
-
|
12
|
+
|
13
13
|
content_request[:redis_options] = {} unless content_request.has_key? :redis_options
|
14
14
|
content_request[:crawl_limit_by_page] = false unless content_request.has_key? :crawl_limit_by_page
|
15
15
|
content_request[:valid_mime_types] = ["*/*"] unless content_request.has_key? :valid_mime_types
|
16
16
|
content_request[:queue_system] = content_request[:queue_system].to_sym
|
17
|
-
|
17
|
+
|
18
18
|
@redis = NamespacedRedisConnection.new(content_request[:redis_options], "cobweb-#{Cobweb.version}-#{content_request[:crawl_id]}")
|
19
19
|
@stats = Stats.new(content_request)
|
20
|
-
|
20
|
+
|
21
21
|
@debug = content_request[:debug]
|
22
|
-
|
22
|
+
|
23
23
|
decrement_queue_counter
|
24
|
-
|
24
|
+
|
25
25
|
# check we haven't crawled this url before
|
26
26
|
unless @redis.sismember "crawled", content_request[:url]
|
27
27
|
# if there is no limit or we're still under it lets get the url
|
@@ -99,12 +99,12 @@ class CrawlHelper
|
|
99
99
|
else
|
100
100
|
puts "ignoring #{content_request[:url]} as outside of crawl limits." if content_request[:debug]
|
101
101
|
end
|
102
|
-
|
102
|
+
|
103
103
|
else
|
104
104
|
@redis.srem "queued", content_request[:url]
|
105
105
|
puts "Already crawled #{content_request[:url]}" if content_request[:debug]
|
106
106
|
end
|
107
|
-
|
107
|
+
|
108
108
|
# if there's nothing left queued or the crawled limit has been reached
|
109
109
|
refresh_counters
|
110
110
|
if content_request[:crawl_limit].nil? || content_request[:crawl_limit] == 0
|
@@ -114,7 +114,7 @@ class CrawlHelper
|
|
114
114
|
elsif (@queue_counter +@crawl_started_counter-@crawl_counter)== 0 || @crawl_counter >= content_request[:crawl_limit].to_i
|
115
115
|
finished(content_request)
|
116
116
|
end
|
117
|
-
|
117
|
+
|
118
118
|
end
|
119
119
|
|
120
120
|
# Sets the crawl status to 'Crawl Finished' and enqueues the crawl finished job
|
@@ -123,11 +123,11 @@ class CrawlHelper
|
|
123
123
|
if @redis.hget("statistics", "current_status")!= "Crawl Finished"
|
124
124
|
ap "CRAWL FINISHED #{content_request[:url]}, #{counters}, #{@redis.get("original_base_url")}, #{@redis.get("crawled_base_url")}" if content_request[:debug]
|
125
125
|
@stats.end_crawl(content_request)
|
126
|
-
|
126
|
+
|
127
127
|
additional_stats = {:crawl_id => content_request[:crawl_id], :crawled_base_url => @redis.get("crawled_base_url")}
|
128
128
|
additional_stats[:redis_options] = content_request[:redis_options] unless content_request[:redis_options] == {}
|
129
129
|
additional_stats[:source_id] = content_request[:source_id] unless content_request[:source_id].nil?
|
130
|
-
|
130
|
+
|
131
131
|
if content_request[:queue_system] == :resque
|
132
132
|
Resque.enqueue(const_get(content_request[:crawl_finished_queue]), @stats.get_statistics.merge(additional_stats))
|
133
133
|
elsif content_request[:queue_system] == :sidekiq
|
@@ -140,7 +140,7 @@ class CrawlHelper
|
|
140
140
|
# nothing to report here, we're skipping the remaining urls as we're outside of the crawl limit
|
141
141
|
end
|
142
142
|
end
|
143
|
-
|
143
|
+
|
144
144
|
# Enqueues the content to the processing queue setup in options
|
145
145
|
def self.send_to_processing_queue(content, content_request)
|
146
146
|
content_to_send = content.merge({:internal_urls => content_request[:internal_urls], :redis_options => content_request[:redis_options], :source_id => content_request[:source_id], :crawl_id => content_request[:crawl_id]})
|
@@ -171,7 +171,7 @@ class CrawlHelper
|
|
171
171
|
end
|
172
172
|
|
173
173
|
private
|
174
|
-
|
174
|
+
|
175
175
|
# Helper method to determine if this content is to be processed or not
|
176
176
|
def self.is_permitted_type(content)
|
177
177
|
@content_request[:valid_mime_types].each do |mime_type|
|
@@ -179,19 +179,19 @@ class CrawlHelper
|
|
179
179
|
end
|
180
180
|
false
|
181
181
|
end
|
182
|
-
|
182
|
+
|
183
183
|
# Returns true if the crawl count is within limits
|
184
184
|
def self.within_crawl_limits?(crawl_limit)
|
185
185
|
refresh_counters
|
186
186
|
crawl_limit.nil? or @crawl_started_counter < crawl_limit.to_i
|
187
187
|
end
|
188
|
-
|
188
|
+
|
189
189
|
# Returns true if the queue count is calculated to be still within limits when complete
|
190
190
|
def self.within_queue_limits?(crawl_limit)
|
191
191
|
refresh_counters
|
192
192
|
(@content_request[:crawl_limit_by_page]&& (crawl_limit.nil? or @crawl_counter < crawl_limit.to_i)) || within_crawl_limits?(crawl_limit) && (crawl_limit.nil? || (@queue_counter + @crawl_counter) < crawl_limit.to_i)
|
193
193
|
end
|
194
|
-
|
194
|
+
|
195
195
|
# Sets the base url in redis. If the first page is a redirect, it sets the base_url to the destination
|
196
196
|
def self.set_base_url(redis, content, content_request)
|
197
197
|
if redis.get("base_url").nil?
|
@@ -202,7 +202,7 @@ class CrawlHelper
|
|
202
202
|
redis.set("base_url", content[:url])
|
203
203
|
end
|
204
204
|
end
|
205
|
-
|
205
|
+
|
206
206
|
# Enqueues content to the crawl_job queue
|
207
207
|
def self.enqueue_content(content_request, link)
|
208
208
|
new_request = content_request.clone
|
@@ -219,7 +219,7 @@ class CrawlHelper
|
|
219
219
|
@redis.sadd "queued", link
|
220
220
|
increment_queue_counter
|
221
221
|
end
|
222
|
-
|
222
|
+
|
223
223
|
# Increments the queue counter and refreshes crawl counters
|
224
224
|
def self.increment_queue_counter
|
225
225
|
@redis.incr "queue-counter"
|
@@ -245,7 +245,7 @@ class CrawlHelper
|
|
245
245
|
@crawl_started_counter = @redis.get("crawl-started-counter").to_i
|
246
246
|
@queue_counter = @redis.get("queue-counter").to_i
|
247
247
|
end
|
248
|
-
|
248
|
+
|
249
249
|
def self.print_counters
|
250
250
|
puts counters
|
251
251
|
end
|
@@ -253,4 +253,4 @@ class CrawlHelper
|
|
253
253
|
def self.counters
|
254
254
|
"@crawl_counter: #{@crawl_counter} @crawl_started_counter: #{@crawl_started_counter} @queue_counter: #{@queue_counter}"
|
255
255
|
end
|
256
|
-
end
|
256
|
+
end
|
metadata
CHANGED
@@ -1,139 +1,139 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cobweb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.20
|
4
|
+
version: 1.0.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stewart McKee
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: addressable
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: awesome_print
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: sinatra
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: haml
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0'
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: redis-namespace
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - ">="
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: '0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: json
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- -
|
115
|
+
- - ">="
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: '0'
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- -
|
122
|
+
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: slop
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
|
-
- -
|
129
|
+
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
131
|
version: '0'
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
|
-
- -
|
136
|
+
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
139
|
description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
|
@@ -146,6 +146,186 @@ extensions: []
|
|
146
146
|
extra_rdoc_files:
|
147
147
|
- README.textile
|
148
148
|
files:
|
149
|
+
- README.textile
|
150
|
+
- lib/cobweb.rb
|
151
|
+
- lib/cobweb_crawl_helper.rb
|
152
|
+
- lib/cobweb_crawler.rb
|
153
|
+
- lib/cobweb_dsl.rb
|
154
|
+
- lib/cobweb_finished_job.rb
|
155
|
+
- lib/cobweb_links.rb
|
156
|
+
- lib/cobweb_process_job.rb
|
157
|
+
- lib/cobweb_version.rb
|
158
|
+
- lib/content_link_parser.rb
|
159
|
+
- lib/crawl.rb
|
160
|
+
- lib/crawl_finished_worker.rb
|
161
|
+
- lib/crawl_helper.rb
|
162
|
+
- lib/crawl_job.rb
|
163
|
+
- lib/crawl_object.rb
|
164
|
+
- lib/crawl_process_worker.rb
|
165
|
+
- lib/crawl_worker.rb
|
166
|
+
- lib/document.rb
|
167
|
+
- lib/encoding_safe_process_job.rb
|
168
|
+
- lib/export_command.rb
|
169
|
+
- lib/hash_util.rb
|
170
|
+
- lib/redirect_error.rb
|
171
|
+
- lib/redis_connection.rb
|
172
|
+
- lib/report_command.rb
|
173
|
+
- lib/robots.rb
|
174
|
+
- lib/server.rb
|
175
|
+
- lib/sidekiq/cobweb_helper.rb
|
176
|
+
- lib/stats.rb
|
177
|
+
- lib/string.rb
|
178
|
+
- lib/uri_helper.rb
|
179
|
+
- public/css/accordion.css
|
180
|
+
- public/css/custom.css
|
181
|
+
- public/css/datatable.css
|
182
|
+
- public/css/datepicker.css
|
183
|
+
- public/css/form-buttons.css
|
184
|
+
- public/css/forms.css
|
185
|
+
- public/css/jquery.fancybox-1.3.4.css
|
186
|
+
- public/css/jquery.treeview.css
|
187
|
+
- public/css/link-buttons.css
|
188
|
+
- public/css/login.css
|
189
|
+
- public/css/menu.css
|
190
|
+
- public/css/messages.css
|
191
|
+
- public/css/modalbox.css
|
192
|
+
- public/css/statics.css
|
193
|
+
- public/css/style.css
|
194
|
+
- public/css/style_text.css
|
195
|
+
- public/css/tabs.css
|
196
|
+
- public/css/wysiwyg-editor.css
|
197
|
+
- public/css/wysiwyg.css
|
198
|
+
- public/css/wysiwyg.modal.css
|
199
|
+
- public/gfx/back-menu.gif
|
200
|
+
- public/gfx/back-submenu.gif
|
201
|
+
- public/gfx/background.gif
|
202
|
+
- public/gfx/box-hide.png
|
203
|
+
- public/gfx/box-search.png
|
204
|
+
- public/gfx/box-title.gif
|
205
|
+
- public/gfx/code.gif
|
206
|
+
- public/gfx/datepicker-arrows.gif
|
207
|
+
- public/gfx/fancybox/blank.gif
|
208
|
+
- public/gfx/fancybox/fancy_close.png
|
209
|
+
- public/gfx/fancybox/fancy_loading.png
|
210
|
+
- public/gfx/fancybox/fancy_nav_left.png
|
211
|
+
- public/gfx/fancybox/fancy_nav_right.png
|
212
|
+
- public/gfx/fancybox/fancy_title_left.png
|
213
|
+
- public/gfx/fancybox/fancy_title_main.png
|
214
|
+
- public/gfx/fancybox/fancy_title_over.png
|
215
|
+
- public/gfx/fancybox/fancy_title_right.png
|
216
|
+
- public/gfx/fancybox/fancybox-x.png
|
217
|
+
- public/gfx/fancybox/fancybox.png
|
218
|
+
- public/gfx/forms/date-next.gif
|
219
|
+
- public/gfx/forms/date-prev.gif
|
220
|
+
- public/gfx/forms/forms-checkbox.gif
|
221
|
+
- public/gfx/forms/forms-date.gif
|
222
|
+
- public/gfx/forms/forms-file.gif
|
223
|
+
- public/gfx/forms/forms-input-big.gif
|
224
|
+
- public/gfx/forms/forms-input-medium.gif
|
225
|
+
- public/gfx/forms/forms-input-small.gif
|
226
|
+
- public/gfx/forms/forms-input-xl.gif
|
227
|
+
- public/gfx/forms/forms-radio.gif
|
228
|
+
- public/gfx/forms/forms-selectbox-small.gif
|
229
|
+
- public/gfx/forms/forms-selectbox.gif
|
230
|
+
- public/gfx/forms/forms-textarea-big.gif
|
231
|
+
- public/gfx/forms/forms-textarea-medium.gif
|
232
|
+
- public/gfx/forms/forms-textarea-small.gif
|
233
|
+
- public/gfx/forms/forms-textarea-xl.gif
|
234
|
+
- public/gfx/icon-delete.png
|
235
|
+
- public/gfx/icon-edit.png
|
236
|
+
- public/gfx/icon-home.gif
|
237
|
+
- public/gfx/img-delete.png
|
238
|
+
- public/gfx/img-hover.png
|
239
|
+
- public/gfx/img-zoom.png
|
240
|
+
- public/gfx/jquery.wysiwyg.gif
|
241
|
+
- public/gfx/label-icons.gif
|
242
|
+
- public/gfx/label.gif
|
243
|
+
- public/gfx/li-down.gif
|
244
|
+
- public/gfx/li.gif
|
245
|
+
- public/gfx/link-button-big.gif
|
246
|
+
- public/gfx/link-button-medium.gif
|
247
|
+
- public/gfx/link-button.gif
|
248
|
+
- public/gfx/loading-2.gif
|
249
|
+
- public/gfx/loading.gif
|
250
|
+
- public/gfx/logo.png
|
251
|
+
- public/gfx/modal-title.gif
|
252
|
+
- public/gfx/photos/00.jpg
|
253
|
+
- public/gfx/photos/01.jpg
|
254
|
+
- public/gfx/photos/01xl.jpg
|
255
|
+
- public/gfx/photos/02.jpg
|
256
|
+
- public/gfx/photos/02xl.jpg
|
257
|
+
- public/gfx/photos/03.jpg
|
258
|
+
- public/gfx/photos/03xl.jpg
|
259
|
+
- public/gfx/photos/04.jpg
|
260
|
+
- public/gfx/photos/04xl.jpg
|
261
|
+
- public/gfx/photos/05.jpg
|
262
|
+
- public/gfx/photos/05xl.jpg
|
263
|
+
- public/gfx/photos/06.jpg
|
264
|
+
- public/gfx/photos/06xl.jpg
|
265
|
+
- public/gfx/photos/07.jpg
|
266
|
+
- public/gfx/photos/07xl.jpg
|
267
|
+
- public/gfx/photos/08.jpg
|
268
|
+
- public/gfx/photos/08xl.jpg
|
269
|
+
- public/gfx/photos/09.jpg
|
270
|
+
- public/gfx/photos/09xl.jpg
|
271
|
+
- public/gfx/photos/10.jpg
|
272
|
+
- public/gfx/photos/10xl.jpg
|
273
|
+
- public/gfx/photos/11.jpg
|
274
|
+
- public/gfx/photos/11xl.jpg
|
275
|
+
- public/gfx/photos/12.jpg
|
276
|
+
- public/gfx/photos/12xl.jpg
|
277
|
+
- public/gfx/photos/13.jpg
|
278
|
+
- public/gfx/photos/13xl.jpg
|
279
|
+
- public/gfx/photos/14.jpg
|
280
|
+
- public/gfx/photos/14xl.jpg
|
281
|
+
- public/gfx/photos/15.jpg
|
282
|
+
- public/gfx/photos/15xl.jpg
|
283
|
+
- public/gfx/search-button.gif
|
284
|
+
- public/gfx/search-input.gif
|
285
|
+
- public/gfx/slider-button.gif
|
286
|
+
- public/gfx/system-messages.gif
|
287
|
+
- public/gfx/table-asc-arrow.gif
|
288
|
+
- public/gfx/table-desc-arrow.gif
|
289
|
+
- public/gfx/table-first.gif
|
290
|
+
- public/gfx/table-last.gif
|
291
|
+
- public/gfx/table-next.gif
|
292
|
+
- public/gfx/table-number.gif
|
293
|
+
- public/gfx/table-prev.gif
|
294
|
+
- public/gfx/table-rows.gif
|
295
|
+
- public/gfx/table-search.gif
|
296
|
+
- public/gfx/table-thead.gif
|
297
|
+
- public/gfx/tooltip.gif
|
298
|
+
- public/gfx/treeview/ajax-loader.gif
|
299
|
+
- public/gfx/treeview/file.gif
|
300
|
+
- public/gfx/treeview/folder-closed.gif
|
301
|
+
- public/gfx/treeview/folder.gif
|
302
|
+
- public/gfx/treeview/minus.gif
|
303
|
+
- public/gfx/treeview/plus.gif
|
304
|
+
- public/gfx/treeview/treeview-default-line.gif
|
305
|
+
- public/gfx/treeview/treeview-default.gif
|
306
|
+
- public/js/controls/wysiwyg.image.js
|
307
|
+
- public/js/controls/wysiwyg.link.js
|
308
|
+
- public/js/controls/wysiwyg.table.js
|
309
|
+
- public/js/customInput.jquery.js
|
310
|
+
- public/js/excanvas.min.js
|
311
|
+
- public/js/hoverIntent.js
|
312
|
+
- public/js/inline.js
|
313
|
+
- public/js/jquery-1.7.1.min.js
|
314
|
+
- public/js/jquery-ui-select.js
|
315
|
+
- public/js/jquery-ui-timepicker-addon.js
|
316
|
+
- public/js/jquery-ui.js
|
317
|
+
- public/js/jquery.dataTables.js
|
318
|
+
- public/js/jquery.fancybox-1.3.4.js
|
319
|
+
- public/js/jquery.filestyle.mini.js
|
320
|
+
- public/js/jquery.flot.js
|
321
|
+
- public/js/jquery.flot.resize.min.js
|
322
|
+
- public/js/jquery.graphtable-0.2.js
|
323
|
+
- public/js/jquery.tipsy.js
|
324
|
+
- public/js/jquery.treeview.js
|
325
|
+
- public/js/jquery.wysiwyg.js
|
326
|
+
- public/js/plugins/wysiwyg.rmFormat.js
|
327
|
+
- public/js/superfish.js
|
328
|
+
- public/js/supersubs.js
|
149
329
|
- spec/cobweb/cobweb_crawl_helper_spec.rb
|
150
330
|
- spec/cobweb/cobweb_crawl_spec.rb
|
151
331
|
- spec/cobweb/cobweb_crawler_spec.rb
|
@@ -327,189 +507,9 @@ files:
|
|
327
507
|
- spec/samples/sample_site/typography.html
|
328
508
|
- spec/spec.opts
|
329
509
|
- spec/spec_helper.rb
|
330
|
-
- lib/cobweb.rb
|
331
|
-
- lib/cobweb_crawl_helper.rb
|
332
|
-
- lib/cobweb_crawler.rb
|
333
|
-
- lib/cobweb_dsl.rb
|
334
|
-
- lib/cobweb_finished_job.rb
|
335
|
-
- lib/cobweb_links.rb
|
336
|
-
- lib/cobweb_process_job.rb
|
337
|
-
- lib/cobweb_version.rb
|
338
|
-
- lib/content_link_parser.rb
|
339
|
-
- lib/crawl.rb
|
340
|
-
- lib/crawl_finished_worker.rb
|
341
|
-
- lib/crawl_helper.rb
|
342
|
-
- lib/crawl_job.rb
|
343
|
-
- lib/crawl_object.rb
|
344
|
-
- lib/crawl_process_worker.rb
|
345
|
-
- lib/crawl_worker.rb
|
346
|
-
- lib/document.rb
|
347
|
-
- lib/encoding_safe_process_job.rb
|
348
|
-
- lib/export_command.rb
|
349
|
-
- lib/hash_util.rb
|
350
|
-
- lib/redirect_error.rb
|
351
|
-
- lib/redis_connection.rb
|
352
|
-
- lib/report_command.rb
|
353
|
-
- lib/robots.rb
|
354
|
-
- lib/server.rb
|
355
|
-
- lib/sidekiq/cobweb_helper.rb
|
356
|
-
- lib/stats.rb
|
357
|
-
- lib/string.rb
|
358
|
-
- lib/uri_helper.rb
|
359
510
|
- views/home.haml
|
360
511
|
- views/layout.haml
|
361
512
|
- views/statistics.haml
|
362
|
-
- public/css/accordion.css
|
363
|
-
- public/css/custom.css
|
364
|
-
- public/css/datatable.css
|
365
|
-
- public/css/datepicker.css
|
366
|
-
- public/css/form-buttons.css
|
367
|
-
- public/css/forms.css
|
368
|
-
- public/css/jquery.fancybox-1.3.4.css
|
369
|
-
- public/css/jquery.treeview.css
|
370
|
-
- public/css/link-buttons.css
|
371
|
-
- public/css/login.css
|
372
|
-
- public/css/menu.css
|
373
|
-
- public/css/messages.css
|
374
|
-
- public/css/modalbox.css
|
375
|
-
- public/css/statics.css
|
376
|
-
- public/css/style.css
|
377
|
-
- public/css/style_text.css
|
378
|
-
- public/css/tabs.css
|
379
|
-
- public/css/wysiwyg-editor.css
|
380
|
-
- public/css/wysiwyg.css
|
381
|
-
- public/css/wysiwyg.modal.css
|
382
|
-
- public/gfx/back-menu.gif
|
383
|
-
- public/gfx/back-submenu.gif
|
384
|
-
- public/gfx/background.gif
|
385
|
-
- public/gfx/box-hide.png
|
386
|
-
- public/gfx/box-search.png
|
387
|
-
- public/gfx/box-title.gif
|
388
|
-
- public/gfx/code.gif
|
389
|
-
- public/gfx/datepicker-arrows.gif
|
390
|
-
- public/gfx/fancybox/blank.gif
|
391
|
-
- public/gfx/fancybox/fancy_close.png
|
392
|
-
- public/gfx/fancybox/fancy_loading.png
|
393
|
-
- public/gfx/fancybox/fancy_nav_left.png
|
394
|
-
- public/gfx/fancybox/fancy_nav_right.png
|
395
|
-
- public/gfx/fancybox/fancy_title_left.png
|
396
|
-
- public/gfx/fancybox/fancy_title_main.png
|
397
|
-
- public/gfx/fancybox/fancy_title_over.png
|
398
|
-
- public/gfx/fancybox/fancy_title_right.png
|
399
|
-
- public/gfx/fancybox/fancybox-x.png
|
400
|
-
- public/gfx/fancybox/fancybox.png
|
401
|
-
- public/gfx/forms/date-next.gif
|
402
|
-
- public/gfx/forms/date-prev.gif
|
403
|
-
- public/gfx/forms/forms-checkbox.gif
|
404
|
-
- public/gfx/forms/forms-date.gif
|
405
|
-
- public/gfx/forms/forms-file.gif
|
406
|
-
- public/gfx/forms/forms-input-big.gif
|
407
|
-
- public/gfx/forms/forms-input-medium.gif
|
408
|
-
- public/gfx/forms/forms-input-small.gif
|
409
|
-
- public/gfx/forms/forms-input-xl.gif
|
410
|
-
- public/gfx/forms/forms-radio.gif
|
411
|
-
- public/gfx/forms/forms-selectbox-small.gif
|
412
|
-
- public/gfx/forms/forms-selectbox.gif
|
413
|
-
- public/gfx/forms/forms-textarea-big.gif
|
414
|
-
- public/gfx/forms/forms-textarea-medium.gif
|
415
|
-
- public/gfx/forms/forms-textarea-small.gif
|
416
|
-
- public/gfx/forms/forms-textarea-xl.gif
|
417
|
-
- public/gfx/icon-delete.png
|
418
|
-
- public/gfx/icon-edit.png
|
419
|
-
- public/gfx/icon-home.gif
|
420
|
-
- public/gfx/img-delete.png
|
421
|
-
- public/gfx/img-hover.png
|
422
|
-
- public/gfx/img-zoom.png
|
423
|
-
- public/gfx/jquery.wysiwyg.gif
|
424
|
-
- public/gfx/label-icons.gif
|
425
|
-
- public/gfx/label.gif
|
426
|
-
- public/gfx/li-down.gif
|
427
|
-
- public/gfx/li.gif
|
428
|
-
- public/gfx/link-button-big.gif
|
429
|
-
- public/gfx/link-button-medium.gif
|
430
|
-
- public/gfx/link-button.gif
|
431
|
-
- public/gfx/loading-2.gif
|
432
|
-
- public/gfx/loading.gif
|
433
|
-
- public/gfx/logo.png
|
434
|
-
- public/gfx/modal-title.gif
|
435
|
-
- public/gfx/photos/00.jpg
|
436
|
-
- public/gfx/photos/01.jpg
|
437
|
-
- public/gfx/photos/01xl.jpg
|
438
|
-
- public/gfx/photos/02.jpg
|
439
|
-
- public/gfx/photos/02xl.jpg
|
440
|
-
- public/gfx/photos/03.jpg
|
441
|
-
- public/gfx/photos/03xl.jpg
|
442
|
-
- public/gfx/photos/04.jpg
|
443
|
-
- public/gfx/photos/04xl.jpg
|
444
|
-
- public/gfx/photos/05.jpg
|
445
|
-
- public/gfx/photos/05xl.jpg
|
446
|
-
- public/gfx/photos/06.jpg
|
447
|
-
- public/gfx/photos/06xl.jpg
|
448
|
-
- public/gfx/photos/07.jpg
|
449
|
-
- public/gfx/photos/07xl.jpg
|
450
|
-
- public/gfx/photos/08.jpg
|
451
|
-
- public/gfx/photos/08xl.jpg
|
452
|
-
- public/gfx/photos/09.jpg
|
453
|
-
- public/gfx/photos/09xl.jpg
|
454
|
-
- public/gfx/photos/10.jpg
|
455
|
-
- public/gfx/photos/10xl.jpg
|
456
|
-
- public/gfx/photos/11.jpg
|
457
|
-
- public/gfx/photos/11xl.jpg
|
458
|
-
- public/gfx/photos/12.jpg
|
459
|
-
- public/gfx/photos/12xl.jpg
|
460
|
-
- public/gfx/photos/13.jpg
|
461
|
-
- public/gfx/photos/13xl.jpg
|
462
|
-
- public/gfx/photos/14.jpg
|
463
|
-
- public/gfx/photos/14xl.jpg
|
464
|
-
- public/gfx/photos/15.jpg
|
465
|
-
- public/gfx/photos/15xl.jpg
|
466
|
-
- public/gfx/search-button.gif
|
467
|
-
- public/gfx/search-input.gif
|
468
|
-
- public/gfx/slider-button.gif
|
469
|
-
- public/gfx/system-messages.gif
|
470
|
-
- public/gfx/table-asc-arrow.gif
|
471
|
-
- public/gfx/table-desc-arrow.gif
|
472
|
-
- public/gfx/table-first.gif
|
473
|
-
- public/gfx/table-last.gif
|
474
|
-
- public/gfx/table-next.gif
|
475
|
-
- public/gfx/table-number.gif
|
476
|
-
- public/gfx/table-prev.gif
|
477
|
-
- public/gfx/table-rows.gif
|
478
|
-
- public/gfx/table-search.gif
|
479
|
-
- public/gfx/table-thead.gif
|
480
|
-
- public/gfx/tooltip.gif
|
481
|
-
- public/gfx/treeview/ajax-loader.gif
|
482
|
-
- public/gfx/treeview/file.gif
|
483
|
-
- public/gfx/treeview/folder-closed.gif
|
484
|
-
- public/gfx/treeview/folder.gif
|
485
|
-
- public/gfx/treeview/minus.gif
|
486
|
-
- public/gfx/treeview/plus.gif
|
487
|
-
- public/gfx/treeview/treeview-default-line.gif
|
488
|
-
- public/gfx/treeview/treeview-default.gif
|
489
|
-
- public/js/controls/wysiwyg.image.js
|
490
|
-
- public/js/controls/wysiwyg.link.js
|
491
|
-
- public/js/controls/wysiwyg.table.js
|
492
|
-
- public/js/customInput.jquery.js
|
493
|
-
- public/js/excanvas.min.js
|
494
|
-
- public/js/hoverIntent.js
|
495
|
-
- public/js/inline.js
|
496
|
-
- public/js/jquery-1.7.1.min.js
|
497
|
-
- public/js/jquery-ui-select.js
|
498
|
-
- public/js/jquery-ui-timepicker-addon.js
|
499
|
-
- public/js/jquery-ui.js
|
500
|
-
- public/js/jquery.dataTables.js
|
501
|
-
- public/js/jquery.fancybox-1.3.4.js
|
502
|
-
- public/js/jquery.filestyle.mini.js
|
503
|
-
- public/js/jquery.flot.js
|
504
|
-
- public/js/jquery.flot.resize.min.js
|
505
|
-
- public/js/jquery.graphtable-0.2.js
|
506
|
-
- public/js/jquery.tipsy.js
|
507
|
-
- public/js/jquery.treeview.js
|
508
|
-
- public/js/jquery.wysiwyg.js
|
509
|
-
- public/js/plugins/wysiwyg.rmFormat.js
|
510
|
-
- public/js/superfish.js
|
511
|
-
- public/js/supersubs.js
|
512
|
-
- README.textile
|
513
513
|
homepage: http://github.com/stewartmckee/cobweb
|
514
514
|
licenses:
|
515
515
|
- MIT
|
@@ -520,17 +520,17 @@ require_paths:
|
|
520
520
|
- lib
|
521
521
|
required_ruby_version: !ruby/object:Gem::Requirement
|
522
522
|
requirements:
|
523
|
-
- -
|
523
|
+
- - ">="
|
524
524
|
- !ruby/object:Gem::Version
|
525
525
|
version: '0'
|
526
526
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
527
527
|
requirements:
|
528
|
-
- -
|
528
|
+
- - ">="
|
529
529
|
- !ruby/object:Gem::Version
|
530
530
|
version: '0'
|
531
531
|
requirements: []
|
532
532
|
rubyforge_project:
|
533
|
-
rubygems_version: 2.
|
533
|
+
rubygems_version: 2.2.2
|
534
534
|
signing_key:
|
535
535
|
specification_version: 4
|
536
536
|
summary: Cobweb is a web crawler that can use resque to cluster crawls to quickly
|