cobweb 1.0.20 → 1.0.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -13
- data/lib/cobweb_version.rb +2 -2
- data/lib/crawl_helper.rb +22 -22
- metadata +203 -203
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
data.tar.gz: !binary |-
|
6
|
-
MWE3ZTAwYjExZjc4NzU2MDYzOTlhOTQwMTNlNTcyZjNmZTYwNmU3Zg==
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f7c3816549392f4fa31701ae65bff51fbe22db89
|
4
|
+
data.tar.gz: a8cec8a17ec20f31a85980f75cb790331a6be16d
|
7
5
|
SHA512:
|
8
|
-
metadata.gz:
|
9
|
-
|
10
|
-
Mjk0ZjM1YThhNzhkMGNjNjJiZTJkNjM1OWQ1MGMzZmVlMDI5MzUyOTU5YTRk
|
11
|
-
NzEzZjBiZjM2OTUxZTc2NzZjZDIyOWQ4ZmVlYzYyOGViMDIyYzY=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
ZTJiYmRlNDY0M2FkNTdlN2I0ZjNiODYxOGQyN2MxZGZlMGViMWIxZDA4YmY1
|
14
|
-
ZDc2ZDU3NDc4ODg1YmExYjFmYjMyY2U0MDU4MGQ0OTJkZjRmNjAyYmQ3NWVl
|
15
|
-
ODY0ZTE5MGUzNzAzZWFlMzdmZmY1YzNhMmEzNWE1NzVkYzAwZDE=
|
6
|
+
metadata.gz: b2b172dd7f45efb8b5eccacad67b35683ee1f0867f8bfc423b5b8a91ed3b3cac22e5b2343a96d4a8bfdfe9426bf4461ced4385ea96e7bc850e80e3de4b0ce976
|
7
|
+
data.tar.gz: 2bace4df48372e0253973e7600e8d48ad06ab12a948723a5850620bfcb31efffba2fdaf6f36ae5502c054d8457a1dec9a23675636ce9a4c4474cf5b3086f6697
|
data/lib/cobweb_version.rb
CHANGED
data/lib/crawl_helper.rb
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
class CrawlHelper
|
2
2
|
|
3
|
-
require "net/https"
|
3
|
+
require "net/https"
|
4
4
|
require "uri"
|
5
5
|
require "redis"
|
6
|
-
require
|
7
|
-
|
6
|
+
require "redis-namespace"
|
7
|
+
|
8
8
|
def self.crawl_page(content_request)
|
9
9
|
# change all hash keys to symbols
|
10
10
|
content_request = HashUtil.deep_symbolize_keys(content_request)
|
11
11
|
@content_request = content_request
|
12
|
-
|
12
|
+
|
13
13
|
content_request[:redis_options] = {} unless content_request.has_key? :redis_options
|
14
14
|
content_request[:crawl_limit_by_page] = false unless content_request.has_key? :crawl_limit_by_page
|
15
15
|
content_request[:valid_mime_types] = ["*/*"] unless content_request.has_key? :valid_mime_types
|
16
16
|
content_request[:queue_system] = content_request[:queue_system].to_sym
|
17
|
-
|
17
|
+
|
18
18
|
@redis = NamespacedRedisConnection.new(content_request[:redis_options], "cobweb-#{Cobweb.version}-#{content_request[:crawl_id]}")
|
19
19
|
@stats = Stats.new(content_request)
|
20
|
-
|
20
|
+
|
21
21
|
@debug = content_request[:debug]
|
22
|
-
|
22
|
+
|
23
23
|
decrement_queue_counter
|
24
|
-
|
24
|
+
|
25
25
|
# check we haven't crawled this url before
|
26
26
|
unless @redis.sismember "crawled", content_request[:url]
|
27
27
|
# if there is no limit or we're still under it lets get the url
|
@@ -99,12 +99,12 @@ class CrawlHelper
|
|
99
99
|
else
|
100
100
|
puts "ignoring #{content_request[:url]} as outside of crawl limits." if content_request[:debug]
|
101
101
|
end
|
102
|
-
|
102
|
+
|
103
103
|
else
|
104
104
|
@redis.srem "queued", content_request[:url]
|
105
105
|
puts "Already crawled #{content_request[:url]}" if content_request[:debug]
|
106
106
|
end
|
107
|
-
|
107
|
+
|
108
108
|
# if there's nothing left queued or the crawled limit has been reached
|
109
109
|
refresh_counters
|
110
110
|
if content_request[:crawl_limit].nil? || content_request[:crawl_limit] == 0
|
@@ -114,7 +114,7 @@ class CrawlHelper
|
|
114
114
|
elsif (@queue_counter +@crawl_started_counter-@crawl_counter)== 0 || @crawl_counter >= content_request[:crawl_limit].to_i
|
115
115
|
finished(content_request)
|
116
116
|
end
|
117
|
-
|
117
|
+
|
118
118
|
end
|
119
119
|
|
120
120
|
# Sets the crawl status to 'Crawl Finished' and enqueues the crawl finished job
|
@@ -123,11 +123,11 @@ class CrawlHelper
|
|
123
123
|
if @redis.hget("statistics", "current_status")!= "Crawl Finished"
|
124
124
|
ap "CRAWL FINISHED #{content_request[:url]}, #{counters}, #{@redis.get("original_base_url")}, #{@redis.get("crawled_base_url")}" if content_request[:debug]
|
125
125
|
@stats.end_crawl(content_request)
|
126
|
-
|
126
|
+
|
127
127
|
additional_stats = {:crawl_id => content_request[:crawl_id], :crawled_base_url => @redis.get("crawled_base_url")}
|
128
128
|
additional_stats[:redis_options] = content_request[:redis_options] unless content_request[:redis_options] == {}
|
129
129
|
additional_stats[:source_id] = content_request[:source_id] unless content_request[:source_id].nil?
|
130
|
-
|
130
|
+
|
131
131
|
if content_request[:queue_system] == :resque
|
132
132
|
Resque.enqueue(const_get(content_request[:crawl_finished_queue]), @stats.get_statistics.merge(additional_stats))
|
133
133
|
elsif content_request[:queue_system] == :sidekiq
|
@@ -140,7 +140,7 @@ class CrawlHelper
|
|
140
140
|
# nothing to report here, we're skipping the remaining urls as we're outside of the crawl limit
|
141
141
|
end
|
142
142
|
end
|
143
|
-
|
143
|
+
|
144
144
|
# Enqueues the content to the processing queue setup in options
|
145
145
|
def self.send_to_processing_queue(content, content_request)
|
146
146
|
content_to_send = content.merge({:internal_urls => content_request[:internal_urls], :redis_options => content_request[:redis_options], :source_id => content_request[:source_id], :crawl_id => content_request[:crawl_id]})
|
@@ -171,7 +171,7 @@ class CrawlHelper
|
|
171
171
|
end
|
172
172
|
|
173
173
|
private
|
174
|
-
|
174
|
+
|
175
175
|
# Helper method to determine if this content is to be processed or not
|
176
176
|
def self.is_permitted_type(content)
|
177
177
|
@content_request[:valid_mime_types].each do |mime_type|
|
@@ -179,19 +179,19 @@ class CrawlHelper
|
|
179
179
|
end
|
180
180
|
false
|
181
181
|
end
|
182
|
-
|
182
|
+
|
183
183
|
# Returns true if the crawl count is within limits
|
184
184
|
def self.within_crawl_limits?(crawl_limit)
|
185
185
|
refresh_counters
|
186
186
|
crawl_limit.nil? or @crawl_started_counter < crawl_limit.to_i
|
187
187
|
end
|
188
|
-
|
188
|
+
|
189
189
|
# Returns true if the queue count is calculated to be still within limits when complete
|
190
190
|
def self.within_queue_limits?(crawl_limit)
|
191
191
|
refresh_counters
|
192
192
|
(@content_request[:crawl_limit_by_page]&& (crawl_limit.nil? or @crawl_counter < crawl_limit.to_i)) || within_crawl_limits?(crawl_limit) && (crawl_limit.nil? || (@queue_counter + @crawl_counter) < crawl_limit.to_i)
|
193
193
|
end
|
194
|
-
|
194
|
+
|
195
195
|
# Sets the base url in redis. If the first page is a redirect, it sets the base_url to the destination
|
196
196
|
def self.set_base_url(redis, content, content_request)
|
197
197
|
if redis.get("base_url").nil?
|
@@ -202,7 +202,7 @@ class CrawlHelper
|
|
202
202
|
redis.set("base_url", content[:url])
|
203
203
|
end
|
204
204
|
end
|
205
|
-
|
205
|
+
|
206
206
|
# Enqueues content to the crawl_job queue
|
207
207
|
def self.enqueue_content(content_request, link)
|
208
208
|
new_request = content_request.clone
|
@@ -219,7 +219,7 @@ class CrawlHelper
|
|
219
219
|
@redis.sadd "queued", link
|
220
220
|
increment_queue_counter
|
221
221
|
end
|
222
|
-
|
222
|
+
|
223
223
|
# Increments the queue counter and refreshes crawl counters
|
224
224
|
def self.increment_queue_counter
|
225
225
|
@redis.incr "queue-counter"
|
@@ -245,7 +245,7 @@ class CrawlHelper
|
|
245
245
|
@crawl_started_counter = @redis.get("crawl-started-counter").to_i
|
246
246
|
@queue_counter = @redis.get("queue-counter").to_i
|
247
247
|
end
|
248
|
-
|
248
|
+
|
249
249
|
def self.print_counters
|
250
250
|
puts counters
|
251
251
|
end
|
@@ -253,4 +253,4 @@ class CrawlHelper
|
|
253
253
|
def self.counters
|
254
254
|
"@crawl_counter: #{@crawl_counter} @crawl_started_counter: #{@crawl_started_counter} @queue_counter: #{@queue_counter}"
|
255
255
|
end
|
256
|
-
end
|
256
|
+
end
|
metadata
CHANGED
@@ -1,139 +1,139 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cobweb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.21
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Stewart McKee
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: addressable
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: awesome_print
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: sinatra
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: '0'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: haml
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- -
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0'
|
90
90
|
type: :runtime
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- -
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: redis-namespace
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - ">="
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: '0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - ">="
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: json
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
|
-
- -
|
115
|
+
- - ">="
|
116
116
|
- !ruby/object:Gem::Version
|
117
117
|
version: '0'
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
|
-
- -
|
122
|
+
- - ">="
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: slop
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
|
-
- -
|
129
|
+
- - ">="
|
130
130
|
- !ruby/object:Gem::Version
|
131
131
|
version: '0'
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
|
-
- -
|
136
|
+
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
139
|
description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
|
@@ -146,6 +146,186 @@ extensions: []
|
|
146
146
|
extra_rdoc_files:
|
147
147
|
- README.textile
|
148
148
|
files:
|
149
|
+
- README.textile
|
150
|
+
- lib/cobweb.rb
|
151
|
+
- lib/cobweb_crawl_helper.rb
|
152
|
+
- lib/cobweb_crawler.rb
|
153
|
+
- lib/cobweb_dsl.rb
|
154
|
+
- lib/cobweb_finished_job.rb
|
155
|
+
- lib/cobweb_links.rb
|
156
|
+
- lib/cobweb_process_job.rb
|
157
|
+
- lib/cobweb_version.rb
|
158
|
+
- lib/content_link_parser.rb
|
159
|
+
- lib/crawl.rb
|
160
|
+
- lib/crawl_finished_worker.rb
|
161
|
+
- lib/crawl_helper.rb
|
162
|
+
- lib/crawl_job.rb
|
163
|
+
- lib/crawl_object.rb
|
164
|
+
- lib/crawl_process_worker.rb
|
165
|
+
- lib/crawl_worker.rb
|
166
|
+
- lib/document.rb
|
167
|
+
- lib/encoding_safe_process_job.rb
|
168
|
+
- lib/export_command.rb
|
169
|
+
- lib/hash_util.rb
|
170
|
+
- lib/redirect_error.rb
|
171
|
+
- lib/redis_connection.rb
|
172
|
+
- lib/report_command.rb
|
173
|
+
- lib/robots.rb
|
174
|
+
- lib/server.rb
|
175
|
+
- lib/sidekiq/cobweb_helper.rb
|
176
|
+
- lib/stats.rb
|
177
|
+
- lib/string.rb
|
178
|
+
- lib/uri_helper.rb
|
179
|
+
- public/css/accordion.css
|
180
|
+
- public/css/custom.css
|
181
|
+
- public/css/datatable.css
|
182
|
+
- public/css/datepicker.css
|
183
|
+
- public/css/form-buttons.css
|
184
|
+
- public/css/forms.css
|
185
|
+
- public/css/jquery.fancybox-1.3.4.css
|
186
|
+
- public/css/jquery.treeview.css
|
187
|
+
- public/css/link-buttons.css
|
188
|
+
- public/css/login.css
|
189
|
+
- public/css/menu.css
|
190
|
+
- public/css/messages.css
|
191
|
+
- public/css/modalbox.css
|
192
|
+
- public/css/statics.css
|
193
|
+
- public/css/style.css
|
194
|
+
- public/css/style_text.css
|
195
|
+
- public/css/tabs.css
|
196
|
+
- public/css/wysiwyg-editor.css
|
197
|
+
- public/css/wysiwyg.css
|
198
|
+
- public/css/wysiwyg.modal.css
|
199
|
+
- public/gfx/back-menu.gif
|
200
|
+
- public/gfx/back-submenu.gif
|
201
|
+
- public/gfx/background.gif
|
202
|
+
- public/gfx/box-hide.png
|
203
|
+
- public/gfx/box-search.png
|
204
|
+
- public/gfx/box-title.gif
|
205
|
+
- public/gfx/code.gif
|
206
|
+
- public/gfx/datepicker-arrows.gif
|
207
|
+
- public/gfx/fancybox/blank.gif
|
208
|
+
- public/gfx/fancybox/fancy_close.png
|
209
|
+
- public/gfx/fancybox/fancy_loading.png
|
210
|
+
- public/gfx/fancybox/fancy_nav_left.png
|
211
|
+
- public/gfx/fancybox/fancy_nav_right.png
|
212
|
+
- public/gfx/fancybox/fancy_title_left.png
|
213
|
+
- public/gfx/fancybox/fancy_title_main.png
|
214
|
+
- public/gfx/fancybox/fancy_title_over.png
|
215
|
+
- public/gfx/fancybox/fancy_title_right.png
|
216
|
+
- public/gfx/fancybox/fancybox-x.png
|
217
|
+
- public/gfx/fancybox/fancybox.png
|
218
|
+
- public/gfx/forms/date-next.gif
|
219
|
+
- public/gfx/forms/date-prev.gif
|
220
|
+
- public/gfx/forms/forms-checkbox.gif
|
221
|
+
- public/gfx/forms/forms-date.gif
|
222
|
+
- public/gfx/forms/forms-file.gif
|
223
|
+
- public/gfx/forms/forms-input-big.gif
|
224
|
+
- public/gfx/forms/forms-input-medium.gif
|
225
|
+
- public/gfx/forms/forms-input-small.gif
|
226
|
+
- public/gfx/forms/forms-input-xl.gif
|
227
|
+
- public/gfx/forms/forms-radio.gif
|
228
|
+
- public/gfx/forms/forms-selectbox-small.gif
|
229
|
+
- public/gfx/forms/forms-selectbox.gif
|
230
|
+
- public/gfx/forms/forms-textarea-big.gif
|
231
|
+
- public/gfx/forms/forms-textarea-medium.gif
|
232
|
+
- public/gfx/forms/forms-textarea-small.gif
|
233
|
+
- public/gfx/forms/forms-textarea-xl.gif
|
234
|
+
- public/gfx/icon-delete.png
|
235
|
+
- public/gfx/icon-edit.png
|
236
|
+
- public/gfx/icon-home.gif
|
237
|
+
- public/gfx/img-delete.png
|
238
|
+
- public/gfx/img-hover.png
|
239
|
+
- public/gfx/img-zoom.png
|
240
|
+
- public/gfx/jquery.wysiwyg.gif
|
241
|
+
- public/gfx/label-icons.gif
|
242
|
+
- public/gfx/label.gif
|
243
|
+
- public/gfx/li-down.gif
|
244
|
+
- public/gfx/li.gif
|
245
|
+
- public/gfx/link-button-big.gif
|
246
|
+
- public/gfx/link-button-medium.gif
|
247
|
+
- public/gfx/link-button.gif
|
248
|
+
- public/gfx/loading-2.gif
|
249
|
+
- public/gfx/loading.gif
|
250
|
+
- public/gfx/logo.png
|
251
|
+
- public/gfx/modal-title.gif
|
252
|
+
- public/gfx/photos/00.jpg
|
253
|
+
- public/gfx/photos/01.jpg
|
254
|
+
- public/gfx/photos/01xl.jpg
|
255
|
+
- public/gfx/photos/02.jpg
|
256
|
+
- public/gfx/photos/02xl.jpg
|
257
|
+
- public/gfx/photos/03.jpg
|
258
|
+
- public/gfx/photos/03xl.jpg
|
259
|
+
- public/gfx/photos/04.jpg
|
260
|
+
- public/gfx/photos/04xl.jpg
|
261
|
+
- public/gfx/photos/05.jpg
|
262
|
+
- public/gfx/photos/05xl.jpg
|
263
|
+
- public/gfx/photos/06.jpg
|
264
|
+
- public/gfx/photos/06xl.jpg
|
265
|
+
- public/gfx/photos/07.jpg
|
266
|
+
- public/gfx/photos/07xl.jpg
|
267
|
+
- public/gfx/photos/08.jpg
|
268
|
+
- public/gfx/photos/08xl.jpg
|
269
|
+
- public/gfx/photos/09.jpg
|
270
|
+
- public/gfx/photos/09xl.jpg
|
271
|
+
- public/gfx/photos/10.jpg
|
272
|
+
- public/gfx/photos/10xl.jpg
|
273
|
+
- public/gfx/photos/11.jpg
|
274
|
+
- public/gfx/photos/11xl.jpg
|
275
|
+
- public/gfx/photos/12.jpg
|
276
|
+
- public/gfx/photos/12xl.jpg
|
277
|
+
- public/gfx/photos/13.jpg
|
278
|
+
- public/gfx/photos/13xl.jpg
|
279
|
+
- public/gfx/photos/14.jpg
|
280
|
+
- public/gfx/photos/14xl.jpg
|
281
|
+
- public/gfx/photos/15.jpg
|
282
|
+
- public/gfx/photos/15xl.jpg
|
283
|
+
- public/gfx/search-button.gif
|
284
|
+
- public/gfx/search-input.gif
|
285
|
+
- public/gfx/slider-button.gif
|
286
|
+
- public/gfx/system-messages.gif
|
287
|
+
- public/gfx/table-asc-arrow.gif
|
288
|
+
- public/gfx/table-desc-arrow.gif
|
289
|
+
- public/gfx/table-first.gif
|
290
|
+
- public/gfx/table-last.gif
|
291
|
+
- public/gfx/table-next.gif
|
292
|
+
- public/gfx/table-number.gif
|
293
|
+
- public/gfx/table-prev.gif
|
294
|
+
- public/gfx/table-rows.gif
|
295
|
+
- public/gfx/table-search.gif
|
296
|
+
- public/gfx/table-thead.gif
|
297
|
+
- public/gfx/tooltip.gif
|
298
|
+
- public/gfx/treeview/ajax-loader.gif
|
299
|
+
- public/gfx/treeview/file.gif
|
300
|
+
- public/gfx/treeview/folder-closed.gif
|
301
|
+
- public/gfx/treeview/folder.gif
|
302
|
+
- public/gfx/treeview/minus.gif
|
303
|
+
- public/gfx/treeview/plus.gif
|
304
|
+
- public/gfx/treeview/treeview-default-line.gif
|
305
|
+
- public/gfx/treeview/treeview-default.gif
|
306
|
+
- public/js/controls/wysiwyg.image.js
|
307
|
+
- public/js/controls/wysiwyg.link.js
|
308
|
+
- public/js/controls/wysiwyg.table.js
|
309
|
+
- public/js/customInput.jquery.js
|
310
|
+
- public/js/excanvas.min.js
|
311
|
+
- public/js/hoverIntent.js
|
312
|
+
- public/js/inline.js
|
313
|
+
- public/js/jquery-1.7.1.min.js
|
314
|
+
- public/js/jquery-ui-select.js
|
315
|
+
- public/js/jquery-ui-timepicker-addon.js
|
316
|
+
- public/js/jquery-ui.js
|
317
|
+
- public/js/jquery.dataTables.js
|
318
|
+
- public/js/jquery.fancybox-1.3.4.js
|
319
|
+
- public/js/jquery.filestyle.mini.js
|
320
|
+
- public/js/jquery.flot.js
|
321
|
+
- public/js/jquery.flot.resize.min.js
|
322
|
+
- public/js/jquery.graphtable-0.2.js
|
323
|
+
- public/js/jquery.tipsy.js
|
324
|
+
- public/js/jquery.treeview.js
|
325
|
+
- public/js/jquery.wysiwyg.js
|
326
|
+
- public/js/plugins/wysiwyg.rmFormat.js
|
327
|
+
- public/js/superfish.js
|
328
|
+
- public/js/supersubs.js
|
149
329
|
- spec/cobweb/cobweb_crawl_helper_spec.rb
|
150
330
|
- spec/cobweb/cobweb_crawl_spec.rb
|
151
331
|
- spec/cobweb/cobweb_crawler_spec.rb
|
@@ -327,189 +507,9 @@ files:
|
|
327
507
|
- spec/samples/sample_site/typography.html
|
328
508
|
- spec/spec.opts
|
329
509
|
- spec/spec_helper.rb
|
330
|
-
- lib/cobweb.rb
|
331
|
-
- lib/cobweb_crawl_helper.rb
|
332
|
-
- lib/cobweb_crawler.rb
|
333
|
-
- lib/cobweb_dsl.rb
|
334
|
-
- lib/cobweb_finished_job.rb
|
335
|
-
- lib/cobweb_links.rb
|
336
|
-
- lib/cobweb_process_job.rb
|
337
|
-
- lib/cobweb_version.rb
|
338
|
-
- lib/content_link_parser.rb
|
339
|
-
- lib/crawl.rb
|
340
|
-
- lib/crawl_finished_worker.rb
|
341
|
-
- lib/crawl_helper.rb
|
342
|
-
- lib/crawl_job.rb
|
343
|
-
- lib/crawl_object.rb
|
344
|
-
- lib/crawl_process_worker.rb
|
345
|
-
- lib/crawl_worker.rb
|
346
|
-
- lib/document.rb
|
347
|
-
- lib/encoding_safe_process_job.rb
|
348
|
-
- lib/export_command.rb
|
349
|
-
- lib/hash_util.rb
|
350
|
-
- lib/redirect_error.rb
|
351
|
-
- lib/redis_connection.rb
|
352
|
-
- lib/report_command.rb
|
353
|
-
- lib/robots.rb
|
354
|
-
- lib/server.rb
|
355
|
-
- lib/sidekiq/cobweb_helper.rb
|
356
|
-
- lib/stats.rb
|
357
|
-
- lib/string.rb
|
358
|
-
- lib/uri_helper.rb
|
359
510
|
- views/home.haml
|
360
511
|
- views/layout.haml
|
361
512
|
- views/statistics.haml
|
362
|
-
- public/css/accordion.css
|
363
|
-
- public/css/custom.css
|
364
|
-
- public/css/datatable.css
|
365
|
-
- public/css/datepicker.css
|
366
|
-
- public/css/form-buttons.css
|
367
|
-
- public/css/forms.css
|
368
|
-
- public/css/jquery.fancybox-1.3.4.css
|
369
|
-
- public/css/jquery.treeview.css
|
370
|
-
- public/css/link-buttons.css
|
371
|
-
- public/css/login.css
|
372
|
-
- public/css/menu.css
|
373
|
-
- public/css/messages.css
|
374
|
-
- public/css/modalbox.css
|
375
|
-
- public/css/statics.css
|
376
|
-
- public/css/style.css
|
377
|
-
- public/css/style_text.css
|
378
|
-
- public/css/tabs.css
|
379
|
-
- public/css/wysiwyg-editor.css
|
380
|
-
- public/css/wysiwyg.css
|
381
|
-
- public/css/wysiwyg.modal.css
|
382
|
-
- public/gfx/back-menu.gif
|
383
|
-
- public/gfx/back-submenu.gif
|
384
|
-
- public/gfx/background.gif
|
385
|
-
- public/gfx/box-hide.png
|
386
|
-
- public/gfx/box-search.png
|
387
|
-
- public/gfx/box-title.gif
|
388
|
-
- public/gfx/code.gif
|
389
|
-
- public/gfx/datepicker-arrows.gif
|
390
|
-
- public/gfx/fancybox/blank.gif
|
391
|
-
- public/gfx/fancybox/fancy_close.png
|
392
|
-
- public/gfx/fancybox/fancy_loading.png
|
393
|
-
- public/gfx/fancybox/fancy_nav_left.png
|
394
|
-
- public/gfx/fancybox/fancy_nav_right.png
|
395
|
-
- public/gfx/fancybox/fancy_title_left.png
|
396
|
-
- public/gfx/fancybox/fancy_title_main.png
|
397
|
-
- public/gfx/fancybox/fancy_title_over.png
|
398
|
-
- public/gfx/fancybox/fancy_title_right.png
|
399
|
-
- public/gfx/fancybox/fancybox-x.png
|
400
|
-
- public/gfx/fancybox/fancybox.png
|
401
|
-
- public/gfx/forms/date-next.gif
|
402
|
-
- public/gfx/forms/date-prev.gif
|
403
|
-
- public/gfx/forms/forms-checkbox.gif
|
404
|
-
- public/gfx/forms/forms-date.gif
|
405
|
-
- public/gfx/forms/forms-file.gif
|
406
|
-
- public/gfx/forms/forms-input-big.gif
|
407
|
-
- public/gfx/forms/forms-input-medium.gif
|
408
|
-
- public/gfx/forms/forms-input-small.gif
|
409
|
-
- public/gfx/forms/forms-input-xl.gif
|
410
|
-
- public/gfx/forms/forms-radio.gif
|
411
|
-
- public/gfx/forms/forms-selectbox-small.gif
|
412
|
-
- public/gfx/forms/forms-selectbox.gif
|
413
|
-
- public/gfx/forms/forms-textarea-big.gif
|
414
|
-
- public/gfx/forms/forms-textarea-medium.gif
|
415
|
-
- public/gfx/forms/forms-textarea-small.gif
|
416
|
-
- public/gfx/forms/forms-textarea-xl.gif
|
417
|
-
- public/gfx/icon-delete.png
|
418
|
-
- public/gfx/icon-edit.png
|
419
|
-
- public/gfx/icon-home.gif
|
420
|
-
- public/gfx/img-delete.png
|
421
|
-
- public/gfx/img-hover.png
|
422
|
-
- public/gfx/img-zoom.png
|
423
|
-
- public/gfx/jquery.wysiwyg.gif
|
424
|
-
- public/gfx/label-icons.gif
|
425
|
-
- public/gfx/label.gif
|
426
|
-
- public/gfx/li-down.gif
|
427
|
-
- public/gfx/li.gif
|
428
|
-
- public/gfx/link-button-big.gif
|
429
|
-
- public/gfx/link-button-medium.gif
|
430
|
-
- public/gfx/link-button.gif
|
431
|
-
- public/gfx/loading-2.gif
|
432
|
-
- public/gfx/loading.gif
|
433
|
-
- public/gfx/logo.png
|
434
|
-
- public/gfx/modal-title.gif
|
435
|
-
- public/gfx/photos/00.jpg
|
436
|
-
- public/gfx/photos/01.jpg
|
437
|
-
- public/gfx/photos/01xl.jpg
|
438
|
-
- public/gfx/photos/02.jpg
|
439
|
-
- public/gfx/photos/02xl.jpg
|
440
|
-
- public/gfx/photos/03.jpg
|
441
|
-
- public/gfx/photos/03xl.jpg
|
442
|
-
- public/gfx/photos/04.jpg
|
443
|
-
- public/gfx/photos/04xl.jpg
|
444
|
-
- public/gfx/photos/05.jpg
|
445
|
-
- public/gfx/photos/05xl.jpg
|
446
|
-
- public/gfx/photos/06.jpg
|
447
|
-
- public/gfx/photos/06xl.jpg
|
448
|
-
- public/gfx/photos/07.jpg
|
449
|
-
- public/gfx/photos/07xl.jpg
|
450
|
-
- public/gfx/photos/08.jpg
|
451
|
-
- public/gfx/photos/08xl.jpg
|
452
|
-
- public/gfx/photos/09.jpg
|
453
|
-
- public/gfx/photos/09xl.jpg
|
454
|
-
- public/gfx/photos/10.jpg
|
455
|
-
- public/gfx/photos/10xl.jpg
|
456
|
-
- public/gfx/photos/11.jpg
|
457
|
-
- public/gfx/photos/11xl.jpg
|
458
|
-
- public/gfx/photos/12.jpg
|
459
|
-
- public/gfx/photos/12xl.jpg
|
460
|
-
- public/gfx/photos/13.jpg
|
461
|
-
- public/gfx/photos/13xl.jpg
|
462
|
-
- public/gfx/photos/14.jpg
|
463
|
-
- public/gfx/photos/14xl.jpg
|
464
|
-
- public/gfx/photos/15.jpg
|
465
|
-
- public/gfx/photos/15xl.jpg
|
466
|
-
- public/gfx/search-button.gif
|
467
|
-
- public/gfx/search-input.gif
|
468
|
-
- public/gfx/slider-button.gif
|
469
|
-
- public/gfx/system-messages.gif
|
470
|
-
- public/gfx/table-asc-arrow.gif
|
471
|
-
- public/gfx/table-desc-arrow.gif
|
472
|
-
- public/gfx/table-first.gif
|
473
|
-
- public/gfx/table-last.gif
|
474
|
-
- public/gfx/table-next.gif
|
475
|
-
- public/gfx/table-number.gif
|
476
|
-
- public/gfx/table-prev.gif
|
477
|
-
- public/gfx/table-rows.gif
|
478
|
-
- public/gfx/table-search.gif
|
479
|
-
- public/gfx/table-thead.gif
|
480
|
-
- public/gfx/tooltip.gif
|
481
|
-
- public/gfx/treeview/ajax-loader.gif
|
482
|
-
- public/gfx/treeview/file.gif
|
483
|
-
- public/gfx/treeview/folder-closed.gif
|
484
|
-
- public/gfx/treeview/folder.gif
|
485
|
-
- public/gfx/treeview/minus.gif
|
486
|
-
- public/gfx/treeview/plus.gif
|
487
|
-
- public/gfx/treeview/treeview-default-line.gif
|
488
|
-
- public/gfx/treeview/treeview-default.gif
|
489
|
-
- public/js/controls/wysiwyg.image.js
|
490
|
-
- public/js/controls/wysiwyg.link.js
|
491
|
-
- public/js/controls/wysiwyg.table.js
|
492
|
-
- public/js/customInput.jquery.js
|
493
|
-
- public/js/excanvas.min.js
|
494
|
-
- public/js/hoverIntent.js
|
495
|
-
- public/js/inline.js
|
496
|
-
- public/js/jquery-1.7.1.min.js
|
497
|
-
- public/js/jquery-ui-select.js
|
498
|
-
- public/js/jquery-ui-timepicker-addon.js
|
499
|
-
- public/js/jquery-ui.js
|
500
|
-
- public/js/jquery.dataTables.js
|
501
|
-
- public/js/jquery.fancybox-1.3.4.js
|
502
|
-
- public/js/jquery.filestyle.mini.js
|
503
|
-
- public/js/jquery.flot.js
|
504
|
-
- public/js/jquery.flot.resize.min.js
|
505
|
-
- public/js/jquery.graphtable-0.2.js
|
506
|
-
- public/js/jquery.tipsy.js
|
507
|
-
- public/js/jquery.treeview.js
|
508
|
-
- public/js/jquery.wysiwyg.js
|
509
|
-
- public/js/plugins/wysiwyg.rmFormat.js
|
510
|
-
- public/js/superfish.js
|
511
|
-
- public/js/supersubs.js
|
512
|
-
- README.textile
|
513
513
|
homepage: http://github.com/stewartmckee/cobweb
|
514
514
|
licenses:
|
515
515
|
- MIT
|
@@ -520,17 +520,17 @@ require_paths:
|
|
520
520
|
- lib
|
521
521
|
required_ruby_version: !ruby/object:Gem::Requirement
|
522
522
|
requirements:
|
523
|
-
- -
|
523
|
+
- - ">="
|
524
524
|
- !ruby/object:Gem::Version
|
525
525
|
version: '0'
|
526
526
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
527
527
|
requirements:
|
528
|
-
- -
|
528
|
+
- - ">="
|
529
529
|
- !ruby/object:Gem::Version
|
530
530
|
version: '0'
|
531
531
|
requirements: []
|
532
532
|
rubyforge_project:
|
533
|
-
rubygems_version: 2.
|
533
|
+
rubygems_version: 2.2.2
|
534
534
|
signing_key:
|
535
535
|
specification_version: 4
|
536
536
|
summary: Cobweb is a web crawler that can use resque to cluster crawls to quickly
|