parallel588_polipus 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.gitignore +53 -0
- data/.rspec +2 -0
- data/.rubocop.yml +17 -0
- data/.rubocop_todo.yml +33 -0
- data/.travis.yml +22 -0
- data/AUTHORS.md +5 -0
- data/CHANGELOG.md +61 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +20 -0
- data/README.md +70 -0
- data/Rakefile +8 -0
- data/examples/basic.rb +63 -0
- data/examples/error_handling.rb +23 -0
- data/examples/incremental.rb +63 -0
- data/examples/robots_txt_handling.rb +14 -0
- data/examples/survival.rb +10 -0
- data/lib/polipus.rb +488 -0
- data/lib/polipus/http.rb +282 -0
- data/lib/polipus/page.rb +256 -0
- data/lib/polipus/plugin.rb +14 -0
- data/lib/polipus/plugins/cleaner.rb +25 -0
- data/lib/polipus/plugins/sample.rb +15 -0
- data/lib/polipus/plugins/sleeper.rb +22 -0
- data/lib/polipus/queue_overflow.rb +26 -0
- data/lib/polipus/queue_overflow/base.rb +7 -0
- data/lib/polipus/queue_overflow/dev_null_queue.rb +34 -0
- data/lib/polipus/queue_overflow/manager.rb +57 -0
- data/lib/polipus/queue_overflow/mongo_queue.rb +60 -0
- data/lib/polipus/queue_overflow/mongo_queue_capped.rb +29 -0
- data/lib/polipus/queue_overflow/worker.rb +24 -0
- data/lib/polipus/robotex.rb +145 -0
- data/lib/polipus/signal_handler.rb +42 -0
- data/lib/polipus/storage.rb +31 -0
- data/lib/polipus/storage/base.rb +20 -0
- data/lib/polipus/storage/dev_null.rb +35 -0
- data/lib/polipus/storage/memory_store.rb +56 -0
- data/lib/polipus/storage/mongo_store.rb +90 -0
- data/lib/polipus/storage/rethink_store.rb +90 -0
- data/lib/polipus/url_tracker.rb +21 -0
- data/lib/polipus/url_tracker/bloomfilter.rb +27 -0
- data/lib/polipus/url_tracker/redis_set.rb +27 -0
- data/lib/polipus/version.rb +5 -0
- data/polipus.gemspec +44 -0
- data/spec/cassettes/11c3eb8bf35dfc179dc5ce44f6f5f458.yml +6144 -0
- data/spec/cassettes/1f6e1d7743ecaa86594b4e68a6462689.yml +11320 -0
- data/spec/cassettes/6adfecdb274dd26ffd3713169583ca91.yml +18236 -0
- data/spec/cassettes/978ac0eeb5df63a019b754cc8a965b06.yml +18296 -0
- data/spec/cassettes/b389efd1dcb8f09393b5aae1627c2a83.yml +36569 -0
- data/spec/cassettes/bc6fb220895689be7eeb05b09969a18d.yml +61 -0
- data/spec/cassettes/c5ce68499027d490adfbb6e5541881e4.yml +18165 -0
- data/spec/cassettes/ce16b11a7df0b70fe90c7f90063fdb8c.yml +11758 -0
- data/spec/cassettes/gzipped_on.yml +147 -0
- data/spec/cassettes/http_cookies.yml +133 -0
- data/spec/cassettes/http_tconnection_max_hits.yml +4138 -0
- data/spec/cassettes/http_test.yml +1418 -0
- data/spec/cassettes/http_test_redirect.yml +71 -0
- data/spec/clear.rb +12 -0
- data/spec/polipus/http_spec.rb +139 -0
- data/spec/polipus/page_spec.rb +68 -0
- data/spec/polipus/queue_overflow/manager_spec.rb +88 -0
- data/spec/polipus/queue_overflow_spec.rb +66 -0
- data/spec/polipus/robotex_spec.rb +85 -0
- data/spec/polipus/signal_handler_spec.rb +15 -0
- data/spec/polipus/storage/memory_store_spec.rb +87 -0
- data/spec/polipus/storage/mongo_store_spec.rb +119 -0
- data/spec/polipus/storage/rethink_store_spec.rb +117 -0
- data/spec/polipus/url_tracker_spec.rb +29 -0
- data/spec/polipus_spec.rb +107 -0
- data/spec/spec_helper.rb +42 -0
- metadata +348 -0
@@ -0,0 +1,107 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
# Integration specs for Polipus::PolipusCrawler.
#
# NOTE: spec_helper names each VCR cassette after the MD5 of the example's
# full description, so the describe/context/it strings below must stay
# byte-identical or they will no longer match the recorded cassettes.
describe Polipus::PolipusCrawler do
  # The crawlers below are configured with Redis db 10 (see :redis_options);
  # flush it after each example so state does not leak between examples.
  after(:each) { Redis.new(db: 10).flushdb }

  let(:logger) { Logger.new(nil) }

  # Baseline crawler options shared by the examples. Examples that need extra
  # options derive a copy with #merge instead of mutating this hash.
  let(:p_options) do
    {
      workers: 1,
      redis_options: { host: 'localhost', db: 10 },
      depth_limit: 1,
      queue_timeout: 1,
      user_agent: 'polipus-rspec',
      logger: logger,
      logger_level: Logger::DEBUG,
      storage: Polipus::Storage.memory_store
    }
  end

  let(:polipus) do
    Polipus::PolipusCrawler.new('polipus-rspec', ['http://rubygems.org/gems'], p_options)
  end

  # Page object for the seed URL, used to look the seed up in storage.
  let(:init_page) do
    Polipus::Page.new 'http://rubygems.org/gems'
  end

  context 'polipus' do
    it 'should create a polipus instance' do
      expect(polipus).to be_an_instance_of Polipus::PolipusCrawler
    end

    it 'should execute a crawling session' do
      polipus.takeover
      expect(polipus.storage.exists?(init_page)).to be_truthy
      expect(polipus.storage.get(init_page).links.count).to be polipus.storage.count
    end

    it 'should filter unwanted urls' do
      polipus.skip_links_like(/\/pages\//)
      polipus.takeover
      kept_links = polipus.storage.get(init_page).links.reject { |e| e.path.to_s =~ /\/pages\// }
      expect(kept_links.count).to be polipus.storage.count
    end

    it 'should follow only wanted urls' do
      wanted = [/\/pages\//, /\/gems$/]
      wanted.each { |pattern| polipus.follow_links_like(pattern) }
      polipus.takeover
      followed_links = polipus.storage.get(init_page).links.select do |e|
        wanted.any? { |pattern| e.path =~ pattern }
      end
      expect(followed_links.count).to be polipus.storage.count
    end

    it 'should refresh expired pages' do
      polipus.ttl_page = 3600
      polipus.takeover
      # Back-date every stored page by the TTL so that it counts as expired.
      polipus.storage.each do |_id, page|
        page.fetched_at = page.fetched_at - 3600
        polipus.storage.add(page)
      end
      polipus.storage.each { |_id, page| expect(page.expired?(3600)).to be_truthy }
      polipus.takeover
      polipus.storage.each { |_id, page| expect(page.expired?(3600)).to be_falsey }
    end

    it 'should re-download seeder urls no matter what' do
      # Number of downloads observed per URL across both crawling sessions.
      cache_hit = {}
      polipus.follow_links_like(/\/gems$/)
      polipus.on_page_downloaded do |page|
        cache_hit[page.url.to_s] ||= 0
        cache_hit[page.url.to_s] += 1
      end
      polipus.takeover
      polipus.takeover
      expect(cache_hit['http://rubygems.org/gems']).to be 2
    end

    it 'should call on_page_error code blocks when a page has error' do
      # Short timeouts keep the failing request to the bogus host quick.
      crawler = Polipus::PolipusCrawler.new(
        'polipus-rspec',
        ['http://dasd.adad.dom/'],
        p_options.merge(open_timeout: 1, read_timeout: 1)
      )
      a_page = nil
      crawler.on_page_error { |page| a_page = page }
      crawler.takeover
      expect(a_page).not_to be_nil
      expect(a_page.error).not_to be_nil
    end

    it 'should obey to the robots.txt file' do
      crawler = Polipus::PolipusCrawler.new(
        'polipus-rspec',
        ['https://rubygems.org/gems/polipus'],
        p_options.merge(obey_robots_txt: true)
      )
      crawler.depth_limit = 1
      crawler.takeover
      # Anchor at the START of the path. The previous pattern began with `$`
      # (end of line), which can never be followed by "/downloads/", so the
      # assertion passed for every page regardless of what was crawled.
      # NOTE(review): presumably /downloads/ is what robots.txt disallows for
      # this site -- confirm against the recorded cassette.
      crawler.storage.each do |_id, page|
        expect(page.url.path =~ %r{\A/downloads/}).to be_falsey
      end
    end

    it 'should obey to the robots.txt file with list user_agent' do
      user_agent = "Mozilla/5.0 (Windows; U; Windows NT 5.1; tr; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8 ( .NET CLR 3.5.30729; .NET4.0E)"
      lopt = p_options.merge(obey_robots_txt: true, user_agent: [user_agent])
      # The user agent is configured as a one-element list, while Robotex is
      # expected to be built with the plain string.
      flexmock(Polipus::Robotex).should_receive(:new).with(user_agent)
      Polipus::PolipusCrawler.new('polipus-rspec', ['https://rubygems.org/gems/polipus'], lopt)
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# Require this file using `require "spec_helper"`
|
2
|
+
# to ensure that it is only loaded once.
|
3
|
+
#
|
4
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
5
|
+
require 'digest/md5'
|
6
|
+
require 'coveralls'
|
7
|
+
require 'vcr'
|
8
|
+
require 'webmock/rspec'
|
9
|
+
|
10
|
+
Coveralls.wear!
|
11
|
+
|
12
|
+
# Keep recorded cassettes in the "cassettes" directory next to this file and
# hook VCR into WebMock.
VCR.configure do |vcr|
  vcr.cassette_library_dir = "#{File.dirname(__FILE__)}/cassettes"
  vcr.hook_into :webmock
end
|
16
|
+
|
17
|
+
require 'polipus'
|
18
|
+
|
19
|
+
RSpec.configure do |config|
  config.run_all_when_everything_filtered = true
  config.filter_run :focus

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  #     --seed 1234
  config.order = 'random'
  config.mock_with :flexmock

  # Wrap every example in a VCR cassette named after the MD5 of the example's
  # full description, printing the description before the run and the elapsed
  # wall-clock time after it.
  config.around(:each) do |example|
    started_at = Time.now
    description = example.metadata[:full_description]
    print description
    VCR.use_cassette(Digest::MD5.hexdigest(description)) do
      example.run
      puts " [#{Time.now - started_at}s]"
    end
  end

  config.before(:each) { Polipus::SignalHandler.disable }
end
|
39
|
+
|
40
|
+
# Spec helper that builds a Polipus::Page.
#
# @param url the page URL, handed to Polipus::Page.new unchanged
# @param params [Hash] second constructor argument (defaults to an empty hash)
# @return [Polipus::Page] the newly built page
def page_factory(url, params = {})
  Polipus::Page.new(url, params)
end
|
metadata
ADDED
@@ -0,0 +1,348 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: parallel588_polipus
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Francesco Laurita
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-03-03 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: nokogiri
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.6.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '1.6'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.6.0
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: http-cookie
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '1.0'
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.0.1
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '1.0'
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.0.1
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: redis
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '3.0'
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 3.0.4
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '3.0'
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 3.0.4
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: hiredis
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '0.5'
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.4.5
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.5'
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 0.4.5
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
name: redis-queue
|
95
|
+
requirement: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - "~>"
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0.0'
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 0.0.4
|
103
|
+
type: :runtime
|
104
|
+
prerelease: false
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0.0'
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: 0.0.4
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: redis-bloomfilter
|
115
|
+
requirement: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - "~>"
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0.0'
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: 0.0.3
|
123
|
+
type: :runtime
|
124
|
+
prerelease: false
|
125
|
+
version_requirements: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - "~>"
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0.0'
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: 0.0.3
|
133
|
+
- !ruby/object:Gem::Dependency
|
134
|
+
name: mongo
|
135
|
+
requirement: !ruby/object:Gem::Requirement
|
136
|
+
requirements:
|
137
|
+
- - "~>"
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
version: 1.11.0
|
140
|
+
type: :development
|
141
|
+
prerelease: false
|
142
|
+
version_requirements: !ruby/object:Gem::Requirement
|
143
|
+
requirements:
|
144
|
+
- - "~>"
|
145
|
+
- !ruby/object:Gem::Version
|
146
|
+
version: 1.11.0
|
147
|
+
- !ruby/object:Gem::Dependency
|
148
|
+
name: rethinkdb
|
149
|
+
requirement: !ruby/object:Gem::Requirement
|
150
|
+
requirements:
|
151
|
+
- - "~>"
|
152
|
+
- !ruby/object:Gem::Version
|
153
|
+
version: 1.15.0
|
154
|
+
type: :development
|
155
|
+
prerelease: false
|
156
|
+
version_requirements: !ruby/object:Gem::Requirement
|
157
|
+
requirements:
|
158
|
+
- - "~>"
|
159
|
+
- !ruby/object:Gem::Version
|
160
|
+
version: 1.15.0
|
161
|
+
- !ruby/object:Gem::Dependency
|
162
|
+
name: rake
|
163
|
+
requirement: !ruby/object:Gem::Requirement
|
164
|
+
requirements:
|
165
|
+
- - "~>"
|
166
|
+
- !ruby/object:Gem::Version
|
167
|
+
version: '10.3'
|
168
|
+
type: :development
|
169
|
+
prerelease: false
|
170
|
+
version_requirements: !ruby/object:Gem::Requirement
|
171
|
+
requirements:
|
172
|
+
- - "~>"
|
173
|
+
- !ruby/object:Gem::Version
|
174
|
+
version: '10.3'
|
175
|
+
- !ruby/object:Gem::Dependency
|
176
|
+
name: rspec
|
177
|
+
requirement: !ruby/object:Gem::Requirement
|
178
|
+
requirements:
|
179
|
+
- - "~>"
|
180
|
+
- !ruby/object:Gem::Version
|
181
|
+
version: 3.1.0
|
182
|
+
type: :development
|
183
|
+
prerelease: false
|
184
|
+
version_requirements: !ruby/object:Gem::Requirement
|
185
|
+
requirements:
|
186
|
+
- - "~>"
|
187
|
+
- !ruby/object:Gem::Version
|
188
|
+
version: 3.1.0
|
189
|
+
- !ruby/object:Gem::Dependency
|
190
|
+
name: flexmock
|
191
|
+
requirement: !ruby/object:Gem::Requirement
|
192
|
+
requirements:
|
193
|
+
- - "~>"
|
194
|
+
- !ruby/object:Gem::Version
|
195
|
+
version: '1.3'
|
196
|
+
type: :development
|
197
|
+
prerelease: false
|
198
|
+
version_requirements: !ruby/object:Gem::Requirement
|
199
|
+
requirements:
|
200
|
+
- - "~>"
|
201
|
+
- !ruby/object:Gem::Version
|
202
|
+
version: '1.3'
|
203
|
+
- !ruby/object:Gem::Dependency
|
204
|
+
name: vcr
|
205
|
+
requirement: !ruby/object:Gem::Requirement
|
206
|
+
requirements:
|
207
|
+
- - "~>"
|
208
|
+
- !ruby/object:Gem::Version
|
209
|
+
version: 2.9.0
|
210
|
+
type: :development
|
211
|
+
prerelease: false
|
212
|
+
version_requirements: !ruby/object:Gem::Requirement
|
213
|
+
requirements:
|
214
|
+
- - "~>"
|
215
|
+
- !ruby/object:Gem::Version
|
216
|
+
version: 2.9.0
|
217
|
+
- !ruby/object:Gem::Dependency
|
218
|
+
name: webmock
|
219
|
+
requirement: !ruby/object:Gem::Requirement
|
220
|
+
requirements:
|
221
|
+
- - "~>"
|
222
|
+
- !ruby/object:Gem::Version
|
223
|
+
version: 1.20.0
|
224
|
+
type: :development
|
225
|
+
prerelease: false
|
226
|
+
version_requirements: !ruby/object:Gem::Requirement
|
227
|
+
requirements:
|
228
|
+
- - "~>"
|
229
|
+
- !ruby/object:Gem::Version
|
230
|
+
version: 1.20.0
|
231
|
+
- !ruby/object:Gem::Dependency
|
232
|
+
name: coveralls
|
233
|
+
requirement: !ruby/object:Gem::Requirement
|
234
|
+
requirements:
|
235
|
+
- - ">="
|
236
|
+
- !ruby/object:Gem::Version
|
237
|
+
version: '0'
|
238
|
+
type: :development
|
239
|
+
prerelease: false
|
240
|
+
version_requirements: !ruby/object:Gem::Requirement
|
241
|
+
requirements:
|
242
|
+
- - ">="
|
243
|
+
- !ruby/object:Gem::Version
|
244
|
+
version: '0'
|
245
|
+
description: "\n An easy to use distributed web-crawler framework based on Redis\n
|
246
|
+
\ "
|
247
|
+
email:
|
248
|
+
- francesco.laurita@gmail.com
|
249
|
+
executables: []
|
250
|
+
extensions: []
|
251
|
+
extra_rdoc_files: []
|
252
|
+
files:
|
253
|
+
- ".document"
|
254
|
+
- ".gitignore"
|
255
|
+
- ".rspec"
|
256
|
+
- ".rubocop.yml"
|
257
|
+
- ".rubocop_todo.yml"
|
258
|
+
- ".travis.yml"
|
259
|
+
- AUTHORS.md
|
260
|
+
- CHANGELOG.md
|
261
|
+
- Gemfile
|
262
|
+
- LICENSE.txt
|
263
|
+
- README.md
|
264
|
+
- Rakefile
|
265
|
+
- examples/basic.rb
|
266
|
+
- examples/error_handling.rb
|
267
|
+
- examples/incremental.rb
|
268
|
+
- examples/robots_txt_handling.rb
|
269
|
+
- examples/survival.rb
|
270
|
+
- lib/polipus.rb
|
271
|
+
- lib/polipus/http.rb
|
272
|
+
- lib/polipus/page.rb
|
273
|
+
- lib/polipus/plugin.rb
|
274
|
+
- lib/polipus/plugins/cleaner.rb
|
275
|
+
- lib/polipus/plugins/sample.rb
|
276
|
+
- lib/polipus/plugins/sleeper.rb
|
277
|
+
- lib/polipus/queue_overflow.rb
|
278
|
+
- lib/polipus/queue_overflow/base.rb
|
279
|
+
- lib/polipus/queue_overflow/dev_null_queue.rb
|
280
|
+
- lib/polipus/queue_overflow/manager.rb
|
281
|
+
- lib/polipus/queue_overflow/mongo_queue.rb
|
282
|
+
- lib/polipus/queue_overflow/mongo_queue_capped.rb
|
283
|
+
- lib/polipus/queue_overflow/worker.rb
|
284
|
+
- lib/polipus/robotex.rb
|
285
|
+
- lib/polipus/signal_handler.rb
|
286
|
+
- lib/polipus/storage.rb
|
287
|
+
- lib/polipus/storage/base.rb
|
288
|
+
- lib/polipus/storage/dev_null.rb
|
289
|
+
- lib/polipus/storage/memory_store.rb
|
290
|
+
- lib/polipus/storage/mongo_store.rb
|
291
|
+
- lib/polipus/storage/rethink_store.rb
|
292
|
+
- lib/polipus/url_tracker.rb
|
293
|
+
- lib/polipus/url_tracker/bloomfilter.rb
|
294
|
+
- lib/polipus/url_tracker/redis_set.rb
|
295
|
+
- lib/polipus/version.rb
|
296
|
+
- polipus.gemspec
|
297
|
+
- spec/cassettes/11c3eb8bf35dfc179dc5ce44f6f5f458.yml
|
298
|
+
- spec/cassettes/1f6e1d7743ecaa86594b4e68a6462689.yml
|
299
|
+
- spec/cassettes/6adfecdb274dd26ffd3713169583ca91.yml
|
300
|
+
- spec/cassettes/978ac0eeb5df63a019b754cc8a965b06.yml
|
301
|
+
- spec/cassettes/b389efd1dcb8f09393b5aae1627c2a83.yml
|
302
|
+
- spec/cassettes/bc6fb220895689be7eeb05b09969a18d.yml
|
303
|
+
- spec/cassettes/c5ce68499027d490adfbb6e5541881e4.yml
|
304
|
+
- spec/cassettes/ce16b11a7df0b70fe90c7f90063fdb8c.yml
|
305
|
+
- spec/cassettes/gzipped_on.yml
|
306
|
+
- spec/cassettes/http_cookies.yml
|
307
|
+
- spec/cassettes/http_tconnection_max_hits.yml
|
308
|
+
- spec/cassettes/http_test.yml
|
309
|
+
- spec/cassettes/http_test_redirect.yml
|
310
|
+
- spec/clear.rb
|
311
|
+
- spec/polipus/http_spec.rb
|
312
|
+
- spec/polipus/page_spec.rb
|
313
|
+
- spec/polipus/queue_overflow/manager_spec.rb
|
314
|
+
- spec/polipus/queue_overflow_spec.rb
|
315
|
+
- spec/polipus/robotex_spec.rb
|
316
|
+
- spec/polipus/signal_handler_spec.rb
|
317
|
+
- spec/polipus/storage/memory_store_spec.rb
|
318
|
+
- spec/polipus/storage/mongo_store_spec.rb
|
319
|
+
- spec/polipus/storage/rethink_store_spec.rb
|
320
|
+
- spec/polipus/url_tracker_spec.rb
|
321
|
+
- spec/polipus_spec.rb
|
322
|
+
- spec/spec_helper.rb
|
323
|
+
homepage: https://github.com/taganaka/polipus
|
324
|
+
licenses:
|
325
|
+
- MIT
|
326
|
+
metadata: {}
|
327
|
+
post_install_message:
|
328
|
+
rdoc_options: []
|
329
|
+
require_paths:
|
330
|
+
- lib
|
331
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
332
|
+
requirements:
|
333
|
+
- - ">="
|
334
|
+
- !ruby/object:Gem::Version
|
335
|
+
version: '0'
|
336
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
337
|
+
requirements:
|
338
|
+
- - ">="
|
339
|
+
- !ruby/object:Gem::Version
|
340
|
+
version: '0'
|
341
|
+
requirements: []
|
342
|
+
rubyforge_project: polipus
|
343
|
+
rubygems_version: 2.4.5
|
344
|
+
signing_key:
|
345
|
+
specification_version: 4
|
346
|
+
summary: Polipus distributed web-crawler framework
|
347
|
+
test_files: []
|
348
|
+
has_rdoc:
|