polipus 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.rubocop.yml +17 -0
- data/.rubocop_todo.yml +37 -0
- data/.travis.yml +2 -1
- data/CHANGELOG.md +20 -0
- data/README.md +10 -0
- data/Rakefile +4 -4
- data/examples/basic.rb +16 -19
- data/examples/incremental.rb +17 -17
- data/examples/robots_txt_handling.rb +1 -1
- data/examples/survival.rb +3 -3
- data/lib/polipus.rb +186 -229
- data/lib/polipus/http.rb +41 -42
- data/lib/polipus/page.rb +33 -34
- data/lib/polipus/plugin.rb +2 -2
- data/lib/polipus/plugins/cleaner.rb +7 -8
- data/lib/polipus/plugins/sample.rb +6 -9
- data/lib/polipus/plugins/sleeper.rb +7 -8
- data/lib/polipus/queue_overflow.rb +11 -11
- data/lib/polipus/queue_overflow/base.rb +1 -1
- data/lib/polipus/queue_overflow/dev_null_queue.rb +9 -9
- data/lib/polipus/queue_overflow/manager.rb +28 -25
- data/lib/polipus/queue_overflow/mongo_queue.rb +24 -26
- data/lib/polipus/queue_overflow/mongo_queue_capped.rb +12 -12
- data/lib/polipus/robotex.rb +41 -51
- data/lib/polipus/signal_handler.rb +41 -0
- data/lib/polipus/storage.rb +11 -11
- data/lib/polipus/storage/base.rb +10 -8
- data/lib/polipus/storage/dev_null.rb +6 -7
- data/lib/polipus/storage/memory_store.rb +21 -22
- data/lib/polipus/storage/mongo_store.rb +34 -38
- data/lib/polipus/storage/s3_store.rb +33 -38
- data/lib/polipus/url_tracker.rb +3 -3
- data/lib/polipus/url_tracker/bloomfilter.rb +4 -5
- data/lib/polipus/url_tracker/redis_set.rb +3 -4
- data/lib/polipus/version.rb +3 -3
- data/polipus.gemspec +12 -13
- data/spec/clear.rb +3 -3
- data/spec/http_spec.rb +27 -28
- data/spec/page_spec.rb +16 -16
- data/spec/polipus_spec.rb +34 -31
- data/spec/queue_overflow_manager_spec.rb +30 -28
- data/spec/queue_overflow_spec.rb +15 -15
- data/spec/robotex_spec.rb +9 -10
- data/spec/signal_handler_spec.rb +18 -0
- data/spec/spec_helper.rb +7 -6
- data/spec/storage_memory_spec.rb +18 -18
- data/spec/storage_mongo_spec.rb +19 -19
- data/spec/storage_s3_spec.rb +30 -31
- data/spec/url_tracker_spec.rb +7 -7
- metadata +7 -2
data/spec/url_tracker_spec.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'polipus/url_tracker'
|
3
3
|
|
4
4
|
describe Polipus::UrlTracker do
|
5
5
|
before(:all) do
|
@@ -13,16 +13,16 @@ describe Polipus::UrlTracker do
|
|
13
13
|
end
|
14
14
|
|
15
15
|
it 'should work (bf)' do
|
16
|
-
url =
|
16
|
+
url = 'http://www.asd.com/asd/lol'
|
17
17
|
@bf.visit url
|
18
18
|
@bf.visited?(url).should be_true
|
19
|
-
@bf.visited?(
|
19
|
+
@bf.visited?('http://www.google.com').should be_false
|
20
20
|
end
|
21
21
|
|
22
22
|
it 'should work (redis_set)' do
|
23
|
-
url =
|
23
|
+
url = 'http://www.asd.com/asd/lol'
|
24
24
|
@set.visit url
|
25
25
|
@set.visited?(url).should be_true
|
26
|
-
@set.visited?(
|
26
|
+
@set.visited?('http://www.google.com').should be_false
|
27
27
|
end
|
28
|
-
end
|
28
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: polipus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francesco Laurita
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: redis-bloomfilter
|
@@ -315,6 +315,8 @@ files:
|
|
315
315
|
- .document
|
316
316
|
- .gitignore
|
317
317
|
- .rspec
|
318
|
+
- .rubocop.yml
|
319
|
+
- .rubocop_todo.yml
|
318
320
|
- .travis.yml
|
319
321
|
- AUTHORS.md
|
320
322
|
- CHANGELOG.md
|
@@ -341,6 +343,7 @@ files:
|
|
341
343
|
- lib/polipus/queue_overflow/mongo_queue.rb
|
342
344
|
- lib/polipus/queue_overflow/mongo_queue_capped.rb
|
343
345
|
- lib/polipus/robotex.rb
|
346
|
+
- lib/polipus/signal_handler.rb
|
344
347
|
- lib/polipus/storage.rb
|
345
348
|
- lib/polipus/storage/base.rb
|
346
349
|
- lib/polipus/storage/dev_null.rb
|
@@ -381,6 +384,7 @@ files:
|
|
381
384
|
- spec/queue_overflow_manager_spec.rb
|
382
385
|
- spec/queue_overflow_spec.rb
|
383
386
|
- spec/robotex_spec.rb
|
387
|
+
- spec/signal_handler_spec.rb
|
384
388
|
- spec/spec_helper.rb
|
385
389
|
- spec/storage_memory_spec.rb
|
386
390
|
- spec/storage_mongo_spec.rb
|
@@ -440,6 +444,7 @@ test_files:
|
|
440
444
|
- spec/queue_overflow_manager_spec.rb
|
441
445
|
- spec/queue_overflow_spec.rb
|
442
446
|
- spec/robotex_spec.rb
|
447
|
+
- spec/signal_handler_spec.rb
|
443
448
|
- spec/spec_helper.rb
|
444
449
|
- spec/storage_memory_spec.rb
|
445
450
|
- spec/storage_mongo_spec.rb
|