polipus 0.3.7 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.rspec +1 -1
- data/.rubocop.yml +3 -3
- data/.rubocop_todo.yml +1 -1
- data/.travis.yml +14 -4
- data/AUTHORS.md +1 -0
- data/CHANGELOG.md +9 -1
- data/Gemfile +9 -0
- data/README.md +2 -3
- data/Rakefile +1 -3
- data/examples/basic.rb +8 -1
- data/lib/polipus.rb +25 -13
- data/lib/polipus/queue_overflow.rb +1 -0
- data/lib/polipus/queue_overflow/manager.rb +1 -0
- data/lib/polipus/queue_overflow/mongo_queue.rb +1 -1
- data/lib/polipus/queue_overflow/worker.rb +24 -0
- data/lib/polipus/storage.rb +10 -16
- data/lib/polipus/storage/mongo_store.rb +6 -1
- data/lib/polipus/storage/rethink_store.rb +90 -0
- data/lib/polipus/version.rb +1 -1
- data/polipus.gemspec +16 -18
- data/spec/{http_spec.rb → polipus/http_spec.rb} +26 -37
- data/spec/{page_spec.rb → polipus/page_spec.rb} +7 -11
- data/spec/{queue_overflow_manager_spec.rb → polipus/queue_overflow/manager_spec.rb} +22 -29
- data/spec/{queue_overflow_spec.rb → polipus/queue_overflow_spec.rb} +14 -20
- data/spec/{robotex_spec.rb → polipus/robotex_spec.rb} +10 -11
- data/spec/{signal_handler_spec.rb → polipus/signal_handler_spec.rb} +2 -6
- data/spec/{storage_memory_spec.rb → polipus/storage/memory_store_spec.rb} +18 -21
- data/spec/{storage_mongo_spec.rb → polipus/storage/mongo_store_spec.rb} +23 -25
- data/spec/polipus/storage/rethink_store_spec.rb +117 -0
- data/spec/{url_tracker_spec.rb → polipus/url_tracker_spec.rb} +4 -4
- data/spec/polipus_spec.rb +13 -15
- data/spec/spec_helper.rb +13 -12
- metadata +76 -154
- data/lib/polipus/storage/s3_store.rb +0 -96
- data/spec/cassettes/08b228db424a926e1ed6ab63b38d847e.yml +0 -166
- data/spec/cassettes/20aa41f181b49f00078c3ca30bad5afe.yml +0 -166
- data/spec/cassettes/4640919145753505af2d0f8423de37f3.yml +0 -270
- data/spec/cassettes/66aae15a03f4aab8efd15e40d2d7882a.yml +0 -194
- data/spec/cassettes/76b7c197c95a5bf9b1e882c567192d72.yml +0 -183
- data/spec/cassettes/9b1d523b7f5db7214f8a8bd9272cccba.yml +0 -221
- data/spec/cassettes/ab333f89535a2efb284913fede6aa7c7.yml +0 -221
- data/spec/cassettes/ae5d7cffde3f53122cdf79f3d1367e8e.yml +0 -221
- data/spec/cassettes/ffe3d588b6df4b9de35e5a7ccaf5a81b.yml +0 -695
- data/spec/storage_s3_spec.rb +0 -115
data/spec/storage_s3_spec.rb
DELETED
@@ -1,115 +0,0 @@
-# encoding: UTF-8
-require 'spec_helper'
-require 'aws/s3'
-require 'polipus/storage/s3_store'
-describe Polipus::Storage::S3Store do
-
-before(:each) do
-@storage = Polipus::Storage.s3_store(
-'_test_pages',
-
-access_key_id: 'XXXXXXX',
-secret_access_key: 'XXXX'
-
-)
-end
-
-after(:each) { @storage.clear }
-
-it 'should store a page' do
-
-p = page_factory 'http://www.google.com', code: 200, body: '<html></html>'
-uuid = @storage.add p
-uuid.should be == 'ed646a3334ca891fd3467db131372140'
-@storage.count.should be 1
-p = @storage.get p
-p.url.to_s.should be == 'http://www.google.com'
-p.body.should be == '<html></html>'
-@storage.remove p
-
-end
-
-it 'should update a page' do
-p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
-@storage.add p
-p = @storage.get p
-p.code.should be == 301
-@storage.count.should be == 1
-@storage.remove p
-end
-
-it 'should iterate over stored pages' do
-10.times { |i| @storage.add page_factory("http://www.google.com/p_#{i}", code: 200, body: "<html>#{i}</html>") }
-@storage.count.should be 10
-@storage.each do |k, _page|
-k.should be =~ /[a-f0-9]{32}/
-end
-end
-
-it 'should delete a page' do
-p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
-@storage.add p
-@storage.remove p
-@storage.get(p).should be_nil
-@storage.count.should be 0
-end
-
-it 'should store a page removing a query string from the uuid generation' do
-p = page_factory 'http://www.asd.com/?asd=lol', code: 200, body: '<html></html>'
-p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', code: 200, body: '<html></html>'
-@storage.include_query_string_in_uuid = false
-@storage.add p
-@storage.exists?(p_no_query).should be_true
-@storage.remove p
-end
-
-it 'should store a page removing a query string from the uuid generation no ending slash' do
-p = page_factory 'http://www.asd.com?asd=lol', code: 200, body: '<html></html>'
-p_no_query = page_factory 'http://www.asd.com', code: 200, body: '<html></html>'
-@storage.include_query_string_in_uuid = false
-@storage.add p
-@storage.exists?(p_no_query).should be_true
-@storage.remove p
-end
-
-it 'should store a page with user data associated' do
-p = page_factory 'http://www.user.com', code: 200, body: '<html></html>'
-p.user_data.name = 'Test User Data'
-@storage.add p
-@storage.exists?(p).should be_true
-p = @storage.get(p)
-p.user_data.name.should be == 'Test User Data'
-@storage.remove p
-end
-
-it 'should honor the except parameters' do
-storage = Polipus::Storage.s3_store(
-'_test_pages',
-{
-access_key_id: 'XXXXXXX',
-secret_access_key: 'XXXX'
-},
-['body']
-)
-p = page_factory 'http://www.user-doo.com', code: 200, body: '<html></html>'
-storage.add p
-p = storage.get p
-
-p.body.should be_nil
-storage.clear
-end
-
-it 'should return false if a doc not exists' do
-@storage.include_query_string_in_uuid = false
-p_other = page_factory 'http://www.asdrrrr.com', code: 200, body: '<html></html>'
-@storage.exists?(p_other).should be_false
-@storage.add p_other
-@storage.exists?(p_other).should be_true
-p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', code: 200, body: '<html></html>'
-@storage.exists?(p_other).should be_true
-@storage.include_query_string_in_uuid = true
-@storage.exists?(p_other).should be_false
-@storage.remove p_other
-end
-
-end