polipus 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.rubocop.yml +17 -0
- data/.rubocop_todo.yml +37 -0
- data/.travis.yml +2 -1
- data/CHANGELOG.md +20 -0
- data/README.md +10 -0
- data/Rakefile +4 -4
- data/examples/basic.rb +16 -19
- data/examples/incremental.rb +17 -17
- data/examples/robots_txt_handling.rb +1 -1
- data/examples/survival.rb +3 -3
- data/lib/polipus.rb +186 -229
- data/lib/polipus/http.rb +41 -42
- data/lib/polipus/page.rb +33 -34
- data/lib/polipus/plugin.rb +2 -2
- data/lib/polipus/plugins/cleaner.rb +7 -8
- data/lib/polipus/plugins/sample.rb +6 -9
- data/lib/polipus/plugins/sleeper.rb +7 -8
- data/lib/polipus/queue_overflow.rb +11 -11
- data/lib/polipus/queue_overflow/base.rb +1 -1
- data/lib/polipus/queue_overflow/dev_null_queue.rb +9 -9
- data/lib/polipus/queue_overflow/manager.rb +28 -25
- data/lib/polipus/queue_overflow/mongo_queue.rb +24 -26
- data/lib/polipus/queue_overflow/mongo_queue_capped.rb +12 -12
- data/lib/polipus/robotex.rb +41 -51
- data/lib/polipus/signal_handler.rb +41 -0
- data/lib/polipus/storage.rb +11 -11
- data/lib/polipus/storage/base.rb +10 -8
- data/lib/polipus/storage/dev_null.rb +6 -7
- data/lib/polipus/storage/memory_store.rb +21 -22
- data/lib/polipus/storage/mongo_store.rb +34 -38
- data/lib/polipus/storage/s3_store.rb +33 -38
- data/lib/polipus/url_tracker.rb +3 -3
- data/lib/polipus/url_tracker/bloomfilter.rb +4 -5
- data/lib/polipus/url_tracker/redis_set.rb +3 -4
- data/lib/polipus/version.rb +3 -3
- data/polipus.gemspec +12 -13
- data/spec/clear.rb +3 -3
- data/spec/http_spec.rb +27 -28
- data/spec/page_spec.rb +16 -16
- data/spec/polipus_spec.rb +34 -31
- data/spec/queue_overflow_manager_spec.rb +30 -28
- data/spec/queue_overflow_spec.rb +15 -15
- data/spec/robotex_spec.rb +9 -10
- data/spec/signal_handler_spec.rb +18 -0
- data/spec/spec_helper.rb +7 -6
- data/spec/storage_memory_spec.rb +18 -18
- data/spec/storage_mongo_spec.rb +19 -19
- data/spec/storage_s3_spec.rb +30 -31
- data/spec/url_tracker_spec.rb +7 -7
- metadata +7 -2
data/spec/queue_overflow_spec.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'polipus/queue_overflow'
|
3
3
|
|
4
4
|
describe Polipus::QueueOverflow do
|
5
5
|
|
6
6
|
before(:all) do
|
7
|
-
@queue_overflow = Polipus::QueueOverflow.mongo_queue(nil,
|
8
|
-
@queue_overflow_capped = Polipus::QueueOverflow.mongo_queue_capped(nil,
|
9
|
-
@queue_overflow_uniq = Polipus::QueueOverflow.mongo_queue(nil,
|
7
|
+
@queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, 'queue_test')
|
8
|
+
@queue_overflow_capped = Polipus::QueueOverflow.mongo_queue_capped(nil, 'queue_test_c', max: 20)
|
9
|
+
@queue_overflow_uniq = Polipus::QueueOverflow.mongo_queue(nil, 'queue_test_u', ensure_uniq: true)
|
10
10
|
|
11
11
|
end
|
12
12
|
|
@@ -26,45 +26,45 @@ describe Polipus::QueueOverflow do
|
|
26
26
|
[@queue_overflow, @queue_overflow_capped, @queue_overflow_uniq].each do |q|
|
27
27
|
q.empty?.should be_true
|
28
28
|
q.pop.should be_nil
|
29
|
-
q <<
|
29
|
+
q << 'test'
|
30
30
|
q.size.should be == 1
|
31
|
-
q.pop.should be ==
|
31
|
+
q.pop.should be == 'test'
|
32
32
|
q.empty?.should be_true
|
33
33
|
q.pop.should be_nil
|
34
34
|
q.size.should be == 0
|
35
35
|
q.empty?.should be_true
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
38
|
end
|
39
39
|
|
40
40
|
it 'should act as a queue' do
|
41
41
|
[@queue_overflow, @queue_overflow_capped, @queue_overflow_uniq].each do |q|
|
42
42
|
10.times { |i| q << "message_#{i}" }
|
43
43
|
q.size.should be == 10
|
44
|
-
q.pop.should be ==
|
44
|
+
q.pop.should be == 'message_0'
|
45
45
|
end
|
46
|
-
|
46
|
+
|
47
47
|
end
|
48
48
|
|
49
49
|
it 'should work with complex paylod' do
|
50
50
|
[@queue_overflow, @queue_overflow_capped, @queue_overflow_uniq].each do |q|
|
51
|
-
a = {'a' => [1,2,3], 'b' => 'a_string'}
|
51
|
+
a = { 'a' => [1, 2, 3], 'b' => 'a_string' }
|
52
52
|
q << a.to_json
|
53
53
|
b = q.pop
|
54
54
|
JSON.parse(b).should be == a
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
end
|
58
58
|
|
59
59
|
it 'should honor max items if it is capped' do
|
60
60
|
30.times { |i| @queue_overflow_capped << "message_#{i}" }
|
61
61
|
@queue_overflow_capped.size.should be == 20
|
62
|
-
@queue_overflow_capped.pop.should be ==
|
62
|
+
@queue_overflow_capped.pop.should be == 'message_10'
|
63
63
|
end
|
64
64
|
|
65
65
|
it 'should contains only unique items' do
|
66
|
-
20.times {@queue_overflow_uniq <<
|
67
|
-
20.times {@queue_overflow_uniq <<
|
66
|
+
20.times { @queue_overflow_uniq << 'A' }
|
67
|
+
20.times { @queue_overflow_uniq << 'B' }
|
68
68
|
@queue_overflow_uniq.size.should be == 2
|
69
69
|
end
|
70
70
|
|
data/spec/robotex_spec.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
require
|
2
|
+
require 'polipus/robotex'
|
3
3
|
describe Polipus::Robotex do
|
4
|
-
let(:spec_domain){
|
4
|
+
let(:spec_domain) { 'http://www.example.com/' }
|
5
5
|
before(:each) do
|
6
6
|
robots = <<-END
|
7
7
|
User-Agent: msnbot
|
@@ -18,9 +18,8 @@ Disallow: /locked
|
|
18
18
|
Allow: /locked
|
19
19
|
END
|
20
20
|
stub_request(:get, 'http://www.example.com/robots.txt')
|
21
|
-
.to_return(:
|
21
|
+
.to_return(body: robots, status: [200, 'OK'], headers: { 'Content-Type' => 'text/plain' })
|
22
22
|
end
|
23
|
-
|
24
23
|
|
25
24
|
describe '#initialize' do
|
26
25
|
context 'when no arguments are supplied' do
|
@@ -71,16 +70,16 @@ END
|
|
71
70
|
context 'when no Crawl-Delay is specified for the user-agent' do
|
72
71
|
it 'returns nil' do
|
73
72
|
robotex = Polipus::Robotex.new
|
74
|
-
robotex.delay(spec_domain).should be_nil
|
73
|
+
robotex.delay(spec_domain).should be_nil
|
75
74
|
end
|
76
75
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
76
|
+
context 'when Crawl-Delay is specified for the user-agent' do
|
77
|
+
it 'returns the delay as a Fixnum' do
|
78
|
+
robotex = Polipus::Robotex.new('msnbot')
|
79
|
+
robotex.delay(spec_domain).should == 20
|
80
|
+
end
|
81
81
|
end
|
82
82
|
end
|
83
|
-
end
|
84
83
|
end
|
85
84
|
|
86
85
|
end
|
data/spec/signal_handler_spec.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Polipus::SignalHandler do
|
4
|
+
|
5
|
+
context 'signal handler' do
|
6
|
+
|
7
|
+
it 'should be enabled by default' do
|
8
|
+
Polipus::PolipusCrawler.new('polipus-rspec', [])
|
9
|
+
Polipus::SignalHandler.enabled?.should be true
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should be disabled if specified' do
|
13
|
+
Polipus::PolipusCrawler.new('polipus-rspec', [], enable_signal_handler: false)
|
14
|
+
Polipus::SignalHandler.enabled?.should be false
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -4,9 +4,9 @@
|
|
4
4
|
# loaded once.
|
5
5
|
#
|
6
6
|
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
-
require
|
8
|
-
require
|
9
|
-
require
|
7
|
+
require 'digest/md5'
|
8
|
+
require 'coveralls'
|
9
|
+
require 'webmock/rspec'
|
10
10
|
|
11
11
|
Coveralls.wear!
|
12
12
|
|
@@ -26,14 +26,15 @@ RSpec.configure do |config|
|
|
26
26
|
example.run
|
27
27
|
end
|
28
28
|
end
|
29
|
+
config.before(:each) { Polipus::SignalHandler.disable }
|
29
30
|
end
|
30
|
-
require
|
31
|
-
require
|
31
|
+
require 'vcr'
|
32
|
+
require 'polipus'
|
32
33
|
VCR.configure do |c|
|
33
34
|
c.cassette_library_dir = "#{File.dirname(__FILE__)}/cassettes"
|
34
35
|
c.hook_into :webmock
|
35
36
|
end
|
36
37
|
|
37
|
-
def page_factory
|
38
|
+
def page_factory(url, params = {})
|
38
39
|
Polipus::Page.new url, params
|
39
40
|
end
|
data/spec/storage_memory_spec.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'mongo'
|
3
|
+
require 'polipus/storage/memory_store'
|
4
4
|
describe Polipus::Storage::MemoryStore do
|
5
|
-
|
6
|
-
let(:storage){Polipus::Storage.memory_store}
|
5
|
+
|
6
|
+
let(:storage) { Polipus::Storage.memory_store }
|
7
7
|
|
8
8
|
it 'should store a page' do
|
9
|
-
p = page_factory 'http://www.google.com', :
|
9
|
+
p = page_factory 'http://www.google.com', code: 200, body: '<html></html>'
|
10
10
|
uuid = storage.add p
|
11
11
|
uuid.should be == 'ed646a3334ca891fd3467db131372140'
|
12
12
|
storage.count.should be 1
|
@@ -16,7 +16,7 @@ describe Polipus::Storage::MemoryStore do
|
|
16
16
|
end
|
17
17
|
|
18
18
|
it 'should update a page' do
|
19
|
-
p = page_factory 'http://www.google.com', :
|
19
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
20
20
|
storage.add p
|
21
21
|
p = storage.get p
|
22
22
|
p.code.should be == 301
|
@@ -24,21 +24,21 @@ describe Polipus::Storage::MemoryStore do
|
|
24
24
|
|
25
25
|
it 'should iterate over stored pages' do
|
26
26
|
storage.each do |k, page|
|
27
|
-
k.should be ==
|
27
|
+
k.should be == 'ed646a3334ca891fd3467db131372140'
|
28
28
|
page.url.to_s.should be == 'http://www.google.com'
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
32
|
it 'should delete a page' do
|
33
|
-
p = page_factory 'http://www.google.com', :
|
33
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
34
34
|
storage.remove p
|
35
35
|
storage.get(p).should be_nil
|
36
36
|
storage.count.should be 0
|
37
37
|
end
|
38
38
|
|
39
39
|
it 'should store a page removing a query string from the uuid generation' do
|
40
|
-
p = page_factory 'http://www.asd.com/?asd=lol', :
|
41
|
-
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', :
|
40
|
+
p = page_factory 'http://www.asd.com/?asd=lol', code: 200, body: '<html></html>'
|
41
|
+
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', code: 200, body: '<html></html>'
|
42
42
|
storage.include_query_string_in_uuid = false
|
43
43
|
storage.add p
|
44
44
|
storage.exists?(p_no_query).should be_true
|
@@ -46,8 +46,8 @@ describe Polipus::Storage::MemoryStore do
|
|
46
46
|
end
|
47
47
|
|
48
48
|
it 'should store a page removing a query string from the uuid generation no ending slash' do
|
49
|
-
p = page_factory 'http://www.asd.com?asd=lol', :
|
50
|
-
p_no_query = page_factory 'http://www.asd.com', :
|
49
|
+
p = page_factory 'http://www.asd.com?asd=lol', code: 200, body: '<html></html>'
|
50
|
+
p_no_query = page_factory 'http://www.asd.com', code: 200, body: '<html></html>'
|
51
51
|
storage.include_query_string_in_uuid = false
|
52
52
|
storage.add p
|
53
53
|
storage.exists?(p_no_query).should be_true
|
@@ -55,7 +55,7 @@ describe Polipus::Storage::MemoryStore do
|
|
55
55
|
end
|
56
56
|
|
57
57
|
it 'should store a page with user data associated' do
|
58
|
-
p = page_factory 'http://www.user.com', :
|
58
|
+
p = page_factory 'http://www.user.com', code: 200, body: '<html></html>'
|
59
59
|
p.user_data.name = 'Test User Data'
|
60
60
|
storage.add p
|
61
61
|
storage.exists?(p).should be_true
|
@@ -66,7 +66,7 @@ describe Polipus::Storage::MemoryStore do
|
|
66
66
|
|
67
67
|
it 'should honor the except parameters' do
|
68
68
|
storage = Polipus::Storage.mongo_store(@mongo, '_test_pages', ['body'])
|
69
|
-
p = page_factory 'http://www.user-doo.com', :
|
69
|
+
p = page_factory 'http://www.user-doo.com', code: 200, body: '<html></html>'
|
70
70
|
storage.add p
|
71
71
|
p = storage.get p
|
72
72
|
p.body.should be_empty
|
@@ -75,15 +75,15 @@ describe Polipus::Storage::MemoryStore do
|
|
75
75
|
|
76
76
|
it 'should return false if a doc not exists' do
|
77
77
|
storage.include_query_string_in_uuid = false
|
78
|
-
p_other = page_factory 'http://www.asdrrrr.com', :
|
78
|
+
p_other = page_factory 'http://www.asdrrrr.com', code: 200, body: '<html></html>'
|
79
79
|
storage.exists?(p_other).should be_false
|
80
80
|
storage.add p_other
|
81
81
|
storage.exists?(p_other).should be_true
|
82
|
-
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', :
|
82
|
+
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', code: 200, body: '<html></html>'
|
83
83
|
storage.exists?(p_other).should be_true
|
84
84
|
storage.include_query_string_in_uuid = true
|
85
85
|
storage.exists?(p_other).should be_false
|
86
86
|
|
87
87
|
end
|
88
88
|
|
89
|
-
end
|
89
|
+
end
|
data/spec/storage_mongo_spec.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'mongo'
|
3
|
+
require 'polipus/storage/mongo_store'
|
4
4
|
describe Polipus::Storage::MongoStore do
|
5
5
|
before(:all)do
|
6
|
-
@mongo = Mongo::Connection.new(
|
6
|
+
@mongo = Mongo::Connection.new('localhost', 27_017, pool_size: 15, pool_timeout: 5).db('_test_polipus')
|
7
7
|
@mongo['_test_pages'].drop
|
8
8
|
@storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
|
9
9
|
end
|
@@ -17,7 +17,7 @@ describe Polipus::Storage::MongoStore do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'should store a page' do
|
20
|
-
p = page_factory 'http://www.google.com', :
|
20
|
+
p = page_factory 'http://www.google.com', code: 200, body: '<html></html>'
|
21
21
|
uuid = @storage.add p
|
22
22
|
uuid.should be == 'ed646a3334ca891fd3467db131372140'
|
23
23
|
@storage.count.should be 1
|
@@ -28,7 +28,7 @@ describe Polipus::Storage::MongoStore do
|
|
28
28
|
end
|
29
29
|
|
30
30
|
it 'should update a page' do
|
31
|
-
p = page_factory 'http://www.google.com', :
|
31
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
32
32
|
@storage.add p
|
33
33
|
p = @storage.get p
|
34
34
|
p.code.should be == 301
|
@@ -37,21 +37,21 @@ describe Polipus::Storage::MongoStore do
|
|
37
37
|
|
38
38
|
it 'should iterate over stored pages' do
|
39
39
|
@storage.each do |k, page|
|
40
|
-
k.should be ==
|
40
|
+
k.should be == 'ed646a3334ca891fd3467db131372140'
|
41
41
|
page.url.to_s.should be == 'http://www.google.com'
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
45
|
it 'should delete a page' do
|
46
|
-
p = page_factory 'http://www.google.com', :
|
46
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
47
47
|
@storage.remove p
|
48
48
|
@storage.get(p).should be_nil
|
49
49
|
@storage.count.should be 0
|
50
50
|
end
|
51
51
|
|
52
52
|
it 'should store a page removing a query string from the uuid generation' do
|
53
|
-
p = page_factory 'http://www.asd.com/?asd=lol', :
|
54
|
-
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', :
|
53
|
+
p = page_factory 'http://www.asd.com/?asd=lol', code: 200, body: '<html></html>'
|
54
|
+
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', code: 200, body: '<html></html>'
|
55
55
|
@storage.include_query_string_in_uuid = false
|
56
56
|
@storage.add p
|
57
57
|
@storage.exists?(p_no_query).should be_true
|
@@ -59,8 +59,8 @@ describe Polipus::Storage::MongoStore do
|
|
59
59
|
end
|
60
60
|
|
61
61
|
it 'should store a page removing a query string from the uuid generation no ending slash' do
|
62
|
-
p = page_factory 'http://www.asd.com?asd=lol', :
|
63
|
-
p_no_query = page_factory 'http://www.asd.com', :
|
62
|
+
p = page_factory 'http://www.asd.com?asd=lol', code: 200, body: '<html></html>'
|
63
|
+
p_no_query = page_factory 'http://www.asd.com', code: 200, body: '<html></html>'
|
64
64
|
@storage.include_query_string_in_uuid = false
|
65
65
|
@storage.add p
|
66
66
|
@storage.exists?(p_no_query).should be_true
|
@@ -68,7 +68,7 @@ describe Polipus::Storage::MongoStore do
|
|
68
68
|
end
|
69
69
|
|
70
70
|
it 'should store a page with user data associated' do
|
71
|
-
p = page_factory 'http://www.user.com', :
|
71
|
+
p = page_factory 'http://www.user.com', code: 200, body: '<html></html>'
|
72
72
|
p.user_data.name = 'Test User Data'
|
73
73
|
@storage.add p
|
74
74
|
@storage.exists?(p).should be_true
|
@@ -79,7 +79,7 @@ describe Polipus::Storage::MongoStore do
|
|
79
79
|
|
80
80
|
it 'should honor the except parameters' do
|
81
81
|
storage = Polipus::Storage.mongo_store(@mongo, '_test_pages', ['body'])
|
82
|
-
p = page_factory 'http://www.user-doo.com', :
|
82
|
+
p = page_factory 'http://www.user-doo.com', code: 200, body: '<html></html>'
|
83
83
|
storage.add p
|
84
84
|
p = storage.get p
|
85
85
|
p.body.should be_empty
|
@@ -88,11 +88,11 @@ describe Polipus::Storage::MongoStore do
|
|
88
88
|
|
89
89
|
it 'should return false if a doc not exists' do
|
90
90
|
@storage.include_query_string_in_uuid = false
|
91
|
-
p_other = page_factory 'http://www.asdrrrr.com', :
|
91
|
+
p_other = page_factory 'http://www.asdrrrr.com', code: 200, body: '<html></html>'
|
92
92
|
@storage.exists?(p_other).should be_false
|
93
93
|
@storage.add p_other
|
94
94
|
@storage.exists?(p_other).should be_true
|
95
|
-
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', :
|
95
|
+
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', code: 200, body: '<html></html>'
|
96
96
|
@storage.exists?(p_other).should be_true
|
97
97
|
@storage.include_query_string_in_uuid = true
|
98
98
|
@storage.exists?(p_other).should be_false
|
@@ -101,7 +101,7 @@ describe Polipus::Storage::MongoStore do
|
|
101
101
|
|
102
102
|
it 'should set page.fetched_at based on the id creation' do
|
103
103
|
storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
|
104
|
-
p = page_factory 'http://www.user-doojo.com', :
|
104
|
+
p = page_factory 'http://www.user-doojo.com', code: 200, body: '<html></html>'
|
105
105
|
storage.add p
|
106
106
|
p.fetched_at.should be_nil
|
107
107
|
p = storage.get p
|
@@ -110,11 +110,11 @@ describe Polipus::Storage::MongoStore do
|
|
110
110
|
|
111
111
|
it 'should NOT set page.fetched_at if already present' do
|
112
112
|
storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
|
113
|
-
p = page_factory 'http://www.user-doojooo.com', :
|
113
|
+
p = page_factory 'http://www.user-doojooo.com', code: 200, body: '<html></html>'
|
114
114
|
p.fetched_at = 10
|
115
115
|
storage.add p
|
116
116
|
p = storage.get p
|
117
117
|
p.fetched_at.should be 10
|
118
118
|
end
|
119
119
|
|
120
|
-
end
|
120
|
+
end
|
data/spec/storage_s3_spec.rb
CHANGED
@@ -1,24 +1,23 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'aws/s3'
|
3
|
+
require 'polipus/storage/s3_store'
|
4
4
|
describe Polipus::Storage::S3Store do
|
5
|
-
|
5
|
+
|
6
6
|
before(:each) do
|
7
7
|
@storage = Polipus::Storage.s3_store(
|
8
|
-
'_test_pages',
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
'_test_pages',
|
9
|
+
|
10
|
+
access_key_id: 'XXXXXXX',
|
11
|
+
secret_access_key: 'XXXX'
|
12
|
+
|
13
13
|
)
|
14
14
|
end
|
15
|
-
|
16
|
-
after(:each) {@storage.clear}
|
17
|
-
|
15
|
+
|
16
|
+
after(:each) { @storage.clear }
|
18
17
|
|
19
18
|
it 'should store a page' do
|
20
19
|
|
21
|
-
p = page_factory 'http://www.google.com', :
|
20
|
+
p = page_factory 'http://www.google.com', code: 200, body: '<html></html>'
|
22
21
|
uuid = @storage.add p
|
23
22
|
uuid.should be == 'ed646a3334ca891fd3467db131372140'
|
24
23
|
@storage.count.should be 1
|
@@ -30,7 +29,7 @@ describe Polipus::Storage::S3Store do
|
|
30
29
|
end
|
31
30
|
|
32
31
|
it 'should update a page' do
|
33
|
-
p = page_factory 'http://www.google.com', :
|
32
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
34
33
|
@storage.add p
|
35
34
|
p = @storage.get p
|
36
35
|
p.code.should be == 301
|
@@ -39,15 +38,15 @@ describe Polipus::Storage::S3Store do
|
|
39
38
|
end
|
40
39
|
|
41
40
|
it 'should iterate over stored pages' do
|
42
|
-
10.times {|i| @storage.add page_factory("http://www.google.com/p_#{i}", :
|
41
|
+
10.times { |i| @storage.add page_factory("http://www.google.com/p_#{i}", code: 200, body: "<html>#{i}</html>") }
|
43
42
|
@storage.count.should be 10
|
44
|
-
@storage.each do |k,
|
43
|
+
@storage.each do |k, _page|
|
45
44
|
k.should be =~ /[a-f0-9]{32}/
|
46
45
|
end
|
47
46
|
end
|
48
47
|
|
49
48
|
it 'should delete a page' do
|
50
|
-
p = page_factory 'http://www.google.com', :
|
49
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
51
50
|
@storage.add p
|
52
51
|
@storage.remove p
|
53
52
|
@storage.get(p).should be_nil
|
@@ -55,8 +54,8 @@ describe Polipus::Storage::S3Store do
|
|
55
54
|
end
|
56
55
|
|
57
56
|
it 'should store a page removing a query string from the uuid generation' do
|
58
|
-
p = page_factory 'http://www.asd.com/?asd=lol', :
|
59
|
-
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', :
|
57
|
+
p = page_factory 'http://www.asd.com/?asd=lol', code: 200, body: '<html></html>'
|
58
|
+
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', code: 200, body: '<html></html>'
|
60
59
|
@storage.include_query_string_in_uuid = false
|
61
60
|
@storage.add p
|
62
61
|
@storage.exists?(p_no_query).should be_true
|
@@ -64,8 +63,8 @@ describe Polipus::Storage::S3Store do
|
|
64
63
|
end
|
65
64
|
|
66
65
|
it 'should store a page removing a query string from the uuid generation no ending slash' do
|
67
|
-
p = page_factory 'http://www.asd.com?asd=lol', :
|
68
|
-
p_no_query = page_factory 'http://www.asd.com', :
|
66
|
+
p = page_factory 'http://www.asd.com?asd=lol', code: 200, body: '<html></html>'
|
67
|
+
p_no_query = page_factory 'http://www.asd.com', code: 200, body: '<html></html>'
|
69
68
|
@storage.include_query_string_in_uuid = false
|
70
69
|
@storage.add p
|
71
70
|
@storage.exists?(p_no_query).should be_true
|
@@ -73,7 +72,7 @@ describe Polipus::Storage::S3Store do
|
|
73
72
|
end
|
74
73
|
|
75
74
|
it 'should store a page with user data associated' do
|
76
|
-
p = page_factory 'http://www.user.com', :
|
75
|
+
p = page_factory 'http://www.user.com', code: 200, body: '<html></html>'
|
77
76
|
p.user_data.name = 'Test User Data'
|
78
77
|
@storage.add p
|
79
78
|
@storage.exists?(p).should be_true
|
@@ -83,15 +82,15 @@ describe Polipus::Storage::S3Store do
|
|
83
82
|
end
|
84
83
|
|
85
84
|
it 'should honor the except parameters' do
|
86
|
-
storage =
|
87
|
-
'_test_pages',
|
88
|
-
|
89
|
-
:
|
90
|
-
:
|
85
|
+
storage = Polipus::Storage.s3_store(
|
86
|
+
'_test_pages',
|
87
|
+
{
|
88
|
+
access_key_id: 'XXXXXXX',
|
89
|
+
secret_access_key: 'XXXX'
|
91
90
|
},
|
92
91
|
['body']
|
93
92
|
)
|
94
|
-
p = page_factory 'http://www.user-doo.com', :
|
93
|
+
p = page_factory 'http://www.user-doo.com', code: 200, body: '<html></html>'
|
95
94
|
storage.add p
|
96
95
|
p = storage.get p
|
97
96
|
|
@@ -101,15 +100,15 @@ describe Polipus::Storage::S3Store do
|
|
101
100
|
|
102
101
|
it 'should return false if a doc not exists' do
|
103
102
|
@storage.include_query_string_in_uuid = false
|
104
|
-
p_other = page_factory 'http://www.asdrrrr.com', :
|
103
|
+
p_other = page_factory 'http://www.asdrrrr.com', code: 200, body: '<html></html>'
|
105
104
|
@storage.exists?(p_other).should be_false
|
106
105
|
@storage.add p_other
|
107
106
|
@storage.exists?(p_other).should be_true
|
108
|
-
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', :
|
107
|
+
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', code: 200, body: '<html></html>'
|
109
108
|
@storage.exists?(p_other).should be_true
|
110
109
|
@storage.include_query_string_in_uuid = true
|
111
110
|
@storage.exists?(p_other).should be_false
|
112
111
|
@storage.remove p_other
|
113
112
|
end
|
114
113
|
|
115
|
-
end
|
114
|
+
end
|