polipus 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.rubocop.yml +17 -0
- data/.rubocop_todo.yml +37 -0
- data/.travis.yml +2 -1
- data/CHANGELOG.md +20 -0
- data/README.md +10 -0
- data/Rakefile +4 -4
- data/examples/basic.rb +16 -19
- data/examples/incremental.rb +17 -17
- data/examples/robots_txt_handling.rb +1 -1
- data/examples/survival.rb +3 -3
- data/lib/polipus.rb +186 -229
- data/lib/polipus/http.rb +41 -42
- data/lib/polipus/page.rb +33 -34
- data/lib/polipus/plugin.rb +2 -2
- data/lib/polipus/plugins/cleaner.rb +7 -8
- data/lib/polipus/plugins/sample.rb +6 -9
- data/lib/polipus/plugins/sleeper.rb +7 -8
- data/lib/polipus/queue_overflow.rb +11 -11
- data/lib/polipus/queue_overflow/base.rb +1 -1
- data/lib/polipus/queue_overflow/dev_null_queue.rb +9 -9
- data/lib/polipus/queue_overflow/manager.rb +28 -25
- data/lib/polipus/queue_overflow/mongo_queue.rb +24 -26
- data/lib/polipus/queue_overflow/mongo_queue_capped.rb +12 -12
- data/lib/polipus/robotex.rb +41 -51
- data/lib/polipus/signal_handler.rb +41 -0
- data/lib/polipus/storage.rb +11 -11
- data/lib/polipus/storage/base.rb +10 -8
- data/lib/polipus/storage/dev_null.rb +6 -7
- data/lib/polipus/storage/memory_store.rb +21 -22
- data/lib/polipus/storage/mongo_store.rb +34 -38
- data/lib/polipus/storage/s3_store.rb +33 -38
- data/lib/polipus/url_tracker.rb +3 -3
- data/lib/polipus/url_tracker/bloomfilter.rb +4 -5
- data/lib/polipus/url_tracker/redis_set.rb +3 -4
- data/lib/polipus/version.rb +3 -3
- data/polipus.gemspec +12 -13
- data/spec/clear.rb +3 -3
- data/spec/http_spec.rb +27 -28
- data/spec/page_spec.rb +16 -16
- data/spec/polipus_spec.rb +34 -31
- data/spec/queue_overflow_manager_spec.rb +30 -28
- data/spec/queue_overflow_spec.rb +15 -15
- data/spec/robotex_spec.rb +9 -10
- data/spec/signal_handler_spec.rb +18 -0
- data/spec/spec_helper.rb +7 -6
- data/spec/storage_memory_spec.rb +18 -18
- data/spec/storage_mongo_spec.rb +19 -19
- data/spec/storage_s3_spec.rb +30 -31
- data/spec/url_tracker_spec.rb +7 -7
- metadata +7 -2
data/spec/queue_overflow_spec.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'polipus/queue_overflow'
|
3
3
|
|
4
4
|
describe Polipus::QueueOverflow do
|
5
5
|
|
6
6
|
before(:all) do
|
7
|
-
@queue_overflow = Polipus::QueueOverflow.mongo_queue(nil,
|
8
|
-
@queue_overflow_capped = Polipus::QueueOverflow.mongo_queue_capped(nil,
|
9
|
-
@queue_overflow_uniq = Polipus::QueueOverflow.mongo_queue(nil,
|
7
|
+
@queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, 'queue_test')
|
8
|
+
@queue_overflow_capped = Polipus::QueueOverflow.mongo_queue_capped(nil, 'queue_test_c', max: 20)
|
9
|
+
@queue_overflow_uniq = Polipus::QueueOverflow.mongo_queue(nil, 'queue_test_u', ensure_uniq: true)
|
10
10
|
|
11
11
|
end
|
12
12
|
|
@@ -26,45 +26,45 @@ describe Polipus::QueueOverflow do
|
|
26
26
|
[@queue_overflow, @queue_overflow_capped, @queue_overflow_uniq].each do |q|
|
27
27
|
q.empty?.should be_true
|
28
28
|
q.pop.should be_nil
|
29
|
-
q <<
|
29
|
+
q << 'test'
|
30
30
|
q.size.should be == 1
|
31
|
-
q.pop.should be ==
|
31
|
+
q.pop.should be == 'test'
|
32
32
|
q.empty?.should be_true
|
33
33
|
q.pop.should be_nil
|
34
34
|
q.size.should be == 0
|
35
35
|
q.empty?.should be_true
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
38
|
end
|
39
39
|
|
40
40
|
it 'should act as a queue' do
|
41
41
|
[@queue_overflow, @queue_overflow_capped, @queue_overflow_uniq].each do |q|
|
42
42
|
10.times { |i| q << "message_#{i}" }
|
43
43
|
q.size.should be == 10
|
44
|
-
q.pop.should be ==
|
44
|
+
q.pop.should be == 'message_0'
|
45
45
|
end
|
46
|
-
|
46
|
+
|
47
47
|
end
|
48
48
|
|
49
49
|
it 'should work with complex paylod' do
|
50
50
|
[@queue_overflow, @queue_overflow_capped, @queue_overflow_uniq].each do |q|
|
51
|
-
a = {'a' => [1,2,3], 'b' => 'a_string'}
|
51
|
+
a = { 'a' => [1, 2, 3], 'b' => 'a_string' }
|
52
52
|
q << a.to_json
|
53
53
|
b = q.pop
|
54
54
|
JSON.parse(b).should be == a
|
55
55
|
end
|
56
|
-
|
56
|
+
|
57
57
|
end
|
58
58
|
|
59
59
|
it 'should honor max items if it is capped' do
|
60
60
|
30.times { |i| @queue_overflow_capped << "message_#{i}" }
|
61
61
|
@queue_overflow_capped.size.should be == 20
|
62
|
-
@queue_overflow_capped.pop.should be ==
|
62
|
+
@queue_overflow_capped.pop.should be == 'message_10'
|
63
63
|
end
|
64
64
|
|
65
65
|
it 'should contains only unique items' do
|
66
|
-
20.times {@queue_overflow_uniq <<
|
67
|
-
20.times {@queue_overflow_uniq <<
|
66
|
+
20.times { @queue_overflow_uniq << 'A' }
|
67
|
+
20.times { @queue_overflow_uniq << 'B' }
|
68
68
|
@queue_overflow_uniq.size.should be == 2
|
69
69
|
end
|
70
70
|
|
data/spec/robotex_spec.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'spec_helper'
|
2
|
-
require
|
2
|
+
require 'polipus/robotex'
|
3
3
|
describe Polipus::Robotex do
|
4
|
-
let(:spec_domain){
|
4
|
+
let(:spec_domain) { 'http://www.example.com/' }
|
5
5
|
before(:each) do
|
6
6
|
robots = <<-END
|
7
7
|
User-Agent: msnbot
|
@@ -18,9 +18,8 @@ Disallow: /locked
|
|
18
18
|
Allow: /locked
|
19
19
|
END
|
20
20
|
stub_request(:get, 'http://www.example.com/robots.txt')
|
21
|
-
.to_return(:
|
21
|
+
.to_return(body: robots, status: [200, 'OK'], headers: { 'Content-Type' => 'text/plain' })
|
22
22
|
end
|
23
|
-
|
24
23
|
|
25
24
|
describe '#initialize' do
|
26
25
|
context 'when no arguments are supplied' do
|
@@ -71,16 +70,16 @@ END
|
|
71
70
|
context 'when no Crawl-Delay is specified for the user-agent' do
|
72
71
|
it 'returns nil' do
|
73
72
|
robotex = Polipus::Robotex.new
|
74
|
-
robotex.delay(spec_domain).should be_nil
|
73
|
+
robotex.delay(spec_domain).should be_nil
|
75
74
|
end
|
76
75
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
76
|
+
context 'when Crawl-Delay is specified for the user-agent' do
|
77
|
+
it 'returns the delay as a Fixnum' do
|
78
|
+
robotex = Polipus::Robotex.new('msnbot')
|
79
|
+
robotex.delay(spec_domain).should == 20
|
80
|
+
end
|
81
81
|
end
|
82
82
|
end
|
83
|
-
end
|
84
83
|
end
|
85
84
|
|
86
85
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Polipus::SignalHandler do
|
4
|
+
|
5
|
+
context 'signal handler' do
|
6
|
+
|
7
|
+
it 'should be enabled by default' do
|
8
|
+
Polipus::PolipusCrawler.new('polipus-rspec', [])
|
9
|
+
Polipus::SignalHandler.enabled?.should be true
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should be disabled if specified' do
|
13
|
+
Polipus::PolipusCrawler.new('polipus-rspec', [], enable_signal_handler: false)
|
14
|
+
Polipus::SignalHandler.enabled?.should be false
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -4,9 +4,9 @@
|
|
4
4
|
# loaded once.
|
5
5
|
#
|
6
6
|
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
-
require
|
8
|
-
require
|
9
|
-
require
|
7
|
+
require 'digest/md5'
|
8
|
+
require 'coveralls'
|
9
|
+
require 'webmock/rspec'
|
10
10
|
|
11
11
|
Coveralls.wear!
|
12
12
|
|
@@ -26,14 +26,15 @@ RSpec.configure do |config|
|
|
26
26
|
example.run
|
27
27
|
end
|
28
28
|
end
|
29
|
+
config.before(:each) { Polipus::SignalHandler.disable }
|
29
30
|
end
|
30
|
-
require
|
31
|
-
require
|
31
|
+
require 'vcr'
|
32
|
+
require 'polipus'
|
32
33
|
VCR.configure do |c|
|
33
34
|
c.cassette_library_dir = "#{File.dirname(__FILE__)}/cassettes"
|
34
35
|
c.hook_into :webmock
|
35
36
|
end
|
36
37
|
|
37
|
-
def page_factory
|
38
|
+
def page_factory(url, params = {})
|
38
39
|
Polipus::Page.new url, params
|
39
40
|
end
|
data/spec/storage_memory_spec.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'mongo'
|
3
|
+
require 'polipus/storage/memory_store'
|
4
4
|
describe Polipus::Storage::MemoryStore do
|
5
|
-
|
6
|
-
let(:storage){Polipus::Storage.memory_store}
|
5
|
+
|
6
|
+
let(:storage) { Polipus::Storage.memory_store }
|
7
7
|
|
8
8
|
it 'should store a page' do
|
9
|
-
p = page_factory 'http://www.google.com', :
|
9
|
+
p = page_factory 'http://www.google.com', code: 200, body: '<html></html>'
|
10
10
|
uuid = storage.add p
|
11
11
|
uuid.should be == 'ed646a3334ca891fd3467db131372140'
|
12
12
|
storage.count.should be 1
|
@@ -16,7 +16,7 @@ describe Polipus::Storage::MemoryStore do
|
|
16
16
|
end
|
17
17
|
|
18
18
|
it 'should update a page' do
|
19
|
-
p = page_factory 'http://www.google.com', :
|
19
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
20
20
|
storage.add p
|
21
21
|
p = storage.get p
|
22
22
|
p.code.should be == 301
|
@@ -24,21 +24,21 @@ describe Polipus::Storage::MemoryStore do
|
|
24
24
|
|
25
25
|
it 'should iterate over stored pages' do
|
26
26
|
storage.each do |k, page|
|
27
|
-
k.should be ==
|
27
|
+
k.should be == 'ed646a3334ca891fd3467db131372140'
|
28
28
|
page.url.to_s.should be == 'http://www.google.com'
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
32
32
|
it 'should delete a page' do
|
33
|
-
p = page_factory 'http://www.google.com', :
|
33
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
34
34
|
storage.remove p
|
35
35
|
storage.get(p).should be_nil
|
36
36
|
storage.count.should be 0
|
37
37
|
end
|
38
38
|
|
39
39
|
it 'should store a page removing a query string from the uuid generation' do
|
40
|
-
p = page_factory 'http://www.asd.com/?asd=lol', :
|
41
|
-
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', :
|
40
|
+
p = page_factory 'http://www.asd.com/?asd=lol', code: 200, body: '<html></html>'
|
41
|
+
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', code: 200, body: '<html></html>'
|
42
42
|
storage.include_query_string_in_uuid = false
|
43
43
|
storage.add p
|
44
44
|
storage.exists?(p_no_query).should be_true
|
@@ -46,8 +46,8 @@ describe Polipus::Storage::MemoryStore do
|
|
46
46
|
end
|
47
47
|
|
48
48
|
it 'should store a page removing a query string from the uuid generation no ending slash' do
|
49
|
-
p = page_factory 'http://www.asd.com?asd=lol', :
|
50
|
-
p_no_query = page_factory 'http://www.asd.com', :
|
49
|
+
p = page_factory 'http://www.asd.com?asd=lol', code: 200, body: '<html></html>'
|
50
|
+
p_no_query = page_factory 'http://www.asd.com', code: 200, body: '<html></html>'
|
51
51
|
storage.include_query_string_in_uuid = false
|
52
52
|
storage.add p
|
53
53
|
storage.exists?(p_no_query).should be_true
|
@@ -55,7 +55,7 @@ describe Polipus::Storage::MemoryStore do
|
|
55
55
|
end
|
56
56
|
|
57
57
|
it 'should store a page with user data associated' do
|
58
|
-
p = page_factory 'http://www.user.com', :
|
58
|
+
p = page_factory 'http://www.user.com', code: 200, body: '<html></html>'
|
59
59
|
p.user_data.name = 'Test User Data'
|
60
60
|
storage.add p
|
61
61
|
storage.exists?(p).should be_true
|
@@ -66,7 +66,7 @@ describe Polipus::Storage::MemoryStore do
|
|
66
66
|
|
67
67
|
it 'should honor the except parameters' do
|
68
68
|
storage = Polipus::Storage.mongo_store(@mongo, '_test_pages', ['body'])
|
69
|
-
p = page_factory 'http://www.user-doo.com', :
|
69
|
+
p = page_factory 'http://www.user-doo.com', code: 200, body: '<html></html>'
|
70
70
|
storage.add p
|
71
71
|
p = storage.get p
|
72
72
|
p.body.should be_empty
|
@@ -75,15 +75,15 @@ describe Polipus::Storage::MemoryStore do
|
|
75
75
|
|
76
76
|
it 'should return false if a doc not exists' do
|
77
77
|
storage.include_query_string_in_uuid = false
|
78
|
-
p_other = page_factory 'http://www.asdrrrr.com', :
|
78
|
+
p_other = page_factory 'http://www.asdrrrr.com', code: 200, body: '<html></html>'
|
79
79
|
storage.exists?(p_other).should be_false
|
80
80
|
storage.add p_other
|
81
81
|
storage.exists?(p_other).should be_true
|
82
|
-
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', :
|
82
|
+
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', code: 200, body: '<html></html>'
|
83
83
|
storage.exists?(p_other).should be_true
|
84
84
|
storage.include_query_string_in_uuid = true
|
85
85
|
storage.exists?(p_other).should be_false
|
86
86
|
|
87
87
|
end
|
88
88
|
|
89
|
-
end
|
89
|
+
end
|
data/spec/storage_mongo_spec.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'mongo'
|
3
|
+
require 'polipus/storage/mongo_store'
|
4
4
|
describe Polipus::Storage::MongoStore do
|
5
5
|
before(:all)do
|
6
|
-
@mongo = Mongo::Connection.new(
|
6
|
+
@mongo = Mongo::Connection.new('localhost', 27_017, pool_size: 15, pool_timeout: 5).db('_test_polipus')
|
7
7
|
@mongo['_test_pages'].drop
|
8
8
|
@storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
|
9
9
|
end
|
@@ -17,7 +17,7 @@ describe Polipus::Storage::MongoStore do
|
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'should store a page' do
|
20
|
-
p = page_factory 'http://www.google.com', :
|
20
|
+
p = page_factory 'http://www.google.com', code: 200, body: '<html></html>'
|
21
21
|
uuid = @storage.add p
|
22
22
|
uuid.should be == 'ed646a3334ca891fd3467db131372140'
|
23
23
|
@storage.count.should be 1
|
@@ -28,7 +28,7 @@ describe Polipus::Storage::MongoStore do
|
|
28
28
|
end
|
29
29
|
|
30
30
|
it 'should update a page' do
|
31
|
-
p = page_factory 'http://www.google.com', :
|
31
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
32
32
|
@storage.add p
|
33
33
|
p = @storage.get p
|
34
34
|
p.code.should be == 301
|
@@ -37,21 +37,21 @@ describe Polipus::Storage::MongoStore do
|
|
37
37
|
|
38
38
|
it 'should iterate over stored pages' do
|
39
39
|
@storage.each do |k, page|
|
40
|
-
k.should be ==
|
40
|
+
k.should be == 'ed646a3334ca891fd3467db131372140'
|
41
41
|
page.url.to_s.should be == 'http://www.google.com'
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
45
|
it 'should delete a page' do
|
46
|
-
p = page_factory 'http://www.google.com', :
|
46
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
47
47
|
@storage.remove p
|
48
48
|
@storage.get(p).should be_nil
|
49
49
|
@storage.count.should be 0
|
50
50
|
end
|
51
51
|
|
52
52
|
it 'should store a page removing a query string from the uuid generation' do
|
53
|
-
p = page_factory 'http://www.asd.com/?asd=lol', :
|
54
|
-
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', :
|
53
|
+
p = page_factory 'http://www.asd.com/?asd=lol', code: 200, body: '<html></html>'
|
54
|
+
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', code: 200, body: '<html></html>'
|
55
55
|
@storage.include_query_string_in_uuid = false
|
56
56
|
@storage.add p
|
57
57
|
@storage.exists?(p_no_query).should be_true
|
@@ -59,8 +59,8 @@ describe Polipus::Storage::MongoStore do
|
|
59
59
|
end
|
60
60
|
|
61
61
|
it 'should store a page removing a query string from the uuid generation no ending slash' do
|
62
|
-
p = page_factory 'http://www.asd.com?asd=lol', :
|
63
|
-
p_no_query = page_factory 'http://www.asd.com', :
|
62
|
+
p = page_factory 'http://www.asd.com?asd=lol', code: 200, body: '<html></html>'
|
63
|
+
p_no_query = page_factory 'http://www.asd.com', code: 200, body: '<html></html>'
|
64
64
|
@storage.include_query_string_in_uuid = false
|
65
65
|
@storage.add p
|
66
66
|
@storage.exists?(p_no_query).should be_true
|
@@ -68,7 +68,7 @@ describe Polipus::Storage::MongoStore do
|
|
68
68
|
end
|
69
69
|
|
70
70
|
it 'should store a page with user data associated' do
|
71
|
-
p = page_factory 'http://www.user.com', :
|
71
|
+
p = page_factory 'http://www.user.com', code: 200, body: '<html></html>'
|
72
72
|
p.user_data.name = 'Test User Data'
|
73
73
|
@storage.add p
|
74
74
|
@storage.exists?(p).should be_true
|
@@ -79,7 +79,7 @@ describe Polipus::Storage::MongoStore do
|
|
79
79
|
|
80
80
|
it 'should honor the except parameters' do
|
81
81
|
storage = Polipus::Storage.mongo_store(@mongo, '_test_pages', ['body'])
|
82
|
-
p = page_factory 'http://www.user-doo.com', :
|
82
|
+
p = page_factory 'http://www.user-doo.com', code: 200, body: '<html></html>'
|
83
83
|
storage.add p
|
84
84
|
p = storage.get p
|
85
85
|
p.body.should be_empty
|
@@ -88,11 +88,11 @@ describe Polipus::Storage::MongoStore do
|
|
88
88
|
|
89
89
|
it 'should return false if a doc not exists' do
|
90
90
|
@storage.include_query_string_in_uuid = false
|
91
|
-
p_other = page_factory 'http://www.asdrrrr.com', :
|
91
|
+
p_other = page_factory 'http://www.asdrrrr.com', code: 200, body: '<html></html>'
|
92
92
|
@storage.exists?(p_other).should be_false
|
93
93
|
@storage.add p_other
|
94
94
|
@storage.exists?(p_other).should be_true
|
95
|
-
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', :
|
95
|
+
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', code: 200, body: '<html></html>'
|
96
96
|
@storage.exists?(p_other).should be_true
|
97
97
|
@storage.include_query_string_in_uuid = true
|
98
98
|
@storage.exists?(p_other).should be_false
|
@@ -101,7 +101,7 @@ describe Polipus::Storage::MongoStore do
|
|
101
101
|
|
102
102
|
it 'should set page.fetched_at based on the id creation' do
|
103
103
|
storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
|
104
|
-
p = page_factory 'http://www.user-doojo.com', :
|
104
|
+
p = page_factory 'http://www.user-doojo.com', code: 200, body: '<html></html>'
|
105
105
|
storage.add p
|
106
106
|
p.fetched_at.should be_nil
|
107
107
|
p = storage.get p
|
@@ -110,11 +110,11 @@ describe Polipus::Storage::MongoStore do
|
|
110
110
|
|
111
111
|
it 'should NOT set page.fetched_at if already present' do
|
112
112
|
storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
|
113
|
-
p = page_factory 'http://www.user-doojooo.com', :
|
113
|
+
p = page_factory 'http://www.user-doojooo.com', code: 200, body: '<html></html>'
|
114
114
|
p.fetched_at = 10
|
115
115
|
storage.add p
|
116
116
|
p = storage.get p
|
117
117
|
p.fetched_at.should be 10
|
118
118
|
end
|
119
119
|
|
120
|
-
end
|
120
|
+
end
|
data/spec/storage_s3_spec.rb
CHANGED
@@ -1,24 +1,23 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'aws/s3'
|
3
|
+
require 'polipus/storage/s3_store'
|
4
4
|
describe Polipus::Storage::S3Store do
|
5
|
-
|
5
|
+
|
6
6
|
before(:each) do
|
7
7
|
@storage = Polipus::Storage.s3_store(
|
8
|
-
'_test_pages',
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
8
|
+
'_test_pages',
|
9
|
+
|
10
|
+
access_key_id: 'XXXXXXX',
|
11
|
+
secret_access_key: 'XXXX'
|
12
|
+
|
13
13
|
)
|
14
14
|
end
|
15
|
-
|
16
|
-
after(:each) {@storage.clear}
|
17
|
-
|
15
|
+
|
16
|
+
after(:each) { @storage.clear }
|
18
17
|
|
19
18
|
it 'should store a page' do
|
20
19
|
|
21
|
-
p = page_factory 'http://www.google.com', :
|
20
|
+
p = page_factory 'http://www.google.com', code: 200, body: '<html></html>'
|
22
21
|
uuid = @storage.add p
|
23
22
|
uuid.should be == 'ed646a3334ca891fd3467db131372140'
|
24
23
|
@storage.count.should be 1
|
@@ -30,7 +29,7 @@ describe Polipus::Storage::S3Store do
|
|
30
29
|
end
|
31
30
|
|
32
31
|
it 'should update a page' do
|
33
|
-
p = page_factory 'http://www.google.com', :
|
32
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
34
33
|
@storage.add p
|
35
34
|
p = @storage.get p
|
36
35
|
p.code.should be == 301
|
@@ -39,15 +38,15 @@ describe Polipus::Storage::S3Store do
|
|
39
38
|
end
|
40
39
|
|
41
40
|
it 'should iterate over stored pages' do
|
42
|
-
10.times {|i| @storage.add page_factory("http://www.google.com/p_#{i}", :
|
41
|
+
10.times { |i| @storage.add page_factory("http://www.google.com/p_#{i}", code: 200, body: "<html>#{i}</html>") }
|
43
42
|
@storage.count.should be 10
|
44
|
-
@storage.each do |k,
|
43
|
+
@storage.each do |k, _page|
|
45
44
|
k.should be =~ /[a-f0-9]{32}/
|
46
45
|
end
|
47
46
|
end
|
48
47
|
|
49
48
|
it 'should delete a page' do
|
50
|
-
p = page_factory 'http://www.google.com', :
|
49
|
+
p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
|
51
50
|
@storage.add p
|
52
51
|
@storage.remove p
|
53
52
|
@storage.get(p).should be_nil
|
@@ -55,8 +54,8 @@ describe Polipus::Storage::S3Store do
|
|
55
54
|
end
|
56
55
|
|
57
56
|
it 'should store a page removing a query string from the uuid generation' do
|
58
|
-
p = page_factory 'http://www.asd.com/?asd=lol', :
|
59
|
-
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', :
|
57
|
+
p = page_factory 'http://www.asd.com/?asd=lol', code: 200, body: '<html></html>'
|
58
|
+
p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', code: 200, body: '<html></html>'
|
60
59
|
@storage.include_query_string_in_uuid = false
|
61
60
|
@storage.add p
|
62
61
|
@storage.exists?(p_no_query).should be_true
|
@@ -64,8 +63,8 @@ describe Polipus::Storage::S3Store do
|
|
64
63
|
end
|
65
64
|
|
66
65
|
it 'should store a page removing a query string from the uuid generation no ending slash' do
|
67
|
-
p = page_factory 'http://www.asd.com?asd=lol', :
|
68
|
-
p_no_query = page_factory 'http://www.asd.com', :
|
66
|
+
p = page_factory 'http://www.asd.com?asd=lol', code: 200, body: '<html></html>'
|
67
|
+
p_no_query = page_factory 'http://www.asd.com', code: 200, body: '<html></html>'
|
69
68
|
@storage.include_query_string_in_uuid = false
|
70
69
|
@storage.add p
|
71
70
|
@storage.exists?(p_no_query).should be_true
|
@@ -73,7 +72,7 @@ describe Polipus::Storage::S3Store do
|
|
73
72
|
end
|
74
73
|
|
75
74
|
it 'should store a page with user data associated' do
|
76
|
-
p = page_factory 'http://www.user.com', :
|
75
|
+
p = page_factory 'http://www.user.com', code: 200, body: '<html></html>'
|
77
76
|
p.user_data.name = 'Test User Data'
|
78
77
|
@storage.add p
|
79
78
|
@storage.exists?(p).should be_true
|
@@ -83,15 +82,15 @@ describe Polipus::Storage::S3Store do
|
|
83
82
|
end
|
84
83
|
|
85
84
|
it 'should honor the except parameters' do
|
86
|
-
storage =
|
87
|
-
'_test_pages',
|
88
|
-
|
89
|
-
:
|
90
|
-
:
|
85
|
+
storage = Polipus::Storage.s3_store(
|
86
|
+
'_test_pages',
|
87
|
+
{
|
88
|
+
access_key_id: 'XXXXXXX',
|
89
|
+
secret_access_key: 'XXXX'
|
91
90
|
},
|
92
91
|
['body']
|
93
92
|
)
|
94
|
-
p = page_factory 'http://www.user-doo.com', :
|
93
|
+
p = page_factory 'http://www.user-doo.com', code: 200, body: '<html></html>'
|
95
94
|
storage.add p
|
96
95
|
p = storage.get p
|
97
96
|
|
@@ -101,15 +100,15 @@ describe Polipus::Storage::S3Store do
|
|
101
100
|
|
102
101
|
it 'should return false if a doc not exists' do
|
103
102
|
@storage.include_query_string_in_uuid = false
|
104
|
-
p_other = page_factory 'http://www.asdrrrr.com', :
|
103
|
+
p_other = page_factory 'http://www.asdrrrr.com', code: 200, body: '<html></html>'
|
105
104
|
@storage.exists?(p_other).should be_false
|
106
105
|
@storage.add p_other
|
107
106
|
@storage.exists?(p_other).should be_true
|
108
|
-
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', :
|
107
|
+
p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', code: 200, body: '<html></html>'
|
109
108
|
@storage.exists?(p_other).should be_true
|
110
109
|
@storage.include_query_string_in_uuid = true
|
111
110
|
@storage.exists?(p_other).should be_false
|
112
111
|
@storage.remove p_other
|
113
112
|
end
|
114
113
|
|
115
|
-
end
|
114
|
+
end
|