polipus 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +15 -0
  2. data/.document +5 -0
  3. data/.gitignore +53 -0
  4. data/.rspec +2 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +55 -0
  8. data/README.rdoc +3 -0
  9. data/Rakefile +9 -0
  10. data/examples/basic.rb +58 -0
  11. data/examples/survival.rb +9 -0
  12. data/lib/polipus.rb +451 -0
  13. data/lib/polipus/http.rb +195 -0
  14. data/lib/polipus/page.rb +219 -0
  15. data/lib/polipus/plugin.rb +13 -0
  16. data/lib/polipus/plugins/cleaner.rb +25 -0
  17. data/lib/polipus/plugins/sample.rb +17 -0
  18. data/lib/polipus/plugins/sleeper.rb +22 -0
  19. data/lib/polipus/queue_overflow.rb +24 -0
  20. data/lib/polipus/queue_overflow/base.rb +6 -0
  21. data/lib/polipus/queue_overflow/dev_null_queue.rb +33 -0
  22. data/lib/polipus/queue_overflow/manager.rb +50 -0
  23. data/lib/polipus/queue_overflow/mongo_queue.rb +61 -0
  24. data/lib/polipus/queue_overflow/mongo_queue_capped.rb +28 -0
  25. data/lib/polipus/storage.rb +31 -0
  26. data/lib/polipus/storage/base.rb +17 -0
  27. data/lib/polipus/storage/dev_null.rb +35 -0
  28. data/lib/polipus/storage/mongo_store.rb +86 -0
  29. data/lib/polipus/storage/s3_store.rb +100 -0
  30. data/lib/polipus/url_tracker.rb +20 -0
  31. data/lib/polipus/url_tracker/bloomfilter.rb +27 -0
  32. data/lib/polipus/url_tracker/redis_set.rb +27 -0
  33. data/lib/polipus/version.rb +4 -0
  34. data/polipus.gemspec +39 -0
  35. data/spec/cassettes/08b228db424a926e1ed6ab63b38d847e.yml +166 -0
  36. data/spec/cassettes/20aa41f181b49f00078c3ca30bad5afe.yml +166 -0
  37. data/spec/cassettes/4640919145753505af2d0f8423de37f3.yml +270 -0
  38. data/spec/cassettes/66aae15a03f4aab8efd15e40d2d7882a.yml +194 -0
  39. data/spec/cassettes/76b7c197c95a5bf9b1e882c567192d72.yml +183 -0
  40. data/spec/cassettes/9b1d523b7f5db7214f8a8bd9272cccba.yml +221 -0
  41. data/spec/cassettes/ab333f89535a2efb284913fede6aa7c7.yml +221 -0
  42. data/spec/cassettes/ae5d7cffde3f53122cdf79f3d1367e8e.yml +221 -0
  43. data/spec/cassettes/ffe3d588b6df4b9de35e5a7ccaf5a81b.yml +695 -0
  44. data/spec/cassettes/http_test.yml +1418 -0
  45. data/spec/cassettes/http_test_redirect.yml +71 -0
  46. data/spec/clear.rb +11 -0
  47. data/spec/http_spec.rb +31 -0
  48. data/spec/page_spec.rb +22 -0
  49. data/spec/queue_overflow_manager_spec.rb +89 -0
  50. data/spec/queue_overflow_spec.rb +71 -0
  51. data/spec/spec_helper.rb +34 -0
  52. data/spec/storage_mongo_spec.rb +102 -0
  53. data/spec/storage_s3_spec.rb +115 -0
  54. data/spec/url_tracker_spec.rb +28 -0
  55. metadata +313 -0
@@ -0,0 +1,71 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: http://greenbytes.de/tech/tc/httpredirects/t300bodyandloc.asis
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept:
11
+ - ! '*/*'
12
+ User-Agent:
13
+ - Ruby
14
+ response:
15
+ status:
16
+ code: 300
17
+ message: Multiple Choices
18
+ headers:
19
+ Date:
20
+ - Mon, 10 Jun 2013 08:58:25 GMT
21
+ Server:
22
+ - Apache/2.2.22 (Ubuntu)
23
+ Cache-Control:
24
+ - no-cache
25
+ Location:
26
+ - http://greenbytes.de/tech/tc/httpredirects/300.txt
27
+ Content-Length:
28
+ - '27'
29
+ body:
30
+ encoding: US-ASCII
31
+ string: ! '300 Redirect Response Body
32
+
33
+ '
34
+ http_version:
35
+ recorded_at: Mon, 10 Jun 2013 08:58:25 GMT
36
+ - request:
37
+ method: get
38
+ uri: http://greenbytes.de/tech/tc/httpredirects/300.txt
39
+ body:
40
+ encoding: US-ASCII
41
+ string: ''
42
+ headers:
43
+ Accept:
44
+ - ! '*/*'
45
+ User-Agent:
46
+ - Ruby
47
+ response:
48
+ status:
49
+ code: 200
50
+ message: OK
51
+ headers:
52
+ Date:
53
+ - Mon, 10 Jun 2013 08:58:25 GMT
54
+ Server:
55
+ - Apache/2.2.22 (Ubuntu)
56
+ Last-Modified:
57
+ - Tue, 08 Jan 2013 17:31:05 GMT
58
+ Etag:
59
+ - ! '"b8306c-31-4d2ca4f7df2ca"'
60
+ Accept-Ranges:
61
+ - bytes
62
+ Content-Length:
63
+ - '49'
64
+ Content-Type:
65
+ - text/plain
66
+ body:
67
+ encoding: US-ASCII
68
+ string: ! "You have reached the target\r\nof a 300 redirect.\r\n"
69
+ http_version:
70
+ recorded_at: Mon, 10 Jun 2013 08:58:25 GMT
71
+ recorded_with: VCR 2.5.0
data/spec/clear.rb ADDED
@@ -0,0 +1,11 @@
require "yaml"

# Removes the recorded request and response headers from VCR cassettes,
# rewriting each matching file in place.
#
# Only cassettes whose file name contains a run of 32 lowercase hex characters
# are processed; every other *.yml file in +dir+ is left untouched.
#
# @param dir [String] directory that holds the *.yml cassettes
# @return [Array<String>] paths of the cassettes that were rewritten
def strip_cassette_headers(dir)
  hashed = Dir.glob(File.join(dir, '*.yml')).select do |path|
    # Match on the file name only, so a hex-looking directory name cannot
    # pull unrelated cassettes in.
    File.basename(path) =~ /[a-f0-9]{32}/
  end

  hashed.each do |path|
    cassette = YAML.load_file(path)
    cassette['http_interactions'].each do |interaction|
      interaction['request'].delete('headers')
      interaction['response'].delete('headers')
    end
    File.open(path, 'w') { |io| io.write(cassette.to_yaml) }
  end
end

# Resolve the cassette directory from this script's own location (the same way
# spec_helper.rb builds cassette_library_dir) so the script works no matter
# which directory it is launched from.
strip_cassette_headers(File.join(File.dirname(__FILE__), 'cassettes'))
data/spec/http_spec.rb ADDED
@@ -0,0 +1,31 @@
require "spec_helper"
require "mongo"
require "polipus/http"
require "polipus/page"

# Exercises Polipus::HTTP#fetch_page against pre-recorded VCR cassettes.
describe Polipus::HTTP do

  it 'should download a page' do
    VCR.use_cassette('http_test') do
      listing_url = "http://sfbay.craigslist.org/apa/"
      expected_title = "SF bay area apts/housing for rent classifieds - craigslist"

      fetched = Polipus::HTTP.new.fetch_page(listing_url)

      fetched.should be_an_instance_of(Polipus::Page)
      title = fetched.doc.search("title").text.strip
      title.should be == expected_title
    end
  end

  it 'should follow a redirect' do
    VCR.use_cassette('http_test_redirect') do
      # The cassette answers the first URL with a 300 whose Location header
      # points at 300.txt; the returned page must describe that final target.
      redirecting_url = "http://greenbytes.de/tech/tc/httpredirects/t300bodyandloc.asis"
      target_url = "http://greenbytes.de/tech/tc/httpredirects/300.txt"

      fetched = Polipus::HTTP.new.fetch_page(redirecting_url)

      fetched.should be_an_instance_of(Polipus::Page)
      fetched.code.should be == 200
      fetched.url.to_s.should be == target_url
      fetched.body.strip.should be == "You have reached the target\r\nof a 300 redirect."
    end
  end

end
data/spec/page_spec.rb ADDED
@@ -0,0 +1,22 @@
require "spec_helper"
require "polipus/page"

# Checks which anchors Polipus::Page#links reports when domain aliases are set.
describe Polipus::Page do
  it 'should honor domain_aliases attribute' do
    # Five anchors: two relative, two on the aliased hosts and one on
    # not.google.com, which is not listed as an alias.
    markup = <<EOF
<html>
<body>
<a href="/page/1">1</a>
<a href="/page/2">2</a>
<a href="http://www.google.com/page/3">3</a>
<a href="http://google.com/page/3">4</a>
<a href="http://not.google.com/page/3">4</a>
</body>
</html>
EOF
    headers = {'content-type' => ['text/html']}
    aliases = %w(www.google.com google.com)

    page = Polipus::Page.new(
      'http://www.google.com/',
      :code => 200, :body => markup, :headers => headers, :domain_aliases => aliases
    )

    # Presumably the not.google.com anchor is the one left out - the spec only
    # pins the total.
    page.links.count.should be == 4
  end
end
@@ -0,0 +1,89 @@
require "spec_helper"
require "polipus/queue_overflow"
require "redis-queue"

# Specs for Polipus::QueueOverflow::Manager#perform.
#
# Every example asserts on the two-element array that perform returns. As the
# examples below use it, the first element counts the items moved out of the
# Redis queue into the overflow queue, and the second counts the items restored
# from the overflow queue into the Redis queue.
#
# Requires a MongoDB on localhost:27017 and a Redis reachable by Redis.new.
describe Polipus::QueueOverflow::Manager do
  before(:all) do
    @mongo = Mongo::Connection.new("localhost", 27017, :pool_size => 15, :pool_timeout => 5).db('_test_polipus')
    @mongo['_test_pages'].drop
    @storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
    @redis_q = Redis::Queue.new("queue_test","bp_queue_test", :redis => Redis.new())
    @queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, "queue_test")

    # The manager only needs these two collaborators from the crawler, so a
    # flexmock stands in for the full Polipus instance.
    @polipus = flexmock("polipus")
    @polipus.should_receive(:queue_overflow_adapter).and_return(@queue_overflow)
    @polipus.should_receive(:storage).and_return(@storage)

    # NOTE(review): 10 looks like the maximum number of items kept in the
    # Redis queue - confirm against Manager#initialize.
    @manager = Polipus::QueueOverflow::Manager.new(@polipus, @redis_q, 10)
  end

  # The queues, the storage and the manager are shared by all examples, so
  # start each one from an empty state.
  before(:each) do
    @queue_overflow.clear
    @redis_q.clear
    @storage.clear
  end

  after(:all) do
    @queue_overflow.clear
    @redis_q.clear
    # 'should restore 0 items' writes pages to the storage; do not leave them
    # behind when it happens to be the last example to run.
    @storage.clear
  end

  # Builds a minimal 200 page for +url+ through spec_helper's page_factory.
  def html_page(url)
    page_factory(url, :code => 200, :body => '<html></html>')
  end

  it 'should remove 10 items' do
    @manager.perform.should be == [0,0]
    20.times { |i| @redis_q << html_page("http://www.user-doo.com/page_#{i}").to_json }
    @manager.perform.should be == [10, 0]
    @queue_overflow.size.should be == 10
    @redis_q.size.should be == 10
  end

  it 'should restore 10 items' do
    @manager.perform.should be == [0,0]
    10.times { |i| @queue_overflow << html_page("http://www.user-doo-bla.com/page_#{i}").to_json }
    @manager.perform.should be == [0, 10]
    @queue_overflow.size.should be == 0
    @redis_q.size.should be == 10
    @manager.perform.should be == [0, 0]
  end

  it 'should restore 3 items' do
    @manager.perform.should be == [0,0]
    3.times { |i| @queue_overflow << html_page("http://www.user-doo-bu.com/page_#{i}").to_json }
    @manager.perform.should be == [0, 3]
    @queue_overflow.size.should be == 0
    @redis_q.size.should be == 3
    @manager.perform.should be == [0, 0]
  end

  it 'should restore 0 items' do
    @manager.perform.should be == [0,0]
    # Every overflowed page is also put in the storage; the expectations below
    # pin that such pages are drained from the overflow queue without being
    # pushed back to the Redis queue.
    10.times do |i|
      page = html_page("http://www.user-doo-bu.com/page_#{i}")
      @storage.add page
      @queue_overflow << page.to_json
    end
    @manager.perform.should be == [0, 0]
    @queue_overflow.size.should be == 0
    @redis_q.size.should be == 0
    @manager.perform.should be == [0, 0]
  end

  it 'should filter an url based on the spec' do
    10.times { |i| @queue_overflow << html_page("http://www.user-doo.com/page_#{i}").to_json }

    # Reject exactly one of the ten URLs.
    @manager.url_filter { |page| !page.url.to_s.end_with?("page_0") }

    begin
      @manager.perform.should be == [0,9]
      @queue_overflow.size.should be == 0
      @redis_q.size.should be == 9
    ensure
      # @manager is shared and the examples run in random order: restore the
      # accept-everything filter even when an expectation above fails, so the
      # filter cannot leak into whichever example runs next.
      @manager.url_filter { |_page| true }
    end
  end

end
@@ -0,0 +1,71 @@
require "spec_helper"
require "polipus/queue_overflow"

# Behavioural specs for the three overflow queue flavours obtained from
# Polipus::QueueOverflow: the plain Mongo queue, the capped Mongo queue
# (:max => 20) and the Mongo queue created with :ensure_uniq => true.
describe Polipus::QueueOverflow do

  before(:all) do
    @queue_overflow        = Polipus::QueueOverflow.mongo_queue(nil, "queue_test")
    @queue_overflow_capped = Polipus::QueueOverflow.mongo_queue_capped(nil, "queue_test_c", {:max => 20})
    @queue_overflow_uniq   = Polipus::QueueOverflow.mongo_queue(nil, "queue_test_u", {:ensure_uniq => true })
  end

  # The queues are created once and shared, so empty them before every example.
  before(:each) do
    every_queue.each { |queue| queue.clear }
  end

  after(:all) do
    @queue_overflow.clear
    @queue_overflow_uniq.clear
    @queue_overflow_capped.clear
  end

  # The queues under test, in the order the examples walk through them.
  def every_queue
    [@queue_overflow, @queue_overflow_capped, @queue_overflow_uniq]
  end

  it 'should work' do
    every_queue.each do |queue|
      # Popping an empty queue yields nil.
      queue.empty?.should be_true
      queue.pop.should be_nil

      queue << "test"
      queue.size.should be == 1
      queue.pop.should be == "test"

      # The queue is empty again after its only item was popped.
      queue.empty?.should be_true
      queue.pop.should be_nil
      queue.size.should be == 0
      queue.empty?.should be_true
    end
  end

  it 'should act as a queue' do
    every_queue.each do |queue|
      10.times { |n| queue << "message_#{n}" }
      queue.size.should be == 10
      # First in, first out.
      queue.pop.should be == "message_0"
    end
  end

  it 'should work with complex paylod' do
    every_queue.each do |queue|
      payload = {'a' => [1,2,3], 'b' => 'a_string'}
      queue << payload.to_json
      popped = queue.pop
      JSON.parse(popped).should be == payload
    end
  end

  it 'should honor max items if it is capped' do
    # 30 pushes into a queue capped at 20: the 10 oldest messages are gone.
    30.times { |n| @queue_overflow_capped << "message_#{n}" }
    @queue_overflow_capped.size.should be == 20
    @queue_overflow_capped.pop.should be == "message_10"
  end

  it 'should contains only unique items' do
    %w(A B).each do |message|
      20.times { @queue_overflow_uniq << message }
    end
    @queue_overflow_uniq.size.should be == 2
  end

end
@@ -0,0 +1,34 @@
# Shared bootstrap for the spec suite, originally generated by `rspec --init`.
# By convention all specs live under a `spec` directory, which RSpec adds to
# the `$LOAD_PATH`; load this file with `require "spec_helper"` so that it is
# only loaded once.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
require "digest/md5"
require "vcr"
require "polipus"

RSpec.configure do |config|
  config.treat_symbols_as_metadata_keys_with_true_values = true
  config.run_all_when_everything_filtered = true
  config.filter_run :focus

  # Random ordering surfaces order dependencies between examples. To debug
  # one, replay a run with the seed printed at the end of the output:
  #     --seed 1234
  config.order = 'random'
  config.mock_with :flexmock

  # Run every example inside its own VCR cassette, named after the MD5 of the
  # example's full description.
  config.around(:each) do |example|
    cassette_name = Digest::MD5.hexdigest(example.metadata[:full_description])
    VCR.use_cassette(cassette_name) { example.run }
  end
end

VCR.configure do |vcr|
  vcr.cassette_library_dir = "#{File.dirname(__FILE__)}/cassettes"
  vcr.hook_into :webmock
end
31
+
# Spec helper: builds a Polipus::Page for +url+, passing +params+ straight
# through to Polipus::Page.new (the specs use keys such as :code and :body).
def page_factory(url, params = {})
  Polipus::Page.new(url, params)
end
@@ -0,0 +1,102 @@
require "spec_helper"
require "mongo"
require "polipus/storage/mongo_store"

# Specs for the Mongo-backed page storage. They need a MongoDB listening on
# localhost:27017 and work on the '_test_pages' collection of '_test_polipus'.
describe Polipus::Storage::MongoStore do
  before(:all) do
    @mongo = Mongo::Connection.new("localhost", 27017, :pool_size => 15, :pool_timeout => 5).db('_test_polipus')
    @mongo['_test_pages'].drop
    @storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
  end

  after(:all) do
    @mongo['_test_pages'].drop
  end

  # Examples run in random order against one shared collection, so none of
  # them may rely on documents written by another: wipe it after each one.
  after(:each) do
    @mongo['_test_pages'].drop
  end

  it 'should store a page' do
    page = page_factory 'http://www.google.com', :code => 200, :body => '<html></html>'
    uuid = @storage.add page
    uuid.should be == 'ed646a3334ca891fd3467db131372140'
    @storage.count.should be 1
    @mongo['_test_pages'].count.should be 1
    page = @storage.get page
    page.url.to_s.should be == 'http://www.google.com'
    page.body.should be == '<html></html>'
  end

  it 'should update a page' do
    page = page_factory 'http://www.google.com', :code => 301, :body => '<html></html>'
    @storage.add page
    page = @storage.get page
    page.code.should be == 301
    @mongo['_test_pages'].count.should be 1
  end

  it 'should iterate over stored pages' do
    # Seed a page first: the collection is dropped after every example, so
    # without it the block below never runs and the example cannot fail.
    @storage.add page_factory('http://www.google.com', :code => 200, :body => '<html></html>')

    yielded = 0
    @storage.each do |k, page|
      yielded += 1
      k.should be == "ed646a3334ca891fd3467db131372140"
      page.url.to_s.should be == 'http://www.google.com'
    end
    yielded.should be == 1
  end

  it 'should delete a page' do
    page = page_factory 'http://www.google.com', :code => 301, :body => '<html></html>'
    # Store the page before removing it, so that the removal is really
    # exercised rather than run against an already empty collection.
    @storage.add page
    @storage.remove page
    @storage.get(page).should be_nil
    @storage.count.should be 0
  end

  it 'should store a page removing a query string from the uuid generation' do
    page = page_factory 'http://www.asd.com/?asd=lol', :code => 200, :body => '<html></html>'
    p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', :code => 200, :body => '<html></html>'
    @storage.include_query_string_in_uuid = false
    @storage.add page
    # Same URL apart from the query string, so it must map to the same uuid.
    @storage.exists?(p_no_query).should be_true
    @storage.remove page
  end

  it 'should store a page removing a query string from the uuid generation no ending slash' do
    page = page_factory 'http://www.asd.com?asd=lol', :code => 200, :body => '<html></html>'
    p_no_query = page_factory 'http://www.asd.com', :code => 200, :body => '<html></html>'
    @storage.include_query_string_in_uuid = false
    @storage.add page
    @storage.exists?(p_no_query).should be_true
    @storage.remove page
  end

  it 'should store a page with user data associated' do
    page = page_factory 'http://www.user.com', :code => 200, :body => '<html></html>'
    page.user_data.name = 'Test User Data'
    @storage.add page
    @storage.exists?(page).should be_true
    page = @storage.get(page)
    page.user_data.name.should be == 'Test User Data'
    @storage.remove page
  end

  it 'should honor the except parameters' do
    # A second store on the same collection, created with 'body' in the
    # except list: the page read back has an empty body.
    storage = Polipus::Storage.mongo_store(@mongo, '_test_pages', ['body'])
    page = page_factory 'http://www.user-doo.com', :code => 200, :body => '<html></html>'
    storage.add page
    page = storage.get page
    page.body.should be_empty
    storage.clear
  end

  it 'should return false if a doc not exists' do
    @storage.include_query_string_in_uuid = false
    p_other = page_factory 'http://www.asdrrrr.com', :code => 200, :body => '<html></html>'
    @storage.exists?(p_other).should be_false
    @storage.add p_other
    @storage.exists?(p_other).should be_true

    # With the query string ignored the tracked URL resolves to the stored
    # page; once the query string counts again it no longer does.
    p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', :code => 200, :body => '<html></html>'
    @storage.exists?(p_other).should be_true
    @storage.include_query_string_in_uuid = true
    @storage.exists?(p_other).should be_false
  end

end
@@ -0,0 +1,115 @@
require "spec_helper"
require "aws/s3"
require "polipus/storage/s3_store"

# Specs for the S3-backed page storage, run against the '_test_pages' bucket
# with placeholder credentials.
# NOTE(review): presumably no request reaches AWS because spec_helper wraps
# every example in a VCR cassette - confirm the cassettes cover these calls.
describe Polipus::Storage::S3Store do

  before(:each) do
    @storage = build_s3_store
  end

  after(:each) { @storage.clear }

  # Builds an S3 store on the test bucket. +extra+ is appended to the
  # s3_store arguments (used below to pass the except list).
  def build_s3_store(*extra)
    credentials = {
      :access_key_id     => 'XXXXXXX',
      :secret_access_key => 'XXXX'
    }
    Polipus::Storage.s3_store('_test_pages', credentials, *extra)
  end

  it 'should store a page' do
    page = page_factory 'http://www.google.com', :code => 200, :body => '<html></html>'
    uuid = @storage.add page
    uuid.should be == 'ed646a3334ca891fd3467db131372140'
    @storage.count.should be 1
    page = @storage.get page
    page.url.to_s.should be == 'http://www.google.com'
    page.body.should be == '<html></html>'
    @storage.remove page
  end

  it 'should update a page' do
    page = page_factory 'http://www.google.com', :code => 301, :body => '<html></html>'
    @storage.add page
    page = @storage.get page
    page.code.should be == 301
    @storage.count.should be == 1
    @storage.remove page
  end

  it 'should iterate over stored pages' do
    10.times { |i| @storage.add page_factory("http://www.google.com/p_#{i}", :code => 200, :body => "<html>#{i}</html>") }
    @storage.count.should be 10
    @storage.each do |k, page|
      # Keys look like 32-character lowercase hex digests.
      k.should be =~ /[a-f0-9]{32}/
    end
  end

  it 'should delete a page' do
    page = page_factory 'http://www.google.com', :code => 301, :body => '<html></html>'
    @storage.add page
    @storage.remove page
    @storage.get(page).should be_nil
    @storage.count.should be 0
  end

  it 'should store a page removing a query string from the uuid generation' do
    page = page_factory 'http://www.asd.com/?asd=lol', :code => 200, :body => '<html></html>'
    p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', :code => 200, :body => '<html></html>'
    @storage.include_query_string_in_uuid = false
    @storage.add page
    # Same URL apart from the query string, so it must map to the same uuid.
    @storage.exists?(p_no_query).should be_true
    @storage.remove page
  end

  it 'should store a page removing a query string from the uuid generation no ending slash' do
    page = page_factory 'http://www.asd.com?asd=lol', :code => 200, :body => '<html></html>'
    p_no_query = page_factory 'http://www.asd.com', :code => 200, :body => '<html></html>'
    @storage.include_query_string_in_uuid = false
    @storage.add page
    @storage.exists?(p_no_query).should be_true
    @storage.remove page
  end

  it 'should store a page with user data associated' do
    page = page_factory 'http://www.user.com', :code => 200, :body => '<html></html>'
    page.user_data.name = 'Test User Data'
    @storage.add page
    @storage.exists?(page).should be_true
    page = @storage.get(page)
    page.user_data.name.should be == 'Test User Data'
    @storage.remove page
  end

  it 'should honor the except parameters' do
    # A second store on the same bucket, created with 'body' in the except
    # list: the page read back has no body.
    storage = build_s3_store(['body'])
    page = page_factory 'http://www.user-doo.com', :code => 200, :body => '<html></html>'
    storage.add page
    page = storage.get page

    page.body.should be_nil
    storage.clear
  end

  it 'should return false if a doc not exists' do
    @storage.include_query_string_in_uuid = false
    p_other = page_factory 'http://www.asdrrrr.com', :code => 200, :body => '<html></html>'
    @storage.exists?(p_other).should be_false
    @storage.add p_other
    @storage.exists?(p_other).should be_true

    # With the query string ignored the tracked URL resolves to the stored
    # page; once the query string counts again it no longer does.
    p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', :code => 200, :body => '<html></html>'
    @storage.exists?(p_other).should be_true
    @storage.include_query_string_in_uuid = true
    @storage.exists?(p_other).should be_false
    @storage.remove p_other
  end

end