polipus 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. checksums.yaml +15 -0
  2. data/.document +5 -0
  3. data/.gitignore +53 -0
  4. data/.rspec +2 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +20 -0
  7. data/README.md +55 -0
  8. data/README.rdoc +3 -0
  9. data/Rakefile +9 -0
  10. data/examples/basic.rb +58 -0
  11. data/examples/survival.rb +9 -0
  12. data/lib/polipus.rb +451 -0
  13. data/lib/polipus/http.rb +195 -0
  14. data/lib/polipus/page.rb +219 -0
  15. data/lib/polipus/plugin.rb +13 -0
  16. data/lib/polipus/plugins/cleaner.rb +25 -0
  17. data/lib/polipus/plugins/sample.rb +17 -0
  18. data/lib/polipus/plugins/sleeper.rb +22 -0
  19. data/lib/polipus/queue_overflow.rb +24 -0
  20. data/lib/polipus/queue_overflow/base.rb +6 -0
  21. data/lib/polipus/queue_overflow/dev_null_queue.rb +33 -0
  22. data/lib/polipus/queue_overflow/manager.rb +50 -0
  23. data/lib/polipus/queue_overflow/mongo_queue.rb +61 -0
  24. data/lib/polipus/queue_overflow/mongo_queue_capped.rb +28 -0
  25. data/lib/polipus/storage.rb +31 -0
  26. data/lib/polipus/storage/base.rb +17 -0
  27. data/lib/polipus/storage/dev_null.rb +35 -0
  28. data/lib/polipus/storage/mongo_store.rb +86 -0
  29. data/lib/polipus/storage/s3_store.rb +100 -0
  30. data/lib/polipus/url_tracker.rb +20 -0
  31. data/lib/polipus/url_tracker/bloomfilter.rb +27 -0
  32. data/lib/polipus/url_tracker/redis_set.rb +27 -0
  33. data/lib/polipus/version.rb +4 -0
  34. data/polipus.gemspec +39 -0
  35. data/spec/cassettes/08b228db424a926e1ed6ab63b38d847e.yml +166 -0
  36. data/spec/cassettes/20aa41f181b49f00078c3ca30bad5afe.yml +166 -0
  37. data/spec/cassettes/4640919145753505af2d0f8423de37f3.yml +270 -0
  38. data/spec/cassettes/66aae15a03f4aab8efd15e40d2d7882a.yml +194 -0
  39. data/spec/cassettes/76b7c197c95a5bf9b1e882c567192d72.yml +183 -0
  40. data/spec/cassettes/9b1d523b7f5db7214f8a8bd9272cccba.yml +221 -0
  41. data/spec/cassettes/ab333f89535a2efb284913fede6aa7c7.yml +221 -0
  42. data/spec/cassettes/ae5d7cffde3f53122cdf79f3d1367e8e.yml +221 -0
  43. data/spec/cassettes/ffe3d588b6df4b9de35e5a7ccaf5a81b.yml +695 -0
  44. data/spec/cassettes/http_test.yml +1418 -0
  45. data/spec/cassettes/http_test_redirect.yml +71 -0
  46. data/spec/clear.rb +11 -0
  47. data/spec/http_spec.rb +31 -0
  48. data/spec/page_spec.rb +22 -0
  49. data/spec/queue_overflow_manager_spec.rb +89 -0
  50. data/spec/queue_overflow_spec.rb +71 -0
  51. data/spec/spec_helper.rb +34 -0
  52. data/spec/storage_mongo_spec.rb +102 -0
  53. data/spec/storage_s3_spec.rb +115 -0
  54. data/spec/url_tracker_spec.rb +28 -0
  55. metadata +313 -0
data/spec/cassettes/http_test_redirect.yml ADDED
@@ -0,0 +1,71 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: http://greenbytes.de/tech/tc/httpredirects/t300bodyandloc.asis
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept:
11
+ - ! '*/*'
12
+ User-Agent:
13
+ - Ruby
14
+ response:
15
+ status:
16
+ code: 300
17
+ message: Multiple Choices
18
+ headers:
19
+ Date:
20
+ - Mon, 10 Jun 2013 08:58:25 GMT
21
+ Server:
22
+ - Apache/2.2.22 (Ubuntu)
23
+ Cache-Control:
24
+ - no-cache
25
+ Location:
26
+ - http://greenbytes.de/tech/tc/httpredirects/300.txt
27
+ Content-Length:
28
+ - '27'
29
+ body:
30
+ encoding: US-ASCII
31
+ string: ! '300 Redirect Response Body
32
+
33
+ '
34
+ http_version:
35
+ recorded_at: Mon, 10 Jun 2013 08:58:25 GMT
36
+ - request:
37
+ method: get
38
+ uri: http://greenbytes.de/tech/tc/httpredirects/300.txt
39
+ body:
40
+ encoding: US-ASCII
41
+ string: ''
42
+ headers:
43
+ Accept:
44
+ - ! '*/*'
45
+ User-Agent:
46
+ - Ruby
47
+ response:
48
+ status:
49
+ code: 200
50
+ message: OK
51
+ headers:
52
+ Date:
53
+ - Mon, 10 Jun 2013 08:58:25 GMT
54
+ Server:
55
+ - Apache/2.2.22 (Ubuntu)
56
+ Last-Modified:
57
+ - Tue, 08 Jan 2013 17:31:05 GMT
58
+ Etag:
59
+ - ! '"b8306c-31-4d2ca4f7df2ca"'
60
+ Accept-Ranges:
61
+ - bytes
62
+ Content-Length:
63
+ - '49'
64
+ Content-Type:
65
+ - text/plain
66
+ body:
67
+ encoding: US-ASCII
68
+ string: ! "You have reached the target\r\nof a 300 redirect.\r\n"
69
+ http_version:
70
+ recorded_at: Mon, 10 Jun 2013 08:58:25 GMT
71
+ recorded_with: VCR 2.5.0
data/spec/clear.rb ADDED
@@ -0,0 +1,11 @@
require "yaml"

# Maintenance script: removes the recorded request/response headers from the
# VCR cassettes whose file name is a 32-character hex digest and rewrites
# each of those cassettes in place. Cassettes with any other name are skipped.

# Deletes the 'headers' entry from the request and the response of every
# interaction of a parsed cassette.
#
# cassette - Hash as loaded from a cassette YAML file.
#
# Returns the same (mutated) Hash. A cassette without 'http_interactions',
# or an interaction lacking a request/response section, is tolerated.
def strip_cassette_headers(cassette)
  Array(cassette['http_interactions']).each do |interaction|
    %w(request response).each do |side|
      section = interaction[side]
      section.delete('headers') if section
    end
  end
  cassette
end

# Resolve the cassette directory relative to this file, so the script does
# not depend on the directory it is launched from.
Dir.glob(File.join(File.dirname(__FILE__), 'cassettes', '*.yml')).each do |path|
  # Match on the file name only: a hex-looking directory name in the path
  # must not make every cassette eligible.
  next unless File.basename(path, '.yml') =~ /\A[a-f0-9]{32}\z/
  cassette = YAML.load_file(path)
  File.open(path, 'w') { |out| out.write(strip_cassette_headers(cassette).to_yaml) }
end
data/spec/http_spec.rb ADDED
@@ -0,0 +1,31 @@
require "spec_helper"
require "mongo"
require "polipus/http"
require "polipus/page"

# Checks Polipus::HTTP#fetch_page against pre-recorded VCR cassettes.
describe Polipus::HTTP do

  it 'should download a page' do
    listing_url = "http://sfbay.craigslist.org/apa/"
    expected_title = "SF bay area apts/housing for rent classifieds - craigslist"

    VCR.use_cassette('http_test') do
      fetched = Polipus::HTTP.new.fetch_page(listing_url)

      fetched.should be_an_instance_of(Polipus::Page)
      fetched.doc.search("title").text.strip.should be == expected_title
    end
  end

  it 'should follow a redirect' do
    redirecting_url = "http://greenbytes.de/tech/tc/httpredirects/t300bodyandloc.asis"
    final_url = "http://greenbytes.de/tech/tc/httpredirects/300.txt"

    VCR.use_cassette('http_test_redirect') do
      fetched = Polipus::HTTP.new.fetch_page(redirecting_url)

      fetched.should be_an_instance_of(Polipus::Page)
      # The page handed back must describe the redirect target (code, url
      # and body), not the intermediate response.
      fetched.code.should be == 200
      fetched.url.to_s.should be == final_url
      fetched.body.strip.should be == "You have reached the target\r\nof a 300 redirect."
    end
  end

end
data/spec/page_spec.rb ADDED
@@ -0,0 +1,22 @@
require "spec_helper"
require "polipus/page"

describe Polipus::Page do
  it 'should honor domain_aliases attribute' do
    # Five anchors: two relative ones, one per listed alias, and one that
    # points at a host which is not among the aliases.
    markup = <<EOF
<html>
<body>
<a href="/page/1">1</a>
<a href="/page/2">2</a>
<a href="http://www.google.com/page/3">3</a>
<a href="http://google.com/page/3">4</a>
<a href="http://not.google.com/page/3">4</a>
</body>
</html>
EOF
    response_headers = {'content-type' => ['text/html']}
    aliases = %w(www.google.com google.com)

    page = Polipus::Page.new('http://www.google.com/',
                             :code => 200,
                             :body => markup,
                             :headers => response_headers,
                             :domain_aliases => aliases)

    # Only four of the five anchors are expected to be reported as links
    # (presumably the not.google.com one is dropped -- verify against Page#links).
    page.links.count.should be == 4
  end
end
data/spec/queue_overflow_manager_spec.rb ADDED
@@ -0,0 +1,89 @@
require "spec_helper"
require "mongo"
require "polipus/queue_overflow"
require "redis-queue"

# Specs for Polipus::QueueOverflow::Manager.
#
# Judging from the expectations below, Manager#perform returns a two-element
# array: [items moved from the redis queue to the overflow queue,
#         items moved back from the overflow queue to the redis queue].
# The manager is built with a limit of 10 on the redis queue.
describe Polipus::QueueOverflow::Manager do
  before(:all) do
    @mongo = Mongo::Connection.new("localhost", 27017, :pool_size => 15, :pool_timeout => 5).db('_test_polipus')
    @mongo['_test_pages'].drop
    @storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
    @redis_q = Redis::Queue.new("queue_test","bp_queue_test", :redis => Redis.new())
    @queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, "queue_test")
    # The manager only needs these two collaborators from the crawler.
    @polipus = flexmock("polipus")
    @polipus.should_receive(:queue_overflow_adapter).and_return(@queue_overflow)
    @polipus.should_receive(:storage).and_return(@storage)
    @manager = Polipus::QueueOverflow::Manager.new(@polipus, @redis_q, 10)
  end

  # Every example starts from empty queues and an empty page storage.
  before(:each) do
    @queue_overflow.clear
    @redis_q.clear
    @storage.clear
  end

  after(:all) do
    @queue_overflow.clear
    @redis_q.clear
    # Do not leave pages behind for other specs using the same collection.
    @storage.clear
  end

  it 'should remove 10 items' do
    @manager.perform.should be == [0,0]
    20.times {|i| @redis_q << page_factory("http://www.user-doo.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
    @manager.perform.should be == [10, 0]
    @queue_overflow.size.should be == 10
    @redis_q.size.should be == 10
  end

  it 'should restore 10 items' do
    @manager.perform.should be == [0,0]
    10.times {|i| @queue_overflow << page_factory("http://www.user-doo-bla.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
    @manager.perform.should be == [0, 10]
    @queue_overflow.size.should be == 0
    @redis_q.size.should be == 10
    @manager.perform.should be == [0, 0]
  end

  it 'should restore 3 items' do
    @manager.perform.should be == [0,0]
    3.times {|i| @queue_overflow << page_factory("http://www.user-doo-bu.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
    @manager.perform.should be == [0, 3]
    @queue_overflow.size.should be == 0
    @redis_q.size.should be == 3
    @manager.perform.should be == [0, 0]
  end

  it 'should restore 0 items' do
    @manager.perform.should be == [0,0]
    # Pages already present in the storage are expected to be dropped from
    # the overflow queue without being pushed back to the redis queue.
    10.times {|i|
      p = page_factory("http://www.user-doo-bu.com/page_#{i}", :code => 200, :body => '<html></html>')
      @storage.add p
      @queue_overflow << p.to_json
    }
    @manager.perform.should be == [0, 0]
    @queue_overflow.size.should be == 0
    @redis_q.size.should be == 0
    @manager.perform.should be == [0, 0]
  end

  it 'should filter an url based on the spec' do
    10.times {|i| @queue_overflow << page_factory("http://www.user-doo.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
    begin
      # Reject exactly one of the ten queued pages.
      @manager.url_filter do |page|
        !page.url.to_s.end_with?("page_0")
      end
      @manager.perform.should be == [0,9]
      @queue_overflow.size.should be == 0
      @redis_q.size.should be == 9
    ensure
      # @manager is shared by all examples (built in before(:all)) and the
      # examples run in random order: restore an accept-everything filter
      # even when an expectation above fails.
      @manager.url_filter do |page|
        true
      end
    end
  end

end
data/spec/queue_overflow_spec.rb ADDED
@@ -0,0 +1,71 @@
require "spec_helper"
require "polipus/queue_overflow"

# Specs shared by the three overflow queue flavours: plain, capped
# (:max => 20) and unique (:ensure_uniq => true).
describe Polipus::QueueOverflow do

  before(:all) do
    @queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, "queue_test")
    @queue_overflow_capped = Polipus::QueueOverflow.mongo_queue_capped(nil, "queue_test_c", {:max => 20})
    @queue_overflow_uniq = Polipus::QueueOverflow.mongo_queue(nil, "queue_test_u", {:ensure_uniq => true })
  end

  # The queues under test, in the order the examples walk through them.
  def every_queue
    [@queue_overflow, @queue_overflow_capped, @queue_overflow_uniq]
  end

  before(:each) do
    @queue_overflow.clear
    @queue_overflow_capped.clear
    @queue_overflow_uniq.clear
  end

  after(:all) do
    @queue_overflow.clear
    @queue_overflow_uniq.clear
    @queue_overflow_capped.clear
  end

  it 'should work' do
    every_queue.each do |queue|
      # An empty queue pops nil.
      queue.empty?.should be_true
      queue.pop.should be_nil

      queue << "test"
      queue.size.should be == 1
      queue.pop.should be == "test"

      # Back to the empty state after the single item was consumed.
      queue.empty?.should be_true
      queue.pop.should be_nil
      queue.size.should be == 0
      queue.empty?.should be_true
    end
  end

  it 'should act as a queue' do
    every_queue.each do |queue|
      10.times do |i|
        queue << "message_#{i}"
      end
      queue.size.should be == 10
      # First in, first out.
      queue.pop.should be == "message_0"
    end
  end

  it 'should work with complex paylod' do
    every_queue.each do |queue|
      payload = {'a' => [1,2,3], 'b' => 'a_string'}
      queue << payload.to_json
      popped = queue.pop
      JSON.parse(popped).should be == payload
    end
  end

  it 'should honor max items if it is capped' do
    30.times do |i|
      @queue_overflow_capped << "message_#{i}"
    end
    # 30 pushes against a cap of 20: the 10 oldest messages are gone.
    @queue_overflow_capped.size.should be == 20
    @queue_overflow_capped.pop.should be == "message_10"
  end

  it 'should contains only unique items' do
    %w(A B).each do |item|
      20.times { @queue_overflow_uniq << item }
    end
    @queue_overflow_uniq.size.should be == 2
  end

end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,34 @@
# This file was generated by the `rspec --init` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# Require this file using `require "spec_helper"` to ensure that it is only
# loaded once.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration

# Everything the configuration below refers to is loaded up front.
require "digest/md5"
require "vcr"
require "polipus"

RSpec.configure do |config|
  config.treat_symbols_as_metadata_keys_with_true_values = true
  config.run_all_when_everything_filtered = true
  config.filter_run :focus

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  # --seed 1234
  config.order = 'random'
  config.mock_with :flexmock

  # Every example runs inside its own VCR cassette. The cassette is named
  # after the MD5 hex digest of the example's full description, so renaming
  # a `describe`/`it` changes the cassette the example uses.
  config.around(:each) do |example|
    cassette_name = Digest::MD5.hexdigest(example.metadata[:full_description])
    VCR.use_cassette(cassette_name) do
      example.run
    end
  end
end

VCR.configure do |c|
  # Cassettes live next to this file, in spec/cassettes.
  c.cassette_library_dir = "#{File.dirname(__FILE__)}/cassettes"
  c.hook_into :webmock
end

# Builds a Polipus::Page for the given url.
#
# url    - the page url, passed through to Polipus::Page.new.
# params - options Hash passed through to Polipus::Page.new (the specs use
#          :code and :body).
#
# Returns the new Polipus::Page.
def page_factory(url, params = {})
  Polipus::Page.new(url, params)
end
data/spec/storage_mongo_spec.rb ADDED
@@ -0,0 +1,102 @@
require "spec_helper"
require "mongo"
require "polipus/storage/mongo_store"

# Specs for the MongoDB page storage. They need a MongoDB server on
# localhost:27017 and use the '_test_pages' collection of '_test_polipus'.
describe Polipus::Storage::MongoStore do
  before(:all) do
    @mongo = Mongo::Connection.new("localhost", 27017, :pool_size => 15, :pool_timeout => 5).db('_test_polipus')
    @mongo['_test_pages'].drop
    @storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
  end

  after(:all) do
    @mongo['_test_pages'].drop
  end

  # The collection is dropped after every example, so each example has to
  # create the pages it relies on.
  after(:each) do
    @mongo['_test_pages'].drop
  end

  it 'should store a page' do
    p = page_factory 'http://www.google.com', :code => 200, :body => '<html></html>'
    uuid = @storage.add p
    uuid.should be == 'ed646a3334ca891fd3467db131372140'
    @storage.count.should be 1
    @mongo['_test_pages'].count.should be 1
    p = @storage.get p
    p.url.to_s.should be == 'http://www.google.com'
    p.body.should be == '<html></html>'
  end

  # NOTE(review): this example stores the page only once, so it does not
  # exercise an update of an existing document -- confirm the intended
  # semantics of a second `add` before extending it.
  it 'should update a page' do
    p = page_factory 'http://www.google.com', :code => 301, :body => '<html></html>'
    @storage.add p
    p = @storage.get p
    p.code.should be == 301
    @mongo['_test_pages'].count.should be 1
  end

  it 'should iterate over stored pages' do
    # Store a page first: on an empty collection the block below would
    # never run and the example would pass without checking anything.
    @storage.add page_factory('http://www.google.com', :code => 200, :body => '<html></html>')
    visited = 0
    @storage.each do |k, page|
      k.should be == "ed646a3334ca891fd3467db131372140"
      page.url.to_s.should be == 'http://www.google.com'
      visited += 1
    end
    visited.should be == 1
  end

  it 'should delete a page' do
    p = page_factory 'http://www.google.com', :code => 301, :body => '<html></html>'
    # The page has to exist before its removal can be verified.
    @storage.add p
    @storage.exists?(p).should be_true
    @storage.remove p
    @storage.get(p).should be_nil
    @storage.count.should be 0
  end

  it 'should store a page removing a query string from the uuid generation' do
    p = page_factory 'http://www.asd.com/?asd=lol', :code => 200, :body => '<html></html>'
    p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', :code => 200, :body => '<html></html>'
    @storage.include_query_string_in_uuid = false
    @storage.add p
    # Same url, different query string: both map to the same stored page.
    @storage.exists?(p_no_query).should be_true
    @storage.remove p
  end

  it 'should store a page removing a query string from the uuid generation no ending slash' do
    p = page_factory 'http://www.asd.com?asd=lol', :code => 200, :body => '<html></html>'
    p_no_query = page_factory 'http://www.asd.com', :code => 200, :body => '<html></html>'
    @storage.include_query_string_in_uuid = false
    @storage.add p
    @storage.exists?(p_no_query).should be_true
    @storage.remove p
  end

  it 'should store a page with user data associated' do
    p = page_factory 'http://www.user.com', :code => 200, :body => '<html></html>'
    p.user_data.name = 'Test User Data'
    @storage.add p
    @storage.exists?(p).should be_true
    p = @storage.get(p)
    p.user_data.name.should be == 'Test User Data'
    @storage.remove p
  end

  it 'should honor the except parameters' do
    # A dedicated storage on the same collection that leaves out 'body'.
    storage = Polipus::Storage.mongo_store(@mongo, '_test_pages', ['body'])
    p = page_factory 'http://www.user-doo.com', :code => 200, :body => '<html></html>'
    storage.add p
    p = storage.get p
    p.body.should be_empty
    storage.clear
  end

  it 'should return false if a doc not exists' do
    @storage.include_query_string_in_uuid = false
    p_other = page_factory 'http://www.asdrrrr.com', :code => 200, :body => '<html></html>'
    @storage.exists?(p_other).should be_false
    @storage.add p_other
    @storage.exists?(p_other).should be_true
    p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', :code => 200, :body => '<html></html>'
    # With the query string ignored the stored page is found ...
    @storage.exists?(p_other).should be_true
    # ... and it is not once the query string is part of the uuid again.
    @storage.include_query_string_in_uuid = true
    @storage.exists?(p_other).should be_false
  end

end
data/spec/storage_s3_spec.rb ADDED
@@ -0,0 +1,115 @@
require "spec_helper"
require "aws/s3"
require "polipus/storage/s3_store"

# Specs for the S3 page storage.
#
# NOTE(review): these examples presumably replay their S3 traffic from the
# per-example VCR cassettes; if so, adding, removing or reordering storage
# calls requires re-recording the matching cassette.
describe Polipus::Storage::S3Store do

  before(:each) do
    @storage = Polipus::Storage.s3_store(
      '_test_pages',
      {
        :access_key_id => 'XXXXXXX',
        :secret_access_key => 'XXXX'
      }
    )
  end

  after(:each) {@storage.clear}


  it 'should store a page' do
    p = page_factory 'http://www.google.com', :code => 200, :body => '<html></html>'
    uuid = @storage.add p
    uuid.should be == 'ed646a3334ca891fd3467db131372140'
    @storage.count.should be 1
    p = @storage.get p
    p.url.to_s.should be == 'http://www.google.com'
    p.body.should be == '<html></html>'
    @storage.remove p
  end

  it 'should update a page' do
    p = page_factory 'http://www.google.com', :code => 301, :body => '<html></html>'
    @storage.add p
    p = @storage.get p
    p.code.should be == 301
    @storage.count.should be == 1
    @storage.remove p
  end

  it 'should iterate over stored pages' do
    10.times {|i| @storage.add page_factory("http://www.google.com/p_#{i}", :code => 200, :body => "<html>#{i}</html>")}
    @storage.count.should be 10
    @storage.each do |k, page|
      k.should be =~ /[a-f0-9]{32}/
    end
  end

  it 'should delete a page' do
    p = page_factory 'http://www.google.com', :code => 301, :body => '<html></html>'
    @storage.add p
    @storage.remove p
    @storage.get(p).should be_nil
    @storage.count.should be 0
  end

  it 'should store a page removing a query string from the uuid generation' do
    p = page_factory 'http://www.asd.com/?asd=lol', :code => 200, :body => '<html></html>'
    p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', :code => 200, :body => '<html></html>'
    @storage.include_query_string_in_uuid = false
    @storage.add p
    # Same url, different query string: both map to the same stored page.
    @storage.exists?(p_no_query).should be_true
    @storage.remove p
  end

  it 'should store a page removing a query string from the uuid generation no ending slash' do
    p = page_factory 'http://www.asd.com?asd=lol', :code => 200, :body => '<html></html>'
    p_no_query = page_factory 'http://www.asd.com', :code => 200, :body => '<html></html>'
    @storage.include_query_string_in_uuid = false
    @storage.add p
    @storage.exists?(p_no_query).should be_true
    @storage.remove p
  end

  it 'should store a page with user data associated' do
    p = page_factory 'http://www.user.com', :code => 200, :body => '<html></html>'
    p.user_data.name = 'Test User Data'
    @storage.add p
    @storage.exists?(p).should be_true
    p = @storage.get(p)
    p.user_data.name.should be == 'Test User Data'
    @storage.remove p
  end

  it 'should honor the except parameters' do
    # A dedicated storage on the same bucket name that leaves out 'body'.
    # (The original assigned the local twice: `storage = storage = ...`.)
    storage = Polipus::Storage.s3_store(
      '_test_pages',
      {
        :access_key_id => 'XXXXXXX',
        :secret_access_key => 'XXXX'
      },
      ['body']
    )
    p = page_factory 'http://www.user-doo.com', :code => 200, :body => '<html></html>'
    storage.add p
    p = storage.get p

    p.body.should be_nil
    storage.clear
  end

  it 'should return false if a doc not exists' do
    @storage.include_query_string_in_uuid = false
    p_other = page_factory 'http://www.asdrrrr.com', :code => 200, :body => '<html></html>'
    @storage.exists?(p_other).should be_false
    @storage.add p_other
    @storage.exists?(p_other).should be_true
    p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', :code => 200, :body => '<html></html>'
    # With the query string ignored the stored page is found ...
    @storage.exists?(p_other).should be_true
    # ... and it is not once the query string is part of the uuid again.
    @storage.include_query_string_in_uuid = true
    @storage.exists?(p_other).should be_false
    @storage.remove p_other
  end

end