polipus 0.3.7 → 0.4.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +8 -8
  2. data/.rspec +1 -1
  3. data/.rubocop.yml +3 -3
  4. data/.rubocop_todo.yml +1 -1
  5. data/.travis.yml +14 -4
  6. data/AUTHORS.md +1 -0
  7. data/CHANGELOG.md +9 -1
  8. data/Gemfile +9 -0
  9. data/README.md +2 -3
  10. data/Rakefile +1 -3
  11. data/examples/basic.rb +8 -1
  12. data/lib/polipus.rb +25 -13
  13. data/lib/polipus/queue_overflow.rb +1 -0
  14. data/lib/polipus/queue_overflow/manager.rb +1 -0
  15. data/lib/polipus/queue_overflow/mongo_queue.rb +1 -1
  16. data/lib/polipus/queue_overflow/worker.rb +24 -0
  17. data/lib/polipus/storage.rb +10 -16
  18. data/lib/polipus/storage/mongo_store.rb +6 -1
  19. data/lib/polipus/storage/rethink_store.rb +90 -0
  20. data/lib/polipus/version.rb +1 -1
  21. data/polipus.gemspec +16 -18
  22. data/spec/{http_spec.rb → polipus/http_spec.rb} +26 -37
  23. data/spec/{page_spec.rb → polipus/page_spec.rb} +7 -11
  24. data/spec/{queue_overflow_manager_spec.rb → polipus/queue_overflow/manager_spec.rb} +22 -29
  25. data/spec/{queue_overflow_spec.rb → polipus/queue_overflow_spec.rb} +14 -20
  26. data/spec/{robotex_spec.rb → polipus/robotex_spec.rb} +10 -11
  27. data/spec/{signal_handler_spec.rb → polipus/signal_handler_spec.rb} +2 -6
  28. data/spec/{storage_memory_spec.rb → polipus/storage/memory_store_spec.rb} +18 -21
  29. data/spec/{storage_mongo_spec.rb → polipus/storage/mongo_store_spec.rb} +23 -25
  30. data/spec/polipus/storage/rethink_store_spec.rb +117 -0
  31. data/spec/{url_tracker_spec.rb → polipus/url_tracker_spec.rb} +4 -4
  32. data/spec/polipus_spec.rb +13 -15
  33. data/spec/spec_helper.rb +13 -12
  34. metadata +76 -154
  35. data/lib/polipus/storage/s3_store.rb +0 -96
  36. data/spec/cassettes/08b228db424a926e1ed6ab63b38d847e.yml +0 -166
  37. data/spec/cassettes/20aa41f181b49f00078c3ca30bad5afe.yml +0 -166
  38. data/spec/cassettes/4640919145753505af2d0f8423de37f3.yml +0 -270
  39. data/spec/cassettes/66aae15a03f4aab8efd15e40d2d7882a.yml +0 -194
  40. data/spec/cassettes/76b7c197c95a5bf9b1e882c567192d72.yml +0 -183
  41. data/spec/cassettes/9b1d523b7f5db7214f8a8bd9272cccba.yml +0 -221
  42. data/spec/cassettes/ab333f89535a2efb284913fede6aa7c7.yml +0 -221
  43. data/spec/cassettes/ae5d7cffde3f53122cdf79f3d1367e8e.yml +0 -221
  44. data/spec/cassettes/ffe3d588b6df4b9de35e5a7ccaf5a81b.yml +0 -695
  45. data/spec/storage_s3_spec.rb +0 -115
@@ -1,115 +0,0 @@
1
- # encoding: UTF-8
2
- require 'spec_helper'
3
- require 'aws/s3'
4
- require 'polipus/storage/s3_store'
5
- describe Polipus::Storage::S3Store do
6
-
7
- before(:each) do
8
- @storage = Polipus::Storage.s3_store(
9
- '_test_pages',
10
-
11
- access_key_id: 'XXXXXXX',
12
- secret_access_key: 'XXXX'
13
-
14
- )
15
- end
16
-
17
- after(:each) { @storage.clear }
18
-
19
- it 'should store a page' do
20
-
21
- p = page_factory 'http://www.google.com', code: 200, body: '<html></html>'
22
- uuid = @storage.add p
23
- uuid.should be == 'ed646a3334ca891fd3467db131372140'
24
- @storage.count.should be 1
25
- p = @storage.get p
26
- p.url.to_s.should be == 'http://www.google.com'
27
- p.body.should be == '<html></html>'
28
- @storage.remove p
29
-
30
- end
31
-
32
- it 'should update a page' do
33
- p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
34
- @storage.add p
35
- p = @storage.get p
36
- p.code.should be == 301
37
- @storage.count.should be == 1
38
- @storage.remove p
39
- end
40
-
41
- it 'should iterate over stored pages' do
42
- 10.times { |i| @storage.add page_factory("http://www.google.com/p_#{i}", code: 200, body: "<html>#{i}</html>") }
43
- @storage.count.should be 10
44
- @storage.each do |k, _page|
45
- k.should be =~ /[a-f0-9]{32}/
46
- end
47
- end
48
-
49
- it 'should delete a page' do
50
- p = page_factory 'http://www.google.com', code: 301, body: '<html></html>'
51
- @storage.add p
52
- @storage.remove p
53
- @storage.get(p).should be_nil
54
- @storage.count.should be 0
55
- end
56
-
57
- it 'should store a page removing a query string from the uuid generation' do
58
- p = page_factory 'http://www.asd.com/?asd=lol', code: 200, body: '<html></html>'
59
- p_no_query = page_factory 'http://www.asd.com/?asdas=dasda&adsda=1', code: 200, body: '<html></html>'
60
- @storage.include_query_string_in_uuid = false
61
- @storage.add p
62
- @storage.exists?(p_no_query).should be_true
63
- @storage.remove p
64
- end
65
-
66
- it 'should store a page removing a query string from the uuid generation no ending slash' do
67
- p = page_factory 'http://www.asd.com?asd=lol', code: 200, body: '<html></html>'
68
- p_no_query = page_factory 'http://www.asd.com', code: 200, body: '<html></html>'
69
- @storage.include_query_string_in_uuid = false
70
- @storage.add p
71
- @storage.exists?(p_no_query).should be_true
72
- @storage.remove p
73
- end
74
-
75
- it 'should store a page with user data associated' do
76
- p = page_factory 'http://www.user.com', code: 200, body: '<html></html>'
77
- p.user_data.name = 'Test User Data'
78
- @storage.add p
79
- @storage.exists?(p).should be_true
80
- p = @storage.get(p)
81
- p.user_data.name.should be == 'Test User Data'
82
- @storage.remove p
83
- end
84
-
85
- it 'should honor the except parameters' do
86
- storage = Polipus::Storage.s3_store(
87
- '_test_pages',
88
- {
89
- access_key_id: 'XXXXXXX',
90
- secret_access_key: 'XXXX'
91
- },
92
- ['body']
93
- )
94
- p = page_factory 'http://www.user-doo.com', code: 200, body: '<html></html>'
95
- storage.add p
96
- p = storage.get p
97
-
98
- p.body.should be_nil
99
- storage.clear
100
- end
101
-
102
- it 'should return false if a doc not exists' do
103
- @storage.include_query_string_in_uuid = false
104
- p_other = page_factory 'http://www.asdrrrr.com', code: 200, body: '<html></html>'
105
- @storage.exists?(p_other).should be_false
106
- @storage.add p_other
107
- @storage.exists?(p_other).should be_true
108
- p_other = page_factory 'http://www.asdrrrr.com?trk=asd-lol', code: 200, body: '<html></html>'
109
- @storage.exists?(p_other).should be_true
110
- @storage.include_query_string_in_uuid = true
111
- @storage.exists?(p_other).should be_false
112
- @storage.remove p_other
113
- end
114
-
115
- end