polipus 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. checksums.yaml +8 -8
  2. data/.rubocop.yml +17 -0
  3. data/.rubocop_todo.yml +37 -0
  4. data/.travis.yml +2 -1
  5. data/CHANGELOG.md +20 -0
  6. data/README.md +10 -0
  7. data/Rakefile +4 -4
  8. data/examples/basic.rb +16 -19
  9. data/examples/incremental.rb +17 -17
  10. data/examples/robots_txt_handling.rb +1 -1
  11. data/examples/survival.rb +3 -3
  12. data/lib/polipus.rb +186 -229
  13. data/lib/polipus/http.rb +41 -42
  14. data/lib/polipus/page.rb +33 -34
  15. data/lib/polipus/plugin.rb +2 -2
  16. data/lib/polipus/plugins/cleaner.rb +7 -8
  17. data/lib/polipus/plugins/sample.rb +6 -9
  18. data/lib/polipus/plugins/sleeper.rb +7 -8
  19. data/lib/polipus/queue_overflow.rb +11 -11
  20. data/lib/polipus/queue_overflow/base.rb +1 -1
  21. data/lib/polipus/queue_overflow/dev_null_queue.rb +9 -9
  22. data/lib/polipus/queue_overflow/manager.rb +28 -25
  23. data/lib/polipus/queue_overflow/mongo_queue.rb +24 -26
  24. data/lib/polipus/queue_overflow/mongo_queue_capped.rb +12 -12
  25. data/lib/polipus/robotex.rb +41 -51
  26. data/lib/polipus/signal_handler.rb +41 -0
  27. data/lib/polipus/storage.rb +11 -11
  28. data/lib/polipus/storage/base.rb +10 -8
  29. data/lib/polipus/storage/dev_null.rb +6 -7
  30. data/lib/polipus/storage/memory_store.rb +21 -22
  31. data/lib/polipus/storage/mongo_store.rb +34 -38
  32. data/lib/polipus/storage/s3_store.rb +33 -38
  33. data/lib/polipus/url_tracker.rb +3 -3
  34. data/lib/polipus/url_tracker/bloomfilter.rb +4 -5
  35. data/lib/polipus/url_tracker/redis_set.rb +3 -4
  36. data/lib/polipus/version.rb +3 -3
  37. data/polipus.gemspec +12 -13
  38. data/spec/clear.rb +3 -3
  39. data/spec/http_spec.rb +27 -28
  40. data/spec/page_spec.rb +16 -16
  41. data/spec/polipus_spec.rb +34 -31
  42. data/spec/queue_overflow_manager_spec.rb +30 -28
  43. data/spec/queue_overflow_spec.rb +15 -15
  44. data/spec/robotex_spec.rb +9 -10
  45. data/spec/signal_handler_spec.rb +18 -0
  46. data/spec/spec_helper.rb +7 -6
  47. data/spec/storage_memory_spec.rb +18 -18
  48. data/spec/storage_mongo_spec.rb +19 -19
  49. data/spec/storage_s3_spec.rb +30 -31
  50. data/spec/url_tracker_spec.rb +7 -7
  51. metadata +7 -2
@@ -1,4 +1,4 @@
1
1
  module Polipus
2
- VERSION = "0.3.0"
3
- HOMEPAGE = "https://github.com/taganaka/polipus"
4
- end
2
+ VERSION = '0.3.1'
3
+ HOMEPAGE = 'https://github.com/taganaka/polipus'
4
+ end
data/polipus.gemspec CHANGED
@@ -1,25 +1,25 @@
1
1
  # -*- encoding: utf-8 -*-
2
- $:.push File.expand_path("../lib", __FILE__)
3
- require "polipus/version"
2
+ $LOAD_PATH.push File.expand_path('../lib', __FILE__)
3
+ require 'polipus/version'
4
4
 
5
5
  Gem::Specification.new do |s|
6
- s.name = "polipus"
6
+ s.name = 'polipus'
7
7
  s.version = Polipus::VERSION
8
- s.authors = ["Francesco Laurita"]
9
- s.email = ["francesco.laurita@gmail.com"]
8
+ s.authors = ['Francesco Laurita']
9
+ s.email = ['francesco.laurita@gmail.com']
10
10
  s.homepage = Polipus::HOMEPAGE
11
- s.summary = %q{Polipus distributed web-crawler framework}
12
- s.description = %q{
11
+ s.summary = %q(Polipus distributed web-crawler framework)
12
+ s.description = %q(
13
13
  An easy to use distributed web-crawler framework based on Redis
14
- }
15
- s.licenses = ["MIT"]
14
+ )
15
+ s.licenses = ['MIT']
16
16
 
17
- s.rubyforge_project = "polipus"
17
+ s.rubyforge_project = 'polipus'
18
18
 
19
19
  s.files = `git ls-files`.split("\n")
20
20
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
21
- s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
22
- s.require_paths = ["lib"]
21
+ s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
22
+ s.require_paths = ['lib']
23
23
 
24
24
  s.add_runtime_dependency 'redis-bloomfilter', '~> 0.0', '>= 0.0.1'
25
25
  s.add_runtime_dependency 'redis-queue', '~> 0.0', '>= 0.0.3'
@@ -43,5 +43,4 @@ Gem::Specification.new do |s|
43
43
  s.add_development_dependency 'rake', '~> 10.3', '>= 10.3.2'
44
44
  s.add_development_dependency 'coveralls'
45
45
 
46
-
47
46
  end
data/spec/clear.rb CHANGED
@@ -1,4 +1,4 @@
1
- require "yaml"
1
+ require 'yaml'
2
2
  Dir.glob('./cassettes/*.yml').each do|f|
3
3
  next unless f =~ /[a-f0-9]{32}/
4
4
  d = YAML.load_file(f)
@@ -6,6 +6,6 @@ Dir.glob('./cassettes/*.yml').each do|f|
6
6
  r['request'].delete('headers')
7
7
  r['response'].delete('headers')
8
8
  end
9
- File.open(f, 'w') {|fw| fw.write(d.to_yaml) }
10
- #puts d.to_yaml
9
+ File.open(f, 'w') { |fw| fw.write(d.to_yaml) }
10
+ # puts d.to_yaml
11
11
  end
data/spec/http_spec.rb CHANGED
@@ -1,16 +1,16 @@
1
- require "spec_helper"
2
- require "mongo"
3
- require "polipus/http"
4
- require "polipus/page"
1
+ require 'spec_helper'
2
+ require 'mongo'
3
+ require 'polipus/http'
4
+ require 'polipus/page'
5
5
 
6
6
  describe Polipus::HTTP do
7
-
7
+
8
8
  it 'should download a page' do
9
9
  VCR.use_cassette('http_test') do
10
10
  http = Polipus::HTTP.new
11
- page = http.fetch_page("http://sfbay.craigslist.org/apa/")
11
+ page = http.fetch_page('http://sfbay.craigslist.org/apa/')
12
12
  page.should be_an_instance_of(Polipus::Page)
13
- page.doc.search("title").text.strip.should eq "SF bay area apts/housing for rent classifieds - craigslist"
13
+ page.doc.search('title').text.strip.should eq 'SF bay area apts/housing for rent classifieds - craigslist'
14
14
  page.fetched_at.should_not be_nil
15
15
  page.fetched?.should be_true
16
16
  end
@@ -20,11 +20,11 @@ describe Polipus::HTTP do
20
20
  VCR.use_cassette('http_test_redirect') do
21
21
 
22
22
  http = Polipus::HTTP.new
23
- page = http.fetch_page("http://greenbytes.de/tech/tc/httpredirects/t300bodyandloc.asis")
23
+ page = http.fetch_page('http://greenbytes.de/tech/tc/httpredirects/t300bodyandloc.asis')
24
24
 
25
25
  page.should be_an_instance_of(Polipus::Page)
26
26
  page.code.should be 200
27
- page.url.to_s.should eq "http://greenbytes.de/tech/tc/httpredirects/300.txt"
27
+ page.url.to_s.should eq 'http://greenbytes.de/tech/tc/httpredirects/300.txt'
28
28
  page.body.strip.should eq "You have reached the target\r\nof a 300 redirect."
29
29
  end
30
30
  end
@@ -32,59 +32,58 @@ describe Polipus::HTTP do
32
32
  describe 'proxy settings' do
33
33
 
34
34
  it 'should set proxy correctly using a procedure' do
35
- http = Polipus::HTTP.new({proxy_host: -> con { "127.0.0.0" }, proxy_port: -> con { 8080 }})
36
- http.proxy_host.should eq "127.0.0.0"
35
+ http = Polipus::HTTP.new(proxy_host: -> _con { '127.0.0.0' }, proxy_port: -> _con { 8080 })
36
+ http.proxy_host.should eq '127.0.0.0'
37
37
  http.proxy_port.should be 8080
38
38
  end
39
39
 
40
40
  it 'should set proxy correctly using shorthand method' do
41
- http = Polipus::HTTP.new({proxy_host_port: -> con {["127.0.0.0", 8080] }})
42
- http.proxy_host_port.should eq ["127.0.0.0", 8080]
41
+ http = Polipus::HTTP.new(proxy_host_port: -> _con { ['127.0.0.0', 8080] })
42
+ http.proxy_host_port.should eq ['127.0.0.0', 8080]
43
43
  http.proxy_port.should be 8080
44
- http.proxy_host.should eq "127.0.0.0"
44
+ http.proxy_host.should eq '127.0.0.0'
45
45
  end
46
46
 
47
47
  it 'should set proxy settings' do
48
- http = Polipus::HTTP.new({proxy_host: "127.0.0.0", proxy_port: 8080 })
48
+ http = Polipus::HTTP.new(proxy_host: '127.0.0.0', proxy_port: 8080)
49
49
  http.proxy_port.should be 8080
50
- http.proxy_host.should eq "127.0.0.0"
50
+ http.proxy_host.should eq '127.0.0.0'
51
51
  end
52
52
 
53
53
  end
54
54
 
55
-
56
55
  describe 'compressed content handling' do
57
56
 
58
57
  it 'should decode gzip content' do
59
58
  VCR.use_cassette('gzipped_on') do
60
59
  http = Polipus::HTTP.new(logger: Logger.new(STDOUT))
61
- page = http.fetch_page("http://www.whatsmyip.org/http-compression-test/")
60
+ page = http.fetch_page('http://www.whatsmyip.org/http-compression-test/')
62
61
  page.doc.css('.gzip_yes').should_not be_empty
63
62
  end
64
63
  end
65
64
 
66
65
  it 'should decode deflate content' do
67
66
  http = Polipus::HTTP.new(logger: Logger.new(STDOUT))
68
- page = http.fetch_page("http://david.fullrecall.com/browser-http-compression-test?compression=deflate-http")
67
+ page = http.fetch_page('http://david.fullrecall.com/browser-http-compression-test?compression=deflate-http')
69
68
  page.headers.fetch('content-encoding').first.should eq 'deflate'
70
- page.body.include?("deflate-http").should be_true
69
+ page.body.include?('deflate-http').should be_true
71
70
  end
72
71
 
73
72
  end
74
73
 
75
74
  describe 'staled connections' do
76
-
75
+
77
76
  it 'should refresh a staled connection' do
78
77
  VCR.use_cassette('http_tconnection_max_hits') do
79
78
  http = Polipus::HTTP.new(connection_max_hits: 1, logger: Logger.new(STDOUT))
80
79
  http.class.__send__(:attr_reader, :connections)
81
80
  http.class.__send__(:attr_reader, :connections_hits)
82
- http.fetch_page("https://www.yahoo.com/")
81
+ http.fetch_page('https://www.yahoo.com/')
83
82
  http.connections['www.yahoo.com'][443].should_not be_nil
84
83
  old_conn = http.connections['www.yahoo.com'][443]
85
84
  http.connections_hits['www.yahoo.com'][443].should be 1
86
85
 
87
- http.fetch_page("https://www.yahoo.com/tech/expectant-parents-asked-the-internet-to-name-their-83416450388.html")
86
+ http.fetch_page('https://www.yahoo.com/tech/expectant-parents-asked-the-internet-to-name-their-83416450388.html')
88
87
  http.connections_hits['www.yahoo.com'][443].should be 1
89
88
  http.connections['www.yahoo.com'][443].should_not be old_conn
90
89
  end
@@ -97,9 +96,9 @@ describe Polipus::HTTP do
97
96
  it 'should handle cookies correctly' do
98
97
  VCR.use_cassette('http_cookies') do
99
98
  http = Polipus::HTTP.new(accept_cookies: true)
100
- http.fetch_page "http://www.whatarecookies.com/cookietest.asp"
99
+ http.fetch_page 'http://www.whatarecookies.com/cookietest.asp'
101
100
  http.accept_cookies?.should be_true
102
- http.cookie_jar.cookies(URI("http://www.whatarecookies.com/cookietest.asp")).should_not be_empty
101
+ http.cookie_jar.cookies(URI('http://www.whatarecookies.com/cookietest.asp')).should_not be_empty
103
102
  end
104
103
  end
105
104
 
@@ -108,10 +107,10 @@ describe Polipus::HTTP do
108
107
  describe 'net errors' do
109
108
  it 'should handle net errors correctly' do
110
109
  VCR.use_cassette('http_errors') do
111
- http = Polipus::HTTP.new(open_timeout:1, read_timeout: 1)
112
- http.fetch_page("http://www.wrong-domain.lol/").error.should_not be_nil
110
+ http = Polipus::HTTP.new(open_timeout: 1, read_timeout: 1)
111
+ http.fetch_page('http://www.wrong-domain.lol/').error.should_not be_nil
113
112
  end
114
113
  end
115
114
  end
116
115
 
117
- end
116
+ end
data/spec/page_spec.rb CHANGED
@@ -1,5 +1,5 @@
1
- require "spec_helper"
2
- require "polipus/page"
1
+ require 'spec_helper'
2
+ require 'polipus/page'
3
3
 
4
4
  describe Polipus::Page do
5
5
  let(:page) do
@@ -14,29 +14,29 @@ describe Polipus::Page do
14
14
  </body>
15
15
  </html>
16
16
  EOF
17
- Polipus::Page.new 'http://www.google.com/',
18
- code: 200,
19
- body: body,
20
- headers: {'content-type' => ['text/html']},
21
- domain_aliases: %w(www.google.com google.com)
17
+ Polipus::Page.new 'http://www.google.com/',
18
+ code: 200,
19
+ body: body,
20
+ headers: { 'content-type' => ['text/html'] },
21
+ domain_aliases: %w(www.google.com google.com)
22
22
  end
23
23
 
24
24
  it 'should be fetched' do
25
25
  page.fetched?.should be_true
26
26
  end
27
-
27
+
28
28
  it 'should honor domain_aliases attribute' do
29
29
  page.links.count.should be 4
30
30
  end
31
31
 
32
32
  context 'page expiring' do
33
33
  let(:page) do
34
- Polipus::Page.new 'http://www.google.com/',
35
- code: 200,
36
- body: '',
37
- headers: {'content-type' => ['text/html']},
38
- domain_aliases: %w(www.google.com google.com),
39
- fetched_at: (Time.now.to_i - 30)
34
+ Polipus::Page.new 'http://www.google.com/',
35
+ code: 200,
36
+ body: '',
37
+ headers: { 'content-type' => ['text/html'] },
38
+ domain_aliases: %w(www.google.com google.com),
39
+ fetched_at: (Time.now.to_i - 30)
40
40
  end
41
41
 
42
42
  it 'should be marked at expired' do
@@ -49,12 +49,12 @@ EOF
49
49
  end
50
50
 
51
51
  context 'page error' do
52
-
52
+
53
53
  let(:page) do
54
54
  Polipus::Page.new 'http://www.google.com/', error: 'an error'
55
55
  end
56
56
 
57
- it 'should serialize an error' do
57
+ it 'should serialize an error' do
58
58
  page.to_hash['error'].should eq 'an error'
59
59
  end
60
60
 
data/spec/polipus_spec.rb CHANGED
@@ -1,11 +1,11 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe Polipus::PolipusCrawler do
4
- after(:each) {Redis.new(db:10).flushdb}
5
- let(:p_options) {
4
+ after(:each) { Redis.new(db: 10).flushdb }
5
+ let(:p_options) do
6
6
  {
7
7
  workers: 1,
8
- redis_options: {host: 'localhost', db:10},
8
+ redis_options: { host: 'localhost', db: 10 },
9
9
  depth_limit: 1,
10
10
  queue_timeout: 1,
11
11
  user_agent: 'polipus-rspec',
@@ -13,55 +13,58 @@ describe Polipus::PolipusCrawler do
13
13
  logger_level: Logger::DEBUG,
14
14
  storage: Polipus::Storage.memory_store
15
15
  }
16
- }
17
- let(:polipus) {
18
- Polipus::PolipusCrawler.new("polipus-rspec", ["http://rubygems.org/gems"], p_options)
19
- }
16
+ end
17
+ let(:polipus) do
18
+ Polipus::PolipusCrawler.new('polipus-rspec', ['http://rubygems.org/gems'], p_options)
19
+ end
20
20
 
21
- let(:init_page){
22
- Polipus::Page.new "http://rubygems.org/gems"
23
- }
21
+ let(:init_page)do
22
+ Polipus::Page.new 'http://rubygems.org/gems'
23
+ end
24
24
 
25
- let(:logger){Logger.new(nil)}
25
+ let(:logger) { Logger.new(nil) }
26
26
 
27
- context "polipus" do
27
+ context 'polipus' do
28
28
 
29
- it "should create a polipus instance" do
29
+ it 'should create a polipus instance' do
30
30
  polipus.should be_an_instance_of Polipus::PolipusCrawler
31
31
  end
32
32
 
33
- it "should execute a crawling session" do
33
+ it 'should execute a crawling session' do
34
34
  polipus.takeover
35
35
  polipus.storage.exists?(init_page).should be_true
36
36
  polipus.storage.get(init_page).links.count.should be polipus.storage.count
37
37
  end
38
38
 
39
- it "should filter unwanted urls" do
39
+ it 'should filter unwanted urls' do
40
40
  polipus.skip_links_like(/\/pages\//)
41
41
  polipus.takeover
42
42
  polipus.storage.get(init_page).links
43
43
  .reject { |e| e.path.to_s =~ /\/pages\// }.count.should be polipus.storage.count
44
44
  end
45
45
 
46
- it "should follow only wanted urls" do
46
+ it 'should follow only wanted urls' do
47
47
  polipus.follow_links_like(/\/pages\//)
48
48
  polipus.follow_links_like(/\/gems$/)
49
49
  polipus.takeover
50
50
  polipus.storage.get(init_page).links
51
- .reject { |e| ![/\/pages\//, /\/gems$/].any?{|p| e.path =~ p} }
51
+ .reject { |e| ![/\/pages\//, /\/gems$/].any? { |p| e.path =~ p } }
52
52
  .count.should be polipus.storage.count
53
53
  end
54
54
 
55
- it "should refresh expired pages" do
55
+ it 'should refresh expired pages' do
56
56
  polipus.ttl_page = 3600
57
57
  polipus.takeover
58
- polipus.storage.each {|id, page| page.fetched_at = page.fetched_at - 3600; polipus.storage.add(page)}
59
- polipus.storage.each {|id, page| page.expired?(3600).should be_true}
58
+ polipus.storage.each do |_id, page|
59
+ page.fetched_at = page.fetched_at - 3600
60
+ polipus.storage.add(page)
61
+ end
62
+ polipus.storage.each { |_id, page| page.expired?(3600).should be_true }
60
63
  polipus.takeover
61
- polipus.storage.each {|id, page| page.expired?(3600).should be_false}
64
+ polipus.storage.each { |_id, page| page.expired?(3600).should be_false }
62
65
  end
63
66
 
64
- it "should re-download seeder urls no matter what" do
67
+ it 'should re-download seeder urls no matter what' do
65
68
  cache_hit = {}
66
69
  polipus.follow_links_like(/\/gems$/)
67
70
  polipus.on_page_downloaded do |page|
@@ -70,26 +73,26 @@ describe Polipus::PolipusCrawler do
70
73
  end
71
74
  polipus.takeover
72
75
  polipus.takeover
73
- cache_hit["http://rubygems.org/gems"].should be 2
76
+ cache_hit['http://rubygems.org/gems'].should be 2
74
77
  end
75
78
 
76
- it "should call on_page_error code blocks when a page has error" do
77
- p = Polipus::PolipusCrawler.new("polipus-rspec", ["http://dasd.adad.dom/"], p_options.merge(open_timeout:1, read_timeout: 1))
79
+ it 'should call on_page_error code blocks when a page has error' do
80
+ p = Polipus::PolipusCrawler.new('polipus-rspec', ['http://dasd.adad.dom/'], p_options.merge(open_timeout: 1, read_timeout: 1))
78
81
  a_page = nil
79
- p.on_page_error {|page| a_page = page}
82
+ p.on_page_error { |page| a_page = page }
80
83
  p.takeover
81
84
  a_page.should_not be_nil
82
85
  a_page.error.should_not be_nil
83
86
  end
84
87
 
85
- it "should obey to the robots.txt file" do
88
+ it 'should obey to the robots.txt file' do
86
89
  lopt = p_options
87
90
  lopt[:obey_robots_txt] = true
88
- polipus = Polipus::PolipusCrawler.new("polipus-rspec", ["https://rubygems.org/gems/polipus"], lopt)
91
+ polipus = Polipus::PolipusCrawler.new('polipus-rspec', ['https://rubygems.org/gems/polipus'], lopt)
89
92
  polipus.depth_limit = 1
90
93
  polipus.takeover
91
- polipus.storage.each {|id, page| (page.url.path =~ /$\/downloads\//).should be_false}
94
+ polipus.storage.each { |_id, page| (page.url.path =~ /$\/downloads\//).should be_false }
92
95
  end
93
96
 
94
97
  end
95
- end
98
+ end
@@ -1,20 +1,22 @@
1
- require "spec_helper"
2
- require "polipus/queue_overflow"
3
- require "redis-queue"
1
+ require 'spec_helper'
2
+ require 'mongo'
3
+ require 'polipus/queue_overflow'
4
+ require 'redis-queue'
4
5
 
5
6
  describe Polipus::QueueOverflow::Manager do
6
7
  before(:all) do
7
- @mongo = Mongo::Connection.new("localhost", 27017, :pool_size => 15, :pool_timeout => 5).db('_test_polipus')
8
+ @mongo = Mongo::Connection.new('localhost', 27_017, pool_size: 15, pool_timeout: 5).db('_test_polipus')
8
9
  @mongo['_test_pages'].drop
9
10
  @storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
10
- @redis_q = Redis::Queue.new("queue_test","bp_queue_test", :redis => Redis.new())
11
- @queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, "queue_test")
11
+ @redis_q = Redis::Queue.new('queue_test', 'bp_queue_test', redis: Redis.new)
12
+ @queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, 'queue_test')
12
13
  @redis = Redis.new
13
- @polipus = flexmock("polipus")
14
+ @polipus = flexmock('polipus')
14
15
  @polipus.should_receive(:queue_overflow_adapter).and_return(@queue_overflow)
15
16
  @polipus.should_receive(:storage).and_return(@storage)
16
17
  @polipus.should_receive(:redis).and_return(@redis)
17
- @polipus.should_receive(:job_name).and_return("___test")
18
+ @polipus.should_receive(:job_name).and_return('___test')
19
+ @polipus.should_receive(:logger).and_return(Logger.new(nil))
18
20
  @manager = Polipus::QueueOverflow::Manager.new(@polipus, @redis_q, 10)
19
21
  end
20
22
 
@@ -30,16 +32,16 @@ describe Polipus::QueueOverflow::Manager do
30
32
  end
31
33
 
32
34
  it 'should remove 10 items' do
33
- @manager.perform.should be == [0,0]
34
- 20.times {|i| @redis_q << page_factory("http://www.user-doo.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
35
+ @manager.perform.should be == [0, 0]
36
+ 20.times { |i| @redis_q << page_factory("http://www.user-doo.com/page_#{i}", code: 200, body: '<html></html>').to_json }
35
37
  @manager.perform.should be == [10, 0]
36
38
  @queue_overflow.size.should be == 10
37
39
  @redis_q.size.should be == 10
38
40
  end
39
41
 
40
42
  it 'should restore 10 items' do
41
- @manager.perform.should be == [0,0]
42
- 10.times {|i| @queue_overflow << page_factory("http://www.user-doo-bla.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
43
+ @manager.perform.should be == [0, 0]
44
+ 10.times { |i| @queue_overflow << page_factory("http://www.user-doo-bla.com/page_#{i}", code: 200, body: '<html></html>').to_json }
43
45
  @manager.perform.should be == [0, 10]
44
46
  @queue_overflow.size.should be == 0
45
47
  @redis_q.size.should be == 10
@@ -48,45 +50,45 @@ describe Polipus::QueueOverflow::Manager do
48
50
  end
49
51
 
50
52
  it 'should restore 3 items' do
51
-
52
- @manager.perform.should be == [0,0]
53
- 3.times {|i| @queue_overflow << page_factory("http://www.user-doo-bu.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
53
+
54
+ @manager.perform.should be == [0, 0]
55
+ 3.times { |i| @queue_overflow << page_factory("http://www.user-doo-bu.com/page_#{i}", code: 200, body: '<html></html>').to_json }
54
56
  @manager.perform.should be == [0, 3]
55
57
  @queue_overflow.size.should be == 0
56
58
  @redis_q.size.should be == 3
57
59
  @manager.perform.should be == [0, 0]
58
-
60
+
59
61
  end
60
62
 
61
63
  it 'should restore 0 items' do
62
-
63
- @manager.perform.should be == [0,0]
64
- 10.times {|i|
65
- p = page_factory("http://www.user-doo-bu.com/page_#{i}", :code => 200, :body => '<html></html>')
64
+
65
+ @manager.perform.should be == [0, 0]
66
+ 10.times do|i|
67
+ p = page_factory("http://www.user-doo-bu.com/page_#{i}", code: 200, body: '<html></html>')
66
68
  @storage.add p
67
- @queue_overflow << p.to_json
68
- }
69
+ @queue_overflow << p.to_json
70
+ end
69
71
  @manager.perform.should be == [0, 0]
70
72
  @queue_overflow.size.should be == 0
71
73
  @redis_q.size.should be == 0
72
74
  @manager.perform.should be == [0, 0]
73
-
75
+
74
76
  end
75
77
 
76
78
  it 'should filter an url based on the spec' do
77
79
  @queue_overflow.clear
78
80
  @redis_q.clear
79
- 10.times {|i| @queue_overflow << page_factory("http://www.user-doo.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
81
+ 10.times { |i| @queue_overflow << page_factory("http://www.user-doo.com/page_#{i}", code: 200, body: '<html></html>').to_json }
80
82
  @manager.url_filter do |page|
81
- page.url.to_s.end_with?("page_0") ? false : true
83
+ page.url.to_s.end_with?('page_0') ? false : true
82
84
  end
83
- @manager.perform.should be == [0,9]
85
+ @manager.perform.should be == [0, 9]
84
86
  @queue_overflow.size.should be == 0
85
87
  @redis_q.size.should be == 9
86
- @manager.url_filter do |page|
88
+ @manager.url_filter do |_page|
87
89
  true
88
90
  end
89
91
 
90
92
  end
91
93
 
92
- end
94
+ end