polipus 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51)
  1. checksums.yaml +8 -8
  2. data/.rubocop.yml +17 -0
  3. data/.rubocop_todo.yml +37 -0
  4. data/.travis.yml +2 -1
  5. data/CHANGELOG.md +20 -0
  6. data/README.md +10 -0
  7. data/Rakefile +4 -4
  8. data/examples/basic.rb +16 -19
  9. data/examples/incremental.rb +17 -17
  10. data/examples/robots_txt_handling.rb +1 -1
  11. data/examples/survival.rb +3 -3
  12. data/lib/polipus.rb +186 -229
  13. data/lib/polipus/http.rb +41 -42
  14. data/lib/polipus/page.rb +33 -34
  15. data/lib/polipus/plugin.rb +2 -2
  16. data/lib/polipus/plugins/cleaner.rb +7 -8
  17. data/lib/polipus/plugins/sample.rb +6 -9
  18. data/lib/polipus/plugins/sleeper.rb +7 -8
  19. data/lib/polipus/queue_overflow.rb +11 -11
  20. data/lib/polipus/queue_overflow/base.rb +1 -1
  21. data/lib/polipus/queue_overflow/dev_null_queue.rb +9 -9
  22. data/lib/polipus/queue_overflow/manager.rb +28 -25
  23. data/lib/polipus/queue_overflow/mongo_queue.rb +24 -26
  24. data/lib/polipus/queue_overflow/mongo_queue_capped.rb +12 -12
  25. data/lib/polipus/robotex.rb +41 -51
  26. data/lib/polipus/signal_handler.rb +41 -0
  27. data/lib/polipus/storage.rb +11 -11
  28. data/lib/polipus/storage/base.rb +10 -8
  29. data/lib/polipus/storage/dev_null.rb +6 -7
  30. data/lib/polipus/storage/memory_store.rb +21 -22
  31. data/lib/polipus/storage/mongo_store.rb +34 -38
  32. data/lib/polipus/storage/s3_store.rb +33 -38
  33. data/lib/polipus/url_tracker.rb +3 -3
  34. data/lib/polipus/url_tracker/bloomfilter.rb +4 -5
  35. data/lib/polipus/url_tracker/redis_set.rb +3 -4
  36. data/lib/polipus/version.rb +3 -3
  37. data/polipus.gemspec +12 -13
  38. data/spec/clear.rb +3 -3
  39. data/spec/http_spec.rb +27 -28
  40. data/spec/page_spec.rb +16 -16
  41. data/spec/polipus_spec.rb +34 -31
  42. data/spec/queue_overflow_manager_spec.rb +30 -28
  43. data/spec/queue_overflow_spec.rb +15 -15
  44. data/spec/robotex_spec.rb +9 -10
  45. data/spec/signal_handler_spec.rb +18 -0
  46. data/spec/spec_helper.rb +7 -6
  47. data/spec/storage_memory_spec.rb +18 -18
  48. data/spec/storage_mongo_spec.rb +19 -19
  49. data/spec/storage_s3_spec.rb +30 -31
  50. data/spec/url_tracker_spec.rb +7 -7
  51. metadata +7 -2
data/lib/polipus/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  module Polipus
2
- VERSION = "0.3.0"
3
- HOMEPAGE = "https://github.com/taganaka/polipus"
4
- end
2
+ VERSION = '0.3.1'
3
+ HOMEPAGE = 'https://github.com/taganaka/polipus'
4
+ end
data/polipus.gemspec CHANGED
@@ -1,25 +1,25 @@
1
1
  # -*- encoding: utf-8 -*-
2
- $:.push File.expand_path("../lib", __FILE__)
3
- require "polipus/version"
2
+ $LOAD_PATH.push File.expand_path('../lib', __FILE__)
3
+ require 'polipus/version'
4
4
 
5
5
  Gem::Specification.new do |s|
6
- s.name = "polipus"
6
+ s.name = 'polipus'
7
7
  s.version = Polipus::VERSION
8
- s.authors = ["Francesco Laurita"]
9
- s.email = ["francesco.laurita@gmail.com"]
8
+ s.authors = ['Francesco Laurita']
9
+ s.email = ['francesco.laurita@gmail.com']
10
10
  s.homepage = Polipus::HOMEPAGE
11
- s.summary = %q{Polipus distributed web-crawler framework}
12
- s.description = %q{
11
+ s.summary = %q(Polipus distributed web-crawler framework)
12
+ s.description = %q(
13
13
  An easy to use distributed web-crawler framework based on Redis
14
- }
15
- s.licenses = ["MIT"]
14
+ )
15
+ s.licenses = ['MIT']
16
16
 
17
- s.rubyforge_project = "polipus"
17
+ s.rubyforge_project = 'polipus'
18
18
 
19
19
  s.files = `git ls-files`.split("\n")
20
20
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
21
- s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
22
- s.require_paths = ["lib"]
21
+ s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
22
+ s.require_paths = ['lib']
23
23
 
24
24
  s.add_runtime_dependency 'redis-bloomfilter', '~> 0.0', '>= 0.0.1'
25
25
  s.add_runtime_dependency 'redis-queue', '~> 0.0', '>= 0.0.3'
@@ -43,5 +43,4 @@ Gem::Specification.new do |s|
43
43
  s.add_development_dependency 'rake', '~> 10.3', '>= 10.3.2'
44
44
  s.add_development_dependency 'coveralls'
45
45
 
46
-
47
46
  end
data/spec/clear.rb CHANGED
@@ -1,4 +1,4 @@
1
- require "yaml"
1
+ require 'yaml'
2
2
  Dir.glob('./cassettes/*.yml').each do|f|
3
3
  next unless f =~ /[a-f0-9]{32}/
4
4
  d = YAML.load_file(f)
@@ -6,6 +6,6 @@ Dir.glob('./cassettes/*.yml').each do|f|
6
6
  r['request'].delete('headers')
7
7
  r['response'].delete('headers')
8
8
  end
9
- File.open(f, 'w') {|fw| fw.write(d.to_yaml) }
10
- #puts d.to_yaml
9
+ File.open(f, 'w') { |fw| fw.write(d.to_yaml) }
10
+ # puts d.to_yaml
11
11
  end
data/spec/http_spec.rb CHANGED
@@ -1,16 +1,16 @@
1
- require "spec_helper"
2
- require "mongo"
3
- require "polipus/http"
4
- require "polipus/page"
1
+ require 'spec_helper'
2
+ require 'mongo'
3
+ require 'polipus/http'
4
+ require 'polipus/page'
5
5
 
6
6
  describe Polipus::HTTP do
7
-
7
+
8
8
  it 'should download a page' do
9
9
  VCR.use_cassette('http_test') do
10
10
  http = Polipus::HTTP.new
11
- page = http.fetch_page("http://sfbay.craigslist.org/apa/")
11
+ page = http.fetch_page('http://sfbay.craigslist.org/apa/')
12
12
  page.should be_an_instance_of(Polipus::Page)
13
- page.doc.search("title").text.strip.should eq "SF bay area apts/housing for rent classifieds - craigslist"
13
+ page.doc.search('title').text.strip.should eq 'SF bay area apts/housing for rent classifieds - craigslist'
14
14
  page.fetched_at.should_not be_nil
15
15
  page.fetched?.should be_true
16
16
  end
@@ -20,11 +20,11 @@ describe Polipus::HTTP do
20
20
  VCR.use_cassette('http_test_redirect') do
21
21
 
22
22
  http = Polipus::HTTP.new
23
- page = http.fetch_page("http://greenbytes.de/tech/tc/httpredirects/t300bodyandloc.asis")
23
+ page = http.fetch_page('http://greenbytes.de/tech/tc/httpredirects/t300bodyandloc.asis')
24
24
 
25
25
  page.should be_an_instance_of(Polipus::Page)
26
26
  page.code.should be 200
27
- page.url.to_s.should eq "http://greenbytes.de/tech/tc/httpredirects/300.txt"
27
+ page.url.to_s.should eq 'http://greenbytes.de/tech/tc/httpredirects/300.txt'
28
28
  page.body.strip.should eq "You have reached the target\r\nof a 300 redirect."
29
29
  end
30
30
  end
@@ -32,59 +32,58 @@ describe Polipus::HTTP do
32
32
  describe 'proxy settings' do
33
33
 
34
34
  it 'should set proxy correctly using a procedure' do
35
- http = Polipus::HTTP.new({proxy_host: -> con { "127.0.0.0" }, proxy_port: -> con { 8080 }})
36
- http.proxy_host.should eq "127.0.0.0"
35
+ http = Polipus::HTTP.new(proxy_host: -> _con { '127.0.0.0' }, proxy_port: -> _con { 8080 })
36
+ http.proxy_host.should eq '127.0.0.0'
37
37
  http.proxy_port.should be 8080
38
38
  end
39
39
 
40
40
  it 'should set proxy correctly using shorthand method' do
41
- http = Polipus::HTTP.new({proxy_host_port: -> con {["127.0.0.0", 8080] }})
42
- http.proxy_host_port.should eq ["127.0.0.0", 8080]
41
+ http = Polipus::HTTP.new(proxy_host_port: -> _con { ['127.0.0.0', 8080] })
42
+ http.proxy_host_port.should eq ['127.0.0.0', 8080]
43
43
  http.proxy_port.should be 8080
44
- http.proxy_host.should eq "127.0.0.0"
44
+ http.proxy_host.should eq '127.0.0.0'
45
45
  end
46
46
 
47
47
  it 'should set proxy settings' do
48
- http = Polipus::HTTP.new({proxy_host: "127.0.0.0", proxy_port: 8080 })
48
+ http = Polipus::HTTP.new(proxy_host: '127.0.0.0', proxy_port: 8080)
49
49
  http.proxy_port.should be 8080
50
- http.proxy_host.should eq "127.0.0.0"
50
+ http.proxy_host.should eq '127.0.0.0'
51
51
  end
52
52
 
53
53
  end
54
54
 
55
-
56
55
  describe 'compressed content handling' do
57
56
 
58
57
  it 'should decode gzip content' do
59
58
  VCR.use_cassette('gzipped_on') do
60
59
  http = Polipus::HTTP.new(logger: Logger.new(STDOUT))
61
- page = http.fetch_page("http://www.whatsmyip.org/http-compression-test/")
60
+ page = http.fetch_page('http://www.whatsmyip.org/http-compression-test/')
62
61
  page.doc.css('.gzip_yes').should_not be_empty
63
62
  end
64
63
  end
65
64
 
66
65
  it 'should decode deflate content' do
67
66
  http = Polipus::HTTP.new(logger: Logger.new(STDOUT))
68
- page = http.fetch_page("http://david.fullrecall.com/browser-http-compression-test?compression=deflate-http")
67
+ page = http.fetch_page('http://david.fullrecall.com/browser-http-compression-test?compression=deflate-http')
69
68
  page.headers.fetch('content-encoding').first.should eq 'deflate'
70
- page.body.include?("deflate-http").should be_true
69
+ page.body.include?('deflate-http').should be_true
71
70
  end
72
71
 
73
72
  end
74
73
 
75
74
  describe 'staled connections' do
76
-
75
+
77
76
  it 'should refresh a staled connection' do
78
77
  VCR.use_cassette('http_tconnection_max_hits') do
79
78
  http = Polipus::HTTP.new(connection_max_hits: 1, logger: Logger.new(STDOUT))
80
79
  http.class.__send__(:attr_reader, :connections)
81
80
  http.class.__send__(:attr_reader, :connections_hits)
82
- http.fetch_page("https://www.yahoo.com/")
81
+ http.fetch_page('https://www.yahoo.com/')
83
82
  http.connections['www.yahoo.com'][443].should_not be_nil
84
83
  old_conn = http.connections['www.yahoo.com'][443]
85
84
  http.connections_hits['www.yahoo.com'][443].should be 1
86
85
 
87
- http.fetch_page("https://www.yahoo.com/tech/expectant-parents-asked-the-internet-to-name-their-83416450388.html")
86
+ http.fetch_page('https://www.yahoo.com/tech/expectant-parents-asked-the-internet-to-name-their-83416450388.html')
88
87
  http.connections_hits['www.yahoo.com'][443].should be 1
89
88
  http.connections['www.yahoo.com'][443].should_not be old_conn
90
89
  end
@@ -97,9 +96,9 @@ describe Polipus::HTTP do
97
96
  it 'should handle cookies correctly' do
98
97
  VCR.use_cassette('http_cookies') do
99
98
  http = Polipus::HTTP.new(accept_cookies: true)
100
- http.fetch_page "http://www.whatarecookies.com/cookietest.asp"
99
+ http.fetch_page 'http://www.whatarecookies.com/cookietest.asp'
101
100
  http.accept_cookies?.should be_true
102
- http.cookie_jar.cookies(URI("http://www.whatarecookies.com/cookietest.asp")).should_not be_empty
101
+ http.cookie_jar.cookies(URI('http://www.whatarecookies.com/cookietest.asp')).should_not be_empty
103
102
  end
104
103
  end
105
104
 
@@ -108,10 +107,10 @@ describe Polipus::HTTP do
108
107
  describe 'net errors' do
109
108
  it 'should handle net errors correctly' do
110
109
  VCR.use_cassette('http_errors') do
111
- http = Polipus::HTTP.new(open_timeout:1, read_timeout: 1)
112
- http.fetch_page("http://www.wrong-domain.lol/").error.should_not be_nil
110
+ http = Polipus::HTTP.new(open_timeout: 1, read_timeout: 1)
111
+ http.fetch_page('http://www.wrong-domain.lol/').error.should_not be_nil
113
112
  end
114
113
  end
115
114
  end
116
115
 
117
- end
116
+ end
data/spec/page_spec.rb CHANGED
@@ -1,5 +1,5 @@
1
- require "spec_helper"
2
- require "polipus/page"
1
+ require 'spec_helper'
2
+ require 'polipus/page'
3
3
 
4
4
  describe Polipus::Page do
5
5
  let(:page) do
@@ -14,29 +14,29 @@ describe Polipus::Page do
14
14
  </body>
15
15
  </html>
16
16
  EOF
17
- Polipus::Page.new 'http://www.google.com/',
18
- code: 200,
19
- body: body,
20
- headers: {'content-type' => ['text/html']},
21
- domain_aliases: %w(www.google.com google.com)
17
+ Polipus::Page.new 'http://www.google.com/',
18
+ code: 200,
19
+ body: body,
20
+ headers: { 'content-type' => ['text/html'] },
21
+ domain_aliases: %w(www.google.com google.com)
22
22
  end
23
23
 
24
24
  it 'should be fetched' do
25
25
  page.fetched?.should be_true
26
26
  end
27
-
27
+
28
28
  it 'should honor domain_aliases attribute' do
29
29
  page.links.count.should be 4
30
30
  end
31
31
 
32
32
  context 'page expiring' do
33
33
  let(:page) do
34
- Polipus::Page.new 'http://www.google.com/',
35
- code: 200,
36
- body: '',
37
- headers: {'content-type' => ['text/html']},
38
- domain_aliases: %w(www.google.com google.com),
39
- fetched_at: (Time.now.to_i - 30)
34
+ Polipus::Page.new 'http://www.google.com/',
35
+ code: 200,
36
+ body: '',
37
+ headers: { 'content-type' => ['text/html'] },
38
+ domain_aliases: %w(www.google.com google.com),
39
+ fetched_at: (Time.now.to_i - 30)
40
40
  end
41
41
 
42
42
  it 'should be marked at expired' do
@@ -49,12 +49,12 @@ EOF
49
49
  end
50
50
 
51
51
  context 'page error' do
52
-
52
+
53
53
  let(:page) do
54
54
  Polipus::Page.new 'http://www.google.com/', error: 'an error'
55
55
  end
56
56
 
57
- it 'should serialize an error' do
57
+ it 'should serialize an error' do
58
58
  page.to_hash['error'].should eq 'an error'
59
59
  end
60
60
 
data/spec/polipus_spec.rb CHANGED
@@ -1,11 +1,11 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe Polipus::PolipusCrawler do
4
- after(:each) {Redis.new(db:10).flushdb}
5
- let(:p_options) {
4
+ after(:each) { Redis.new(db: 10).flushdb }
5
+ let(:p_options) do
6
6
  {
7
7
  workers: 1,
8
- redis_options: {host: 'localhost', db:10},
8
+ redis_options: { host: 'localhost', db: 10 },
9
9
  depth_limit: 1,
10
10
  queue_timeout: 1,
11
11
  user_agent: 'polipus-rspec',
@@ -13,55 +13,58 @@ describe Polipus::PolipusCrawler do
13
13
  logger_level: Logger::DEBUG,
14
14
  storage: Polipus::Storage.memory_store
15
15
  }
16
- }
17
- let(:polipus) {
18
- Polipus::PolipusCrawler.new("polipus-rspec", ["http://rubygems.org/gems"], p_options)
19
- }
16
+ end
17
+ let(:polipus) do
18
+ Polipus::PolipusCrawler.new('polipus-rspec', ['http://rubygems.org/gems'], p_options)
19
+ end
20
20
 
21
- let(:init_page){
22
- Polipus::Page.new "http://rubygems.org/gems"
23
- }
21
+ let(:init_page)do
22
+ Polipus::Page.new 'http://rubygems.org/gems'
23
+ end
24
24
 
25
- let(:logger){Logger.new(nil)}
25
+ let(:logger) { Logger.new(nil) }
26
26
 
27
- context "polipus" do
27
+ context 'polipus' do
28
28
 
29
- it "should create a polipus instance" do
29
+ it 'should create a polipus instance' do
30
30
  polipus.should be_an_instance_of Polipus::PolipusCrawler
31
31
  end
32
32
 
33
- it "should execute a crawling session" do
33
+ it 'should execute a crawling session' do
34
34
  polipus.takeover
35
35
  polipus.storage.exists?(init_page).should be_true
36
36
  polipus.storage.get(init_page).links.count.should be polipus.storage.count
37
37
  end
38
38
 
39
- it "should filter unwanted urls" do
39
+ it 'should filter unwanted urls' do
40
40
  polipus.skip_links_like(/\/pages\//)
41
41
  polipus.takeover
42
42
  polipus.storage.get(init_page).links
43
43
  .reject { |e| e.path.to_s =~ /\/pages\// }.count.should be polipus.storage.count
44
44
  end
45
45
 
46
- it "should follow only wanted urls" do
46
+ it 'should follow only wanted urls' do
47
47
  polipus.follow_links_like(/\/pages\//)
48
48
  polipus.follow_links_like(/\/gems$/)
49
49
  polipus.takeover
50
50
  polipus.storage.get(init_page).links
51
- .reject { |e| ![/\/pages\//, /\/gems$/].any?{|p| e.path =~ p} }
51
+ .reject { |e| ![/\/pages\//, /\/gems$/].any? { |p| e.path =~ p } }
52
52
  .count.should be polipus.storage.count
53
53
  end
54
54
 
55
- it "should refresh expired pages" do
55
+ it 'should refresh expired pages' do
56
56
  polipus.ttl_page = 3600
57
57
  polipus.takeover
58
- polipus.storage.each {|id, page| page.fetched_at = page.fetched_at - 3600; polipus.storage.add(page)}
59
- polipus.storage.each {|id, page| page.expired?(3600).should be_true}
58
+ polipus.storage.each do |_id, page|
59
+ page.fetched_at = page.fetched_at - 3600
60
+ polipus.storage.add(page)
61
+ end
62
+ polipus.storage.each { |_id, page| page.expired?(3600).should be_true }
60
63
  polipus.takeover
61
- polipus.storage.each {|id, page| page.expired?(3600).should be_false}
64
+ polipus.storage.each { |_id, page| page.expired?(3600).should be_false }
62
65
  end
63
66
 
64
- it "should re-download seeder urls no matter what" do
67
+ it 'should re-download seeder urls no matter what' do
65
68
  cache_hit = {}
66
69
  polipus.follow_links_like(/\/gems$/)
67
70
  polipus.on_page_downloaded do |page|
@@ -70,26 +73,26 @@ describe Polipus::PolipusCrawler do
70
73
  end
71
74
  polipus.takeover
72
75
  polipus.takeover
73
- cache_hit["http://rubygems.org/gems"].should be 2
76
+ cache_hit['http://rubygems.org/gems'].should be 2
74
77
  end
75
78
 
76
- it "should call on_page_error code blocks when a page has error" do
77
- p = Polipus::PolipusCrawler.new("polipus-rspec", ["http://dasd.adad.dom/"], p_options.merge(open_timeout:1, read_timeout: 1))
79
+ it 'should call on_page_error code blocks when a page has error' do
80
+ p = Polipus::PolipusCrawler.new('polipus-rspec', ['http://dasd.adad.dom/'], p_options.merge(open_timeout: 1, read_timeout: 1))
78
81
  a_page = nil
79
- p.on_page_error {|page| a_page = page}
82
+ p.on_page_error { |page| a_page = page }
80
83
  p.takeover
81
84
  a_page.should_not be_nil
82
85
  a_page.error.should_not be_nil
83
86
  end
84
87
 
85
- it "should obey to the robots.txt file" do
88
+ it 'should obey to the robots.txt file' do
86
89
  lopt = p_options
87
90
  lopt[:obey_robots_txt] = true
88
- polipus = Polipus::PolipusCrawler.new("polipus-rspec", ["https://rubygems.org/gems/polipus"], lopt)
91
+ polipus = Polipus::PolipusCrawler.new('polipus-rspec', ['https://rubygems.org/gems/polipus'], lopt)
89
92
  polipus.depth_limit = 1
90
93
  polipus.takeover
91
- polipus.storage.each {|id, page| (page.url.path =~ /$\/downloads\//).should be_false}
94
+ polipus.storage.each { |_id, page| (page.url.path =~ /$\/downloads\//).should be_false }
92
95
  end
93
96
 
94
97
  end
95
- end
98
+ end
data/spec/queue_overflow_manager_spec.rb CHANGED
@@ -1,20 +1,22 @@
1
- require "spec_helper"
2
- require "polipus/queue_overflow"
3
- require "redis-queue"
1
+ require 'spec_helper'
2
+ require 'mongo'
3
+ require 'polipus/queue_overflow'
4
+ require 'redis-queue'
4
5
 
5
6
  describe Polipus::QueueOverflow::Manager do
6
7
  before(:all) do
7
- @mongo = Mongo::Connection.new("localhost", 27017, :pool_size => 15, :pool_timeout => 5).db('_test_polipus')
8
+ @mongo = Mongo::Connection.new('localhost', 27_017, pool_size: 15, pool_timeout: 5).db('_test_polipus')
8
9
  @mongo['_test_pages'].drop
9
10
  @storage = Polipus::Storage.mongo_store(@mongo, '_test_pages')
10
- @redis_q = Redis::Queue.new("queue_test","bp_queue_test", :redis => Redis.new())
11
- @queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, "queue_test")
11
+ @redis_q = Redis::Queue.new('queue_test', 'bp_queue_test', redis: Redis.new)
12
+ @queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, 'queue_test')
12
13
  @redis = Redis.new
13
- @polipus = flexmock("polipus")
14
+ @polipus = flexmock('polipus')
14
15
  @polipus.should_receive(:queue_overflow_adapter).and_return(@queue_overflow)
15
16
  @polipus.should_receive(:storage).and_return(@storage)
16
17
  @polipus.should_receive(:redis).and_return(@redis)
17
- @polipus.should_receive(:job_name).and_return("___test")
18
+ @polipus.should_receive(:job_name).and_return('___test')
19
+ @polipus.should_receive(:logger).and_return(Logger.new(nil))
18
20
  @manager = Polipus::QueueOverflow::Manager.new(@polipus, @redis_q, 10)
19
21
  end
20
22
 
@@ -30,16 +32,16 @@ describe Polipus::QueueOverflow::Manager do
30
32
  end
31
33
 
32
34
  it 'should remove 10 items' do
33
- @manager.perform.should be == [0,0]
34
- 20.times {|i| @redis_q << page_factory("http://www.user-doo.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
35
+ @manager.perform.should be == [0, 0]
36
+ 20.times { |i| @redis_q << page_factory("http://www.user-doo.com/page_#{i}", code: 200, body: '<html></html>').to_json }
35
37
  @manager.perform.should be == [10, 0]
36
38
  @queue_overflow.size.should be == 10
37
39
  @redis_q.size.should be == 10
38
40
  end
39
41
 
40
42
  it 'should restore 10 items' do
41
- @manager.perform.should be == [0,0]
42
- 10.times {|i| @queue_overflow << page_factory("http://www.user-doo-bla.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
43
+ @manager.perform.should be == [0, 0]
44
+ 10.times { |i| @queue_overflow << page_factory("http://www.user-doo-bla.com/page_#{i}", code: 200, body: '<html></html>').to_json }
43
45
  @manager.perform.should be == [0, 10]
44
46
  @queue_overflow.size.should be == 0
45
47
  @redis_q.size.should be == 10
@@ -48,45 +50,45 @@ describe Polipus::QueueOverflow::Manager do
48
50
  end
49
51
 
50
52
  it 'should restore 3 items' do
51
-
52
- @manager.perform.should be == [0,0]
53
- 3.times {|i| @queue_overflow << page_factory("http://www.user-doo-bu.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
53
+
54
+ @manager.perform.should be == [0, 0]
55
+ 3.times { |i| @queue_overflow << page_factory("http://www.user-doo-bu.com/page_#{i}", code: 200, body: '<html></html>').to_json }
54
56
  @manager.perform.should be == [0, 3]
55
57
  @queue_overflow.size.should be == 0
56
58
  @redis_q.size.should be == 3
57
59
  @manager.perform.should be == [0, 0]
58
-
60
+
59
61
  end
60
62
 
61
63
  it 'should restore 0 items' do
62
-
63
- @manager.perform.should be == [0,0]
64
- 10.times {|i|
65
- p = page_factory("http://www.user-doo-bu.com/page_#{i}", :code => 200, :body => '<html></html>')
64
+
65
+ @manager.perform.should be == [0, 0]
66
+ 10.times do|i|
67
+ p = page_factory("http://www.user-doo-bu.com/page_#{i}", code: 200, body: '<html></html>')
66
68
  @storage.add p
67
- @queue_overflow << p.to_json
68
- }
69
+ @queue_overflow << p.to_json
70
+ end
69
71
  @manager.perform.should be == [0, 0]
70
72
  @queue_overflow.size.should be == 0
71
73
  @redis_q.size.should be == 0
72
74
  @manager.perform.should be == [0, 0]
73
-
75
+
74
76
  end
75
77
 
76
78
  it 'should filter an url based on the spec' do
77
79
  @queue_overflow.clear
78
80
  @redis_q.clear
79
- 10.times {|i| @queue_overflow << page_factory("http://www.user-doo.com/page_#{i}", :code => 200, :body => '<html></html>').to_json }
81
+ 10.times { |i| @queue_overflow << page_factory("http://www.user-doo.com/page_#{i}", code: 200, body: '<html></html>').to_json }
80
82
  @manager.url_filter do |page|
81
- page.url.to_s.end_with?("page_0") ? false : true
83
+ page.url.to_s.end_with?('page_0') ? false : true
82
84
  end
83
- @manager.perform.should be == [0,9]
85
+ @manager.perform.should be == [0, 9]
84
86
  @queue_overflow.size.should be == 0
85
87
  @redis_q.size.should be == 9
86
- @manager.url_filter do |page|
88
+ @manager.url_filter do |_page|
87
89
  true
88
90
  end
89
91
 
90
92
  end
91
93
 
92
- end
94
+ end