polipus 0.3.7 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +8 -8
  2. data/.rspec +1 -1
  3. data/.rubocop.yml +3 -3
  4. data/.rubocop_todo.yml +1 -1
  5. data/.travis.yml +14 -4
  6. data/AUTHORS.md +1 -0
  7. data/CHANGELOG.md +9 -1
  8. data/Gemfile +9 -0
  9. data/README.md +2 -3
  10. data/Rakefile +1 -3
  11. data/examples/basic.rb +8 -1
  12. data/lib/polipus.rb +25 -13
  13. data/lib/polipus/queue_overflow.rb +1 -0
  14. data/lib/polipus/queue_overflow/manager.rb +1 -0
  15. data/lib/polipus/queue_overflow/mongo_queue.rb +1 -1
  16. data/lib/polipus/queue_overflow/worker.rb +24 -0
  17. data/lib/polipus/storage.rb +10 -16
  18. data/lib/polipus/storage/mongo_store.rb +6 -1
  19. data/lib/polipus/storage/rethink_store.rb +90 -0
  20. data/lib/polipus/version.rb +1 -1
  21. data/polipus.gemspec +16 -18
  22. data/spec/{http_spec.rb → polipus/http_spec.rb} +26 -37
  23. data/spec/{page_spec.rb → polipus/page_spec.rb} +7 -11
  24. data/spec/{queue_overflow_manager_spec.rb → polipus/queue_overflow/manager_spec.rb} +22 -29
  25. data/spec/{queue_overflow_spec.rb → polipus/queue_overflow_spec.rb} +14 -20
  26. data/spec/{robotex_spec.rb → polipus/robotex_spec.rb} +10 -11
  27. data/spec/{signal_handler_spec.rb → polipus/signal_handler_spec.rb} +2 -6
  28. data/spec/{storage_memory_spec.rb → polipus/storage/memory_store_spec.rb} +18 -21
  29. data/spec/{storage_mongo_spec.rb → polipus/storage/mongo_store_spec.rb} +23 -25
  30. data/spec/polipus/storage/rethink_store_spec.rb +117 -0
  31. data/spec/{url_tracker_spec.rb → polipus/url_tracker_spec.rb} +4 -4
  32. data/spec/polipus_spec.rb +13 -15
  33. data/spec/spec_helper.rb +13 -12
  34. metadata +76 -154
  35. data/lib/polipus/storage/s3_store.rb +0 -96
  36. data/spec/cassettes/08b228db424a926e1ed6ab63b38d847e.yml +0 -166
  37. data/spec/cassettes/20aa41f181b49f00078c3ca30bad5afe.yml +0 -166
  38. data/spec/cassettes/4640919145753505af2d0f8423de37f3.yml +0 -270
  39. data/spec/cassettes/66aae15a03f4aab8efd15e40d2d7882a.yml +0 -194
  40. data/spec/cassettes/76b7c197c95a5bf9b1e882c567192d72.yml +0 -183
  41. data/spec/cassettes/9b1d523b7f5db7214f8a8bd9272cccba.yml +0 -221
  42. data/spec/cassettes/ab333f89535a2efb284913fede6aa7c7.yml +0 -221
  43. data/spec/cassettes/ae5d7cffde3f53122cdf79f3d1367e8e.yml +0 -221
  44. data/spec/cassettes/ffe3d588b6df4b9de35e5a7ccaf5a81b.yml +0 -695
  45. data/spec/storage_s3_spec.rb +0 -115
@@ -1,5 +1,5 @@
1
1
  # encoding: UTF-8
2
2
  module Polipus
3
- VERSION = '0.3.7'
3
+ VERSION = '0.4.0'
4
4
  HOMEPAGE = 'https://github.com/taganaka/polipus'
5
5
  end
@@ -13,6 +13,7 @@ Gem::Specification.new do |s|
13
13
  An easy to use distributed web-crawler framework based on Redis
14
14
  )
15
15
  s.licenses = ['MIT']
16
+ s.platform = Gem::Platform::RUBY
16
17
 
17
18
  s.rubyforge_project = 'polipus'
18
19
 
@@ -21,26 +22,23 @@ Gem::Specification.new do |s|
21
22
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
22
23
  s.require_paths = ['lib']
23
24
 
24
- s.add_runtime_dependency 'redis-bloomfilter', '~> 0.0', '>= 0.0.1'
25
- s.add_runtime_dependency 'redis-queue', '~> 0.0', '>= 0.0.3'
26
25
  s.add_runtime_dependency 'nokogiri', '~> 1.6', '>= 1.6.0'
27
- s.add_runtime_dependency 'hiredis', '~> 0.4', '>= 0.4.5'
28
- s.add_runtime_dependency 'redis', '~> 3.0', '>= 3.0.4'
29
- s.add_runtime_dependency 'mongo', '~> 1.9.0', '>= 1.9.2'
30
-
31
- if defined?(JRUBY_VERSION)
32
- s.add_runtime_dependency 'bson', '~> 1.9', '>= 1.9.2'
33
- else
34
- s.add_runtime_dependency 'bson_ext', '~> 1.9', '>= 1.9.2'
35
- end
36
- s.add_runtime_dependency 'aws-s3', '~> 0.6', '>= 0.6.3'
37
26
  s.add_runtime_dependency 'http-cookie', '~> 1.0', '>= 1.0.1'
38
27
 
39
- s.add_development_dependency 'rspec', '~> 2.14', '>= 2.14.1'
40
- s.add_development_dependency 'vcr', '~> 2.5', '>= 2.5.0'
41
- s.add_development_dependency 'webmock', '>= 1.8.0', '< 1.12'
42
- s.add_development_dependency 'flexmock', '~> 1.3', '>= 1.3.2'
43
- s.add_development_dependency 'rake', '~> 10.3', '>= 10.3.2'
44
- s.add_development_dependency 'coveralls'
28
+ s.add_runtime_dependency 'redis', '~> 3.0', '>= 3.0.4'
29
+ s.add_runtime_dependency 'hiredis', '~> 0.5', '>= 0.4.5'
30
+ s.add_runtime_dependency 'redis-queue', '~> 0.0', '>= 0.0.4'
31
+ s.add_runtime_dependency 'redis-bloomfilter', '~> 0.0', '>= 0.0.3'
32
+
33
+ s.add_development_dependency 'mongo', '~>1.11.0'
34
+ s.add_development_dependency 'rethinkdb', '~>1.15.0'
45
35
 
36
+ s.add_development_dependency 'rake', '~> 10.3'
37
+ s.add_development_dependency 'rspec', '~> 3.1.0'
38
+ s.add_development_dependency 'flexmock', '~> 1.3'
39
+
40
+ s.add_development_dependency 'vcr', '~> 2.9.0'
41
+ s.add_development_dependency 'webmock', '~> 1.20.0'
42
+
43
+ s.add_development_dependency 'coveralls'
46
44
  end
@@ -5,118 +5,107 @@ require 'polipus/http'
5
5
  require 'polipus/page'
6
6
 
7
7
  describe Polipus::HTTP do
8
-
9
8
  it 'should download a page' do
10
9
  VCR.use_cassette('http_test') do
11
10
  http = Polipus::HTTP.new
12
11
  page = http.fetch_page('http://sfbay.craigslist.org/apa/')
13
- page.should be_an_instance_of(Polipus::Page)
14
- page.doc.search('title').text.strip.should eq 'SF bay area apts/housing for rent classifieds - craigslist'
15
- page.fetched_at.should_not be_nil
16
- page.fetched?.should be_true
12
+ expect(page).to be_an_instance_of(Polipus::Page)
13
+ expect(page.doc.search('title').text.strip).to eq 'SF bay area apts/housing for rent classifieds - craigslist'
14
+ expect(page.fetched_at).not_to be_nil
15
+ expect(page.fetched?).to be_truthy
17
16
  end
18
17
  end
19
18
 
20
19
  it 'should follow a redirect' do
21
20
  VCR.use_cassette('http_test_redirect') do
22
-
23
21
  http = Polipus::HTTP.new
24
22
  page = http.fetch_page('http://greenbytes.de/tech/tc/httpredirects/t300bodyandloc.asis')
25
23
 
26
- page.should be_an_instance_of(Polipus::Page)
27
- page.code.should be 200
28
- page.url.to_s.should eq 'http://greenbytes.de/tech/tc/httpredirects/300.txt'
29
- page.body.strip.should eq "You have reached the target\r\nof a 300 redirect."
24
+ expect(page).to be_an_instance_of(Polipus::Page)
25
+ expect(page.code).to be 200
26
+ expect(page.url.to_s).to eq 'http://greenbytes.de/tech/tc/httpredirects/300.txt'
27
+ expect(page.body.strip).to eq "You have reached the target\r\nof a 300 redirect."
30
28
  end
31
29
  end
32
30
 
33
31
  describe 'proxy settings' do
34
-
35
32
  it 'should set proxy correctly using a procedure' do
36
33
  http = Polipus::HTTP.new(proxy_host: -> _con { '127.0.0.0' }, proxy_port: -> _con { 8080 })
37
- http.proxy_host.should eq '127.0.0.0'
38
- http.proxy_port.should be 8080
34
+ expect(http.proxy_host).to eq '127.0.0.0'
35
+ expect(http.proxy_port).to be 8080
39
36
  end
40
37
 
41
38
  it 'should set proxy correctly using shorthand method' do
42
39
  http = Polipus::HTTP.new(proxy_host_port: -> _con { ['127.0.0.0', 8080] })
43
- http.proxy_host_port.should eq ['127.0.0.0', 8080]
40
+ expect(http.proxy_host_port).to eq ['127.0.0.0', 8080]
44
41
  end
45
42
 
46
43
  it 'should set proxy w/ auth correctly using shorthand method' do
47
44
  http = Polipus::HTTP.new(proxy_host_port: -> _con { ['127.0.0.0', 8080, 'a', 'b'] })
48
- http.proxy_host_port.should eq ['127.0.0.0', 8080, 'a', 'b']
45
+ expect(http.proxy_host_port).to eq ['127.0.0.0', 8080, 'a', 'b']
49
46
  end
50
47
 
51
48
  it 'should set proxy settings' do
52
49
  http = Polipus::HTTP.new(proxy_host: '127.0.0.0', proxy_port: 8080, proxy_user: 'a', proxy_pass: 'b')
53
- http.proxy_port.should be 8080
54
- http.proxy_host.should eq '127.0.0.0'
55
- http.proxy_user.should eq 'a'
56
- http.proxy_pass.should eq 'b'
50
+ expect(http.proxy_port).to be 8080
51
+ expect(http.proxy_host).to eq '127.0.0.0'
52
+ expect(http.proxy_user).to eq 'a'
53
+ expect(http.proxy_pass).to eq 'b'
57
54
  end
58
-
59
55
  end
60
56
 
61
57
  describe 'compressed content handling' do
62
-
63
58
  it 'should decode gzip content' do
64
59
  VCR.use_cassette('gzipped_on') do
65
60
  http = Polipus::HTTP.new(logger: Logger.new(STDOUT))
66
61
  page = http.fetch_page('http://www.whatsmyip.org/http-compression-test/')
67
- page.doc.css('.gzip_yes').should_not be_empty
62
+ expect(page.doc.css('.gzip_yes')).not_to be_empty
68
63
  end
69
64
  end
70
65
 
71
66
  it 'should decode deflate content' do
72
67
  http = Polipus::HTTP.new(logger: Logger.new(STDOUT))
73
68
  page = http.fetch_page('http://david.fullrecall.com/browser-http-compression-test?compression=deflate-http')
74
- page.headers.fetch('content-encoding').first.should eq 'deflate'
75
- page.body.include?('deflate-http').should be_true
69
+ expect(page.headers.fetch('content-encoding').first).to eq 'deflate'
70
+ expect(page.body.include?('deflate-http')).to be_truthy
76
71
  end
77
-
78
72
  end
79
73
 
80
74
  describe 'staled connections' do
81
-
82
75
  it 'should refresh a staled connection' do
83
76
  VCR.use_cassette('http_tconnection_max_hits') do
84
77
  http = Polipus::HTTP.new(connection_max_hits: 1, logger: Logger.new(STDOUT))
85
78
  http.class.__send__(:attr_reader, :connections)
86
79
  http.class.__send__(:attr_reader, :connections_hits)
87
80
  http.fetch_page('https://www.yahoo.com/')
88
- http.connections['www.yahoo.com'][443].should_not be_nil
81
+ expect(http.connections['www.yahoo.com'][443]).not_to be_nil
89
82
  old_conn = http.connections['www.yahoo.com'][443]
90
- http.connections_hits['www.yahoo.com'][443].should be 1
83
+ expect(http.connections_hits['www.yahoo.com'][443]).to be 1
91
84
 
92
85
  http.fetch_page('https://www.yahoo.com/tech/expectant-parents-asked-the-internet-to-name-their-83416450388.html')
93
- http.connections_hits['www.yahoo.com'][443].should be 1
94
- http.connections['www.yahoo.com'][443].should_not be old_conn
86
+ expect(http.connections_hits['www.yahoo.com'][443]).to be 1
87
+ expect(http.connections['www.yahoo.com'][443]).not_to be old_conn
95
88
  end
96
89
  end
97
-
98
90
  end
99
91
 
100
92
  describe 'cookies' do
101
-
102
93
  it 'should handle cookies correctly' do
103
94
  VCR.use_cassette('http_cookies') do
104
95
  http = Polipus::HTTP.new(accept_cookies: true)
105
96
  http.fetch_page 'http://www.whatarecookies.com/cookietest.asp'
106
- http.accept_cookies?.should be_true
107
- http.cookie_jar.cookies(URI('http://www.whatarecookies.com/cookietest.asp')).should_not be_empty
97
+ expect(http.accept_cookies?).to be_truthy
98
+ expect(http.cookie_jar.cookies(URI('http://www.whatarecookies.com/cookietest.asp'))).not_to be_empty
108
99
  end
109
100
  end
110
-
111
101
  end
112
102
 
113
103
  describe 'net errors' do
114
104
  it 'should handle net errors correctly' do
115
105
  VCR.use_cassette('http_errors') do
116
106
  http = Polipus::HTTP.new(open_timeout: 1, read_timeout: 1)
117
- http.fetch_page('http://www.wrong-domain.lol/').error.should_not be_nil
107
+ expect(http.fetch_page('http://www.wrong-domain.lol/').error).not_to be_nil
118
108
  end
119
109
  end
120
110
  end
121
-
122
111
  end
@@ -23,11 +23,11 @@ EOF
23
23
  end
24
24
 
25
25
  it 'should be fetched' do
26
- page.fetched?.should be_true
26
+ expect(page.fetched?).to be_truthy
27
27
  end
28
28
 
29
29
  it 'should honor domain_aliases attribute' do
30
- page.links.count.should be 4
30
+ expect(page.links.count).to be 4
31
31
  end
32
32
 
33
33
  context 'page expiring' do
@@ -41,32 +41,28 @@ EOF
41
41
  end
42
42
 
43
43
  it 'should be marked at expired' do
44
- page.expired?(20).should be_true
44
+ expect(page.expired?(20)).to be_truthy
45
45
  end
46
46
 
47
47
  it 'should NOT be marked at expired' do
48
- page.expired?(60).should be_false
48
+ expect(page.expired?(60)).to be_falsey
49
49
  end
50
50
  end
51
51
 
52
52
  context 'page error' do
53
-
54
53
  let(:page) do
55
54
  Polipus::Page.new 'http://www.google.com/', error: 'an error'
56
55
  end
57
56
 
58
57
  it 'should serialize an error' do
59
- page.to_hash['error'].should eq 'an error'
58
+ expect(page.to_hash['error']).to eq 'an error'
60
59
  end
61
-
62
60
  end
63
61
 
64
62
  context 'page code' do
65
63
  it 'should identify HTTPSuccess code' do
66
- Polipus::Page.new('http://www.google.com/', code: 201).success?.should be_true
67
- Polipus::Page.new('http://www.google.com/', code: 404).success?.should be_false
64
+ expect(Polipus::Page.new('http://www.google.com/', code: 201).success?).to be_truthy
65
+ expect(Polipus::Page.new('http://www.google.com/', code: 404).success?).to be_falsey
68
66
  end
69
-
70
67
  end
71
-
72
68
  end
@@ -33,47 +33,42 @@ describe Polipus::QueueOverflow::Manager do
33
33
  end
34
34
 
35
35
  it 'should remove 10 items' do
36
- @manager.perform.should be == [0, 0]
36
+ expect(@manager.perform).to eq([0, 0])
37
37
  20.times { |i| @redis_q << page_factory("http://www.user-doo.com/page_#{i}", code: 200, body: '<html></html>').to_json }
38
- @manager.perform.should be == [10, 0]
39
- @queue_overflow.size.should be == 10
40
- @redis_q.size.should be == 10
38
+ expect(@manager.perform).to eq([10, 0])
39
+ expect(@queue_overflow.size).to eq(10)
40
+ expect(@redis_q.size).to eq(10)
41
41
  end
42
42
 
43
43
  it 'should restore 10 items' do
44
- @manager.perform.should be == [0, 0]
44
+ expect(@manager.perform).to eq([0, 0])
45
45
  10.times { |i| @queue_overflow << page_factory("http://www.user-doo-bla.com/page_#{i}", code: 200, body: '<html></html>').to_json }
46
- @manager.perform.should be == [0, 10]
47
- @queue_overflow.size.should be == 0
48
- @redis_q.size.should be == 10
49
- @manager.perform.should be == [0, 0]
50
-
46
+ expect(@manager.perform).to eq([0, 10])
47
+ expect(@queue_overflow.size).to eq(0)
48
+ expect(@redis_q.size).to eq(10)
49
+ expect(@manager.perform).to eq([0, 0])
51
50
  end
52
51
 
53
52
  it 'should restore 3 items' do
54
-
55
- @manager.perform.should be == [0, 0]
53
+ expect(@manager.perform).to eq([0, 0])
56
54
  3.times { |i| @queue_overflow << page_factory("http://www.user-doo-bu.com/page_#{i}", code: 200, body: '<html></html>').to_json }
57
- @manager.perform.should be == [0, 3]
58
- @queue_overflow.size.should be == 0
59
- @redis_q.size.should be == 3
60
- @manager.perform.should be == [0, 0]
61
-
55
+ expect(@manager.perform).to eq([0, 3])
56
+ expect(@queue_overflow.size).to eq(0)
57
+ expect(@redis_q.size).to eq(3)
58
+ expect(@manager.perform).to eq([0, 0])
62
59
  end
63
60
 
64
61
  it 'should restore 0 items' do
65
-
66
- @manager.perform.should be == [0, 0]
62
+ expect(@manager.perform).to eq([0, 0])
67
63
  10.times do|i|
68
64
  p = page_factory("http://www.user-doo-bu.com/page_#{i}", code: 200, body: '<html></html>')
69
65
  @storage.add p
70
66
  @queue_overflow << p.to_json
71
67
  end
72
- @manager.perform.should be == [0, 0]
73
- @queue_overflow.size.should be == 0
74
- @redis_q.size.should be == 0
75
- @manager.perform.should be == [0, 0]
76
-
68
+ expect(@manager.perform).to eq([0, 0])
69
+ expect(@queue_overflow.size).to eq(0)
70
+ expect(@redis_q.size).to eq(0)
71
+ expect(@manager.perform).to eq([0, 0])
77
72
  end
78
73
 
79
74
  it 'should filter an url based on the spec' do
@@ -83,13 +78,11 @@ describe Polipus::QueueOverflow::Manager do
83
78
  @manager.url_filter do |page|
84
79
  page.url.to_s.end_with?('page_0') ? false : true
85
80
  end
86
- @manager.perform.should be == [0, 9]
87
- @queue_overflow.size.should be == 0
88
- @redis_q.size.should be == 9
81
+ expect(@manager.perform).to eq([0, 9])
82
+ expect(@queue_overflow.size).to eq(0)
83
+ expect(@redis_q.size).to eq(9)
89
84
  @manager.url_filter do |_page|
90
85
  true
91
86
  end
92
-
93
87
  end
94
-
95
88
  end
@@ -3,12 +3,10 @@ require 'spec_helper'
3
3
  require 'polipus/queue_overflow'
4
4
 
5
5
  describe Polipus::QueueOverflow do
6
-
7
6
  before(:all) do
8
7
  @queue_overflow = Polipus::QueueOverflow.mongo_queue(nil, 'queue_test')
9
8
  @queue_overflow_capped = Polipus::QueueOverflow.mongo_queue_capped(nil, 'queue_test_c', max: 20)
10
9
  @queue_overflow_uniq = Polipus::QueueOverflow.mongo_queue(nil, 'queue_test_u', ensure_uniq: true)
11
-
12
10
  end
13
11
 
14
12
  before(:each) do
@@ -25,26 +23,24 @@ describe Polipus::QueueOverflow do
25
23
 
26
24
  it 'should work' do
27
25
  [@queue_overflow, @queue_overflow_capped, @queue_overflow_uniq].each do |q|
28
- q.empty?.should be_true
29
- q.pop.should be_nil
26
+ expect(q.empty?).to be_truthy
27
+ expect(q.pop).to be_nil
30
28
  q << 'test'
31
- q.size.should be == 1
32
- q.pop.should be == 'test'
33
- q.empty?.should be_true
34
- q.pop.should be_nil
35
- q.size.should be == 0
36
- q.empty?.should be_true
29
+ expect(q.size).to eq(1)
30
+ expect(q.pop).to eq('test')
31
+ expect(q.empty?).to be_truthy
32
+ expect(q.pop).to be_nil
33
+ expect(q.size).to eq(0)
34
+ expect(q.empty?).to be_truthy
37
35
  end
38
-
39
36
  end
40
37
 
41
38
  it 'should act as a queue' do
42
39
  [@queue_overflow, @queue_overflow_capped, @queue_overflow_uniq].each do |q|
43
40
  10.times { |i| q << "message_#{i}" }
44
- q.size.should be == 10
45
- q.pop.should be == 'message_0'
41
+ expect(q.size).to eq(10)
42
+ expect(q.pop).to eq('message_0')
46
43
  end
47
-
48
44
  end
49
45
 
50
46
  it 'should work with complex paylod' do
@@ -52,21 +48,19 @@ describe Polipus::QueueOverflow do
52
48
  a = { 'a' => [1, 2, 3], 'b' => 'a_string' }
53
49
  q << a.to_json
54
50
  b = q.pop
55
- JSON.parse(b).should be == a
51
+ expect(JSON.parse(b)).to eq(a)
56
52
  end
57
-
58
53
  end
59
54
 
60
55
  it 'should honor max items if it is capped' do
61
56
  30.times { |i| @queue_overflow_capped << "message_#{i}" }
62
- @queue_overflow_capped.size.should be == 20
63
- @queue_overflow_capped.pop.should be == 'message_10'
57
+ expect(@queue_overflow_capped.size).to eq(20)
58
+ expect(@queue_overflow_capped.pop).to eq('message_10')
64
59
  end
65
60
 
66
61
  it 'should contains only unique items' do
67
62
  20.times { @queue_overflow_uniq << 'A' }
68
63
  20.times { @queue_overflow_uniq << 'B' }
69
- @queue_overflow_uniq.size.should be == 2
64
+ expect(@queue_overflow_uniq.size).to eq(2)
70
65
  end
71
-
72
66
  end
@@ -1,6 +1,6 @@
1
- # encoding: UTF-8
2
1
  require 'spec_helper'
3
2
  require 'polipus/robotex'
3
+
4
4
  describe Polipus::Robotex do
5
5
  let(:spec_domain) { 'http://www.example.com/' }
6
6
  before(:each) do
@@ -19,20 +19,20 @@ Disallow: /locked
19
19
  Allow: /locked
20
20
  END
21
21
  stub_request(:get, 'http://www.example.com/robots.txt')
22
- .to_return(body: robots, status: [200, 'OK'], headers: { 'Content-Type' => 'text/plain' })
22
+ .to_return(body: robots, status: [200, 'OK'], headers: { 'Content-Type' => 'text/plain' })
23
23
  end
24
24
 
25
25
  describe '#initialize' do
26
26
  context 'when no arguments are supplied' do
27
27
  it 'returns a Robotex with the default user-agent' do
28
- Polipus::Robotex.new.user_agent.should == "Robotex/#{Polipus::Robotex::VERSION} (http://www.github.com/chriskite/robotex)"
28
+ expect(Polipus::Robotex.new.user_agent).to eq("Robotex/#{Polipus::Robotex::VERSION} (http://www.github.com/chriskite/robotex)")
29
29
  end
30
30
  end
31
31
 
32
32
  context 'when a user-agent is specified' do
33
33
  it 'returns a Robotex with the specified user-agent' do
34
34
  ua = 'My User Agent'
35
- Polipus::Robotex.new(ua).user_agent.should == ua
35
+ expect(Polipus::Robotex.new(ua).user_agent).to eq(ua)
36
36
  end
37
37
  end
38
38
  end
@@ -41,28 +41,28 @@ END
41
41
  context 'when the robots.txt disallows the user-agent to the url' do
42
42
  it 'returns false' do
43
43
  robotex = Polipus::Robotex.new('bender')
44
- robotex.allowed?(spec_domain + 'my_shiny_metal_ass').should be_false
44
+ expect(robotex.allowed?(spec_domain + 'my_shiny_metal_ass')).to be_falsey
45
45
  end
46
46
  end
47
47
 
48
48
  context 'when the robots.txt disallows the user-agent to some urls, but allows this one' do
49
49
  it 'returns true' do
50
50
  robotex = Polipus::Robotex.new('bender')
51
- robotex.allowed?(spec_domain + 'cigars').should be_true
51
+ expect(robotex.allowed?(spec_domain + 'cigars')).to be_truthy
52
52
  end
53
53
  end
54
54
 
55
55
  context 'when the robots.txt disallows any user-agent to the url' do
56
56
  it 'returns false' do
57
57
  robotex = Polipus::Robotex.new
58
- robotex.allowed?(spec_domain + 'login').should be_false
58
+ expect(robotex.allowed?(spec_domain + 'login')).to be_falsey
59
59
  end
60
60
  end
61
61
 
62
62
  context 'when the robots.txt disallows and then allows the url' do
63
63
  it 'returns false' do
64
64
  robotex = Polipus::Robotex.new
65
- robotex.allowed?(spec_domain + 'locked').should be_false
65
+ expect(robotex.allowed?(spec_domain + 'locked')).to be_falsey
66
66
  end
67
67
  end
68
68
  end
@@ -71,16 +71,15 @@ END
71
71
  context 'when no Crawl-Delay is specified for the user-agent' do
72
72
  it 'returns nil' do
73
73
  robotex = Polipus::Robotex.new
74
- robotex.delay(spec_domain).should be_nil
74
+ expect(robotex.delay(spec_domain)).to be_nil
75
75
  end
76
76
 
77
77
  context 'when Crawl-Delay is specified for the user-agent' do
78
78
  it 'returns the delay as a Fixnum' do
79
79
  robotex = Polipus::Robotex.new('msnbot')
80
- robotex.delay(spec_domain).should == 20
80
+ expect(robotex.delay(spec_domain)).to eq(20)
81
81
  end
82
82
  end
83
83
  end
84
84
  end
85
-
86
85
  end