doctor_scrape 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
- data/.rbenv-version +1 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/Guardfile +10 -0
- data/LICENSE +22 -0
- data/README.md +78 -0
- data/Rakefile +14 -0
- data/doctor_scrape.gemspec +35 -0
- data/lib/doctor_scrape/data.rb +12 -0
- data/lib/doctor_scrape/redirect_follower.rb +29 -0
- data/lib/doctor_scrape/scraper/base.rb +50 -0
- data/lib/doctor_scrape/scraper/bora.rb +25 -0
- data/lib/doctor_scrape/scraper/diva.rb +16 -0
- data/lib/doctor_scrape/scraper/duo.rb +32 -0
- data/lib/doctor_scrape/scraper/meta.rb +39 -0
- data/lib/doctor_scrape/scraper/unknown.rb +20 -0
- data/lib/doctor_scrape/search.rb +42 -0
- data/lib/doctor_scrape/version.rb +3 -0
- data/lib/doctor_scrape.rb +37 -0
- data/spec/cassettes/brage_bibsys_no_hibo_handle_URN_NBN_no-bibsys_brage_17854.yml +624 -0
- data/spec/cassettes/brage_bibsys_no_nhh_handle_URN_NBN_no-bibsys_brage_22813.yml +535 -0
- data/spec/cassettes/brage_bibsys_no_nhh_handle_URN_NBN_no-bibsys_brage_24121.yml +388 -0
- data/spec/cassettes/https___bora_hib_no_handle_10049_234.yml +429 -0
- data/spec/cassettes/https___bora_hib_no_handle_10049_330.yml +347 -0
- data/spec/cassettes/https___bora_uib_no_handle_1956_3282.yml +682 -0
- data/spec/cassettes/nora_search.yml +795 -0
- data/spec/cassettes/ntnu.diva-portal.org_smash_record.jsf?searchId=1&pid=diva2:122798.yml +398 -0
- data/spec/cassettes/ntnu_diva-portal_org_smash_record_jsf_searchId_1_pid_diva2_122794.yml +398 -0
- data/spec/cassettes/ntnu_diva-portal_org_smash_record_jsf_searchId_1_pid_diva2_122798.yml +487 -0
- data/spec/cassettes/www_duo_uio_no_sok_work_html_WORKID_112975.yml +248 -0
- data/spec/cassettes/www_duo_uio_no_sok_work_html_WORKID_149776.yml +240 -0
- data/spec/cassettes/www_ub_uit_no_munin_handle_10037_3822.yml +602 -0
- data/spec/cassettes/www_ub_uit_no_munin_handle_10037_3826.yml +534 -0
- data/spec/data_spec.rb +22 -0
- data/spec/parse/bibsys_spec.rb +40 -0
- data/spec/parse/bora_hib_spec.rb +29 -0
- data/spec/parse/bora_uib_spec.rb +18 -0
- data/spec/parse/diva_spec.rb +29 -0
- data/spec/parse/duo_spec.rb +29 -0
- data/spec/parse/munin_spec.rb +30 -0
- data/spec/redirect_follower_spec.rb +37 -0
- data/spec/scraper_spec.rb +43 -0
- data/spec/scrapers/base_spec.rb +6 -0
- data/spec/scrapers/bora_spec.rb +6 -0
- data/spec/scrapers/diva_spec.rb +6 -0
- data/spec/scrapers/duo_spec.rb +6 -0
- data/spec/scrapers/meta_spec.rb +6 -0
- data/spec/scrapers/unknown_spec.rb +18 -0
- data/spec/search_spec.rb +111 -0
- data/spec/shared/scraper.rb +62 -0
- data/spec/spec_helper.rb +29 -0
- data/spec/support/setup_scraper.rb +8 -0
- metadata +292 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
require 'spec_helper'
|
|
3
|
+
|
|
4
|
+
# Parser expectations for the Diva scraper against two NTNU DiVA record pages.
#
# NOTE(review): `url` is not defined in this file; presumably
# `setup_scraper_for` (spec/support) provides it together with the subject
# the `its` expectations run against -- confirm against that helper.
describe "ntnu.diva-portal.org" do
  let(:scraper) { DoctorScrape::Scraper::Diva.new(url) }

  record_122798 = "http://ntnu.diva-portal.org/smash/record.jsf?searchId=1&pid=diva2:122798"
  record_122794 = "http://ntnu.diva-portal.org/smash/record.jsf?searchId=1&pid=diva2:122794"

  context record_122798 do
    setup_scraper_for record_122798

    its(:title)     { should eq("Sporene der hjemme : Om 15-16-åringer og deres hverdagskultur – basert på en undersøkelse om kulturelle og estetiske praksiser i noen utvalgte nordiske ungdomsrom") }
    its(:author)    { should eq("Aagre, Willy") }
    its(:issued)    { should eq("2006") }
    its(:abstract)  { should be_nil }
    its(:permalink) { should eq("http://urn.kb.se/resolve?urn=urn:nbn:no:ntnu:diva-1697") }
    its(:pdf)       { should eq("http://ntnu.diva-portal.org/smash/get/diva2:122798/FULLTEXT01") }
  end

  context record_122794 do
    setup_scraper_for record_122794

    its(:title)     { should eq("Knowledge Management in Software Engineering: A Systematic Review of Studied Concepts and Research Methods Used") }
    its(:author)    { should eq("Bjørnson, Finn Olav; Dingsøyr, Torgeir") }
    # For this record, issued and pdf are expected to be empty strings
    # rather than nil.
    its(:issued)    { should eq("") }
    its(:abstract)  { should be_nil }
    its(:permalink) { should eq("http://urn.kb.se/resolve?urn=urn:nbn:no:ntnu:diva-2833") }
    its(:pdf)       { should eq("") }
  end

end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
require "spec_helper"
|
|
3
|
+
|
|
4
|
+
# Parser expectations for the Duo scraper against two DUO work pages.
#
# NOTE(review): `url` is not defined in this file; presumably
# `setup_scraper_for` (spec/support) provides it together with the subject
# the `its` expectations run against -- confirm against that helper.
describe "duo.uio.no" do
  let(:scraper) { DoctorScrape::Scraper::Duo.new(url) }

  work_112975 = "http://www.duo.uio.no/sok/work.html?WORKID=112975"
  work_149776 = "http://www.duo.uio.no/sok/work.html?WORKID=149776"

  context work_112975 do
    setup_scraper_for work_112975

    its(:title)     { should eq("Meaningful Method Names") }
    its(:author)    { should eq("Høst, Einar") }
    its(:issued)    { should eq("2011") }
    its(:pdf)       { should eq("http://www.duo.uio.no/sok/work.html?WORKID=112975&fid=65890") }
    its(:permalink) { should eq("http://urn.nb.no/URN:NBN:no-27629") }
    # The pattern is matched against scraped text, so its spelling is kept
    # exactly as in the original spec.
    its(:abstract)  { should match(/We build computer programs by creating named abtractions/) }
  end

  context work_149776 do
    setup_scraper_for work_149776

    its(:title)     { should eq("CacheCast: a system for efficient single source multiple destination data transfer") }
    its(:author)    { should eq("Srebrny, Piotr") }
    its(:issued)    { should eq("2011") }
    its(:pdf)       { should eq("http://www.duo.uio.no/sok/work.html?WORKID=149776&fid=91631") }
    its(:permalink) { should eq("http://urn.nb.no/URN:NBN:no-30226") }
    # /m lets `.` span newlines, so only the start and end of the abstract
    # are pinned.
    its(:abstract)  { should match(/^The basic function of the Internet is to.*stream to thousands of clients\.$/m) }
  end

end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
require 'spec_helper'
|
|
3
|
+
|
|
4
|
+
# Parser expectations for the Meta scraper against two Munin handle pages.
#
# NOTE(review): `url` is not defined in this file; presumably
# `setup_scraper_for` (spec/support) provides it together with the subject
# the `its` expectations run against -- confirm against that helper.
describe "ub.uit.no/munin" do
  let(:scraper) { DoctorScrape::Scraper::Meta.new(url) }

  handle_3822 = "http://www.ub.uit.no/munin/handle/10037/3822"
  handle_3826 = "http://www.ub.uit.no/munin/handle/10037/3826"

  context handle_3822 do
    setup_scraper_for handle_3822

    its(:title)     { should eq("The hidden children of Eve : Sámi poetics guovtti ilmmi gaskkas") }
    its(:author)    { should eq("Jernsletten, Kristin (Kikki)") }
    its(:issued)    { should eq("2012-02-29") }
    its(:permalink) { should eq("http://hdl.handle.net/10037/3822") }
    its(:pdf)       { should eq("http://www.ub.uit.no/munin/bitstream/10037/3822/4/thesis.pdf") }
    its(:isbn)      { should be_nil }
    # /m lets `.` span newlines, so only the start and end of the abstract
    # are pinned.
    its(:abstract)  { should match(/^Tesen går ut på at samisk litteraturforståelse og verdensoppfatning.*videre forskning\.$/m) }
  end

  context handle_3826 do
    setup_scraper_for handle_3826

    its(:title)     { should eq("IKT-forbindelser i helsesektoren, Sammenvevinger av IKT, steder, yrker, kjønn og politikk") }
    its(:author)    { should eq("Dyb, Kari") }
    its(:issued)    { should eq("2011-04-29") }
    its(:permalink) { should eq("http://hdl.handle.net/10037/3826") }
    its(:pdf)       { should eq("http://www.ub.uit.no/munin/bitstream/10037/3826/3/paper_4.pdf") }
    its(:abstract)  { should match(/^Til tross for betydelig politisk satsing.*teknologien blir brukt i sektoren\./m) }
  end

end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
require 'spec_helper'
|
|
3
|
+
|
|
4
|
+
# Exercises RedirectFollower#resolve against WebMock-stubbed responses:
# no redirect, a single redirect, a redirect loop, and an error raised
# while following a redirect.
describe DoctorScrape::RedirectFollower do
  let(:url)      { "http://bit.ly/foobar" }
  let(:endpoint) { "http://example.com" }
  let(:resolver) { described_class.new(url) }

  # WebMock response options for a 302 whose Location header is +target+.
  def redirect_to(target)
    { :status => [302, "Moved Temporarily"], :headers => { :location => target } }
  end

  before do
    stub_request(:any, endpoint).to_return(:body => "You found me!")
  end

  context "when url doesn't redirect" do
    before do
      stub_request(:any, url).to_return(:body => "Ok")
    end

    specify { resolver.resolve.should eq(url) }
  end

  context "when url redirects" do
    before do
      stub_request(:any, url).to_return(redirect_to(endpoint))
    end

    specify { resolver.resolve.should eq(endpoint) }
  end

  context "too many redirects" do
    # The url redirects to itself, so the follower can never reach an end.
    before do
      stub_request(:any, url).to_return(redirect_to(url))
    end

    it "raises error after 5 redirects" do
      expect { resolver.resolve }.to raise_error
      a_request(:get, url).should have_been_made.times(5)
    end
  end

  context "when exception occurs" do
    it "returns the last url" do
      stub_request(:get, url).to_return(redirect_to(endpoint))
      stub_request(:get, endpoint).to_raise(Net::HTTPBadResponse)
      resolver.resolve.should eq(endpoint)
    end
  end

end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Shared examples: for every given url, DoctorScrape::Scraper.for must
# return an instance of the class under description.
#
# urls - a single url string or an array of url strings.
shared_examples :it_matches do |urls|
  let(:expected_class) { described_class }

  # Wrap-and-flatten accepts both a bare string and an array.
  [urls].flatten.each do |candidate|
    context candidate do
      subject { DoctorScrape::Scraper.for(candidate) }

      it { should be_a(expected_class) }
    end
  end

end
|
|
14
|
+
|
|
15
|
+
# Scraper class => urls that DoctorScrape::Scraper.for is expected to
# dispatch to that class. One example group is generated per class, in
# the order listed here.
{
  DoctorScrape::Scraper::Unknown => [
    "http://foo.example.com"
  ],
  DoctorScrape::Scraper::Duo => [
    "http://duo.uio.no/sok/work.html?WORKID=1234",
    "http://www.duo.uio.no/sok/work.html?WORKID=1234"
  ],
  DoctorScrape::Scraper::Diva => [
    "http://ntnu.diva-portal.org/smash/record.jsf?searchId=1&pid=diva2:122798",
    "http://urn.kb.se/resolve?urn=urn:nbn:no:ntnu:diva-15280"
  ],
  DoctorScrape::Scraper::Bora => [
    "https://bora.hib.no/handle/10049/330"
  ],
  DoctorScrape::Scraper::Meta => [
    "http://brage.bibsys.no/nhh/handle/URN:NBN:no-bibsys_brage_24121",
    "http://brage.bibsys.no/hibo/handle/URN:NBN:no-bibsys_brage_17854",
    "http://idtjeneste.nb.no/URN:NBN:no-bibsys_brage_24791"
  ]
}.each do |scraper_class, matching_urls|
  describe scraper_class do
    it_behaves_like :it_matches, matching_urls
  end
end
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
require "spec_helper"
|
|
3
|
+
|
|
4
|
+
# The Unknown scraper reports an error, and both #fetch and #scrape are
# falsy, for a page it does not recognise.
describe DoctorScrape::Scraper::Unknown do
  subject { described_class.new("http://foobar.com") }

  cannot_scrape = ["Don't know how to scrape this page"]

  its(:errors?) { should be_true }
  its(:errors)  { should == cannot_scrape }

  specify { subject.fetch.should be_false }
  specify { subject.scrape.should be_false }

  context "after scrape" do
    before do
      subject.scrape
    end

    # The error list is still exactly the single message after a scrape.
    its(:errors) { should == cannot_scrape }
  end

end
|
data/spec/search_spec.rb
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
# Specs for DoctorScrape::Search: the NORA search request (default and
# overridden query parameters, optional scraper resolution, failure
# handling) and the resolve_urls / resolve_scrapers helpers.
describe DoctorScrape::Search do

  describe "nora" do
    # Query parameters expected on the NORA request when no options are given.
    let(:default_params) do
      {
        "PAGESIZE"      => "50",
        "FROM"          => "2007",
        "TO"            => "2012",
        "SEARCHMODE"    => "TOPIC",
        "DOCUMENTTYPES" => "Doctoral thesis",
        "RESULTMODE"    => "rss"
      }
    end
    use_vcr_cassette "nora_search"
    let(:url) { "http://www.ub.uio.no/nora/result.html" }
    subject { DoctorScrape::Search.nora }

    # Runs Search.nora with +options+ and asserts that a GET was made with
    # the default query parameters overridden by +overrides+.
    # `merge` (not `update`) leaves the memoized default_params untouched.
    def expect_nora_query(options, overrides)
      params = default_params.merge(overrides)
      stub_request(:any, url).with(:query => params)
      DoctorScrape::Search.nora options
      a_request(:get, url).with(:query => params).should have_been_requested
    end

    specify { should be_a Array }
    its(:first) { should match(/^http:\/\//) }

    context "without options" do
      it "uses default options" do
        stub = stub_request(:get, url).with(:query => default_params)
        DoctorScrape::Search.nora
        stub.should have_been_requested
      end

      it "doesn't resolve redirects" do
        DoctorScrape::Search.should_not_receive(:resolve_urls)
        DoctorScrape::Search.should_not_receive(:resolve_scrapers)
        DoctorScrape::Search.nora
      end
    end

    context "with options" do

      it ":limit changes PAGESIZE" do
        expect_nora_query({ :limit => 10 }, "PAGESIZE" => "10")
      end

      it ":from changes start year" do
        expect_nora_query({ :from => 2010 }, "FROM" => "2010")
      end

      it ":to changes end year" do
        expect_nora_query({ :to => 2008 }, "TO" => "2008")
      end

      it "can be combined" do
        expect_nora_query(
          { :to => 2008, :from => "1980", :limit => 1 },
          "TO" => "2008", "FROM" => "1980", "PAGESIZE" => "1"
        )
      end

      context "resolve is true" do
        it "resolves scrapers" do
          DoctorScrape::Search.should_receive(:resolve_scrapers)
          DoctorScrape::Search.nora :resolve => true
        end
      end

    end

    context "when search fails" do
      it "returns empty array" do
        # An empty response body stands in for a failed search.
        stub_request(:get, url).with(:query => default_params).to_return(:body => "")
        DoctorScrape::Search.nora.should be_empty
      end
    end

  end

  describe "resolve_urls" do
    let(:urls)     { ["http://bit.ly", "http://example.com", "http://goo.gl"] }
    let(:resolved) { ["http://bitly.com", "http://example.com", "http://googl.com"] }

    it "returns array with resolved urls" do
      # One resolver double per input url, each answering with the
      # corresponding resolved url.
      resolvers = resolved.map { |target| double "resolver", :resolve => target }
      urls.each_with_index do |source, index|
        DoctorScrape::RedirectFollower.should_receive(:new).with(source) { resolvers[index] }
      end

      DoctorScrape::Search.resolve_urls(urls).should == resolved
    end
  end

  describe "resolve_scrapers" do
    # The second fixture previously read "http://http://foo.bibsys.no"
    # (doubled scheme). Scraper.for is stubbed below, so the exact host
    # does not affect which scraper is returned.
    let(:urls) { ["http://duo.uio.no/", "http://foo.bibsys.no", "http://bora.hib.no"] }

    it "returns array with appropriate scraper for each url" do
      urls.each { |target| stub_request(:get, target) }
      DoctorScrape::Scraper.should_receive(:for).exactly(3).times { DoctorScrape::Scraper::Meta.new "foo" }
      DoctorScrape::Search.resolve_scrapers(urls).first.should be_a DoctorScrape::Scraper::Meta
    end
  end

end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
# Shared examples for scraper classes: #fetch must go through #url, and
# #scrape must clear old errors, return a truthy/falsy result, and record
# HTTP failures in #errors.
shared_examples_for :scraper do
  let(:url)     { "http://test.host" }
  let(:scraper) { described_class.new(url) }

  context "#fetch" do
    it "uses #url for the url" do
      stub_request(:any, "http://www.example.com")
      scraper.should_receive(:url).and_return("http://www.example.com")
      scraper.fetch
    end
  end

  context "#scrape" do

    context "when successful" do

      # Neither network access nor parsing happens in these examples.
      before do
        scraper.stub(:fetch)
        scraper.stub(:parse)
      end

      it "clears errors before scrape" do
        scraper.instance_variable_set("@errors", ['foo', 'bar'])
        scraper.scrape
        scraper.errors.should be_empty
      end

      it "returns true" do
        scraper.stub(:errors?).and_return(false)
        scraper.scrape.should be_true
      end

    end

    context "when there are errors" do

      context "parser error" do
        before do
          scraper.stub(:parse).and_raise("parse error")
        end

        it "returns false " do
          scraper.scrape.should be_false
        end
      end

      # HTTP status code => reason phrase; one context per failure status.
      { 404 => "Not Found", 500 => "Internal Server Error" }.each do |code, reason|
        status_line = "#{code} #{reason}"

        context status_line do
          before do
            stub_request(:any, scraper.url).to_return(:status => [code, reason])
            scraper.scrape
          end

          specify { scraper.errors.first.should be_a(OpenURI::HTTPError) }
          specify { scraper.errors.first.message.should eq(status_line) }
          # After a failed fetch only :url is expected among the data fields.
          specify { scraper.data.fields.should == [ :url ] }

        end
      end
    end

  end
end
|
data/spec/spec_helper.rb
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Spec bootstrap: optional coverage, test libraries, shared examples and
# support files, then RSpec and VCR configuration.

# Coverage is opt-in via the SIMPLECOV environment variable; SimpleCov is
# required before the library so it is already running when the code loads.
if ENV["SIMPLECOV"]
  require 'simplecov'
  SimpleCov.start do
    add_filter "/vendor"
  end
end

require 'rspec/autorun'
require 'webmock/rspec'
require 'vcr'
require 'doctor_scrape'
require 'awesome_print'

# Directory containing this file. Paths below are resolved against it
# rather than the current working directory, so the suite does not depend
# on being launched from the project root.
spec_root = File.expand_path("..", __FILE__)

# Shared examples first, then support helpers; sorted so the load order
# does not depend on filesystem enumeration order.
%w[shared support].each do |dir|
  Dir[File.join(spec_root, dir, "**", "*.rb")].sort.each { |f| require f }
end

# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |config|
  config.treat_symbols_as_metadata_keys_with_true_values = true
  # With the :focus filter below, run everything when no example is tagged.
  config.run_all_when_everything_filtered = true
  config.filter_run :focus
  # Provides the `use_vcr_cassette` macro used by the specs.
  config.extend VCR::RSpec::Macros
end

VCR.configure do |config|
  config.cassette_library_dir = File.join(spec_root, "cassettes")
  config.hook_into :webmock
  config.default_cassette_options = { :serialize_with => :yaml }
end
|