doctor_scrape 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -15,7 +15,9 @@ Gem::Specification.new do |gem|
15
15
  gem.require_paths = ["lib"]
16
16
  gem.version = DoctorScrape::VERSION
17
17
 
18
- gem.add_dependency "mechanize", ["~> 2.3"]
18
+ # gem.add_dependency "mechanize", ["~> 2.3"]
19
+ gem.add_dependency "nokogiri", ["~> 1.5.0"]
20
+ gem.add_dependency "unwind", ["~> 0.9.6"]
19
21
  # gem.add_dependency "text", ["~> 1.0.3"]
20
22
 
21
23
  gem.add_development_dependency "rspec", ["~> 2.8.0"]
@@ -31,5 +33,4 @@ Gem::Specification.new do |gem|
31
33
  gem.add_development_dependency "hirb", ["~> 0.6.0"]
32
34
  gem.add_development_dependency "awesome_print", ["~> 1.0.2"]
33
35
  gem.add_development_dependency "simplecov", ["~> 0.6.0"]
34
- # gem.add_development_dependency "psych", ["~> 1.2.2"]
35
36
  end
data/lib/doctor_scrape.rb CHANGED
@@ -1,14 +1,13 @@
1
1
  # encoding: utf-8
2
2
  require 'doctor_scrape/version'
3
- require 'mechanize'
4
3
  require 'nokogiri'
5
4
  require 'ostruct'
6
5
  require 'open-uri'
6
+ require 'unwind'
7
7
 
8
8
  module DoctorScrape
9
9
  autoload :Data, 'doctor_scrape/data'
10
10
  autoload :Search, 'doctor_scrape/search'
11
- autoload :RedirectFollower, 'doctor_scrape/redirect_follower'
12
11
 
13
12
  module Scraper
14
13
  autoload :Base, 'doctor_scrape/scraper/base'
@@ -29,7 +29,7 @@ module DoctorScrape
29
29
  end
30
30
 
31
31
  def resolve_urls(urls)
32
- urls.map { |url| RedirectFollower.new(url).resolve }
32
+ urls.map { |url| Unwind::RedirectFollower.resolve(url).final_url }
33
33
  end
34
34
 
35
35
  def resolve_scrapers(urls)
@@ -1,3 +1,3 @@
1
1
  module DoctorScrape
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
data/spec/search_spec.rb CHANGED
@@ -89,9 +89,9 @@ describe DoctorScrape::Search do
89
89
  let(:resolved) { ["http://bitly.com", "http://example.com", "http://googl.com"] }
90
90
 
91
91
  it "returns array with resolved urls" do
92
- resolvers = resolved.map { |r| double "resolver", :resolve => r }
92
+ resolvers = resolved.map { |r| double "resolver", :final_url => r }
93
93
  urls.each_with_index do |url, i|
94
- DoctorScrape::RedirectFollower.should_receive(:new).with(url) { resolvers[i] }
94
+ Unwind::RedirectFollower.should_receive(:resolve).with(url) { resolvers[i] }
95
95
  end
96
96
 
97
97
  DoctorScrape::Search.resolve_urls(urls).should == resolved
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doctor_scrape
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,22 +9,33 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-28 00:00:00.000000000 Z
12
+ date: 2012-03-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: mechanize
16
- requirement: &70186292553660 !ruby/object:Gem::Requirement
15
+ name: nokogiri
16
+ requirement: &70186149359280 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
20
20
  - !ruby/object:Gem::Version
21
- version: '2.3'
21
+ version: 1.5.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70186292553660
24
+ version_requirements: *70186149359280
25
+ - !ruby/object:Gem::Dependency
26
+ name: unwind
27
+ requirement: &70186149358760 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 0.9.6
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70186149358760
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: rspec
27
- requirement: &70186292553140 !ruby/object:Gem::Requirement
38
+ requirement: &70186149358280 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: 2.8.0
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *70186292553140
46
+ version_requirements: *70186149358280
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: vcr
38
- requirement: &70186292552660 !ruby/object:Gem::Requirement
49
+ requirement: &70186149357800 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ~>
@@ -43,10 +54,10 @@ dependencies:
43
54
  version: 2.0.0.rc1
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *70186292552660
57
+ version_requirements: *70186149357800
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: webmock
49
- requirement: &70186292552180 !ruby/object:Gem::Requirement
60
+ requirement: &70186149357320 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - <
@@ -54,10 +65,10 @@ dependencies:
54
65
  version: '1.8'
55
66
  type: :development
56
67
  prerelease: false
57
- version_requirements: *70186292552180
68
+ version_requirements: *70186149357320
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: guard
60
- requirement: &70186292551700 !ruby/object:Gem::Requirement
71
+ requirement: &70186149356840 !ruby/object:Gem::Requirement
61
72
  none: false
62
73
  requirements:
63
74
  - - ~>
@@ -65,10 +76,10 @@ dependencies:
65
76
  version: 1.0.0
66
77
  type: :development
67
78
  prerelease: false
68
- version_requirements: *70186292551700
79
+ version_requirements: *70186149356840
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: guard-rspec
71
- requirement: &70186292551220 !ruby/object:Gem::Requirement
82
+ requirement: &70186149356360 !ruby/object:Gem::Requirement
72
83
  none: false
73
84
  requirements:
74
85
  - - ~>
@@ -76,10 +87,10 @@ dependencies:
76
87
  version: 0.6.0
77
88
  type: :development
78
89
  prerelease: false
79
- version_requirements: *70186292551220
90
+ version_requirements: *70186149356360
80
91
  - !ruby/object:Gem::Dependency
81
92
  name: ruby_gntp
82
- requirement: &70186292550740 !ruby/object:Gem::Requirement
93
+ requirement: &70186149355880 !ruby/object:Gem::Requirement
83
94
  none: false
84
95
  requirements:
85
96
  - - ~>
@@ -87,10 +98,10 @@ dependencies:
87
98
  version: 0.3.4
88
99
  type: :development
89
100
  prerelease: false
90
- version_requirements: *70186292550740
101
+ version_requirements: *70186149355880
91
102
  - !ruby/object:Gem::Dependency
92
103
  name: rb-fsevent
93
- requirement: &70186292550260 !ruby/object:Gem::Requirement
104
+ requirement: &70186149355400 !ruby/object:Gem::Requirement
94
105
  none: false
95
106
  requirements:
96
107
  - - ~>
@@ -98,10 +109,10 @@ dependencies:
98
109
  version: 0.9.0
99
110
  type: :development
100
111
  prerelease: false
101
- version_requirements: *70186292550260
112
+ version_requirements: *70186149355400
102
113
  - !ruby/object:Gem::Dependency
103
114
  name: pry
104
- requirement: &70186292549780 !ruby/object:Gem::Requirement
115
+ requirement: &70186149354920 !ruby/object:Gem::Requirement
105
116
  none: false
106
117
  requirements:
107
118
  - - ~>
@@ -109,10 +120,10 @@ dependencies:
109
120
  version: 0.9.8.2
110
121
  type: :development
111
122
  prerelease: false
112
- version_requirements: *70186292549780
123
+ version_requirements: *70186149354920
113
124
  - !ruby/object:Gem::Dependency
114
125
  name: pry-doc
115
- requirement: &70186292549300 !ruby/object:Gem::Requirement
126
+ requirement: &70186149354440 !ruby/object:Gem::Requirement
116
127
  none: false
117
128
  requirements:
118
129
  - - ~>
@@ -120,10 +131,10 @@ dependencies:
120
131
  version: 0.4.0
121
132
  type: :development
122
133
  prerelease: false
123
- version_requirements: *70186292549300
134
+ version_requirements: *70186149354440
124
135
  - !ruby/object:Gem::Dependency
125
136
  name: pry-editline
126
- requirement: &70186292548820 !ruby/object:Gem::Requirement
137
+ requirement: &70186149353960 !ruby/object:Gem::Requirement
127
138
  none: false
128
139
  requirements:
129
140
  - - ~>
@@ -131,10 +142,10 @@ dependencies:
131
142
  version: 1.1.1
132
143
  type: :development
133
144
  prerelease: false
134
- version_requirements: *70186292548820
145
+ version_requirements: *70186149353960
135
146
  - !ruby/object:Gem::Dependency
136
147
  name: hirb
137
- requirement: &70186292548340 !ruby/object:Gem::Requirement
148
+ requirement: &70186149353480 !ruby/object:Gem::Requirement
138
149
  none: false
139
150
  requirements:
140
151
  - - ~>
@@ -142,10 +153,10 @@ dependencies:
142
153
  version: 0.6.0
143
154
  type: :development
144
155
  prerelease: false
145
- version_requirements: *70186292548340
156
+ version_requirements: *70186149353480
146
157
  - !ruby/object:Gem::Dependency
147
158
  name: awesome_print
148
- requirement: &70186292547860 !ruby/object:Gem::Requirement
159
+ requirement: &70186149353000 !ruby/object:Gem::Requirement
149
160
  none: false
150
161
  requirements:
151
162
  - - ~>
@@ -153,10 +164,10 @@ dependencies:
153
164
  version: 1.0.2
154
165
  type: :development
155
166
  prerelease: false
156
- version_requirements: *70186292547860
167
+ version_requirements: *70186149353000
157
168
  - !ruby/object:Gem::Dependency
158
169
  name: simplecov
159
- requirement: &70186292547380 !ruby/object:Gem::Requirement
170
+ requirement: &70186149352520 !ruby/object:Gem::Requirement
160
171
  none: false
161
172
  requirements:
162
173
  - - ~>
@@ -164,7 +175,7 @@ dependencies:
164
175
  version: 0.6.0
165
176
  type: :development
166
177
  prerelease: false
167
- version_requirements: *70186292547380
178
+ version_requirements: *70186149352520
168
179
  description: Library for scraping norwegian doctoral dissertations
169
180
  email:
170
181
  - gudleik@gmail.com
@@ -183,7 +194,6 @@ files:
183
194
  - doctor_scrape.gemspec
184
195
  - lib/doctor_scrape.rb
185
196
  - lib/doctor_scrape/data.rb
186
- - lib/doctor_scrape/redirect_follower.rb
187
197
  - lib/doctor_scrape/scraper/base.rb
188
198
  - lib/doctor_scrape/scraper/bora.rb
189
199
  - lib/doctor_scrape/scraper/diva.rb
@@ -214,7 +224,6 @@ files:
214
224
  - spec/parse/diva_spec.rb
215
225
  - spec/parse/duo_spec.rb
216
226
  - spec/parse/munin_spec.rb
217
- - spec/redirect_follower_spec.rb
218
227
  - spec/scraper_spec.rb
219
228
  - spec/scrapers/base_spec.rb
220
229
  - spec/scrapers/bora_spec.rb
@@ -240,7 +249,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
240
249
  version: '0'
241
250
  segments:
242
251
  - 0
243
- hash: -4122406901322646134
252
+ hash: -2449353312367516367
244
253
  required_rubygems_version: !ruby/object:Gem::Requirement
245
254
  none: false
246
255
  requirements:
@@ -249,7 +258,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
249
258
  version: '0'
250
259
  segments:
251
260
  - 0
252
- hash: -4122406901322646134
261
+ hash: -2449353312367516367
253
262
  requirements: []
254
263
  rubyforge_project:
255
264
  rubygems_version: 1.8.11
@@ -279,7 +288,6 @@ test_files:
279
288
  - spec/parse/diva_spec.rb
280
289
  - spec/parse/duo_spec.rb
281
290
  - spec/parse/munin_spec.rb
282
- - spec/redirect_follower_spec.rb
283
291
  - spec/scraper_spec.rb
284
292
  - spec/scrapers/base_spec.rb
285
293
  - spec/scrapers/bora_spec.rb
@@ -1,29 +0,0 @@
1
- # encoding: utf-8
2
- require 'net/https'
3
- module DoctorScrape
4
- class TooManyRedirects < StandardError; end
5
-
6
- class RedirectFollower
7
- attr_accessor :url
8
-
9
- def initialize(url)
10
- @url = url
11
- end
12
-
13
- def resolve(limit=5)
14
- raise TooManyRedirects if limit == 0
15
-
16
- response = Net::HTTP.get_response URI.parse(@url)
17
-
18
- if response.is_a? Net::HTTPRedirection
19
- @url = response['location']
20
- resolve limit - 1
21
- end
22
-
23
- @url
24
- rescue Net::HTTPBadResponse => error
25
- # This can safely be ignored
26
- end
27
-
28
- end
29
- end
@@ -1,37 +0,0 @@
1
- # encoding: utf-8
2
- require 'spec_helper'
3
-
4
- describe DoctorScrape::RedirectFollower do
5
- let(:url) { "http://bit.ly/foobar" }
6
- let(:endpoint) { "http://example.com" }
7
- let(:resolver) { DoctorScrape::RedirectFollower.new url }
8
- before { stub_request(:any, endpoint).to_return(:body => "You found me!") }
9
-
10
- context "when url doesn't redirect" do
11
- before { stub_request(:any, url).to_return(:body => "Ok") }
12
- specify { resolver.resolve.should eq url }
13
- end
14
-
15
- context "when url redirects" do
16
- before { stub_request(:any, url).to_return(:status => [ 302, "Moved Temporarily" ], headers: { location: endpoint }) }
17
- specify { resolver.resolve.should eq endpoint }
18
- end
19
-
20
- context "too many redirects" do
21
- before { stub_request(:any, url).to_return(:status => [ 302, "Moved Temporarily" ], headers: { location: url }) }
22
-
23
- it "raises error after 5 redirects" do
24
- expect { resolver.resolve }.to raise_error
25
- a_request(:get, url).should have_been_made.times(5)
26
- end
27
- end
28
-
29
- context "when exception occurs" do
30
- it "returns the last url" do
31
- stub_request(:get, url).to_return(:status => [ 302, "Moved Temporarily" ], headers: { location: endpoint })
32
- stub_request(:get, endpoint).to_raise Net::HTTPBadResponse
33
- resolver.resolve.should eq(endpoint)
34
- end
35
- end
36
-
37
- end