doctor_scrape 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,9 @@ Gem::Specification.new do |gem|
15
15
  gem.require_paths = ["lib"]
16
16
  gem.version = DoctorScrape::VERSION
17
17
 
18
- gem.add_dependency "mechanize", ["~> 2.3"]
18
+ # gem.add_dependency "mechanize", ["~> 2.3"]
19
+ gem.add_dependency "nokogiri", ["~> 1.5.0"]
20
+ gem.add_dependency "unwind", ["~> 0.9.6"]
19
21
  # gem.add_dependency "text", ["~> 1.0.3"]
20
22
 
21
23
  gem.add_development_dependency "rspec", ["~> 2.8.0"]
@@ -31,5 +33,4 @@ Gem::Specification.new do |gem|
31
33
  gem.add_development_dependency "hirb", ["~> 0.6.0"]
32
34
  gem.add_development_dependency "awesome_print", ["~> 1.0.2"]
33
35
  gem.add_development_dependency "simplecov", ["~> 0.6.0"]
34
- # gem.add_development_dependency "psych", ["~> 1.2.2"]
35
36
  end
data/lib/doctor_scrape.rb CHANGED
@@ -1,14 +1,13 @@
1
1
  # encoding: utf-8
2
2
  require 'doctor_scrape/version'
3
- require 'mechanize'
4
3
  require 'nokogiri'
5
4
  require 'ostruct'
6
5
  require 'open-uri'
6
+ require 'unwind'
7
7
 
8
8
  module DoctorScrape
9
9
  autoload :Data, 'doctor_scrape/data'
10
10
  autoload :Search, 'doctor_scrape/search'
11
- autoload :RedirectFollower, 'doctor_scrape/redirect_follower'
12
11
 
13
12
  module Scraper
14
13
  autoload :Base, 'doctor_scrape/scraper/base'
@@ -29,7 +29,7 @@ module DoctorScrape
29
29
  end
30
30
 
31
31
  def resolve_urls(urls)
32
- urls.map { |url| RedirectFollower.new(url).resolve }
32
+ urls.map { |url| Unwind::RedirectFollower.resolve(url).final_url }
33
33
  end
34
34
 
35
35
  def resolve_scrapers(urls)
@@ -1,3 +1,3 @@
1
1
  module DoctorScrape
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
data/spec/search_spec.rb CHANGED
@@ -89,9 +89,9 @@ describe DoctorScrape::Search do
89
89
  let(:resolved) { ["http://bitly.com", "http://example.com", "http://googl.com"] }
90
90
 
91
91
  it "returns array with resolved urls" do
92
- resolvers = resolved.map { |r| double "resolver", :resolve => r }
92
+ resolvers = resolved.map { |r| double "resolver", :final_url => r }
93
93
  urls.each_with_index do |url, i|
94
- DoctorScrape::RedirectFollower.should_receive(:new).with(url) { resolvers[i] }
94
+ Unwind::RedirectFollower.should_receive(:resolve).with(url) { resolvers[i] }
95
95
  end
96
96
 
97
97
  DoctorScrape::Search.resolve_urls(urls).should == resolved
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doctor_scrape
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,22 +9,33 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-28 00:00:00.000000000 Z
12
+ date: 2012-03-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: mechanize
16
- requirement: &70186292553660 !ruby/object:Gem::Requirement
15
+ name: nokogiri
16
+ requirement: &70186149359280 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
20
20
  - !ruby/object:Gem::Version
21
- version: '2.3'
21
+ version: 1.5.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70186292553660
24
+ version_requirements: *70186149359280
25
+ - !ruby/object:Gem::Dependency
26
+ name: unwind
27
+ requirement: &70186149358760 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 0.9.6
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70186149358760
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: rspec
27
- requirement: &70186292553140 !ruby/object:Gem::Requirement
38
+ requirement: &70186149358280 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: 2.8.0
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *70186292553140
46
+ version_requirements: *70186149358280
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: vcr
38
- requirement: &70186292552660 !ruby/object:Gem::Requirement
49
+ requirement: &70186149357800 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ~>
@@ -43,10 +54,10 @@ dependencies:
43
54
  version: 2.0.0.rc1
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *70186292552660
57
+ version_requirements: *70186149357800
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: webmock
49
- requirement: &70186292552180 !ruby/object:Gem::Requirement
60
+ requirement: &70186149357320 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - <
@@ -54,10 +65,10 @@ dependencies:
54
65
  version: '1.8'
55
66
  type: :development
56
67
  prerelease: false
57
- version_requirements: *70186292552180
68
+ version_requirements: *70186149357320
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: guard
60
- requirement: &70186292551700 !ruby/object:Gem::Requirement
71
+ requirement: &70186149356840 !ruby/object:Gem::Requirement
61
72
  none: false
62
73
  requirements:
63
74
  - - ~>
@@ -65,10 +76,10 @@ dependencies:
65
76
  version: 1.0.0
66
77
  type: :development
67
78
  prerelease: false
68
- version_requirements: *70186292551700
79
+ version_requirements: *70186149356840
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: guard-rspec
71
- requirement: &70186292551220 !ruby/object:Gem::Requirement
82
+ requirement: &70186149356360 !ruby/object:Gem::Requirement
72
83
  none: false
73
84
  requirements:
74
85
  - - ~>
@@ -76,10 +87,10 @@ dependencies:
76
87
  version: 0.6.0
77
88
  type: :development
78
89
  prerelease: false
79
- version_requirements: *70186292551220
90
+ version_requirements: *70186149356360
80
91
  - !ruby/object:Gem::Dependency
81
92
  name: ruby_gntp
82
- requirement: &70186292550740 !ruby/object:Gem::Requirement
93
+ requirement: &70186149355880 !ruby/object:Gem::Requirement
83
94
  none: false
84
95
  requirements:
85
96
  - - ~>
@@ -87,10 +98,10 @@ dependencies:
87
98
  version: 0.3.4
88
99
  type: :development
89
100
  prerelease: false
90
- version_requirements: *70186292550740
101
+ version_requirements: *70186149355880
91
102
  - !ruby/object:Gem::Dependency
92
103
  name: rb-fsevent
93
- requirement: &70186292550260 !ruby/object:Gem::Requirement
104
+ requirement: &70186149355400 !ruby/object:Gem::Requirement
94
105
  none: false
95
106
  requirements:
96
107
  - - ~>
@@ -98,10 +109,10 @@ dependencies:
98
109
  version: 0.9.0
99
110
  type: :development
100
111
  prerelease: false
101
- version_requirements: *70186292550260
112
+ version_requirements: *70186149355400
102
113
  - !ruby/object:Gem::Dependency
103
114
  name: pry
104
- requirement: &70186292549780 !ruby/object:Gem::Requirement
115
+ requirement: &70186149354920 !ruby/object:Gem::Requirement
105
116
  none: false
106
117
  requirements:
107
118
  - - ~>
@@ -109,10 +120,10 @@ dependencies:
109
120
  version: 0.9.8.2
110
121
  type: :development
111
122
  prerelease: false
112
- version_requirements: *70186292549780
123
+ version_requirements: *70186149354920
113
124
  - !ruby/object:Gem::Dependency
114
125
  name: pry-doc
115
- requirement: &70186292549300 !ruby/object:Gem::Requirement
126
+ requirement: &70186149354440 !ruby/object:Gem::Requirement
116
127
  none: false
117
128
  requirements:
118
129
  - - ~>
@@ -120,10 +131,10 @@ dependencies:
120
131
  version: 0.4.0
121
132
  type: :development
122
133
  prerelease: false
123
- version_requirements: *70186292549300
134
+ version_requirements: *70186149354440
124
135
  - !ruby/object:Gem::Dependency
125
136
  name: pry-editline
126
- requirement: &70186292548820 !ruby/object:Gem::Requirement
137
+ requirement: &70186149353960 !ruby/object:Gem::Requirement
127
138
  none: false
128
139
  requirements:
129
140
  - - ~>
@@ -131,10 +142,10 @@ dependencies:
131
142
  version: 1.1.1
132
143
  type: :development
133
144
  prerelease: false
134
- version_requirements: *70186292548820
145
+ version_requirements: *70186149353960
135
146
  - !ruby/object:Gem::Dependency
136
147
  name: hirb
137
- requirement: &70186292548340 !ruby/object:Gem::Requirement
148
+ requirement: &70186149353480 !ruby/object:Gem::Requirement
138
149
  none: false
139
150
  requirements:
140
151
  - - ~>
@@ -142,10 +153,10 @@ dependencies:
142
153
  version: 0.6.0
143
154
  type: :development
144
155
  prerelease: false
145
- version_requirements: *70186292548340
156
+ version_requirements: *70186149353480
146
157
  - !ruby/object:Gem::Dependency
147
158
  name: awesome_print
148
- requirement: &70186292547860 !ruby/object:Gem::Requirement
159
+ requirement: &70186149353000 !ruby/object:Gem::Requirement
149
160
  none: false
150
161
  requirements:
151
162
  - - ~>
@@ -153,10 +164,10 @@ dependencies:
153
164
  version: 1.0.2
154
165
  type: :development
155
166
  prerelease: false
156
- version_requirements: *70186292547860
167
+ version_requirements: *70186149353000
157
168
  - !ruby/object:Gem::Dependency
158
169
  name: simplecov
159
- requirement: &70186292547380 !ruby/object:Gem::Requirement
170
+ requirement: &70186149352520 !ruby/object:Gem::Requirement
160
171
  none: false
161
172
  requirements:
162
173
  - - ~>
@@ -164,7 +175,7 @@ dependencies:
164
175
  version: 0.6.0
165
176
  type: :development
166
177
  prerelease: false
167
- version_requirements: *70186292547380
178
+ version_requirements: *70186149352520
168
179
  description: Library for scraping norwegian doctoral dissertations
169
180
  email:
170
181
  - gudleik@gmail.com
@@ -183,7 +194,6 @@ files:
183
194
  - doctor_scrape.gemspec
184
195
  - lib/doctor_scrape.rb
185
196
  - lib/doctor_scrape/data.rb
186
- - lib/doctor_scrape/redirect_follower.rb
187
197
  - lib/doctor_scrape/scraper/base.rb
188
198
  - lib/doctor_scrape/scraper/bora.rb
189
199
  - lib/doctor_scrape/scraper/diva.rb
@@ -214,7 +224,6 @@ files:
214
224
  - spec/parse/diva_spec.rb
215
225
  - spec/parse/duo_spec.rb
216
226
  - spec/parse/munin_spec.rb
217
- - spec/redirect_follower_spec.rb
218
227
  - spec/scraper_spec.rb
219
228
  - spec/scrapers/base_spec.rb
220
229
  - spec/scrapers/bora_spec.rb
@@ -240,7 +249,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
240
249
  version: '0'
241
250
  segments:
242
251
  - 0
243
- hash: -4122406901322646134
252
+ hash: -2449353312367516367
244
253
  required_rubygems_version: !ruby/object:Gem::Requirement
245
254
  none: false
246
255
  requirements:
@@ -249,7 +258,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
249
258
  version: '0'
250
259
  segments:
251
260
  - 0
252
- hash: -4122406901322646134
261
+ hash: -2449353312367516367
253
262
  requirements: []
254
263
  rubyforge_project:
255
264
  rubygems_version: 1.8.11
@@ -279,7 +288,6 @@ test_files:
279
288
  - spec/parse/diva_spec.rb
280
289
  - spec/parse/duo_spec.rb
281
290
  - spec/parse/munin_spec.rb
282
- - spec/redirect_follower_spec.rb
283
291
  - spec/scraper_spec.rb
284
292
  - spec/scrapers/base_spec.rb
285
293
  - spec/scrapers/bora_spec.rb
@@ -1,29 +0,0 @@
1
- # encoding: utf-8
2
- require 'net/https'
3
- module DoctorScrape
4
- class TooManyRedirects < StandardError; end
5
-
6
- class RedirectFollower
7
- attr_accessor :url
8
-
9
- def initialize(url)
10
- @url = url
11
- end
12
-
13
- def resolve(limit=5)
14
- raise TooManyRedirects if limit == 0
15
-
16
- response = Net::HTTP.get_response URI.parse(@url)
17
-
18
- if response.is_a? Net::HTTPRedirection
19
- @url = response['location']
20
- resolve limit - 1
21
- end
22
-
23
- @url
24
- rescue Net::HTTPBadResponse => error
25
- # This can safely be ignored
26
- end
27
-
28
- end
29
- end
@@ -1,37 +0,0 @@
1
- # encoding: utf-8
2
- require 'spec_helper'
3
-
4
- describe DoctorScrape::RedirectFollower do
5
- let(:url) { "http://bit.ly/foobar" }
6
- let(:endpoint) { "http://example.com" }
7
- let(:resolver) { DoctorScrape::RedirectFollower.new url }
8
- before { stub_request(:any, endpoint).to_return(:body => "You found me!") }
9
-
10
- context "when url doesn't redirect" do
11
- before { stub_request(:any, url).to_return(:body => "Ok") }
12
- specify { resolver.resolve.should eq url }
13
- end
14
-
15
- context "when url redirects" do
16
- before { stub_request(:any, url).to_return(:status => [ 302, "Moved Temporarily" ], headers: { location: endpoint }) }
17
- specify { resolver.resolve.should eq endpoint }
18
- end
19
-
20
- context "too many redirects" do
21
- before { stub_request(:any, url).to_return(:status => [ 302, "Moved Temporarily" ], headers: { location: url }) }
22
-
23
- it "raises error after 5 redirects" do
24
- expect { resolver.resolve }.to raise_error
25
- a_request(:get, url).should have_been_made.times(5)
26
- end
27
- end
28
-
29
- context "when exception occurs" do
30
- it "returns the last url" do
31
- stub_request(:get, url).to_return(:status => [ 302, "Moved Temporarily" ], headers: { location: endpoint })
32
- stub_request(:get, endpoint).to_raise Net::HTTPBadResponse
33
- resolver.resolve.should eq(endpoint)
34
- end
35
- end
36
-
37
- end