upton 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/upton.rb +86 -116
- data/lib/upton/downloader.rb +126 -0
- data/lib/upton/utils.rb +43 -0
- data/spec/data/propublica.html +269 -269
- data/spec/data/propublica_search.html +388 -0
- data/spec/data/propublica_search_page_2.html +375 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/upton_downloader_spec.rb +75 -0
- data/spec/upton_spec.rb +110 -47
- metadata +26 -3
- data/lib/utils.rb +0 -74
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
2
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
3
|
+
# Require this file using `require "spec_helper"` to ensure that it is only
|
4
|
+
# loaded once.
|
5
|
+
#
|
6
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
7
|
+
|
8
|
+
require "webmock/rspec"
|
9
|
+
RSpec.configure do |config|
|
10
|
+
config.treat_symbols_as_metadata_keys_with_true_values = true
|
11
|
+
config.run_all_when_everything_filtered = true
|
12
|
+
config.filter_run :focus
|
13
|
+
|
14
|
+
# Run specs in random order to surface order dependencies. If you find an
|
15
|
+
# order dependency and want to debug it, you can fix the order by providing
|
16
|
+
# the seed, which is printed after each run.
|
17
|
+
# --seed 1234
|
18
|
+
config.order = 'random'
|
19
|
+
WebMock.disable_net_connect!(:allow_localhost => true)
|
20
|
+
end
|
data/spec/upton_downloader_spec.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
require "spec_helper.rb"
|
2
|
+
require_relative "../lib/upton/downloader.rb"
|
3
|
+
|
4
|
+
describe Upton::Downloader do
|
5
|
+
|
6
|
+
def remove_default_cache_folder!
|
7
|
+
FileUtils.rm_rf(default_cache_folder)
|
8
|
+
end
|
9
|
+
|
10
|
+
def default_cache_folder
|
11
|
+
"#{Dir.tmpdir}/upton"
|
12
|
+
end
|
13
|
+
|
14
|
+
let(:cache) { Upton::Downloader.new("http://www.example.com") }
|
15
|
+
let(:uncache) { Upton::Downloader.new("http://www.example.com", cache: false ) }
|
16
|
+
|
17
|
+
context "When caching enabled" do
|
18
|
+
|
19
|
+
context "When disk cache is unavailable" do
|
20
|
+
before(:each) do
|
21
|
+
remove_default_cache_folder!
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should download from the resource once" do
|
25
|
+
stub = stub_request(:get, "http://www.example.com")
|
26
|
+
cache.get
|
27
|
+
stub.should have_been_requested.once
|
28
|
+
end
|
29
|
+
|
30
|
+
it "should use the cache from the second request" do
|
31
|
+
stub = stub_request(:get, "http://www.example.com")
|
32
|
+
cache.get
|
33
|
+
cache.get
|
34
|
+
stub.should have_been_requested.once
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
context "cache available" do
|
40
|
+
it "should not make a http request" do
|
41
|
+
stub = stub_request(:get, "http://www.example.com")
|
42
|
+
cache.get
|
43
|
+
stub.should_not have_been_requested
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
context "Different urls should have different caches" do
|
49
|
+
let(:cache_one) { Upton::Downloader.new("http://www.example.com", cache: true) }
|
50
|
+
let(:cache_two) { Upton::Downloader.new("http://www.example.com?a=1&b=2", cache: true) }
|
51
|
+
|
52
|
+
it "should create two cached files inside the cache directory" do
|
53
|
+
remove_default_cache_folder!
|
54
|
+
stub_one = stub_request(:get, "http://www.example.com")
|
55
|
+
stub_two = stub_request(:get, "http://www.example.com?a=1&b=2")
|
56
|
+
|
57
|
+
cache_one.get
|
58
|
+
cache_two.get
|
59
|
+
Dir.entries(default_cache_folder).count.should eq(4)
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
context "When caching disabled" do
|
66
|
+
context "When #download is called twice" do
|
67
|
+
it "should make two requests" do
|
68
|
+
stub = stub_request(:get, "http://www.example.com")
|
69
|
+
uncache.get
|
70
|
+
uncache.get
|
71
|
+
stub.should have_been_requested.twice
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
data/spec/upton_spec.rb
CHANGED
@@ -5,53 +5,29 @@ require 'thin'
|
|
5
5
|
require 'nokogiri'
|
6
6
|
require 'restclient'
|
7
7
|
require 'fileutils'
|
8
|
+
require "spec_helper.rb"
|
9
|
+
|
8
10
|
require './lib/upton'
|
9
11
|
|
12
|
+
|
10
13
|
describe Upton do
|
11
14
|
before :all do
|
12
|
-
|
13
|
-
class Server
|
14
|
-
def call(env)
|
15
|
-
@root = File.expand_path(File.dirname(__FILE__))
|
16
|
-
path = Rack::Utils.unescape(env['PATH_INFO'])
|
17
|
-
path += 'index.html' if path == '/'
|
18
|
-
file = File.join(@root, "data", path)
|
19
|
-
|
20
|
-
params = Rack::Utils.parse_nested_query(env['QUERY_STRING'])
|
21
|
-
|
22
|
-
if File.exists?(file)
|
23
|
-
[ 200, {"Content-Type" => "text/html; charset=utf-8"}, File.read(file) ]
|
24
|
-
else
|
25
|
-
[ 404, {'Content-Type' => 'text/plain'}, 'file not found' ]
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
def start_test_server
|
31
|
-
@server_thread = Thread.new do
|
32
|
-
Rack::Handler::Thin.run ::Server.new, :Port => 9876
|
33
|
-
end
|
34
|
-
sleep(1) # wait a sec for the server to be booted
|
35
|
-
end
|
36
|
-
|
37
|
-
start_test_server()
|
38
|
-
|
39
|
-
@headlines = ["Webinar: How to Use Prescriber Checkup to Power Your Reporting",
|
15
|
+
@headlines = ["Webinar: How to Use Prescriber Checkup to Power Your Reporting",
|
40
16
|
"",
|
41
17
|
"A Prosecutor, a Wrongful Conviction and a Question of Justice",
|
42
18
|
"Six Facts Lost in the IRS Scandal"]
|
43
|
-
@most_commented_heds = [["Six Facts Lost in the IRS Scandal",
|
44
|
-
"How the IRS’s Nonprofit Division Got So Dysfunctional",
|
45
|
-
"Sound, Fury and the IRS Mess",
|
46
|
-
"The Most Important #Muckreads on Rape in the Military",
|
47
|
-
"Congressmen to Hagel: Where Are the Missing War Records?",
|
48
|
-
"As Need for New Flood Maps Rises, Congress and Obama Cut Funding",
|
49
|
-
"A Prosecutor, a Wrongful Conviction and a Question of Justice",
|
50
|
-
"A Prolonged Stay: The Reasons Behind the Slow Pace of Executions",
|
19
|
+
@most_commented_heds = [["Six Facts Lost in the IRS Scandal",
|
20
|
+
"How the IRS’s Nonprofit Division Got So Dysfunctional",
|
21
|
+
"Sound, Fury and the IRS Mess",
|
22
|
+
"The Most Important #Muckreads on Rape in the Military",
|
23
|
+
"Congressmen to Hagel: Where Are the Missing War Records?",
|
24
|
+
"As Need for New Flood Maps Rises, Congress and Obama Cut Funding",
|
25
|
+
"A Prosecutor, a Wrongful Conviction and a Question of Justice",
|
26
|
+
"A Prolonged Stay: The Reasons Behind the Slow Pace of Executions",
|
51
27
|
"The Story Behind Our Hospital Interactive",
|
52
28
|
"irs-test-charts-for-embedding"]]
|
53
|
-
@east_timor_prime_ministers = [[
|
54
|
-
["#", "Portrait", "Name(Birth–Death)", "Term of Office", "Party",
|
29
|
+
@east_timor_prime_ministers = [[
|
30
|
+
["#", "Portrait", "Name(Birth–Death)", "Term of Office", "Party",
|
55
31
|
"1", "2", "3", "4",],
|
56
32
|
[],
|
57
33
|
["", "Mari Alkatiri(b. 1949)", "20 May 2002", "26 June 2006[1]", "FRETILIN"],
|
@@ -59,12 +35,27 @@ describe Upton do
|
|
59
35
|
["", "Estanislau da Silva(b. 1952)", "19 May 2007", "8 August 2007", "FRETILIN"],
|
60
36
|
["", "Xanana Gusmão(b. 1946)", "8 August 2007", "Incumbent", "CNRT"],
|
61
37
|
]]
|
38
|
+
@searchResults = ["Webinar: How to Use Prescriber Checkup to Power Your Reporting",
|
39
|
+
"A Prosecutor, a Wrongful Conviction and a Question of Justice",
|
40
|
+
"Six Facts Lost in the IRS Scandal"]
|
62
41
|
end
|
63
42
|
|
64
43
|
it "should scrape in the basic case" do
|
65
|
-
|
44
|
+
stub_request(:get, "www.example.com/propublica.html").
|
45
|
+
to_return(:body => File.new('./spec/data/propublica.html'), :status => 200)
|
46
|
+
stub_request(:get, "www.example.com/discussion.html").
|
47
|
+
to_return(:body => File.new('./spec/data/discussion.html'), :status => 200)
|
48
|
+
stub_request(:get, "www.example.com/prosecutor.html").
|
49
|
+
to_return(:body => File.new('./spec/data/prosecutor.html'), :status => 200)
|
50
|
+
stub_request(:get, "www.example.com/webinar.html").
|
51
|
+
to_return(:body => File.new('./spec/data/webinar.html'), :status => 200)
|
52
|
+
stub_request(:get, "www.example.com/sixfacts.html").
|
53
|
+
to_return(:body => File.new('./spec/data/sixfacts.html'), :status => 200)
|
54
|
+
|
55
|
+
propubscraper = Upton::Scraper.new("http://www.example.com/propublica.html", "section#river section h1 a")
|
66
56
|
propubscraper.debug = true
|
67
57
|
propubscraper.verbose = true
|
58
|
+
propubscraper.sleep_time_between_requests = 0
|
68
59
|
|
69
60
|
heds = propubscraper.scrape do |article_str|
|
70
61
|
doc = Nokogiri::HTML(article_str)
|
@@ -74,18 +65,31 @@ describe Upton do
|
|
74
65
|
heds.should eql @headlines
|
75
66
|
end
|
76
67
|
|
77
|
-
it 'should properly handle relative urls' do
|
68
|
+
it 'should properly handle relative urls' do
|
78
69
|
# uses a modified page from the previous test in which the target
|
79
70
|
# href, http://127.0.0.1:9876/prosecutors.html, has been changed
|
80
71
|
# to a relative url
|
81
72
|
#
|
82
|
-
# Note: this test is a bit quirky, because it passes on the fact that
|
73
|
+
# Note: this test is a bit quirky, because it passes on the fact that
|
83
74
|
# the resolve_url creates a url identical to one that is already stashed ("prosecutors.html").
|
84
75
|
# So it works, but because of a coupling to how Upton handles caching in the file system
|
85
76
|
|
86
|
-
|
77
|
+
stub_request(:get, "www.example.com/propublica-relative.html").
|
78
|
+
to_return(:body => File.new('./spec/data/propublica-relative.html'), :status => 200)
|
79
|
+
stub_request(:get, "www.example.com/prosecutor.html").
|
80
|
+
to_return(:body => File.new('./spec/data/prosecutor.html'), :status => 200)
|
81
|
+
stub_request(:get, "www.example.com/sixfacts.html").
|
82
|
+
to_return(:body => File.new('./spec/data/sixfacts.html'), :status => 200)
|
83
|
+
stub_request(:get, "www.example.com/webinar.html").
|
84
|
+
to_return(:body => File.new('./spec/data/webinar.html'), :status => 200)
|
85
|
+
stub_request(:get, "www.example.com/discussion.html").
|
86
|
+
to_return(:body => File.new('./spec/data/discussion.html'), :status => 200)
|
87
|
+
|
88
|
+
|
89
|
+
propubscraper = Upton::Scraper.new("http://www.example.com/propublica-relative.html", "section#river h1 a")
|
87
90
|
propubscraper.debug = true
|
88
91
|
propubscraper.verbose = true
|
92
|
+
propubscraper.sleep_time_between_requests = 0
|
89
93
|
|
90
94
|
heds = propubscraper.scrape do |article_str|
|
91
95
|
doc = Nokogiri::HTML(article_str)
|
@@ -96,23 +100,82 @@ describe Upton do
|
|
96
100
|
end
|
97
101
|
|
98
102
|
it "should scrape a list properly with the list helper" do
|
99
|
-
|
103
|
+
stub_request(:get, "www.example.com/propublica.html").
|
104
|
+
to_return(:body => File.new('./spec/data/propublica.html'), :status => 200)
|
105
|
+
|
106
|
+
propubscraper = Upton::Scraper.new(["http://www.example.com/propublica.html"])
|
100
107
|
propubscraper.debug = true
|
101
108
|
propubscraper.verbose = true
|
102
|
-
|
109
|
+
propubscraper.sleep_time_between_requests = 0
|
110
|
+
|
111
|
+
list = propubscraper.scrape(&Upton::Utils.list("#jamb.wNarrow #most-commented li a"))
|
103
112
|
FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
|
104
113
|
list.should eql @most_commented_heds
|
105
114
|
end
|
106
115
|
|
107
116
|
it "should scrape a table properly with the table helper" do
|
108
|
-
|
117
|
+
stub_request(:get, "www.example.com/easttimor.html").
|
118
|
+
to_return(:body => File.new('./spec/data/easttimor.html'), :status => 200)
|
119
|
+
|
120
|
+
propubscraper = Upton::Scraper.new(["http://www.example.com/easttimor.html"])
|
109
121
|
propubscraper.debug = true
|
110
122
|
propubscraper.verbose = true
|
123
|
+
propubscraper.sleep_time_between_requests = 0
|
124
|
+
|
111
125
|
table = propubscraper.scrape(&Upton::Utils.table('//table[contains(concat(" ", normalize-space(@class), " "), " wikitable ")][2]'))
|
112
126
|
FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
|
113
127
|
table.should eql @east_timor_prime_ministers
|
114
128
|
end
|
115
129
|
|
116
|
-
it "should test saving files with the right encoding"
|
117
|
-
|
130
|
+
it "should test saving files with the right encoding" do
|
131
|
+
pending "finding a site that gives funny encodings"
|
132
|
+
end
|
133
|
+
|
134
|
+
it "should scrape paginated pages" do
|
135
|
+
stub_request(:get, "www.example.com/propublica_search.html").
|
136
|
+
to_return(:body => File.new('./spec/data/propublica_search.html'), :status => 200)
|
137
|
+
stub_request(:get, "www.example.com/propublica_search.html?p=2").
|
138
|
+
to_return(:body => File.new('./spec/data/propublica_search_page_2.html'), :status => 200)
|
139
|
+
stub_request(:get, "www.example.com/propublica_search.html?p=3").
|
140
|
+
to_return(:body => '', :status => 200)
|
141
|
+
stub_request(:get, "www.example.com/webinar.html").
|
142
|
+
to_return(:body => File.new('./spec/data/webinar.html'), :status => 200)
|
143
|
+
stub_request(:get, "www.example.com/prosecutor.html").
|
144
|
+
to_return(:body => File.new('./spec/data/prosecutor.html'), :status => 200)
|
145
|
+
stub_request(:get, "www.example.com/sixfacts.html").
|
146
|
+
to_return(:body => File.new('./spec/data/sixfacts.html'), :status => 200)
|
147
|
+
|
148
|
+
|
149
|
+
propubscraper = Upton::Scraper.new("http://www.example.com/propublica_search.html", '.compact-list a.title-link')
|
150
|
+
propubscraper.debug = true
|
151
|
+
propubscraper.verbose = true
|
152
|
+
propubscraper.paginated = true
|
153
|
+
propubscraper.pagination_param = 'p'
|
154
|
+
propubscraper.pagination_max_pages = 3
|
155
|
+
propubscraper.sleep_time_between_requests = 0
|
156
|
+
|
157
|
+
results = propubscraper.scrape do |article_str|
|
158
|
+
doc = Nokogiri::HTML(article_str)
|
159
|
+
hed = doc.css('h1.article-title').text
|
160
|
+
end
|
161
|
+
FileUtils.rm_r("test_stashes") if Dir.exists?("test_stashes")
|
162
|
+
results.should eql @searchResults
|
163
|
+
end
|
164
|
+
|
165
|
+
|
166
|
+
before do
|
167
|
+
Upton::Scraper.stub(:sleep)
|
168
|
+
end
|
169
|
+
|
170
|
+
it "should sleep after uncached requests" do
|
171
|
+
stub_request(:get, "www.example.com")
|
172
|
+
u = Upton::Scraper.new("http://www.example.com", '.whatever')
|
173
|
+
u.should_receive(:sleep)
|
174
|
+
stub = stub_request(:get, "http://www.example.com")
|
175
|
+
u.scrape
|
176
|
+
end
|
177
|
+
|
178
|
+
it "should be silent if verbose if false" do
|
179
|
+
pending
|
180
|
+
end
|
118
181
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: upton
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.7
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeremy B. Merrill
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-08-
|
11
|
+
date: 2013-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - '>='
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: webmock
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: thin
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -130,7 +144,9 @@ extensions: []
|
|
130
144
|
extra_rdoc_files: []
|
131
145
|
files:
|
132
146
|
- lib/upton.rb
|
133
|
-
- lib/utils.rb
|
147
|
+
- lib/upton/utils.rb
|
148
|
+
- lib/upton/downloader.rb
|
149
|
+
- spec/data/propublica_search_page_2.html
|
134
150
|
- spec/data/webinar.html
|
135
151
|
- spec/data/propublica-relative.html
|
136
152
|
- spec/data/propublica.html
|
@@ -138,7 +154,10 @@ files:
|
|
138
154
|
- spec/data/sixfacts.html
|
139
155
|
- spec/data/discussion.html
|
140
156
|
- spec/data/easttimor.html
|
157
|
+
- spec/data/propublica_search.html
|
141
158
|
- spec/upton_spec.rb
|
159
|
+
- spec/spec_helper.rb
|
160
|
+
- spec/upton_downloader_spec.rb
|
142
161
|
homepage: http://github.org/propublica/upton
|
143
162
|
licenses:
|
144
163
|
- MIT
|
@@ -164,6 +183,7 @@ signing_key:
|
|
164
183
|
specification_version: 4
|
165
184
|
summary: A simple web-scraping framework
|
166
185
|
test_files:
|
186
|
+
- spec/data/propublica_search_page_2.html
|
167
187
|
- spec/data/webinar.html
|
168
188
|
- spec/data/propublica-relative.html
|
169
189
|
- spec/data/propublica.html
|
@@ -171,5 +191,8 @@ test_files:
|
|
171
191
|
- spec/data/sixfacts.html
|
172
192
|
- spec/data/discussion.html
|
173
193
|
- spec/data/easttimor.html
|
194
|
+
- spec/data/propublica_search.html
|
174
195
|
- spec/upton_spec.rb
|
196
|
+
- spec/spec_helper.rb
|
197
|
+
- spec/upton_downloader_spec.rb
|
175
198
|
has_rdoc: true
|
data/lib/utils.rb
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
# encoding: UTF-8
|
2
|
-
|
3
|
-
##
|
4
|
-
# This module contains a collection of helpers for Upton
|
5
|
-
##
|
6
|
-
module Upton
|
7
|
-
|
8
|
-
##
|
9
|
-
# This class contains a collection of helpers for Upton
|
10
|
-
#
|
11
|
-
# Each method returns a Proc that (with an & ) can be used as the final
|
12
|
-
# argument to Upton's `scrape` and `scrape_to_csv`
|
13
|
-
##
|
14
|
-
module Utils
|
15
|
-
|
16
|
-
##
|
17
|
-
# Scrapes an HTML <table> element into an Array of Arrays. The header, if
|
18
|
-
# present, is returned as the first row.
|
19
|
-
##
|
20
|
-
def self.table(table_selector, selector_method=:xpath)
|
21
|
-
return Proc.new do |instance_html|
|
22
|
-
html = ::Nokogiri::HTML(instance_html)
|
23
|
-
output = []
|
24
|
-
headers = html.send(selector_method, table_selector).css("th").map &:text
|
25
|
-
output << headers
|
26
|
-
|
27
|
-
table = html.send(selector_method, table_selector).css("tr").each{|tr| output << tr.css("td").map(&:text) }
|
28
|
-
output
|
29
|
-
end
|
30
|
-
end
|
31
|
-
|
32
|
-
##
|
33
|
-
# Scrapes any set of HTML elements into an Array.
|
34
|
-
##
|
35
|
-
def self.list(list_selector, selector_method=:xpath)
|
36
|
-
return Proc.new do |instance_html|
|
37
|
-
html = ::Nokogiri::HTML(instance_html)
|
38
|
-
html.send(selector_method, list_selector).map{|list_element| list_element.text }
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
##
|
43
|
-
# Takes :_href and resolves it to an absolute URL according to
|
44
|
-
# the supplied :_page_url. They can be either Strings or URI
|
45
|
-
# instances.
|
46
|
-
#
|
47
|
-
# raises ArgumentError if either href or page_url is nil
|
48
|
-
# raises ArgumentError if page_url is not absolute
|
49
|
-
#
|
50
|
-
# returns: a String with absolute URL
|
51
|
-
def self.resolve_url(_href, _page_url)
|
52
|
-
|
53
|
-
page_url = URI(_page_url).dup
|
54
|
-
raise ArgumentError, "#{page_url} must be absolute" unless page_url.absolute?
|
55
|
-
|
56
|
-
href = URI(_href).dup
|
57
|
-
|
58
|
-
# return :href if :href is already absolute
|
59
|
-
return href.to_s if href.absolute?
|
60
|
-
|
61
|
-
|
62
|
-
# TODO: There may be edge cases worth considering
|
63
|
-
# but this should handle the following non-absolute href possibilities:
|
64
|
-
# //anothersite.com (keeps scheme, too!)
|
65
|
-
# /root/dir
|
66
|
-
# relative/dir
|
67
|
-
# ?query=2
|
68
|
-
# #bang
|
69
|
-
|
70
|
-
URI.join(page_url, href).to_s
|
71
|
-
end
|
72
|
-
|
73
|
-
end
|
74
|
-
end
|