postrank-uri 1.0.12 → 1.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/postrank-uri.rb +34 -14
- data/lib/postrank-uri/version.rb +1 -1
- data/spec/postrank-uri_spec.rb +1 -1
- metadata +2 -2
data/lib/postrank-uri.rb
CHANGED
@@ -102,11 +102,9 @@ module PostRank
|
|
102
102
|
urls = []
|
103
103
|
Nokogiri.HTML(text).search('a').each do |a|
|
104
104
|
begin
|
105
|
-
url = clean(a.attr('href'), :raw => true)
|
106
|
-
|
107
|
-
|
108
|
-
url.host = host
|
109
|
-
end
|
105
|
+
url = clean(a.attr('href'), :raw => true, :host => host)
|
106
|
+
|
107
|
+
next unless url.absolute?
|
110
108
|
|
111
109
|
urls.push [url.to_s, a.text]
|
112
110
|
rescue
|
@@ -129,7 +127,7 @@ module PostRank
|
|
129
127
|
end
|
130
128
|
|
131
129
|
def clean(uri, opts = {})
|
132
|
-
uri = normalize(c18n(unescape(uri)))
|
130
|
+
uri = normalize(c18n(unescape(uri), opts))
|
133
131
|
opts[:raw] ? uri : uri.to_s
|
134
132
|
end
|
135
133
|
|
@@ -137,8 +135,8 @@ module PostRank
|
|
137
135
|
Digest::MD5.hexdigest(opts[:clean] == true ? clean(uri) : uri)
|
138
136
|
end
|
139
137
|
|
140
|
-
def normalize(uri)
|
141
|
-
u = parse(uri)
|
138
|
+
def normalize(uri, opts = {})
|
139
|
+
u = parse(uri, opts)
|
142
140
|
u.path = u.path.squeeze('/')
|
143
141
|
u.path = u.path.chomp('/') if u.path.size != 1
|
144
142
|
u.query = nil if u.query && u.query.empty?
|
@@ -146,8 +144,8 @@ module PostRank
|
|
146
144
|
u
|
147
145
|
end
|
148
146
|
|
149
|
-
def c18n(uri)
|
150
|
-
u = parse(uri)
|
147
|
+
def c18n(uri, opts = {})
|
148
|
+
u = parse(uri, opts)
|
151
149
|
u = embedded(u)
|
152
150
|
|
153
151
|
if q = u.query_values(:notation => :flat_array)
|
@@ -181,12 +179,34 @@ module PostRank
|
|
181
179
|
uri
|
182
180
|
end
|
183
181
|
|
184
|
-
def parse(uri)
|
182
|
+
def parse(uri, opts = {})
|
185
183
|
return uri if uri.is_a? Addressable::URI
|
186
184
|
|
187
|
-
uri =
|
188
|
-
|
185
|
+
uri = Addressable::URI.parse(uri)
|
186
|
+
|
187
|
+
unless uri.host
|
188
|
+
if uri.scheme
|
189
|
+
# With no host and scheme yes, the parser exploded
|
190
|
+
return parse("http://#{uri}", opts)
|
191
|
+
end
|
192
|
+
|
193
|
+
if opts[:host]
|
194
|
+
uri.host = opts[:host]
|
195
|
+
else
|
196
|
+
parts = uri.path.to_s.split(/[\/:]/)
|
197
|
+
if parts.first =~ URIREGEX[:valid_domain]
|
198
|
+
host = parts.shift
|
199
|
+
uri.path = '/' + parts.join('/')
|
200
|
+
uri.host = host
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
uri.scheme = 'http' if uri.host && !uri.scheme
|
206
|
+
|
207
|
+
uri.normalize
|
189
208
|
end
|
190
209
|
|
191
210
|
end
|
192
|
-
end
|
211
|
+
end
|
212
|
+
|
data/lib/postrank-uri/version.rb
CHANGED
data/spec/postrank-uri_spec.rb
CHANGED
@@ -293,7 +293,7 @@ describe PostRank::URI do
|
|
293
293
|
"ExampLe.com:3000" => "example.com",
|
294
294
|
"http://alex.pages.example.COM" => "example.com",
|
295
295
|
"http://www.example.ag.it/2011/04/01/blah" => "example.ag.it",
|
296
|
-
"ftp://www.example.com/2011/04/01/blah" =>
|
296
|
+
"ftp://www.example.com/2011/04/01/blah" => 'example.com',
|
297
297
|
"http://com" => nil,
|
298
298
|
"http://alex.pages.examplecom" => nil,
|
299
299
|
"example" => nil,
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: postrank-uri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.0.12
|
5
|
+
version: 1.0.13
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Ilya Grigorik
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-05-
|
13
|
+
date: 2011-05-10 00:00:00 -04:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|