postrank-uri 1.0.12 → 1.0.13
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/postrank-uri.rb +34 -14
- data/lib/postrank-uri/version.rb +1 -1
- data/spec/postrank-uri_spec.rb +1 -1
- metadata +2 -2
data/lib/postrank-uri.rb
CHANGED
@@ -102,11 +102,9 @@ module PostRank
|
|
102
102
|
urls = []
|
103
103
|
Nokogiri.HTML(text).search('a').each do |a|
|
104
104
|
begin
|
105
|
-
url = clean(a.attr('href'), :raw => true)
|
106
|
-
|
107
|
-
|
108
|
-
url.host = host
|
109
|
-
end
|
105
|
+
url = clean(a.attr('href'), :raw => true, :host => host)
|
106
|
+
|
107
|
+
next unless url.absolute?
|
110
108
|
|
111
109
|
urls.push [url.to_s, a.text]
|
112
110
|
rescue
|
@@ -129,7 +127,7 @@ module PostRank
|
|
129
127
|
end
|
130
128
|
|
131
129
|
def clean(uri, opts = {})
|
132
|
-
uri = normalize(c18n(unescape(uri)))
|
130
|
+
uri = normalize(c18n(unescape(uri), opts))
|
133
131
|
opts[:raw] ? uri : uri.to_s
|
134
132
|
end
|
135
133
|
|
@@ -137,8 +135,8 @@ module PostRank
|
|
137
135
|
Digest::MD5.hexdigest(opts[:clean] == true ? clean(uri) : uri)
|
138
136
|
end
|
139
137
|
|
140
|
-
def normalize(uri)
|
141
|
-
u = parse(uri)
|
138
|
+
def normalize(uri, opts = {})
|
139
|
+
u = parse(uri, opts)
|
142
140
|
u.path = u.path.squeeze('/')
|
143
141
|
u.path = u.path.chomp('/') if u.path.size != 1
|
144
142
|
u.query = nil if u.query && u.query.empty?
|
@@ -146,8 +144,8 @@ module PostRank
|
|
146
144
|
u
|
147
145
|
end
|
148
146
|
|
149
|
-
def c18n(uri)
|
150
|
-
u = parse(uri)
|
147
|
+
def c18n(uri, opts = {})
|
148
|
+
u = parse(uri, opts)
|
151
149
|
u = embedded(u)
|
152
150
|
|
153
151
|
if q = u.query_values(:notation => :flat_array)
|
@@ -181,12 +179,34 @@ module PostRank
|
|
181
179
|
uri
|
182
180
|
end
|
183
181
|
|
184
|
-
def parse(uri)
|
182
|
+
def parse(uri, opts = {})
|
185
183
|
return uri if uri.is_a? Addressable::URI
|
186
184
|
|
187
|
-
uri =
|
188
|
-
|
185
|
+
uri = Addressable::URI.parse(uri)
|
186
|
+
|
187
|
+
unless uri.host
|
188
|
+
if uri.scheme
|
189
|
+
# With no host and scheme yes, the parser exploded
|
190
|
+
return parse("http://#{uri}", opts)
|
191
|
+
end
|
192
|
+
|
193
|
+
if opts[:host]
|
194
|
+
uri.host = opts[:host]
|
195
|
+
else
|
196
|
+
parts = uri.path.to_s.split(/[\/:]/)
|
197
|
+
if parts.first =~ URIREGEX[:valid_domain]
|
198
|
+
host = parts.shift
|
199
|
+
uri.path = '/' + parts.join('/')
|
200
|
+
uri.host = host
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
uri.scheme = 'http' if uri.host && !uri.scheme
|
206
|
+
|
207
|
+
uri.normalize
|
189
208
|
end
|
190
209
|
|
191
210
|
end
|
192
|
-
end
|
211
|
+
end
|
212
|
+
|
data/lib/postrank-uri/version.rb
CHANGED
data/spec/postrank-uri_spec.rb
CHANGED
@@ -293,7 +293,7 @@ describe PostRank::URI do
|
|
293
293
|
"ExampLe.com:3000" => "example.com",
|
294
294
|
"http://alex.pages.example.COM" => "example.com",
|
295
295
|
"http://www.example.ag.it/2011/04/01/blah" => "example.ag.it",
|
296
|
-
"ftp://www.example.com/2011/04/01/blah" =>
|
296
|
+
"ftp://www.example.com/2011/04/01/blah" => 'example.com',
|
297
297
|
"http://com" => nil,
|
298
298
|
"http://alex.pages.examplecom" => nil,
|
299
299
|
"example" => nil,
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: postrank-uri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.0.12
|
5
|
+
version: 1.0.13
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Ilya Grigorik
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-05-
|
13
|
+
date: 2011-05-10 00:00:00 -04:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|