postrank-uri 1.0.12 → 1.0.13

Sign up to get free protection for your applications and to get access to all the features.
data/lib/postrank-uri.rb CHANGED
@@ -102,11 +102,9 @@ module PostRank
102
102
  urls = []
103
103
  Nokogiri.HTML(text).search('a').each do |a|
104
104
  begin
105
- url = clean(a.attr('href'), :raw => true)
106
- if url.host.empty?
107
- next if host.nil?
108
- url.host = host
109
- end
105
+ url = clean(a.attr('href'), :raw => true, :host => host)
106
+
107
+ next unless url.absolute?
110
108
 
111
109
  urls.push [url.to_s, a.text]
112
110
  rescue
@@ -129,7 +127,7 @@ module PostRank
129
127
  end
130
128
 
131
129
  def clean(uri, opts = {})
132
- uri = normalize(c18n(unescape(uri)))
130
+ uri = normalize(c18n(unescape(uri), opts))
133
131
  opts[:raw] ? uri : uri.to_s
134
132
  end
135
133
 
@@ -137,8 +135,8 @@ module PostRank
137
135
  Digest::MD5.hexdigest(opts[:clean] == true ? clean(uri) : uri)
138
136
  end
139
137
 
140
- def normalize(uri)
141
- u = parse(uri)
138
+ def normalize(uri, opts = {})
139
+ u = parse(uri, opts)
142
140
  u.path = u.path.squeeze('/')
143
141
  u.path = u.path.chomp('/') if u.path.size != 1
144
142
  u.query = nil if u.query && u.query.empty?
@@ -146,8 +144,8 @@ module PostRank
146
144
  u
147
145
  end
148
146
 
149
- def c18n(uri)
150
- u = parse(uri)
147
+ def c18n(uri, opts = {})
148
+ u = parse(uri, opts)
151
149
  u = embedded(u)
152
150
 
153
151
  if q = u.query_values(:notation => :flat_array)
@@ -181,12 +179,34 @@ module PostRank
181
179
  uri
182
180
  end
183
181
 
184
- def parse(uri)
182
+ def parse(uri, opts = {})
185
183
  return uri if uri.is_a? Addressable::URI
186
184
 
187
- uri = uri.index(URIREGEX[:protocol]) == 0 ? uri : "http://#{uri}"
188
- Addressable::URI.parse(uri).normalize
185
+ uri = Addressable::URI.parse(uri)
186
+
187
+ unless uri.host
188
+ if uri.scheme
189
+ # A scheme without a host means the parser misread the input (e.g. "example.com:3000" taken as scheme:path); retry with an explicit http:// prefix
190
+ return parse("http://#{uri}", opts)
191
+ end
192
+
193
+ if opts[:host]
194
+ uri.host = opts[:host]
195
+ else
196
+ parts = uri.path.to_s.split(/[\/:]/)
197
+ if parts.first =~ URIREGEX[:valid_domain]
198
+ host = parts.shift
199
+ uri.path = '/' + parts.join('/')
200
+ uri.host = host
201
+ end
202
+ end
203
+ end
204
+
205
+ uri.scheme = 'http' if uri.host && !uri.scheme
206
+
207
+ uri.normalize
189
208
  end
190
209
 
191
210
  end
192
- end
211
+ end
212
+
@@ -1,5 +1,5 @@
1
1
  module PostRank
2
2
  module URI
3
- VERSION = "1.0.12"
3
+ VERSION = "1.0.13"
4
4
  end
5
5
  end
@@ -293,7 +293,7 @@ describe PostRank::URI do
293
293
  "ExampLe.com:3000" => "example.com",
294
294
  "http://alex.pages.example.COM" => "example.com",
295
295
  "http://www.example.ag.it/2011/04/01/blah" => "example.ag.it",
296
- "ftp://www.example.com/2011/04/01/blah" => nil,
296
+ "ftp://www.example.com/2011/04/01/blah" => 'example.com',
297
297
  "http://com" => nil,
298
298
  "http://alex.pages.examplecom" => nil,
299
299
  "example" => nil,
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: postrank-uri
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.12
5
+ version: 1.0.13
6
6
  platform: ruby
7
7
  authors:
8
8
  - Ilya Grigorik
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-05-06 00:00:00 -04:00
13
+ date: 2011-05-10 00:00:00 -04:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency