postrank-uri 1.0.12 → 1.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/postrank-uri.rb CHANGED
@@ -102,11 +102,9 @@ module PostRank
102
102
  urls = []
103
103
  Nokogiri.HTML(text).search('a').each do |a|
104
104
  begin
105
- url = clean(a.attr('href'), :raw => true)
106
- if url.host.empty?
107
- next if host.nil?
108
- url.host = host
109
- end
105
+ url = clean(a.attr('href'), :raw => true, :host => host)
106
+
107
+ next unless url.absolute?
110
108
 
111
109
  urls.push [url.to_s, a.text]
112
110
  rescue
@@ -129,7 +127,7 @@ module PostRank
129
127
  end
130
128
 
131
129
  def clean(uri, opts = {})
132
- uri = normalize(c18n(unescape(uri)))
130
+ uri = normalize(c18n(unescape(uri), opts))
133
131
  opts[:raw] ? uri : uri.to_s
134
132
  end
135
133
 
@@ -137,8 +135,8 @@ module PostRank
137
135
  Digest::MD5.hexdigest(opts[:clean] == true ? clean(uri) : uri)
138
136
  end
139
137
 
140
- def normalize(uri)
141
- u = parse(uri)
138
+ def normalize(uri, opts = {})
139
+ u = parse(uri, opts)
142
140
  u.path = u.path.squeeze('/')
143
141
  u.path = u.path.chomp('/') if u.path.size != 1
144
142
  u.query = nil if u.query && u.query.empty?
@@ -146,8 +144,8 @@ module PostRank
146
144
  u
147
145
  end
148
146
 
149
- def c18n(uri)
150
- u = parse(uri)
147
+ def c18n(uri, opts = {})
148
+ u = parse(uri, opts)
151
149
  u = embedded(u)
152
150
 
153
151
  if q = u.query_values(:notation => :flat_array)
@@ -181,12 +179,34 @@ module PostRank
181
179
  uri
182
180
  end
183
181
 
184
- def parse(uri)
182
+ def parse(uri, opts = {})
185
183
  return uri if uri.is_a? Addressable::URI
186
184
 
187
- uri = uri.index(URIREGEX[:protocol]) == 0 ? uri : "http://#{uri}"
188
- Addressable::URI.parse(uri).normalize
185
+ uri = Addressable::URI.parse(uri)
186
+
187
+ unless uri.host
188
+ if uri.scheme
189
+ # Scheme present but no host: the input was presumably mis-parsed (e.g. "host:port" read as scheme:path), so retry with an explicit http:// prefix
190
+ return parse("http://#{uri}", opts)
191
+ end
192
+
193
+ if opts[:host]
194
+ uri.host = opts[:host]
195
+ else
196
+ parts = uri.path.to_s.split(/[\/:]/)
197
+ if parts.first =~ URIREGEX[:valid_domain]
198
+ host = parts.shift
199
+ uri.path = '/' + parts.join('/')
200
+ uri.host = host
201
+ end
202
+ end
203
+ end
204
+
205
+ uri.scheme = 'http' if uri.host && !uri.scheme
206
+
207
+ uri.normalize
189
208
  end
190
209
 
191
210
  end
192
- end
211
+ end
212
+
@@ -1,5 +1,5 @@
1
1
  module PostRank
2
2
  module URI
3
- VERSION = "1.0.12"
3
+ VERSION = "1.0.13"
4
4
  end
5
5
  end
@@ -293,7 +293,7 @@ describe PostRank::URI do
293
293
  "ExampLe.com:3000" => "example.com",
294
294
  "http://alex.pages.example.COM" => "example.com",
295
295
  "http://www.example.ag.it/2011/04/01/blah" => "example.ag.it",
296
- "ftp://www.example.com/2011/04/01/blah" => nil,
296
+ "ftp://www.example.com/2011/04/01/blah" => 'example.com',
297
297
  "http://com" => nil,
298
298
  "http://alex.pages.examplecom" => nil,
299
299
  "example" => nil,
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: postrank-uri
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.12
5
+ version: 1.0.13
6
6
  platform: ruby
7
7
  authors:
8
8
  - Ilya Grigorik
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-05-06 00:00:00 -04:00
13
+ date: 2011-05-10 00:00:00 -04:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency