postrank-uri 1.0.13 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,6 +18,22 @@ module Addressable
18
18
  dom = dp.public_suffix
19
19
  dom = dp.domain.downcase + "." + dom unless dp.domain.empty?
20
20
  end
21
+
22
+ def normalized_query
23
+ @normalized_query ||= (begin
24
+ if self.query && self.query.strip != ''
25
+ (self.query.strip.split("&", -1).map do |pair|
26
+ Addressable::URI.normalize_component(
27
+ pair,
28
+ Addressable::URI::CharacterClasses::QUERY.sub("\\&", "")
29
+ )
30
+ end).join("&")
31
+ else
32
+ nil
33
+ end
34
+ end)
35
+ end
36
+
21
37
  end
22
38
  end
23
39
 
@@ -33,7 +49,7 @@ module PostRank
33
49
  URIREGEX = {}
34
50
  URIREGEX[:protocol] = /https?:\/\//i
35
51
  URIREGEX[:valid_preceding_chars] = /(?:|\.|[^-\/"':!=A-Z0-9_@@]|^|\:)/i
36
- URIREGEX[:valid_domain] = /(?:[^[:punct:]\s][\.-](?=[^[:punct:]\s])|[^[:punct:]\s]){1,}\.[a-z]{2,}(?::[0-9]+)?/i
52
+ URIREGEX[:valid_domain] = /\b(?:[a-z0-9-]{1,63}\.){1,}[a-z]{2,63}(?::[0-9]+)?/i
37
53
  URIREGEX[:valid_general_url_path_chars] = /[a-z0-9!\*';:=\+\,\$\/%#\[\]\-_~]/i
38
54
 
39
55
  # Allow URL paths to contain balanced parens
@@ -203,8 +219,7 @@ module PostRank
203
219
  end
204
220
 
205
221
  uri.scheme = 'http' if uri.host && !uri.scheme
206
-
207
- uri.normalize
222
+ uri.normalize!
208
223
  end
209
224
 
210
225
  end
@@ -1,5 +1,5 @@
1
1
  module PostRank
2
2
  module URI
3
- VERSION = "1.0.13"
3
+ VERSION = "1.0.14"
4
4
  end
5
5
  end
@@ -18,6 +18,7 @@ Gem::Specification.new do |s|
18
18
  s.add_dependency "domainatrix"
19
19
  s.add_dependency "nokogiri"
20
20
  s.add_development_dependency "rspec"
21
+ #s.add_development_dependency "idn" # test with idn
21
22
 
22
23
  s.files = `git ls-files`.split("\n")
23
24
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -243,6 +243,11 @@ describe PostRank::URI do
243
243
  u.should include('http://abc.co/')
244
244
  end
245
245
 
246
+ it "should pickup urls inside tags" do
247
+ u = e("<a href='http://bit.ly/3fds3'>abc.com</a>")
248
+ u.should include('http://abc.com/')
249
+ end
250
+
246
251
  context "multibyte characters" do
247
252
  it "should stop extracting URLs at the full-width CJK space character" do
248
253
  e("http://www.youtube.com/watch?v=w_j4Lda25jA  とんかつ定食").should == ["http://www.youtube.com/watch?v=w_j4Lda25jA"]
@@ -298,7 +303,8 @@ describe PostRank::URI do
298
303
  "http://alex.pages.examplecom" => nil,
299
304
  "example" => nil,
300
305
  "http://127.0.0.1" => nil,
301
- "localhost" => nil
306
+ "localhost" => nil,
307
+ "hello-there.com/you" => "hello-there.com"
302
308
  }
303
309
 
304
310
  url_list.each_pair do |url, expected_result|
@@ -309,4 +315,10 @@ describe PostRank::URI do
309
315
  end
310
316
  end
311
317
  end
318
+
319
+ context "parse" do
320
+ it 'should not fail on large host-part look-alikes' do
321
+ PostRank::URI.parse('a'*64+'.ca').host.should == nil
322
+ end
323
+ end
312
324
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: postrank-uri
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 1.0.13
5
+ version: 1.0.14
6
6
  platform: ruby
7
7
  authors:
8
8
  - Ilya Grigorik
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-05-10 00:00:00 -04:00
13
+ date: 2011-05-18 00:00:00 -04:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency