postrank-uri 1.0.18 → 1.0.20

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f9a235d0c2287732278b672997027c7d4093d0f
4
- data.tar.gz: 8c91e4b1c787c43e6ea2ccc51593be3809746fa7
3
+ metadata.gz: 2a453df060f3bb0d7ea04c7031d17f9e2039e8fc
4
+ data.tar.gz: fc665791c45e60179c706e9e0e2b3c8c50a58f73
5
5
  SHA512:
6
- metadata.gz: c702922f05473c762d7223777eb48641db8c4b0e156539c729433f3aa9174b010bce707801ee8c7ec310e4181edb3402c77131d3a66b0bdc02e99f87c1d9982a
7
- data.tar.gz: 06040dead1c6348febb64211e2787216c287c87baa09555947e03a4bea09b7293ed33faed08851e28aaa2ff08389d121a48746cc6a6f3af6c7e023f75db1917a
6
+ metadata.gz: 4c6eec4d6e64e4c400d1ba9e2508a6bca95008452d3e64f22511b56236892a12afd582a7d8be28b5044e0e03f709459f5bba238359ac6408d42221912e8970a3
7
+ data.tar.gz: 7816c251f9ba449f2f3ea3c8e8e4fb1f80c2e76f8fd004c2c73021b3e47fc7cc1068467f90e17986f041ea948c375b24ce7248a837d8f0d13dedc1f0c4773ed1
data/.gitignore CHANGED
@@ -1,2 +1,3 @@
1
1
  pkg
2
2
  Gemfile.lock
3
+ /gemfiles
@@ -0,0 +1,19 @@
1
+ appraise "nokogiri-1.7" do
2
+ gem "nokogiri", "~> 1.7.0"
3
+ end
4
+
5
+ appraise "nokogiri-1.6" do
6
+ gem "nokogiri", "~> 1.6.1"
7
+ end
8
+
9
+ appraise "addressable-2.3" do
10
+ gem "addressable", "~> 2.3.0"
11
+ end
12
+
13
+ appraise "addressable-2.4" do
14
+ gem "addressable", "~> 2.4.0"
15
+ end
16
+
17
+ appraise "addressable-2.5" do
18
+ gem "addressable", "~> 2.5.0"
19
+ end
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2011 Ilya Grigorik
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md CHANGED
@@ -37,4 +37,29 @@ In a nutshell, we need to make sure that creative cases like the ones below all
37
37
 
38
38
  As part of URI canonicalization the library will remove common tracking parameters from Google Analytics and several other providers. Beyond that, host-specific rules are also applied. For example, nytimes.com likes to add a 'partner' query parameter for tracking purposes, which has no effect on the content - hence, it is removed from the URI. For the full list, see the c14n.yml file.
39
39
 
40
- Detecting "duplicate URLs" is a hard problem to solve (expensive in all senses), instead we are compiling a manually assembled database. If you find cases which are missing, please do report them, or send us a pull request!
40
+ Detecting "duplicate URLs" is a hard problem to solve (expensive in all senses), instead we are compiling a manually assembled database. If you find cases which are missing, please do report them, or send us a pull request!
41
+
42
+ ## Development
43
+
44
+ ### Setup
45
+
46
+ ```
47
+ bundle install
48
+ ```
49
+
50
+ ### Running tests
51
+
52
+ ```
53
+ bundle exec rake
54
+ ```
55
+
56
+ ### Running dependency appraisals
57
+
58
+ To verify `postrank-uri` works with different versions of its runtime dependencies you can run:
59
+
60
+ ```
61
+ bundle exec appraisal install
62
+ bundle exec rake appraisal
63
+ ```
64
+
65
+ This will execute the test suite with different versions of the dependencies.
data/Rakefile CHANGED
@@ -3,3 +3,6 @@ Bundler::GemHelper.install_tasks
3
3
 
4
4
  require 'rspec/core/rake_task'
5
5
  RSpec::Core::RakeTask.new(:spec)
6
+ task :default => :spec
7
+
8
+ require 'appraisal'
@@ -1,4 +1,4 @@
1
-
1
+ # encoding: utf-8
2
2
  require 'addressable/uri'
3
3
  require 'digest/md5'
4
4
  require 'nokogiri'
@@ -9,7 +9,7 @@ module Addressable
9
9
  class URI
10
10
  def domain
11
11
  host = self.host
12
- (host && PublicSuffix.valid?(host)) ? PublicSuffix.parse(host).domain : nil
12
+ (host && PublicSuffix.valid?(host, default_rule: nil)) ? PublicSuffix.parse(host).domain : nil
13
13
  end
14
14
 
15
15
  def normalized_query
@@ -86,8 +86,9 @@ module PostRank
86
86
  )
87
87
  }iox;
88
88
 
89
+ URIREGEX[:reserved_characters] = /%3F|%26/i
89
90
  URIREGEX[:escape] = /([^ a-zA-Z0-9_.-]+)/x
90
- URIREGEX[:unescape] = /((?:%[0-9a-fA-F]{2})+)/x
91
+ URIREGEX[:unescape] = /(%[0-9a-fA-F]{2})/x
91
92
  URIREGEX.each_pair{|k,v| v.freeze }
92
93
 
93
94
  module_function
@@ -97,7 +98,7 @@ module PostRank
97
98
  urls = []
98
99
  text.to_s.scan(URIREGEX[:valid_url]) do |all, before, url, protocol, domain, path, query|
99
100
  # Only extract the URL if the domain is valid
100
- if PublicSuffix.valid?(domain)
101
+ if PublicSuffix.valid?(domain, default_rule: nil)
101
102
  url = clean(url)
102
103
  urls.push url.to_s
103
104
  end
@@ -131,8 +132,12 @@ module PostRank
131
132
  def unescape(uri)
132
133
  u = parse(uri)
133
134
  u.query = u.query.tr('+', ' ') if u.query
134
- u.to_s.gsub(URIREGEX[:unescape]) do
135
- [$1.delete('%')].pack('H*')
135
+ u.to_s.gsub(URIREGEX[:unescape]) do |encoded|
136
+ if encoded.match? URIREGEX[:reserved_characters]
137
+ encoded
138
+ else
139
+ [encoded.delete('%')].pack('H*')
140
+ end
136
141
  end
137
142
  end
138
143
 
@@ -225,7 +230,7 @@ module PostRank
225
230
  cleaned_uri = clean(uri, :raw => true)
226
231
 
227
232
  if host = cleaned_uri.host
228
- is_valid = PublicSuffix.valid?(Addressable::IDNA.to_unicode(host))
233
+ is_valid = PublicSuffix.valid?(Addressable::IDNA.to_unicode(host), default_rule: nil)
229
234
  end
230
235
 
231
236
  is_valid
@@ -1,5 +1,5 @@
1
1
  module PostRank
2
2
  module URI
3
- VERSION = "1.0.18"
3
+ VERSION = "1.0.20"
4
4
  end
5
5
  end
@@ -11,14 +11,17 @@ Gem::Specification.new do |s|
11
11
  s.homepage = "http://github.com/postrank-labs/postrank-uri"
12
12
  s.summary = "URI normalization, c14n, escaping, and extraction"
13
13
  s.description = s.summary
14
+ s.license = 'MIT'
14
15
 
15
16
  s.rubyforge_project = "postrank-uri"
16
17
 
17
- s.add_dependency "addressable", "~> 2.3.0"
18
- s.add_dependency "public_suffix", "~> 1.1.3"
19
- s.add_dependency "nokogiri", "~> 1.6.1"
18
+ s.add_dependency "addressable", ">= 2.3.0", "< 2.6"
19
+ s.add_dependency "public_suffix", ">= 2.0.0", "< 2.1"
20
+ s.add_dependency "nokogiri", ">= 1.6.1", "< 1.8"
20
21
 
22
+ s.add_development_dependency "rake"
21
23
  s.add_development_dependency "rspec"
24
+ s.add_development_dependency "appraisal", ">= 2.0.0", "< 3.0"
22
25
 
23
26
  s.files = `git ls-files`.split("\n")
24
27
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -3,103 +3,101 @@
3
3
  require 'helper'
4
4
 
5
5
  describe PostRank::URI do
6
-
7
- let(:igvita) { 'http://igvita.com/' }
8
-
9
6
  context "escaping" do
10
- it "should escape PostRank::URI string" do
11
- PostRank::URI.escape('id=1').should == 'id%3D1'
7
+ it "escapes PostRank::URI string" do
8
+ expect(PostRank::URI.escape('id=1')).to eq('id%3D1')
12
9
  end
13
10
 
14
- it "should escape spaces as %20's" do
15
- PostRank::URI.escape('id= 1').should match('%20')
11
+ it "escapes spaces as %20's" do
12
+ expect(PostRank::URI.escape('id= 1')).to match('%20')
16
13
  end
17
14
  end
18
15
 
19
16
  context "unescape" do
20
- it "should unescape PostRank::URI" do
21
- PostRank::URI.unescape(PostRank::URI.escape('id=1')).should == 'id=1'
17
+ it "unescapes PostRank::URI" do
18
+ expect(PostRank::URI.unescape(PostRank::URI.escape('id=1'))).to eq('id=1')
22
19
  end
23
20
 
24
- it "should unescape PostRank::URI with spaces" do
25
- PostRank::URI.unescape(PostRank::URI.escape('id= 1')).should == 'id= 1'
21
+ it "unescapes PostRank::URI with spaces" do
22
+ expect(PostRank::URI.unescape(PostRank::URI.escape('id= 1'))).to eq('id= 1')
26
23
  end
27
24
 
28
25
  context "accept improperly escaped PostRank::URI strings" do
29
26
  # See http://tools.ietf.org/html/rfc3986#section-2.3
30
27
 
31
- it "should unescape PostRank::URI with spaces encoded as '+'" do
32
- PostRank::URI.unescape('?id=+1').should == '?id= 1'
28
+ it "unescapes PostRank::URI with spaces encoded as '+'" do
29
+ expect(PostRank::URI.unescape('?id=+1')).to eq('?id= 1')
33
30
  end
34
31
 
35
- it "should unescape PostRank::URI with spaces encoded as '+'" do
36
- PostRank::URI.unescape('?id%3D+1').should == '?id= 1'
32
+ it "unescapes PostRank::URI with spaces encoded as '+'" do
33
+ expect(PostRank::URI.unescape('?id%3D+1')).to eq('?id= 1')
37
34
  end
38
35
 
39
- it "should unescape PostRank::URI with spaces encoded as %20" do
40
- PostRank::URI.unescape('?id=%201').should == '?id= 1'
36
+ it "unescapes PostRank::URI with spaces encoded as %20" do
37
+ expect(PostRank::URI.unescape('?id=%201')).to eq('?id= 1')
41
38
  end
42
39
 
43
- it "should not unescape '+' to spaces in paths" do
44
- PostRank::URI.unescape('/foo+bar?id=foo+bar').should == '/foo+bar?id=foo bar'
40
+ it "does not unescape '+' to spaces in paths" do
41
+ expect(PostRank::URI.unescape('/foo+bar?id=foo+bar')).to eq('/foo+bar?id=foo bar')
45
42
  end
46
43
  end
47
44
 
48
45
  end
49
46
 
50
47
  context "normalize" do
48
+ let(:igvita) { 'http://igvita.com/' }
49
+
51
50
  def n(uri)
52
51
  PostRank::URI.normalize(uri).to_s
53
52
  end
54
53
 
55
- it "should normalize paths in PostRank::URIs" do
56
- n('http://igvita.com/').should == igvita
57
- n('http://igvita.com').to_s.should == igvita
58
- n('http://igvita.com///').should == igvita
54
+ it "normalizes paths in PostRank::URIs" do
55
+ expect(n('http://igvita.com/')).to eq(igvita)
56
+ expect(n('http://igvita.com').to_s).to eq(igvita)
57
+ expect(n('http://igvita.com///')).to eq(igvita)
59
58
 
60
- n('http://igvita.com/../').should == igvita
61
- n('http://igvita.com/a/b/../../').should == igvita
62
- n('http://igvita.com/a/b/../..').should == igvita
59
+ expect(n('http://igvita.com/../')).to eq(igvita)
60
+ expect(n('http://igvita.com/a/b/../../')).to eq(igvita)
61
+ expect(n('http://igvita.com/a/b/../..')).to eq(igvita)
63
62
  end
64
63
 
65
- it "should normalize query strings in PostRank::URIs" do
66
- n('http://igvita.com/?').should == igvita
67
- n('http://igvita.com?').should == igvita
68
- n('http://igvita.com/a/../?').should == igvita
64
+ it "normalizes query strings in PostRank::URIs" do
65
+ expect(n('http://igvita.com/?')).to eq(igvita)
66
+ expect(n('http://igvita.com?')).to eq(igvita)
67
+ expect(n('http://igvita.com/a/../?')).to eq(igvita)
69
68
  end
70
69
 
71
- it "should normalize anchors in PostRank::URIs" do
72
- n('http://igvita.com#test').should == igvita
73
- n('http://igvita.com#test#test').should == igvita
74
- n('http://igvita.com/a/../?#test').should == igvita
70
+ it "normalizes anchors in PostRank::URIs" do
71
+ expect(n('http://igvita.com#test')).to eq(igvita)
72
+ expect(n('http://igvita.com#test#test')).to eq(igvita)
73
+ expect(n('http://igvita.com/a/../?#test')).to eq(igvita)
75
74
  end
76
75
 
77
- it "should clean whitespace in PostRank::URIs" do
78
- n('http://igvita.com/a/../? ').should == igvita
79
- n('http://igvita.com/a/../? #test').should == igvita
80
- n('http://igvita.com/ /../').should == igvita
76
+ it "cleans whitespace in PostRank::URIs" do
77
+ expect(n('http://igvita.com/a/../? ')).to eq(igvita)
78
+ expect(n('http://igvita.com/a/../? #test')).to eq(igvita)
79
+ expect(n('http://igvita.com/ /../')).to eq(igvita)
81
80
  end
82
81
 
83
- it "should default to http scheme if missing" do
84
- n('igvita.com').should == igvita
85
- n('https://test.com/').to_s.should == 'https://test.com/'
82
+ it "defaults to http scheme if missing" do
83
+ expect(n('igvita.com')).to eq(igvita)
84
+ expect(n('https://test.com/').to_s).to eq('https://test.com/')
86
85
  end
87
86
 
88
- it "should downcase hostname" do
89
- n('IGVITA.COM').should == igvita
90
- n('IGVITA.COM/ABC').should == (igvita + "ABC")
87
+ it "downcases the hostname" do
88
+ expect(n('IGVITA.COM')).to eq(igvita)
89
+ expect(n('IGVITA.COM/ABC')).to eq(igvita + "ABC")
91
90
  end
92
91
 
93
- it "should remove trailing slash on paths" do
94
- n('http://igvita.com/').should == 'http://igvita.com/'
92
+ it "removes trailing slash on paths" do
93
+ expect(n('http://igvita.com/')).to eq('http://igvita.com/')
95
94
 
96
- n('http://igvita.com/a').should == 'http://igvita.com/a'
97
- n('http://igvita.com/a/').should == 'http://igvita.com/a'
95
+ expect(n('http://igvita.com/a')).to eq('http://igvita.com/a')
96
+ expect(n('http://igvita.com/a/')).to eq('http://igvita.com/a')
98
97
 
99
- n('http://igvita.com/a/b').should == 'http://igvita.com/a/b'
100
- n('http://igvita.com/a/b/').should == 'http://igvita.com/a/b'
98
+ expect(n('http://igvita.com/a/b')).to eq('http://igvita.com/a/b')
99
+ expect(n('http://igvita.com/a/b/')).to eq('http://igvita.com/a/b')
101
100
  end
102
-
103
101
  end
104
102
 
105
103
  context "canonicalization" do
@@ -109,60 +107,60 @@ describe PostRank::URI do
109
107
 
110
108
  context "query parameters" do
111
109
  it "should handle nester parameters" do
112
- c('igvita.com/?id=a&utm_source=a').should == 'http://igvita.com/?id=a'
110
+ expect(c('igvita.com/?id=a&utm_source=a')).to eq('http://igvita.com/?id=a')
113
111
  end
114
112
 
115
- it "should preserve order of parameters" do
113
+ it "preserves the order of parameters" do
116
114
  url = 'http://a.com/?'+('a'..'z').to_a.shuffle.map {|e| "#{e}=#{e}"}.join("&")
117
- c(url).should == url
115
+ expect(c(url)).to eq(url)
118
116
  end
119
117
 
120
- it "should remove Google Analytics parameters" do
121
- c('igvita.com/?id=a&utm_source=a').should == 'http://igvita.com/?id=a'
122
- c('igvita.com/?id=a&utm_source=a&utm_valid').should == 'http://igvita.com/?id=a&utm_valid'
118
+ it "removes Google Analytics parameters" do
119
+ expect(c('igvita.com/?id=a&utm_source=a')).to eq('http://igvita.com/?id=a')
120
+ expect(c('igvita.com/?id=a&utm_source=a&utm_valid')).to eq('http://igvita.com/?id=a&utm_valid')
123
121
  end
124
122
 
125
- it "should remove awesm/sms parameters" do
126
- c('igvita.com/?id=a&utm_source=a&awesm=b').should == 'http://igvita.com/?id=a'
127
- c('igvita.com/?id=a&sms_ss=a').should == 'http://igvita.com/?id=a'
123
+ it "removes awesm/sms parameters" do
124
+ expect(c('igvita.com/?id=a&utm_source=a&awesm=b')).to eq('http://igvita.com/?id=a')
125
+ expect(c('igvita.com/?id=a&sms_ss=a')).to eq('http://igvita.com/?id=a')
128
126
  end
129
127
 
130
- it "should remove PHPSESSID parameter" do
131
- c('http://www.nachi.org/forum?PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd').should == 'http://www.nachi.org/forum?'
132
- c('http://www.nachi.org/forum/?PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd').should == 'http://www.nachi.org/forum/?'
133
- c('http://www.nachi.org/forum?id=123&PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd').should == 'http://www.nachi.org/forum?id=123'
128
+ it "removes PHPSESSID parameter" do
129
+ expect(c('http://www.nachi.org/forum?PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd')).to eq('http://www.nachi.org/forum?')
130
+ expect(c('http://www.nachi.org/forum/?PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd')).to eq('http://www.nachi.org/forum/?')
131
+ expect(c('http://www.nachi.org/forum?id=123&PHPSESSID=9ee2fb10b7274ef2b15d1d4006b8c8dd')).to eq('http://www.nachi.org/forum?id=123')
134
132
  end
135
133
  end
136
134
 
137
135
  context "hashbang" do
138
- it "should rewrite twitter links to crawlable versions" do
139
- c('http://twitter.com/#!/igrigorik').should == 'http://twitter.com/igrigorik'
140
- c('http://twitter.com/#!/a/statuses/1').should == 'http://twitter.com/a/statuses/1'
141
- c('http://nontwitter.com/#!/a/statuses/1').should == 'http://nontwitter.com/#!/a/statuses/1'
136
+ it "rewrites twitter links to crawlable versions" do
137
+ expect(c('http://twitter.com/#!/igrigorik')).to eq('http://twitter.com/igrigorik')
138
+ expect(c('http://twitter.com/#!/a/statuses/1')).to eq('http://twitter.com/a/statuses/1')
139
+ expect(c('http://nontwitter.com/#!/a/statuses/1')).to eq('http://nontwitter.com/#!/a/statuses/1')
142
140
  end
143
141
  end
144
142
 
145
143
  context "tumblr" do
146
- it "should strip slug" do
147
- c('http://test.tumblr.com/post/4533459403/some-text').should == 'http://test.tumblr.com/post/4533459403/'
148
- c('http://tumblr.com/xjl2evo3hh').should == 'http://tumblr.com/xjl2evo3hh'
144
+ it "strips the slug" do
145
+ expect(c('http://test.tumblr.com/post/4533459403/some-text')).to eq('http://test.tumblr.com/post/4533459403/')
146
+ expect(c('http://tumblr.com/xjl2evo3hh')).to eq('http://tumblr.com/xjl2evo3hh')
149
147
  end
150
148
  end
151
149
 
152
150
  context "embedded links" do
153
- it "should extract embedded redirects from Google News" do
151
+ it "extracts embedded redirects from Google News" do
154
152
  u = c('http://news.google.com/news/url?sa=t&fd=R&&url=http://www.ctv.ca/CTVNews/Politics/20110111/')
155
- u.should == 'http://www.ctv.ca/CTVNews/Politics/20110111'
153
+ expect(u).to eq('http://www.ctv.ca/CTVNews/Politics/20110111')
156
154
  end
157
155
 
158
- it "should extract embedded redirects from xfruits.com" do
156
+ it "extracts embedded redirects from xfruits.com" do
159
157
  u = c('http://xfruits.com/MrGroar/?url=http%3A%2F%2Faap.lesroyaumes.com%2Fdepeches%2Fdepeche351820908.html')
160
- u.should == 'http://aap.lesroyaumes.com/depeches/depeche351820908.html'
158
+ expect(u).to eq('http://aap.lesroyaumes.com/depeches/depeche351820908.html')
161
159
  end
162
160
 
163
- it "should extract embedded redirects from MySpace" do
161
+ it "extracts embedded redirects from MySpace" do
164
162
  u = c('http://www.myspace.com/Modules/PostTo/Pages/?u=http%3A%2F%2Fghanaian-chronicle.com%2Fnews%2Fother-news%2Fcanadian-high-commissioner-urges-media%2F&t=Canadian%20High%20Commissioner%20urges%20media')
165
- u.should == 'http://ghanaian-chronicle.com/news/other-news/canadian-high-commissioner-urges-media'
163
+ expect(u).to eq('http://ghanaian-chronicle.com/news/other-news/canadian-high-commissioner-urges-media')
166
164
  end
167
165
  end
168
166
  end
@@ -172,25 +170,42 @@ describe PostRank::URI do
172
170
  PostRank::URI.clean(uri)
173
171
  end
174
172
 
175
- it "should unescape, c14n and normalize" do
176
- c('http://igvita.com/?id=1').should == 'http://igvita.com/?id=1'
177
- c('igvita.com/?id=1').should == 'http://igvita.com/?id=1'
173
+ it "unescapes, canonicalizes and normalizes" do
174
+ expect(c('http://igvita.com/?id=1')).to eq('http://igvita.com/?id=1')
175
+ expect(c('igvita.com/?id=1')).to eq('http://igvita.com/?id=1')
178
176
 
179
- c('http://igvita.com/?id= 1').should == 'http://igvita.com/?id=%201'
180
- c('http://igvita.com/?id=+1').should == 'http://igvita.com/?id=%201'
181
- c('http://igvita.com/?id%3D%201').should == 'http://igvita.com/?id=%201'
177
+ expect(c('http://igvita.com/?id= 1')).to eq('http://igvita.com/?id=%201')
178
+ expect(c('http://igvita.com/?id=+1')).to eq('http://igvita.com/?id=%201')
179
+ expect(c('http://igvita.com/?id%3D%201')).to eq('http://igvita.com/?id=%201')
182
180
 
183
- c('igvita.com/a/..?id=1&utm_source=a&awesm=b#c').should == 'http://igvita.com/?id=1'
181
+ expect(c('igvita.com/a/..?id=1&utm_source=a&awesm=b#c')).to eq('http://igvita.com/?id=1')
184
182
 
185
- c('igvita.com?id=<>').should == 'http://igvita.com/?id=%3C%3E'
186
- c('igvita.com?id="').should == 'http://igvita.com/?id=%22'
183
+ expect(c('igvita.com?id=<>')).to eq('http://igvita.com/?id=%3C%3E')
184
+ expect(c('igvita.com?id="')).to eq('http://igvita.com/?id=%22')
187
185
 
188
- c('test.tumblr.com/post/23223/text-stub').should == 'http://test.tumblr.com/post/23223'
186
+ expect(c('test.tumblr.com/post/23223/text-stub')).to eq('http://test.tumblr.com/post/23223')
189
187
  end
190
188
 
191
- it "should clean host specific parameters" do
189
+ it "cleans host specific parameters" do
192
190
  YAML.load_file('spec/c14n_hosts.yml').each do |orig, clean|
193
- c(orig).should == clean
191
+ expect(c(orig)).to eq(clean)
192
+ end
193
+ end
194
+
195
+ context "reserved characters" do
196
+ it "preserves encoded question marks" do
197
+ expect(c('http://en.wikipedia.org/wiki/Whose_Line_Is_It_Anyway%3F_%28U.S._TV_series%29')).
198
+ to eq('http://en.wikipedia.org/wiki/Whose_Line_Is_It_Anyway%3F_(U.S._TV_series)')
199
+ end
200
+
201
+ it "preserves encoded ampersands" do
202
+ expect(c('http://example.com/?foo=BAR%26BAZ')).
203
+ to eq('http://example.com/?foo=BAR%26BAZ')
204
+ end
205
+
206
+ it "preserves consecutive reserved characters" do
207
+ expect(c('http://example.com/so-quizical%3F%3F%3F?foo=bar')).
208
+ to eq('http://example.com/so-quizical%3F%3F%3F?foo=bar')
194
209
  end
195
210
  end
196
211
  end
@@ -200,20 +215,20 @@ describe PostRank::URI do
200
215
  PostRank::URI.hash(uri, opts)
201
216
  end
202
217
 
203
- it "should compute the MD5 hash without cleaning the URI" do
218
+ it "computes the MD5 hash without cleaning the URI" do
204
219
  hash = '55fae8910d312b7878a3201ed653b881'
205
220
 
206
- h('http://everburning.com/feed/post/1').should == hash
207
- h('everburning.com/feed/post/1').should_not == hash
221
+ expect(h('http://everburning.com/feed/post/1')).to eq(hash)
222
+ expect(h('everburning.com/feed/post/1')).not_to eq(hash)
208
223
  end
209
224
 
210
- it "should normalize the URI if requested and compute MD5 hash" do
225
+ it "normalizes the URI if requested and compute MD5 hash" do
211
226
  hash = '55fae8910d312b7878a3201ed653b881'
212
227
 
213
- h('http://EverBurning.Com/feed/post/1', :clean => true).should == hash
214
- h('Everburning.com/feed/post/1', :clean => true).should == hash
215
- h('everburning.com/feed/post/1', :clean => true).should == hash
216
- h('everburning.com/feed/post/1/', :clean => true).should == hash
228
+ expect(h('http://EverBurning.Com/feed/post/1', :clean => true)).to eq(hash)
229
+ expect(h('Everburning.com/feed/post/1', :clean => true)).to eq(hash)
230
+ expect(h('everburning.com/feed/post/1', :clean => true)).to eq(hash)
231
+ expect(h('everburning.com/feed/post/1/', :clean => true)).to eq(hash)
217
232
  end
218
233
  end
219
234
 
@@ -223,81 +238,81 @@ describe PostRank::URI do
223
238
  end
224
239
 
225
240
  context "TLDs" do
226
- it "should not pick up bad grammar as a domain name and think it has a link" do
227
- e("yah.lets").should be_empty
241
+ it "does not pick up bad grammar as a domain name and think it has a link" do
242
+ expect(e("yah.lets")).to be_empty
228
243
  end
229
244
 
230
- it "should not pickup bad TLDS" do
231
- e('stuff.zz a.b.c d.zq').should be_empty
245
+ it "does not pickup bad TLDS" do
246
+ expect(e('stuff.zz a.b.c d.zq')).to be_empty
232
247
  end
233
248
  end
234
249
 
235
- it "should extract twitter links with hashbangs" do
236
- e('test http://twitter.com/#!/igrigorik').should include('http://twitter.com/igrigorik')
250
+ it "extracts twitter links with hashbangs" do
251
+ expect(e('test http://twitter.com/#!/igrigorik')).to include('http://twitter.com/igrigorik')
237
252
  end
238
253
 
239
- it "should extract mobile twitter links with hashbangs" do
240
- e('test http://mobile.twitter.com/#!/_mm6').should include('http://mobile.twitter.com/_mm6')
254
+ it "extracts mobile twitter links with hashbangs" do
255
+ expect(e('test http://mobile.twitter.com/#!/_mm6')).to include('http://mobile.twitter.com/_mm6')
241
256
  end
242
257
 
243
- it "should handle a URL that comes after text without a space" do
244
- e("text:http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
245
- e("text;http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
246
- e("text.http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
247
- e("text-http://spn.tw/tfnLT").should include("http://spn.tw/tfnLT")
258
+ it "handles a URL that comes after text without a space" do
259
+ expect(e("text:http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
260
+ expect(e("text;http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
261
+ expect(e("text.http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
262
+ expect(e("text-http://spn.tw/tfnLT")).to include("http://spn.tw/tfnLT")
248
263
  end
249
264
 
250
- it "should not pick up anything on or after the first . in the path of a URL with a shortener domain" do
251
- e("http://bit.ly/9cJ2mz......if ur pickin up anythign here, u FAIL.").should == ["http://bit.ly/9cJ2mz"]
265
+ it "does not pick up anything on or after the first . in the path of a URL with a shortener domain" do
266
+ expect(e("http://bit.ly/9cJ2mz......if ur pickin up anythign here, u FAIL.")).to eq(["http://bit.ly/9cJ2mz"])
252
267
  end
253
268
 
254
- it "should pickup urls without protocol" do
269
+ it "picks up urls without protocol" do
255
270
  u = e('abc.com abc.co')
256
- u.should include('http://abc.com/')
257
- u.should include('http://abc.co/')
271
+ expect(u).to include('http://abc.com/')
272
+ expect(u).to include('http://abc.co/')
258
273
  end
259
274
 
260
- it "should pickup urls inside tags" do
275
+ it "picks up urls inside tags" do
261
276
  u = e("<a href='http://bit.ly/3fds3'>abc.com</a>")
262
- u.should include('http://abc.com/')
277
+ expect(u).to include('http://abc.com/')
263
278
  end
264
279
 
265
280
  context "multibyte characters" do
266
- it "should stop extracting URLs at the full-width CJK space character" do
267
- e("http://www.youtube.com/watch?v=w_j4Lda25jA  とんかつ定食").should == ["http://www.youtube.com/watch?v=w_j4Lda25jA"]
281
+ it "stops extracting URLs at the full-width CJK space character" do
282
+ expect(e("http://www.youtube.com/watch?v=w_j4Lda25jA  とんかつ定食")).to eq(["http://www.youtube.com/watch?v=w_j4Lda25jA"])
268
283
  end
269
284
  end
270
285
 
271
286
  end
272
287
 
273
288
  context "href extract" do
274
- it "should extract links from html text" do
289
+ it "extracts links from html text" do
275
290
  g,b = PostRank::URI.extract_href("<a href='google.com'>link to google</a> with text <a href='b.com'>stuff</a>")
276
291
 
277
- g.first.should == 'http://google.com/'
278
- b.first.should == 'http://b.com/'
292
+ expect(g.first).to eq('http://google.com/')
293
+ expect(b.first).to eq('http://b.com/')
279
294
 
280
- g.last.should == 'link to google'
281
- b.last.should == 'stuff'
295
+ expect(g.last).to eq('link to google')
296
+ expect(b.last).to eq('stuff')
282
297
  end
283
298
 
284
- it "should handle empty hrefs" do
285
- lambda do
299
+ it "handles empty hrefs" do
300
+ expect do
286
301
  l = PostRank::URI.extract_href("<a>link to google</a> with text <a href=''>stuff</a>")
287
- l.should be_empty
288
- end.should_not raise_error
302
+ expect(l).to be_empty
303
+ end.not_to raise_error
289
304
  end
290
305
 
291
306
  context "relative paths" do
292
- it "should reject relative paths" do
307
+ it "rejects relative paths" do
293
308
  l = PostRank::URI.extract_href("<a href='/stuff'>link to stuff</a>")
294
- l.should be_empty
309
+ expect(l).to be_empty
295
310
  end
296
311
 
297
- it "should resolve relative paths if host is provided" do
312
+ it "resolves relative paths if host is provided" do
298
313
  i = PostRank::URI.extract_href("<a href='/stuff'>link to stuff</a>", "igvita.com").first
299
- i.first.should == 'http://igvita.com/stuff'
300
- i.last.should == 'link to stuff'
314
+ expect(i.first).to eq('http://igvita.com/stuff')
315
+ expect(i.last).to eq('link to stuff')
301
316
  end
302
317
  end
303
318
 
@@ -322,51 +337,51 @@ describe PostRank::URI do
322
337
  }
323
338
 
324
339
  url_list.each_pair do |url, expected_result|
325
- it "should extract #{expected_result.inspect} from #{url}" do
340
+ it "extracts #{expected_result.inspect} from #{url}" do
326
341
  u = PostRank::URI.clean(url, :raw => true)
327
- u.domain.should == expected_result
342
+ expect(u.domain).to eq(expected_result)
328
343
  end
329
344
  end
330
345
  end
331
346
  end
332
347
 
333
348
  context "parse" do
334
- it 'should not fail on large host-part look-alikes' do
335
- PostRank::URI.parse('a'*64+'.ca').host.should == nil
349
+ it 'does not fail on large host-part look-alikes' do
350
+ expect(PostRank::URI.parse('a'*64+'.ca').host).to eq(nil)
336
351
  end
337
352
 
338
- it 'should not pancake javascript scheme URIs' do
339
- PostRank::URI.parse('javascript:void(0);').scheme.should == 'javascript'
353
+ it 'does not pancake javascript scheme URIs' do
354
+ expect(PostRank::URI.parse('javascript:void(0);').scheme).to eq('javascript')
340
355
  end
341
356
 
342
- it 'should not pancake mailto scheme URIs' do
343
- PostRank::URI.parse('mailto:void(0);').scheme.should == 'mailto'
357
+ it 'does not pancake mailto scheme URIs' do
358
+ expect(PostRank::URI.parse('mailto:void(0);').scheme).to eq('mailto')
344
359
  end
345
360
 
346
- it 'should not pancake xmpp scheme URIs' do
347
- PostRank::URI.parse('xmpp:void(0);').scheme.should == 'xmpp'
361
+ it 'does not pancake xmpp scheme URIs' do
362
+ expect(PostRank::URI.parse('xmpp:void(0);').scheme).to eq('xmpp')
348
363
  end
349
364
  end
350
365
 
351
366
  context 'valid?' do
352
367
  it 'marks incomplete URI string as invalid' do
353
- PostRank::URI.valid?('/path/page.html').should be_false
368
+ expect(PostRank::URI.valid?('/path/page.html')).to be false
354
369
  end
355
370
 
356
371
  it 'marks www.test.c as invalid' do
357
- PostRank::URI.valid?('http://www.test.c').should be_false
372
+ expect(PostRank::URI.valid?('http://www.test.c')).to be false
358
373
  end
359
374
 
360
375
  it 'marks www.test.com as valid' do
361
- PostRank::URI.valid?('http://www.test.com').should be_true
376
+ expect(PostRank::URI.valid?('http://www.test.com')).to be true
362
377
  end
363
378
 
364
379
  it 'marks Unicode domain as valid (NOTE: works only with a scheme)' do
365
- PostRank::URI.valid?('http://президент.рф').should be_true
380
+ expect(PostRank::URI.valid?('http://президент.рф')).to be true
366
381
  end
367
382
 
368
383
  it 'marks punycode domain domain as valid' do
369
- PostRank::URI.valid?('xn--d1abbgf6aiiy.xn--p1ai').should be_true
384
+ expect(PostRank::URI.valid?('xn--d1abbgf6aiiy.xn--p1ai')).to be true
370
385
  end
371
386
  end
372
387
  end
metadata CHANGED
@@ -1,71 +1,123 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: postrank-uri
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.18
4
+ version: 1.0.20
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Grigorik
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-10 00:00:00.000000000 Z
11
+ date: 2017-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: 2.3.0
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '2.6'
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
- - - ~>
27
+ - - ">="
25
28
  - !ruby/object:Gem::Version
26
29
  version: 2.3.0
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.6'
27
33
  - !ruby/object:Gem::Dependency
28
34
  name: public_suffix
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
- - - ~>
37
+ - - ">="
32
38
  - !ruby/object:Gem::Version
33
- version: 1.1.3
39
+ version: 2.0.0
40
+ - - "<"
41
+ - !ruby/object:Gem::Version
42
+ version: '2.1'
34
43
  type: :runtime
35
44
  prerelease: false
36
45
  version_requirements: !ruby/object:Gem::Requirement
37
46
  requirements:
38
- - - ~>
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: 2.0.0
50
+ - - "<"
39
51
  - !ruby/object:Gem::Version
40
- version: 1.1.3
52
+ version: '2.1'
41
53
  - !ruby/object:Gem::Dependency
42
54
  name: nokogiri
43
55
  requirement: !ruby/object:Gem::Requirement
44
56
  requirements:
45
- - - ~>
57
+ - - ">="
46
58
  - !ruby/object:Gem::Version
47
59
  version: 1.6.1
60
+ - - "<"
61
+ - !ruby/object:Gem::Version
62
+ version: '1.8'
48
63
  type: :runtime
49
64
  prerelease: false
50
65
  version_requirements: !ruby/object:Gem::Requirement
51
66
  requirements:
52
- - - ~>
67
+ - - ">="
53
68
  - !ruby/object:Gem::Version
54
69
  version: 1.6.1
70
+ - - "<"
71
+ - !ruby/object:Gem::Version
72
+ version: '1.8'
73
+ - !ruby/object:Gem::Dependency
74
+ name: rake
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ type: :development
81
+ prerelease: false
82
+ version_requirements: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - ">="
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
55
87
  - !ruby/object:Gem::Dependency
56
88
  name: rspec
57
89
  requirement: !ruby/object:Gem::Requirement
58
90
  requirements:
59
- - - '>='
91
+ - - ">="
60
92
  - !ruby/object:Gem::Version
61
93
  version: '0'
62
94
  type: :development
63
95
  prerelease: false
64
96
  version_requirements: !ruby/object:Gem::Requirement
65
97
  requirements:
66
- - - '>='
98
+ - - ">="
67
99
  - !ruby/object:Gem::Version
68
100
  version: '0'
101
+ - !ruby/object:Gem::Dependency
102
+ name: appraisal
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ version: 2.0.0
108
+ - - "<"
109
+ - !ruby/object:Gem::Version
110
+ version: '3.0'
111
+ type: :development
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: 2.0.0
118
+ - - "<"
119
+ - !ruby/object:Gem::Version
120
+ version: '3.0'
69
121
  description: URI normalization, c14n, escaping, and extraction
70
122
  email:
71
123
  - ilya@igvita.com
@@ -73,9 +125,11 @@ executables: []
73
125
  extensions: []
74
126
  extra_rdoc_files: []
75
127
  files:
76
- - .gitignore
77
- - .rspec
128
+ - ".gitignore"
129
+ - ".rspec"
130
+ - Appraisals
78
131
  - Gemfile
132
+ - LICENSE
79
133
  - README.md
80
134
  - Rakefile
81
135
  - lib/postrank-uri.rb
@@ -86,7 +140,8 @@ files:
86
140
  - spec/helper.rb
87
141
  - spec/postrank-uri_spec.rb
88
142
  homepage: http://github.com/postrank-labs/postrank-uri
89
- licenses: []
143
+ licenses:
144
+ - MIT
90
145
  metadata: {}
91
146
  post_install_message:
92
147
  rdoc_options: []
@@ -94,17 +149,17 @@ require_paths:
94
149
  - lib
95
150
  required_ruby_version: !ruby/object:Gem::Requirement
96
151
  requirements:
97
- - - '>='
152
+ - - ">="
98
153
  - !ruby/object:Gem::Version
99
154
  version: '0'
100
155
  required_rubygems_version: !ruby/object:Gem::Requirement
101
156
  requirements:
102
- - - '>='
157
+ - - ">="
103
158
  - !ruby/object:Gem::Version
104
159
  version: '0'
105
160
  requirements: []
106
161
  rubyforge_project: postrank-uri
107
- rubygems_version: 2.0.6
162
+ rubygems_version: 2.6.8
108
163
  signing_key:
109
164
  specification_version: 4
110
165
  summary: URI normalization, c14n, escaping, and extraction
@@ -112,4 +167,3 @@ test_files:
112
167
  - spec/c14n_hosts.yml
113
168
  - spec/helper.rb
114
169
  - spec/postrank-uri_spec.rb
115
- has_rdoc: