postrank-uri 1.0.23 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +6 -4
- data/Appraisals +10 -14
- data/README.md +1 -1
- data/lib/postrank-uri/version.rb +1 -1
- data/lib/postrank-uri.rb +2 -1
- data/postrank-uri.gemspec +4 -6
- data/spec/postrank-uri_spec.rb +4 -0
- metadata +12 -25
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d760886f160f892a1ef8cf6e0b6e4bb3509e568fd3cac1fd242873ce4f8621f4
|
4
|
+
data.tar.gz: 94167480b4a74048a82322f79cd4a13017ff9ce0730455fe4c5b1988ff823563
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b39395c9aac115958e6f7d977cc1e2bd3ed087230e8c4b76035b4e96a906bab9b1e3f8aa10b73c822762591acfebc86ece9fd1deeca79aeceb93601832e650e6
|
7
|
+
data.tar.gz: 4905d528cb0a31bc25fd9af7830849a93ff04d874d8d87f68eb861805ee0b8563a523e8b39e0c3444d17539fa06cf5b96cff2092ef3f6c490f0d02029a395244
|
data/.travis.yml
CHANGED
data/Appraisals
CHANGED
@@ -1,19 +1,15 @@
|
|
1
|
-
|
2
|
-
gem "nokogiri", "~> 1.7.0"
|
3
|
-
end
|
4
|
-
|
5
|
-
appraise "nokogiri-1.6" do
|
6
|
-
gem "nokogiri", "~> 1.6.1"
|
7
|
-
end
|
1
|
+
nokogiri_versions = ["1.8", "1.9", "1.10"]
|
8
2
|
|
9
|
-
|
10
|
-
|
3
|
+
nokogiri_versions.each do |version|
|
4
|
+
appraise "nokogiri-#{version}" do
|
5
|
+
gem "nokogiri", "~> #{version}.0"
|
6
|
+
end
|
11
7
|
end
|
12
8
|
|
13
|
-
|
14
|
-
gem "addressable", "~> 2.4.0"
|
15
|
-
end
|
9
|
+
addressable_versions = ["2.4", "2.5", "2.6"]
|
16
10
|
|
17
|
-
|
18
|
-
|
11
|
+
addressable_versions.each do |version|
|
12
|
+
appraise "addressable-#{version}" do
|
13
|
+
gem "addressable", "~> #{version}.0"
|
14
|
+
end
|
19
15
|
end
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/postrank-uri.svg)](https://rubygems.org/gems/postrank-uri) [![Build Status](https://travis-ci.org/postrank-labs/postrank-uri.svg?branch=master)](https://travis-ci.org/postrank-labs/postrank-uri)
|
4
4
|
|
5
|
-
A collection of convenience methods (Ruby 2.
|
5
|
+
A collection of convenience methods (Ruby 2.3+) for dealing with extracting, (un)escaping, normalization, and canonicalization of URIs. At PostRank we process over 20M URI associated activities each day, and we need to make sure that we can reliably extract the URIs from a variety of text formats, deal with all the numerous and creative ways users like to escape and unescape their URIs, normalize the resulting URIs, and finally apply a set of custom canonicalization rules to make sure that we can cross-reference when the users are talking about the same URL.
|
6
6
|
|
7
7
|
In a nutshell, we need to make sure that creative cases like the ones below all resolve to same URI:
|
8
8
|
|
data/lib/postrank-uri/version.rb
CHANGED
data/lib/postrank-uri.rb
CHANGED
@@ -89,6 +89,7 @@ module PostRank
|
|
89
89
|
URIREGEX[:reserved_characters] = /%3F|%26/i
|
90
90
|
URIREGEX[:escape] = /([^ a-zA-Z0-9_.-]+)/x
|
91
91
|
URIREGEX[:unescape] = /(%[0-9a-fA-F]{2})/x
|
92
|
+
URIREGEX[:double_slash_outside_scheme] = /(?<!http:|https:)\/{2}/x
|
92
93
|
URIREGEX.each_pair{|k,v| v.freeze }
|
93
94
|
|
94
95
|
module_function
|
@@ -152,7 +153,7 @@ module PostRank
|
|
152
153
|
|
153
154
|
def normalize(uri, opts = {})
|
154
155
|
u = parse(uri, opts)
|
155
|
-
u.path = u.path.
|
156
|
+
u.path = u.path.gsub(URIREGEX[:double_slash_outside_scheme], '/')
|
156
157
|
u.path = u.path.chomp('/') if u.path.size != 1
|
157
158
|
u.query = nil if u.query && u.query.empty?
|
158
159
|
u.fragment = nil
|
data/postrank-uri.gemspec
CHANGED
@@ -12,13 +12,11 @@ Gem::Specification.new do |s|
|
|
12
12
|
s.summary = "URI normalization, c14n, escaping, and extraction"
|
13
13
|
s.description = s.summary
|
14
14
|
s.license = 'MIT'
|
15
|
-
s.required_ruby_version = ">= 2.
|
15
|
+
s.required_ruby_version = ">= 2.3.0"
|
16
16
|
|
17
|
-
s.
|
18
|
-
|
19
|
-
s.add_dependency "
|
20
|
-
s.add_dependency "public_suffix", ">= 2.0.0", "< 2.1"
|
21
|
-
s.add_dependency "nokogiri", ">= 1.6.1", "< 1.9"
|
17
|
+
s.add_dependency "addressable", ">= 2.4.0"
|
18
|
+
s.add_dependency "public_suffix", ">= 4.0.0", "< 5"
|
19
|
+
s.add_dependency "nokogiri", ">= 1.8.0"
|
22
20
|
|
23
21
|
s.add_development_dependency "rake"
|
24
22
|
s.add_development_dependency "rspec"
|
data/spec/postrank-uri_spec.rb
CHANGED
@@ -98,6 +98,10 @@ describe PostRank::URI do
|
|
98
98
|
expect(n('http://igvita.com/a/b')).to eq('http://igvita.com/a/b')
|
99
99
|
expect(n('http://igvita.com/a/b/')).to eq('http://igvita.com/a/b')
|
100
100
|
end
|
101
|
+
it 'preserves nested urls' do
|
102
|
+
expect(n('http://igvita.com/a/b/http://hello.com')).to eq('http://igvita.com/a/b/http://hello.com')
|
103
|
+
expect(n('http://igvita.com/a//b/https://hello.com')).to eq('http://igvita.com/a/b/https://hello.com')
|
104
|
+
end
|
101
105
|
end
|
102
106
|
|
103
107
|
context "canonicalization" do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: postrank-uri
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: '1.1'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Grigorik
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -16,60 +16,48 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 2.
|
20
|
-
- - "<"
|
21
|
-
- !ruby/object:Gem::Version
|
22
|
-
version: '2.6'
|
19
|
+
version: 2.4.0
|
23
20
|
type: :runtime
|
24
21
|
prerelease: false
|
25
22
|
version_requirements: !ruby/object:Gem::Requirement
|
26
23
|
requirements:
|
27
24
|
- - ">="
|
28
25
|
- !ruby/object:Gem::Version
|
29
|
-
version: 2.
|
30
|
-
- - "<"
|
31
|
-
- !ruby/object:Gem::Version
|
32
|
-
version: '2.6'
|
26
|
+
version: 2.4.0
|
33
27
|
- !ruby/object:Gem::Dependency
|
34
28
|
name: public_suffix
|
35
29
|
requirement: !ruby/object:Gem::Requirement
|
36
30
|
requirements:
|
37
31
|
- - ">="
|
38
32
|
- !ruby/object:Gem::Version
|
39
|
-
version:
|
33
|
+
version: 4.0.0
|
40
34
|
- - "<"
|
41
35
|
- !ruby/object:Gem::Version
|
42
|
-
version: '
|
36
|
+
version: '5'
|
43
37
|
type: :runtime
|
44
38
|
prerelease: false
|
45
39
|
version_requirements: !ruby/object:Gem::Requirement
|
46
40
|
requirements:
|
47
41
|
- - ">="
|
48
42
|
- !ruby/object:Gem::Version
|
49
|
-
version:
|
43
|
+
version: 4.0.0
|
50
44
|
- - "<"
|
51
45
|
- !ruby/object:Gem::Version
|
52
|
-
version: '
|
46
|
+
version: '5'
|
53
47
|
- !ruby/object:Gem::Dependency
|
54
48
|
name: nokogiri
|
55
49
|
requirement: !ruby/object:Gem::Requirement
|
56
50
|
requirements:
|
57
51
|
- - ">="
|
58
52
|
- !ruby/object:Gem::Version
|
59
|
-
version: 1.
|
60
|
-
- - "<"
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
version: '1.9'
|
53
|
+
version: 1.8.0
|
63
54
|
type: :runtime
|
64
55
|
prerelease: false
|
65
56
|
version_requirements: !ruby/object:Gem::Requirement
|
66
57
|
requirements:
|
67
58
|
- - ">="
|
68
59
|
- !ruby/object:Gem::Version
|
69
|
-
version: 1.
|
70
|
-
- - "<"
|
71
|
-
- !ruby/object:Gem::Version
|
72
|
-
version: '1.9'
|
60
|
+
version: 1.8.0
|
73
61
|
- !ruby/object:Gem::Dependency
|
74
62
|
name: rake
|
75
63
|
requirement: !ruby/object:Gem::Requirement
|
@@ -152,15 +140,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
152
140
|
requirements:
|
153
141
|
- - ">="
|
154
142
|
- !ruby/object:Gem::Version
|
155
|
-
version: 2.
|
143
|
+
version: 2.3.0
|
156
144
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
157
145
|
requirements:
|
158
146
|
- - ">="
|
159
147
|
- !ruby/object:Gem::Version
|
160
148
|
version: '0'
|
161
149
|
requirements: []
|
162
|
-
|
163
|
-
rubygems_version: 2.6.11
|
150
|
+
rubygems_version: 3.0.3.1
|
164
151
|
signing_key:
|
165
152
|
specification_version: 4
|
166
153
|
summary: URI normalization, c14n, escaping, and extraction
|