postrank-uri 1.0.23 → 1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 45096e106f5528a2cee2b9b61856889699b1319c
4
- data.tar.gz: d487c8d4ea2e19a78f8a191de9cf5e2bc1946060
2
+ SHA256:
3
+ metadata.gz: d760886f160f892a1ef8cf6e0b6e4bb3509e568fd3cac1fd242873ce4f8621f4
4
+ data.tar.gz: 94167480b4a74048a82322f79cd4a13017ff9ce0730455fe4c5b1988ff823563
5
5
  SHA512:
6
- metadata.gz: 34b61b475882e456ae4d68a2e440c9006a619c849b0dea0fad4f1ec15728aea9e2efeb04a9fea24a39ceb874e5a9eb1fae75db87180676da123ae99dfb319bd4
7
- data.tar.gz: dd6a80d445bcd08ac8cde60ebb44f66836e8612e4313b52ae7187829722fec9b5d1d96e3f7bf6ba6201cea0d41432007c483fbd3da91970c559d515e70c27477
6
+ metadata.gz: b39395c9aac115958e6f7d977cc1e2bd3ed087230e8c4b76035b4e96a906bab9b1e3f8aa10b73c822762591acfebc86ece9fd1deeca79aeceb93601832e650e6
7
+ data.tar.gz: 4905d528cb0a31bc25fd9af7830849a93ff04d874d8d87f68eb861805ee0b8563a523e8b39e0c3444d17539fa06cf5b96cff2092ef3f6c490f0d02029a395244
data/.travis.yml CHANGED
@@ -1,10 +1,12 @@
1
1
  language: ruby
2
2
  cache: bundler
3
3
  rvm:
4
- - 2.1
5
- - 2.2
6
- - 2.3
7
- - 2.4.0
4
+ - 2.3.8
5
+ - 2.4.10
6
+ - 2.5.9
7
+ - 2.6.8
8
+ - 2.7.4
9
+ - 3.0.2
8
10
  before_install:
9
11
  - gem install bundler
10
12
  install:
data/Appraisals CHANGED
@@ -1,19 +1,15 @@
1
- appraise "nokogiri-1.7" do
2
- gem "nokogiri", "~> 1.7.0"
3
- end
4
-
5
- appraise "nokogiri-1.6" do
6
- gem "nokogiri", "~> 1.6.1"
7
- end
1
+ nokogiri_versions = ["1.8", "1.9", "1.10"]
8
2
 
9
- appraise "addressable-2.3" do
10
- gem "addressable", "~> 2.3.0"
3
+ nokogiri_versions.each do |version|
4
+ appraise "nokogiri-#{version}" do
5
+ gem "nokogiri", "~> #{version}.0"
6
+ end
11
7
  end
12
8
 
13
- appraise "addressable-2.4" do
14
- gem "addressable", "~> 2.4.0"
15
- end
9
+ addressable_versions = ["2.4", "2.5", "2.6"]
16
10
 
17
- appraise "addressable-2.5" do
18
- gem "addressable", "~> 2.5.0"
11
+ addressable_versions.each do |version|
12
+ appraise "addressable-#{version}" do
13
+ gem "addressable", "~> #{version}.0"
14
+ end
19
15
  end
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/postrank-uri.svg)](https://rubygems.org/gems/postrank-uri) [![Build Status](https://travis-ci.org/postrank-labs/postrank-uri.svg?branch=master)](https://travis-ci.org/postrank-labs/postrank-uri)
4
4
 
5
- A collection of convenience methods (Ruby 2.0+) for dealing with extracting, (un)escaping, normalization, and canonicalization of URIs. At PostRank we process over 20M URI associated activities each day, and we need to make sure that we can reliably extract the URIs from a variety of text formats, deal with all the numerous and creative ways users like to escape and unescape their URIs, normalize the resulting URIs, and finally apply a set of custom canonicalization rules to make sure that we can cross-reference when the users are talking about the same URL.
5
+ A collection of convenience methods (Ruby 2.3+) for dealing with extracting, (un)escaping, normalization, and canonicalization of URIs. At PostRank we process over 20M URI associated activities each day, and we need to make sure that we can reliably extract the URIs from a variety of text formats, deal with all the numerous and creative ways users like to escape and unescape their URIs, normalize the resulting URIs, and finally apply a set of custom canonicalization rules to make sure that we can cross-reference when the users are talking about the same URL.
6
6
 
7
7
  In a nutshell, we need to make sure that creative cases like the ones below all resolve to the same URI:
8
8
 
@@ -1,5 +1,5 @@
1
1
  module PostRank
2
2
  module URI
3
- VERSION = "1.0.23"
3
+ VERSION = "1.1"
4
4
  end
5
5
  end
data/lib/postrank-uri.rb CHANGED
@@ -89,6 +89,7 @@ module PostRank
89
89
  URIREGEX[:reserved_characters] = /%3F|%26/i
90
90
  URIREGEX[:escape] = /([^ a-zA-Z0-9_.-]+)/x
91
91
  URIREGEX[:unescape] = /(%[0-9a-fA-F]{2})/x
92
+ URIREGEX[:double_slash_outside_scheme] = /(?<!http:|https:)\/{2}/x
92
93
  URIREGEX.each_pair{|k,v| v.freeze }
93
94
 
94
95
  module_function
@@ -152,7 +153,7 @@ module PostRank
152
153
 
153
154
  def normalize(uri, opts = {})
154
155
  u = parse(uri, opts)
155
- u.path = u.path.squeeze('/')
156
+ u.path = u.path.gsub(URIREGEX[:double_slash_outside_scheme], '/')
156
157
  u.path = u.path.chomp('/') if u.path.size != 1
157
158
  u.query = nil if u.query && u.query.empty?
158
159
  u.fragment = nil
data/postrank-uri.gemspec CHANGED
@@ -12,13 +12,11 @@ Gem::Specification.new do |s|
12
12
  s.summary = "URI normalization, c14n, escaping, and extraction"
13
13
  s.description = s.summary
14
14
  s.license = 'MIT'
15
- s.required_ruby_version = ">= 2.0.0"
15
+ s.required_ruby_version = ">= 2.3.0"
16
16
 
17
- s.rubyforge_project = "postrank-uri"
18
-
19
- s.add_dependency "addressable", ">= 2.3.0", "< 2.6"
20
- s.add_dependency "public_suffix", ">= 2.0.0", "< 2.1"
21
- s.add_dependency "nokogiri", ">= 1.6.1", "< 1.9"
17
+ s.add_dependency "addressable", ">= 2.4.0"
18
+ s.add_dependency "public_suffix", ">= 4.0.0", "< 5"
19
+ s.add_dependency "nokogiri", ">= 1.8.0"
22
20
 
23
21
  s.add_development_dependency "rake"
24
22
  s.add_development_dependency "rspec"
@@ -98,6 +98,10 @@ describe PostRank::URI do
98
98
  expect(n('http://igvita.com/a/b')).to eq('http://igvita.com/a/b')
99
99
  expect(n('http://igvita.com/a/b/')).to eq('http://igvita.com/a/b')
100
100
  end
101
+ it 'preserves nested urls' do
102
+ expect(n('http://igvita.com/a/b/http://hello.com')).to eq('http://igvita.com/a/b/http://hello.com')
103
+ expect(n('http://igvita.com/a//b/https://hello.com')).to eq('http://igvita.com/a/b/https://hello.com')
104
+ end
101
105
  end
102
106
 
103
107
  context "canonicalization" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: postrank-uri
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.23
4
+ version: '1.1'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Grigorik
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-16 00:00:00.000000000 Z
11
+ date: 2023-02-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -16,60 +16,48 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 2.3.0
20
- - - "<"
21
- - !ruby/object:Gem::Version
22
- version: '2.6'
19
+ version: 2.4.0
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
26
23
  requirements:
27
24
  - - ">="
28
25
  - !ruby/object:Gem::Version
29
- version: 2.3.0
30
- - - "<"
31
- - !ruby/object:Gem::Version
32
- version: '2.6'
26
+ version: 2.4.0
33
27
  - !ruby/object:Gem::Dependency
34
28
  name: public_suffix
35
29
  requirement: !ruby/object:Gem::Requirement
36
30
  requirements:
37
31
  - - ">="
38
32
  - !ruby/object:Gem::Version
39
- version: 2.0.0
33
+ version: 4.0.0
40
34
  - - "<"
41
35
  - !ruby/object:Gem::Version
42
- version: '2.1'
36
+ version: '5'
43
37
  type: :runtime
44
38
  prerelease: false
45
39
  version_requirements: !ruby/object:Gem::Requirement
46
40
  requirements:
47
41
  - - ">="
48
42
  - !ruby/object:Gem::Version
49
- version: 2.0.0
43
+ version: 4.0.0
50
44
  - - "<"
51
45
  - !ruby/object:Gem::Version
52
- version: '2.1'
46
+ version: '5'
53
47
  - !ruby/object:Gem::Dependency
54
48
  name: nokogiri
55
49
  requirement: !ruby/object:Gem::Requirement
56
50
  requirements:
57
51
  - - ">="
58
52
  - !ruby/object:Gem::Version
59
- version: 1.6.1
60
- - - "<"
61
- - !ruby/object:Gem::Version
62
- version: '1.9'
53
+ version: 1.8.0
63
54
  type: :runtime
64
55
  prerelease: false
65
56
  version_requirements: !ruby/object:Gem::Requirement
66
57
  requirements:
67
58
  - - ">="
68
59
  - !ruby/object:Gem::Version
69
- version: 1.6.1
70
- - - "<"
71
- - !ruby/object:Gem::Version
72
- version: '1.9'
60
+ version: 1.8.0
73
61
  - !ruby/object:Gem::Dependency
74
62
  name: rake
75
63
  requirement: !ruby/object:Gem::Requirement
@@ -152,15 +140,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
152
140
  requirements:
153
141
  - - ">="
154
142
  - !ruby/object:Gem::Version
155
- version: 2.0.0
143
+ version: 2.3.0
156
144
  required_rubygems_version: !ruby/object:Gem::Requirement
157
145
  requirements:
158
146
  - - ">="
159
147
  - !ruby/object:Gem::Version
160
148
  version: '0'
161
149
  requirements: []
162
- rubyforge_project: postrank-uri
163
- rubygems_version: 2.6.11
150
+ rubygems_version: 3.0.3.1
164
151
  signing_key:
165
152
  specification_version: 4
166
153
  summary: URI normalization, c14n, escaping, and extraction