postrank-uri 1.0.23 → 1.0.24

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 45096e106f5528a2cee2b9b61856889699b1319c
4
- data.tar.gz: d487c8d4ea2e19a78f8a191de9cf5e2bc1946060
3
+ metadata.gz: 8ea2cc7f1dc58cb559b9168ff0f83150f1ec6119
4
+ data.tar.gz: 73d97d1f7c56b4b0644eb9a8ad54490ca1561fbf
5
5
  SHA512:
6
- metadata.gz: 34b61b475882e456ae4d68a2e440c9006a619c849b0dea0fad4f1ec15728aea9e2efeb04a9fea24a39ceb874e5a9eb1fae75db87180676da123ae99dfb319bd4
7
- data.tar.gz: dd6a80d445bcd08ac8cde60ebb44f66836e8612e4313b52ae7187829722fec9b5d1d96e3f7bf6ba6201cea0d41432007c483fbd3da91970c559d515e70c27477
6
+ metadata.gz: 1fa2d5475a617ab8181554f4d15f4d72a1c67f40bb60c16502a575e16de6d721edcbe0c6ca1e5c331588510e0dd1cced56ea9dd4704dc7e9ab59b71c6a6385a5
7
+ data.tar.gz: 6ab0bf3e698d99127db88528a8fefba1b4d4c7667a6c42cc256a71dadb2dd78dd1f080e206312cc395b9a9c3c68b1fb62335c2f6a48346871cd85466ac86660b
@@ -1,10 +1,10 @@
1
1
  language: ruby
2
2
  cache: bundler
3
3
  rvm:
4
- - 2.1
5
- - 2.2
6
- - 2.3
7
- - 2.4.0
4
+ - 2.3.8
5
+ - 2.4.5
6
+ - 2.5.3
7
+ - 2.6.1
8
8
  before_install:
9
9
  - gem install bundler
10
10
  install:
data/Appraisals CHANGED
@@ -1,19 +1,15 @@
1
- appraise "nokogiri-1.7" do
2
- gem "nokogiri", "~> 1.7.0"
3
- end
4
-
5
- appraise "nokogiri-1.6" do
6
- gem "nokogiri", "~> 1.6.1"
7
- end
1
+ nokogiri_versions = ["1.8", "1.9", "1.10"]
8
2
 
9
- appraise "addressable-2.3" do
10
- gem "addressable", "~> 2.3.0"
3
+ nokogiri_versions.each do |version|
4
+ appraise "nokogiri-#{version}" do
5
+ gem "nokogiri", "~> #{version}.0"
6
+ end
11
7
  end
12
8
 
13
- appraise "addressable-2.4" do
14
- gem "addressable", "~> 2.4.0"
15
- end
9
+ addressable_versions = ["2.4", "2.5", "2.6"]
16
10
 
17
- appraise "addressable-2.5" do
18
- gem "addressable", "~> 2.5.0"
11
+ addressable_versions.each do |version|
12
+ appraise "addressable-#{version}" do
13
+ gem "addressable", "~> #{version}.0"
14
+ end
19
15
  end
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/postrank-uri.svg)](https://rubygems.org/gems/postrank-uri) [![Build Status](https://travis-ci.org/postrank-labs/postrank-uri.svg?branch=master)](https://travis-ci.org/postrank-labs/postrank-uri)
4
4
 
5
- A collection of convenience methods (Ruby 2.0+) for dealing with extracting, (un)escaping, normalization, and canonicalization of URIs. At PostRank we process over 20M URI associated activities each day, and we need to make sure that we can reliably extract the URIs from a variety of text formats, deal with all the numerous and creative ways users like to escape and unescape their URIs, normalize the resulting URIs, and finally apply a set of custom canonicalization rules to make sure that we can cross-reference when the users are talking about the same URL.
5
+ A collection of convenience methods (Ruby 2.3+) for dealing with extracting, (un)escaping, normalization, and canonicalization of URIs. At PostRank we process over 20M URI associated activities each day, and we need to make sure that we can reliably extract the URIs from a variety of text formats, deal with all the numerous and creative ways users like to escape and unescape their URIs, normalize the resulting URIs, and finally apply a set of custom canonicalization rules to make sure that we can cross-reference when the users are talking about the same URL.
6
6
 
7
7
  In a nutshell, we need to make sure that creative cases like the ones below all resolve to same URI:
8
8
 
@@ -89,6 +89,7 @@ module PostRank
89
89
  URIREGEX[:reserved_characters] = /%3F|%26/i
90
90
  URIREGEX[:escape] = /([^ a-zA-Z0-9_.-]+)/x
91
91
  URIREGEX[:unescape] = /(%[0-9a-fA-F]{2})/x
92
+ URIREGEX[:double_slash_outside_scheme] = /(?<!http:|https:)\/{2}/x
92
93
  URIREGEX.each_pair{|k,v| v.freeze }
93
94
 
94
95
  module_function
@@ -152,7 +153,7 @@ module PostRank
152
153
 
153
154
  def normalize(uri, opts = {})
154
155
  u = parse(uri, opts)
155
- u.path = u.path.squeeze('/')
156
+ u.path = u.path.gsub(URIREGEX[:double_slash_outside_scheme], '/')
156
157
  u.path = u.path.chomp('/') if u.path.size != 1
157
158
  u.query = nil if u.query && u.query.empty?
158
159
  u.fragment = nil
@@ -1,5 +1,5 @@
1
1
  module PostRank
2
2
  module URI
3
- VERSION = "1.0.23"
3
+ VERSION = "1.0.24"
4
4
  end
5
5
  end
@@ -12,13 +12,13 @@ Gem::Specification.new do |s|
12
12
  s.summary = "URI normalization, c14n, escaping, and extraction"
13
13
  s.description = s.summary
14
14
  s.license = 'MIT'
15
- s.required_ruby_version = ">= 2.0.0"
15
+ s.required_ruby_version = ">= 2.3.0"
16
16
 
17
17
  s.rubyforge_project = "postrank-uri"
18
18
 
19
- s.add_dependency "addressable", ">= 2.3.0", "< 2.6"
19
+ s.add_dependency "addressable", ">= 2.4.0"
20
20
  s.add_dependency "public_suffix", ">= 2.0.0", "< 2.1"
21
- s.add_dependency "nokogiri", ">= 1.6.1", "< 1.9"
21
+ s.add_dependency "nokogiri", ">= 1.8.0"
22
22
 
23
23
  s.add_development_dependency "rake"
24
24
  s.add_development_dependency "rspec"
@@ -98,6 +98,10 @@ describe PostRank::URI do
98
98
  expect(n('http://igvita.com/a/b')).to eq('http://igvita.com/a/b')
99
99
  expect(n('http://igvita.com/a/b/')).to eq('http://igvita.com/a/b')
100
100
  end
101
+ it 'preserves nested urls' do
102
+ expect(n('http://igvita.com/a/b/http://hello.com')).to eq('http://igvita.com/a/b/http://hello.com')
103
+ expect(n('http://igvita.com/a//b/https://hello.com')).to eq('http://igvita.com/a/b/https://hello.com')
104
+ end
101
105
  end
102
106
 
103
107
  context "canonicalization" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: postrank-uri
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.23
4
+ version: 1.0.24
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Grigorik
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-16 00:00:00.000000000 Z
11
+ date: 2019-04-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -16,20 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 2.3.0
20
- - - "<"
21
- - !ruby/object:Gem::Version
22
- version: '2.6'
19
+ version: 2.4.0
23
20
  type: :runtime
24
21
  prerelease: false
25
22
  version_requirements: !ruby/object:Gem::Requirement
26
23
  requirements:
27
24
  - - ">="
28
25
  - !ruby/object:Gem::Version
29
- version: 2.3.0
30
- - - "<"
31
- - !ruby/object:Gem::Version
32
- version: '2.6'
26
+ version: 2.4.0
33
27
  - !ruby/object:Gem::Dependency
34
28
  name: public_suffix
35
29
  requirement: !ruby/object:Gem::Requirement
@@ -56,20 +50,14 @@ dependencies:
56
50
  requirements:
57
51
  - - ">="
58
52
  - !ruby/object:Gem::Version
59
- version: 1.6.1
60
- - - "<"
61
- - !ruby/object:Gem::Version
62
- version: '1.9'
53
+ version: 1.8.0
63
54
  type: :runtime
64
55
  prerelease: false
65
56
  version_requirements: !ruby/object:Gem::Requirement
66
57
  requirements:
67
58
  - - ">="
68
59
  - !ruby/object:Gem::Version
69
- version: 1.6.1
70
- - - "<"
71
- - !ruby/object:Gem::Version
72
- version: '1.9'
60
+ version: 1.8.0
73
61
  - !ruby/object:Gem::Dependency
74
62
  name: rake
75
63
  requirement: !ruby/object:Gem::Requirement
@@ -152,7 +140,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
152
140
  requirements:
153
141
  - - ">="
154
142
  - !ruby/object:Gem::Version
155
- version: 2.0.0
143
+ version: 2.3.0
156
144
  required_rubygems_version: !ruby/object:Gem::Requirement
157
145
  requirements:
158
146
  - - ">="