twingly-url 4.2.0 → 5.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +136 -25
- data/lib/twingly/public_suffix_list.rb +37 -0
- data/lib/twingly/url.rb +14 -8
- data/lib/twingly/version.rb +1 -1
- metadata +19 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c842338446cf451ce1bfc0530f3ef53552208a6
|
4
|
+
data.tar.gz: b36802a8c3f58444b84254d73dfc5666a0d11b13
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 803792e18a70bf53df2fc8e50b380e02244985e43c632ced88932ab0c14baa914835d514ddfaa03d0a4441275580acaa9562cca2f28a346ab803cdf20f0ff491
|
7
|
+
data.tar.gz: e169afed23eaabe22730db9416fa10c43a19629ee1374d97c83cf3a49de220b686137da39c2ab69e0f09fa7a6c097be3068d6a9f67ce077ae07d45ed99ae0bc6
|
data/README.md
CHANGED
@@ -27,33 +27,132 @@ Usage (this output was created with [`examples/url.rb`][examples]):
|
|
27
27
|
require "twingly/url"
|
28
28
|
|
29
29
|
url = Twingly::URL.parse("http://www.twingly.co.uk/search")
|
30
|
-
url.scheme
|
31
|
-
url.
|
32
|
-
url.
|
33
|
-
url.
|
34
|
-
url.
|
35
|
-
url.
|
36
|
-
url.
|
37
|
-
url.
|
38
|
-
url.
|
39
|
-
url.
|
40
|
-
url.
|
30
|
+
url.scheme # => "http"
|
31
|
+
url.normalized.scheme # => "http"
|
32
|
+
url.trd # => "www"
|
33
|
+
url.normalized.trd # => "www"
|
34
|
+
url.sld # => "twingly"
|
35
|
+
url.normalized.sld # => "twingly"
|
36
|
+
url.tld # => "co.uk"
|
37
|
+
url.normalized.tld # => "co.uk"
|
38
|
+
url.ttld # => "uk"
|
39
|
+
url.normalized.ttld # => "uk"
|
40
|
+
url.domain # => "twingly.co.uk"
|
41
|
+
url.normalized.domain # => "twingly.co.uk"
|
42
|
+
url.host # => "www.twingly.co.uk"
|
43
|
+
url.normalized.host # => "www.twingly.co.uk"
|
44
|
+
url.origin # => "http://www.twingly.co.uk"
|
45
|
+
url.normalized.origin # => "http://www.twingly.co.uk"
|
46
|
+
url.path # => "/search"
|
47
|
+
url.normalized.path # => "/search"
|
48
|
+
url.without_scheme # => "//www.twingly.co.uk/search"
|
49
|
+
url.normalized.without_scheme # => "//www.twingly.co.uk/search"
|
50
|
+
url.userinfo # => ""
|
51
|
+
url.normalized.userinfo # => ""
|
52
|
+
url.user # => ""
|
53
|
+
url.normalized.user # => ""
|
54
|
+
url.password # => ""
|
55
|
+
url.normalized.password # => ""
|
56
|
+
url.valid? # => "true"
|
57
|
+
url.normalized.valid? # => "true"
|
58
|
+
url.to_s # => "http://www.twingly.co.uk/search"
|
59
|
+
url.normalized.to_s # => "http://www.twingly.co.uk/search"
|
60
|
+
|
61
|
+
url = Twingly::URL.parse("http://räksmörgås.макдональдс.рф/foo")
|
62
|
+
url.scheme # => "http"
|
63
|
+
url.normalized.scheme # => "http"
|
64
|
+
url.trd # => "räksmörgås"
|
65
|
+
url.normalized.trd # => "xn--rksmrgs-5wao1o"
|
66
|
+
url.sld # => "макдональдс"
|
67
|
+
url.normalized.sld # => "xn--80aalb1aicli8a5i"
|
68
|
+
url.tld # => "рф"
|
69
|
+
url.normalized.tld # => "xn--p1ai"
|
70
|
+
url.ttld # => "рф"
|
71
|
+
url.normalized.ttld # => "xn--p1ai"
|
72
|
+
url.domain # => "макдональдс.рф"
|
73
|
+
url.normalized.domain # => "xn--80aalb1aicli8a5i.xn--p1ai"
|
74
|
+
url.host # => "räksmörgås.макдональдс.рф"
|
75
|
+
url.normalized.host # => "xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
|
76
|
+
url.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
|
77
|
+
url.normalized.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
|
78
|
+
url.path # => "/foo"
|
79
|
+
url.normalized.path # => "/foo"
|
80
|
+
url.without_scheme # => "//räksmörgås.макдональдс.рф/foo"
|
81
|
+
url.normalized.without_scheme # => "//xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
|
82
|
+
url.userinfo # => ""
|
83
|
+
url.normalized.userinfo # => ""
|
84
|
+
url.user # => ""
|
85
|
+
url.normalized.user # => ""
|
86
|
+
url.password # => ""
|
87
|
+
url.normalized.password # => ""
|
88
|
+
url.valid? # => "true"
|
89
|
+
url.normalized.valid? # => "true"
|
90
|
+
url.to_s # => "http://räksmörgås.макдональдс.рф/foo"
|
91
|
+
url.normalized.to_s # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
|
92
|
+
|
93
|
+
url = Twingly::URL.parse("http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo")
|
94
|
+
url.scheme # => "http"
|
95
|
+
url.normalized.scheme # => "http"
|
96
|
+
url.trd # => "xn--rksmrgs-5wao1o"
|
97
|
+
url.normalized.trd # => "xn--rksmrgs-5wao1o"
|
98
|
+
url.sld # => "xn--80aalb1aicli8a5i"
|
99
|
+
url.normalized.sld # => "xn--80aalb1aicli8a5i"
|
100
|
+
url.tld # => "xn--p1ai"
|
101
|
+
url.normalized.tld # => "xn--p1ai"
|
102
|
+
url.ttld # => "xn--p1ai"
|
103
|
+
url.normalized.ttld # => "xn--p1ai"
|
104
|
+
url.domain # => "xn--80aalb1aicli8a5i.xn--p1ai"
|
105
|
+
url.normalized.domain # => "xn--80aalb1aicli8a5i.xn--p1ai"
|
106
|
+
url.host # => "xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
|
107
|
+
url.normalized.host # => "xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
|
108
|
+
url.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
|
109
|
+
url.normalized.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
|
110
|
+
url.path # => "/foo"
|
111
|
+
url.normalized.path # => "/foo"
|
112
|
+
url.without_scheme # => "//xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
|
113
|
+
url.normalized.without_scheme # => "//xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
|
114
|
+
url.userinfo # => ""
|
115
|
+
url.normalized.userinfo # => ""
|
116
|
+
url.user # => ""
|
117
|
+
url.normalized.user # => ""
|
118
|
+
url.password # => ""
|
119
|
+
url.normalized.password # => ""
|
120
|
+
url.valid? # => "true"
|
121
|
+
url.normalized.valid? # => "true"
|
122
|
+
url.to_s # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
|
123
|
+
url.normalized.to_s # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
|
41
124
|
|
42
125
|
url = Twingly::URL.parse("https://admin:correcthorsebatterystaple@example.com/")
|
43
|
-
url.scheme
|
44
|
-
url.
|
45
|
-
url.
|
46
|
-
url.
|
47
|
-
url.
|
48
|
-
url.
|
49
|
-
url.
|
50
|
-
url.
|
51
|
-
url.
|
52
|
-
url.
|
53
|
-
url.
|
54
|
-
url.
|
55
|
-
url.
|
56
|
-
url.
|
126
|
+
url.scheme # => "https"
|
127
|
+
url.normalized.scheme # => "https"
|
128
|
+
url.trd # => ""
|
129
|
+
url.normalized.trd # => "www"
|
130
|
+
url.sld # => "example"
|
131
|
+
url.normalized.sld # => "example"
|
132
|
+
url.tld # => "com"
|
133
|
+
url.normalized.tld # => "com"
|
134
|
+
url.ttld # => "com"
|
135
|
+
url.normalized.ttld # => "com"
|
136
|
+
url.domain # => "example.com"
|
137
|
+
url.normalized.domain # => "example.com"
|
138
|
+
url.host # => "example.com"
|
139
|
+
url.normalized.host # => "www.example.com"
|
140
|
+
url.origin # => "https://example.com"
|
141
|
+
url.normalized.origin # => "https://www.example.com"
|
142
|
+
url.path # => "/"
|
143
|
+
url.normalized.path # => "/"
|
144
|
+
url.without_scheme # => "//admin:correcthorsebatterystaple@example.com/"
|
145
|
+
url.normalized.without_scheme # => "//admin:correcthorsebatterystaple@www.example.com/"
|
146
|
+
url.userinfo # => "admin:correcthorsebatterystaple"
|
147
|
+
url.normalized.userinfo # => "admin:correcthorsebatterystaple"
|
148
|
+
url.user # => "admin"
|
149
|
+
url.normalized.user # => "admin"
|
150
|
+
url.password # => "correcthorsebatterystaple"
|
151
|
+
url.normalized.password # => "correcthorsebatterystaple"
|
152
|
+
url.valid? # => "true"
|
153
|
+
url.normalized.valid? # => "true"
|
154
|
+
url.to_s # => "https://admin:correcthorsebatterystaple@example.com/"
|
155
|
+
url.normalized.to_s # => "https://admin:correcthorsebatterystaple@www.example.com/"
|
57
156
|
```
|
58
157
|
|
59
158
|
### Dependencies
|
@@ -63,6 +162,14 @@ The gem requires libidn.
|
|
63
162
|
sudo apt-get install libidn11 # Ubuntu
|
64
163
|
brew install libidn # OS X
|
65
164
|
|
165
|
+
## Development
|
166
|
+
|
167
|
+
To inspect the [Public Suffix List], this handy command can be used (also works in projects that use `twingly-url` as a dependency).
|
168
|
+
|
169
|
+
open $(bundle show public_suffix)/data/list.txt
|
170
|
+
|
171
|
+
[Public Suffix List]: https://github.com/weppos/publicsuffix-ruby
|
172
|
+
|
66
173
|
## Tests
|
67
174
|
|
68
175
|
Run tests with
|
@@ -91,6 +198,10 @@ Note that this isn't a benchmark, we're using [ruby-prof] which will slow things
|
|
91
198
|
|
92
199
|
bundle exec rake release
|
93
200
|
|
201
|
+
* Update the changelog with [GitHub Changelog Generator](https://github.com/skywinder/github-changelog-generator/) (`gem install github_changelog_generator` if you don't have it, set `CHANGELOG_GITHUB_TOKEN` to a personal access token to avoid rate limiting by GitHub). This command will update `CHANGELOG.md`; commit and push the result manually.
|
202
|
+
|
203
|
+
github_changelog_generator
|
204
|
+
|
94
205
|
[twingly-rubygems]: https://rubygems.org/profiles/twingly
|
95
206
|
[ruby-prof]: http://ruby-prof.rubyforge.org/
|
96
207
|
[examples]: examples/url.rb
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require "public_suffix"
|
2
|
+
|
3
|
+
module Twingly
|
4
|
+
class PublicSuffixList
|
5
|
+
ACE_PREFIX = /\Axn\-\-/i.freeze
|
6
|
+
|
7
|
+
private_constant :ACE_PREFIX
|
8
|
+
|
9
|
+
# Extend the PSL with ASCII form of all internationalized domain names
|
10
|
+
def self.with_punycoded_names
|
11
|
+
list_data = File.read(PublicSuffix::List::DEFAULT_LIST_PATH)
|
12
|
+
list = PublicSuffix::List.parse(list_data, private_domains: false)
|
13
|
+
|
14
|
+
punycoded_names(list).each do |punycoded_name|
|
15
|
+
new_rule = PublicSuffix::Rule.factory(punycoded_name)
|
16
|
+
list.add(new_rule, reindex: false)
|
17
|
+
end
|
18
|
+
|
19
|
+
list.reindex!
|
20
|
+
|
21
|
+
list
|
22
|
+
end
|
23
|
+
|
24
|
+
private_class_method \
|
25
|
+
def self.punycoded_names(list)
|
26
|
+
names = list.map { |rule| Addressable::IDNA.to_ascii(rule.value) }
|
27
|
+
names.select { |name| punycoded_name?(name) }
|
28
|
+
end
|
29
|
+
|
30
|
+
private_class_method \
|
31
|
+
def self.punycoded_name?(name)
|
32
|
+
PublicSuffix::Domain.name_to_labels(name).any? do |label|
|
33
|
+
label =~ ACE_PREFIX
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/twingly/url.rb
CHANGED
@@ -2,17 +2,17 @@ require "addressable/uri"
|
|
2
2
|
require "addressable/idna/native"
|
3
3
|
require "public_suffix"
|
4
4
|
|
5
|
+
require_relative "public_suffix_list"
|
5
6
|
require_relative "url/null_url"
|
6
7
|
require_relative "url/error"
|
7
8
|
require_relative "version"
|
8
9
|
|
9
|
-
PublicSuffix::List.private_domains = false
|
10
|
-
|
11
10
|
module Twingly
|
12
11
|
class URL
|
13
12
|
include Comparable
|
14
13
|
|
15
14
|
ACCEPTED_SCHEMES = /\Ahttps?\z/i
|
15
|
+
CUSTOM_PSL = PublicSuffixList.with_punycoded_names
|
16
16
|
ENDS_WITH_SLASH = /\/+$/
|
17
17
|
ERRORS_TO_EXTEND = [
|
18
18
|
Addressable::URI::InvalidURIError,
|
@@ -20,7 +20,10 @@ module Twingly
|
|
20
20
|
IDN::Idna::IdnaError,
|
21
21
|
]
|
22
22
|
|
23
|
-
private_constant :ACCEPTED_SCHEMES
|
23
|
+
private_constant :ACCEPTED_SCHEMES
|
24
|
+
private_constant :CUSTOM_PSL
|
25
|
+
private_constant :ENDS_WITH_SLASH
|
26
|
+
private_constant :ERRORS_TO_EXTEND
|
24
27
|
|
25
28
|
class << self
|
26
29
|
def parse(potential_url)
|
@@ -36,9 +39,12 @@ module Twingly
|
|
36
39
|
scheme = addressable_uri.scheme
|
37
40
|
raise Twingly::URL::Error::ParseError unless scheme =~ ACCEPTED_SCHEMES
|
38
41
|
|
39
|
-
|
42
|
+
# URLs that can't be normalized should not be valid
|
43
|
+
try_addressable_normalize(addressable_uri)
|
40
44
|
|
41
|
-
|
45
|
+
host = addressable_uri.host
|
46
|
+
public_suffix_domain = PublicSuffix.parse(host, list: CUSTOM_PSL,
|
47
|
+
default_rule: nil)
|
42
48
|
raise Twingly::URL::Error::ParseError if public_suffix_domain.nil?
|
43
49
|
|
44
50
|
raise Twingly::URL::Error::ParseError if public_suffix_domain.sld.nil?
|
@@ -63,8 +69,8 @@ module Twingly
|
|
63
69
|
|
64
70
|
# Workaround for the following bug in addressable:
|
65
71
|
# https://github.com/sporkmonger/addressable/issues/224
|
66
|
-
def
|
67
|
-
addressable_uri.
|
72
|
+
def try_addressable_normalize(addressable_uri)
|
73
|
+
addressable_uri.normalize
|
68
74
|
rescue ArgumentError => error
|
69
75
|
if error.message.include?("invalid byte sequence in UTF-8")
|
70
76
|
raise Twingly::URL::Error::ParseError
|
@@ -76,7 +82,7 @@ module Twingly
|
|
76
82
|
private :new
|
77
83
|
private :internal_parse
|
78
84
|
private :to_addressable_uri
|
79
|
-
private :
|
85
|
+
private :try_addressable_normalize
|
80
86
|
end
|
81
87
|
|
82
88
|
def initialize(addressable_uri, public_suffix_domain)
|
data/lib/twingly/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: twingly-url
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 5.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Twingly AB
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-09-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: addressable
|
@@ -16,28 +16,40 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '2'
|
19
|
+
version: '2.4'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.4.0
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
27
|
- - "~>"
|
25
28
|
- !ruby/object:Gem::Version
|
26
|
-
version: '2'
|
29
|
+
version: '2.4'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.4.0
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
34
|
name: public_suffix
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
37
|
- - "~>"
|
32
38
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
39
|
+
version: '2.0'
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 2.0.2
|
34
43
|
type: :runtime
|
35
44
|
prerelease: false
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
37
46
|
requirements:
|
38
47
|
- - "~>"
|
39
48
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
49
|
+
version: '2.0'
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 2.0.2
|
41
53
|
- !ruby/object:Gem::Dependency
|
42
54
|
name: idn-ruby
|
43
55
|
requirement: !ruby/object:Gem::Requirement
|
@@ -116,6 +128,7 @@ extensions: []
|
|
116
128
|
extra_rdoc_files: []
|
117
129
|
files:
|
118
130
|
- README.md
|
131
|
+
- lib/twingly/public_suffix_list.rb
|
119
132
|
- lib/twingly/url.rb
|
120
133
|
- lib/twingly/url/error.rb
|
121
134
|
- lib/twingly/url/hasher.rb
|