twingly-url 4.2.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e450e3b0005ae549f7465be270f474d921f1c5bd
4
- data.tar.gz: 897b0ff72f7d298edd7846068d6d3ae4a0197cec
3
+ metadata.gz: 5c842338446cf451ce1bfc0530f3ef53552208a6
4
+ data.tar.gz: b36802a8c3f58444b84254d73dfc5666a0d11b13
5
5
  SHA512:
6
- metadata.gz: 56efced017c87d78a72ef97cf2d98f89c5e2329b79d9ca55440d4564b5c24edd8a95d9f3ce3bf9ff65d23ca58990269fd02c7244abe12cd686b15da061e7ab9a
7
- data.tar.gz: 0fc17c60dafbbd9278f5ad1aee81553c42bf76e1734eacd45e944e2700f3770e6a0a3056630efec5c8274ed61d3ccc5e5deec95f34d079c7048390eca64e1245
6
+ metadata.gz: 803792e18a70bf53df2fc8e50b380e02244985e43c632ced88932ab0c14baa914835d514ddfaa03d0a4441275580acaa9562cca2f28a346ab803cdf20f0ff491
7
+ data.tar.gz: e169afed23eaabe22730db9416fa10c43a19629ee1374d97c83cf3a49de220b686137da39c2ab69e0f09fa7a6c097be3068d6a9f67ce077ae07d45ed99ae0bc6
data/README.md CHANGED
@@ -27,33 +27,132 @@ Usage (this output was created with [`examples/url.rb`][examples]):
27
27
  require "twingly/url"
28
28
 
29
29
  url = Twingly::URL.parse("http://www.twingly.co.uk/search")
30
- url.scheme # => "http"
31
- url.trd # => "www"
32
- url.sld # => "twingly"
33
- url.tld # => "co.uk"
34
- url.ttld # => "uk"
35
- url.domain # => "twingly.co.uk"
36
- url.host # => "www.twingly.co.uk"
37
- url.origin # => "http://www.twingly.co.uk"
38
- url.path # => "/search"
39
- url.without_scheme # => "//www.twingly.co.uk/search"
40
- url.valid? # => "true"
30
+ url.scheme # => "http"
31
+ url.normalized.scheme # => "http"
32
+ url.trd # => "www"
33
+ url.normalized.trd # => "www"
34
+ url.sld # => "twingly"
35
+ url.normalized.sld # => "twingly"
36
+ url.tld # => "co.uk"
37
+ url.normalized.tld # => "co.uk"
38
+ url.ttld # => "uk"
39
+ url.normalized.ttld # => "uk"
40
+ url.domain # => "twingly.co.uk"
41
+ url.normalized.domain # => "twingly.co.uk"
42
+ url.host # => "www.twingly.co.uk"
43
+ url.normalized.host # => "www.twingly.co.uk"
44
+ url.origin # => "http://www.twingly.co.uk"
45
+ url.normalized.origin # => "http://www.twingly.co.uk"
46
+ url.path # => "/search"
47
+ url.normalized.path # => "/search"
48
+ url.without_scheme # => "//www.twingly.co.uk/search"
49
+ url.normalized.without_scheme # => "//www.twingly.co.uk/search"
50
+ url.userinfo # => ""
51
+ url.normalized.userinfo # => ""
52
+ url.user # => ""
53
+ url.normalized.user # => ""
54
+ url.password # => ""
55
+ url.normalized.password # => ""
56
+ url.valid? # => "true"
57
+ url.normalized.valid? # => "true"
58
+ url.to_s # => "http://www.twingly.co.uk/search"
59
+ url.normalized.to_s # => "http://www.twingly.co.uk/search"
60
+
61
+ url = Twingly::URL.parse("http://räksmörgås.макдональдс.рф/foo")
62
+ url.scheme # => "http"
63
+ url.normalized.scheme # => "http"
64
+ url.trd # => "räksmörgås"
65
+ url.normalized.trd # => "xn--rksmrgs-5wao1o"
66
+ url.sld # => "макдональдс"
67
+ url.normalized.sld # => "xn--80aalb1aicli8a5i"
68
+ url.tld # => "рф"
69
+ url.normalized.tld # => "xn--p1ai"
70
+ url.ttld # => "рф"
71
+ url.normalized.ttld # => "xn--p1ai"
72
+ url.domain # => "макдональдс.рф"
73
+ url.normalized.domain # => "xn--80aalb1aicli8a5i.xn--p1ai"
74
+ url.host # => "räksmörgås.макдональдс.рф"
75
+ url.normalized.host # => "xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
76
+ url.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
77
+ url.normalized.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
78
+ url.path # => "/foo"
79
+ url.normalized.path # => "/foo"
80
+ url.without_scheme # => "//räksmörgås.макдональдс.рф/foo"
81
+ url.normalized.without_scheme # => "//xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
82
+ url.userinfo # => ""
83
+ url.normalized.userinfo # => ""
84
+ url.user # => ""
85
+ url.normalized.user # => ""
86
+ url.password # => ""
87
+ url.normalized.password # => ""
88
+ url.valid? # => "true"
89
+ url.normalized.valid? # => "true"
90
+ url.to_s # => "http://räksmörgås.макдональдс.рф/foo"
91
+ url.normalized.to_s # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
92
+
93
+ url = Twingly::URL.parse("http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo")
94
+ url.scheme # => "http"
95
+ url.normalized.scheme # => "http"
96
+ url.trd # => "xn--rksmrgs-5wao1o"
97
+ url.normalized.trd # => "xn--rksmrgs-5wao1o"
98
+ url.sld # => "xn--80aalb1aicli8a5i"
99
+ url.normalized.sld # => "xn--80aalb1aicli8a5i"
100
+ url.tld # => "xn--p1ai"
101
+ url.normalized.tld # => "xn--p1ai"
102
+ url.ttld # => "xn--p1ai"
103
+ url.normalized.ttld # => "xn--p1ai"
104
+ url.domain # => "xn--80aalb1aicli8a5i.xn--p1ai"
105
+ url.normalized.domain # => "xn--80aalb1aicli8a5i.xn--p1ai"
106
+ url.host # => "xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
107
+ url.normalized.host # => "xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
108
+ url.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
109
+ url.normalized.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
110
+ url.path # => "/foo"
111
+ url.normalized.path # => "/foo"
112
+ url.without_scheme # => "//xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
113
+ url.normalized.without_scheme # => "//xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
114
+ url.userinfo # => ""
115
+ url.normalized.userinfo # => ""
116
+ url.user # => ""
117
+ url.normalized.user # => ""
118
+ url.password # => ""
119
+ url.normalized.password # => ""
120
+ url.valid? # => "true"
121
+ url.normalized.valid? # => "true"
122
+ url.to_s # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
123
+ url.normalized.to_s # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
41
124
 
42
125
  url = Twingly::URL.parse("https://admin:correcthorsebatterystaple@example.com/")
43
- url.scheme # => "https"
44
- url.trd # => ""
45
- url.sld # => "example"
46
- url.tld # => "com"
47
- url.ttld # => "com"
48
- url.domain # => "example.com"
49
- url.host # => "example.com"
50
- url.origin # => "https://example.com"
51
- url.path # => "/"
52
- url.without_scheme # => "//admin:correcthorsebatterystaple@example.com/"
53
- url.userinfo # => "admin:correcthorsebatterystaple"
54
- url.user # => "admin"
55
- url.password # => "correcthorsebatterystaple"
56
- url.valid? # => "true"
126
+ url.scheme # => "https"
127
+ url.normalized.scheme # => "https"
128
+ url.trd # => ""
129
+ url.normalized.trd # => "www"
130
+ url.sld # => "example"
131
+ url.normalized.sld # => "example"
132
+ url.tld # => "com"
133
+ url.normalized.tld # => "com"
134
+ url.ttld # => "com"
135
+ url.normalized.ttld # => "com"
136
+ url.domain # => "example.com"
137
+ url.normalized.domain # => "example.com"
138
+ url.host # => "example.com"
139
+ url.normalized.host # => "www.example.com"
140
+ url.origin # => "https://example.com"
141
+ url.normalized.origin # => "https://www.example.com"
142
+ url.path # => "/"
143
+ url.normalized.path # => "/"
144
+ url.without_scheme # => "//admin:correcthorsebatterystaple@example.com/"
145
+ url.normalized.without_scheme # => "//admin:correcthorsebatterystaple@www.example.com/"
146
+ url.userinfo # => "admin:correcthorsebatterystaple"
147
+ url.normalized.userinfo # => "admin:correcthorsebatterystaple"
148
+ url.user # => "admin"
149
+ url.normalized.user # => "admin"
150
+ url.password # => "correcthorsebatterystaple"
151
+ url.normalized.password # => "correcthorsebatterystaple"
152
+ url.valid? # => "true"
153
+ url.normalized.valid? # => "true"
154
+ url.to_s # => "https://admin:correcthorsebatterystaple@example.com/"
155
+ url.normalized.to_s # => "https://admin:correcthorsebatterystaple@www.example.com/"
57
156
  ```
58
157
 
59
158
  ### Dependencies
@@ -63,6 +162,14 @@ The gem requires libidn.
63
162
  sudo apt-get install libidn11 # Ubuntu
64
163
  brew install libidn # OS X
65
164
 
165
+ ## Development
166
+
167
+ To inspect the [Public Suffix List], this handy command can be used (also works in projects that use `twingly-url` as a dependency).
168
+
169
+ open $(bundle show public_suffix)/data/list.txt
170
+
171
+ [Public Suffix List]: https://github.com/weppos/publicsuffix-ruby
172
+
66
173
  ## Tests
67
174
 
68
175
  Run tests with
@@ -91,6 +198,10 @@ Note that this isn't a benchmark, we're using [ruby-prof] which will slow things
91
198
 
92
199
  bundle exec rake release
93
200
 
201
+ * Update the changelog with [GitHub Changelog Generator](https://github.com/skywinder/github-changelog-generator/) (`gem install github_changelog_generator` if you don't have it; set `CHANGELOG_GITHUB_TOKEN` to a personal access token to avoid rate limiting by GitHub). This command will update `CHANGELOG.md`; commit and push the changes manually.
202
+
203
+ github_changelog_generator
204
+
94
205
  [twingly-rubygems]: https://rubygems.org/profiles/twingly
95
206
  [ruby-prof]: http://ruby-prof.rubyforge.org/
96
207
  [examples]: examples/url.rb
@@ -0,0 +1,37 @@
1
+ require "public_suffix"
2
+
3
+ module Twingly
4
+ class PublicSuffixList
5
+ ACE_PREFIX = /\Axn\-\-/i.freeze
6
+
7
+ private_constant :ACE_PREFIX
8
+
9
+ # Extend the PSL with ASCII form of all internationalized domain names
10
+ def self.with_punycoded_names
11
+ list_data = File.read(PublicSuffix::List::DEFAULT_LIST_PATH)
12
+ list = PublicSuffix::List.parse(list_data, private_domains: false)
13
+
14
+ punycoded_names(list).each do |punycoded_name|
15
+ new_rule = PublicSuffix::Rule.factory(punycoded_name)
16
+ list.add(new_rule, reindex: false)
17
+ end
18
+
19
+ list.reindex!
20
+
21
+ list
22
+ end
23
+
24
+ private_class_method \
25
+ def self.punycoded_names(list)
26
+ names = list.map { |rule| Addressable::IDNA.to_ascii(rule.value) }
27
+ names.select { |name| punycoded_name?(name) }
28
+ end
29
+
30
+ private_class_method \
31
+ def self.punycoded_name?(name)
32
+ PublicSuffix::Domain.name_to_labels(name).any? do |label|
33
+ label =~ ACE_PREFIX
34
+ end
35
+ end
36
+ end
37
+ end
data/lib/twingly/url.rb CHANGED
@@ -2,17 +2,17 @@ require "addressable/uri"
2
2
  require "addressable/idna/native"
3
3
  require "public_suffix"
4
4
 
5
+ require_relative "public_suffix_list"
5
6
  require_relative "url/null_url"
6
7
  require_relative "url/error"
7
8
  require_relative "version"
8
9
 
9
- PublicSuffix::List.private_domains = false
10
-
11
10
  module Twingly
12
11
  class URL
13
12
  include Comparable
14
13
 
15
14
  ACCEPTED_SCHEMES = /\Ahttps?\z/i
15
+ CUSTOM_PSL = PublicSuffixList.with_punycoded_names
16
16
  ENDS_WITH_SLASH = /\/+$/
17
17
  ERRORS_TO_EXTEND = [
18
18
  Addressable::URI::InvalidURIError,
@@ -20,7 +20,10 @@ module Twingly
20
20
  IDN::Idna::IdnaError,
21
21
  ]
22
22
 
23
- private_constant :ACCEPTED_SCHEMES, :ENDS_WITH_SLASH, :ERRORS_TO_EXTEND
23
+ private_constant :ACCEPTED_SCHEMES
24
+ private_constant :CUSTOM_PSL
25
+ private_constant :ENDS_WITH_SLASH
26
+ private_constant :ERRORS_TO_EXTEND
24
27
 
25
28
  class << self
26
29
  def parse(potential_url)
@@ -36,9 +39,12 @@ module Twingly
36
39
  scheme = addressable_uri.scheme
37
40
  raise Twingly::URL::Error::ParseError unless scheme =~ ACCEPTED_SCHEMES
38
41
 
39
- display_uri = addressable_display_uri(addressable_uri)
42
+ # URLs that can't be normalized should not be valid
43
+ try_addressable_normalize(addressable_uri)
40
44
 
41
- public_suffix_domain = PublicSuffix.parse(display_uri.host)
45
+ host = addressable_uri.host
46
+ public_suffix_domain = PublicSuffix.parse(host, list: CUSTOM_PSL,
47
+ default_rule: nil)
42
48
  raise Twingly::URL::Error::ParseError if public_suffix_domain.nil?
43
49
 
44
50
  raise Twingly::URL::Error::ParseError if public_suffix_domain.sld.nil?
@@ -63,8 +69,8 @@ module Twingly
63
69
 
64
70
  # Workaround for the following bug in addressable:
65
71
  # https://github.com/sporkmonger/addressable/issues/224
66
- def addressable_display_uri(addressable_uri)
67
- addressable_uri.display_uri
72
+ def try_addressable_normalize(addressable_uri)
73
+ addressable_uri.normalize
68
74
  rescue ArgumentError => error
69
75
  if error.message.include?("invalid byte sequence in UTF-8")
70
76
  raise Twingly::URL::Error::ParseError
@@ -76,7 +82,7 @@ module Twingly
76
82
  private :new
77
83
  private :internal_parse
78
84
  private :to_addressable_uri
79
- private :addressable_display_uri
85
+ private :try_addressable_normalize
80
86
  end
81
87
 
82
88
  def initialize(addressable_uri, public_suffix_domain)
@@ -1,5 +1,5 @@
1
1
  module Twingly
2
2
  class URL
3
- VERSION = "4.2.0"
3
+ VERSION = "5.0.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twingly-url
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.2.0
4
+ version: 5.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Twingly AB
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-31 00:00:00.000000000 Z
11
+ date: 2016-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -16,28 +16,40 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2'
19
+ version: '2.4'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 2.4.0
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
27
  - - "~>"
25
28
  - !ruby/object:Gem::Version
26
- version: '2'
29
+ version: '2.4'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 2.4.0
27
33
  - !ruby/object:Gem::Dependency
28
34
  name: public_suffix
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
37
  - - "~>"
32
38
  - !ruby/object:Gem::Version
33
- version: '1.4'
39
+ version: '2.0'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 2.0.2
34
43
  type: :runtime
35
44
  prerelease: false
36
45
  version_requirements: !ruby/object:Gem::Requirement
37
46
  requirements:
38
47
  - - "~>"
39
48
  - !ruby/object:Gem::Version
40
- version: '1.4'
49
+ version: '2.0'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 2.0.2
41
53
  - !ruby/object:Gem::Dependency
42
54
  name: idn-ruby
43
55
  requirement: !ruby/object:Gem::Requirement
@@ -116,6 +128,7 @@ extensions: []
116
128
  extra_rdoc_files: []
117
129
  files:
118
130
  - README.md
131
+ - lib/twingly/public_suffix_list.rb
119
132
  - lib/twingly/url.rb
120
133
  - lib/twingly/url/error.rb
121
134
  - lib/twingly/url/hasher.rb