twingly-url 4.2.0 → 5.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e450e3b0005ae549f7465be270f474d921f1c5bd
4
- data.tar.gz: 897b0ff72f7d298edd7846068d6d3ae4a0197cec
3
+ metadata.gz: 5c842338446cf451ce1bfc0530f3ef53552208a6
4
+ data.tar.gz: b36802a8c3f58444b84254d73dfc5666a0d11b13
5
5
  SHA512:
6
- metadata.gz: 56efced017c87d78a72ef97cf2d98f89c5e2329b79d9ca55440d4564b5c24edd8a95d9f3ce3bf9ff65d23ca58990269fd02c7244abe12cd686b15da061e7ab9a
7
- data.tar.gz: 0fc17c60dafbbd9278f5ad1aee81553c42bf76e1734eacd45e944e2700f3770e6a0a3056630efec5c8274ed61d3ccc5e5deec95f34d079c7048390eca64e1245
6
+ metadata.gz: 803792e18a70bf53df2fc8e50b380e02244985e43c632ced88932ab0c14baa914835d514ddfaa03d0a4441275580acaa9562cca2f28a346ab803cdf20f0ff491
7
+ data.tar.gz: e169afed23eaabe22730db9416fa10c43a19629ee1374d97c83cf3a49de220b686137da39c2ab69e0f09fa7a6c097be3068d6a9f67ce077ae07d45ed99ae0bc6
data/README.md CHANGED
@@ -27,33 +27,132 @@ Usage (this output was created with [`examples/url.rb`][examples]):
27
27
  require "twingly/url"
28
28
 
29
29
  url = Twingly::URL.parse("http://www.twingly.co.uk/search")
30
- url.scheme # => "http"
31
- url.trd # => "www"
32
- url.sld # => "twingly"
33
- url.tld # => "co.uk"
34
- url.ttld # => "uk"
35
- url.domain # => "twingly.co.uk"
36
- url.host # => "www.twingly.co.uk"
37
- url.origin # => "http://www.twingly.co.uk"
38
- url.path # => "/search"
39
- url.without_scheme # => "//www.twingly.co.uk/search"
40
- url.valid? # => "true"
30
+ url.scheme # => "http"
31
+ url.normalized.scheme # => "http"
32
+ url.trd # => "www"
33
+ url.normalized.trd # => "www"
34
+ url.sld # => "twingly"
35
+ url.normalized.sld # => "twingly"
36
+ url.tld # => "co.uk"
37
+ url.normalized.tld # => "co.uk"
38
+ url.ttld # => "uk"
39
+ url.normalized.ttld # => "uk"
40
+ url.domain # => "twingly.co.uk"
41
+ url.normalized.domain # => "twingly.co.uk"
42
+ url.host # => "www.twingly.co.uk"
43
+ url.normalized.host # => "www.twingly.co.uk"
44
+ url.origin # => "http://www.twingly.co.uk"
45
+ url.normalized.origin # => "http://www.twingly.co.uk"
46
+ url.path # => "/search"
47
+ url.normalized.path # => "/search"
48
+ url.without_scheme # => "//www.twingly.co.uk/search"
49
+ url.normalized.without_scheme # => "//www.twingly.co.uk/search"
50
+ url.userinfo # => ""
51
+ url.normalized.userinfo # => ""
52
+ url.user # => ""
53
+ url.normalized.user # => ""
54
+ url.password # => ""
55
+ url.normalized.password # => ""
56
+ url.valid? # => "true"
57
+ url.normalized.valid? # => "true"
58
+ url.to_s # => "http://www.twingly.co.uk/search"
59
+ url.normalized.to_s # => "http://www.twingly.co.uk/search"
60
+
61
+ url = Twingly::URL.parse("http://räksmörgås.макдональдс.рф/foo")
62
+ url.scheme # => "http"
63
+ url.normalized.scheme # => "http"
64
+ url.trd # => "räksmörgås"
65
+ url.normalized.trd # => "xn--rksmrgs-5wao1o"
66
+ url.sld # => "макдональдс"
67
+ url.normalized.sld # => "xn--80aalb1aicli8a5i"
68
+ url.tld # => "рф"
69
+ url.normalized.tld # => "xn--p1ai"
70
+ url.ttld # => "рф"
71
+ url.normalized.ttld # => "xn--p1ai"
72
+ url.domain # => "макдональдс.рф"
73
+ url.normalized.domain # => "xn--80aalb1aicli8a5i.xn--p1ai"
74
+ url.host # => "räksmörgås.макдональдс.рф"
75
+ url.normalized.host # => "xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
76
+ url.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
77
+ url.normalized.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
78
+ url.path # => "/foo"
79
+ url.normalized.path # => "/foo"
80
+ url.without_scheme # => "//räksmörgås.макдональдс.рф/foo"
81
+ url.normalized.without_scheme # => "//xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
82
+ url.userinfo # => ""
83
+ url.normalized.userinfo # => ""
84
+ url.user # => ""
85
+ url.normalized.user # => ""
86
+ url.password # => ""
87
+ url.normalized.password # => ""
88
+ url.valid? # => "true"
89
+ url.normalized.valid? # => "true"
90
+ url.to_s # => "http://räksmörgås.макдональдс.рф/foo"
91
+ url.normalized.to_s # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
92
+
93
+ url = Twingly::URL.parse("http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo")
94
+ url.scheme # => "http"
95
+ url.normalized.scheme # => "http"
96
+ url.trd # => "xn--rksmrgs-5wao1o"
97
+ url.normalized.trd # => "xn--rksmrgs-5wao1o"
98
+ url.sld # => "xn--80aalb1aicli8a5i"
99
+ url.normalized.sld # => "xn--80aalb1aicli8a5i"
100
+ url.tld # => "xn--p1ai"
101
+ url.normalized.tld # => "xn--p1ai"
102
+ url.ttld # => "xn--p1ai"
103
+ url.normalized.ttld # => "xn--p1ai"
104
+ url.domain # => "xn--80aalb1aicli8a5i.xn--p1ai"
105
+ url.normalized.domain # => "xn--80aalb1aicli8a5i.xn--p1ai"
106
+ url.host # => "xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
107
+ url.normalized.host # => "xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
108
+ url.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
109
+ url.normalized.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
110
+ url.path # => "/foo"
111
+ url.normalized.path # => "/foo"
112
+ url.without_scheme # => "//xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
113
+ url.normalized.without_scheme # => "//xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
114
+ url.userinfo # => ""
115
+ url.normalized.userinfo # => ""
116
+ url.user # => ""
117
+ url.normalized.user # => ""
118
+ url.password # => ""
119
+ url.normalized.password # => ""
120
+ url.valid? # => "true"
121
+ url.normalized.valid? # => "true"
122
+ url.to_s # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
123
+ url.normalized.to_s # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
41
124
 
42
125
  url = Twingly::URL.parse("https://admin:correcthorsebatterystaple@example.com/")
43
- url.scheme # => "https"
44
- url.trd # => ""
45
- url.sld # => "example"
46
- url.tld # => "com"
47
- url.ttld # => "com"
48
- url.domain # => "example.com"
49
- url.host # => "example.com"
50
- url.origin # => "https://example.com"
51
- url.path # => "/"
52
- url.without_scheme # => "//admin:correcthorsebatterystaple@example.com/"
53
- url.userinfo # => "admin:correcthorsebatterystaple"
54
- url.user # => "admin"
55
- url.password # => "correcthorsebatterystaple"
56
- url.valid? # => "true"
126
+ url.scheme # => "https"
127
+ url.normalized.scheme # => "https"
128
+ url.trd # => ""
129
+ url.normalized.trd # => "www"
130
+ url.sld # => "example"
131
+ url.normalized.sld # => "example"
132
+ url.tld # => "com"
133
+ url.normalized.tld # => "com"
134
+ url.ttld # => "com"
135
+ url.normalized.ttld # => "com"
136
+ url.domain # => "example.com"
137
+ url.normalized.domain # => "example.com"
138
+ url.host # => "example.com"
139
+ url.normalized.host # => "www.example.com"
140
+ url.origin # => "https://example.com"
141
+ url.normalized.origin # => "https://www.example.com"
142
+ url.path # => "/"
143
+ url.normalized.path # => "/"
144
+ url.without_scheme # => "//admin:correcthorsebatterystaple@example.com/"
145
+ url.normalized.without_scheme # => "//admin:correcthorsebatterystaple@www.example.com/"
146
+ url.userinfo # => "admin:correcthorsebatterystaple"
147
+ url.normalized.userinfo # => "admin:correcthorsebatterystaple"
148
+ url.user # => "admin"
149
+ url.normalized.user # => "admin"
150
+ url.password # => "correcthorsebatterystaple"
151
+ url.normalized.password # => "correcthorsebatterystaple"
152
+ url.valid? # => "true"
153
+ url.normalized.valid? # => "true"
154
+ url.to_s # => "https://admin:correcthorsebatterystaple@example.com/"
155
+ url.normalized.to_s # => "https://admin:correcthorsebatterystaple@www.example.com/"
57
156
  ```
58
157
 
59
158
  ### Dependencies
@@ -63,6 +162,14 @@ The gem requires libidn.
63
162
  sudo apt-get install libidn11 # Ubuntu
64
163
  brew install libidn # OS X
65
164
 
165
+ ## Development
166
+
167
+ To inspect the [Public Suffix List], this handy command can be used (also works in projects that use `twingly-url` as a dependency).
168
+
169
+ open $(bundle show public_suffix)/data/list.txt
170
+
171
+ [Public Suffix List]: https://github.com/weppos/publicsuffix-ruby
172
+
66
173
  ## Tests
67
174
 
68
175
  Run tests with
@@ -91,6 +198,10 @@ Note that this isn't a benchmark, we're using [ruby-prof] which will slow things
91
198
 
92
199
  bundle exec rake release
93
200
 
201
+ * Update the changelog with [GitHub Changelog Generator](https://github.com/skywinder/github-changelog-generator/) (`gem install github_changelog_generator` if you don't have it; set `CHANGELOG_GITHUB_TOKEN` to a personal access token to avoid rate limiting by GitHub). The command below updates `CHANGELOG.md`; commit and push the result manually.
202
+
203
+ github_changelog_generator
204
+
94
205
  [twingly-rubygems]: https://rubygems.org/profiles/twingly
95
206
  [ruby-prof]: http://ruby-prof.rubyforge.org/
96
207
  [examples]: examples/url.rb
@@ -0,0 +1,37 @@
1
+ require "public_suffix"
2
+
3
+ module Twingly
4
+ class PublicSuffixList
5
+ ACE_PREFIX = /\Axn\-\-/i.freeze
6
+
7
+ private_constant :ACE_PREFIX
8
+
9
+ # Extend the PSL with ASCII form of all internationalized domain names
10
+ def self.with_punycoded_names
11
+ list_data = File.read(PublicSuffix::List::DEFAULT_LIST_PATH)
12
+ list = PublicSuffix::List.parse(list_data, private_domains: false)
13
+
14
+ punycoded_names(list).each do |punycoded_name|
15
+ new_rule = PublicSuffix::Rule.factory(punycoded_name)
16
+ list.add(new_rule, reindex: false)
17
+ end
18
+
19
+ list.reindex!
20
+
21
+ list
22
+ end
23
+
24
+ private_class_method \
25
+ def self.punycoded_names(list)
26
+ names = list.map { |rule| Addressable::IDNA.to_ascii(rule.value) }
27
+ names.select { |name| punycoded_name?(name) }
28
+ end
29
+
30
+ private_class_method \
31
+ def self.punycoded_name?(name)
32
+ PublicSuffix::Domain.name_to_labels(name).any? do |label|
33
+ label =~ ACE_PREFIX
34
+ end
35
+ end
36
+ end
37
+ end
data/lib/twingly/url.rb CHANGED
@@ -2,17 +2,17 @@ require "addressable/uri"
2
2
  require "addressable/idna/native"
3
3
  require "public_suffix"
4
4
 
5
+ require_relative "public_suffix_list"
5
6
  require_relative "url/null_url"
6
7
  require_relative "url/error"
7
8
  require_relative "version"
8
9
 
9
- PublicSuffix::List.private_domains = false
10
-
11
10
  module Twingly
12
11
  class URL
13
12
  include Comparable
14
13
 
15
14
  ACCEPTED_SCHEMES = /\Ahttps?\z/i
15
+ CUSTOM_PSL = PublicSuffixList.with_punycoded_names
16
16
  ENDS_WITH_SLASH = /\/+$/
17
17
  ERRORS_TO_EXTEND = [
18
18
  Addressable::URI::InvalidURIError,
@@ -20,7 +20,10 @@ module Twingly
20
20
  IDN::Idna::IdnaError,
21
21
  ]
22
22
 
23
- private_constant :ACCEPTED_SCHEMES, :ENDS_WITH_SLASH, :ERRORS_TO_EXTEND
23
+ private_constant :ACCEPTED_SCHEMES
24
+ private_constant :CUSTOM_PSL
25
+ private_constant :ENDS_WITH_SLASH
26
+ private_constant :ERRORS_TO_EXTEND
24
27
 
25
28
  class << self
26
29
  def parse(potential_url)
@@ -36,9 +39,12 @@ module Twingly
36
39
  scheme = addressable_uri.scheme
37
40
  raise Twingly::URL::Error::ParseError unless scheme =~ ACCEPTED_SCHEMES
38
41
 
39
- display_uri = addressable_display_uri(addressable_uri)
42
+ # URLs that can't be normalized should not be valid
43
+ try_addressable_normalize(addressable_uri)
40
44
 
41
- public_suffix_domain = PublicSuffix.parse(display_uri.host)
45
+ host = addressable_uri.host
46
+ public_suffix_domain = PublicSuffix.parse(host, list: CUSTOM_PSL,
47
+ default_rule: nil)
42
48
  raise Twingly::URL::Error::ParseError if public_suffix_domain.nil?
43
49
 
44
50
  raise Twingly::URL::Error::ParseError if public_suffix_domain.sld.nil?
@@ -63,8 +69,8 @@ module Twingly
63
69
 
64
70
  # Workaround for the following bug in addressable:
65
71
  # https://github.com/sporkmonger/addressable/issues/224
66
- def addressable_display_uri(addressable_uri)
67
- addressable_uri.display_uri
72
+ def try_addressable_normalize(addressable_uri)
73
+ addressable_uri.normalize
68
74
  rescue ArgumentError => error
69
75
  if error.message.include?("invalid byte sequence in UTF-8")
70
76
  raise Twingly::URL::Error::ParseError
@@ -76,7 +82,7 @@ module Twingly
76
82
  private :new
77
83
  private :internal_parse
78
84
  private :to_addressable_uri
79
- private :addressable_display_uri
85
+ private :try_addressable_normalize
80
86
  end
81
87
 
82
88
  def initialize(addressable_uri, public_suffix_domain)
@@ -1,5 +1,5 @@
1
1
  module Twingly
2
2
  class URL
3
- VERSION = "4.2.0"
3
+ VERSION = "5.0.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twingly-url
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.2.0
4
+ version: 5.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Twingly AB
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-31 00:00:00.000000000 Z
11
+ date: 2016-09-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: addressable
@@ -16,28 +16,40 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2'
19
+ version: '2.4'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 2.4.0
20
23
  type: :runtime
21
24
  prerelease: false
22
25
  version_requirements: !ruby/object:Gem::Requirement
23
26
  requirements:
24
27
  - - "~>"
25
28
  - !ruby/object:Gem::Version
26
- version: '2'
29
+ version: '2.4'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 2.4.0
27
33
  - !ruby/object:Gem::Dependency
28
34
  name: public_suffix
29
35
  requirement: !ruby/object:Gem::Requirement
30
36
  requirements:
31
37
  - - "~>"
32
38
  - !ruby/object:Gem::Version
33
- version: '1.4'
39
+ version: '2.0'
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 2.0.2
34
43
  type: :runtime
35
44
  prerelease: false
36
45
  version_requirements: !ruby/object:Gem::Requirement
37
46
  requirements:
38
47
  - - "~>"
39
48
  - !ruby/object:Gem::Version
40
- version: '1.4'
49
+ version: '2.0'
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 2.0.2
41
53
  - !ruby/object:Gem::Dependency
42
54
  name: idn-ruby
43
55
  requirement: !ruby/object:Gem::Requirement
@@ -116,6 +128,7 @@ extensions: []
116
128
  extra_rdoc_files: []
117
129
  files:
118
130
  - README.md
131
+ - lib/twingly/public_suffix_list.rb
119
132
  - lib/twingly/url.rb
120
133
  - lib/twingly/url/error.rb
121
134
  - lib/twingly/url/hasher.rb