public_suffix 1.5.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.rubocop_defaults.yml +127 -0
- data/.travis.yml +10 -5
- data/2.0-Upgrade.md +35 -0
- data/CHANGELOG.md +25 -0
- data/Gemfile +7 -1
- data/LICENSE.txt +1 -1
- data/README.md +89 -62
- data/Rakefile +23 -20
- data/data/{definitions.txt → list.txt} +472 -286
- data/lib/public_suffix.rb +96 -52
- data/lib/public_suffix/domain.rb +26 -156
- data/lib/public_suffix/errors.rb +5 -17
- data/lib/public_suffix/list.rb +107 -122
- data/lib/public_suffix/rule.rb +169 -190
- data/lib/public_suffix/version.rb +3 -13
- data/public_suffix.gemspec +4 -4
- data/test/acceptance_test.rb +57 -34
- data/test/benchmark_helper.rb +4 -0
- data/test/execution_profiler.rb +14 -0
- data/test/initialization_profiler.rb +11 -0
- data/test/performance_benchmark.rb +38 -0
- data/test/psl_test.rb +49 -0
- data/test/test_helper.rb +12 -5
- data/test/tests.txt +98 -0
- data/test/unit/domain_test.rb +18 -84
- data/test/unit/errors_test.rb +2 -2
- data/test/unit/list_test.rb +131 -59
- data/test/unit/public_suffix_test.rb +105 -34
- data/test/unit/rule_test.rb +52 -135
- metadata +20 -6
- data/.gemtest +0 -0
data/lib/public_suffix.rb
CHANGED
@@ -1,28 +1,30 @@
|
|
1
|
-
#
|
2
|
-
# Public Suffix
|
1
|
+
# = Public Suffix
|
3
2
|
#
|
4
3
|
# Domain name parser based on the Public Suffix List.
|
5
4
|
#
|
6
|
-
# Copyright (c) 2009-
|
7
|
-
#
|
5
|
+
# Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
|
8
6
|
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
require
|
13
|
-
require
|
7
|
+
require "public_suffix/domain"
|
8
|
+
require "public_suffix/version"
|
9
|
+
require "public_suffix/errors"
|
10
|
+
require "public_suffix/rule"
|
11
|
+
require "public_suffix/list"
|
14
12
|
|
13
|
+
# PublicSuffix is a Ruby domain name parser based on the Public Suffix List.
|
14
|
+
#
|
15
|
+
# The [Public Suffix List](https://publicsuffix.org) is a cross-vendor initiative
|
16
|
+
# to provide an accurate list of domain name suffixes.
|
17
|
+
#
|
18
|
+
# The Public Suffix List is an initiative of the Mozilla Project,
|
19
|
+
# but is maintained as a community resource. It is available for use in any software,
|
20
|
+
# but was originally created to meet the needs of browser manufacturers.
|
15
21
|
module PublicSuffix
|
16
22
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
#
|
22
|
-
# @param [PublicSuffix::List] list
|
23
|
-
# The rule list to search, defaults to the default {PublicSuffix::List}
|
24
|
-
#
|
25
|
-
# @return [PublicSuffix::Domain]
|
23
|
+
DOT = ".".freeze
|
24
|
+
BANG = "!".freeze
|
25
|
+
STAR = "*".freeze
|
26
|
+
|
27
|
+
# Parses +name+ and returns the {PublicSuffix::Domain} instance.
|
26
28
|
#
|
27
29
|
# @example Parse a valid domain
|
28
30
|
# PublicSuffix.parse("google.com")
|
@@ -48,47 +50,37 @@ module PublicSuffix
|
|
48
50
|
# PublicSuffix.parse("http://www.google.com")
|
49
51
|
# # => PublicSuffix::DomainInvalid
|
50
52
|
#
|
53
|
+
#
|
54
|
+
# @param [String, #to_s] name The domain name or fully qualified domain name to parse.
|
55
|
+
# @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
|
56
|
+
# @param [Boolean] ignore_private
|
57
|
+
# @return [PublicSuffix::Domain]
|
58
|
+
#
|
51
59
|
# @raise [PublicSuffix::Error]
|
52
60
|
# If domain is not a valid domain.
|
53
61
|
# @raise [PublicSuffix::DomainNotAllowed]
|
54
|
-
# If a rule for +domain+ is found, but the rule
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
rule
|
62
|
+
# If a rule for +domain+ is found, but the rule doesn't allow +domain+.
|
63
|
+
def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
|
64
|
+
what = normalize(name)
|
65
|
+
raise what if what.is_a?(DomainInvalid)
|
66
|
+
|
67
|
+
rule = list.find(what, default: default_rule, ignore_private: ignore_private)
|
60
68
|
|
61
69
|
if rule.nil?
|
62
|
-
raise DomainInvalid, "`#{
|
70
|
+
raise DomainInvalid, "`#{what}` is not a valid domain"
|
63
71
|
end
|
64
|
-
if
|
65
|
-
raise DomainNotAllowed, "`#{
|
72
|
+
if rule.decompose(what).last.nil?
|
73
|
+
raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy"
|
66
74
|
end
|
67
75
|
|
68
|
-
|
69
|
-
|
70
|
-
parts = left.split(".")
|
71
|
-
# If we have 0 parts left, there is just a tld and no domain or subdomain
|
72
|
-
# If we have 1 part left, there is just a tld, domain and not subdomain
|
73
|
-
# If we have 2 parts left, the last part is the domain, the other parts (combined) are the subdomain
|
74
|
-
tld = right
|
75
|
-
sld = parts.empty? ? nil : parts.pop
|
76
|
-
trd = parts.empty? ? nil : parts.join(".")
|
77
|
-
|
78
|
-
Domain.new(tld, sld, trd)
|
76
|
+
decompose(what, rule)
|
79
77
|
end
|
80
78
|
|
81
|
-
# Checks whether +domain+ is assigned and allowed,
|
82
|
-
# without actually parsing it.
|
79
|
+
# Checks whether +domain+ is assigned and allowed, without actually parsing it.
|
83
80
|
#
|
84
81
|
# This method doesn't care whether domain is a domain or subdomain.
|
85
82
|
# The validation is performed using the default {PublicSuffix::List}.
|
86
83
|
#
|
87
|
-
# @param [String, #to_s] domain
|
88
|
-
# The domain name or fully qualified domain name to validate.
|
89
|
-
#
|
90
|
-
# @return [Boolean]
|
91
|
-
#
|
92
84
|
# @example Validate a valid domain
|
93
85
|
# PublicSuffix.valid?("example.com")
|
94
86
|
# # => true
|
@@ -97,9 +89,9 @@ module PublicSuffix
|
|
97
89
|
# PublicSuffix.valid?("www.example.com")
|
98
90
|
# # => true
|
99
91
|
#
|
100
|
-
# @example Validate a not-
|
101
|
-
# PublicSuffix.valid?("example.
|
102
|
-
# # =>
|
92
|
+
# @example Validate a not-listed domain
|
93
|
+
# PublicSuffix.valid?("example.tldnotlisted")
|
94
|
+
# # => true
|
103
95
|
#
|
104
96
|
# @example Validate a not-allowed domain
|
105
97
|
# PublicSuffix.valid?("example.do")
|
@@ -117,10 +109,62 @@ module PublicSuffix
|
|
117
109
|
# PublicSuffix.valid?("http://www.example.com")
|
118
110
|
# # => false
|
119
111
|
#
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
112
|
+
#
|
113
|
+
# @param [String, #to_s] name The domain name or fully qualified domain name to validate.
|
114
|
+
# @param [Boolean] ignore_private
|
115
|
+
# @return [Boolean]
|
116
|
+
def self.valid?(name, list: List.default, default_rule: nil, ignore_private: false)
|
117
|
+
what = normalize(name)
|
118
|
+
return false if what.is_a?(DomainInvalid)
|
119
|
+
|
120
|
+
default_rule ||= list.default_rule
|
121
|
+
rule = list.find(what, default: default_rule, ignore_private: ignore_private)
|
122
|
+
|
123
|
+
!rule.nil? && !rule.decompose(what).last.nil?
|
124
|
+
end
|
125
|
+
|
126
|
+
# Attempts to parse the name and returns the domain, if valid.
|
127
|
+
#
|
128
|
+
# This method doesn't raise. Instead, it returns nil if the domain is not valid for whatever reason.
|
129
|
+
#
|
130
|
+
# @param [String, #to_s] name The domain name or fully qualified domain name to parse.
|
131
|
+
# @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
|
132
|
+
# @param [Boolean] ignore_private
|
133
|
+
# @return [String, nil] The domain, or nil if the name is not valid.
|
134
|
+
def self.domain(name, **options)
|
135
|
+
parse(name, **options).domain
|
136
|
+
rescue PublicSuffix::Error
|
137
|
+
nil
|
138
|
+
end
|
139
|
+
|
140
|
+
|
141
|
+
# private
|
142
|
+
|
143
|
+
def self.decompose(name, rule)
|
144
|
+
left, right = rule.decompose(name)
|
145
|
+
|
146
|
+
parts = left.split(DOT)
|
147
|
+
# If we have 0 parts left, there is just a tld and no domain or subdomain
|
148
|
+
# If we have 1 part left, there is just a tld and a domain, and no subdomain
|
149
|
+
# If we have 2 parts left, the last part is the domain, the other parts (combined) are the subdomain
|
150
|
+
tld = right
|
151
|
+
sld = parts.empty? ? nil : parts.pop
|
152
|
+
trd = parts.empty? ? nil : parts.join(DOT)
|
153
|
+
|
154
|
+
Domain.new(tld, sld, trd)
|
155
|
+
end
|
156
|
+
|
157
|
+
# Pretend we know how to deal with user input.
|
158
|
+
def self.normalize(name)
|
159
|
+
name = name.to_s.dup
|
160
|
+
name.strip!
|
161
|
+
name.chomp!(DOT)
|
162
|
+
name.downcase!
|
163
|
+
|
164
|
+
return DomainInvalid.new("Name is blank") if name.empty?
|
165
|
+
return DomainInvalid.new("Name starts with a dot") if name.start_with?(DOT)
|
166
|
+
return DomainInvalid.new("%s is not expected to contain a scheme" % name) if name.include?("://")
|
167
|
+
name
|
124
168
|
end
|
125
169
|
|
126
170
|
end
|
data/lib/public_suffix/domain.rb
CHANGED
@@ -1,37 +1,33 @@
|
|
1
|
-
#
|
2
|
-
# Public Suffix
|
1
|
+
# = Public Suffix
|
3
2
|
#
|
4
3
|
# Domain name parser based on the Public Suffix List.
|
5
4
|
#
|
6
|
-
# Copyright (c) 2009-
|
7
|
-
#
|
5
|
+
# Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
|
8
6
|
|
9
7
|
module PublicSuffix
|
10
8
|
|
9
|
+
# Domain represents a domain name, composed of a TLD, SLD and TRD.
|
11
10
|
class Domain
|
12
11
|
|
13
|
-
# Splits a string into
|
14
|
-
# as a domain in reverse order from the input string.
|
15
|
-
#
|
16
|
-
# The input is not validated, but it is assumed to be a valid domain.
|
12
|
+
# Splits a string into the labels, that is the dot-separated parts.
|
17
13
|
#
|
18
|
-
#
|
19
|
-
# The domain name to split.
|
20
|
-
#
|
21
|
-
# @return [Array<String>]
|
14
|
+
# The input is not validated, but it is assumed to be a valid domain name.
|
22
15
|
#
|
23
16
|
# @example
|
24
17
|
#
|
25
|
-
#
|
26
|
-
# # => ['
|
18
|
+
# name_to_labels('example.com')
|
19
|
+
# # => ['example', 'com']
|
27
20
|
#
|
28
|
-
#
|
29
|
-
# # => ['
|
21
|
+
# name_to_labels('example.co.uk')
|
22
|
+
# # => ['example', 'co', 'uk']
|
30
23
|
#
|
31
|
-
|
32
|
-
|
24
|
+
# @param name [String, #to_s] The domain name to split.
|
25
|
+
# @return [Array<String>]
|
26
|
+
def self.name_to_labels(name)
|
27
|
+
name.to_s.split(DOT)
|
33
28
|
end
|
34
29
|
|
30
|
+
|
35
31
|
attr_reader :tld, :sld, :trd
|
36
32
|
|
37
33
|
# Creates and returns a new {PublicSuffix::Domain} instance.
|
@@ -64,7 +60,7 @@ module PublicSuffix
|
|
64
60
|
# PublicSuffix::Domain.new("com", "example", "wwww")
|
65
61
|
# # => #<PublicSuffix::Domain @tld="com", @trd=nil, @sld="example">
|
66
62
|
#
|
67
|
-
def initialize(*args
|
63
|
+
def initialize(*args)
|
68
64
|
@tld, @sld, @trd = args
|
69
65
|
yield(self) if block_given?
|
70
66
|
end
|
@@ -105,7 +101,7 @@ module PublicSuffix
|
|
105
101
|
# # => "www.google.com"
|
106
102
|
#
|
107
103
|
def name
|
108
|
-
[@trd, @sld, @tld].compact.join(
|
104
|
+
[@trd, @sld, @tld].compact.join(DOT)
|
109
105
|
end
|
110
106
|
|
111
107
|
# Returns a domain-like representation of this object
|
@@ -123,10 +119,6 @@ module PublicSuffix
|
|
123
119
|
# This method doesn't validate the input. It handles the domain
|
124
120
|
# as a valid domain name and simply applies the necessary transformations.
|
125
121
|
#
|
126
|
-
# # This is an invalid domain
|
127
|
-
# PublicSuffix::Domain.new("qqq", "google").domain
|
128
|
-
# # => "google.qqq"
|
129
|
-
#
|
130
122
|
# This method returns a FQD, not just the domain part.
|
131
123
|
# To get the domain part, use <tt>#sld</tt> (aka second level domain).
|
132
124
|
#
|
@@ -136,18 +128,15 @@ module PublicSuffix
|
|
136
128
|
# PublicSuffix::Domain.new("com", "google", "www").sld
|
137
129
|
# # => "google"
|
138
130
|
#
|
139
|
-
# @return [String]
|
140
|
-
#
|
141
131
|
# @see #domain?
|
142
132
|
# @see #subdomain
|
143
133
|
#
|
134
|
+
# @return [String, nil]
|
144
135
|
def domain
|
145
|
-
if domain?
|
146
|
-
[@sld, @tld].join(".")
|
147
|
-
end
|
136
|
+
[@sld, @tld].join(DOT) if domain?
|
148
137
|
end
|
149
138
|
|
150
|
-
# Returns a
|
139
|
+
# Returns a subdomain-like representation of this object
|
151
140
|
# if the object is a {#subdomain?}, <tt>nil</tt> otherwise.
|
152
141
|
#
|
153
142
|
# PublicSuffix::Domain.new("com").subdomain
|
@@ -162,11 +151,7 @@ module PublicSuffix
|
|
162
151
|
# This method doesn't validate the input. It handles the domain
|
163
152
|
# as a valid domain name and simply applies the necessary transformations.
|
164
153
|
#
|
165
|
-
#
|
166
|
-
# PublicSuffix::Domain.new("qqq", "google", "www").subdomain
|
167
|
-
# # => "www.google.qqq"
|
168
|
-
#
|
169
|
-
# This method returns a FQD, not just the domain part.
|
154
|
+
# This method returns a FQD, not just the subdomain part.
|
170
155
|
# To get the subdomain part, use <tt>#trd</tt> (aka third level domain).
|
171
156
|
#
|
172
157
|
# PublicSuffix::Domain.new("com", "google", "www").subdomain
|
@@ -175,25 +160,12 @@ module PublicSuffix
|
|
175
160
|
# PublicSuffix::Domain.new("com", "google", "www").trd
|
176
161
|
# # => "www"
|
177
162
|
#
|
178
|
-
# @return [String]
|
179
|
-
#
|
180
163
|
# @see #subdomain?
|
181
164
|
# @see #domain
|
182
165
|
#
|
166
|
+
# @return [String, nil]
|
183
167
|
def subdomain
|
184
|
-
if subdomain?
|
185
|
-
[@trd, @sld, @tld].join(".")
|
186
|
-
end
|
187
|
-
end
|
188
|
-
|
189
|
-
# Returns the rule matching this domain
|
190
|
-
# in the default {PublicSuffix::List}.
|
191
|
-
#
|
192
|
-
# @return [PublicSuffix::Rule::Base, nil]
|
193
|
-
# The rule instance a rule matches current domain,
|
194
|
-
# nil if no rule is found.
|
195
|
-
def rule
|
196
|
-
List.default.find(name)
|
168
|
+
[@trd, @sld, @tld].join(DOT) if subdomain?
|
197
169
|
end
|
198
170
|
|
199
171
|
# Checks whether <tt>self</tt> looks like a domain.
|
@@ -204,8 +176,6 @@ module PublicSuffix
|
|
204
176
|
# If you also want to validate the domain,
|
205
177
|
# use {#valid_domain?} instead.
|
206
178
|
#
|
207
|
-
# @return [Boolean]
|
208
|
-
#
|
209
179
|
# @example
|
210
180
|
#
|
211
181
|
# PublicSuffix::Domain.new("com").domain?
|
@@ -219,11 +189,12 @@ module PublicSuffix
|
|
219
189
|
#
|
220
190
|
# # This is an invalid domain, but returns true
|
221
191
|
# # because this method doesn't validate the content.
|
222
|
-
# PublicSuffix::Domain.new("
|
192
|
+
# PublicSuffix::Domain.new("com", nil).domain?
|
223
193
|
# # => true
|
224
194
|
#
|
225
195
|
# @see #subdomain?
|
226
196
|
#
|
197
|
+
# @return [Boolean]
|
227
198
|
def domain?
|
228
199
|
!(@tld.nil? || @sld.nil?)
|
229
200
|
end
|
@@ -236,8 +207,6 @@ module PublicSuffix
|
|
236
207
|
# If you also want to validate the domain,
|
237
208
|
# use {#valid_subdomain?} instead.
|
238
209
|
#
|
239
|
-
# @return [Boolean]
|
240
|
-
#
|
241
210
|
# @example
|
242
211
|
#
|
243
212
|
# PublicSuffix::Domain.new("com").subdomain?
|
@@ -251,115 +220,16 @@ module PublicSuffix
|
|
251
220
|
#
|
252
221
|
# # This is an invalid domain, but returns true
|
253
222
|
# # because this method doesn't validate the content.
|
254
|
-
# PublicSuffix::Domain.new("
|
223
|
+
# PublicSuffix::Domain.new("com", "example", nil).subdomain?
|
255
224
|
# # => true
|
256
225
|
#
|
257
226
|
# @see #domain?
|
258
227
|
#
|
228
|
+
# @return [Boolean]
|
259
229
|
def subdomain?
|
260
230
|
!(@tld.nil? || @sld.nil? || @trd.nil?)
|
261
231
|
end
|
262
232
|
|
263
|
-
# Checks whether <tt>self</tt> is exclusively a domain,
|
264
|
-
# and not a subdomain.
|
265
|
-
#
|
266
|
-
# @return [Boolean]
|
267
|
-
def is_a_domain?
|
268
|
-
domain? && !subdomain?
|
269
|
-
end
|
270
|
-
|
271
|
-
# Checks whether <tt>self</tt> is exclusively a subdomain.
|
272
|
-
#
|
273
|
-
# @return [Boolean]
|
274
|
-
def is_a_subdomain?
|
275
|
-
subdomain?
|
276
|
-
end
|
277
|
-
|
278
|
-
# Checks whether <tt>self</tt> is assigned and allowed
|
279
|
-
# according to default {List}.
|
280
|
-
#
|
281
|
-
# This method triggers a new rule lookup in the default {List},
|
282
|
-
# which is a quite intensive task.
|
283
|
-
#
|
284
|
-
# @return [Boolean]
|
285
|
-
#
|
286
|
-
# @example Check a valid domain
|
287
|
-
# Domain.new("com", "example").valid?
|
288
|
-
# # => true
|
289
|
-
#
|
290
|
-
# @example Check a valid subdomain
|
291
|
-
# Domain.new("com", "example", "www").valid?
|
292
|
-
# # => true
|
293
|
-
#
|
294
|
-
# @example Check a not-assigned domain
|
295
|
-
# Domain.new("qqq", "example").valid?
|
296
|
-
# # => false
|
297
|
-
#
|
298
|
-
# @example Check a not-allowed domain
|
299
|
-
# Domain.new("do", "example").valid?
|
300
|
-
# # => false
|
301
|
-
# Domain.new("do", "example", "www").valid?
|
302
|
-
# # => true
|
303
|
-
#
|
304
|
-
def valid?
|
305
|
-
r = rule
|
306
|
-
!r.nil? && r.allow?(name)
|
307
|
-
end
|
308
|
-
|
309
|
-
# Checks whether <tt>self</tt> looks like a domain and validates
|
310
|
-
# according to default {List}.
|
311
|
-
#
|
312
|
-
# @return [Boolean]
|
313
|
-
#
|
314
|
-
# @example
|
315
|
-
#
|
316
|
-
# PublicSuffix::Domain.new("com").domain?
|
317
|
-
# # => false
|
318
|
-
#
|
319
|
-
# PublicSuffix::Domain.new("com", "google").domain?
|
320
|
-
# # => true
|
321
|
-
#
|
322
|
-
# PublicSuffix::Domain.new("com", "google", "www").domain?
|
323
|
-
# # => true
|
324
|
-
#
|
325
|
-
# # This is an invalid domain
|
326
|
-
# PublicSuffix::Domain.new("qqq", "google").false?
|
327
|
-
# # => true
|
328
|
-
#
|
329
|
-
# @see #domain?
|
330
|
-
# @see #valid?
|
331
|
-
#
|
332
|
-
def valid_domain?
|
333
|
-
domain? && valid?
|
334
|
-
end
|
335
|
-
|
336
|
-
# Checks whether <tt>self</tt> looks like a subdomain and validates
|
337
|
-
# according to default {List}.
|
338
|
-
#
|
339
|
-
# @return [Boolean]
|
340
|
-
#
|
341
|
-
# @example
|
342
|
-
#
|
343
|
-
# PublicSuffix::Domain.new("com").subdomain?
|
344
|
-
# # => false
|
345
|
-
#
|
346
|
-
# PublicSuffix::Domain.new("com", "google").subdomain?
|
347
|
-
# # => false
|
348
|
-
#
|
349
|
-
# PublicSuffix::Domain.new("com", "google", "www").subdomain?
|
350
|
-
# # => true
|
351
|
-
#
|
352
|
-
# # This is an invalid domain
|
353
|
-
# PublicSuffix::Domain.new("qqq", "google", "www").subdomain?
|
354
|
-
# # => false
|
355
|
-
#
|
356
|
-
# @see #subdomain?
|
357
|
-
# @see #valid?
|
358
|
-
#
|
359
|
-
def valid_subdomain?
|
360
|
-
subdomain? && valid?
|
361
|
-
end
|
362
|
-
|
363
233
|
end
|
364
234
|
|
365
235
|
end
|