public_suffix 1.5.3 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.rubocop_defaults.yml +127 -0
- data/.travis.yml +10 -5
- data/2.0-Upgrade.md +35 -0
- data/CHANGELOG.md +25 -0
- data/Gemfile +7 -1
- data/LICENSE.txt +1 -1
- data/README.md +89 -62
- data/Rakefile +23 -20
- data/data/{definitions.txt → list.txt} +472 -286
- data/lib/public_suffix.rb +96 -52
- data/lib/public_suffix/domain.rb +26 -156
- data/lib/public_suffix/errors.rb +5 -17
- data/lib/public_suffix/list.rb +107 -122
- data/lib/public_suffix/rule.rb +169 -190
- data/lib/public_suffix/version.rb +3 -13
- data/public_suffix.gemspec +4 -4
- data/test/acceptance_test.rb +57 -34
- data/test/benchmark_helper.rb +4 -0
- data/test/execution_profiler.rb +14 -0
- data/test/initialization_profiler.rb +11 -0
- data/test/performance_benchmark.rb +38 -0
- data/test/psl_test.rb +49 -0
- data/test/test_helper.rb +12 -5
- data/test/tests.txt +98 -0
- data/test/unit/domain_test.rb +18 -84
- data/test/unit/errors_test.rb +2 -2
- data/test/unit/list_test.rb +131 -59
- data/test/unit/public_suffix_test.rb +105 -34
- data/test/unit/rule_test.rb +52 -135
- metadata +20 -6
- data/.gemtest +0 -0
data/lib/public_suffix.rb
CHANGED
@@ -1,28 +1,30 @@
|
|
1
|
-
#
|
2
|
-
# Public Suffix
|
1
|
+
# = Public Suffix
|
3
2
|
#
|
4
3
|
# Domain name parser based on the Public Suffix List.
|
5
4
|
#
|
6
|
-
# Copyright (c) 2009-
|
7
|
-
#
|
5
|
+
# Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
|
8
6
|
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
require
|
13
|
-
require
|
7
|
+
require "public_suffix/domain"
|
8
|
+
require "public_suffix/version"
|
9
|
+
require "public_suffix/errors"
|
10
|
+
require "public_suffix/rule"
|
11
|
+
require "public_suffix/list"
|
14
12
|
|
13
|
+
# PublicSuffix is a Ruby domain name parser based on the Public Suffix List.
|
14
|
+
#
|
15
|
+
# The [Public Suffix List](https://publicsuffix.org) is a cross-vendor initiative
|
16
|
+
# to provide an accurate list of domain name suffixes.
|
17
|
+
#
|
18
|
+
# The Public Suffix List is an initiative of the Mozilla Project,
|
19
|
+
# but is maintained as a community resource. It is available for use in any software,
|
20
|
+
# but was originally created to meet the needs of browser manufacturers.
|
15
21
|
module PublicSuffix
|
16
22
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
#
|
22
|
-
# @param [PublicSuffix::List] list
|
23
|
-
# The rule list to search, defaults to the default {PublicSuffix::List}
|
24
|
-
#
|
25
|
-
# @return [PublicSuffix::Domain]
|
23
|
+
DOT = ".".freeze
|
24
|
+
BANG = "!".freeze
|
25
|
+
STAR = "*".freeze
|
26
|
+
|
27
|
+
# Parses +name+ and returns the {PublicSuffix::Domain} instance.
|
26
28
|
#
|
27
29
|
# @example Parse a valid domain
|
28
30
|
# PublicSuffix.parse("google.com")
|
@@ -48,47 +50,37 @@ module PublicSuffix
|
|
48
50
|
# PublicSuffix.parse("http://www.google.com")
|
49
51
|
# # => PublicSuffix::DomainInvalid
|
50
52
|
#
|
53
|
+
#
|
54
|
+
# @param [String, #to_s] name The domain name or fully qualified domain name to parse.
|
55
|
+
# @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
|
56
|
+
# @param [Boolean] ignore_private
|
57
|
+
# @return [PublicSuffix::Domain]
|
58
|
+
#
|
51
59
|
# @raise [PublicSuffix::Error]
|
52
60
|
# If domain is not a valid domain.
|
53
61
|
# @raise [PublicSuffix::DomainNotAllowed]
|
54
|
-
# If a rule for +domain+ is found, but the rule
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
rule
|
62
|
+
# If a rule for +domain+ is found, but the rule doesn't allow +domain+.
|
63
|
+
def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
|
64
|
+
what = normalize(name)
|
65
|
+
raise what if what.is_a?(DomainInvalid)
|
66
|
+
|
67
|
+
rule = list.find(what, default: default_rule, ignore_private: ignore_private)
|
60
68
|
|
61
69
|
if rule.nil?
|
62
|
-
raise DomainInvalid, "`#{
|
70
|
+
raise DomainInvalid, "`#{what}` is not a valid domain"
|
63
71
|
end
|
64
|
-
if
|
65
|
-
raise DomainNotAllowed, "`#{
|
72
|
+
if rule.decompose(what).last.nil?
|
73
|
+
raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy"
|
66
74
|
end
|
67
75
|
|
68
|
-
|
69
|
-
|
70
|
-
parts = left.split(".")
|
71
|
-
# If we have 0 parts left, there is just a tld and no domain or subdomain
|
72
|
-
# If we have 1 part left, there is just a tld, domain and not subdomain
|
73
|
-
# If we have 2 parts left, the last part is the domain, the other parts (combined) are the subdomain
|
74
|
-
tld = right
|
75
|
-
sld = parts.empty? ? nil : parts.pop
|
76
|
-
trd = parts.empty? ? nil : parts.join(".")
|
77
|
-
|
78
|
-
Domain.new(tld, sld, trd)
|
76
|
+
decompose(what, rule)
|
79
77
|
end
|
80
78
|
|
81
|
-
# Checks whether +domain+ is assigned and allowed,
|
82
|
-
# without actually parsing it.
|
79
|
+
# Checks whether +domain+ is assigned and allowed, without actually parsing it.
|
83
80
|
#
|
84
81
|
# This method doesn't care whether domain is a domain or subdomain.
|
85
82
|
# The validation is performed using the default {PublicSuffix::List}.
|
86
83
|
#
|
87
|
-
# @param [String, #to_s] domain
|
88
|
-
# The domain name or fully qualified domain name to validate.
|
89
|
-
#
|
90
|
-
# @return [Boolean]
|
91
|
-
#
|
92
84
|
# @example Validate a valid domain
|
93
85
|
# PublicSuffix.valid?("example.com")
|
94
86
|
# # => true
|
@@ -97,9 +89,9 @@ module PublicSuffix
|
|
97
89
|
# PublicSuffix.valid?("www.example.com")
|
98
90
|
# # => true
|
99
91
|
#
|
100
|
-
# @example Validate a not-
|
101
|
-
# PublicSuffix.valid?("example.
|
102
|
-
# # =>
|
92
|
+
# @example Validate a not-listed domain
|
93
|
+
# PublicSuffix.valid?("example.tldnotlisted")
|
94
|
+
# # => true
|
103
95
|
#
|
104
96
|
# @example Validate a not-allowed domain
|
105
97
|
# PublicSuffix.valid?("example.do")
|
@@ -117,10 +109,62 @@ module PublicSuffix
|
|
117
109
|
# PublicSuffix.valid?("http://www.example.com")
|
118
110
|
# # => false
|
119
111
|
#
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
112
|
+
#
|
113
|
+
# @param [String, #to_s] name The domain name or fully qualified domain name to validate.
|
114
|
+
# @param [Boolean] ignore_private
|
115
|
+
# @return [Boolean]
|
116
|
+
def self.valid?(name, list: List.default, default_rule: nil, ignore_private: false)
|
117
|
+
what = normalize(name)
|
118
|
+
return false if what.is_a?(DomainInvalid)
|
119
|
+
|
120
|
+
default_rule ||= list.default_rule
|
121
|
+
rule = list.find(what, default: default_rule, ignore_private: ignore_private)
|
122
|
+
|
123
|
+
!rule.nil? && !rule.decompose(what).last.nil?
|
124
|
+
end
|
125
|
+
|
126
|
+
# Attempts to parse the name and returns the domain, if valid.
|
127
|
+
#
|
128
|
+
# This method doesn't raise. Instead, it returns nil if the domain is not valid for whatever reason.
|
129
|
+
#
|
130
|
+
# @param [String, #to_s] name The domain name or fully qualified domain name to parse.
|
131
|
+
# @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
|
132
|
+
# @param [Boolean] ignore_private
|
133
|
+
# @return [String, nil] the domain, or nil if the name is not valid
|
134
|
+
def self.domain(name, **options)
|
135
|
+
parse(name, **options).domain
|
136
|
+
rescue PublicSuffix::Error
|
137
|
+
nil
|
138
|
+
end
|
139
|
+
|
140
|
+
|
141
|
+
# private
|
142
|
+
|
143
|
+
def self.decompose(name, rule)
|
144
|
+
left, right = rule.decompose(name)
|
145
|
+
|
146
|
+
parts = left.split(DOT)
|
147
|
+
# If we have 0 parts left, there is just a tld and no domain or subdomain
|
148
|
+
# If we have 1 part left, there is just a tld and a domain, and no subdomain
|
149
|
+
# If we have 2 parts left, the last part is the domain, the other parts (combined) are the subdomain
|
150
|
+
tld = right
|
151
|
+
sld = parts.empty? ? nil : parts.pop
|
152
|
+
trd = parts.empty? ? nil : parts.join(DOT)
|
153
|
+
|
154
|
+
Domain.new(tld, sld, trd)
|
155
|
+
end
|
156
|
+
|
157
|
+
# Pretend we know how to deal with user input.
|
158
|
+
def self.normalize(name)
|
159
|
+
name = name.to_s.dup
|
160
|
+
name.strip!
|
161
|
+
name.chomp!(DOT)
|
162
|
+
name.downcase!
|
163
|
+
|
164
|
+
return DomainInvalid.new("Name is blank") if name.empty?
|
165
|
+
return DomainInvalid.new("Name starts with a dot") if name.start_with?(DOT)
|
166
|
+
return DomainInvalid.new("%s is not expected to contain a scheme" % name) if name.include?("://")
|
167
|
+
name
|
124
168
|
end
|
125
169
|
|
126
170
|
end
|
data/lib/public_suffix/domain.rb
CHANGED
@@ -1,37 +1,33 @@
|
|
1
|
-
#
|
2
|
-
# Public Suffix
|
1
|
+
# = Public Suffix
|
3
2
|
#
|
4
3
|
# Domain name parser based on the Public Suffix List.
|
5
4
|
#
|
6
|
-
# Copyright (c) 2009-
|
7
|
-
#
|
5
|
+
# Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
|
8
6
|
|
9
7
|
module PublicSuffix
|
10
8
|
|
9
|
+
# Domain represents a domain name, composed of a TLD, SLD and TRD.
|
11
10
|
class Domain
|
12
11
|
|
13
|
-
# Splits a string into
|
14
|
-
# as a domain in reverse order from the input string.
|
15
|
-
#
|
16
|
-
# The input is not validated, but it is assumed to be a valid domain.
|
12
|
+
# Splits a string into the labels, that is the dot-separated parts.
|
17
13
|
#
|
18
|
-
#
|
19
|
-
# The domain name to split.
|
20
|
-
#
|
21
|
-
# @return [Array<String>]
|
14
|
+
# The input is not validated, but it is assumed to be a valid domain name.
|
22
15
|
#
|
23
16
|
# @example
|
24
17
|
#
|
25
|
-
#
|
26
|
-
# # => ['
|
18
|
+
# name_to_labels('example.com')
|
19
|
+
# # => ['example', 'com']
|
27
20
|
#
|
28
|
-
#
|
29
|
-
# # => ['
|
21
|
+
# name_to_labels('example.co.uk')
|
22
|
+
# # => ['example', 'co', 'uk']
|
30
23
|
#
|
31
|
-
|
32
|
-
|
24
|
+
# @param name [String, #to_s] The domain name to split.
|
25
|
+
# @return [Array<String>]
|
26
|
+
def self.name_to_labels(name)
|
27
|
+
name.to_s.split(DOT)
|
33
28
|
end
|
34
29
|
|
30
|
+
|
35
31
|
attr_reader :tld, :sld, :trd
|
36
32
|
|
37
33
|
# Creates and returns a new {PublicSuffix::Domain} instance.
|
@@ -64,7 +60,7 @@ module PublicSuffix
|
|
64
60
|
# PublicSuffix::Domain.new("com", "example", "wwww")
|
65
61
|
# # => #<PublicSuffix::Domain @tld="com", @trd=nil, @sld="example">
|
66
62
|
#
|
67
|
-
def initialize(*args
|
63
|
+
def initialize(*args)
|
68
64
|
@tld, @sld, @trd = args
|
69
65
|
yield(self) if block_given?
|
70
66
|
end
|
@@ -105,7 +101,7 @@ module PublicSuffix
|
|
105
101
|
# # => "www.google.com"
|
106
102
|
#
|
107
103
|
def name
|
108
|
-
[@trd, @sld, @tld].compact.join(
|
104
|
+
[@trd, @sld, @tld].compact.join(DOT)
|
109
105
|
end
|
110
106
|
|
111
107
|
# Returns a domain-like representation of this object
|
@@ -123,10 +119,6 @@ module PublicSuffix
|
|
123
119
|
# This method doesn't validate the input. It handles the domain
|
124
120
|
# as a valid domain name and simply applies the necessary transformations.
|
125
121
|
#
|
126
|
-
# # This is an invalid domain
|
127
|
-
# PublicSuffix::Domain.new("qqq", "google").domain
|
128
|
-
# # => "google.qqq"
|
129
|
-
#
|
130
122
|
# This method returns a FQD, not just the domain part.
|
131
123
|
# To get the domain part, use <tt>#sld</tt> (aka second level domain).
|
132
124
|
#
|
@@ -136,18 +128,15 @@ module PublicSuffix
|
|
136
128
|
# PublicSuffix::Domain.new("com", "google", "www").sld
|
137
129
|
# # => "google"
|
138
130
|
#
|
139
|
-
# @return [String]
|
140
|
-
#
|
141
131
|
# @see #domain?
|
142
132
|
# @see #subdomain
|
143
133
|
#
|
134
|
+
# @return [String]
|
144
135
|
def domain
|
145
|
-
if domain?
|
146
|
-
[@sld, @tld].join(".")
|
147
|
-
end
|
136
|
+
[@sld, @tld].join(DOT) if domain?
|
148
137
|
end
|
149
138
|
|
150
|
-
# Returns a
|
139
|
+
# Returns a subdomain-like representation of this object
|
151
140
|
# if the object is a {#subdomain?}, <tt>nil</tt> otherwise.
|
152
141
|
#
|
153
142
|
# PublicSuffix::Domain.new("com").subdomain
|
@@ -162,11 +151,7 @@ module PublicSuffix
|
|
162
151
|
# This method doesn't validate the input. It handles the domain
|
163
152
|
# as a valid domain name and simply applies the necessary transformations.
|
164
153
|
#
|
165
|
-
#
|
166
|
-
# PublicSuffix::Domain.new("qqq", "google", "www").subdomain
|
167
|
-
# # => "www.google.qqq"
|
168
|
-
#
|
169
|
-
# This method returns a FQD, not just the domain part.
|
154
|
+
# This method returns a FQD, not just the subdomain part.
|
170
155
|
# To get the subdomain part, use <tt>#trd</tt> (aka third level domain).
|
171
156
|
#
|
172
157
|
# PublicSuffix::Domain.new("com", "google", "www").subdomain
|
@@ -175,25 +160,12 @@ module PublicSuffix
|
|
175
160
|
# PublicSuffix::Domain.new("com", "google", "www").trd
|
176
161
|
# # => "www"
|
177
162
|
#
|
178
|
-
# @return [String]
|
179
|
-
#
|
180
163
|
# @see #subdomain?
|
181
164
|
# @see #domain
|
182
165
|
#
|
166
|
+
# @return [String]
|
183
167
|
def subdomain
|
184
|
-
if subdomain?
|
185
|
-
[@trd, @sld, @tld].join(".")
|
186
|
-
end
|
187
|
-
end
|
188
|
-
|
189
|
-
# Returns the rule matching this domain
|
190
|
-
# in the default {PublicSuffix::List}.
|
191
|
-
#
|
192
|
-
# @return [PublicSuffix::Rule::Base, nil]
|
193
|
-
# The rule instance a rule matches current domain,
|
194
|
-
# nil if no rule is found.
|
195
|
-
def rule
|
196
|
-
List.default.find(name)
|
168
|
+
[@trd, @sld, @tld].join(DOT) if subdomain?
|
197
169
|
end
|
198
170
|
|
199
171
|
# Checks whether <tt>self</tt> looks like a domain.
|
@@ -204,8 +176,6 @@ module PublicSuffix
|
|
204
176
|
# If you also want to validate the domain,
|
205
177
|
# use {#valid_domain?} instead.
|
206
178
|
#
|
207
|
-
# @return [Boolean]
|
208
|
-
#
|
209
179
|
# @example
|
210
180
|
#
|
211
181
|
# PublicSuffix::Domain.new("com").domain?
|
@@ -219,11 +189,12 @@ module PublicSuffix
|
|
219
189
|
#
|
220
190
|
# # This is an invalid domain, but returns true
|
221
191
|
# # because this method doesn't validate the content.
|
222
|
-
# PublicSuffix::Domain.new("
|
192
|
+
# PublicSuffix::Domain.new("com", nil).domain?
|
223
193
|
# # => true
|
224
194
|
#
|
225
195
|
# @see #subdomain?
|
226
196
|
#
|
197
|
+
# @return [Boolean]
|
227
198
|
def domain?
|
228
199
|
!(@tld.nil? || @sld.nil?)
|
229
200
|
end
|
@@ -236,8 +207,6 @@ module PublicSuffix
|
|
236
207
|
# If you also want to validate the domain,
|
237
208
|
# use {#valid_subdomain?} instead.
|
238
209
|
#
|
239
|
-
# @return [Boolean]
|
240
|
-
#
|
241
210
|
# @example
|
242
211
|
#
|
243
212
|
# PublicSuffix::Domain.new("com").subdomain?
|
@@ -251,115 +220,16 @@ module PublicSuffix
|
|
251
220
|
#
|
252
221
|
# # This is an invalid domain, but returns true
|
253
222
|
# # because this method doesn't validate the content.
|
254
|
-
# PublicSuffix::Domain.new("
|
223
|
+
# PublicSuffix::Domain.new("com", "example", nil).subdomain?
|
255
224
|
# # => true
|
256
225
|
#
|
257
226
|
# @see #domain?
|
258
227
|
#
|
228
|
+
# @return [Boolean]
|
259
229
|
def subdomain?
|
260
230
|
!(@tld.nil? || @sld.nil? || @trd.nil?)
|
261
231
|
end
|
262
232
|
|
263
|
-
# Checks whether <tt>self</tt> is exclusively a domain,
|
264
|
-
# and not a subdomain.
|
265
|
-
#
|
266
|
-
# @return [Boolean]
|
267
|
-
def is_a_domain?
|
268
|
-
domain? && !subdomain?
|
269
|
-
end
|
270
|
-
|
271
|
-
# Checks whether <tt>self</tt> is exclusively a subdomain.
|
272
|
-
#
|
273
|
-
# @return [Boolean]
|
274
|
-
def is_a_subdomain?
|
275
|
-
subdomain?
|
276
|
-
end
|
277
|
-
|
278
|
-
# Checks whether <tt>self</tt> is assigned and allowed
|
279
|
-
# according to default {List}.
|
280
|
-
#
|
281
|
-
# This method triggers a new rule lookup in the default {List},
|
282
|
-
# which is a quite intensive task.
|
283
|
-
#
|
284
|
-
# @return [Boolean]
|
285
|
-
#
|
286
|
-
# @example Check a valid domain
|
287
|
-
# Domain.new("com", "example").valid?
|
288
|
-
# # => true
|
289
|
-
#
|
290
|
-
# @example Check a valid subdomain
|
291
|
-
# Domain.new("com", "example", "www").valid?
|
292
|
-
# # => true
|
293
|
-
#
|
294
|
-
# @example Check a not-assigned domain
|
295
|
-
# Domain.new("qqq", "example").valid?
|
296
|
-
# # => false
|
297
|
-
#
|
298
|
-
# @example Check a not-allowed domain
|
299
|
-
# Domain.new("do", "example").valid?
|
300
|
-
# # => false
|
301
|
-
# Domain.new("do", "example", "www").valid?
|
302
|
-
# # => true
|
303
|
-
#
|
304
|
-
def valid?
|
305
|
-
r = rule
|
306
|
-
!r.nil? && r.allow?(name)
|
307
|
-
end
|
308
|
-
|
309
|
-
# Checks whether <tt>self</tt> looks like a domain and validates
|
310
|
-
# according to default {List}.
|
311
|
-
#
|
312
|
-
# @return [Boolean]
|
313
|
-
#
|
314
|
-
# @example
|
315
|
-
#
|
316
|
-
# PublicSuffix::Domain.new("com").domain?
|
317
|
-
# # => false
|
318
|
-
#
|
319
|
-
# PublicSuffix::Domain.new("com", "google").domain?
|
320
|
-
# # => true
|
321
|
-
#
|
322
|
-
# PublicSuffix::Domain.new("com", "google", "www").domain?
|
323
|
-
# # => true
|
324
|
-
#
|
325
|
-
# # This is an invalid domain
|
326
|
-
# PublicSuffix::Domain.new("qqq", "google").false?
|
327
|
-
# # => true
|
328
|
-
#
|
329
|
-
# @see #domain?
|
330
|
-
# @see #valid?
|
331
|
-
#
|
332
|
-
def valid_domain?
|
333
|
-
domain? && valid?
|
334
|
-
end
|
335
|
-
|
336
|
-
# Checks whether <tt>self</tt> looks like a subdomain and validates
|
337
|
-
# according to default {List}.
|
338
|
-
#
|
339
|
-
# @return [Boolean]
|
340
|
-
#
|
341
|
-
# @example
|
342
|
-
#
|
343
|
-
# PublicSuffix::Domain.new("com").subdomain?
|
344
|
-
# # => false
|
345
|
-
#
|
346
|
-
# PublicSuffix::Domain.new("com", "google").subdomain?
|
347
|
-
# # => false
|
348
|
-
#
|
349
|
-
# PublicSuffix::Domain.new("com", "google", "www").subdomain?
|
350
|
-
# # => true
|
351
|
-
#
|
352
|
-
# # This is an invalid domain
|
353
|
-
# PublicSuffix::Domain.new("qqq", "google", "www").subdomain?
|
354
|
-
# # => false
|
355
|
-
#
|
356
|
-
# @see #subdomain?
|
357
|
-
# @see #valid?
|
358
|
-
#
|
359
|
-
def valid_subdomain?
|
360
|
-
subdomain? && valid?
|
361
|
-
end
|
362
|
-
|
363
233
|
end
|
364
234
|
|
365
235
|
end
|