public_suffix 1.5.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.rubocop_defaults.yml +127 -0
- data/.travis.yml +10 -5
- data/2.0-Upgrade.md +35 -0
- data/CHANGELOG.md +25 -0
- data/Gemfile +7 -1
- data/LICENSE.txt +1 -1
- data/README.md +89 -62
- data/Rakefile +23 -20
- data/data/{definitions.txt → list.txt} +472 -286
- data/lib/public_suffix.rb +96 -52
- data/lib/public_suffix/domain.rb +26 -156
- data/lib/public_suffix/errors.rb +5 -17
- data/lib/public_suffix/list.rb +107 -122
- data/lib/public_suffix/rule.rb +169 -190
- data/lib/public_suffix/version.rb +3 -13
- data/public_suffix.gemspec +4 -4
- data/test/acceptance_test.rb +57 -34
- data/test/benchmark_helper.rb +4 -0
- data/test/execution_profiler.rb +14 -0
- data/test/initialization_profiler.rb +11 -0
- data/test/performance_benchmark.rb +38 -0
- data/test/psl_test.rb +49 -0
- data/test/test_helper.rb +12 -5
- data/test/tests.txt +98 -0
- data/test/unit/domain_test.rb +18 -84
- data/test/unit/errors_test.rb +2 -2
- data/test/unit/list_test.rb +131 -59
- data/test/unit/public_suffix_test.rb +105 -34
- data/test/unit/rule_test.rb +52 -135
- metadata +20 -6
- data/.gemtest +0 -0
data/lib/public_suffix/errors.rb
CHANGED
@@ -1,19 +1,16 @@
|
|
1
|
-
#
|
2
|
-
# Public Suffix
|
1
|
+
# = Public Suffix
|
3
2
|
#
|
4
3
|
# Domain name parser based on the Public Suffix List.
|
5
4
|
#
|
6
|
-
# Copyright (c) 2009-
|
7
|
-
#
|
5
|
+
# Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
|
8
6
|
|
9
7
|
module PublicSuffix
|
10
8
|
|
11
9
|
class Error < StandardError
|
12
10
|
end
|
13
11
|
|
14
|
-
# Raised when trying to parse an invalid
|
15
|
-
# A
|
16
|
-
# in the definition list.
|
12
|
+
# Raised when trying to parse an invalid name.
|
13
|
+
# A name is considered invalid when no rule is found in the definition list.
|
17
14
|
#
|
18
15
|
# @example
|
19
16
|
#
|
@@ -26,10 +23,7 @@ module PublicSuffix
|
|
26
23
|
class DomainInvalid < Error
|
27
24
|
end
|
28
25
|
|
29
|
-
# Raised when trying to parse a
|
30
|
-
# which is formally defined by a rule,
|
31
|
-
# but the rules set a requirement which is not satisfied
|
32
|
-
# by the input you are trying to parse.
|
26
|
+
# Raised when trying to parse a name that matches a suffix.
|
33
27
|
#
|
34
28
|
# @example
|
35
29
|
#
|
@@ -42,10 +36,4 @@ module PublicSuffix
|
|
42
36
|
class DomainNotAllowed < DomainInvalid
|
43
37
|
end
|
44
38
|
|
45
|
-
# Backward Compatibility
|
46
|
-
#
|
47
|
-
# @deprecated Use {PublicSuffix::DomainInvalid}.
|
48
|
-
#
|
49
|
-
InvalidDomain = DomainInvalid
|
50
|
-
|
51
39
|
end
|
data/lib/public_suffix/list.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
|
-
#
|
2
|
-
# Public Suffix
|
1
|
+
# = Public Suffix
|
3
2
|
#
|
4
3
|
# Domain name parser based on the Public Suffix List.
|
5
4
|
#
|
6
|
-
# Copyright (c) 2009-
|
7
|
-
#
|
5
|
+
# Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
|
8
6
|
|
9
7
|
module PublicSuffix
|
10
8
|
|
@@ -42,17 +40,16 @@ module PublicSuffix
|
|
42
40
|
class List
|
43
41
|
include Enumerable
|
44
42
|
|
45
|
-
|
46
|
-
attr_writer :default_definition
|
47
|
-
end
|
43
|
+
DEFAULT_LIST_PATH = File.join(File.dirname(__FILE__), "..", "..", "data", "list.txt")
|
48
44
|
|
49
45
|
# Gets the default rule list.
|
46
|
+
#
|
50
47
|
# Initializes a new {PublicSuffix::List} parsing the content
|
51
|
-
# of {PublicSuffix::List.
|
48
|
+
# of {PublicSuffix::List.default_list_content}, if required.
|
52
49
|
#
|
53
50
|
# @return [PublicSuffix::List]
|
54
|
-
def self.default
|
55
|
-
@default ||= parse(
|
51
|
+
def self.default(**options)
|
52
|
+
@default ||= parse(File.read(DEFAULT_LIST_PATH), options)
|
56
53
|
end
|
57
54
|
|
58
55
|
# Sets the default rule list to +value+.
|
@@ -65,25 +62,6 @@ module PublicSuffix
|
|
65
62
|
@default = value
|
66
63
|
end
|
67
64
|
|
68
|
-
# Shows if support for private (non-ICANN) domains is enabled or not
|
69
|
-
#
|
70
|
-
# @return [Boolean]
|
71
|
-
def self.private_domains?
|
72
|
-
@private_domains != false
|
73
|
-
end
|
74
|
-
|
75
|
-
# Enables/disables support for private (non-ICANN) domains
|
76
|
-
# Implicitly reloads the list
|
77
|
-
#
|
78
|
-
# @param [Boolean] value
|
79
|
-
# enable/disable support
|
80
|
-
#
|
81
|
-
# @return [PublicSuffix::List]
|
82
|
-
def self.private_domains=(value)
|
83
|
-
@private_domains = !!value
|
84
|
-
self.clear
|
85
|
-
end
|
86
|
-
|
87
65
|
# Sets the default rule list to +nil+.
|
88
66
|
#
|
89
67
|
# @return [self]
|
@@ -92,92 +70,89 @@ module PublicSuffix
|
|
92
70
|
self
|
93
71
|
end
|
94
72
|
|
95
|
-
#
|
96
|
-
# parsing the content of {PublicSuffix::List.default_definition}.
|
97
|
-
#
|
98
|
-
# @return [PublicSuffix::List]
|
99
|
-
def self.reload
|
100
|
-
self.clear.default
|
101
|
-
end
|
102
|
-
|
103
|
-
DEFAULT_DEFINITION_PATH = File.join(File.dirname(__FILE__), "..", "..", "data", "definitions.txt")
|
104
|
-
|
105
|
-
# Gets the default definition list.
|
106
|
-
# Can be any <tt>IOStream</tt> including a <tt>File</tt>
|
107
|
-
# or a simple <tt>String</tt>.
|
108
|
-
# The object must respond to <tt>#each_line</tt>.
|
109
|
-
#
|
110
|
-
# @return [File]
|
111
|
-
def self.default_definition
|
112
|
-
@default_definition || File.new(DEFAULT_DEFINITION_PATH, "r:utf-8")
|
113
|
-
end
|
73
|
+
# rubocop:disable Metrics/MethodLength
|
114
74
|
|
115
75
|
# Parse given +input+ treating the content as Public Suffix List.
|
116
76
|
#
|
117
77
|
# See http://publicsuffix.org/format/ for more details about input format.
|
118
78
|
#
|
119
|
-
# @param [
|
120
|
-
#
|
79
|
+
# @param input [#each_line] The list to parse.
|
80
|
+
# @param private_domains [Boolean] whether to include the private domains section (when false, parsing stops at the private domains marker).
|
121
81
|
# @return [Array<PublicSuffix::Rule::*>]
|
122
|
-
def self.parse(input)
|
82
|
+
def self.parse(input, private_domains: true)
|
83
|
+
comment_token = "//".freeze
|
84
|
+
private_token = "===BEGIN PRIVATE DOMAINS===".freeze
|
85
|
+
section = nil # 1 == ICANN, 2 == PRIVATE
|
86
|
+
|
123
87
|
new do |list|
|
124
88
|
input.each_line do |line|
|
125
89
|
line.strip!
|
126
|
-
|
127
|
-
|
128
|
-
|
90
|
+
case # rubocop:disable Style/EmptyCaseCondition
|
91
|
+
|
92
|
+
# skip blank lines
|
93
|
+
when line.empty?
|
129
94
|
next
|
130
|
-
|
131
|
-
|
95
|
+
|
96
|
+
# include private domains or stop scanner
|
97
|
+
when line.include?(private_token)
|
98
|
+
break if !private_domains
|
99
|
+
section = 2
|
100
|
+
|
101
|
+
# skip comments
|
102
|
+
when line.start_with?(comment_token)
|
132
103
|
next
|
133
|
-
|
104
|
+
|
134
105
|
else
|
135
|
-
list.add(Rule.factory(line), false)
|
106
|
+
list.add(Rule.factory(line, private: section == 2), reindex: false)
|
107
|
+
|
136
108
|
end
|
137
109
|
end
|
138
110
|
end
|
139
111
|
end
|
112
|
+
# rubocop:enable Metrics/MethodLength
|
113
|
+
|
140
114
|
|
141
115
|
# Gets the array of rules.
|
142
116
|
#
|
143
117
|
# @return [Array<PublicSuffix::Rule::*>]
|
144
118
|
attr_reader :rules
|
145
119
|
|
146
|
-
# Gets the naive index, a hash that with the keys being the first label of
|
147
|
-
# every rule pointing to an array of integers (indexes of the rules in @rules).
|
148
|
-
#
|
149
|
-
# @return [Array]
|
150
|
-
attr_reader :indexes
|
151
120
|
|
152
121
|
# Initializes an empty {PublicSuffix::List}.
|
153
122
|
#
|
154
123
|
# @yield [self] Yields on self.
|
155
124
|
# @yieldparam [PublicSuffix::List] self The newly created instance.
|
156
125
|
#
|
157
|
-
def initialize
|
158
|
-
@rules
|
126
|
+
def initialize
|
127
|
+
@rules = []
|
159
128
|
yield(self) if block_given?
|
160
|
-
|
129
|
+
reindex!
|
161
130
|
end
|
162
131
|
|
132
|
+
|
163
133
|
# Creates a naive index for +@rules+. Just a hash that will tell
|
164
134
|
# us where the elements of +@rules+ are relative to its first
|
165
135
|
# {PublicSuffix::Rule::Base#labels} element.
|
166
136
|
#
|
167
137
|
# For instance if @rules[5] and @rules[4] are the only elements of the list
|
168
|
-
# where Rule#labels.first is 'us' @indexes['us'] #=> [5,4], that way in
|
138
|
+
# where Rule#labels.first is 'us' @indexes['us'] #=> [5,4], that way in
|
169
139
|
# select we can avoid mapping every single rule against the candidate domain.
|
170
|
-
def
|
140
|
+
def reindex!
|
171
141
|
@indexes = {}
|
172
|
-
@rules.
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
@indexes[elm] << inx
|
177
|
-
end
|
142
|
+
@rules.each_with_index do |rule, index|
|
143
|
+
tld = Domain.name_to_labels(rule.value).last
|
144
|
+
@indexes[tld] ||= []
|
145
|
+
@indexes[tld] << index
|
178
146
|
end
|
179
147
|
end
|
180
148
|
|
149
|
+
# Gets the naive index, a hash with the keys being the first label of
|
150
|
+
# every rule pointing to an array of integers (indexes of the rules in @rules).
|
151
|
+
def indexes
|
152
|
+
@indexes.dup
|
153
|
+
end
|
154
|
+
|
155
|
+
|
181
156
|
# Checks whether two lists are equal.
|
182
157
|
#
|
183
158
|
# List <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
|
@@ -190,39 +165,31 @@ module PublicSuffix
|
|
190
165
|
# @return [Boolean]
|
191
166
|
def ==(other)
|
192
167
|
return false unless other.is_a?(List)
|
193
|
-
|
194
|
-
self.rules == other.rules
|
168
|
+
equal?(other) || rules == other.rules
|
195
169
|
end
|
196
|
-
alias
|
170
|
+
alias eql? ==
|
197
171
|
|
198
172
|
# Iterates each rule in the list.
|
199
173
|
def each(*args, &block)
|
200
174
|
@rules.each(*args, &block)
|
201
175
|
end
|
202
176
|
|
203
|
-
# Gets the list as array.
|
204
|
-
#
|
205
|
-
# @return [Array<PublicSuffix::Rule::*>]
|
206
|
-
def to_a
|
207
|
-
@rules
|
208
|
-
end
|
209
177
|
|
210
|
-
# Adds the given object to the list
|
211
|
-
# and optionally refreshes the rule index.
|
178
|
+
# Adds the given object to the list and optionally refreshes the rule index.
|
212
179
|
#
|
213
180
|
# @param [PublicSuffix::Rule::*] rule
|
214
181
|
# The rule to add to the list.
|
215
|
-
# @param [Boolean]
|
182
|
+
# @param [Boolean] reindex
|
216
183
|
# Set to true to recreate the rule index
|
217
184
|
# after the rule has been added to the list.
|
218
185
|
#
|
219
186
|
# @return [self]
|
220
187
|
#
|
221
|
-
# @see #
|
188
|
+
# @see #reindex!
|
222
189
|
#
|
223
|
-
def add(rule,
|
190
|
+
def add(rule, reindex: true)
|
224
191
|
@rules << rule
|
225
|
-
|
192
|
+
reindex! if reindex
|
226
193
|
self
|
227
194
|
end
|
228
195
|
alias << add
|
@@ -233,7 +200,6 @@ module PublicSuffix
|
|
233
200
|
def size
|
234
201
|
@rules.size
|
235
202
|
end
|
236
|
-
alias length size
|
237
203
|
|
238
204
|
# Checks whether the list is empty.
|
239
205
|
#
|
@@ -247,54 +213,73 @@ module PublicSuffix
|
|
247
213
|
# @return [self]
|
248
214
|
def clear
|
249
215
|
@rules.clear
|
216
|
+
reindex!
|
250
217
|
self
|
251
218
|
end
|
252
219
|
|
253
|
-
#
|
220
|
+
# Finds and returns the most appropriate rule for the domain name.
|
254
221
|
#
|
255
222
|
# From the Public Suffix List documentation:
|
256
223
|
#
|
257
|
-
#
|
224
|
+
# - If a hostname matches more than one rule in the file,
|
258
225
|
# the longest matching rule (the one with the most levels) will be used.
|
259
|
-
#
|
226
|
+
# - An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
|
260
227
|
# An exception rule takes priority over any other matching rule.
|
261
228
|
#
|
262
|
-
#
|
263
|
-
#
|
264
|
-
#
|
265
|
-
#
|
266
|
-
#
|
267
|
-
#
|
268
|
-
#
|
269
|
-
#
|
270
|
-
#
|
271
|
-
#
|
272
|
-
#
|
273
|
-
# @param [String, #to_s]
|
274
|
-
#
|
275
|
-
# @return [PublicSuffix::Rule
|
276
|
-
def find(
|
277
|
-
|
278
|
-
|
279
|
-
|
229
|
+
# ## Algorithm description
|
230
|
+
#
|
231
|
+
# 1. Match domain against all rules and take note of the matching ones.
|
232
|
+
# 2. If no rules match, the prevailing rule is "*".
|
233
|
+
# 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
|
234
|
+
# 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
|
235
|
+
# 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
|
236
|
+
# 6. The public suffix is the set of labels from the domain
|
237
|
+
# which directly match the labels of the prevailing rule (joined by dots).
|
238
|
+
# 7. The registered domain is the public suffix plus one additional label.
|
239
|
+
#
|
240
|
+
# @param name [String, #to_s] The domain name.
|
241
|
+
# @param [PublicSuffix::Rule::*] default The default rule to return in case no rule matches.
|
242
|
+
# @return [PublicSuffix::Rule::*]
|
243
|
+
def find(name, default: default_rule, **options)
|
244
|
+
rule = select(name, **options).inject do |l, r|
|
245
|
+
return r if r.class == Rule::Exception
|
246
|
+
l.length > r.length ? l : r
|
247
|
+
end
|
248
|
+
rule || default
|
280
249
|
end
|
281
250
|
|
282
251
|
# Selects all the rules matching given domain.
|
283
252
|
#
|
284
|
-
#
|
285
|
-
#
|
253
|
+
# Internally, the lookup heavily relies on the `@indexes`. The input is split into labels,
|
254
|
+
# and we retrieve from the index only the rules that end with the input label. After that,
|
255
|
+
# a sequential scan is performed. In most cases, where the number of rules for the same label
|
256
|
+
# is limited, this algorithm is efficient enough.
|
286
257
|
#
|
287
|
-
#
|
258
|
+
# If `ignore_private` is set to true, the algorithm will skip the rules that are flagged as private domain.
|
259
|
+
# Note that the rules will still be part of the loop. If you frequently need to access lists
|
260
|
+
# ignoring the private domains, you should create a list that doesn't include these domains setting the
|
261
|
+
# `private_domains: false` option when calling {.parse}.
|
288
262
|
#
|
263
|
+
# @param [String, #to_s] name The domain name.
|
264
|
+
# @param [Boolean] ignore_private
|
289
265
|
# @return [Array<PublicSuffix::Rule::*>]
|
290
|
-
def select(
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
266
|
+
def select(name, ignore_private: false)
|
267
|
+
name = name.to_s
|
268
|
+
indices = (@indexes[Domain.name_to_labels(name).last] || [])
|
269
|
+
|
270
|
+
finder = @rules.values_at(*indices).lazy
|
271
|
+
finder = finder.select { |rule| rule.match?(name) }
|
272
|
+
finder = finder.select { |rule| !rule.private } if ignore_private
|
273
|
+
finder.to_a
|
274
|
+
end
|
275
|
+
|
276
|
+
# Gets the default rule.
|
277
|
+
#
|
278
|
+
# @see PublicSuffix::Rule.default_rule
|
279
|
+
#
|
280
|
+
# @return [PublicSuffix::Rule::*]
|
281
|
+
def default_rule
|
282
|
+
PublicSuffix::Rule.default
|
298
283
|
end
|
299
284
|
|
300
285
|
end
|
data/lib/public_suffix/rule.rb
CHANGED
@@ -1,17 +1,15 @@
|
|
1
|
-
#
|
2
|
-
# Public Suffix
|
1
|
+
# = Public Suffix
|
3
2
|
#
|
4
3
|
# Domain name parser based on the Public Suffix List.
|
5
4
|
#
|
6
|
-
# Copyright (c) 2009-
|
7
|
-
#
|
5
|
+
# Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
|
8
6
|
|
9
7
|
module PublicSuffix
|
10
8
|
|
11
9
|
# A Rule is a special object which holds a single definition
|
12
10
|
# of the Public Suffix List.
|
13
11
|
#
|
14
|
-
# There are 3 types of
|
12
|
+
# There are 3 types of rules, each one represented by a specific
|
15
13
|
# subclass within the +PublicSuffix::Rule+ namespace.
|
16
14
|
#
|
17
15
|
# To create a new Rule, use the {PublicSuffix::Rule.factory} method.
|
@@ -21,12 +19,11 @@ module PublicSuffix
|
|
21
19
|
#
|
22
20
|
module Rule
|
23
21
|
|
24
|
-
#
|
25
22
|
# = Abstract rule class
|
26
23
|
#
|
27
24
|
# This represents the base class for a Rule definition
|
28
|
-
# in the {Public Suffix List}[
|
29
|
-
#
|
25
|
+
# in the {Public Suffix List}[https://publicsuffix.org].
|
26
|
+
#
|
30
27
|
# This is intended to be an Abstract class
|
31
28
|
# and you shouldn't create a direct instance. The only purpose
|
32
29
|
# of this class is to expose a common interface
|
@@ -36,28 +33,21 @@ module PublicSuffix
|
|
36
33
|
# * {PublicSuffix::Rule::Exception}
|
37
34
|
# * {PublicSuffix::Rule::Wildcard}
|
38
35
|
#
|
39
|
-
#
|
36
|
+
# ## Properties
|
40
37
|
#
|
41
38
|
# A rule is composed by 4 properties:
|
42
39
|
#
|
43
|
-
#
|
44
|
-
# in the public suffix list
|
45
|
-
# value - The value, a normalized version of the rule name.
|
40
|
+
# value - A normalized version of the rule name.
|
46
41
|
# The normalization process depends on rule type.
|
47
|
-
# type - The rule type (:normal, :wildcard, :exception)
|
48
|
-
# labels - The canonicalized rule name
|
49
42
|
#
|
50
43
|
# Here's an example
|
51
44
|
#
|
52
45
|
# PublicSuffix::Rule.factory("*.google.com")
|
53
46
|
# #<PublicSuffix::Rule::Wildcard:0x1015c14b0
|
54
|
-
# @labels=["com", "google"],
|
55
|
-
# @name="*.google.com",
|
56
|
-
# @type=:wildcard,
|
57
47
|
# @value="google.com"
|
58
48
|
# >
|
59
49
|
#
|
60
|
-
#
|
50
|
+
# ## Rule Creation
|
61
51
|
#
|
62
52
|
# The best way to create a new rule is passing the rule name
|
63
53
|
# to the <tt>PublicSuffix::Rule.factory</tt> method.
|
@@ -71,35 +61,34 @@ module PublicSuffix
|
|
71
61
|
# This method will detect the rule type and create an instance
|
72
62
|
# from the proper rule class.
|
73
63
|
#
|
74
|
-
#
|
64
|
+
# ## Rule Usage
|
75
65
|
#
|
76
|
-
# A rule describes the composition of a domain name
|
77
|
-
#
|
78
|
-
# into tld, sld and trd.
|
66
|
+
# A rule describes the composition of a domain name and explains how to tokenize
|
67
|
+
# the name into tld, sld and trd.
|
79
68
|
#
|
80
|
-
# To use a rule, you first need to be sure the
|
69
|
+
# To use a rule, you first need to be sure the name you want to tokenize
|
81
70
|
# can be handled by the current rule.
|
82
71
|
# You can use the <tt>#match?</tt> method.
|
83
72
|
#
|
84
73
|
# rule = PublicSuffix::Rule.factory("com")
|
85
|
-
#
|
74
|
+
#
|
86
75
|
# rule.match?("google.com")
|
87
76
|
# # => true
|
88
|
-
#
|
77
|
+
#
|
89
78
|
# rule.match?("google.net")
|
90
79
|
# # => false
|
91
80
|
#
|
92
|
-
# Rule order is significant. A
|
81
|
+
# Rule order is significant. A name can match more than one rule.
|
93
82
|
# See the {Public Suffix Documentation}[http://publicsuffix.org/format/]
|
94
83
|
# to learn more about rule priority.
|
95
84
|
#
|
96
85
|
# When you have the right rule, you can use it to tokenize the domain name.
|
97
|
-
#
|
86
|
+
#
|
98
87
|
# rule = PublicSuffix::Rule.factory("com")
|
99
|
-
#
|
88
|
+
#
|
100
89
|
# rule.decompose("google.com")
|
101
90
|
# # => ["google", "com"]
|
102
|
-
#
|
91
|
+
#
|
103
92
|
# rule.decompose("www.google.com")
|
104
93
|
# # => ["www.google", "com"]
|
105
94
|
#
|
@@ -107,145 +96,107 @@ module PublicSuffix
|
|
107
96
|
#
|
108
97
|
class Base
|
109
98
|
|
110
|
-
|
99
|
+
# @return [String] the rule definition
|
100
|
+
attr_reader :value
|
111
101
|
|
112
|
-
#
|
113
|
-
|
114
|
-
#
|
115
|
-
# @param [String] name
|
116
|
-
# The name of the rule
|
117
|
-
# @param [String] value
|
118
|
-
# The value of the rule. If nil, defaults to +name+.
|
119
|
-
#
|
120
|
-
def initialize(name, value = nil)
|
121
|
-
@name = name.to_s
|
122
|
-
@value = value || @name
|
123
|
-
@labels = Domain.domain_to_labels(@value)
|
124
|
-
end
|
102
|
+
# @return [Boolean] true if the rule is a private domain
|
103
|
+
attr_reader :private
|
125
104
|
|
126
|
-
#
|
127
|
-
# The rule type name.
|
128
|
-
#
|
129
|
-
# @return [Symbol]
|
130
|
-
#
|
131
|
-
def self.type
|
132
|
-
@type ||= self.name.split("::").last.downcase.to_sym
|
133
|
-
end
|
134
105
|
|
106
|
+
# Initializes a new rule with name and value.
|
107
|
+
# If value is +nil+, name also becomes the value for this rule.
|
135
108
|
#
|
136
|
-
# @
|
137
|
-
|
138
|
-
|
139
|
-
|
109
|
+
# @param value [String] the value of the rule
|
110
|
+
def initialize(value, private: false)
|
111
|
+
@value = value.to_s
|
112
|
+
@private = private
|
140
113
|
end
|
141
114
|
|
142
115
|
# Checks whether this rule is equal to <tt>other</tt>.
|
143
116
|
#
|
144
|
-
# @param
|
145
|
-
# The rule to compare.
|
146
|
-
#
|
117
|
+
# @param [PublicSuffix::Rule::*] other The rule to compare
|
147
118
|
# @return [Boolean]
|
148
119
|
# Returns true if this rule and other are instances of the same class
|
149
120
|
# and has the same value, false otherwise.
|
150
121
|
def ==(other)
|
151
|
-
|
152
|
-
self.equal?(other) ||
|
153
|
-
self.name == other.name
|
122
|
+
equal?(other) || (self.class == other.class && value == other.value)
|
154
123
|
end
|
155
|
-
alias
|
124
|
+
alias eql? ==
|
156
125
|
|
157
|
-
# Checks if this rule matches +
|
126
|
+
# Checks if this rule matches +name+.
|
158
127
|
#
|
159
|
-
#
|
160
|
-
#
|
128
|
+
# A domain name is said to match a rule if and only if
|
129
|
+
# all of the following conditions are met:
|
161
130
|
#
|
162
|
-
#
|
131
|
+
# - When the domain and rule are split into corresponding labels,
|
132
|
+
# that the domain contains as many or more labels than the rule.
|
133
|
+
# - Beginning with the right-most labels of both the domain and the rule,
|
134
|
+
# and continuing for all labels in the rule, one finds that for every pair,
|
135
|
+
# either they are identical, or that the label from the rule is "*".
|
136
|
+
#
|
137
|
+
# @see https://publicsuffix.org/list/
|
163
138
|
#
|
164
139
|
# @example
|
165
|
-
#
|
166
|
-
# # #<PublicSuffix::Rule::Normal>
|
167
|
-
# rule.match?("example.com")
|
140
|
+
# Rule.factory("com").match?("example.com")
|
168
141
|
# # => true
|
169
|
-
#
|
142
|
+
# Rule.factory("com").match?("example.net")
|
170
143
|
# # => false
|
171
144
|
#
|
172
|
-
|
173
|
-
l1 = labels
|
174
|
-
l2 = Domain.domain_to_labels(domain)
|
175
|
-
odiff(l1, l2).empty?
|
176
|
-
end
|
177
|
-
|
178
|
-
# Checks if this rule allows +domain+.
|
179
|
-
#
|
180
|
-
# @param [String, #to_s] domain
|
181
|
-
# The domain name to check.
|
182
|
-
#
|
145
|
+
# @param name [String, #to_s] The domain name to check.
|
183
146
|
# @return [Boolean]
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
# # => true
|
192
|
-
#
|
193
|
-
def allow?(domain)
|
194
|
-
!decompose(domain).last.nil?
|
195
|
-
end
|
196
|
-
|
197
|
-
# Gets the length of this rule for comparison.
|
198
|
-
# The length usually matches the number of rule +parts+.
|
199
|
-
#
|
200
|
-
# Subclasses might actually override this method.
|
201
|
-
#
|
202
|
-
# @return [Integer] The number of parts.
|
203
|
-
def length
|
204
|
-
parts.length
|
147
|
+
def match?(name)
|
148
|
+
# Note: it works because of the assumption there are no
|
149
|
+
# rules like foo.*.com. If the assumption is incorrect,
|
150
|
+
# we need to properly walk the input and skip parts according
|
151
|
+
# to wildcard component.
|
152
|
+
diff = name.chomp(value)
|
153
|
+
diff.empty? || diff[-1] == "."
|
205
154
|
end
|
206
155
|
|
207
|
-
#
|
208
|
-
# @raise [NotImplementedError]
|
209
156
|
# @abstract
|
210
157
|
def parts
|
211
|
-
raise
|
158
|
+
raise NotImplementedError
|
212
159
|
end
|
213
160
|
|
214
|
-
#
|
215
|
-
# @param [String, #to_s] domain
|
216
|
-
# The domain name to decompose.
|
217
|
-
#
|
218
|
-
# @return [Array<String, nil>]
|
219
|
-
#
|
220
|
-
# @raise [NotImplementedError]
|
221
161
|
# @abstract
|
222
|
-
def
|
223
|
-
raise
|
162
|
+
def length
|
163
|
+
raise NotImplementedError
|
224
164
|
end
|
225
165
|
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
while(ii < one.size && one[ii] == two[ii])
|
232
|
-
ii += 1
|
233
|
-
end
|
234
|
-
|
235
|
-
one[ii..one.length]
|
166
|
+
# @abstract
|
167
|
+
# @param [String, #to_s] name The domain name to decompose
|
168
|
+
# @return [Array<String, nil>]
|
169
|
+
def decompose(*)
|
170
|
+
raise NotImplementedError
|
236
171
|
end
|
237
172
|
|
238
173
|
end
|
239
174
|
|
175
|
+
# Normal represents a standard rule (e.g. com).
|
240
176
|
class Normal < Base
|
241
177
|
|
242
|
-
# Initializes a new rule
|
178
|
+
# Initializes a new rule from +definition+.
|
179
|
+
#
|
180
|
+
# @param definition [String] the rule as defined in the PSL
|
181
|
+
def initialize(definition, **options)
|
182
|
+
super(definition, **options)
|
183
|
+
end
|
184
|
+
|
185
|
+
# Gets the original rule definition.
|
243
186
|
#
|
244
|
-
# @
|
245
|
-
|
187
|
+
# @return [String] The rule definition.
|
188
|
+
def rule
|
189
|
+
value
|
190
|
+
end
|
191
|
+
|
192
|
+
# Decomposes the domain name according to rule properties.
|
246
193
|
#
|
247
|
-
|
248
|
-
|
194
|
+
# @param [String, #to_s] name The domain name to decompose
|
195
|
+
# @return [Array<String>] The array with [trd + sld, tld].
|
196
|
+
def decompose(domain)
|
197
|
+
suffix = parts.join('\.')
|
198
|
+
matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
|
199
|
+
matches ? matches[1..2] : [nil, nil]
|
249
200
|
end
|
250
201
|
|
251
202
|
# dot-split rule value and returns all rule parts
|
@@ -253,74 +204,96 @@ module PublicSuffix
|
|
253
204
|
#
|
254
205
|
# @return [Array<String>]
|
255
206
|
def parts
|
256
|
-
@
|
207
|
+
@value.split(DOT)
|
257
208
|
end
|
258
209
|
|
259
|
-
#
|
260
|
-
#
|
261
|
-
# @param [String, #to_s] domain
|
262
|
-
# The domain name to decompose.
|
210
|
+
# Gets the length of this rule for comparison,
|
211
|
+
# represented by the number of dot-separated parts in the rule.
|
263
212
|
#
|
264
|
-
# @return [
|
265
|
-
|
266
|
-
|
267
|
-
def decompose(domain)
|
268
|
-
domain.to_s.chomp(".") =~ /^(.*)\.(#{parts.join('\.')})$/
|
269
|
-
[$1, $2]
|
213
|
+
# @return [Integer] The length of the rule.
|
214
|
+
def length
|
215
|
+
@length ||= parts.length
|
270
216
|
end
|
271
217
|
|
272
218
|
end
|
273
219
|
|
220
|
+
# Wildcard represents a wildcard rule (e.g. *.co.uk).
|
274
221
|
class Wildcard < Base
|
275
222
|
|
276
|
-
# Initializes a new rule
|
223
|
+
# Initializes a new rule from +definition+.
|
277
224
|
#
|
278
|
-
#
|
279
|
-
#
|
225
|
+
# The wildcard "*" is removed from the value, as it's common
|
226
|
+
# for each wildcard rule.
|
280
227
|
#
|
281
|
-
|
282
|
-
|
228
|
+
# @param definition [String] the rule as defined in the PSL
|
229
|
+
def initialize(definition, **options)
|
230
|
+
super(definition.to_s[2..-1], **options)
|
283
231
|
end
|
284
232
|
|
285
|
-
#
|
286
|
-
# in the order they appear in the value.
|
233
|
+
# Gets the original rule definition.
|
287
234
|
#
|
288
|
-
# @return [
|
289
|
-
def
|
290
|
-
|
235
|
+
# @return [String] The rule definition.
|
236
|
+
def rule
|
237
|
+
value == "" ? STAR : STAR + DOT + value
|
291
238
|
end
|
292
239
|
|
293
|
-
#
|
294
|
-
# the +*+ char.
|
240
|
+
# Decomposes the domain name according to rule properties.
|
295
241
|
#
|
296
|
-
# @
|
297
|
-
|
298
|
-
|
242
|
+
# @param [String, #to_s] name The domain name to decompose
|
243
|
+
# @return [Array<String>] The array with [trd + sld, tld].
|
244
|
+
def decompose(domain)
|
245
|
+
suffix = ([".*?"] + parts).join('\.')
|
246
|
+
matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
|
247
|
+
matches ? matches[1..2] : [nil, nil]
|
299
248
|
end
|
300
249
|
|
301
|
-
#
|
302
|
-
#
|
303
|
-
# @param [String, #to_s] domain
|
304
|
-
# The domain name to decompose.
|
250
|
+
# dot-split rule value and returns all rule parts
|
251
|
+
# in the order they appear in the value.
|
305
252
|
#
|
306
253
|
# @return [Array<String>]
|
307
|
-
|
254
|
+
def parts
|
255
|
+
@value.split(DOT)
|
256
|
+
end
|
257
|
+
|
258
|
+
# Gets the length of this rule for comparison,
|
259
|
+
# represented by the number of dot-separated parts in the rule
|
260
|
+
# plus 1 for the *.
|
308
261
|
#
|
309
|
-
|
310
|
-
|
311
|
-
|
262
|
+
# @return [Integer] The length of the rule.
|
263
|
+
def length
|
264
|
+
@length ||= parts.length + 1 # * counts as 1
|
312
265
|
end
|
313
266
|
|
314
267
|
end
|
315
268
|
|
269
|
+
# Exception represents an exception rule (e.g. !parliament.uk).
|
316
270
|
class Exception < Base
|
317
271
|
|
318
|
-
# Initializes a new rule
|
272
|
+
# Initializes a new rule from +definition+.
|
273
|
+
#
|
274
|
+
# The bang ! is removed from the value, as it's common
|
275
|
+
# for each exception rule.
|
276
|
+
#
|
277
|
+
# @param definition [String] the rule as defined in the PSL
|
278
|
+
def initialize(definition, **options)
|
279
|
+
super(definition.to_s[1..-1], **options)
|
280
|
+
end
|
281
|
+
|
282
|
+
# Gets the original rule definition.
|
319
283
|
#
|
320
|
-
# @
|
284
|
+
# @return [String] The rule definition.
|
285
|
+
def rule
|
286
|
+
BANG + value
|
287
|
+
end
|
288
|
+
|
289
|
+
# Decomposes the domain name according to rule properties.
|
321
290
|
#
|
322
|
-
|
323
|
-
|
291
|
+
# @param [String, #to_s] name The domain name to decompose
|
292
|
+
# @return [Array<String>] The array with [trd + sld, tld].
|
293
|
+
def decompose(domain)
|
294
|
+
suffix = parts.join('\.')
|
295
|
+
matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
|
296
|
+
matches ? matches[1..2] : [nil, nil]
|
324
297
|
end
|
325
298
|
|
326
299
|
# dot-split rule value and returns all rule parts
|
@@ -329,42 +302,28 @@ module PublicSuffix
|
|
329
302
|
#
|
330
303
|
# See http://publicsuffix.org/format/:
|
331
304
|
# If the prevailing rule is a exception rule,
|
332
|
-
# modify it by removing the leftmost label.
|
305
|
+
# modify it by removing the leftmost label.
|
333
306
|
#
|
334
307
|
# @return [Array<String>]
|
335
308
|
def parts
|
336
|
-
@
|
309
|
+
@value.split(DOT)[1..-1]
|
337
310
|
end
|
338
311
|
|
339
|
-
#
|
340
|
-
#
|
341
|
-
# @param [String, #to_s] domain
|
342
|
-
# The domain name to decompose.
|
343
|
-
#
|
344
|
-
# @return [Array<String>]
|
345
|
-
# The array with [trd + sld, tld].
|
312
|
+
# Gets the length of this rule for comparison,
|
313
|
+
# represented by the number of dot-separated parts in the rule.
|
346
314
|
#
|
347
|
-
|
348
|
-
|
349
|
-
|
315
|
+
# @return [Integer] The length of the rule.
|
316
|
+
def length
|
317
|
+
@length ||= parts.length
|
350
318
|
end
|
351
319
|
|
352
320
|
end
|
353
321
|
|
354
|
-
RULES = {
|
355
|
-
'*' => Wildcard,
|
356
|
-
'!' => Exception
|
357
|
-
}
|
358
|
-
RULES.default = Normal
|
359
322
|
|
360
323
|
# Takes the +name+ of the rule, detects the specific rule class
|
361
324
|
# and creates a new instance of that class.
|
362
325
|
# The +name+ becomes the rule +value+.
|
363
326
|
#
|
364
|
-
# @param [String] name The rule definition.
|
365
|
-
#
|
366
|
-
# @return [PublicSuffix::Rule::*] A rule instance.
|
367
|
-
#
|
368
327
|
# @example Creates a Normal rule
|
369
328
|
# PublicSuffix::Rule.factory("ar")
|
370
329
|
# # => #<PublicSuffix::Rule::Normal>
|
@@ -377,8 +336,28 @@ module PublicSuffix
|
|
377
336
|
# PublicSuffix::Rule.factory("!congresodelalengua3.ar")
|
378
337
|
# # => #<PublicSuffix::Rule::Exception>
|
379
338
|
#
|
380
|
-
|
381
|
-
|
339
|
+
# @param [String] content The rule content.
|
340
|
+
# @return [PublicSuffix::Rule::*] A rule instance.
|
341
|
+
def self.factory(content, **options)
|
342
|
+
case content.to_s[0, 1]
|
343
|
+
when STAR
|
344
|
+
Wildcard
|
345
|
+
when BANG
|
346
|
+
Exception
|
347
|
+
else
|
348
|
+
Normal
|
349
|
+
end.new(content, **options)
|
350
|
+
end
|
351
|
+
|
352
|
+
# The default rule to use if no rule match.
|
353
|
+
#
|
354
|
+
# The default rule is "*". From https://publicsuffix.org/list/:
|
355
|
+
#
|
356
|
+
# > If no rules match, the prevailing rule is "*".
|
357
|
+
#
|
358
|
+
# @return [PublicSuffix::Rule::Wildcard] The default rule.
|
359
|
+
def self.default
|
360
|
+
factory(STAR)
|
382
361
|
end
|
383
362
|
|
384
363
|
end
|