public_suffix 1.5.3 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/.rubocop_defaults.yml +127 -0
- data/.travis.yml +10 -5
- data/2.0-Upgrade.md +35 -0
- data/CHANGELOG.md +25 -0
- data/Gemfile +7 -1
- data/LICENSE.txt +1 -1
- data/README.md +89 -62
- data/Rakefile +23 -20
- data/data/{definitions.txt → list.txt} +472 -286
- data/lib/public_suffix.rb +96 -52
- data/lib/public_suffix/domain.rb +26 -156
- data/lib/public_suffix/errors.rb +5 -17
- data/lib/public_suffix/list.rb +107 -122
- data/lib/public_suffix/rule.rb +169 -190
- data/lib/public_suffix/version.rb +3 -13
- data/public_suffix.gemspec +4 -4
- data/test/acceptance_test.rb +57 -34
- data/test/benchmark_helper.rb +4 -0
- data/test/execution_profiler.rb +14 -0
- data/test/initialization_profiler.rb +11 -0
- data/test/performance_benchmark.rb +38 -0
- data/test/psl_test.rb +49 -0
- data/test/test_helper.rb +12 -5
- data/test/tests.txt +98 -0
- data/test/unit/domain_test.rb +18 -84
- data/test/unit/errors_test.rb +2 -2
- data/test/unit/list_test.rb +131 -59
- data/test/unit/public_suffix_test.rb +105 -34
- data/test/unit/rule_test.rb +52 -135
- metadata +20 -6
- data/.gemtest +0 -0
data/lib/public_suffix/errors.rb
CHANGED
@@ -1,19 +1,16 @@
|
|
1
|
-
#
|
2
|
-
# Public Suffix
|
1
|
+
# = Public Suffix
|
3
2
|
#
|
4
3
|
# Domain name parser based on the Public Suffix List.
|
5
4
|
#
|
6
|
-
# Copyright (c) 2009-
|
7
|
-
#
|
5
|
+
# Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
|
8
6
|
|
9
7
|
module PublicSuffix
|
10
8
|
|
11
9
|
class Error < StandardError
|
12
10
|
end
|
13
11
|
|
14
|
-
# Raised when trying to parse an invalid
|
15
|
-
# A
|
16
|
-
# in the definition list.
|
12
|
+
# Raised when trying to parse an invalid name.
|
13
|
+
# A name is considered invalid when no rule is found in the definition list.
|
17
14
|
#
|
18
15
|
# @example
|
19
16
|
#
|
@@ -26,10 +23,7 @@ module PublicSuffix
|
|
26
23
|
class DomainInvalid < Error
|
27
24
|
end
|
28
25
|
|
29
|
-
# Raised when trying to parse a
|
30
|
-
# which is formally defined by a rule,
|
31
|
-
# but the rules set a requirement which is not satisfied
|
32
|
-
# by the input you are trying to parse.
|
26
|
+
# Raised when trying to parse a name that matches a suffix.
|
33
27
|
#
|
34
28
|
# @example
|
35
29
|
#
|
@@ -42,10 +36,4 @@ module PublicSuffix
|
|
42
36
|
class DomainNotAllowed < DomainInvalid
|
43
37
|
end
|
44
38
|
|
45
|
-
# Backward Compatibility
|
46
|
-
#
|
47
|
-
# @deprecated Use {PublicSuffix::DomainInvalid}.
|
48
|
-
#
|
49
|
-
InvalidDomain = DomainInvalid
|
50
|
-
|
51
39
|
end
|
data/lib/public_suffix/list.rb
CHANGED
@@ -1,10 +1,8 @@
|
|
1
|
-
#
|
2
|
-
# Public Suffix
|
1
|
+
# = Public Suffix
|
3
2
|
#
|
4
3
|
# Domain name parser based on the Public Suffix List.
|
5
4
|
#
|
6
|
-
# Copyright (c) 2009-
|
7
|
-
#
|
5
|
+
# Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
|
8
6
|
|
9
7
|
module PublicSuffix
|
10
8
|
|
@@ -42,17 +40,16 @@ module PublicSuffix
|
|
42
40
|
class List
|
43
41
|
include Enumerable
|
44
42
|
|
45
|
-
|
46
|
-
attr_writer :default_definition
|
47
|
-
end
|
43
|
+
DEFAULT_LIST_PATH = File.join(File.dirname(__FILE__), "..", "..", "data", "list.txt")
|
48
44
|
|
49
45
|
# Gets the default rule list.
|
46
|
+
#
|
50
47
|
# Initializes a new {PublicSuffix::List} parsing the content
|
51
|
-
# of {PublicSuffix::List.
|
48
|
+
# of {PublicSuffix::List.default_list_content}, if required.
|
52
49
|
#
|
53
50
|
# @return [PublicSuffix::List]
|
54
|
-
def self.default
|
55
|
-
@default ||= parse(
|
51
|
+
def self.default(**options)
|
52
|
+
@default ||= parse(File.read(DEFAULT_LIST_PATH), options)
|
56
53
|
end
|
57
54
|
|
58
55
|
# Sets the default rule list to +value+.
|
@@ -65,25 +62,6 @@ module PublicSuffix
|
|
65
62
|
@default = value
|
66
63
|
end
|
67
64
|
|
68
|
-
# Shows if support for private (non-ICANN) domains is enabled or not
|
69
|
-
#
|
70
|
-
# @return [Boolean]
|
71
|
-
def self.private_domains?
|
72
|
-
@private_domains != false
|
73
|
-
end
|
74
|
-
|
75
|
-
# Enables/disables support for private (non-ICANN) domains
|
76
|
-
# Implicitly reloads the list
|
77
|
-
#
|
78
|
-
# @param [Boolean] value
|
79
|
-
# enable/disable support
|
80
|
-
#
|
81
|
-
# @return [PublicSuffix::List]
|
82
|
-
def self.private_domains=(value)
|
83
|
-
@private_domains = !!value
|
84
|
-
self.clear
|
85
|
-
end
|
86
|
-
|
87
65
|
# Sets the default rule list to +nil+.
|
88
66
|
#
|
89
67
|
# @return [self]
|
@@ -92,92 +70,89 @@ module PublicSuffix
|
|
92
70
|
self
|
93
71
|
end
|
94
72
|
|
95
|
-
#
|
96
|
-
# parsing the content of {PublicSuffix::List.default_definition}.
|
97
|
-
#
|
98
|
-
# @return [PublicSuffix::List]
|
99
|
-
def self.reload
|
100
|
-
self.clear.default
|
101
|
-
end
|
102
|
-
|
103
|
-
DEFAULT_DEFINITION_PATH = File.join(File.dirname(__FILE__), "..", "..", "data", "definitions.txt")
|
104
|
-
|
105
|
-
# Gets the default definition list.
|
106
|
-
# Can be any <tt>IOStream</tt> including a <tt>File</tt>
|
107
|
-
# or a simple <tt>String</tt>.
|
108
|
-
# The object must respond to <tt>#each_line</tt>.
|
109
|
-
#
|
110
|
-
# @return [File]
|
111
|
-
def self.default_definition
|
112
|
-
@default_definition || File.new(DEFAULT_DEFINITION_PATH, "r:utf-8")
|
113
|
-
end
|
73
|
+
# rubocop:disable Metrics/MethodLength
|
114
74
|
|
115
75
|
# Parse given +input+ treating the content as Public Suffix List.
|
116
76
|
#
|
117
77
|
# See http://publicsuffix.org/format/ for more details about input format.
|
118
78
|
#
|
119
|
-
# @param [
|
120
|
-
#
|
79
|
+
# @param input [#each_line] The list to parse.
|
80
|
+
# @param private_domains [Boolean] set to false to ignore the private domains section (default: true).
|
121
81
|
# @return [Array<PublicSuffix::Rule::*>]
|
122
|
-
def self.parse(input)
|
82
|
+
def self.parse(input, private_domains: true)
|
83
|
+
comment_token = "//".freeze
|
84
|
+
private_token = "===BEGIN PRIVATE DOMAINS===".freeze
|
85
|
+
section = nil # 1 == ICANN, 2 == PRIVATE
|
86
|
+
|
123
87
|
new do |list|
|
124
88
|
input.each_line do |line|
|
125
89
|
line.strip!
|
126
|
-
|
127
|
-
|
128
|
-
|
90
|
+
case # rubocop:disable Style/EmptyCaseCondition
|
91
|
+
|
92
|
+
# skip blank lines
|
93
|
+
when line.empty?
|
129
94
|
next
|
130
|
-
|
131
|
-
|
95
|
+
|
96
|
+
# include private domains or stop scanner
|
97
|
+
when line.include?(private_token)
|
98
|
+
break if !private_domains
|
99
|
+
section = 2
|
100
|
+
|
101
|
+
# skip comments
|
102
|
+
when line.start_with?(comment_token)
|
132
103
|
next
|
133
|
-
|
104
|
+
|
134
105
|
else
|
135
|
-
list.add(Rule.factory(line), false)
|
106
|
+
list.add(Rule.factory(line, private: section == 2), reindex: false)
|
107
|
+
|
136
108
|
end
|
137
109
|
end
|
138
110
|
end
|
139
111
|
end
|
112
|
+
# rubocop:enable Metrics/MethodLength
|
113
|
+
|
140
114
|
|
141
115
|
# Gets the array of rules.
|
142
116
|
#
|
143
117
|
# @return [Array<PublicSuffix::Rule::*>]
|
144
118
|
attr_reader :rules
|
145
119
|
|
146
|
-
# Gets the naive index, a hash that with the keys being the first label of
|
147
|
-
# every rule pointing to an array of integers (indexes of the rules in @rules).
|
148
|
-
#
|
149
|
-
# @return [Array]
|
150
|
-
attr_reader :indexes
|
151
120
|
|
152
121
|
# Initializes an empty {PublicSuffix::List}.
|
153
122
|
#
|
154
123
|
# @yield [self] Yields on self.
|
155
124
|
# @yieldparam [PublicSuffix::List] self The newly created instance.
|
156
125
|
#
|
157
|
-
def initialize
|
158
|
-
@rules
|
126
|
+
def initialize
|
127
|
+
@rules = []
|
159
128
|
yield(self) if block_given?
|
160
|
-
|
129
|
+
reindex!
|
161
130
|
end
|
162
131
|
|
132
|
+
|
163
133
|
# Creates a naive index for +@rules+. Just a hash that will tell
|
164
134
|
# us where the elements of +@rules+ are relative to its first
|
165
135
|
# {PublicSuffix::Rule::Base#labels} element.
|
166
136
|
#
|
167
137
|
# For instance if @rules[5] and @rules[4] are the only elements of the list
|
168
|
-
# where Rule#labels.first is 'us' @indexes['us'] #=> [5,4], that way in
|
138
|
+
# where Rule#labels.first is 'us' @indexes['us'] #=> [5,4], that way in
|
169
139
|
# select we can avoid mapping every single rule against the candidate domain.
|
170
|
-
def
|
140
|
+
def reindex!
|
171
141
|
@indexes = {}
|
172
|
-
@rules.
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
@indexes[elm] << inx
|
177
|
-
end
|
142
|
+
@rules.each_with_index do |rule, index|
|
143
|
+
tld = Domain.name_to_labels(rule.value).last
|
144
|
+
@indexes[tld] ||= []
|
145
|
+
@indexes[tld] << index
|
178
146
|
end
|
179
147
|
end
|
180
148
|
|
149
|
+
# Gets a copy of the naive index, a hash whose keys are the TLD (last label) of
|
150
|
+
# every rule pointing to an array of integers (indexes of the rules in @rules).
|
151
|
+
def indexes
|
152
|
+
@indexes.dup
|
153
|
+
end
|
154
|
+
|
155
|
+
|
181
156
|
# Checks whether two lists are equal.
|
182
157
|
#
|
183
158
|
# List <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
|
@@ -190,39 +165,31 @@ module PublicSuffix
|
|
190
165
|
# @return [Boolean]
|
191
166
|
def ==(other)
|
192
167
|
return false unless other.is_a?(List)
|
193
|
-
|
194
|
-
self.rules == other.rules
|
168
|
+
equal?(other) || rules == other.rules
|
195
169
|
end
|
196
|
-
alias
|
170
|
+
alias eql? ==
|
197
171
|
|
198
172
|
# Iterates each rule in the list.
|
199
173
|
def each(*args, &block)
|
200
174
|
@rules.each(*args, &block)
|
201
175
|
end
|
202
176
|
|
203
|
-
# Gets the list as array.
|
204
|
-
#
|
205
|
-
# @return [Array<PublicSuffix::Rule::*>]
|
206
|
-
def to_a
|
207
|
-
@rules
|
208
|
-
end
|
209
177
|
|
210
|
-
# Adds the given object to the list
|
211
|
-
# and optionally refreshes the rule index.
|
178
|
+
# Adds the given object to the list and optionally refreshes the rule index.
|
212
179
|
#
|
213
180
|
# @param [PublicSuffix::Rule::*] rule
|
214
181
|
# The rule to add to the list.
|
215
|
-
# @param [Boolean]
|
182
|
+
# @param [Boolean] reindex
|
216
183
|
# Set to true to recreate the rule index
|
217
184
|
# after the rule has been added to the list.
|
218
185
|
#
|
219
186
|
# @return [self]
|
220
187
|
#
|
221
|
-
# @see #
|
188
|
+
# @see #reindex!
|
222
189
|
#
|
223
|
-
def add(rule,
|
190
|
+
def add(rule, reindex: true)
|
224
191
|
@rules << rule
|
225
|
-
|
192
|
+
reindex! if reindex
|
226
193
|
self
|
227
194
|
end
|
228
195
|
alias << add
|
@@ -233,7 +200,6 @@ module PublicSuffix
|
|
233
200
|
def size
|
234
201
|
@rules.size
|
235
202
|
end
|
236
|
-
alias length size
|
237
203
|
|
238
204
|
# Checks whether the list is empty.
|
239
205
|
#
|
@@ -247,54 +213,73 @@ module PublicSuffix
|
|
247
213
|
# @return [self]
|
248
214
|
def clear
|
249
215
|
@rules.clear
|
216
|
+
reindex!
|
250
217
|
self
|
251
218
|
end
|
252
219
|
|
253
|
-
#
|
220
|
+
# Finds and returns the most appropriate rule for the domain name.
|
254
221
|
#
|
255
222
|
# From the Public Suffix List documentation:
|
256
223
|
#
|
257
|
-
#
|
224
|
+
# - If a hostname matches more than one rule in the file,
|
258
225
|
# the longest matching rule (the one with the most levels) will be used.
|
259
|
-
#
|
226
|
+
# - An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
|
260
227
|
# An exception rule takes priority over any other matching rule.
|
261
228
|
#
|
262
|
-
#
|
263
|
-
#
|
264
|
-
#
|
265
|
-
#
|
266
|
-
#
|
267
|
-
#
|
268
|
-
#
|
269
|
-
#
|
270
|
-
#
|
271
|
-
#
|
272
|
-
#
|
273
|
-
# @param [String, #to_s]
|
274
|
-
#
|
275
|
-
# @return [PublicSuffix::Rule
|
276
|
-
def find(
|
277
|
-
|
278
|
-
|
279
|
-
|
229
|
+
# ## Algorithm description
|
230
|
+
#
|
231
|
+
# 1. Match domain against all rules and take note of the matching ones.
|
232
|
+
# 2. If no rules match, the prevailing rule is "*".
|
233
|
+
# 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
|
234
|
+
# 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
|
235
|
+
# 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
|
236
|
+
# 6. The public suffix is the set of labels from the domain
|
237
|
+
# which directly match the labels of the prevailing rule (joined by dots).
|
238
|
+
# 7. The registered domain is the public suffix plus one additional label.
|
239
|
+
#
|
240
|
+
# @param name [String, #to_s] The domain name.
|
241
|
+
# @param [PublicSuffix::Rule::*] default The default rule to return in case no rule matches.
|
242
|
+
# @return [PublicSuffix::Rule::*]
|
243
|
+
def find(name, default: default_rule, **options)
|
244
|
+
rule = select(name, **options).inject do |l, r|
|
245
|
+
return r if r.class == Rule::Exception
|
246
|
+
l.length > r.length ? l : r
|
247
|
+
end
|
248
|
+
rule || default
|
280
249
|
end
|
281
250
|
|
282
251
|
# Selects all the rules matching given domain.
|
283
252
|
#
|
284
|
-
#
|
285
|
-
#
|
253
|
+
# Internally, the lookup relies heavily on `@indexes`. The input is split into labels,
|
254
|
+
# and we retrieve from the index only the rules that end with the last input label. After that,
|
255
|
+
# a sequential scan is performed. In most cases, where the number of rules for the same label
|
256
|
+
# is limited, this algorithm is efficient enough.
|
286
257
|
#
|
287
|
-
#
|
258
|
+
# If `ignore_private` is set to true, the algorithm will skip the rules that are flagged as private domain.
|
259
|
+
# Note that the rules will still be part of the loop. If you frequently need to access lists
|
260
|
+
# ignoring the private domains, you should create a list that doesn't include these domains setting the
|
261
|
+
# `private_domains: false` option when calling {.parse}.
|
288
262
|
#
|
263
|
+
# @param [String, #to_s] name The domain name.
|
264
|
+
# @param [Boolean] ignore_private
|
289
265
|
# @return [Array<PublicSuffix::Rule::*>]
|
290
|
-
def select(
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
266
|
+
def select(name, ignore_private: false)
|
267
|
+
name = name.to_s
|
268
|
+
indices = (@indexes[Domain.name_to_labels(name).last] || [])
|
269
|
+
|
270
|
+
finder = @rules.values_at(*indices).lazy
|
271
|
+
finder = finder.select { |rule| rule.match?(name) }
|
272
|
+
finder = finder.select { |rule| !rule.private } if ignore_private
|
273
|
+
finder.to_a
|
274
|
+
end
|
275
|
+
|
276
|
+
# Gets the default rule.
|
277
|
+
#
|
278
|
+
# @see PublicSuffix::Rule.default_rule
|
279
|
+
#
|
280
|
+
# @return [PublicSuffix::Rule::*]
|
281
|
+
def default_rule
|
282
|
+
PublicSuffix::Rule.default
|
298
283
|
end
|
299
284
|
|
300
285
|
end
|
data/lib/public_suffix/rule.rb
CHANGED
@@ -1,17 +1,15 @@
|
|
1
|
-
#
|
2
|
-
# Public Suffix
|
1
|
+
# = Public Suffix
|
3
2
|
#
|
4
3
|
# Domain name parser based on the Public Suffix List.
|
5
4
|
#
|
6
|
-
# Copyright (c) 2009-
|
7
|
-
#
|
5
|
+
# Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
|
8
6
|
|
9
7
|
module PublicSuffix
|
10
8
|
|
11
9
|
# A Rule is a special object which holds a single definition
|
12
10
|
# of the Public Suffix List.
|
13
11
|
#
|
14
|
-
# There are 3 types of
|
12
|
+
# There are 3 types of rules, each one represented by a specific
|
15
13
|
# subclass within the +PublicSuffix::Rule+ namespace.
|
16
14
|
#
|
17
15
|
# To create a new Rule, use the {PublicSuffix::Rule.factory} method.
|
@@ -21,12 +19,11 @@ module PublicSuffix
|
|
21
19
|
#
|
22
20
|
module Rule
|
23
21
|
|
24
|
-
#
|
25
22
|
# = Abstract rule class
|
26
23
|
#
|
27
24
|
# This represents the base class for a Rule definition
|
28
|
-
# in the {Public Suffix List}[
|
29
|
-
#
|
25
|
+
# in the {Public Suffix List}[https://publicsuffix.org].
|
26
|
+
#
|
30
27
|
# This is intended to be an Abstract class
|
31
28
|
# and you shouldn't create a direct instance. The only purpose
|
32
29
|
# of this class is to expose a common interface
|
@@ -36,28 +33,21 @@ module PublicSuffix
|
|
36
33
|
# * {PublicSuffix::Rule::Exception}
|
37
34
|
# * {PublicSuffix::Rule::Wildcard}
|
38
35
|
#
|
39
|
-
#
|
36
|
+
# ## Properties
|
40
37
|
#
|
41
38
|
# A rule is composed by 4 properties:
|
42
39
|
#
|
43
|
-
#
|
44
|
-
# in the public suffix list
|
45
|
-
# value - The value, a normalized version of the rule name.
|
40
|
+
# value - A normalized version of the rule name.
|
46
41
|
# The normalization process depends on rule type.
|
47
|
-
# type - The rule type (:normal, :wildcard, :exception)
|
48
|
-
# labels - The canonicalized rule name
|
49
42
|
#
|
50
43
|
# Here's an example
|
51
44
|
#
|
52
45
|
# PublicSuffix::Rule.factory("*.google.com")
|
53
46
|
# #<PublicSuffix::Rule::Wildcard:0x1015c14b0
|
54
|
-
# @labels=["com", "google"],
|
55
|
-
# @name="*.google.com",
|
56
|
-
# @type=:wildcard,
|
57
47
|
# @value="google.com"
|
58
48
|
# >
|
59
49
|
#
|
60
|
-
#
|
50
|
+
# ## Rule Creation
|
61
51
|
#
|
62
52
|
# The best way to create a new rule is passing the rule name
|
63
53
|
# to the <tt>PublicSuffix::Rule.factory</tt> method.
|
@@ -71,35 +61,34 @@ module PublicSuffix
|
|
71
61
|
# This method will detect the rule type and create an instance
|
72
62
|
# from the proper rule class.
|
73
63
|
#
|
74
|
-
#
|
64
|
+
# ## Rule Usage
|
75
65
|
#
|
76
|
-
# A rule describes the composition of a domain name
|
77
|
-
#
|
78
|
-
# into tld, sld and trd.
|
66
|
+
# A rule describes the composition of a domain name and explains how to tokenize
|
67
|
+
# the name into tld, sld and trd.
|
79
68
|
#
|
80
|
-
# To use a rule, you first need to be sure the
|
69
|
+
# To use a rule, you first need to be sure the name you want to tokenize
|
81
70
|
# can be handled by the current rule.
|
82
71
|
# You can use the <tt>#match?</tt> method.
|
83
72
|
#
|
84
73
|
# rule = PublicSuffix::Rule.factory("com")
|
85
|
-
#
|
74
|
+
#
|
86
75
|
# rule.match?("google.com")
|
87
76
|
# # => true
|
88
|
-
#
|
77
|
+
#
|
89
78
|
# rule.match?("google.net")
|
90
79
|
# # => false
|
91
80
|
#
|
92
|
-
# Rule order is significant. A
|
81
|
+
# Rule order is significant. A name can match more than one rule.
|
93
82
|
# See the {Public Suffix Documentation}[http://publicsuffix.org/format/]
|
94
83
|
# to learn more about rule priority.
|
95
84
|
#
|
96
85
|
# When you have the right rule, you can use it to tokenize the domain name.
|
97
|
-
#
|
86
|
+
#
|
98
87
|
# rule = PublicSuffix::Rule.factory("com")
|
99
|
-
#
|
88
|
+
#
|
100
89
|
# rule.decompose("google.com")
|
101
90
|
# # => ["google", "com"]
|
102
|
-
#
|
91
|
+
#
|
103
92
|
# rule.decompose("www.google.com")
|
104
93
|
# # => ["www.google", "com"]
|
105
94
|
#
|
@@ -107,145 +96,107 @@ module PublicSuffix
|
|
107
96
|
#
|
108
97
|
class Base
|
109
98
|
|
110
|
-
|
99
|
+
# @return [String] the rule definition
|
100
|
+
attr_reader :value
|
111
101
|
|
112
|
-
#
|
113
|
-
|
114
|
-
#
|
115
|
-
# @param [String] name
|
116
|
-
# The name of the rule
|
117
|
-
# @param [String] value
|
118
|
-
# The value of the rule. If nil, defaults to +name+.
|
119
|
-
#
|
120
|
-
def initialize(name, value = nil)
|
121
|
-
@name = name.to_s
|
122
|
-
@value = value || @name
|
123
|
-
@labels = Domain.domain_to_labels(@value)
|
124
|
-
end
|
102
|
+
# @return [Boolean] true if the rule is a private domain
|
103
|
+
attr_reader :private
|
125
104
|
|
126
|
-
#
|
127
|
-
# The rule type name.
|
128
|
-
#
|
129
|
-
# @return [Symbol]
|
130
|
-
#
|
131
|
-
def self.type
|
132
|
-
@type ||= self.name.split("::").last.downcase.to_sym
|
133
|
-
end
|
134
105
|
|
106
|
+
# Initializes a new rule with the given value.
|
107
|
+
# The value is converted to a String and stored together with the +private+ flag.
|
135
108
|
#
|
136
|
-
# @
|
137
|
-
|
138
|
-
|
139
|
-
|
109
|
+
# @param value [String] the value of the rule
|
110
|
+
def initialize(value, private: false)
|
111
|
+
@value = value.to_s
|
112
|
+
@private = private
|
140
113
|
end
|
141
114
|
|
142
115
|
# Checks whether this rule is equal to <tt>other</tt>.
|
143
116
|
#
|
144
|
-
# @param
|
145
|
-
# The rule to compare.
|
146
|
-
#
|
117
|
+
# @param [PublicSuffix::Rule::*] other The rule to compare
|
147
118
|
# @return [Boolean]
|
148
119
|
# Returns true if this rule and other are instances of the same class
|
149
120
|
# and have the same value, false otherwise.
|
150
121
|
def ==(other)
|
151
|
-
|
152
|
-
self.equal?(other) ||
|
153
|
-
self.name == other.name
|
122
|
+
equal?(other) || (self.class == other.class && value == other.value)
|
154
123
|
end
|
155
|
-
alias
|
124
|
+
alias eql? ==
|
156
125
|
|
157
|
-
# Checks if this rule matches +
|
126
|
+
# Checks if this rule matches +name+.
|
158
127
|
#
|
159
|
-
#
|
160
|
-
#
|
128
|
+
# A domain name is said to match a rule if and only if
|
129
|
+
# all of the following conditions are met:
|
161
130
|
#
|
162
|
-
#
|
131
|
+
# - When the domain and rule are split into corresponding labels,
|
132
|
+
# that the domain contains as many or more labels than the rule.
|
133
|
+
# - Beginning with the right-most labels of both the domain and the rule,
|
134
|
+
# and continuing for all labels in the rule, one finds that for every pair,
|
135
|
+
# either they are identical, or that the label from the rule is "*".
|
136
|
+
#
|
137
|
+
# @see https://publicsuffix.org/list/
|
163
138
|
#
|
164
139
|
# @example
|
165
|
-
#
|
166
|
-
# # #<PublicSuffix::Rule::Normal>
|
167
|
-
# rule.match?("example.com")
|
140
|
+
# Rule.factory("com").match?("example.com")
|
168
141
|
# # => true
|
169
|
-
#
|
142
|
+
# Rule.factory("com").match?("example.net")
|
170
143
|
# # => false
|
171
144
|
#
|
172
|
-
|
173
|
-
l1 = labels
|
174
|
-
l2 = Domain.domain_to_labels(domain)
|
175
|
-
odiff(l1, l2).empty?
|
176
|
-
end
|
177
|
-
|
178
|
-
# Checks if this rule allows +domain+.
|
179
|
-
#
|
180
|
-
# @param [String, #to_s] domain
|
181
|
-
# The domain name to check.
|
182
|
-
#
|
145
|
+
# @param name [String, #to_s] The domain name to check.
|
183
146
|
# @return [Boolean]
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
# # => true
|
192
|
-
#
|
193
|
-
def allow?(domain)
|
194
|
-
!decompose(domain).last.nil?
|
195
|
-
end
|
196
|
-
|
197
|
-
# Gets the length of this rule for comparison.
|
198
|
-
# The length usually matches the number of rule +parts+.
|
199
|
-
#
|
200
|
-
# Subclasses might actually override this method.
|
201
|
-
#
|
202
|
-
# @return [Integer] The number of parts.
|
203
|
-
def length
|
204
|
-
parts.length
|
147
|
+
def match?(name)
|
148
|
+
# Note: it works because of the assumption there are no
|
149
|
+
# rules like foo.*.com. If the assumption is incorrect,
|
150
|
+
# we need to properly walk the input and skip parts according
|
151
|
+
# to wildcard component.
|
152
|
+
diff = name.chomp(value)
|
153
|
+
diff.empty? || diff[-1] == "."
|
205
154
|
end
|
206
155
|
|
207
|
-
#
|
208
|
-
# @raise [NotImplementedError]
|
209
156
|
# @abstract
|
210
157
|
def parts
|
211
|
-
raise
|
158
|
+
raise NotImplementedError
|
212
159
|
end
|
213
160
|
|
214
|
-
#
|
215
|
-
# @param [String, #to_s] domain
|
216
|
-
# The domain name to decompose.
|
217
|
-
#
|
218
|
-
# @return [Array<String, nil>]
|
219
|
-
#
|
220
|
-
# @raise [NotImplementedError]
|
221
161
|
# @abstract
|
222
|
-
def
|
223
|
-
raise
|
162
|
+
def length
|
163
|
+
raise NotImplementedError
|
224
164
|
end
|
225
165
|
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
while(ii < one.size && one[ii] == two[ii])
|
232
|
-
ii += 1
|
233
|
-
end
|
234
|
-
|
235
|
-
one[ii..one.length]
|
166
|
+
# @abstract
|
167
|
+
# @param [String, #to_s] name The domain name to decompose
|
168
|
+
# @return [Array<String, nil>]
|
169
|
+
def decompose(*)
|
170
|
+
raise NotImplementedError
|
236
171
|
end
|
237
172
|
|
238
173
|
end
|
239
174
|
|
175
|
+
# Normal represents a standard rule (e.g. com).
|
240
176
|
class Normal < Base
|
241
177
|
|
242
|
-
# Initializes a new rule
|
178
|
+
# Initializes a new rule from +definition+.
|
179
|
+
#
|
180
|
+
# @param definition [String] the rule as defined in the PSL
|
181
|
+
def initialize(definition, **options)
|
182
|
+
super(definition, **options)
|
183
|
+
end
|
184
|
+
|
185
|
+
# Gets the original rule definition.
|
243
186
|
#
|
244
|
-
# @
|
245
|
-
|
187
|
+
# @return [String] The rule definition.
|
188
|
+
def rule
|
189
|
+
value
|
190
|
+
end
|
191
|
+
|
192
|
+
# Decomposes the domain name according to rule properties.
|
246
193
|
#
|
247
|
-
|
248
|
-
|
194
|
+
# @param [String, #to_s] domain The domain name to decompose
|
195
|
+
# @return [Array<String>] The array with [trd + sld, tld].
|
196
|
+
def decompose(domain)
|
197
|
+
suffix = parts.join('\.')
|
198
|
+
matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
|
199
|
+
matches ? matches[1..2] : [nil, nil]
|
249
200
|
end
|
250
201
|
|
251
202
|
# dot-split rule value and returns all rule parts
|
@@ -253,74 +204,96 @@ module PublicSuffix
|
|
253
204
|
#
|
254
205
|
# @return [Array<String>]
|
255
206
|
def parts
|
256
|
-
@
|
207
|
+
@value.split(DOT)
|
257
208
|
end
|
258
209
|
|
259
|
-
#
|
260
|
-
#
|
261
|
-
# @param [String, #to_s] domain
|
262
|
-
# The domain name to decompose.
|
210
|
+
# Gets the length of this rule for comparison,
|
211
|
+
# represented by the number of dot-separated parts in the rule.
|
263
212
|
#
|
264
|
-
# @return [
|
265
|
-
|
266
|
-
|
267
|
-
def decompose(domain)
|
268
|
-
domain.to_s.chomp(".") =~ /^(.*)\.(#{parts.join('\.')})$/
|
269
|
-
[$1, $2]
|
213
|
+
# @return [Integer] The length of the rule.
|
214
|
+
def length
|
215
|
+
@length ||= parts.length
|
270
216
|
end
|
271
217
|
|
272
218
|
end
|
273
219
|
|
220
|
+
# Wildcard represents a wildcard rule (e.g. *.co.uk).
|
274
221
|
class Wildcard < Base
|
275
222
|
|
276
|
-
# Initializes a new rule
|
223
|
+
# Initializes a new rule from +definition+.
|
277
224
|
#
|
278
|
-
#
|
279
|
-
#
|
225
|
+
# The wildcard "*" is removed from the value, as it's common
|
226
|
+
# for each wildcard rule.
|
280
227
|
#
|
281
|
-
|
282
|
-
|
228
|
+
# @param definition [String] the rule as defined in the PSL
|
229
|
+
def initialize(definition, **options)
|
230
|
+
super(definition.to_s[2..-1], **options)
|
283
231
|
end
|
284
232
|
|
285
|
-
#
|
286
|
-
# in the order they appear in the value.
|
233
|
+
# Gets the original rule definition.
|
287
234
|
#
|
288
|
-
# @return [
|
289
|
-
def
|
290
|
-
|
235
|
+
# @return [String] The rule definition.
|
236
|
+
def rule
|
237
|
+
value == "" ? STAR : STAR + DOT + value
|
291
238
|
end
|
292
239
|
|
293
|
-
#
|
294
|
-
# the +*+ char.
|
240
|
+
# Decomposes the domain name according to rule properties.
|
295
241
|
#
|
296
|
-
# @
|
297
|
-
|
298
|
-
|
242
|
+
# @param [String, #to_s] domain The domain name to decompose
|
243
|
+
# @return [Array<String>] The array with [trd + sld, tld].
|
244
|
+
def decompose(domain)
|
245
|
+
suffix = ([".*?"] + parts).join('\.')
|
246
|
+
matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
|
247
|
+
matches ? matches[1..2] : [nil, nil]
|
299
248
|
end
|
300
249
|
|
301
|
-
#
|
302
|
-
#
|
303
|
-
# @param [String, #to_s] domain
|
304
|
-
# The domain name to decompose.
|
250
|
+
# dot-split rule value and returns all rule parts
|
251
|
+
# in the order they appear in the value.
|
305
252
|
#
|
306
253
|
# @return [Array<String>]
|
307
|
-
|
254
|
+
def parts
|
255
|
+
@value.split(DOT)
|
256
|
+
end
|
257
|
+
|
258
|
+
# Gets the length of this rule for comparison,
|
259
|
+
# represented by the number of dot-separated parts in the rule
|
260
|
+
# plus 1 for the *.
|
308
261
|
#
|
309
|
-
|
310
|
-
|
311
|
-
|
262
|
+
# @return [Integer] The length of the rule.
|
263
|
+
def length
|
264
|
+
@length ||= parts.length + 1 # * counts as 1
|
312
265
|
end
|
313
266
|
|
314
267
|
end
|
315
268
|
|
269
|
+
# Exception represents an exception rule (e.g. !parliament.uk).
|
316
270
|
class Exception < Base
|
317
271
|
|
318
|
-
# Initializes a new rule
|
272
|
+
# Initializes a new rule from +definition+.
|
273
|
+
#
|
274
|
+
# The bang ! is removed from the value, as it's common
|
275
|
+
# for each exception rule.
|
276
|
+
#
|
277
|
+
# @param definition [String] the rule as defined in the PSL
|
278
|
+
def initialize(definition, **options)
|
279
|
+
super(definition.to_s[1..-1], **options)
|
280
|
+
end
|
281
|
+
|
282
|
+
# Gets the original rule definition.
|
319
283
|
#
|
320
|
-
# @
|
284
|
+
# @return [String] The rule definition.
|
285
|
+
def rule
|
286
|
+
BANG + value
|
287
|
+
end
|
288
|
+
|
289
|
+
# Decomposes the domain name according to rule properties.
|
321
290
|
#
|
322
|
-
|
323
|
-
|
291
|
+
# @param [String, #to_s] domain The domain name to decompose
|
292
|
+
# @return [Array<String>] The array with [trd + sld, tld].
|
293
|
+
def decompose(domain)
|
294
|
+
suffix = parts.join('\.')
|
295
|
+
matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
|
296
|
+
matches ? matches[1..2] : [nil, nil]
|
324
297
|
end
|
325
298
|
|
326
299
|
# dot-split rule value and returns all rule parts
|
@@ -329,42 +302,28 @@ module PublicSuffix
|
|
329
302
|
#
|
330
303
|
# See http://publicsuffix.org/format/:
|
331
304
|
# If the prevailing rule is a exception rule,
|
332
|
-
# modify it by removing the leftmost label.
|
305
|
+
# modify it by removing the leftmost label.
|
333
306
|
#
|
334
307
|
# @return [Array<String>]
|
335
308
|
def parts
|
336
|
-
@
|
309
|
+
@value.split(DOT)[1..-1]
|
337
310
|
end
|
338
311
|
|
339
|
-
#
|
340
|
-
#
|
341
|
-
# @param [String, #to_s] domain
|
342
|
-
# The domain name to decompose.
|
343
|
-
#
|
344
|
-
# @return [Array<String>]
|
345
|
-
# The array with [trd + sld, tld].
|
312
|
+
# Gets the length of this rule for comparison,
|
313
|
+
# represented by the number of dot-separated parts in the rule.
|
346
314
|
#
|
347
|
-
|
348
|
-
|
349
|
-
|
315
|
+
# @return [Integer] The length of the rule.
|
316
|
+
def length
|
317
|
+
@length ||= parts.length
|
350
318
|
end
|
351
319
|
|
352
320
|
end
|
353
321
|
|
354
|
-
RULES = {
|
355
|
-
'*' => Wildcard,
|
356
|
-
'!' => Exception
|
357
|
-
}
|
358
|
-
RULES.default = Normal
|
359
322
|
|
360
323
|
# Takes the +name+ of the rule, detects the specific rule class
|
361
324
|
# and creates a new instance of that class.
|
362
325
|
# The +name+ becomes the rule +value+.
|
363
326
|
#
|
364
|
-
# @param [String] name The rule definition.
|
365
|
-
#
|
366
|
-
# @return [PublicSuffix::Rule::*] A rule instance.
|
367
|
-
#
|
368
327
|
# @example Creates a Normal rule
|
369
328
|
# PublicSuffix::Rule.factory("ar")
|
370
329
|
# # => #<PublicSuffix::Rule::Normal>
|
@@ -377,8 +336,28 @@ module PublicSuffix
|
|
377
336
|
# PublicSuffix::Rule.factory("!congresodelalengua3.ar")
|
378
337
|
# # => #<PublicSuffix::Rule::Exception>
|
379
338
|
#
|
380
|
-
|
381
|
-
|
339
|
+
# @param [String] content The rule content.
|
340
|
+
# @return [PublicSuffix::Rule::*] A rule instance.
|
341
|
+
def self.factory(content, **options)
|
342
|
+
case content.to_s[0, 1]
|
343
|
+
when STAR
|
344
|
+
Wildcard
|
345
|
+
when BANG
|
346
|
+
Exception
|
347
|
+
else
|
348
|
+
Normal
|
349
|
+
end.new(content, **options)
|
350
|
+
end
|
351
|
+
|
352
|
+
# The default rule to use if no rule match.
|
353
|
+
#
|
354
|
+
# The default rule is "*". From https://publicsuffix.org/list/:
|
355
|
+
#
|
356
|
+
# > If no rules match, the prevailing rule is "*".
|
357
|
+
#
|
358
|
+
# @return [PublicSuffix::Rule::Wildcard] The default rule.
|
359
|
+
def self.default
|
360
|
+
factory(STAR)
|
382
361
|
end
|
383
362
|
|
384
363
|
end
|