public_suffix 1.5.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,16 @@
1
- #
2
- # Public Suffix
1
+ # = Public Suffix
3
2
  #
4
3
  # Domain name parser based on the Public Suffix List.
5
4
  #
6
- # Copyright (c) 2009-2015 Simone Carletti <weppos@weppos.net>
7
- #
5
+ # Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
8
6
 
9
7
  module PublicSuffix
10
8
 
11
9
  class Error < StandardError
12
10
  end
13
11
 
14
- # Raised when trying to parse an invalid domain.
15
- # A domain is considered invalid when no rule is found
16
- # in the definition list.
12
+ # Raised when trying to parse an invalid name.
13
+ # A name is considered invalid when no rule is found in the definition list.
17
14
  #
18
15
  # @example
19
16
  #
@@ -26,10 +23,7 @@ module PublicSuffix
26
23
  class DomainInvalid < Error
27
24
  end
28
25
 
29
- # Raised when trying to parse a domain
30
- # which is formally defined by a rule,
31
- # but the rules set a requirement which is not satisfied
32
- # by the input you are trying to parse.
26
+ # Raised when trying to parse a name that matches a suffix.
33
27
  #
34
28
  # @example
35
29
  #
@@ -42,10 +36,4 @@ module PublicSuffix
42
36
  class DomainNotAllowed < DomainInvalid
43
37
  end
44
38
 
45
- # Backward Compatibility
46
- #
47
- # @deprecated Use {PublicSuffix::DomainInvalid}.
48
- #
49
- InvalidDomain = DomainInvalid
50
-
51
39
  end
@@ -1,10 +1,8 @@
1
- #
2
- # Public Suffix
1
+ # = Public Suffix
3
2
  #
4
3
  # Domain name parser based on the Public Suffix List.
5
4
  #
6
- # Copyright (c) 2009-2015 Simone Carletti <weppos@weppos.net>
7
- #
5
+ # Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
8
6
 
9
7
  module PublicSuffix
10
8
 
@@ -42,17 +40,16 @@ module PublicSuffix
42
40
  class List
43
41
  include Enumerable
44
42
 
45
- class << self
46
- attr_writer :default_definition
47
- end
43
+ DEFAULT_LIST_PATH = File.join(File.dirname(__FILE__), "..", "..", "data", "list.txt")
48
44
 
49
45
  # Gets the default rule list.
46
+ #
50
47
  # Initializes a new {PublicSuffix::List} parsing the content
51
- # of {PublicSuffix::List.default_definition}, if required.
48
+ # of {PublicSuffix::List.default_list_content}, if required.
52
49
  #
53
50
  # @return [PublicSuffix::List]
54
- def self.default
55
- @default ||= parse(default_definition)
51
+ def self.default(**options)
52
+ @default ||= parse(File.read(DEFAULT_LIST_PATH), options)
56
53
  end
57
54
 
58
55
  # Sets the default rule list to +value+.
@@ -65,25 +62,6 @@ module PublicSuffix
65
62
  @default = value
66
63
  end
67
64
 
68
- # Shows if support for private (non-ICANN) domains is enabled or not
69
- #
70
- # @return [Boolean]
71
- def self.private_domains?
72
- @private_domains != false
73
- end
74
-
75
- # Enables/disables support for private (non-ICANN) domains
76
- # Implicitly reloads the list
77
- #
78
- # @param [Boolean] value
79
- # enable/disable support
80
- #
81
- # @return [PublicSuffix::List]
82
- def self.private_domains=(value)
83
- @private_domains = !!value
84
- self.clear
85
- end
86
-
87
65
  # Sets the default rule list to +nil+.
88
66
  #
89
67
  # @return [self]
@@ -92,92 +70,89 @@ module PublicSuffix
92
70
  self
93
71
  end
94
72
 
95
- # Resets the default rule list and reinitialize it
96
- # parsing the content of {PublicSuffix::List.default_definition}.
97
- #
98
- # @return [PublicSuffix::List]
99
- def self.reload
100
- self.clear.default
101
- end
102
-
103
- DEFAULT_DEFINITION_PATH = File.join(File.dirname(__FILE__), "..", "..", "data", "definitions.txt")
104
-
105
- # Gets the default definition list.
106
- # Can be any <tt>IOStream</tt> including a <tt>File</tt>
107
- # or a simple <tt>String</tt>.
108
- # The object must respond to <tt>#each_line</tt>.
109
- #
110
- # @return [File]
111
- def self.default_definition
112
- @default_definition || File.new(DEFAULT_DEFINITION_PATH, "r:utf-8")
113
- end
73
+ # rubocop:disable Metrics/MethodLength
114
74
 
115
75
  # Parse given +input+ treating the content as Public Suffix List.
116
76
  #
117
77
  # See http://publicsuffix.org/format/ for more details about input format.
118
78
  #
119
- # @param [String] input The rule list to parse.
120
- #
79
+ # @param input [#each_line] The list to parse.
80
+ # @param private_domains [Boolean] whether to include the private domains section.
121
81
  # @return [Array<PublicSuffix::Rule::*>]
122
- def self.parse(input)
82
+ def self.parse(input, private_domains: true)
83
+ comment_token = "//".freeze
84
+ private_token = "===BEGIN PRIVATE DOMAINS===".freeze
85
+ section = nil # 1 == ICANN, 2 == PRIVATE
86
+
123
87
  new do |list|
124
88
  input.each_line do |line|
125
89
  line.strip!
126
- break if !private_domains? && line.include?('===BEGIN PRIVATE DOMAINS===')
127
- # strip blank lines
128
- if line.empty?
90
+ case # rubocop:disable Style/EmptyCaseCondition
91
+
92
+ # skip blank lines
93
+ when line.empty?
129
94
  next
130
- # strip comments
131
- elsif line =~ %r{^//}
95
+
96
+ # include private domains or stop scanner
97
+ when line.include?(private_token)
98
+ break if !private_domains
99
+ section = 2
100
+
101
+ # skip comments
102
+ when line.start_with?(comment_token)
132
103
  next
133
- # append rule
104
+
134
105
  else
135
- list.add(Rule.factory(line), false)
106
+ list.add(Rule.factory(line, private: section == 2), reindex: false)
107
+
136
108
  end
137
109
  end
138
110
  end
139
111
  end
112
+ # rubocop:enable Metrics/MethodLength
113
+
140
114
 
141
115
  # Gets the array of rules.
142
116
  #
143
117
  # @return [Array<PublicSuffix::Rule::*>]
144
118
  attr_reader :rules
145
119
 
146
- # Gets the naive index, a hash that with the keys being the first label of
147
- # every rule pointing to an array of integers (indexes of the rules in @rules).
148
- #
149
- # @return [Array]
150
- attr_reader :indexes
151
120
 
152
121
  # Initializes an empty {PublicSuffix::List}.
153
122
  #
154
123
  # @yield [self] Yields on self.
155
124
  # @yieldparam [PublicSuffix::List] self The newly created instance.
156
125
  #
157
- def initialize(&block)
158
- @rules = []
126
+ def initialize
127
+ @rules = []
159
128
  yield(self) if block_given?
160
- create_index!
129
+ reindex!
161
130
  end
162
131
 
132
+
163
133
  # Creates a naive index for +@rules+. Just a hash that will tell
164
134
  # us where the elements of +@rules+ are relative to its first
165
135
  # {PublicSuffix::Rule::Base#labels} element.
166
136
  #
167
137
  # For instance if @rules[5] and @rules[4] are the only elements of the list
168
- # where Rule#labels.first is 'us' @indexes['us'] #=> [5,4], that way in
138
+ # where Rule#labels.first is 'us' @indexes['us'] #=> [5,4], that way in
169
139
  # select we can avoid mapping every single rule against the candidate domain.
170
- def create_index!
140
+ def reindex!
171
141
  @indexes = {}
172
- @rules.map { |l| l.labels.first }.each_with_index do |elm, inx|
173
- if !@indexes.has_key?(elm)
174
- @indexes[elm] = [inx]
175
- else
176
- @indexes[elm] << inx
177
- end
142
+ @rules.each_with_index do |rule, index|
143
+ tld = Domain.name_to_labels(rule.value).last
144
+ @indexes[tld] ||= []
145
+ @indexes[tld] << index
178
146
  end
179
147
  end
180
148
 
149
+ # Gets the naive index: a hash whose keys are the last label (the TLD) of
150
+ # every rule value, each pointing to an array of integers (indexes of the rules in @rules).
151
+ def indexes
152
+ @indexes.dup
153
+ end
154
+
155
+
181
156
  # Checks whether two lists are equal.
182
157
  #
183
158
  # List <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
@@ -190,39 +165,31 @@ module PublicSuffix
190
165
  # @return [Boolean]
191
166
  def ==(other)
192
167
  return false unless other.is_a?(List)
193
- self.equal?(other) ||
194
- self.rules == other.rules
168
+ equal?(other) || rules == other.rules
195
169
  end
196
- alias :eql? :==
170
+ alias eql? ==
197
171
 
198
172
  # Iterates each rule in the list.
199
173
  def each(*args, &block)
200
174
  @rules.each(*args, &block)
201
175
  end
202
176
 
203
- # Gets the list as array.
204
- #
205
- # @return [Array<PublicSuffix::Rule::*>]
206
- def to_a
207
- @rules
208
- end
209
177
 
210
- # Adds the given object to the list
211
- # and optionally refreshes the rule index.
178
+ # Adds the given object to the list and optionally refreshes the rule index.
212
179
  #
213
180
  # @param [PublicSuffix::Rule::*] rule
214
181
  # The rule to add to the list.
215
- # @param [Boolean] index
182
+ # @param [Boolean] reindex
216
183
  # Set to true to recreate the rule index
217
184
  # after the rule has been added to the list.
218
185
  #
219
186
  # @return [self]
220
187
  #
221
- # @see #create_index!
188
+ # @see #reindex!
222
189
  #
223
- def add(rule, index = true)
190
+ def add(rule, reindex: true)
224
191
  @rules << rule
225
- create_index! if index == true
192
+ reindex! if reindex
226
193
  self
227
194
  end
228
195
  alias << add
@@ -233,7 +200,6 @@ module PublicSuffix
233
200
  def size
234
201
  @rules.size
235
202
  end
236
- alias length size
237
203
 
238
204
  # Checks whether the list is empty.
239
205
  #
@@ -247,54 +213,73 @@ module PublicSuffix
247
213
  # @return [self]
248
214
  def clear
249
215
  @rules.clear
216
+ reindex!
250
217
  self
251
218
  end
252
219
 
253
- # Returns the most appropriate rule for domain.
220
+ # Finds and returns the most appropriate rule for the domain name.
254
221
  #
255
222
  # From the Public Suffix List documentation:
256
223
  #
257
- # * If a hostname matches more than one rule in the file,
224
+ # - If a hostname matches more than one rule in the file,
258
225
  # the longest matching rule (the one with the most levels) will be used.
259
- # * An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
226
+ # - An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
260
227
  # An exception rule takes priority over any other matching rule.
261
228
  #
262
- # == Algorithm description
263
- #
264
- # * Match domain against all rules and take note of the matching ones.
265
- # * If no rules match, the prevailing rule is "*".
266
- # * If more than one rule matches, the prevailing rule is the one which is an exception rule.
267
- # * If there is no matching exception rule, the prevailing rule is the one with the most labels.
268
- # * If the prevailing rule is a exception rule, modify it by removing the leftmost label.
269
- # * The public suffix is the set of labels from the domain
270
- # which directly match the labels of the prevailing rule (joined by dots).
271
- # * The registered domain is the public suffix plus one additional label.
272
- #
273
- # @param [String, #to_s] domain The domain name.
274
- #
275
- # @return [PublicSuffix::Rule::*, nil]
276
- def find(domain)
277
- rules = select(domain)
278
- rules.detect { |r| r.type == :exception } ||
279
- rules.inject { |t,r| t.length > r.length ? t : r }
229
+ # ## Algorithm description
230
+ #
231
+ # 1. Match domain against all rules and take note of the matching ones.
232
+ # 2. If no rules match, the prevailing rule is "*".
233
+ # 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
234
+ # 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
235
+ # 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
236
+ # 6. The public suffix is the set of labels from the domain
237
+ # which directly match the labels of the prevailing rule (joined by dots).
238
+ # 7. The registered domain is the public suffix plus one additional label.
239
+ #
240
+ # @param name [String, #to_s] The domain name.
241
+ # @param [PublicSuffix::Rule::*] default The default rule to return in case no rule matches.
242
+ # @return [PublicSuffix::Rule::*]
243
+ def find(name, default: default_rule, **options)
244
+ rule = select(name, **options).inject do |l, r|
245
+ return r if r.class == Rule::Exception
246
+ l.length > r.length ? l : r
247
+ end
248
+ rule || default
280
249
  end
281
250
 
282
251
  # Selects all the rules matching given domain.
283
252
  #
284
- # Will use +@indexes+ to try only the rules that share the same first label,
285
- # that will speed up things when using +List.find('foo')+ a lot.
253
+ # Internally, the lookup relies heavily on `@indexes`. The input is split into labels,
254
+ # and we retrieve from the index only the rules that end with the last input label. After that,
255
+ # a sequential scan is performed. In most cases, where the number of rules for the same label
256
+ # is limited, this algorithm is efficient enough.
286
257
  #
287
- # @param [String, #to_s] domain The domain name.
258
+ # If `ignore_private` is set to true, the algorithm will skip the rules that are flagged as private domain.
259
+ # Note that the rules will still be part of the loop. If you frequently need to access lists
260
+ # ignoring the private domains, you should create a list that doesn't include these domains setting the
261
+ # `private_domains: false` option when calling {.parse}.
288
262
  #
263
+ # @param [String, #to_s] name The domain name.
264
+ # @param [Boolean] ignore_private
289
265
  # @return [Array<PublicSuffix::Rule::*>]
290
- def select(domain)
291
- # raise DomainInvalid, "Blank domain"
292
- return [] if domain.to_s =~ /\A\s*\z/
293
- # raise DomainInvalid, "`#{domain}' is not expected to contain a scheme"
294
- return [] if domain.include?("://")
295
-
296
- indices = (@indexes[Domain.domain_to_labels(domain).first] || [])
297
- @rules.values_at(*indices).select { |rule| rule.match?(domain) }
266
+ def select(name, ignore_private: false)
267
+ name = name.to_s
268
+ indices = (@indexes[Domain.name_to_labels(name).last] || [])
269
+
270
+ finder = @rules.values_at(*indices).lazy
271
+ finder = finder.select { |rule| rule.match?(name) }
272
+ finder = finder.select { |rule| !rule.private } if ignore_private
273
+ finder.to_a
274
+ end
275
+
276
+ # Gets the default rule.
277
+ #
278
+ # @see PublicSuffix::Rule.default_rule
279
+ #
280
+ # @return [PublicSuffix::Rule::*]
281
+ def default_rule
282
+ PublicSuffix::Rule.default
298
283
  end
299
284
 
300
285
  end
@@ -1,17 +1,15 @@
1
- #
2
- # Public Suffix
1
+ # = Public Suffix
3
2
  #
4
3
  # Domain name parser based on the Public Suffix List.
5
4
  #
6
- # Copyright (c) 2009-2015 Simone Carletti <weppos@weppos.net>
7
- #
5
+ # Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
8
6
 
9
7
  module PublicSuffix
10
8
 
11
9
  # A Rule is a special object which holds a single definition
12
10
  # of the Public Suffix List.
13
11
  #
14
- # There are 3 types of ruleas, each one represented by a specific
12
+ # There are 3 types of rules, each one represented by a specific
15
13
  # subclass within the +PublicSuffix::Rule+ namespace.
16
14
  #
17
15
  # To create a new Rule, use the {PublicSuffix::Rule#factory} method.
@@ -21,12 +19,11 @@ module PublicSuffix
21
19
  #
22
20
  module Rule
23
21
 
24
- #
25
22
  # = Abstract rule class
26
23
  #
27
24
  # This represents the base class for a Rule definition
28
- # in the {Public Suffix List}[http://publicsuffix.org].
29
- #
25
+ # in the {Public Suffix List}[https://publicsuffix.org].
26
+ #
30
27
  # This is intended to be an Abstract class
31
28
  # and you shouldn't create a direct instance. The only purpose
32
29
  # of this class is to expose a common interface
@@ -36,28 +33,21 @@ module PublicSuffix
36
33
  # * {PublicSuffix::Rule::Exception}
37
34
  # * {PublicSuffix::Rule::Wildcard}
38
35
  #
39
- # == Properties
36
+ # ## Properties
40
37
  #
41
38
  # A rule is composed of the following properties:
42
39
  #
43
- # name - The name of the rule, corresponding to the rule definition
44
- # in the public suffix list
45
- # value - The value, a normalized version of the rule name.
40
+ # value - A normalized version of the rule name.
46
41
  # The normalization process depends on rule type.
47
- # type - The rule type (:normal, :wildcard, :exception)
48
- # labels - The canonicalized rule name
49
42
  #
50
43
  # Here's an example
51
44
  #
52
45
  # PublicSuffix::Rule.factory("*.google.com")
53
46
  # #<PublicSuffix::Rule::Wildcard:0x1015c14b0
54
- # @labels=["com", "google"],
55
- # @name="*.google.com",
56
- # @type=:wildcard,
57
47
  # @value="google.com"
58
48
  # >
59
49
  #
60
- # == Rule Creation
50
+ # ## Rule Creation
61
51
  #
62
52
  # The best way to create a new rule is passing the rule name
63
53
  # to the <tt>PublicSuffix::Rule.factory</tt> method.
@@ -71,35 +61,34 @@ module PublicSuffix
71
61
  # This method will detect the rule type and create an instance
72
62
  # from the proper rule class.
73
63
  #
74
- # == Rule Usage
64
+ # ## Rule Usage
75
65
  #
76
- # A rule describes the composition of a domain name
77
- # and explains how to tokenize the domain name
78
- # into tld, sld and trd.
66
+ # A rule describes the composition of a domain name and explains how to tokenize
67
+ # the name into tld, sld and trd.
79
68
  #
80
- # To use a rule, you first need to be sure the domain you want to tokenize
69
+ # To use a rule, you first need to be sure the name you want to tokenize
81
70
  # can be handled by the current rule.
82
71
  # You can use the <tt>#match?</tt> method.
83
72
  #
84
73
  # rule = PublicSuffix::Rule.factory("com")
85
- #
74
+ #
86
75
  # rule.match?("google.com")
87
76
  # # => true
88
- #
77
+ #
89
78
  # rule.match?("google.net")
90
79
  # # => false
91
80
  #
92
- # Rule order is significant. A domain can match more than one rule.
81
+ # Rule order is significant. A name can match more than one rule.
93
82
  # See the {Public Suffix Documentation}[http://publicsuffix.org/format/]
94
83
  # to learn more about rule priority.
95
84
  #
96
85
  # When you have the right rule, you can use it to tokenize the domain name.
97
- #
86
+ #
98
87
  # rule = PublicSuffix::Rule.factory("com")
99
- #
88
+ #
100
89
  # rule.decompose("google.com")
101
90
  # # => ["google", "com"]
102
- #
91
+ #
103
92
  # rule.decompose("www.google.com")
104
93
  # # => ["www.google", "com"]
105
94
  #
@@ -107,145 +96,107 @@ module PublicSuffix
107
96
  #
108
97
  class Base
109
98
 
110
- attr_reader :name, :value, :labels
99
+ # @return [String] the rule definition
100
+ attr_reader :value
111
101
 
112
- # Initializes a new rule with name and value.
113
- # If value is +nil+, name also becomes the value for this rule.
114
- #
115
- # @param [String] name
116
- # The name of the rule
117
- # @param [String] value
118
- # The value of the rule. If nil, defaults to +name+.
119
- #
120
- def initialize(name, value = nil)
121
- @name = name.to_s
122
- @value = value || @name
123
- @labels = Domain.domain_to_labels(@value)
124
- end
102
+ # @return [Boolean] true if the rule is a private domain
103
+ attr_reader :private
125
104
 
126
- #
127
- # The rule type name.
128
- #
129
- # @return [Symbol]
130
- #
131
- def self.type
132
- @type ||= self.name.split("::").last.downcase.to_sym
133
- end
134
105
 
106
+ # Initializes a new rule with the given value.
107
+ # The +private+ flag marks the rule as a private domain (defaults to false).
135
108
  #
136
- # @see {type}
137
- #
138
- def type
139
- self.class.type
109
+ # @param value [String] the value of the rule
110
+ def initialize(value, private: false)
111
+ @value = value.to_s
112
+ @private = private
140
113
  end
141
114
 
142
115
  # Checks whether this rule is equal to <tt>other</tt>.
143
116
  #
144
- # @param [PublicSuffix::Rule::*] other
145
- # The rule to compare.
146
- #
117
+ # @param [PublicSuffix::Rule::*] other The rule to compare
147
118
  # @return [Boolean]
148
119
  # Returns true if this rule and other are instances of the same class
149
120
  # and has the same value, false otherwise.
150
121
  def ==(other)
151
- return false unless other.is_a?(self.class)
152
- self.equal?(other) ||
153
- self.name == other.name
122
+ equal?(other) || (self.class == other.class && value == other.value)
154
123
  end
155
- alias :eql? :==
124
+ alias eql? ==
156
125
 
157
- # Checks if this rule matches +domain+.
126
+ # Checks if this rule matches +name+.
158
127
  #
159
- # @param [String, #to_s] domain
160
- # The domain name to check.
128
+ # A domain name is said to match a rule if and only if
129
+ # all of the following conditions are met:
161
130
  #
162
- # @return [Boolean]
131
+ # - When the domain and rule are split into corresponding labels,
132
+ # that the domain contains as many or more labels than the rule.
133
+ # - Beginning with the right-most labels of both the domain and the rule,
134
+ # and continuing for all labels in the rule, one finds that for every pair,
135
+ # either they are identical, or that the label from the rule is "*".
136
+ #
137
+ # @see https://publicsuffix.org/list/
163
138
  #
164
139
  # @example
165
- # rule = Rule.factory("com")
166
- # # #<PublicSuffix::Rule::Normal>
167
- # rule.match?("example.com")
140
+ # Rule.factory("com").match?("example.com")
168
141
  # # => true
169
- # rule.match?("example.net")
142
+ # Rule.factory("com").match?("example.net")
170
143
  # # => false
171
144
  #
172
- def match?(domain)
173
- l1 = labels
174
- l2 = Domain.domain_to_labels(domain)
175
- odiff(l1, l2).empty?
176
- end
177
-
178
- # Checks if this rule allows +domain+.
179
- #
180
- # @param [String, #to_s] domain
181
- # The domain name to check.
182
- #
145
+ # @param name [String, #to_s] The domain name to check.
183
146
  # @return [Boolean]
184
- #
185
- # @example
186
- # rule = Rule.factory("*.do")
187
- # # => #<PublicSuffix::Rule::Wildcard>
188
- # rule.allow?("example.do")
189
- # # => false
190
- # rule.allow?("www.example.do")
191
- # # => true
192
- #
193
- def allow?(domain)
194
- !decompose(domain).last.nil?
195
- end
196
-
197
- # Gets the length of this rule for comparison.
198
- # The length usually matches the number of rule +parts+.
199
- #
200
- # Subclasses might actually override this method.
201
- #
202
- # @return [Integer] The number of parts.
203
- def length
204
- parts.length
147
+ def match?(name)
148
+ # Note: it works because of the assumption there are no
149
+ # rules like foo.*.com. If the assumption is incorrect,
150
+ # we need to properly walk the input and skip parts according
151
+ # to wildcard component.
152
+ diff = name.chomp(value)
153
+ diff.empty? || diff[-1] == "."
205
154
  end
206
155
 
207
- #
208
- # @raise [NotImplementedError]
209
156
  # @abstract
210
157
  def parts
211
- raise(NotImplementedError,"#{self.class}##{__method__} is not implemented")
158
+ raise NotImplementedError
212
159
  end
213
160
 
214
- #
215
- # @param [String, #to_s] domain
216
- # The domain name to decompose.
217
- #
218
- # @return [Array<String, nil>]
219
- #
220
- # @raise [NotImplementedError]
221
161
  # @abstract
222
- def decompose(domain)
223
- raise(NotImplementedError,"#{self.class}##{__method__} is not implemented")
162
+ def length
163
+ raise NotImplementedError
224
164
  end
225
165
 
226
- private
227
-
228
- def odiff(one, two)
229
- ii = 0
230
-
231
- while(ii < one.size && one[ii] == two[ii])
232
- ii += 1
233
- end
234
-
235
- one[ii..one.length]
166
+ # @abstract
167
+ # @param [String, #to_s] name The domain name to decompose
168
+ # @return [Array<String, nil>]
169
+ def decompose(*)
170
+ raise NotImplementedError
236
171
  end
237
172
 
238
173
  end
239
174
 
175
+ # Normal represents a standard rule (e.g. com).
240
176
  class Normal < Base
241
177
 
242
- # Initializes a new rule with +name+.
178
+ # Initializes a new rule from +definition+.
179
+ #
180
+ # @param definition [String] the rule as defined in the PSL
181
+ def initialize(definition, **options)
182
+ super(definition, **options)
183
+ end
184
+
185
+ # Gets the original rule definition.
243
186
  #
244
- # @param [String] name
245
- # The name of this rule.
187
+ # @return [String] The rule definition.
188
+ def rule
189
+ value
190
+ end
191
+
192
+ # Decomposes the domain name according to rule properties.
246
193
  #
247
- def initialize(name)
248
- super(name, name)
194
+ # @param [String, #to_s] domain The domain name to decompose
195
+ # @return [Array<String>] The array with [trd + sld, tld].
196
+ def decompose(domain)
197
+ suffix = parts.join('\.')
198
+ matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
199
+ matches ? matches[1..2] : [nil, nil]
249
200
  end
250
201
 
251
202
  # dot-split rule value and returns all rule parts
@@ -253,74 +204,96 @@ module PublicSuffix
253
204
  #
254
205
  # @return [Array<String>]
255
206
  def parts
256
- @parts ||= @value.split(".")
207
+ @value.split(DOT)
257
208
  end
258
209
 
259
- # Decomposes the domain according to rule properties.
260
- #
261
- # @param [String, #to_s] domain
262
- # The domain name to decompose.
210
+ # Gets the length of this rule for comparison,
211
+ # represented by the number of dot-separated parts in the rule.
263
212
  #
264
- # @return [Array<String>]
265
- # The array with [trd + sld, tld].
266
- #
267
- def decompose(domain)
268
- domain.to_s.chomp(".") =~ /^(.*)\.(#{parts.join('\.')})$/
269
- [$1, $2]
213
+ # @return [Integer] The length of the rule.
214
+ def length
215
+ @length ||= parts.length
270
216
  end
271
217
 
272
218
  end
273
219
 
220
+ # Wildcard represents a wildcard rule (e.g. *.co.uk).
274
221
  class Wildcard < Base
275
222
 
276
- # Initializes a new rule with +name+.
223
+ # Initializes a new rule from +definition+.
277
224
  #
278
- # @param [String] name
279
- # The name of this rule.
225
+ # The wildcard "*" is removed from the value, as it's common
226
+ # for each wildcard rule.
280
227
  #
281
- def initialize(name)
282
- super(name, name.to_s[2..-1])
228
+ # @param definition [String] the rule as defined in the PSL
229
+ def initialize(definition, **options)
230
+ super(definition.to_s[2..-1], **options)
283
231
  end
284
232
 
285
- # dot-split rule value and returns all rule parts
286
- # in the order they appear in the value.
233
+ # Gets the original rule definition.
287
234
  #
288
- # @return [Array<String>]
289
- def parts
290
- @parts ||= @value.split(".")
235
+ # @return [String] The rule definition.
236
+ def rule
237
+ value == "" ? STAR : STAR + DOT + value
291
238
  end
292
239
 
293
- # Overwrites the default implementation to cope with
294
- # the +*+ char.
240
+ # Decomposes the domain name according to rule properties.
295
241
  #
296
- # @return [Integer] The number of parts.
297
- def length
298
- parts.length + 1 # * counts as 1
242
+ # @param [String, #to_s] domain The domain name to decompose
243
+ # @return [Array<String>] The array with [trd + sld, tld].
244
+ def decompose(domain)
245
+ suffix = ([".*?"] + parts).join('\.')
246
+ matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
247
+ matches ? matches[1..2] : [nil, nil]
299
248
  end
300
249
 
301
- # Decomposes the domain according to rule properties.
302
- #
303
- # @param [String, #to_s] domain
304
- # The domain name to decompose.
250
+ # dot-split rule value and returns all rule parts
251
+ # in the order they appear in the value.
305
252
  #
306
253
  # @return [Array<String>]
307
- # The array with [trd + sld, tld].
254
+ def parts
255
+ @value.split(DOT)
256
+ end
257
+
258
+ # Gets the length of this rule for comparison,
259
+ # represented by the number of dot-separated parts in the rule
260
+ # plus 1 for the *.
308
261
  #
309
- def decompose(domain)
310
- domain.to_s.chomp(".") =~ /^(.*)\.(.*?\.#{parts.join('\.')})$/
311
- [$1, $2]
262
+ # @return [Integer] The length of the rule.
263
+ def length
264
+ @length ||= parts.length + 1 # * counts as 1
312
265
  end
313
266
 
314
267
  end
315
268
 
269
+ # Exception represents an exception rule (e.g. !parliament.uk).
316
270
  class Exception < Base
317
271
 
318
- # Initializes a new rule with +name+.
272
+ # Initializes a new rule from +definition+.
273
+ #
274
+ # The bang ! is removed from the value, as it's common
275
+ # for each exception rule.
276
+ #
277
+ # @param definition [String] the rule as defined in the PSL
278
+ def initialize(definition, **options)
279
+ super(definition.to_s[1..-1], **options)
280
+ end
281
+
282
+ # Gets the original rule definition.
319
283
  #
320
- # @param [String] name The name of this rule.
284
+ # @return [String] The rule definition.
285
+ def rule
286
+ BANG + value
287
+ end
288
+
289
+ # Decomposes the domain name according to rule properties.
321
290
  #
322
- def initialize(name)
323
- super(name, name.to_s[1..-1])
291
+ # @param [String, #to_s] domain The domain name to decompose
292
+ # @return [Array<String>] The array with [trd + sld, tld].
293
+ def decompose(domain)
294
+ suffix = parts.join('\.')
295
+ matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
296
+ matches ? matches[1..2] : [nil, nil]
324
297
  end
325
298
 
326
299
  # dot-split rule value and returns all rule parts
@@ -329,42 +302,28 @@ module PublicSuffix
329
302
  #
330
303
  # See http://publicsuffix.org/format/:
331
304
  # If the prevailing rule is a exception rule,
332
- # modify it by removing the leftmost label.
305
+ # modify it by removing the leftmost label.
333
306
  #
334
307
  # @return [Array<String>]
335
308
  def parts
336
- @parts ||= @value.split(".")[1..-1]
309
+ @value.split(DOT)[1..-1]
337
310
  end
338
311
 
339
- # Decomposes the domain according to rule properties.
340
- #
341
- # @param [String, #to_s] domain
342
- # The domain name to decompose.
343
- #
344
- # @return [Array<String>]
345
- # The array with [trd + sld, tld].
312
+ # Gets the length of this rule for comparison,
313
+ # represented by the number of dot-separated parts in the rule.
346
314
  #
347
- def decompose(domain)
348
- domain.to_s.chomp(".") =~ /^(.*)\.(#{parts.join('\.')})$/
349
- [$1, $2]
315
+ # @return [Integer] The length of the rule.
316
+ def length
317
+ @length ||= parts.length
350
318
  end
351
319
 
352
320
  end
353
321
 
354
- RULES = {
355
- '*' => Wildcard,
356
- '!' => Exception
357
- }
358
- RULES.default = Normal
359
322
 
360
323
  # Takes the +name+ of the rule, detects the specific rule class
361
324
  # and creates a new instance of that class.
362
325
  # The +name+ becomes the rule +value+.
363
326
  #
364
- # @param [String] name The rule definition.
365
- #
366
- # @return [PublicSuffix::Rule::*] A rule instance.
367
- #
368
327
  # @example Creates a Normal rule
369
328
  # PublicSuffix::Rule.factory("ar")
370
329
  # # => #<PublicSuffix::Rule::Normal>
@@ -377,8 +336,28 @@ module PublicSuffix
377
336
  # PublicSuffix::Rule.factory("!congresodelalengua3.ar")
378
337
  # # => #<PublicSuffix::Rule::Exception>
379
338
  #
380
- def self.factory(name)
381
- RULES[name.to_s[0,1]].new(name)
339
+ # @param [String] content The rule content.
340
+ # @return [PublicSuffix::Rule::*] A rule instance.
341
+ def self.factory(content, **options)
342
+ case content.to_s[0, 1]
343
+ when STAR
344
+ Wildcard
345
+ when BANG
346
+ Exception
347
+ else
348
+ Normal
349
+ end.new(content, **options)
350
+ end
351
+
352
+ # The default rule to use if no rule matches.
353
+ #
354
+ # The default rule is "*". From https://publicsuffix.org/list/:
355
+ #
356
+ # > If no rules match, the prevailing rule is "*".
357
+ #
358
+ # @return [PublicSuffix::Rule::Wildcard] The default rule.
359
+ def self.default
360
+ factory(STAR)
382
361
  end
383
362
 
384
363
  end