public_suffix 1.5.3 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,19 +1,16 @@
1
- #
2
- # Public Suffix
1
+ # = Public Suffix
3
2
  #
4
3
  # Domain name parser based on the Public Suffix List.
5
4
  #
6
- # Copyright (c) 2009-2015 Simone Carletti <weppos@weppos.net>
7
- #
5
+ # Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
8
6
 
9
7
  module PublicSuffix
10
8
 
11
9
  class Error < StandardError
12
10
  end
13
11
 
14
- # Raised when trying to parse an invalid domain.
15
- # A domain is considered invalid when no rule is found
16
- # in the definition list.
12
+ # Raised when trying to parse an invalid name.
13
+ # A name is considered invalid when no rule is found in the definition list.
17
14
  #
18
15
  # @example
19
16
  #
@@ -26,10 +23,7 @@ module PublicSuffix
26
23
  class DomainInvalid < Error
27
24
  end
28
25
 
29
- # Raised when trying to parse a domain
30
- # which is formally defined by a rule,
31
- # but the rules set a requirement which is not satisfied
32
- # by the input you are trying to parse.
26
+ # Raised when trying to parse a name that matches a suffix.
33
27
  #
34
28
  # @example
35
29
  #
@@ -42,10 +36,4 @@ module PublicSuffix
42
36
  class DomainNotAllowed < DomainInvalid
43
37
  end
44
38
 
45
- # Backward Compatibility
46
- #
47
- # @deprecated Use {PublicSuffix::DomainInvalid}.
48
- #
49
- InvalidDomain = DomainInvalid
50
-
51
39
  end
@@ -1,10 +1,8 @@
1
- #
2
- # Public Suffix
1
+ # = Public Suffix
3
2
  #
4
3
  # Domain name parser based on the Public Suffix List.
5
4
  #
6
- # Copyright (c) 2009-2015 Simone Carletti <weppos@weppos.net>
7
- #
5
+ # Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
8
6
 
9
7
  module PublicSuffix
10
8
 
@@ -42,17 +40,16 @@ module PublicSuffix
42
40
  class List
43
41
  include Enumerable
44
42
 
45
- class << self
46
- attr_writer :default_definition
47
- end
43
+ DEFAULT_LIST_PATH = File.join(File.dirname(__FILE__), "..", "..", "data", "list.txt")
48
44
 
49
45
  # Gets the default rule list.
46
+ #
50
47
  # Initializes a new {PublicSuffix::List} parsing the content
51
- # of {PublicSuffix::List.default_definition}, if required.
48
+ # of {PublicSuffix::List.default_list_content}, if required.
52
49
  #
53
50
  # @return [PublicSuffix::List]
54
- def self.default
55
- @default ||= parse(default_definition)
51
+ def self.default(**options)
52
+ @default ||= parse(File.read(DEFAULT_LIST_PATH), options)
56
53
  end
57
54
 
58
55
  # Sets the default rule list to +value+.
@@ -65,25 +62,6 @@ module PublicSuffix
65
62
  @default = value
66
63
  end
67
64
 
68
- # Shows if support for private (non-ICANN) domains is enabled or not
69
- #
70
- # @return [Boolean]
71
- def self.private_domains?
72
- @private_domains != false
73
- end
74
-
75
- # Enables/disables support for private (non-ICANN) domains
76
- # Implicitly reloads the list
77
- #
78
- # @param [Boolean] value
79
- # enable/disable support
80
- #
81
- # @return [PublicSuffix::List]
82
- def self.private_domains=(value)
83
- @private_domains = !!value
84
- self.clear
85
- end
86
-
87
65
  # Sets the default rule list to +nil+.
88
66
  #
89
67
  # @return [self]
@@ -92,92 +70,89 @@ module PublicSuffix
92
70
  self
93
71
  end
94
72
 
95
- # Resets the default rule list and reinitialize it
96
- # parsing the content of {PublicSuffix::List.default_definition}.
97
- #
98
- # @return [PublicSuffix::List]
99
- def self.reload
100
- self.clear.default
101
- end
102
-
103
- DEFAULT_DEFINITION_PATH = File.join(File.dirname(__FILE__), "..", "..", "data", "definitions.txt")
104
-
105
- # Gets the default definition list.
106
- # Can be any <tt>IOStream</tt> including a <tt>File</tt>
107
- # or a simple <tt>String</tt>.
108
- # The object must respond to <tt>#each_line</tt>.
109
- #
110
- # @return [File]
111
- def self.default_definition
112
- @default_definition || File.new(DEFAULT_DEFINITION_PATH, "r:utf-8")
113
- end
73
+ # rubocop:disable Metrics/MethodLength
114
74
 
115
75
  # Parse given +input+ treating the content as Public Suffix List.
116
76
  #
117
77
  # See http://publicsuffix.org/format/ for more details about input format.
118
78
  #
119
- # @param [String] input The rule list to parse.
120
- #
79
+ # @param input [#each_line] The list to parse.
80
+ # @param private_domains [Boolean] whether to include the private domains section (defaults to true).
121
81
  # @return [Array<PublicSuffix::Rule::*>]
122
- def self.parse(input)
82
+ def self.parse(input, private_domains: true)
83
+ comment_token = "//".freeze
84
+ private_token = "===BEGIN PRIVATE DOMAINS===".freeze
85
+ section = nil # 1 == ICANN, 2 == PRIVATE
86
+
123
87
  new do |list|
124
88
  input.each_line do |line|
125
89
  line.strip!
126
- break if !private_domains? && line.include?('===BEGIN PRIVATE DOMAINS===')
127
- # strip blank lines
128
- if line.empty?
90
+ case # rubocop:disable Style/EmptyCaseCondition
91
+
92
+ # skip blank lines
93
+ when line.empty?
129
94
  next
130
- # strip comments
131
- elsif line =~ %r{^//}
95
+
96
+ # include private domains or stop scanner
97
+ when line.include?(private_token)
98
+ break if !private_domains
99
+ section = 2
100
+
101
+ # skip comments
102
+ when line.start_with?(comment_token)
132
103
  next
133
- # append rule
104
+
134
105
  else
135
- list.add(Rule.factory(line), false)
106
+ list.add(Rule.factory(line, private: section == 2), reindex: false)
107
+
136
108
  end
137
109
  end
138
110
  end
139
111
  end
112
+ # rubocop:enable Metrics/MethodLength
113
+
140
114
 
141
115
  # Gets the array of rules.
142
116
  #
143
117
  # @return [Array<PublicSuffix::Rule::*>]
144
118
  attr_reader :rules
145
119
 
146
- # Gets the naive index, a hash that with the keys being the first label of
147
- # every rule pointing to an array of integers (indexes of the rules in @rules).
148
- #
149
- # @return [Array]
150
- attr_reader :indexes
151
120
 
152
121
  # Initializes an empty {PublicSuffix::List}.
153
122
  #
154
123
  # @yield [self] Yields on self.
155
124
  # @yieldparam [PublicSuffix::List] self The newly created instance.
156
125
  #
157
- def initialize(&block)
158
- @rules = []
126
+ def initialize
127
+ @rules = []
159
128
  yield(self) if block_given?
160
- create_index!
129
+ reindex!
161
130
  end
162
131
 
132
+
163
133
  # Creates a naive index for +@rules+. Just a hash that will tell
164
134
  # us where the elements of +@rules+ are relative to its first
165
135
  # {PublicSuffix::Rule::Base#labels} element.
166
136
  #
167
137
  # For instance if @rules[5] and @rules[4] are the only elements of the list
168
- # where Rule#labels.first is 'us' @indexes['us'] #=> [5,4], that way in
138
+ # where Rule#labels.first is 'us' @indexes['us'] #=> [5,4], that way in
169
139
  # select we can avoid mapping every single rule against the candidate domain.
170
- def create_index!
140
+ def reindex!
171
141
  @indexes = {}
172
- @rules.map { |l| l.labels.first }.each_with_index do |elm, inx|
173
- if !@indexes.has_key?(elm)
174
- @indexes[elm] = [inx]
175
- else
176
- @indexes[elm] << inx
177
- end
142
+ @rules.each_with_index do |rule, index|
143
+ tld = Domain.name_to_labels(rule.value).last
144
+ @indexes[tld] ||= []
145
+ @indexes[tld] << index
178
146
  end
179
147
  end
180
148
 
149
+ # Gets a copy of the naive index, a hash whose keys are the last label of
150
+ # every rule value, each pointing to an array of integers (indexes of the rules in @rules).
151
+ def indexes
152
+ @indexes.dup
153
+ end
154
+
155
+
181
156
  # Checks whether two lists are equal.
182
157
  #
183
158
  # List <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
@@ -190,39 +165,31 @@ module PublicSuffix
190
165
  # @return [Boolean]
191
166
  def ==(other)
192
167
  return false unless other.is_a?(List)
193
- self.equal?(other) ||
194
- self.rules == other.rules
168
+ equal?(other) || rules == other.rules
195
169
  end
196
- alias :eql? :==
170
+ alias eql? ==
197
171
 
198
172
  # Iterates each rule in the list.
199
173
  def each(*args, &block)
200
174
  @rules.each(*args, &block)
201
175
  end
202
176
 
203
- # Gets the list as array.
204
- #
205
- # @return [Array<PublicSuffix::Rule::*>]
206
- def to_a
207
- @rules
208
- end
209
177
 
210
- # Adds the given object to the list
211
- # and optionally refreshes the rule index.
178
+ # Adds the given object to the list and optionally refreshes the rule index.
212
179
  #
213
180
  # @param [PublicSuffix::Rule::*] rule
214
181
  # The rule to add to the list.
215
- # @param [Boolean] index
182
+ # @param [Boolean] reindex
216
183
  # Set to true to recreate the rule index
217
184
  # after the rule has been added to the list.
218
185
  #
219
186
  # @return [self]
220
187
  #
221
- # @see #create_index!
188
+ # @see #reindex!
222
189
  #
223
- def add(rule, index = true)
190
+ def add(rule, reindex: true)
224
191
  @rules << rule
225
- create_index! if index == true
192
+ reindex! if reindex
226
193
  self
227
194
  end
228
195
  alias << add
@@ -233,7 +200,6 @@ module PublicSuffix
233
200
  def size
234
201
  @rules.size
235
202
  end
236
- alias length size
237
203
 
238
204
  # Checks whether the list is empty.
239
205
  #
@@ -247,54 +213,73 @@ module PublicSuffix
247
213
  # @return [self]
248
214
  def clear
249
215
  @rules.clear
216
+ reindex!
250
217
  self
251
218
  end
252
219
 
253
- # Returns the most appropriate rule for domain.
220
+ # Finds and returns the most appropriate rule for the domain name.
254
221
  #
255
222
  # From the Public Suffix List documentation:
256
223
  #
257
- # * If a hostname matches more than one rule in the file,
224
+ # - If a hostname matches more than one rule in the file,
258
225
  # the longest matching rule (the one with the most levels) will be used.
259
- # * An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
226
+ # - An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
260
227
  # An exception rule takes priority over any other matching rule.
261
228
  #
262
- # == Algorithm description
263
- #
264
- # * Match domain against all rules and take note of the matching ones.
265
- # * If no rules match, the prevailing rule is "*".
266
- # * If more than one rule matches, the prevailing rule is the one which is an exception rule.
267
- # * If there is no matching exception rule, the prevailing rule is the one with the most labels.
268
- # * If the prevailing rule is a exception rule, modify it by removing the leftmost label.
269
- # * The public suffix is the set of labels from the domain
270
- # which directly match the labels of the prevailing rule (joined by dots).
271
- # * The registered domain is the public suffix plus one additional label.
272
- #
273
- # @param [String, #to_s] domain The domain name.
274
- #
275
- # @return [PublicSuffix::Rule::*, nil]
276
- def find(domain)
277
- rules = select(domain)
278
- rules.detect { |r| r.type == :exception } ||
279
- rules.inject { |t,r| t.length > r.length ? t : r }
229
+ # ## Algorithm description
230
+ #
231
+ # 1. Match domain against all rules and take note of the matching ones.
232
+ # 2. If no rules match, the prevailing rule is "*".
233
+ # 3. If more than one rule matches, the prevailing rule is the one which is an exception rule.
234
+ # 4. If there is no matching exception rule, the prevailing rule is the one with the most labels.
235
+ # 5. If the prevailing rule is a exception rule, modify it by removing the leftmost label.
236
+ # 6. The public suffix is the set of labels from the domain
237
+ # which directly match the labels of the prevailing rule (joined by dots).
238
+ # 7. The registered domain is the public suffix plus one additional label.
239
+ #
240
+ # @param name [String, #to_s] The domain name.
241
+ # @param [PublicSuffix::Rule::*] default The default rule to return in case no rule matches.
242
+ # @return [PublicSuffix::Rule::*]
243
+ def find(name, default: default_rule, **options)
244
+ rule = select(name, **options).inject do |l, r|
245
+ return r if r.class == Rule::Exception
246
+ l.length > r.length ? l : r
247
+ end
248
+ rule || default
280
249
  end
281
250
 
282
251
  # Selects all the rules matching given domain.
283
252
  #
284
- # Will use +@indexes+ to try only the rules that share the same first label,
285
- # that will speed up things when using +List.find('foo')+ a lot.
253
+ # Internally, the lookup relies heavily on the `@indexes`. The input is split into labels,
254
+ # and we retrieve from the index only the rules that end with the input label. After that,
255
+ # a sequential scan is performed. In most cases, where the number of rules for the same label
256
+ # is limited, this algorithm is efficient enough.
286
257
  #
287
- # @param [String, #to_s] domain The domain name.
258
+ # If `ignore_private` is set to true, the algorithm will skip the rules that are flagged as private domain.
259
+ # Note that the rules will still be part of the loop. If you frequently need to access lists
260
+ # ignoring the private domains, you should create a list that doesn't include these domains setting the
261
+ # `private_domains: false` option when calling {.parse}.
288
262
  #
263
+ # @param [String, #to_s] name The domain name.
264
+ # @param [Boolean] ignore_private
289
265
  # @return [Array<PublicSuffix::Rule::*>]
290
- def select(domain)
291
- # raise DomainInvalid, "Blank domain"
292
- return [] if domain.to_s =~ /\A\s*\z/
293
- # raise DomainInvalid, "`#{domain}' is not expected to contain a scheme"
294
- return [] if domain.include?("://")
295
-
296
- indices = (@indexes[Domain.domain_to_labels(domain).first] || [])
297
- @rules.values_at(*indices).select { |rule| rule.match?(domain) }
266
+ def select(name, ignore_private: false)
267
+ name = name.to_s
268
+ indices = (@indexes[Domain.name_to_labels(name).last] || [])
269
+
270
+ finder = @rules.values_at(*indices).lazy
271
+ finder = finder.select { |rule| rule.match?(name) }
272
+ finder = finder.select { |rule| !rule.private } if ignore_private
273
+ finder.to_a
274
+ end
275
+
276
+ # Gets the default rule.
277
+ #
278
+ # @see PublicSuffix::Rule.default_rule
279
+ #
280
+ # @return [PublicSuffix::Rule::*]
281
+ def default_rule
282
+ PublicSuffix::Rule.default
298
283
  end
299
284
 
300
285
  end
@@ -1,17 +1,15 @@
1
- #
2
- # Public Suffix
1
+ # = Public Suffix
3
2
  #
4
3
  # Domain name parser based on the Public Suffix List.
5
4
  #
6
- # Copyright (c) 2009-2015 Simone Carletti <weppos@weppos.net>
7
- #
5
+ # Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
8
6
 
9
7
  module PublicSuffix
10
8
 
11
9
  # A Rule is a special object which holds a single definition
12
10
  # of the Public Suffix List.
13
11
  #
14
- # There are 3 types of ruleas, each one represented by a specific
12
+ # There are 3 types of rules, each one represented by a specific
15
13
  # subclass within the +PublicSuffix::Rule+ namespace.
16
14
  #
17
15
  # To create a new Rule, use the {PublicSuffix::Rule#factory} method.
@@ -21,12 +19,11 @@ module PublicSuffix
21
19
  #
22
20
  module Rule
23
21
 
24
- #
25
22
  # = Abstract rule class
26
23
  #
27
24
  # This represents the base class for a Rule definition
28
- # in the {Public Suffix List}[http://publicsuffix.org].
29
- #
25
+ # in the {Public Suffix List}[https://publicsuffix.org].
26
+ #
30
27
  # This is intended to be an Abstract class
31
28
  # and you shouldn't create a direct instance. The only purpose
32
29
  # of this class is to expose a common interface
@@ -36,28 +33,21 @@ module PublicSuffix
36
33
  # * {PublicSuffix::Rule::Exception}
37
34
  # * {PublicSuffix::Rule::Wildcard}
38
35
  #
39
- # == Properties
36
+ # ## Properties
40
37
  #
41
38
  # A rule is composed of the following properties:
42
39
  #
43
- # name - The name of the rule, corresponding to the rule definition
44
- # in the public suffix list
45
- # value - The value, a normalized version of the rule name.
40
+ # value - A normalized version of the rule name.
46
41
  # The normalization process depends on rule type.
47
- # type - The rule type (:normal, :wildcard, :exception)
48
- # labels - The canonicalized rule name
49
42
  #
50
43
  # Here's an example
51
44
  #
52
45
  # PublicSuffix::Rule.factory("*.google.com")
53
46
  # #<PublicSuffix::Rule::Wildcard:0x1015c14b0
54
- # @labels=["com", "google"],
55
- # @name="*.google.com",
56
- # @type=:wildcard,
57
47
  # @value="google.com"
58
48
  # >
59
49
  #
60
- # == Rule Creation
50
+ # ## Rule Creation
61
51
  #
62
52
  # The best way to create a new rule is passing the rule name
63
53
  # to the <tt>PublicSuffix::Rule.factory</tt> method.
@@ -71,35 +61,34 @@ module PublicSuffix
71
61
  # This method will detect the rule type and create an instance
72
62
  # from the proper rule class.
73
63
  #
74
- # == Rule Usage
64
+ # ## Rule Usage
75
65
  #
76
- # A rule describes the composition of a domain name
77
- # and explains how to tokenize the domain name
78
- # into tld, sld and trd.
66
+ # A rule describes the composition of a domain name and explains how to tokenize
67
+ # the name into tld, sld and trd.
79
68
  #
80
- # To use a rule, you first need to be sure the domain you want to tokenize
69
+ # To use a rule, you first need to be sure the name you want to tokenize
81
70
  # can be handled by the current rule.
82
71
  # You can use the <tt>#match?</tt> method.
83
72
  #
84
73
  # rule = PublicSuffix::Rule.factory("com")
85
- #
74
+ #
86
75
  # rule.match?("google.com")
87
76
  # # => true
88
- #
77
+ #
89
78
  # rule.match?("google.net")
90
79
  # # => false
91
80
  #
92
- # Rule order is significant. A domain can match more than one rule.
81
+ # Rule order is significant. A name can match more than one rule.
93
82
  # See the {Public Suffix Documentation}[http://publicsuffix.org/format/]
94
83
  # to learn more about rule priority.
95
84
  #
96
85
  # When you have the right rule, you can use it to tokenize the domain name.
97
- #
86
+ #
98
87
  # rule = PublicSuffix::Rule.factory("com")
99
- #
88
+ #
100
89
  # rule.decompose("google.com")
101
90
  # # => ["google", "com"]
102
- #
91
+ #
103
92
  # rule.decompose("www.google.com")
104
93
  # # => ["www.google", "com"]
105
94
  #
@@ -107,145 +96,107 @@ module PublicSuffix
107
96
  #
108
97
  class Base
109
98
 
110
- attr_reader :name, :value, :labels
99
+ # @return [String] the rule definition
100
+ attr_reader :value
111
101
 
112
- # Initializes a new rule with name and value.
113
- # If value is +nil+, name also becomes the value for this rule.
114
- #
115
- # @param [String] name
116
- # The name of the rule
117
- # @param [String] value
118
- # The value of the rule. If nil, defaults to +name+.
119
- #
120
- def initialize(name, value = nil)
121
- @name = name.to_s
122
- @value = value || @name
123
- @labels = Domain.domain_to_labels(@value)
124
- end
102
+ # @return [Boolean] true if the rule is a private domain
103
+ attr_reader :private
125
104
 
126
- #
127
- # The rule type name.
128
- #
129
- # @return [Symbol]
130
- #
131
- def self.type
132
- @type ||= self.name.split("::").last.downcase.to_sym
133
- end
134
105
 
106
+ # Initializes a new rule with the given value.
107
+ # The +private+ option flags the rule as a private domain (defaults to false).
135
108
  #
136
- # @see {type}
137
- #
138
- def type
139
- self.class.type
109
+ # @param value [String] the value of the rule
110
+ def initialize(value, private: false)
111
+ @value = value.to_s
112
+ @private = private
140
113
  end
141
114
 
142
115
  # Checks whether this rule is equal to <tt>other</tt>.
143
116
  #
144
- # @param [PublicSuffix::Rule::*] other
145
- # The rule to compare.
146
- #
117
+ # @param [PublicSuffix::Rule::*] other The rule to compare
147
118
  # @return [Boolean]
148
119
  # Returns true if this rule and other are instances of the same class
149
120
  # and has the same value, false otherwise.
150
121
  def ==(other)
151
- return false unless other.is_a?(self.class)
152
- self.equal?(other) ||
153
- self.name == other.name
122
+ equal?(other) || (self.class == other.class && value == other.value)
154
123
  end
155
- alias :eql? :==
124
+ alias eql? ==
156
125
 
157
- # Checks if this rule matches +domain+.
126
+ # Checks if this rule matches +name+.
158
127
  #
159
- # @param [String, #to_s] domain
160
- # The domain name to check.
128
+ # A domain name is said to match a rule if and only if
129
+ # all of the following conditions are met:
161
130
  #
162
- # @return [Boolean]
131
+ # - When the domain and rule are split into corresponding labels,
132
+ # that the domain contains as many or more labels than the rule.
133
+ # - Beginning with the right-most labels of both the domain and the rule,
134
+ # and continuing for all labels in the rule, one finds that for every pair,
135
+ # either they are identical, or that the label from the rule is "*".
136
+ #
137
+ # @see https://publicsuffix.org/list/
163
138
  #
164
139
  # @example
165
- # rule = Rule.factory("com")
166
- # # #<PublicSuffix::Rule::Normal>
167
- # rule.match?("example.com")
140
+ # Rule.factory("com").match?("example.com")
168
141
  # # => true
169
- # rule.match?("example.net")
142
+ # Rule.factory("com").match?("example.net")
170
143
  # # => false
171
144
  #
172
- def match?(domain)
173
- l1 = labels
174
- l2 = Domain.domain_to_labels(domain)
175
- odiff(l1, l2).empty?
176
- end
177
-
178
- # Checks if this rule allows +domain+.
179
- #
180
- # @param [String, #to_s] domain
181
- # The domain name to check.
182
- #
145
+ # @param name [String, #to_s] The domain name to check.
183
146
  # @return [Boolean]
184
- #
185
- # @example
186
- # rule = Rule.factory("*.do")
187
- # # => #<PublicSuffix::Rule::Wildcard>
188
- # rule.allow?("example.do")
189
- # # => false
190
- # rule.allow?("www.example.do")
191
- # # => true
192
- #
193
- def allow?(domain)
194
- !decompose(domain).last.nil?
195
- end
196
-
197
- # Gets the length of this rule for comparison.
198
- # The length usually matches the number of rule +parts+.
199
- #
200
- # Subclasses might actually override this method.
201
- #
202
- # @return [Integer] The number of parts.
203
- def length
204
- parts.length
147
+ def match?(name)
148
+ # Note: it works because of the assumption there are no
149
+ # rules like foo.*.com. If the assumption is incorrect,
150
+ # we need to properly walk the input and skip parts according
151
+ # to wildcard component.
152
+ diff = name.chomp(value)
153
+ diff.empty? || diff[-1] == "."
205
154
  end
206
155
 
207
- #
208
- # @raise [NotImplementedError]
209
156
  # @abstract
210
157
  def parts
211
- raise(NotImplementedError,"#{self.class}##{__method__} is not implemented")
158
+ raise NotImplementedError
212
159
  end
213
160
 
214
- #
215
- # @param [String, #to_s] domain
216
- # The domain name to decompose.
217
- #
218
- # @return [Array<String, nil>]
219
- #
220
- # @raise [NotImplementedError]
221
161
  # @abstract
222
- def decompose(domain)
223
- raise(NotImplementedError,"#{self.class}##{__method__} is not implemented")
162
+ def length
163
+ raise NotImplementedError
224
164
  end
225
165
 
226
- private
227
-
228
- def odiff(one, two)
229
- ii = 0
230
-
231
- while(ii < one.size && one[ii] == two[ii])
232
- ii += 1
233
- end
234
-
235
- one[ii..one.length]
166
+ # @abstract
167
+ # @param [String, #to_s] name The domain name to decompose
168
+ # @return [Array<String, nil>]
169
+ def decompose(*)
170
+ raise NotImplementedError
236
171
  end
237
172
 
238
173
  end
239
174
 
175
+ # Normal represents a standard rule (e.g. com).
240
176
  class Normal < Base
241
177
 
242
- # Initializes a new rule with +name+.
178
+ # Initializes a new rule from +definition+.
179
+ #
180
+ # @param definition [String] the rule as defined in the PSL
181
+ def initialize(definition, **options)
182
+ super(definition, **options)
183
+ end
184
+
185
+ # Gets the original rule definition.
243
186
  #
244
- # @param [String] name
245
- # The name of this rule.
187
+ # @return [String] The rule definition.
188
+ def rule
189
+ value
190
+ end
191
+
192
+ # Decomposes the domain name according to rule properties.
246
193
  #
247
- def initialize(name)
248
- super(name, name)
194
+ # @param [String, #to_s] name The domain name to decompose
195
+ # @return [Array<String>] The array with [trd + sld, tld].
196
+ def decompose(domain)
197
+ suffix = parts.join('\.')
198
+ matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
199
+ matches ? matches[1..2] : [nil, nil]
249
200
  end
250
201
 
251
202
  # dot-split rule value and returns all rule parts
@@ -253,74 +204,96 @@ module PublicSuffix
253
204
  #
254
205
  # @return [Array<String>]
255
206
  def parts
256
- @parts ||= @value.split(".")
207
+ @value.split(DOT)
257
208
  end
258
209
 
259
- # Decomposes the domain according to rule properties.
260
- #
261
- # @param [String, #to_s] domain
262
- # The domain name to decompose.
210
+ # Gets the length of this rule for comparison,
211
+ # represented by the number of dot-separated parts in the rule.
263
212
  #
264
- # @return [Array<String>]
265
- # The array with [trd + sld, tld].
266
- #
267
- def decompose(domain)
268
- domain.to_s.chomp(".") =~ /^(.*)\.(#{parts.join('\.')})$/
269
- [$1, $2]
213
+ # @return [Integer] The length of the rule.
214
+ def length
215
+ @length ||= parts.length
270
216
  end
271
217
 
272
218
  end
273
219
 
220
+ # Wildcard represents a wildcard rule (e.g. *.co.uk).
274
221
  class Wildcard < Base
275
222
 
276
- # Initializes a new rule with +name+.
223
+ # Initializes a new rule from +definition+.
277
224
  #
278
- # @param [String] name
279
- # The name of this rule.
225
+ # The wildcard "*" is removed from the value, as it's common
226
+ # for each wildcard rule.
280
227
  #
281
- def initialize(name)
282
- super(name, name.to_s[2..-1])
228
+ # @param definition [String] the rule as defined in the PSL
229
+ def initialize(definition, **options)
230
+ super(definition.to_s[2..-1], **options)
283
231
  end
284
232
 
285
- # dot-split rule value and returns all rule parts
286
- # in the order they appear in the value.
233
+ # Gets the original rule definition.
287
234
  #
288
- # @return [Array<String>]
289
- def parts
290
- @parts ||= @value.split(".")
235
+ # @return [String] The rule definition.
236
+ def rule
237
+ value == "" ? STAR : STAR + DOT + value
291
238
  end
292
239
 
293
- # Overwrites the default implementation to cope with
294
- # the +*+ char.
240
+ # Decomposes the domain name according to rule properties.
295
241
  #
296
- # @return [Integer] The number of parts.
297
- def length
298
- parts.length + 1 # * counts as 1
242
+ # @param [String, #to_s] name The domain name to decompose
243
+ # @return [Array<String>] The array with [trd + sld, tld].
244
+ def decompose(domain)
245
+ suffix = ([".*?"] + parts).join('\.')
246
+ matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
247
+ matches ? matches[1..2] : [nil, nil]
299
248
  end
300
249
 
301
- # Decomposes the domain according to rule properties.
302
- #
303
- # @param [String, #to_s] domain
304
- # The domain name to decompose.
250
+ # dot-split rule value and returns all rule parts
251
+ # in the order they appear in the value.
305
252
  #
306
253
  # @return [Array<String>]
307
- # The array with [trd + sld, tld].
254
+ def parts
255
+ @value.split(DOT)
256
+ end
257
+
258
+ # Gets the length of this rule for comparison,
259
+ # represented by the number of dot-separated parts in the rule
260
+ # plus 1 for the *.
308
261
  #
309
- def decompose(domain)
310
- domain.to_s.chomp(".") =~ /^(.*)\.(.*?\.#{parts.join('\.')})$/
311
- [$1, $2]
262
+ # @return [Integer] The length of the rule.
263
+ def length
264
+ @length ||= parts.length + 1 # * counts as 1
312
265
  end
313
266
 
314
267
  end
315
268
 
269
+ # Exception represents an exception rule (e.g. !parliament.uk).
316
270
  class Exception < Base
317
271
 
318
- # Initializes a new rule with +name+.
272
+ # Initializes a new rule from +definition+.
273
+ #
274
+ # The bang ! is removed from the value, as it's common
275
+ # for each exception rule.
276
+ #
277
+ # @param definition [String] the rule as defined in the PSL
278
+ def initialize(definition, **options)
279
+ super(definition.to_s[1..-1], **options)
280
+ end
281
+
282
+ # Gets the original rule definition.
319
283
  #
320
- # @param [String] name The name of this rule.
284
+ # @return [String] The rule definition.
285
+ def rule
286
+ BANG + value
287
+ end
288
+
289
+ # Decomposes the domain name according to rule properties.
321
290
  #
322
- def initialize(name)
323
- super(name, name.to_s[1..-1])
291
+ # @param [String, #to_s] name The domain name to decompose
292
+ # @return [Array<String>] The array with [trd + sld, tld].
293
+ def decompose(domain)
294
+ suffix = parts.join('\.')
295
+ matches = domain.to_s.match(/^(.*)\.(#{suffix})$/)
296
+ matches ? matches[1..2] : [nil, nil]
324
297
  end
325
298
 
326
299
  # dot-split rule value and returns all rule parts
@@ -329,42 +302,28 @@ module PublicSuffix
329
302
  #
330
303
  # See http://publicsuffix.org/format/:
331
304
  # If the prevailing rule is a exception rule,
332
- # modify it by removing the leftmost label.
305
+ # modify it by removing the leftmost label.
333
306
  #
334
307
  # @return [Array<String>]
335
308
  def parts
336
- @parts ||= @value.split(".")[1..-1]
309
+ @value.split(DOT)[1..-1]
337
310
  end
338
311
 
339
- # Decomposes the domain according to rule properties.
340
- #
341
- # @param [String, #to_s] domain
342
- # The domain name to decompose.
343
- #
344
- # @return [Array<String>]
345
- # The array with [trd + sld, tld].
312
+ # Gets the length of this rule for comparison,
313
+ # represented by the number of dot-separated parts in the rule.
346
314
  #
347
- def decompose(domain)
348
- domain.to_s.chomp(".") =~ /^(.*)\.(#{parts.join('\.')})$/
349
- [$1, $2]
315
+ # @return [Integer] The length of the rule.
316
+ def length
317
+ @length ||= parts.length
350
318
  end
351
319
 
352
320
  end
353
321
 
354
- RULES = {
355
- '*' => Wildcard,
356
- '!' => Exception
357
- }
358
- RULES.default = Normal
359
322
 
360
323
  # Takes the +name+ of the rule, detects the specific rule class
361
324
  # and creates a new instance of that class.
362
325
  # The +name+ becomes the rule +value+.
363
326
  #
364
- # @param [String] name The rule definition.
365
- #
366
- # @return [PublicSuffix::Rule::*] A rule instance.
367
- #
368
327
  # @example Creates a Normal rule
369
328
  # PublicSuffix::Rule.factory("ar")
370
329
  # # => #<PublicSuffix::Rule::Normal>
@@ -377,8 +336,28 @@ module PublicSuffix
377
336
  # PublicSuffix::Rule.factory("!congresodelalengua3.ar")
378
337
  # # => #<PublicSuffix::Rule::Exception>
379
338
  #
380
- def self.factory(name)
381
- RULES[name.to_s[0,1]].new(name)
339
+ # @param [String] content The rule content.
340
+ # @return [PublicSuffix::Rule::*] A rule instance.
341
+ def self.factory(content, **options)
342
+ case content.to_s[0, 1]
343
+ when STAR
344
+ Wildcard
345
+ when BANG
346
+ Exception
347
+ else
348
+ Normal
349
+ end.new(content, **options)
350
+ end
351
+
352
+ # The default rule to use if no rule matches.
353
+ #
354
+ # The default rule is "*". From https://publicsuffix.org/list/:
355
+ #
356
+ # > If no rules match, the prevailing rule is "*".
357
+ #
358
+ # @return [PublicSuffix::Rule::Wildcard] The default rule.
359
+ def self.default
360
+ factory(STAR)
382
361
  end
383
362
 
384
363
  end