public_suffix 1.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/.gitignore +4 -0
- data/.travis.yml +11 -0
- data/.yardopts +2 -0
- data/CHANGELOG.md +134 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +22 -0
- data/LICENSE +22 -0
- data/README.md +151 -0
- data/Rakefile +109 -0
- data/lib/public_suffix.rb +134 -0
- data/lib/public_suffix/definitions.txt +5190 -0
- data/lib/public_suffix/domain.rb +387 -0
- data/lib/public_suffix/errors.rb +57 -0
- data/lib/public_suffix/list.rb +283 -0
- data/lib/public_suffix/rule.rb +373 -0
- data/lib/public_suffix/rule_list.rb +14 -0
- data/lib/public_suffix/version.rb +23 -0
- data/public_suffix.gemspec +37 -0
- data/test/acceptance_test.rb +36 -0
- data/test/test_helper.rb +6 -0
- data/test/unit/domain_test.rb +170 -0
- data/test/unit/errors_test.rb +23 -0
- data/test/unit/list_test.rb +193 -0
- data/test/unit/public_suffix_test.rb +85 -0
- data/test/unit/rule_test.rb +307 -0
- metadata +111 -0
@@ -0,0 +1,387 @@
|
|
1
|
+
#--
|
2
|
+
# Public Suffix
|
3
|
+
#
|
4
|
+
# Domain name parser based on the Public Suffix List.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
|
7
|
+
#++
|
8
|
+
|
9
|
+
|
10
|
+
module PublicSuffix
|
11
|
+
|
12
|
+
class Domain
|
13
|
+
|
14
|
+
# Splits a string into its possible labels
|
15
|
+
# as a domain in reverse order from the input string.
|
16
|
+
#
|
17
|
+
# The input is not validated, but it is assumed to be a valid domain.
|
18
|
+
#
|
19
|
+
# @param [String, #to_s] domain
|
20
|
+
# The domain name to split.
|
21
|
+
#
|
22
|
+
# @return [Array<String>]
|
23
|
+
#
|
24
|
+
# @example
|
25
|
+
#
|
26
|
+
# domain_to_labels('google.com')
|
27
|
+
# # => ['com', 'google']
|
28
|
+
#
|
29
|
+
# domain_to_labels('google.co.uk')
|
30
|
+
# # => ['uk', 'co', 'google']
|
31
|
+
#
|
32
|
+
def self.domain_to_labels(domain)
|
33
|
+
domain.to_s.split(".").reverse
|
34
|
+
end
|
35
|
+
|
36
|
+
# Creates and returns a new {PublicSuffix::Domain} instance.
|
37
|
+
#
|
38
|
+
# @overload initialize(tld)
|
39
|
+
# Initializes with a +tld+.
|
40
|
+
# @param [String] tld The TLD (extension)
|
41
|
+
# @overload initialize(tld, sld)
|
42
|
+
# Initializes with a +tld+ and +sld+.
|
43
|
+
# @param [String] tld The TLD (extension)
|
44
|
+
# @param [String] sld The SLD (domain)
|
45
|
+
# @overload initialize(tld, sld, trd)
|
46
|
+
# Initializes with a +tld+, +sld+ and +trd+.
|
47
|
+
# @param [String] tld The TLD (extension)
|
48
|
+
# @param [String] sld The SLD (domain)
|
49
|
+
# @param [String] trd The TRD (subdomain)
|
50
|
+
#
|
51
|
+
# @yield [self] Yields on self.
|
52
|
+
# @yieldparam [PublicSuffix::Domain] self The newly created instance
|
53
|
+
#
|
54
|
+
# @example Initialize with a TLD
|
55
|
+
# PublicSuffix::Domain.new("com")
|
56
|
+
# # => #<PublicSuffix::Domain @tld="com">
|
57
|
+
#
|
58
|
+
# @example Initialize with a TLD and SLD
|
59
|
+
# PublicSuffix::Domain.new("com", "example")
|
60
|
+
# # => #<PublicSuffix::Domain @tld="com", @sld="example", @trd=nil>
|
61
|
+
#
|
62
|
+
# @example Initialize with a TLD, SLD and TRD
|
63
|
+
# PublicSuffix::Domain.new("com", "example", "wwww")
|
64
|
+
# # => #<PublicSuffix::Domain @tld="com", @sld="example", @trd="wwww">
|
65
|
+
#
|
66
|
+
def initialize(*args, &block)
|
67
|
+
@tld, @sld, @trd = args
|
68
|
+
yield(self) if block_given?
|
69
|
+
end
|
70
|
+
|
71
|
+
# Returns a string representation of this object.
|
72
|
+
#
|
73
|
+
# @return [String]
|
74
|
+
def to_s
|
75
|
+
name
|
76
|
+
end
|
77
|
+
|
78
|
+
# Returns an array containing the domain parts.
|
79
|
+
#
|
80
|
+
# @return [Array<String, nil>]
|
81
|
+
#
|
82
|
+
# @example
|
83
|
+
#
|
84
|
+
# PublicSuffix::Domain.new("com", "google").to_a
|
85
|
+
# # => [nil, "google", "com"]
|
86
|
+
#
|
87
|
+
# PublicSuffix::Domain.new("com", "google", "www").to_a
|
88
|
+
# # => ["www", "google", "com"]
|
89
|
+
#
|
90
|
+
def to_a
|
91
|
+
[trd, sld, tld]
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
# Returns the Top Level Domain part, aka the extension.
|
96
|
+
#
|
97
|
+
# @return [String, nil]
|
98
|
+
def tld
|
99
|
+
@tld
|
100
|
+
end
|
101
|
+
|
102
|
+
# Returns the Second Level Domain part, aka the domain part.
|
103
|
+
#
|
104
|
+
# @return [String, nil]
|
105
|
+
def sld
|
106
|
+
@sld
|
107
|
+
end
|
108
|
+
|
109
|
+
# Returns the Third Level Domain part, aka the subdomain part.
|
110
|
+
#
|
111
|
+
# @return [String, nil]
|
112
|
+
def trd
|
113
|
+
@trd
|
114
|
+
end
|
115
|
+
|
116
|
+
|
117
|
+
# Returns the full domain name.
|
118
|
+
#
|
119
|
+
# @return [String]
|
120
|
+
#
|
121
|
+
# @example Gets the domain name of a domain
|
122
|
+
# PublicSuffix::Domain.new("com", "google").name
|
123
|
+
# # => "google.com"
|
124
|
+
#
|
125
|
+
# @example Gets the domain name of a subdomain
|
126
|
+
# PublicSuffix::Domain.new("com", "google", "www").name
|
127
|
+
# # => "www.google.com"
|
128
|
+
#
|
129
|
+
def name
|
130
|
+
[trd, sld, tld].reject { |part| part.nil? }.join(".")
|
131
|
+
end
|
132
|
+
|
133
|
+
# Returns a domain-like representation of this object
|
134
|
+
# if the object is a {#domain?}, <tt>nil</tt> otherwise.
|
135
|
+
#
|
136
|
+
# PublicSuffix::Domain.new("com").domain
|
137
|
+
# # => nil
|
138
|
+
#
|
139
|
+
# PublicSuffix::Domain.new("com", "google").domain
|
140
|
+
# # => "google.com"
|
141
|
+
#
|
142
|
+
# PublicSuffix::Domain.new("com", "google", "www").domain
|
143
|
+
# # => "google.com"
|
144
|
+
#
|
145
|
+
# This method doesn't validate the input. It handles the domain
|
146
|
+
# as a valid domain name and simply applies the necessary transformations.
|
147
|
+
#
|
148
|
+
# # This is an invalid domain
|
149
|
+
# PublicSuffix::Domain.new("zip", "google").domain
|
150
|
+
# # => "google.zip"
|
151
|
+
#
|
152
|
+
# This method returns a FQD, not just the domain part.
|
153
|
+
# To get the domain part, use <tt>#sld</tt> (aka second level domain).
|
154
|
+
#
|
155
|
+
# PublicSuffix::Domain.new("com", "google", "www").domain
|
156
|
+
# # => "google.com"
|
157
|
+
#
|
158
|
+
# PublicSuffix::Domain.new("com", "google", "www").sld
|
159
|
+
# # => "google"
|
160
|
+
#
|
161
|
+
# @return [String]
|
162
|
+
#
|
163
|
+
# @see #domain?
|
164
|
+
# @see #subdomain
|
165
|
+
#
|
166
|
+
def domain
|
167
|
+
return unless domain?
|
168
|
+
[sld, tld].join(".")
|
169
|
+
end
|
170
|
+
|
171
|
+
# Returns a domain-like representation of this object
|
172
|
+
# if the object is a {#subdomain?}, <tt>nil</tt> otherwise.
|
173
|
+
#
|
174
|
+
# PublicSuffix::Domain.new("com").subdomain
|
175
|
+
# # => nil
|
176
|
+
#
|
177
|
+
# PublicSuffix::Domain.new("com", "google").subdomain
|
178
|
+
# # => nil
|
179
|
+
#
|
180
|
+
# PublicSuffix::Domain.new("com", "google", "www").subdomain
|
181
|
+
# # => "www.google.com"
|
182
|
+
#
|
183
|
+
# This method doesn't validate the input. It handles the domain
|
184
|
+
# as a valid domain name and simply applies the necessary transformations.
|
185
|
+
#
|
186
|
+
# # This is an invalid domain
|
187
|
+
# PublicSuffix::Domain.new("zip", "google", "www").subdomain
|
188
|
+
# # => "www.google.zip"
|
189
|
+
#
|
190
|
+
# This method returns a FQD, not just the domain part.
|
191
|
+
# To get the subdomain part, use <tt>#trd</tt> (aka third level domain).
|
192
|
+
#
|
193
|
+
# PublicSuffix::Domain.new("com", "google", "www").subdomain
|
194
|
+
# # => "www.google.com"
|
195
|
+
#
|
196
|
+
# PublicSuffix::Domain.new("com", "google", "www").trd
|
197
|
+
# # => "www"
|
198
|
+
#
|
199
|
+
# @return [String]
|
200
|
+
#
|
201
|
+
# @see #subdomain?
|
202
|
+
# @see #domain
|
203
|
+
#
|
204
|
+
def subdomain
|
205
|
+
return unless subdomain?
|
206
|
+
[trd, sld, tld].join(".")
|
207
|
+
end
|
208
|
+
|
209
|
+
# Returns the rule matching this domain
|
210
|
+
# in the default {PublicSuffix::List}.
|
211
|
+
#
|
212
|
+
# @return [PublicSuffix::Rule::Base, nil]
|
213
|
+
# The rule instance that matches the current domain,
|
214
|
+
# nil if no rule is found.
|
215
|
+
def rule
|
216
|
+
List.default.find(name)
|
217
|
+
end
|
218
|
+
|
219
|
+
|
220
|
+
# Checks whether <tt>self</tt> looks like a domain.
|
221
|
+
#
|
222
|
+
# This method doesn't actually validate the domain.
|
223
|
+
# It only checks whether the instance contains
|
224
|
+
# a value for the {#tld} and {#sld} attributes.
|
225
|
+
# If you also want to validate the domain,
|
226
|
+
# use {#valid_domain?} instead.
|
227
|
+
#
|
228
|
+
# @return [Boolean]
|
229
|
+
#
|
230
|
+
# @example
|
231
|
+
#
|
232
|
+
# PublicSuffix::Domain.new("com").domain?
|
233
|
+
# # => false
|
234
|
+
#
|
235
|
+
# PublicSuffix::Domain.new("com", "google").domain?
|
236
|
+
# # => true
|
237
|
+
#
|
238
|
+
# PublicSuffix::Domain.new("com", "google", "www").domain?
|
239
|
+
# # => true
|
240
|
+
#
|
241
|
+
# # This is an invalid domain, but returns true
|
242
|
+
# # because this method doesn't validate the content.
|
243
|
+
# PublicSuffix::Domain.new("zip", "google").domain?
|
244
|
+
# # => true
|
245
|
+
#
|
246
|
+
# @see #subdomain?
|
247
|
+
#
|
248
|
+
def domain?
|
249
|
+
!(tld.nil? || sld.nil?)
|
250
|
+
end
|
251
|
+
|
252
|
+
# Checks whether <tt>self</tt> looks like a subdomain.
|
253
|
+
#
|
254
|
+
# This method doesn't actually validate the subdomain.
|
255
|
+
# It only checks whether the instance contains
|
256
|
+
# a value for the {#tld}, {#sld} and {#trd} attributes.
|
257
|
+
# If you also want to validate the domain,
|
258
|
+
# use {#valid_subdomain?} instead.
|
259
|
+
#
|
260
|
+
# @return [Boolean]
|
261
|
+
#
|
262
|
+
# @example
|
263
|
+
#
|
264
|
+
# PublicSuffix::Domain.new("com").subdomain?
|
265
|
+
# # => false
|
266
|
+
#
|
267
|
+
# PublicSuffix::Domain.new("com", "google").subdomain?
|
268
|
+
# # => false
|
269
|
+
#
|
270
|
+
# PublicSuffix::Domain.new("com", "google", "www").subdomain?
|
271
|
+
# # => true
|
272
|
+
#
|
273
|
+
# # This is an invalid domain, but returns true
|
274
|
+
# # because this method doesn't validate the content.
|
275
|
+
# PublicSuffix::Domain.new("zip", "google", "www").subdomain?
|
276
|
+
# # => true
|
277
|
+
#
|
278
|
+
# @see #domain?
|
279
|
+
#
|
280
|
+
def subdomain?
|
281
|
+
!(tld.nil? || sld.nil? || trd.nil?)
|
282
|
+
end
|
283
|
+
|
284
|
+
# Checks whether <tt>self</tt> is exclusively a domain,
|
285
|
+
# and not a subdomain.
|
286
|
+
#
|
287
|
+
# @return [Boolean]
|
288
|
+
def is_a_domain?
|
289
|
+
domain? && !subdomain?
|
290
|
+
end
|
291
|
+
|
292
|
+
# Checks whether <tt>self</tt> is exclusively a subdomain.
|
293
|
+
#
|
294
|
+
# @return [Boolean]
|
295
|
+
def is_a_subdomain?
|
296
|
+
subdomain?
|
297
|
+
end
|
298
|
+
|
299
|
+
# Checks whether <tt>self</tt> is assigned and allowed
|
300
|
+
# according to default {List}.
|
301
|
+
#
|
302
|
+
# This method triggers a new rule lookup in the default {List},
|
303
|
+
# which is a quite intensive task.
|
304
|
+
#
|
305
|
+
# @return [Boolean]
|
306
|
+
#
|
307
|
+
# @example Check a valid domain
|
308
|
+
# Domain.new("com", "example").valid?
|
309
|
+
# # => true
|
310
|
+
#
|
311
|
+
# @example Check a valid subdomain
|
312
|
+
# Domain.new("com", "example", "www").valid?
|
313
|
+
# # => true
|
314
|
+
#
|
315
|
+
# @example Check a not-assigned domain
|
316
|
+
# Domain.new("zip", "example").valid?
|
317
|
+
# # => false
|
318
|
+
#
|
319
|
+
# @example Check a not-allowed domain
|
320
|
+
# Domain.new("do", "example").valid?
|
321
|
+
# # => false
|
322
|
+
# Domain.new("do", "example", "www").valid?
|
323
|
+
# # => true
|
324
|
+
#
|
325
|
+
def valid?
|
326
|
+
r = rule
|
327
|
+
!r.nil? && r.allow?(name)
|
328
|
+
end
|
329
|
+
|
330
|
+
|
331
|
+
# Checks whether <tt>self</tt> looks like a domain and validates
|
332
|
+
# according to default {List}.
|
333
|
+
#
|
334
|
+
# @return [Boolean]
|
335
|
+
#
|
336
|
+
# @example
|
337
|
+
#
|
338
|
+
# PublicSuffix::Domain.new("com").valid_domain?
|
339
|
+
# # => false
|
340
|
+
#
|
341
|
+
# PublicSuffix::Domain.new("com", "google").valid_domain?
|
342
|
+
# # => true
|
343
|
+
#
|
344
|
+
# PublicSuffix::Domain.new("com", "google", "www").valid_domain?
|
345
|
+
# # => true
|
346
|
+
#
|
347
|
+
# # This is an invalid domain
|
348
|
+
# PublicSuffix::Domain.new("zip", "google").valid_domain?
|
349
|
+
# # => false
|
350
|
+
#
|
351
|
+
# @see #domain?
|
352
|
+
# @see #valid?
|
353
|
+
#
|
354
|
+
def valid_domain?
|
355
|
+
domain? && valid?
|
356
|
+
end
|
357
|
+
|
358
|
+
# Checks whether <tt>self</tt> looks like a subdomain and validates
|
359
|
+
# according to default {List}.
|
360
|
+
#
|
361
|
+
# @return [Boolean]
|
362
|
+
#
|
363
|
+
# @example
|
364
|
+
#
|
365
|
+
# PublicSuffix::Domain.new("com").valid_subdomain?
|
366
|
+
# # => false
|
367
|
+
#
|
368
|
+
# PublicSuffix::Domain.new("com", "google").valid_subdomain?
|
369
|
+
# # => false
|
370
|
+
#
|
371
|
+
# PublicSuffix::Domain.new("com", "google", "www").valid_subdomain?
|
372
|
+
# # => true
|
373
|
+
#
|
374
|
+
# # This is an invalid domain
|
375
|
+
# PublicSuffix::Domain.new("zip", "google", "www").valid_subdomain?
|
376
|
+
# # => false
|
377
|
+
#
|
378
|
+
# @see #subdomain?
|
379
|
+
# @see #valid?
|
380
|
+
#
|
381
|
+
def valid_subdomain?
|
382
|
+
subdomain? && valid?
|
383
|
+
end
|
384
|
+
|
385
|
+
end
|
386
|
+
|
387
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#--
|
2
|
+
# Public Suffix
|
3
|
+
#
|
4
|
+
# Domain name parser based on the Public Suffix List.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
|
7
|
+
#++
|
8
|
+
|
9
|
+
|
10
|
+
module PublicSuffix
|
11
|
+
|
12
|
+
class Error < StandardError
|
13
|
+
end
|
14
|
+
|
15
|
+
# Raised when trying to parse an invalid domain.
|
16
|
+
# A domain is considered invalid when no rule is found
|
17
|
+
# in the definition list.
|
18
|
+
#
|
19
|
+
# @example
|
20
|
+
#
|
21
|
+
# PublicSuffix.parse("nic.test")
|
22
|
+
# # => PublicSuffix::DomainInvalid
|
23
|
+
#
|
24
|
+
# PublicSuffix.parse("http://www.nic.it")
|
25
|
+
# # => PublicSuffix::DomainInvalid
|
26
|
+
#
|
27
|
+
# @since 0.6.0
|
28
|
+
#
|
29
|
+
class DomainInvalid < Error
|
30
|
+
end
|
31
|
+
|
32
|
+
# Raised when trying to parse a domain
|
33
|
+
# which is formally defined by a rule,
|
34
|
+
# but the rules set a requirement which is not satisfied
|
35
|
+
# by the input you are trying to parse.
|
36
|
+
#
|
37
|
+
# @example
|
38
|
+
#
|
39
|
+
# PublicSuffix.parse("nic.do")
|
40
|
+
# # => PublicSuffix::DomainNotAllowed
|
41
|
+
#
|
42
|
+
# PublicSuffix.parse("www.nic.do")
|
43
|
+
# # => PublicSuffix::Domain
|
44
|
+
#
|
45
|
+
# @since 0.6.0
|
46
|
+
#
|
47
|
+
class DomainNotAllowed < DomainInvalid
|
48
|
+
end
|
49
|
+
|
50
|
+
|
51
|
+
# Backward Compatibility
|
52
|
+
#
|
53
|
+
# @deprecated Use {PublicSuffix::DomainInvalid}.
|
54
|
+
#
|
55
|
+
InvalidDomain = DomainInvalid
|
56
|
+
|
57
|
+
end
|
@@ -0,0 +1,283 @@
|
|
1
|
+
#--
|
2
|
+
# Public Suffix
|
3
|
+
#
|
4
|
+
# Domain name parser based on the Public Suffix List.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
|
7
|
+
#++
|
8
|
+
|
9
|
+
|
10
|
+
module PublicSuffix
|
11
|
+
|
12
|
+
# A {PublicSuffix::List} is a collection of one
|
13
|
+
# or more {PublicSuffix::Rule}.
|
14
|
+
#
|
15
|
+
# Given a {PublicSuffix::List},
|
16
|
+
# you can add or remove {PublicSuffix::Rule},
|
17
|
+
# iterate all items in the list or search for the first rule
|
18
|
+
# which matches a specific domain name.
|
19
|
+
#
|
20
|
+
# # Create a new list
|
21
|
+
# list = PublicSuffix::List.new
|
22
|
+
#
|
23
|
+
# # Push two rules to the list
|
24
|
+
# list << PublicSuffix::Rule.factory("it")
|
25
|
+
# list << PublicSuffix::Rule.factory("com")
|
26
|
+
#
|
27
|
+
# # Get the size of the list
|
28
|
+
# list.size
|
29
|
+
# # => 2
|
30
|
+
#
|
31
|
+
# # Search for the rule matching given domain
|
32
|
+
# list.find("example.com")
|
33
|
+
# # => #<PublicSuffix::Rule::Normal>
|
34
|
+
# list.find("example.org")
|
35
|
+
# # => nil
|
36
|
+
#
|
37
|
+
# You can create as many {PublicSuffix::List} as you want.
|
38
|
+
# The {PublicSuffix::List.default} rule list is used
|
39
|
+
# to tokenize and validate a domain.
|
40
|
+
#
|
41
|
+
# {PublicSuffix::List} implements +Enumerable+ module.
|
42
|
+
#
|
43
|
+
class List
|
44
|
+
include Enumerable
|
45
|
+
|
46
|
+
# Gets the array of rules.
|
47
|
+
#
|
48
|
+
# @return [Array<PublicSuffix::Rule::*>]
|
49
|
+
attr_reader :rules
|
50
|
+
|
51
|
+
# Gets the naive index, a hash with the keys being the first label of
|
52
|
+
# every rule pointing to an array of integers (indexes of the rules in @rules).
|
53
|
+
#
|
54
|
+
# @return [Hash]
|
55
|
+
attr_reader :indexes
|
56
|
+
|
57
|
+
|
58
|
+
# Initializes an empty {PublicSuffix::List}.
|
59
|
+
#
|
60
|
+
# @yield [self] Yields on self.
|
61
|
+
# @yieldparam [PublicSuffix::List] self The newly created instance.
|
62
|
+
#
|
63
|
+
def initialize(&block)
|
64
|
+
@rules = []
|
65
|
+
@indexes = {}
|
66
|
+
yield(self) if block_given?
|
67
|
+
create_index!
|
68
|
+
end
|
69
|
+
|
70
|
+
# Creates a naive index for +@rules+. Just a hash that will tell
|
71
|
+
# us where the elements of +@rules+ are relative to its first
|
72
|
+
# {PublicSuffix::Rule::Base#labels} element.
|
73
|
+
#
|
74
|
+
# For instance if @rules[5] and @rules[4] are the only elements of the list
|
75
|
+
# where Rule#labels.first is 'us' @indexes['us'] #=> [5,4], that way in
|
76
|
+
# select we can avoid mapping every single rule against the candidate domain.
|
77
|
+
# The index is rebuilt from scratch on every call. Without the reset,
# re-indexing (which happens on each indexed #add) would append the same
# positions again and make #select return duplicated rules.
def create_index!
  @indexes = {}
  @rules.map { |rule| rule.labels.first }.each_with_index do |label, position|
    # Group the positions of all rules that share the same first label.
    (@indexes[label] ||= []) << position
  end
end
|
86
|
+
|
87
|
+
# Checks whether two lists are equal.
|
88
|
+
#
|
89
|
+
# List <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
|
90
|
+
# {PublicSuffix::List} and each +PublicSuffix::Rule::*+
|
91
|
+
# in list <tt>one</tt> is available in list <tt>two</tt>, in the same order.
|
92
|
+
#
|
93
|
+
# @param [PublicSuffix::List] other
|
94
|
+
# The List to compare.
|
95
|
+
#
|
96
|
+
# @return [Boolean]
|
97
|
+
def ==(other)
|
98
|
+
return false unless other.is_a?(List)
|
99
|
+
self.equal?(other) ||
|
100
|
+
self.rules == other.rules
|
101
|
+
end
|
102
|
+
alias :eql? :==
|
103
|
+
|
104
|
+
# Iterates each rule in the list.
|
105
|
+
def each(*args, &block)
|
106
|
+
@rules.each(*args, &block)
|
107
|
+
end
|
108
|
+
|
109
|
+
# Gets the list as array.
|
110
|
+
#
|
111
|
+
# @return [Array<PublicSuffix::Rule::*>]
|
112
|
+
def to_a
|
113
|
+
@rules
|
114
|
+
end
|
115
|
+
|
116
|
+
# Adds the given object to the list
|
117
|
+
# and optionally refreshes the rule index.
|
118
|
+
#
|
119
|
+
# @param [PublicSuffix::Rule::*] rule
|
120
|
+
# The rule to add to the list.
|
121
|
+
# @param [Boolean] index
|
122
|
+
# Set to true to recreate the rule index
|
123
|
+
# after the rule has been added to the list.
|
124
|
+
#
|
125
|
+
# @return [self]
|
126
|
+
#
|
127
|
+
# @see #create_index!
|
128
|
+
#
|
129
|
+
def add(rule, index = true)
|
130
|
+
@rules << rule
|
131
|
+
create_index! if index == true
|
132
|
+
self
|
133
|
+
end
|
134
|
+
alias << add
|
135
|
+
|
136
|
+
# Gets the number of elements in the list.
|
137
|
+
#
|
138
|
+
# @return [Integer]
|
139
|
+
def size
|
140
|
+
@rules.size
|
141
|
+
end
|
142
|
+
alias length size
|
143
|
+
|
144
|
+
# Checks whether the list is empty.
|
145
|
+
#
|
146
|
+
# @return [Boolean]
|
147
|
+
def empty?
|
148
|
+
@rules.empty?
|
149
|
+
end
|
150
|
+
|
151
|
+
# Removes all elements.
|
152
|
+
#
|
153
|
+
# @return [self]
|
154
|
+
def clear
  @rules.clear
  # Drop the index together with the rules: stale positions would make
  # #select look up entries that no longer exist in @rules.
  @indexes.clear
  self
end
|
158
|
+
|
159
|
+
|
160
|
+
# Returns the most appropriate rule for domain.
|
161
|
+
#
|
162
|
+
# From the Public Suffix List documentation:
|
163
|
+
#
|
164
|
+
# * If a hostname matches more than one rule in the file,
|
165
|
+
# the longest matching rule (the one with the most levels) will be used.
|
166
|
+
# * An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
|
167
|
+
# An exception rule takes priority over any other matching rule.
|
168
|
+
#
|
169
|
+
# == Algorithm description
|
170
|
+
#
|
171
|
+
# * Match domain against all rules and take note of the matching ones.
|
172
|
+
# * If no rules match, the prevailing rule is "*".
|
173
|
+
# * If more than one rule matches, the prevailing rule is the one which is an exception rule.
|
174
|
+
# * If there is no matching exception rule, the prevailing rule is the one with the most labels.
|
175
|
+
# * If the prevailing rule is an exception rule, modify it by removing the leftmost label.
|
176
|
+
# * The public suffix is the set of labels from the domain
|
177
|
+
# which directly match the labels of the prevailing rule (joined by dots).
|
178
|
+
# * The registered domain is the public suffix plus one additional label.
|
179
|
+
#
|
180
|
+
# @param [String, #to_s] domain The domain name.
|
181
|
+
#
|
182
|
+
# @return [PublicSuffix::Rule::*, nil]
|
183
|
+
def find(domain)
|
184
|
+
rules = select(domain)
|
185
|
+
rules.select { |r| r.type == :exception }.first ||
|
186
|
+
rules.inject { |t,r| t.length > r.length ? t : r }
|
187
|
+
end
|
188
|
+
|
189
|
+
# Selects all the rules matching given domain.
|
190
|
+
#
|
191
|
+
# Will use +@indexes+ to try only the rules that share the same first label,
|
192
|
+
# that will speed up things when using +List.find('foo')+ a lot.
|
193
|
+
#
|
194
|
+
# @param [String, #to_s] domain The domain name.
|
195
|
+
#
|
196
|
+
# @return [Array<PublicSuffix::Rule::*>]
|
197
|
+
def select(domain)
|
198
|
+
indices = (@indexes[Domain.domain_to_labels(domain).first] || [])
|
199
|
+
@rules.values_at(*indices).select { |rule| rule.match?(domain) }
|
200
|
+
end
|
201
|
+
|
202
|
+
|
203
|
+
@@default = nil
|
204
|
+
|
205
|
+
class << self
|
206
|
+
|
207
|
+
# Gets the default rule list.
|
208
|
+
# Initializes a new {PublicSuffix::List} parsing the content
|
209
|
+
# of {PublicSuffix::List.default_definition}, if required.
|
210
|
+
#
|
211
|
+
# @return [PublicSuffix::List]
|
212
|
+
def default
  # Lazily parse the default definition the first time it is requested.
  @@default ||= begin
    definition = default_definition
    begin
      parse(definition)
    ensure
      # default_definition normally hands back an open File; close it once
      # parsing is done so the handle is not leaked. Definitions without
      # #close (e.g. a plain String) are left untouched.
      if definition.respond_to?(:close)
        definition.close unless definition.respond_to?(:closed?) && definition.closed?
      end
    end
  end
end
|
215
|
+
|
216
|
+
# Sets the default rule list to +value+.
|
217
|
+
#
|
218
|
+
# @param [PublicSuffix::List] value
|
219
|
+
# The new rule list.
|
220
|
+
#
|
221
|
+
# @return [PublicSuffix::List]
|
222
|
+
def default=(value)
|
223
|
+
@@default = value
|
224
|
+
end
|
225
|
+
|
226
|
+
# Sets the default rule list to +nil+.
|
227
|
+
#
|
228
|
+
# @return [self]
|
229
|
+
def clear
|
230
|
+
self.default = nil
|
231
|
+
self
|
232
|
+
end
|
233
|
+
|
234
|
+
# Resets the default rule list and reinitializes it
|
235
|
+
# parsing the content of {PublicSuffix::List.default_definition}.
|
236
|
+
#
|
237
|
+
# @return [PublicSuffix::List]
|
238
|
+
def reload
|
239
|
+
self.clear.default
|
240
|
+
end
|
241
|
+
|
242
|
+
# Gets the default definition list.
|
243
|
+
# Can be any <tt>IOStream</tt> including a <tt>File</tt>
|
244
|
+
# or a simple <tt>String</tt>.
|
245
|
+
# The object must respond to <tt>#each_line</tt>.
|
246
|
+
#
|
247
|
+
# @return [File]
|
248
|
+
def default_definition
|
249
|
+
File.new(File.join(File.dirname(__FILE__), "definitions.txt"), "r:utf-8")
|
250
|
+
end
|
251
|
+
|
252
|
+
|
253
|
+
# Parse given +input+ treating the content as Public Suffix List.
|
254
|
+
#
|
255
|
+
# See http://publicsuffix.org/format/ for more details about input format.
|
256
|
+
#
|
257
|
+
# @param [String] input The rule list to parse.
|
258
|
+
#
|
259
|
+
# @return [PublicSuffix::List]
|
260
|
+
def parse(input)
|
261
|
+
new do |list|
|
262
|
+
input.each_line do |line|
|
263
|
+
line.strip!
|
264
|
+
|
265
|
+
# strip blank lines
|
266
|
+
if line.empty?
|
267
|
+
next
|
268
|
+
# strip comments
|
269
|
+
elsif line =~ %r{^//}
|
270
|
+
next
|
271
|
+
# append rule
|
272
|
+
else
|
273
|
+
list.add(Rule.factory(line), false)
|
274
|
+
end
|
275
|
+
end
|
276
|
+
end
|
277
|
+
end
|
278
|
+
|
279
|
+
end
|
280
|
+
|
281
|
+
end
|
282
|
+
|
283
|
+
end
|