public_suffix 1.0.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +0 -0
- data/.gitignore +4 -0
- data/.travis.yml +11 -0
- data/.yardopts +2 -0
- data/CHANGELOG.md +134 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +22 -0
- data/LICENSE +22 -0
- data/README.md +151 -0
- data/Rakefile +109 -0
- data/lib/public_suffix.rb +134 -0
- data/lib/public_suffix/definitions.txt +5190 -0
- data/lib/public_suffix/domain.rb +387 -0
- data/lib/public_suffix/errors.rb +57 -0
- data/lib/public_suffix/list.rb +283 -0
- data/lib/public_suffix/rule.rb +373 -0
- data/lib/public_suffix/rule_list.rb +14 -0
- data/lib/public_suffix/version.rb +23 -0
- data/public_suffix.gemspec +37 -0
- data/test/acceptance_test.rb +36 -0
- data/test/test_helper.rb +6 -0
- data/test/unit/domain_test.rb +170 -0
- data/test/unit/errors_test.rb +23 -0
- data/test/unit/list_test.rb +193 -0
- data/test/unit/public_suffix_test.rb +85 -0
- data/test/unit/rule_test.rb +307 -0
- metadata +111 -0
@@ -0,0 +1,387 @@
|
|
1
|
+
#--
|
2
|
+
# Public Suffix
|
3
|
+
#
|
4
|
+
# Domain name parser based on the Public Suffix List.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
|
7
|
+
#++
|
8
|
+
|
9
|
+
|
10
|
+
module PublicSuffix
|
11
|
+
|
12
|
+
class Domain
|
13
|
+
|
14
|
+
# Splits a string into its possible labels
|
15
|
+
# as a domain in reverse order from the input string.
|
16
|
+
#
|
17
|
+
# The input is not validated, but it is assumed to be a valid domain.
|
18
|
+
#
|
19
|
+
# @param [String, #to_s] domain
|
20
|
+
# The domain name to split.
|
21
|
+
#
|
22
|
+
# @return [Array<String>]
|
23
|
+
#
|
24
|
+
# @example
|
25
|
+
#
|
26
|
+
# domain_to_labels('google.com')
|
27
|
+
# # => ['com', 'google']
|
28
|
+
#
|
29
|
+
# domain_to_labels('google.co.uk')
|
30
|
+
# # => ['uk', 'co', 'google']
|
31
|
+
#
|
32
|
+
def self.domain_to_labels(domain)
  # Coerce to String first so any #to_s-able input is accepted,
  # then hand back the dot-separated labels right-to-left (TLD first).
  labels = domain.to_s.split(".")
  labels.reverse
end
|
35
|
+
|
36
|
+
# Creates and returns a new {PublicSuffix::Domain} instance.
|
37
|
+
#
|
38
|
+
# @overload initialize(tld)
|
39
|
+
# Initializes with a +tld+.
|
40
|
+
# @param [String] tld The TLD (extension)
|
41
|
+
# @overload initialize(tld, sld)
|
42
|
+
# Initializes with a +tld+ and +sld+.
|
43
|
+
# @param [String] tld The TLD (extension)
|
44
|
+
# @param [String] sld The SLD (domain)
|
45
|
+
# @overload initialize(tld, sld, trd)
|
46
|
+
# Initializes with a +tld+, +sld+ and +trd+.
|
47
|
+
# @param [String] tld The TLD (extension)
|
48
|
+
# @param [String] sld The SLD (domain)
|
49
|
+
# @param [String] trd The TRD (subdomain)
|
50
|
+
#
|
51
|
+
# @yield [self] Yields on self.
|
52
|
+
# @yieldparam [PublicSuffix::Domain] self The newly created instance
|
53
|
+
#
|
54
|
+
# @example Initialize with a TLD
|
55
|
+
# PublicSuffix::Domain.new("com")
|
56
|
+
# # => #<PublicSuffix::Domain @tld="com">
|
57
|
+
#
|
58
|
+
# @example Initialize with a TLD and SLD
|
59
|
+
# PublicSuffix::Domain.new("com", "example")
|
60
|
+
# # => #<PublicSuffix::Domain @tld="com", @sld="example", @trd=nil>
|
61
|
+
#
|
62
|
+
# @example Initialize with a TLD, SLD and TRD
|
63
|
+
# PublicSuffix::Domain.new("com", "example", "www")
|
64
|
+
# # => #<PublicSuffix::Domain @tld="com", @sld="example", @trd="www">
|
65
|
+
#
|
66
|
+
def initialize(*args, &block)
  # Positional parts, most significant first; missing ones stay nil.
  @tld = args[0]
  @sld = args[1]
  @trd = args[2]
  # Let the caller finish configuring the new instance.
  block.call(self) if block
end
|
70
|
+
|
71
|
+
# Returns a string representation of this object.
|
72
|
+
#
|
73
|
+
# @return [String]
|
74
|
+
def to_s
  # The string form is simply the full domain name.
  name
end
|
77
|
+
|
78
|
+
# Returns an array containing the domain parts.
|
79
|
+
#
|
80
|
+
# @return [Array<String, nil>]
|
81
|
+
#
|
82
|
+
# @example
|
83
|
+
#
|
84
|
+
# PublicSuffix::Domain.new("com", "google").to_a
|
85
|
+
# # => [nil, "google", "com"]
|
86
|
+
#
|
87
|
+
# PublicSuffix::Domain.new("com", "google", "www").to_a
|
88
|
+
# # => ["www", "google", "com"]
|
89
|
+
#
|
90
|
+
def to_a
  # Ordered as the parts read in a domain name: subdomain, domain, extension.
  parts = []
  parts << trd << sld << tld
  parts
end
|
93
|
+
|
94
|
+
|
95
|
+
# Returns the Top Level Domain part, aka the extension.
|
96
|
+
#
|
97
|
+
# @return [String, nil]
|
98
|
+
def tld; @tld; end # plain reader for the first constructor argument
|
101
|
+
|
102
|
+
# Returns the Second Level Domain part, aka the domain part.
|
103
|
+
#
|
104
|
+
# @return [String, nil]
|
105
|
+
def sld; @sld; end # plain reader for the second constructor argument
|
108
|
+
|
109
|
+
# Returns the Third Level Domain part, aka the subdomain part.
|
110
|
+
#
|
111
|
+
# @return [String, nil]
|
112
|
+
def trd; @trd; end # plain reader for the third constructor argument
|
115
|
+
|
116
|
+
|
117
|
+
# Returns the full domain name.
|
118
|
+
#
|
119
|
+
# @return [String]
|
120
|
+
#
|
121
|
+
# @example Gets the domain name of a domain
|
122
|
+
# PublicSuffix::Domain.new("com", "google").name
|
123
|
+
# # => "google.com"
|
124
|
+
#
|
125
|
+
# @example Gets the domain name of a subdomain
|
126
|
+
# PublicSuffix::Domain.new("com", "google", "www").name
|
127
|
+
# # => "www.google.com"
|
128
|
+
#
|
129
|
+
def name
  # Drop the parts that were not provided, then glue the rest with dots.
  [trd, sld, tld].compact.join(".")
end
|
132
|
+
|
133
|
+
# Returns a domain-like representation of this object
|
134
|
+
# if the object is a {#domain?}, <tt>nil</tt> otherwise.
|
135
|
+
#
|
136
|
+
# PublicSuffix::Domain.new("com").domain
|
137
|
+
# # => nil
|
138
|
+
#
|
139
|
+
# PublicSuffix::Domain.new("com", "google").domain
|
140
|
+
# # => "google.com"
|
141
|
+
#
|
142
|
+
# PublicSuffix::Domain.new("com", "google", "www").domain
|
143
|
+
# # => "google.com"
|
144
|
+
#
|
145
|
+
# This method doesn't validate the input. It handles the domain
|
146
|
+
# as a valid domain name and simply applies the necessary transformations.
|
147
|
+
#
|
148
|
+
# # This is an invalid domain
|
149
|
+
# PublicSuffix::Domain.new("zip", "google").domain
|
150
|
+
# # => "google.zip"
|
151
|
+
#
|
152
|
+
# This method returns a FQD, not just the domain part.
|
153
|
+
# To get the domain part, use <tt>#sld</tt> (aka second level domain).
|
154
|
+
#
|
155
|
+
# PublicSuffix::Domain.new("com", "google", "www").domain
|
156
|
+
# # => "google.com"
|
157
|
+
#
|
158
|
+
# PublicSuffix::Domain.new("com", "google", "www").sld
|
159
|
+
# # => "google"
|
160
|
+
#
|
161
|
+
# @return [String]
|
162
|
+
#
|
163
|
+
# @see #domain?
|
164
|
+
# @see #subdomain
|
165
|
+
#
|
166
|
+
def domain
  # Only meaningful when both the SLD and the TLD are present;
  # the TRD, if any, is deliberately left out.
  domain? ? [sld, tld].join(".") : nil
end
|
170
|
+
|
171
|
+
# Returns a domain-like representation of this object
|
172
|
+
# if the object is a {#subdomain?}, <tt>nil</tt> otherwise.
|
173
|
+
#
|
174
|
+
# PublicSuffix::Domain.new("com").subdomain
|
175
|
+
# # => nil
|
176
|
+
#
|
177
|
+
# PublicSuffix::Domain.new("com", "google").subdomain
|
178
|
+
# # => nil
|
179
|
+
#
|
180
|
+
# PublicSuffix::Domain.new("com", "google", "www").subdomain
|
181
|
+
# # => "www.google.com"
|
182
|
+
#
|
183
|
+
# This method doesn't validate the input. It handles the domain
|
184
|
+
# as a valid domain name and simply applies the necessary transformations.
|
185
|
+
#
|
186
|
+
# # This is an invalid domain
|
187
|
+
# PublicSuffix::Domain.new("zip", "google", "www").subdomain
|
188
|
+
# # => "www.google.zip"
|
189
|
+
#
|
190
|
+
# This method returns a FQD, not just the domain part.
|
191
|
+
# To get the subdomain part, use <tt>#trd</tt> (aka third level domain).
|
192
|
+
#
|
193
|
+
# PublicSuffix::Domain.new("com", "google", "www").subdomain
|
194
|
+
# # => "www.google.com"
|
195
|
+
#
|
196
|
+
# PublicSuffix::Domain.new("com", "google", "www").trd
|
197
|
+
# # => "www"
|
198
|
+
#
|
199
|
+
# @return [String]
|
200
|
+
#
|
201
|
+
# @see #subdomain?
|
202
|
+
# @see #domain
|
203
|
+
#
|
204
|
+
def subdomain
  # Requires all three parts; otherwise there is no subdomain to report.
  subdomain? ? [trd, sld, tld].join(".") : nil
end
|
208
|
+
|
209
|
+
# Returns the rule matching this domain
|
210
|
+
# in the default {PublicSuffix::List}.
|
211
|
+
#
|
212
|
+
# @return [PublicSuffix::Rule::Base, nil]
|
213
|
+
# The rule instance that matches the current domain,
|
214
|
+
# nil if no rule is found.
|
215
|
+
def rule
  # Look the full name up in the shared default list.
  default_list = List.default
  default_list.find(name)
end
|
218
|
+
|
219
|
+
|
220
|
+
# Checks whether <tt>self</tt> looks like a domain.
|
221
|
+
#
|
222
|
+
# This method doesn't actually validate the domain.
|
223
|
+
# It only checks whether the instance contains
|
224
|
+
# a value for the {#tld} and {#sld} attributes.
|
225
|
+
# If you also want to validate the domain,
|
226
|
+
# use {#valid_domain?} instead.
|
227
|
+
#
|
228
|
+
# @return [Boolean]
|
229
|
+
#
|
230
|
+
# @example
|
231
|
+
#
|
232
|
+
# PublicSuffix::Domain.new("com").domain?
|
233
|
+
# # => false
|
234
|
+
#
|
235
|
+
# PublicSuffix::Domain.new("com", "google").domain?
|
236
|
+
# # => true
|
237
|
+
#
|
238
|
+
# PublicSuffix::Domain.new("com", "google", "www").domain?
|
239
|
+
# # => true
|
240
|
+
#
|
241
|
+
# # This is an invalid domain, but returns true
|
242
|
+
# # because this method doesn't validate the content.
|
243
|
+
# PublicSuffix::Domain.new("zip", "google").domain?
|
244
|
+
# # => true
|
245
|
+
#
|
246
|
+
# @see #subdomain?
|
247
|
+
#
|
248
|
+
def domain?
  # Presence check only: both the TLD and the SLD must be set.
  !tld.nil? && !sld.nil?
end
|
251
|
+
|
252
|
+
# Checks whether <tt>self</tt> looks like a subdomain.
|
253
|
+
#
|
254
|
+
# This method doesn't actually validate the subdomain.
|
255
|
+
# It only checks whether the instance contains
|
256
|
+
# a value for the {#tld}, {#sld} and {#trd} attributes.
|
257
|
+
# If you also want to validate the domain,
|
258
|
+
# use {#valid_subdomain?} instead.
|
259
|
+
#
|
260
|
+
# @return [Boolean]
|
261
|
+
#
|
262
|
+
# @example
|
263
|
+
#
|
264
|
+
# PublicSuffix::Domain.new("com").subdomain?
|
265
|
+
# # => false
|
266
|
+
#
|
267
|
+
# PublicSuffix::Domain.new("com", "google").subdomain?
|
268
|
+
# # => false
|
269
|
+
#
|
270
|
+
# PublicSuffix::Domain.new("com", "google", "www").subdomain?
|
271
|
+
# # => true
|
272
|
+
#
|
273
|
+
# # This is an invalid domain, but returns true
|
274
|
+
# # because this method doesn't validate the content.
|
275
|
+
# PublicSuffix::Domain.new("zip", "google", "www").subdomain?
|
276
|
+
# # => true
|
277
|
+
#
|
278
|
+
# @see #domain?
|
279
|
+
#
|
280
|
+
def subdomain?
  # Presence check only: TLD, SLD and TRD must all be set.
  !tld.nil? && !sld.nil? && !trd.nil?
end
|
283
|
+
|
284
|
+
# Checks whether <tt>self</tt> is exclusively a domain,
|
285
|
+
# and not a subdomain.
|
286
|
+
#
|
287
|
+
# @return [Boolean]
|
288
|
+
def is_a_domain?
  # A "pure" domain has TLD and SLD but no TRD.
  looks_like_domain = domain?
  looks_like_domain && !subdomain?
end
|
291
|
+
|
292
|
+
# Checks whether <tt>self</tt> is exclusively a subdomain.
|
293
|
+
#
|
294
|
+
# @return [Boolean]
|
295
|
+
def is_a_subdomain?
  # Nothing more specific than a subdomain exists here,
  # so this is the same check as #subdomain?.
  subdomain?
end
|
298
|
+
|
299
|
+
# Checks whether <tt>self</tt> is assigned and allowed
|
300
|
+
# according to default {List}.
|
301
|
+
#
|
302
|
+
# This method triggers a new rule lookup in the default {List},
|
303
|
+
# which is a quite intensive task.
|
304
|
+
#
|
305
|
+
# @return [Boolean]
|
306
|
+
#
|
307
|
+
# @example Check a valid domain
|
308
|
+
# Domain.new("com", "example").valid?
|
309
|
+
# # => true
|
310
|
+
#
|
311
|
+
# @example Check a valid subdomain
|
312
|
+
# Domain.new("com", "example", "www").valid?
|
313
|
+
# # => true
|
314
|
+
#
|
315
|
+
# @example Check a not-assigned domain
|
316
|
+
# Domain.new("zip", "example").valid?
|
317
|
+
# # => false
|
318
|
+
#
|
319
|
+
# @example Check a not-allowed domain
|
320
|
+
# Domain.new("do", "example").valid?
|
321
|
+
# # => false
|
322
|
+
# Domain.new("do", "example", "www").valid?
|
323
|
+
# # => true
|
324
|
+
#
|
325
|
+
def valid?
  # One lookup only: #rule searches the default list each time it is called.
  matching = rule
  return false if matching.nil?
  matching.allow?(name)
end
|
329
|
+
|
330
|
+
|
331
|
+
# Checks whether <tt>self</tt> looks like a domain and validates
|
332
|
+
# according to default {List}.
|
333
|
+
#
|
334
|
+
# @return [Boolean]
|
335
|
+
#
|
336
|
+
# @example
|
337
|
+
#
|
338
|
+
# PublicSuffix::Domain.new("com").valid_domain?
|
339
|
+
# # => false
|
340
|
+
#
|
341
|
+
# PublicSuffix::Domain.new("com", "google").valid_domain?
|
342
|
+
# # => true
|
343
|
+
#
|
344
|
+
# PublicSuffix::Domain.new("com", "google", "www").valid_domain?
|
345
|
+
# # => true
|
346
|
+
#
|
347
|
+
# # This is an invalid domain
|
348
|
+
# PublicSuffix::Domain.new("zip", "google").valid_domain?
|
349
|
+
# # => false
|
350
|
+
#
|
351
|
+
# @see #domain?
|
352
|
+
# @see #valid?
|
353
|
+
#
|
354
|
+
def valid_domain?
  # Cheap presence check first; the list lookup in #valid? only runs if it passes.
  looks_like_domain = domain?
  looks_like_domain && valid?
end
|
357
|
+
|
358
|
+
# Checks whether <tt>self</tt> looks like a subdomain and validates
|
359
|
+
# according to default {List}.
|
360
|
+
#
|
361
|
+
# @return [Boolean]
|
362
|
+
#
|
363
|
+
# @example
|
364
|
+
#
|
365
|
+
# PublicSuffix::Domain.new("com").valid_subdomain?
|
366
|
+
# # => false
|
367
|
+
#
|
368
|
+
# PublicSuffix::Domain.new("com", "google").valid_subdomain?
|
369
|
+
# # => false
|
370
|
+
#
|
371
|
+
# PublicSuffix::Domain.new("com", "google", "www").valid_subdomain?
|
372
|
+
# # => true
|
373
|
+
#
|
374
|
+
# # This is an invalid domain
|
375
|
+
# PublicSuffix::Domain.new("zip", "google", "www").valid_subdomain?
|
376
|
+
# # => false
|
377
|
+
#
|
378
|
+
# @see #subdomain?
|
379
|
+
# @see #valid?
|
380
|
+
#
|
381
|
+
def valid_subdomain?
  # Cheap presence check first; the list lookup in #valid? only runs if it passes.
  looks_like_subdomain = subdomain?
  looks_like_subdomain && valid?
end
|
384
|
+
|
385
|
+
end
|
386
|
+
|
387
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#--
|
2
|
+
# Public Suffix
|
3
|
+
#
|
4
|
+
# Domain name parser based on the Public Suffix List.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
|
7
|
+
#++
|
8
|
+
|
9
|
+
|
10
|
+
module PublicSuffix
|
11
|
+
|
12
|
+
# Common ancestor of the error classes defined below.
class Error < StandardError; end

# Raised when trying to parse an invalid domain.
# A domain is considered invalid when no rule is found
# in the definition list.
#
# @example
#
#   PublicSuffix.parse("nic.test")
#   # => PublicSuffix::DomainInvalid
#
#   PublicSuffix.parse("http://www.nic.it")
#   # => PublicSuffix::DomainInvalid
#
# @since 0.6.0
#
class DomainInvalid < Error; end

# Raised when trying to parse a domain
# which is formally defined by a rule,
# but the rule sets a requirement which is not satisfied
# by the input you are trying to parse.
#
# @example
#
#   PublicSuffix.parse("nic.do")
#   # => PublicSuffix::DomainNotAllowed
#
#   PublicSuffix.parse("www.nic.do")
#   # => PublicSuffix::Domain
#
# @since 0.6.0
#
class DomainNotAllowed < DomainInvalid; end


# Backward Compatibility
#
# @deprecated Use {PublicSuffix::DomainInvalid}.
#
InvalidDomain = DomainInvalid
|
56
|
+
|
57
|
+
end
|
@@ -0,0 +1,283 @@
|
|
1
|
+
#--
|
2
|
+
# Public Suffix
|
3
|
+
#
|
4
|
+
# Domain name parser based on the Public Suffix List.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
|
7
|
+
#++
|
8
|
+
|
9
|
+
|
10
|
+
module PublicSuffix
|
11
|
+
|
12
|
+
# A {PublicSuffix::List} is a collection of one
|
13
|
+
# or more {PublicSuffix::Rule}.
|
14
|
+
#
|
15
|
+
# Given a {PublicSuffix::List},
|
16
|
+
# you can add or remove {PublicSuffix::Rule},
|
17
|
+
# iterate all items in the list or search for the first rule
|
18
|
+
# which matches a specific domain name.
|
19
|
+
#
|
20
|
+
# # Create a new list
|
21
|
+
# list = PublicSuffix::List.new
|
22
|
+
#
|
23
|
+
# # Push two rules to the list
|
24
|
+
# list << PublicSuffix::Rule.factory("it")
|
25
|
+
# list << PublicSuffix::Rule.factory("com")
|
26
|
+
#
|
27
|
+
# # Get the size of the list
|
28
|
+
# list.size
|
29
|
+
# # => 2
|
30
|
+
#
|
31
|
+
# # Search for the rule matching given domain
|
32
|
+
# list.find("example.com")
|
33
|
+
# # => #<PublicSuffix::Rule::Normal>
|
34
|
+
# list.find("example.org")
|
35
|
+
# # => nil
|
36
|
+
#
|
37
|
+
# You can create as many {PublicSuffix::List} as you want.
|
38
|
+
# The {PublicSuffix::List.default} rule list is used
|
39
|
+
# to tokenize and validate a domain.
|
40
|
+
#
|
41
|
+
# {PublicSuffix::List} implements +Enumerable+ module.
|
42
|
+
#
|
43
|
+
class List
|
44
|
+
include Enumerable
|
45
|
+
|
46
|
+
# Gets the array of rules.
|
47
|
+
#
|
48
|
+
# @return [Array<PublicSuffix::Rule::*>]
|
49
|
+
attr_reader :rules
|
50
|
+
|
51
|
+
# Gets the naive index, a hash whose keys are the first label of
|
52
|
+
# every rule pointing to an array of integers (indexes of the rules in @rules).
|
53
|
+
#
|
54
|
+
# @return [Hash]
|
55
|
+
attr_reader :indexes
|
56
|
+
|
57
|
+
|
58
|
+
# Initializes an empty {PublicSuffix::List}.
|
59
|
+
#
|
60
|
+
# @yield [self] Yields on self.
|
61
|
+
# @yieldparam [PublicSuffix::List] self The newly created instance.
|
62
|
+
#
|
63
|
+
def initialize(&block)
  @rules, @indexes = [], {}
  # Give the caller a chance to populate the list...
  block.call(self) if block
  # ...then index whatever ended up in it.
  create_index!
end
|
69
|
+
|
70
|
+
# Creates a naive index for +@rules+. Just a hash that will tell
|
71
|
+
# us where the elements of +@rules+ are relative to its first
|
72
|
+
# {PublicSuffix::Rule::Base#labels} element.
|
73
|
+
#
|
74
|
+
# For instance if @rules[5] and @rules[4] are the only elements of the list
|
75
|
+
# where Rule#labels.first is 'us' @indexes['us'] #=> [5,4], that way in
|
76
|
+
# select we can avoid mapping every single rule against the candidate domain.
|
77
|
+
def create_index!
  # Always rebuild from scratch. This method runs after every indexed #add
  # and again at the end of #initialize; appending to the previous index
  # would record the same positions several times and make #select
  # return duplicate rules.
  @indexes = {}
  @rules.each_with_index do |rule, position|
    # Key: first label of the rule. Value: positions in @rules, in list order.
    (@indexes[rule.labels.first] ||= []) << position
  end
end
|
86
|
+
|
87
|
+
# Checks whether two lists are equal.
|
88
|
+
#
|
89
|
+
# List <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
|
90
|
+
# {PublicSuffix::List} and each +PublicSuffix::Rule::*+
|
91
|
+
# in list <tt>one</tt> is available in list <tt>two</tt>, in the same order.
|
92
|
+
#
|
93
|
+
# @param [PublicSuffix::List] other
|
94
|
+
# The List to compare.
|
95
|
+
#
|
96
|
+
# @return [Boolean]
|
97
|
+
def ==(other)
  # Anything that is not a List can never be equal.
  return false unless other.is_a?(List)
  # Same object, or same rules in the same order.
  equal?(other) || rules == other.rules
end
alias :eql? :==
|
103
|
+
|
104
|
+
# Iterates each rule in the list.
|
105
|
+
def each(*args, &block)
  # Pure delegation to the underlying rules array.
  @rules.each(*args, &block)
end
|
108
|
+
|
109
|
+
# Gets the list as array.
|
110
|
+
#
|
111
|
+
# @return [Array<PublicSuffix::Rule::*>]
|
112
|
+
def to_a
  # Hands back the internal array itself, not a copy.
  @rules
end
|
115
|
+
|
116
|
+
# Adds the given object to the list
|
117
|
+
# and optionally refreshes the rule index.
|
118
|
+
#
|
119
|
+
# @param [PublicSuffix::Rule::*] rule
|
120
|
+
# The rule to add to the list.
|
121
|
+
# @param [Boolean] index
|
122
|
+
# Set to true to recreate the rule index
|
123
|
+
# after the rule has been added to the list.
|
124
|
+
#
|
125
|
+
# @return [self]
|
126
|
+
#
|
127
|
+
# @see #create_index!
|
128
|
+
#
|
129
|
+
def add(rule, index = true)
  @rules.push(rule)
  # Only the literal +true+ triggers a re-index; bulk loaders pass +false+
  # and rebuild the index once themselves.
  create_index! if index == true
  self
end
alias << add
|
135
|
+
|
136
|
+
# Gets the number of elements in the list.
|
137
|
+
#
|
138
|
+
# @return [Integer]
|
139
|
+
def size
  # Number of rules currently held.
  @rules.length
end
alias length size
|
143
|
+
|
144
|
+
# Checks whether the list is empty.
|
145
|
+
#
|
146
|
+
# @return [Boolean]
|
147
|
+
def empty?
  # True when no rule has been added.
  @rules.size.zero?
end
|
150
|
+
|
151
|
+
# Removes all elements.
|
152
|
+
#
|
153
|
+
# @return [self]
|
154
|
+
def clear
  @rules.clear
  # The index stores positions into @rules; leaving it behind would make
  # #select look up positions that no longer exist (yielding nil rules).
  @indexes.clear
  self
end
|
158
|
+
|
159
|
+
|
160
|
+
# Returns the most appropriate rule for domain.
|
161
|
+
#
|
162
|
+
# From the Public Suffix List documentation:
|
163
|
+
#
|
164
|
+
# * If a hostname matches more than one rule in the file,
|
165
|
+
# the longest matching rule (the one with the most levels) will be used.
|
166
|
+
# * An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
|
167
|
+
# An exception rule takes priority over any other matching rule.
|
168
|
+
#
|
169
|
+
# == Algorithm description
|
170
|
+
#
|
171
|
+
# * Match domain against all rules and take note of the matching ones.
|
172
|
+
# * If no rules match, the prevailing rule is "*".
|
173
|
+
# * If more than one rule matches, the prevailing rule is the one which is an exception rule.
|
174
|
+
# * If there is no matching exception rule, the prevailing rule is the one with the most labels.
|
175
|
+
# * If the prevailing rule is an exception rule, modify it by removing the leftmost label.
|
176
|
+
# * The public suffix is the set of labels from the domain
|
177
|
+
# which directly match the labels of the prevailing rule (joined by dots).
|
178
|
+
# * The registered domain is the public suffix plus one additional label.
|
179
|
+
#
|
180
|
+
# @param [String, #to_s] domain The domain name.
|
181
|
+
#
|
182
|
+
# @return [PublicSuffix::Rule::*, nil]
|
183
|
+
def find(domain)
  matching = select(domain)
  # An exception rule wins over everything else.
  exception = matching.find { |candidate| candidate.type == :exception }
  return exception if exception
  # Otherwise keep the longest rule; on equal length the later one prevails.
  matching.reduce do |best, candidate|
    candidate.length >= best.length ? candidate : best
  end
end
|
188
|
+
|
189
|
+
# Selects all the rules matching given domain.
|
190
|
+
#
|
191
|
+
# Will use +@indexes+ to try only the rules that share the same first label,
|
192
|
+
# that will speed up things when using +List.find('foo')+ a lot.
|
193
|
+
#
|
194
|
+
# @param [String, #to_s] domain The domain name.
|
195
|
+
#
|
196
|
+
# @return [Array<PublicSuffix::Rule::*>]
|
197
|
+
def select(domain)
  # Narrow the search to rules indexed under the domain's rightmost label.
  first_label = Domain.domain_to_labels(domain).first
  positions = @indexes[first_label] || []
  candidates = @rules.values_at(*positions)
  # Then keep only the candidates that really match.
  candidates.select { |candidate| candidate.match?(domain) }
end
|
201
|
+
|
202
|
+
|
203
|
+
@@default = nil
|
204
|
+
|
205
|
+
class << self

  # Gets the default rule list.
  # Initializes a new {PublicSuffix::List} parsing the content
  # of {PublicSuffix::List.default_definition}, if required.
  #
  # Once parsed, the definition is closed when it responds to +#close+,
  # so the file handle opened by {default_definition} is not leaked.
  #
  # @return [PublicSuffix::List]
  def default
    @@default ||= begin
      definition = default_definition
      begin
        parse(definition)
      ensure
        definition.close if definition.respond_to?(:close)
      end
    end
  end

  # Sets the default rule list to +value+.
  #
  # @param [PublicSuffix::List] value
  #   The new rule list.
  #
  # @return [PublicSuffix::List]
  def default=(value)
    @@default = value
  end

  # Sets the default rule list to +nil+.
  #
  # @return [self]
  def clear
    self.default = nil
    self
  end

  # Resets the default rule list and reinitializes it
  # parsing the content of {PublicSuffix::List.default_definition}.
  #
  # @return [PublicSuffix::List]
  def reload
    clear
    default
  end

  # Gets the default definition list.
  # Can be any <tt>IOStream</tt> including a <tt>File</tt>
  # or a simple <tt>String</tt>.
  # The object must respond to <tt>#each_line</tt>.
  #
  # Every call opens a new handle on the bundled definitions file;
  # the caller is responsible for closing it.
  #
  # @return [File]
  def default_definition
    path = File.join(File.dirname(__FILE__), "definitions.txt")
    File.new(path, "r:utf-8")
  end


  # Parse given +input+ treating the content as Public Suffix List.
  #
  # See http://publicsuffix.org/format/ for more details about input format.
  #
  # @param [#each_line] input The rule list to parse.
  #
  # @return [PublicSuffix::List]
  def parse(input)
    new do |list|
      input.each_line do |line|
        # Work on a stripped copy so the caller's strings are left untouched.
        entry = line.strip

        # Skip blank lines and whole-line comments.
        next if entry.empty? || entry.start_with?("//")

        # Indexing is deferred: #initialize builds the index once,
        # after this block has added every rule.
        list.add(Rule.factory(entry), false)
      end
    end
  end

end
|
280
|
+
|
281
|
+
end
|
282
|
+
|
283
|
+
end
|