public_suffix 1.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,387 @@
1
+ #--
2
+ # Public Suffix
3
+ #
4
+ # Domain name parser based on the Public Suffix List.
5
+ #
6
+ # Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
7
+ #++
8
+
9
+
10
+ module PublicSuffix
11
+
12
class Domain

  # Breaks a dotted name into its labels, listed from the rightmost
  # label (the TLD side) to the leftmost one.
  #
  # No validation is performed: the argument is converted with +to_s+
  # and split on ".", nothing more.
  #
  # @param [String, #to_s] domain
  #   The domain name to split.
  #
  # @return [Array<String>]
  #
  # @example
  #
  #   domain_to_labels('google.com')
  #   # => ['com', 'google']
  #
  #   domain_to_labels('google.co.uk')
  #   # => ['uk', 'co', 'google']
  #
  def self.domain_to_labels(domain)
    labels = domain.to_s.split(".")
    labels.reverse
  end

  # Returns the Top Level Domain part, aka the extension.
  #
  # @return [String, nil]
  attr_reader :tld

  # Returns the Second Level Domain part, aka the domain part.
  #
  # @return [String, nil]
  attr_reader :sld

  # Returns the Third Level Domain part, aka the subdomain part.
  #
  # @return [String, nil]
  attr_reader :trd

  # Creates and returns a new {PublicSuffix::Domain} instance.
  #
  # The positional arguments are assigned, in order, to the TLD, the SLD
  # and the TRD. Parts that are not supplied are left +nil+; arguments
  # past the third are ignored.
  #
  # @overload initialize(tld)
  #   Initializes with a +tld+.
  #   @param [String] tld The TLD (extension)
  # @overload initialize(tld, sld)
  #   Initializes with a +tld+ and +sld+.
  #   @param [String] tld The TLD (extension)
  #   @param [String] sld The SLD (domain)
  # @overload initialize(tld, sld, trd)
  #   Initializes with a +tld+, +sld+ and +trd+.
  #   @param [String] tld The TLD (extension)
  #   @param [String] sld The SLD (domain)
  #   @param [String] trd The TRD (subdomain)
  #
  # @yield [self] Yields on self.
  # @yieldparam [PublicSuffix::Domain] self The newly created instance
  #
  # @example Initialize with a TLD
  #   PublicSuffix::Domain.new("com").to_a
  #   # => [nil, nil, "com"]
  #
  # @example Initialize with a TLD and SLD
  #   PublicSuffix::Domain.new("com", "example").to_a
  #   # => [nil, "example", "com"]
  #
  # @example Initialize with a TLD, SLD and TRD
  #   PublicSuffix::Domain.new("com", "example", "www").to_a
  #   # => ["www", "example", "com"]
  #
  def initialize(*args, &block)
    @tld, @sld, @trd = args
    yield self if block_given?
  end

  # Returns a string representation of this object,
  # which is the same value returned by {#name}.
  #
  # @return [String]
  def to_s
    name
  end

  # Returns the domain parts as an array, ordered TRD, SLD, TLD.
  # Parts that were not given are +nil+.
  #
  # @return [Array<String, nil>]
  #
  # @example
  #
  #   PublicSuffix::Domain.new("com", "google").to_a
  #   # => [nil, "google", "com"]
  #
  #   PublicSuffix::Domain.new("com", "google", "www").to_a
  #   # => ["www", "google", "com"]
  #
  def to_a
    [trd, sld, tld]
  end

  # Returns the full domain name: every non-nil part joined by ".".
  #
  # @return [String]
  #
  # @example Gets the domain name of a domain
  #   PublicSuffix::Domain.new("com", "google").name
  #   # => "google.com"
  #
  # @example Gets the domain name of a subdomain
  #   PublicSuffix::Domain.new("com", "google", "www").name
  #   # => "www.google.com"
  #
  def name
    [trd, sld, tld].compact.join(".")
  end

  # Returns the "sld.tld" representation of this object
  # if the object is a {#domain?}, <tt>nil</tt> otherwise.
  # The TRD, when present, is not part of the result.
  #
  #   PublicSuffix::Domain.new("com").domain
  #   # => nil
  #
  #   PublicSuffix::Domain.new("com", "google").domain
  #   # => "google.com"
  #
  #   PublicSuffix::Domain.new("com", "google", "www").domain
  #   # => "google.com"
  #
  # This method doesn't validate the input. It handles the domain
  # as a valid domain name and simply applies the necessary transformations.
  #
  #   # This is an invalid domain
  #   PublicSuffix::Domain.new("zip", "google").domain
  #   # => "google.zip"
  #
  # The result includes the TLD. To get only the domain label,
  # use <tt>#sld</tt> (aka second level domain).
  #
  #   PublicSuffix::Domain.new("com", "google", "www").sld
  #   # => "google"
  #
  # @return [String, nil]
  #
  # @see #domain?
  # @see #subdomain
  #
  def domain
    [sld, tld].join(".") if domain?
  end

  # Returns the "trd.sld.tld" representation of this object
  # if the object is a {#subdomain?}, <tt>nil</tt> otherwise.
  #
  #   PublicSuffix::Domain.new("com").subdomain
  #   # => nil
  #
  #   PublicSuffix::Domain.new("com", "google").subdomain
  #   # => nil
  #
  #   PublicSuffix::Domain.new("com", "google", "www").subdomain
  #   # => "www.google.com"
  #
  # This method doesn't validate the input. It handles the domain
  # as a valid domain name and simply applies the necessary transformations.
  #
  #   # This is an invalid domain
  #   PublicSuffix::Domain.new("zip", "google", "www").subdomain
  #   # => "www.google.zip"
  #
  # The result is the whole name. To get only the subdomain label,
  # use <tt>#trd</tt> (aka third level domain).
  #
  #   PublicSuffix::Domain.new("com", "google", "www").trd
  #   # => "www"
  #
  # @return [String, nil]
  #
  # @see #subdomain?
  # @see #domain
  #
  def subdomain
    [trd, sld, tld].join(".") if subdomain?
  end

  # Returns the rule matching this domain
  # in the default {PublicSuffix::List}.
  #
  # The lookup is performed by passing {#name} to the +find+ method
  # of the default list.
  #
  # @return [PublicSuffix::Rule::Base, nil]
  #   The rule instance if a rule matches the current domain,
  #   nil if no rule is found.
  def rule
    List.default.find(name)
  end

  # Checks whether <tt>self</tt> looks like a domain.
  #
  # This method doesn't actually validate the domain.
  # It only checks whether the instance contains
  # a value for the {#tld} and {#sld} attributes.
  # If you also want to validate the domain,
  # use {#valid_domain?} instead.
  #
  # @return [Boolean]
  #
  # @example
  #
  #   PublicSuffix::Domain.new("com").domain?
  #   # => false
  #
  #   PublicSuffix::Domain.new("com", "google").domain?
  #   # => true
  #
  #   PublicSuffix::Domain.new("com", "google", "www").domain?
  #   # => true
  #
  #   # This is an invalid domain, but returns true
  #   # because this method doesn't validate the content.
  #   PublicSuffix::Domain.new("zip", "google").domain?
  #   # => true
  #
  # @see #subdomain?
  #
  def domain?
    !tld.nil? && !sld.nil?
  end

  # Checks whether <tt>self</tt> looks like a subdomain.
  #
  # This method doesn't actually validate the subdomain.
  # It only checks whether the instance contains
  # a value for the {#tld}, {#sld} and {#trd} attributes.
  # If you also want to validate the domain,
  # use {#valid_subdomain?} instead.
  #
  # @return [Boolean]
  #
  # @example
  #
  #   PublicSuffix::Domain.new("com").subdomain?
  #   # => false
  #
  #   PublicSuffix::Domain.new("com", "google").subdomain?
  #   # => false
  #
  #   PublicSuffix::Domain.new("com", "google", "www").subdomain?
  #   # => true
  #
  #   # This is an invalid domain, but returns true
  #   # because this method doesn't validate the content.
  #   PublicSuffix::Domain.new("zip", "google", "www").subdomain?
  #   # => true
  #
  # @see #domain?
  #
  def subdomain?
    [tld, sld, trd].none?(&:nil?)
  end

  # Checks whether <tt>self</tt> is exclusively a domain,
  # and not a subdomain.
  #
  # @return [Boolean]
  def is_a_domain?
    domain? && !subdomain?
  end

  # Checks whether <tt>self</tt> is exclusively a subdomain.
  # Equivalent to {#subdomain?}.
  #
  # @return [Boolean]
  def is_a_subdomain?
    subdomain?
  end

  # Checks whether <tt>self</tt> is assigned and allowed
  # according to default {List}.
  #
  # This method triggers a new rule lookup in the default {List},
  # which is a quite intensive task. The outcome of the examples
  # below depends on the rules loaded in the default list.
  #
  # @return [Boolean]
  #
  # @example Check a valid domain
  #   Domain.new("com", "example").valid?
  #   # => true
  #
  # @example Check a valid subdomain
  #   Domain.new("com", "example", "www").valid?
  #   # => true
  #
  # @example Check a not-assigned domain
  #   Domain.new("zip", "example").valid?
  #   # => false
  #
  # @example Check a not-allowed domain
  #   Domain.new("do", "example").valid?
  #   # => false
  #   Domain.new("do", "example", "www").valid?
  #   # => true
  #
  def valid?
    matching = rule
    # No rule at all means the name is not assigned.
    return false if matching.nil?
    matching.allow?(name)
  end

  # Checks whether <tt>self</tt> looks like a domain and validates
  # according to default {List}.
  #
  # @return [Boolean]
  #
  # @example
  #
  #   PublicSuffix::Domain.new("com").valid_domain?
  #   # => false
  #
  #   PublicSuffix::Domain.new("com", "google").valid_domain?
  #   # => true
  #
  #   PublicSuffix::Domain.new("com", "google", "www").valid_domain?
  #   # => true
  #
  #   # This is an invalid domain
  #   PublicSuffix::Domain.new("zip", "google").valid_domain?
  #   # => false
  #
  # @see #domain?
  # @see #valid?
  #
  def valid_domain?
    domain? && valid?
  end

  # Checks whether <tt>self</tt> looks like a subdomain and validates
  # according to default {List}.
  #
  # @return [Boolean]
  #
  # @example
  #
  #   PublicSuffix::Domain.new("com").valid_subdomain?
  #   # => false
  #
  #   PublicSuffix::Domain.new("com", "google").valid_subdomain?
  #   # => false
  #
  #   PublicSuffix::Domain.new("com", "google", "www").valid_subdomain?
  #   # => true
  #
  #   # This is an invalid domain
  #   PublicSuffix::Domain.new("zip", "google", "www").valid_subdomain?
  #   # => false
  #
  # @see #subdomain?
  # @see #valid?
  #
  def valid_subdomain?
    subdomain? && valid?
  end

end
386
+
387
+ end
@@ -0,0 +1,57 @@
1
+ #--
2
+ # Public Suffix
3
+ #
4
+ # Domain name parser based on the Public Suffix List.
5
+ #
6
+ # Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
7
+ #++
8
+
9
+
10
module PublicSuffix

  # Base class of the error classes defined in this module.
  # Rescue this class to catch any of them.
  class Error < StandardError; end

  # Raised when trying to parse an invalid domain.
  # A domain is considered invalid when no rule is found
  # in the definition list.
  #
  # @example
  #
  #   PublicSuffix.parse("nic.test")
  #   # => raises PublicSuffix::DomainInvalid
  #
  #   PublicSuffix.parse("http://www.nic.it")
  #   # => raises PublicSuffix::DomainInvalid
  #
  # @since 0.6.0
  #
  class DomainInvalid < Error; end

  # Raised when trying to parse a domain
  # which is formally defined by a rule,
  # but the rule sets a requirement which is not satisfied
  # by the input you are trying to parse.
  #
  # Being a subclass of {DomainInvalid}, it is also caught
  # by code rescuing {DomainInvalid}.
  #
  # @example
  #
  #   PublicSuffix.parse("nic.do")
  #   # => raises PublicSuffix::DomainNotAllowed
  #
  #   PublicSuffix.parse("www.nic.do")
  #   # => PublicSuffix::Domain
  #
  # @since 0.6.0
  #
  class DomainNotAllowed < DomainInvalid; end

  # Backward compatibility alias: refers to the very same class
  # object as {PublicSuffix::DomainInvalid}.
  #
  # @deprecated Use {PublicSuffix::DomainInvalid}.
  #
  InvalidDomain = DomainInvalid

end
@@ -0,0 +1,283 @@
1
+ #--
2
+ # Public Suffix
3
+ #
4
+ # Domain name parser based on the Public Suffix List.
5
+ #
6
+ # Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
7
+ #++
8
+
9
+
10
+ module PublicSuffix
11
+
12
+ # A {PublicSuffix::List} is a collection of one
13
+ # or more {PublicSuffix::Rule}.
14
+ #
15
+ # Given a {PublicSuffix::List},
16
+ # you can add or remove {PublicSuffix::Rule},
17
+ # iterate all items in the list or search for the first rule
18
+ # which matches a specific domain name.
19
+ #
20
+ # # Create a new list
21
+ # list = PublicSuffix::List.new
22
+ #
23
+ # # Push two rules to the list
24
+ # list << PublicSuffix::Rule.factory("it")
25
+ # list << PublicSuffix::Rule.factory("com")
26
+ #
27
+ # # Get the size of the list
28
+ # list.size
29
+ # # => 2
30
+ #
31
+ # # Search for the rule matching given domain
32
+ # list.find("example.com")
33
+ # # => #<PublicSuffix::Rule::Normal>
34
+ # list.find("example.org")
35
+ # # => nil
36
+ #
37
+ # You can create as many {PublicSuffix::List} instances as you want.
38
+ # The {PublicSuffix::List.default} rule list is used
39
+ # to tokenize and validate a domain.
40
+ #
41
+ # {PublicSuffix::List} implements +Enumerable+ module.
42
+ #
43
class List
  include Enumerable

  # Gets the array of rules.
  #
  # @return [Array<PublicSuffix::Rule::*>]
  attr_reader :rules

  # Gets the naive index: a hash whose keys are the first label of
  # every rule and whose values are arrays of integers
  # (the positions of those rules in +@rules+).
  #
  # @return [Hash]
  attr_reader :indexes


  # Initializes an empty {PublicSuffix::List}.
  #
  # The index is (re)built once after the optional block has run,
  # so rules added inside the block with <tt>add(rule, false)</tt>
  # are indexed too.
  #
  # @yield [self] Yields on self.
  # @yieldparam [PublicSuffix::List] self The newly created instance.
  #
  def initialize(&block)
    @rules   = []
    @indexes = {}
    yield(self) if block_given?
    create_index!
  end

  # Creates a naive index for +@rules+. Just a hash that will tell
  # us where the elements of +@rules+ are relative to its first
  # {PublicSuffix::Rule::Base#labels} element.
  #
  # For instance if @rules[4] and @rules[5] are the only elements of the list
  # where Rule#labels.first is 'us' @indexes['us'] #=> [4,5], that way in
  # select we can avoid mapping every single rule against the candidate domain.
  #
  # The index is always rebuilt from scratch. Appending to the previous
  # index would record the same position again on every call and make
  # {#select} return duplicated rules.
  #
  # @return [Hash] The rebuilt index.
  def create_index!
    rebuilt = {}
    @rules.each_with_index do |rule, position|
      (rebuilt[rule.labels.first] ||= []) << position
    end
    @indexes = rebuilt
  end

  # Checks whether two lists are equal.
  #
  # List <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
  # {PublicSuffix::List} and each +PublicSuffix::Rule::*+
  # in list <tt>one</tt> is available in list <tt>two</tt>, in the same order.
  #
  # @param [PublicSuffix::List] other
  #   The List to compare.
  #
  # @return [Boolean]
  def ==(other)
    return false unless other.is_a?(List)
    equal?(other) || rules == other.rules
  end
  alias :eql? :==

  # Iterates each rule in the list.
  def each(*args, &block)
    @rules.each(*args, &block)
  end

  # Gets the list as array.
  #
  # This is the internal rules array, not a copy.
  #
  # @return [Array<PublicSuffix::Rule::*>]
  def to_a
    @rules
  end

  # Adds the given object to the list
  # and optionally refreshes the rule index.
  #
  # A rule added with +index+ set to false is not visible to
  # {#select} and {#find} until {#create_index!} runs.
  #
  # @param [PublicSuffix::Rule::*] rule
  #   The rule to add to the list.
  # @param [Boolean] index
  #   Set to true to recreate the rule index
  #   after the rule has been added to the list.
  #
  # @return [self]
  #
  # @see #create_index!
  #
  def add(rule, index = true)
    @rules << rule
    create_index! if index == true
    self
  end
  alias << add

  # Gets the number of elements in the list.
  #
  # @return [Integer]
  def size
    @rules.size
  end
  alias length size

  # Checks whether the list is empty.
  #
  # @return [Boolean]
  def empty?
    @rules.empty?
  end

  # Removes all elements, and resets the index so that it does not
  # keep pointing at positions that no longer exist.
  #
  # @return [self]
  def clear
    @rules.clear
    create_index!
    self
  end


  # Returns the most appropriate rule for domain.
  #
  # From the Public Suffix List documentation:
  #
  # * If a hostname matches more than one rule in the file,
  #   the longest matching rule (the one with the most levels) will be used.
  # * An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
  #   An exception rule takes priority over any other matching rule.
  #
  # == Algorithm description
  #
  # * Match domain against all rules and take note of the matching ones.
  # * If no rules match, the prevailing rule is "*".
  # * If more than one rule matches, the prevailing rule is the one which is an exception rule.
  # * If there is no matching exception rule, the prevailing rule is the one with the most labels.
  # * If the prevailing rule is a exception rule, modify it by removing the leftmost label.
  # * The public suffix is the set of labels from the domain
  #   which directly match the labels of the prevailing rule (joined by dots).
  # * The registered domain is the public suffix plus one additional label.
  #
  # This method only picks the prevailing rule among the matching ones:
  # the first exception rule if any, otherwise the rule with the greatest
  # +length+ (the later rule wins a tie). It returns nil when nothing matches.
  #
  # @param [String, #to_s] domain The domain name.
  #
  # @return [PublicSuffix::Rule::*, nil]
  def find(domain)
    candidates = select(domain)
    candidates.find { |rule| rule.type == :exception } ||
      candidates.inject { |longest, rule| longest.length > rule.length ? longest : rule }
  end

  # Selects all the rules matching given domain.
  #
  # Will use +@indexes+ to try only the rules that share the same first label,
  # that will speed up things when using +List.find('foo')+ a lot.
  #
  # Note that this overrides +Enumerable#select+: it takes a domain,
  # not a block.
  #
  # @param [String, #to_s] domain The domain name.
  #
  # @return [Array<PublicSuffix::Rule::*>]
  def select(domain)
    positions = @indexes[Domain.domain_to_labels(domain).first] || []
    @rules.values_at(*positions).select { |rule| rule.match?(domain) }
  end


  @@default = nil

  class << self

    # Gets the default rule list.
    # Initializes a new {PublicSuffix::List} parsing the content
    # of {PublicSuffix::List.default_definition}, if required.
    #
    # The definition object is closed once it has been parsed
    # (when it responds to +close+), so that the file handle
    # opened by {PublicSuffix::List.default_definition} is not leaked.
    #
    # @return [PublicSuffix::List]
    def default
      @@default ||= begin
        definition = default_definition
        begin
          parse(definition)
        ensure
          definition.close if definition.respond_to?(:close)
        end
      end
    end

    # Sets the default rule list to +value+.
    #
    # @param [PublicSuffix::List] value
    #   The new rule list.
    #
    # @return [PublicSuffix::List]
    def default=(value)
      @@default = value
    end

    # Sets the default rule list to +nil+.
    #
    # @return [self]
    def clear
      self.default = nil
      self
    end

    # Resets the default rule list and reinitialize it
    # parsing the content of {PublicSuffix::List.default_definition}.
    #
    # @return [PublicSuffix::List]
    def reload
      clear.default
    end

    # Gets the default definition list.
    # Can be any <tt>IOStream</tt> including a <tt>File</tt>
    # or a simple <tt>String</tt>.
    # The object must respond to <tt>#each_line</tt>.
    #
    # This implementation opens, read-only and as UTF-8, the
    # "definitions.txt" file located next to this source file.
    #
    # @return [File]
    def default_definition
      File.new(File.join(File.dirname(__FILE__), "definitions.txt"), "r:utf-8")
    end


    # Parse given +input+ treating the content as Public Suffix List.
    #
    # Blank lines and lines starting with "//" (comments) are skipped;
    # every other line, stripped of surrounding whitespace, is turned
    # into a rule with +Rule.factory+. The index is built once,
    # after all the rules have been added.
    #
    # See http://publicsuffix.org/format/ for more details about input format.
    #
    # @param [String, #each_line] input The rule list to parse.
    #
    # @return [PublicSuffix::List]
    def parse(input)
      new do |list|
        input.each_line do |line|
          rule_text = line.strip
          next if rule_text.empty? || rule_text.start_with?("//")

          # Skip the per-rule reindex: List#initialize indexes once at the end.
          list.add(Rule.factory(rule_text), false)
        end
      end
    end

  end

end
282
+
283
+ end