public_suffix 1.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,387 @@
1
+ #--
2
+ # Public Suffix
3
+ #
4
+ # Domain name parser based on the Public Suffix List.
5
+ #
6
+ # Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
7
+ #++
8
+
9
+
10
module PublicSuffix

  # Represents a domain name decomposed into its top level (TLD),
  # second level (SLD) and third level (TRD) parts.
  #
  # The parts are stored exactly as given to the constructor;
  # none of the methods in this class normalizes them.
  class Domain

    # Splits a string into its possible labels
    # as a domain in reverse order from the input string.
    #
    # The input is not validated, but it is assumed to be a valid domain.
    #
    # @param  [String, #to_s] domain
    #   The domain name to split.
    #
    # @return [Array<String>]
    #
    # @example
    #
    #   domain_to_labels('google.com')
    #   # => ['com', 'google']
    #
    #   domain_to_labels('google.co.uk')
    #   # => ['uk', 'co', 'google']
    #
    def self.domain_to_labels(domain)
      domain.to_s.split(".").reverse
    end

    # Creates and returns a new {PublicSuffix::Domain} instance.
    #
    # Arguments are positional: the first is the TLD, the second the SLD,
    # the third the TRD. Missing parts are stored as +nil+.
    #
    # @overload initialize(tld)
    #   Initializes with a +tld+.
    #   @param [String] tld The TLD (extension)
    # @overload initialize(tld, sld)
    #   Initializes with a +tld+ and +sld+.
    #   @param [String] tld The TLD (extension)
    #   @param [String] sld The SLD (domain)
    # @overload initialize(tld, sld, trd)
    #   Initializes with a +tld+, +sld+ and +trd+.
    #   @param [String] tld The TLD (extension)
    #   @param [String] sld The SLD (domain)
    #   @param [String] trd The TRD (subdomain)
    #
    # @yield [self] Yields on self.
    # @yieldparam [PublicSuffix::Domain] self The newly created instance
    #
    # @example Initialize with a TLD
    #   PublicSuffix::Domain.new("com")
    #   # => #<PublicSuffix::Domain @tld="com", @sld=nil, @trd=nil>
    #
    # @example Initialize with a TLD and SLD
    #   PublicSuffix::Domain.new("com", "example")
    #   # => #<PublicSuffix::Domain @tld="com", @sld="example", @trd=nil>
    #
    # @example Initialize with a TLD, SLD and TRD
    #   PublicSuffix::Domain.new("com", "example", "www")
    #   # => #<PublicSuffix::Domain @tld="com", @sld="example", @trd="www">
    #
    def initialize(*args)
      # Parallel assignment leaves the parts that were not supplied as nil.
      @tld, @sld, @trd = args
      yield(self) if block_given?
    end

    # Returns a string representation of this object.
    #
    # @return [String] The same value as {#name}.
    def to_s
      name
    end

    # Returns an array containing the domain parts,
    # ordered from the third level down to the top level.
    #
    # @return [Array<String, nil>]
    #
    # @example
    #
    #   PublicSuffix::Domain.new("com", "google").to_a
    #   # => [nil, "google", "com"]
    #
    #   PublicSuffix::Domain.new("com", "google", "www").to_a
    #   # => ["www", "google", "com"]
    #
    def to_a
      [trd, sld, tld]
    end


    # Returns the Top Level Domain part, aka the extension.
    #
    # @return [String, nil]
    attr_reader :tld

    # Returns the Second Level Domain part, aka the domain part.
    #
    # @return [String, nil]
    attr_reader :sld

    # Returns the Third Level Domain part, aka the subdomain part.
    #
    # @return [String, nil]
    attr_reader :trd


    # Returns the full domain name, built by joining
    # every part which is not +nil+ with a dot.
    #
    # @return [String]
    #
    # @example Gets the domain name of a domain
    #   PublicSuffix::Domain.new("com", "google").name
    #   # => "google.com"
    #
    # @example Gets the domain name of a subdomain
    #   PublicSuffix::Domain.new("com", "google", "www").name
    #   # => "www.google.com"
    #
    def name
      to_a.compact.join(".")
    end

    # Returns a domain-like representation of this object
    # if the object is a {#domain?}, <tt>nil</tt> otherwise.
    #
    #   PublicSuffix::Domain.new("com").domain
    #   # => nil
    #
    #   PublicSuffix::Domain.new("com", "google").domain
    #   # => "google.com"
    #
    #   PublicSuffix::Domain.new("com", "google", "www").domain
    #   # => "google.com"
    #
    # This method doesn't validate the input. It handles the domain
    # as a valid domain name and simply applies the necessary transformations.
    #
    #   # This is an invalid domain
    #   PublicSuffix::Domain.new("zip", "google").domain
    #   # => "google.zip"
    #
    # This method returns a FQD, not just the domain part.
    # To get the domain part, use <tt>#sld</tt> (aka second level domain).
    #
    #   PublicSuffix::Domain.new("com", "google", "www").domain
    #   # => "google.com"
    #
    #   PublicSuffix::Domain.new("com", "google", "www").sld
    #   # => "google"
    #
    # @return [String, nil]
    #
    # @see #domain?
    # @see #subdomain
    #
    def domain
      return unless domain?
      [sld, tld].join(".")
    end

    # Returns a domain-like representation of this object
    # if the object is a {#subdomain?}, <tt>nil</tt> otherwise.
    #
    #   PublicSuffix::Domain.new("com").subdomain
    #   # => nil
    #
    #   PublicSuffix::Domain.new("com", "google").subdomain
    #   # => nil
    #
    #   PublicSuffix::Domain.new("com", "google", "www").subdomain
    #   # => "www.google.com"
    #
    # This method doesn't validate the input. It handles the domain
    # as a valid domain name and simply applies the necessary transformations.
    #
    #   # This is an invalid domain
    #   PublicSuffix::Domain.new("zip", "google", "www").subdomain
    #   # => "www.google.zip"
    #
    # This method returns a FQD, not just the subdomain part.
    # To get the subdomain part, use <tt>#trd</tt> (aka third level domain).
    #
    #   PublicSuffix::Domain.new("com", "google", "www").subdomain
    #   # => "www.google.com"
    #
    #   PublicSuffix::Domain.new("com", "google", "www").trd
    #   # => "www"
    #
    # @return [String, nil]
    #
    # @see #subdomain?
    # @see #domain
    #
    def subdomain
      return unless subdomain?
      [trd, sld, tld].join(".")
    end

    # Returns the rule matching this domain
    # in the default {PublicSuffix::List}.
    #
    # @return [PublicSuffix::Rule::Base, nil]
    #   The rule matching the current domain,
    #   nil if no rule is found.
    def rule
      List.default.find(name)
    end


    # Checks whether <tt>self</tt> looks like a domain.
    #
    # This method doesn't actually validate the domain.
    # It only checks whether the instance contains
    # a value for the {#tld} and {#sld} attributes.
    # If you also want to validate the domain,
    # use {#valid_domain?} instead.
    #
    # @return [Boolean]
    #
    # @example
    #
    #   PublicSuffix::Domain.new("com").domain?
    #   # => false
    #
    #   PublicSuffix::Domain.new("com", "google").domain?
    #   # => true
    #
    #   PublicSuffix::Domain.new("com", "google", "www").domain?
    #   # => true
    #
    #   # This is an invalid domain, but returns true
    #   # because this method doesn't validate the content.
    #   PublicSuffix::Domain.new("zip", "google").domain?
    #   # => true
    #
    # @see #subdomain?
    #
    def domain?
      !(tld.nil? || sld.nil?)
    end

    # Checks whether <tt>self</tt> looks like a subdomain.
    #
    # This method doesn't actually validate the subdomain.
    # It only checks whether the instance contains
    # a value for the {#tld}, {#sld} and {#trd} attributes.
    # If you also want to validate the domain,
    # use {#valid_subdomain?} instead.
    #
    # @return [Boolean]
    #
    # @example
    #
    #   PublicSuffix::Domain.new("com").subdomain?
    #   # => false
    #
    #   PublicSuffix::Domain.new("com", "google").subdomain?
    #   # => false
    #
    #   PublicSuffix::Domain.new("com", "google", "www").subdomain?
    #   # => true
    #
    #   # This is an invalid domain, but returns true
    #   # because this method doesn't validate the content.
    #   PublicSuffix::Domain.new("zip", "google", "www").subdomain?
    #   # => true
    #
    # @see #domain?
    #
    def subdomain?
      !(tld.nil? || sld.nil? || trd.nil?)
    end

    # Checks whether <tt>self</tt> is exclusively a domain,
    # and not a subdomain.
    #
    # @return [Boolean]
    def is_a_domain?
      domain? && !subdomain?
    end

    # Checks whether <tt>self</tt> is exclusively a subdomain.
    #
    # @return [Boolean]
    def is_a_subdomain?
      subdomain?
    end

    # Checks whether <tt>self</tt> is assigned and allowed
    # according to default {List}.
    #
    # This method triggers a new rule lookup in the default {List},
    # which is a quite intensive task.
    #
    # @return [Boolean]
    #
    # @example Check a valid domain
    #   Domain.new("com", "example").valid?
    #   # => true
    #
    # @example Check a valid subdomain
    #   Domain.new("com", "example", "www").valid?
    #   # => true
    #
    # @example Check a not-assigned domain
    #   Domain.new("zip", "example").valid?
    #   # => false
    #
    # @example Check a not-allowed domain
    #   Domain.new("do", "example").valid?
    #   # => false
    #   Domain.new("do", "example", "www").valid?
    #   # => true
    #
    def valid?
      # Look the rule up once: every call to #rule searches the default list.
      matching = rule
      !matching.nil? && matching.allow?(name)
    end


    # Checks whether <tt>self</tt> looks like a domain and validates
    # according to default {List}.
    #
    # @return [Boolean]
    #
    # @example
    #
    #   PublicSuffix::Domain.new("com").valid_domain?
    #   # => false
    #
    #   PublicSuffix::Domain.new("com", "google").valid_domain?
    #   # => true
    #
    #   PublicSuffix::Domain.new("com", "google", "www").valid_domain?
    #   # => true
    #
    #   # This is an invalid domain
    #   PublicSuffix::Domain.new("zip", "google").valid_domain?
    #   # => false
    #
    # @see #domain?
    # @see #valid?
    #
    def valid_domain?
      domain? && valid?
    end

    # Checks whether <tt>self</tt> looks like a subdomain and validates
    # according to default {List}.
    #
    # @return [Boolean]
    #
    # @example
    #
    #   PublicSuffix::Domain.new("com").valid_subdomain?
    #   # => false
    #
    #   PublicSuffix::Domain.new("com", "google").valid_subdomain?
    #   # => false
    #
    #   PublicSuffix::Domain.new("com", "google", "www").valid_subdomain?
    #   # => true
    #
    #   # This is an invalid domain
    #   PublicSuffix::Domain.new("zip", "google", "www").valid_subdomain?
    #   # => false
    #
    # @see #subdomain?
    # @see #valid?
    #
    def valid_subdomain?
      subdomain? && valid?
    end

  end

end
@@ -0,0 +1,57 @@
1
+ #--
2
+ # Public Suffix
3
+ #
4
+ # Domain name parser based on the Public Suffix List.
5
+ #
6
+ # Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
7
+ #++
8
+
9
+
10
module PublicSuffix

  # Base class of the errors defined in this file.
  # It inherits from +StandardError+, so a bare +rescue+ catches it.
  class Error < StandardError; end

  # Raised when trying to parse an invalid domain.
  # A domain is considered invalid when no rule is found
  # in the definition list.
  #
  # @example
  #
  #   PublicSuffix.parse("nic.test")
  #   # raises PublicSuffix::DomainInvalid
  #
  #   PublicSuffix.parse("http://www.nic.it")
  #   # raises PublicSuffix::DomainInvalid
  #
  # @since 0.6.0
  #
  class DomainInvalid < Error; end

  # Raised when trying to parse a domain
  # which is formally defined by a rule,
  # but the rule sets a requirement which is not satisfied
  # by the input you are trying to parse.
  #
  # Being a subclass of {DomainInvalid}, it is also caught
  # by code rescuing {DomainInvalid}.
  #
  # @example
  #
  #   PublicSuffix.parse("nic.do")
  #   # raises PublicSuffix::DomainNotAllowed
  #
  #   PublicSuffix.parse("www.nic.do")
  #   # => PublicSuffix::Domain
  #
  # @since 0.6.0
  #
  class DomainNotAllowed < DomainInvalid; end


  # Backward Compatibility
  #
  # Alias of {PublicSuffix::DomainInvalid}: both constants
  # reference the very same class object.
  #
  # @deprecated Use {PublicSuffix::DomainInvalid}.
  #
  InvalidDomain = DomainInvalid

end
@@ -0,0 +1,283 @@
1
+ #--
2
+ # Public Suffix
3
+ #
4
+ # Domain name parser based on the Public Suffix List.
5
+ #
6
+ # Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
7
+ #++
8
+
9
+
10
module PublicSuffix

  # A {PublicSuffix::List} is a collection of one
  # or more {PublicSuffix::Rule}.
  #
  # Given a {PublicSuffix::List},
  # you can add or remove {PublicSuffix::Rule},
  # iterate all items in the list or search for the first rule
  # which matches a specific domain name.
  #
  #   # Create a new list
  #   list =  PublicSuffix::List.new
  #
  #   # Push two rules to the list
  #   list << PublicSuffix::Rule.factory("it")
  #   list << PublicSuffix::Rule.factory("com")
  #
  #   # Get the size of the list
  #   list.size
  #   # => 2
  #
  #   # Search for the rule matching given domain
  #   list.find("example.com")
  #   # => #<PublicSuffix::Rule::Normal>
  #   list.find("example.org")
  #   # => nil
  #
  # You can create as many {PublicSuffix::List} as you want.
  # The {PublicSuffix::List.default} rule list is used
  # to tokenize and validate a domain.
  #
  # {PublicSuffix::List} includes the +Enumerable+ module.
  # Note that {#find} and {#select} are redefined here to take
  # a domain name instead of a block.
  #
  class List
    include Enumerable

    # Gets the array of rules.
    #
    # @return [Array<PublicSuffix::Rule::*>]
    attr_reader :rules

    # Gets the naive index: a hash whose keys are the first label of
    # every rule and whose values are arrays of integers
    # (the positions of the matching rules in +@rules+).
    #
    # @return [Hash{String => Array<Integer>}]
    attr_reader :indexes


    # Initializes an empty {PublicSuffix::List}.
    #
    # The index is built once, after the block returns, so rules can be
    # added inside the block with indexing turned off (see {#add}).
    #
    # @yield [self] Yields on self.
    # @yieldparam [PublicSuffix::List] self The newly created instance.
    #
    def initialize
      @rules   = []
      @indexes = {}
      yield(self) if block_given?
      create_index!
    end

    # Rebuilds the naive index for +@rules+ from scratch. Just a hash that
    # tells us where the elements of +@rules+ are, keyed by the first
    # element of {PublicSuffix::Rule::Base#labels}.
    #
    # For instance if @rules[4] and @rules[5] are the only elements of the list
    # where Rule#labels.first is 'us' @indexes['us'] #=> [4,5], that way in
    # select we can avoid mapping every single rule against the candidate domain.
    #
    # The previous index is discarded first: this method is invoked on every
    # indexed {#add}, and appending to the existing entries would record
    # each position more than once.
    def create_index!
      @indexes = {}
      @rules.each_with_index do |rule, position|
        (@indexes[rule.labels.first] ||= []) << position
      end
    end

    # Checks whether two lists are equal.
    #
    # List <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
    # {PublicSuffix::List} and each +PublicSuffix::Rule::*+
    # in list <tt>one</tt> is available in list <tt>two</tt>, in the same order.
    #
    # @param  [PublicSuffix::List] other
    #   The List to compare.
    #
    # @return [Boolean]
    def ==(other)
      return false unless other.is_a?(List)
      equal?(other) || rules == other.rules
    end
    alias :eql? :==

    # Iterates each rule in the list.
    # Arguments and block are forwarded to +Array#each+ on the rule array.
    def each(*args, &block)
      @rules.each(*args, &block)
    end

    # Gets the list as array.
    #
    # @return [Array<PublicSuffix::Rule::*>]
    #   The internal rule array itself, not a copy.
    def to_a
      @rules
    end

    # Adds the given object to the list
    # and optionally refreshes the rule index.
    #
    # @param  [PublicSuffix::Rule::*] rule
    #   The rule to add to the list.
    # @param  [Boolean] index
    #   Set to true to recreate the rule index
    #   after the rule has been added to the list.
    #   Only the literal +true+ triggers the refresh.
    #
    # @return [self]
    #
    # @see #create_index!
    #
    def add(rule, index = true)
      @rules << rule
      create_index! if index == true
      self
    end
    alias << add

    # Gets the number of elements in the list.
    #
    # @return [Integer]
    def size
      @rules.size
    end
    alias length size

    # Checks whether the list is empty.
    #
    # @return [Boolean]
    def empty?
      @rules.empty?
    end

    # Removes all elements, along with the index that pointed at them.
    #
    # @return [self]
    def clear
      @rules.clear
      # A stale index would make #select fetch positions that no longer exist.
      @indexes = {}
      self
    end


    # Returns the most appropriate rule for domain.
    #
    # From the Public Suffix List documentation:
    #
    # * If a hostname matches more than one rule in the file,
    #   the longest matching rule (the one with the most levels) will be used.
    # * An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
    #   An exception rule takes priority over any other matching rule.
    #
    # == Algorithm description
    #
    # * Match domain against all rules and take note of the matching ones.
    # * If no rules match, the prevailing rule is "*".
    # * If more than one rule matches, the prevailing rule is the one which is an exception rule.
    # * If there is no matching exception rule, the prevailing rule is the one with the most labels.
    # * If the prevailing rule is a exception rule, modify it by removing the leftmost label.
    # * The public suffix is the set of labels from the domain
    #   which directly match the labels of the prevailing rule (joined by dots).
    # * The registered domain is the public suffix plus one additional label.
    #
    # This method covers the rule selection only: it returns the first
    # matching exception rule, otherwise the matching rule with the greatest
    # +length+, otherwise +nil+ (no implicit "*" rule is produced).
    #
    # @param  [String, #to_s] domain The domain name.
    #
    # @return [PublicSuffix::Rule::*, nil]
    def find(domain)
      matches = select(domain)
      matches.detect { |rule| rule.type == :exception } ||
        # On equal length the later rule wins.
        matches.inject { |longest, rule| longest.length > rule.length ? longest : rule }
    end

    # Selects all the rules matching given domain.
    #
    # Will use +@indexes+ to try only the rules that share the same first label,
    # that will speed up things when using +List.find('foo')+ a lot.
    #
    # @param  [String, #to_s] domain The domain name.
    #
    # @return [Array<PublicSuffix::Rule::*>]
    def select(domain)
      positions = @indexes[Domain.domain_to_labels(domain).first] || []
      @rules.values_at(*positions).select { |rule| rule.match?(domain) }
    end


    @@default = nil

    class << self

      # Gets the default rule list.
      # Initializes a new {PublicSuffix::List} parsing the content
      # of {PublicSuffix::List.default_definition}, if required.
      #
      # The definition object is closed once it has been parsed
      # (when it responds to +close+), so the file handle is not leaked.
      #
      # @return [PublicSuffix::List]
      def default
        @@default ||= begin
          definition = default_definition
          parse(definition)
        ensure
          # definition is nil when default_definition itself raised;
          # a plain String definition does not respond to close.
          definition.close if definition.respond_to?(:close)
        end
      end

      # Sets the default rule list to +value+.
      #
      # @param [PublicSuffix::List] value
      #   The new rule list.
      #
      # @return [PublicSuffix::List]
      def default=(value)
        @@default = value
      end

      # Sets the default rule list to +nil+.
      #
      # @return [self]
      def clear
        self.default = nil
        self
      end

      # Resets the default rule list and reinitialize it
      # parsing the content of {PublicSuffix::List.default_definition}.
      #
      # @return [PublicSuffix::List]
      def reload
        clear.default
      end

      # Gets the default definition list.
      # Can be any <tt>IOStream</tt> including a <tt>File</tt>
      # or a simple <tt>String</tt>.
      # The object must respond to <tt>#each_line</tt>.
      #
      # This implementation opens a new read-only, UTF-8 handle
      # on the +definitions.txt+ file located next to this source file.
      # Whoever calls this method directly owns the handle and should close it.
      #
      # @return [File]
      def default_definition
        File.new(File.join(File.dirname(__FILE__), "definitions.txt"), "r:utf-8")
      end


      # Parse given +input+ treating the content as Public Suffix List.
      #
      # Blank lines and lines starting with +//+ (comments) are skipped;
      # every other line, stripped of surrounding whitespace,
      # is turned into a rule via +Rule.factory+.
      #
      # See http://publicsuffix.org/format/ for more details about input format.
      #
      # @param  [String, #each_line] input The rule list to parse.
      #
      # @return [PublicSuffix::List] A new list containing the parsed rules.
      def parse(input)
        new do |list|
          input.each_line do |raw|
            line = raw.strip
            next if line.empty? || line.start_with?("//")

            # Indexing is deferred: the constructor builds the index once
            # after this block has added every rule.
            list.add(Rule.factory(line), false)
          end
        end
      end

    end

  end

end