public_suffix 1.5.3 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,28 +1,30 @@
1
- #
2
- # Public Suffix
1
+ # = Public Suffix
3
2
  #
4
3
  # Domain name parser based on the Public Suffix List.
5
4
  #
6
- # Copyright (c) 2009-2015 Simone Carletti <weppos@weppos.net>
7
- #
5
+ # Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
8
6
 
9
- require 'public_suffix/domain'
10
- require 'public_suffix/version'
11
- require 'public_suffix/errors'
12
- require 'public_suffix/rule'
13
- require 'public_suffix/list'
7
+ require "public_suffix/domain"
8
+ require "public_suffix/version"
9
+ require "public_suffix/errors"
10
+ require "public_suffix/rule"
11
+ require "public_suffix/list"
14
12
 
13
+ # PublicSuffix is a Ruby domain name parser based on the Public Suffix List.
14
+ #
15
+ # The [Public Suffix List](https://publicsuffix.org) is a cross-vendor initiative
16
+ # to provide an accurate list of domain name suffixes.
17
+ #
18
+ # The Public Suffix List is an initiative of the Mozilla Project,
19
+ # but is maintained as a community resource. It is available for use in any software,
20
+ # but was originally created to meet the needs of browser manufacturers.
15
21
  module PublicSuffix
16
22
 
17
- # Parses +domain+ and returns the
18
- # {PublicSuffix::Domain} instance.
19
- #
20
- # @param [String, #to_s] domain
21
- # The domain name or fully qualified domain name to parse.
22
- # @param [PublicSuffix::List] list
23
- # The rule list to search, defaults to the default {PublicSuffix::List}
24
- #
25
- # @return [PublicSuffix::Domain]
23
+ DOT = ".".freeze
24
+ BANG = "!".freeze
25
+ STAR = "*".freeze
26
+
27
+ # Parses +name+ and returns the {PublicSuffix::Domain} instance.
26
28
  #
27
29
  # @example Parse a valid domain
28
30
  # PublicSuffix.parse("google.com")
@@ -48,47 +50,37 @@ module PublicSuffix
48
50
  # PublicSuffix.parse("http://www.google.com")
49
51
  # # => PublicSuffix::DomainInvalid
50
52
  #
53
+ #
54
+ # @param [String, #to_s] name The domain name or fully qualified domain name to parse.
55
+ # @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
56
+ # @param [Boolean] ignore_private
57
+ # @return [PublicSuffix::Domain]
58
+ #
51
59
  # @raise [PublicSuffix::Error]
52
60
  # If domain is not a valid domain.
53
61
  # @raise [PublicSuffix::DomainNotAllowed]
54
- # If a rule for +domain+ is found, but the rule
55
- # doesn't allow +domain+.
56
- #
57
- def self.parse(domain, list = List.default)
58
- domain = domain.to_s.downcase
59
- rule = list.find(domain)
62
+ # If a rule for +domain+ is found, but the rule doesn't allow +domain+.
63
+ def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
64
+ what = normalize(name)
65
+ raise what if what.is_a?(DomainInvalid)
66
+
67
+ rule = list.find(what, default: default_rule, ignore_private: ignore_private)
60
68
 
61
69
  if rule.nil?
62
- raise DomainInvalid, "`#{domain}' is not a valid domain"
70
+ raise DomainInvalid, "`#{what}` is not a valid domain"
63
71
  end
64
- if !rule.allow?(domain)
65
- raise DomainNotAllowed, "`#{domain}' is not allowed according to Registry policy"
72
+ if rule.decompose(what).last.nil?
73
+ raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy"
66
74
  end
67
75
 
68
- left, right = rule.decompose(domain)
69
-
70
- parts = left.split(".")
71
- # If we have 0 parts left, there is just a tld and no domain or subdomain
72
- # If we have 1 part left, there is just a tld, domain and not subdomain
73
- # If we have 2 parts left, the last part is the domain, the other parts (combined) are the subdomain
74
- tld = right
75
- sld = parts.empty? ? nil : parts.pop
76
- trd = parts.empty? ? nil : parts.join(".")
77
-
78
- Domain.new(tld, sld, trd)
76
+ decompose(what, rule)
79
77
  end
80
78
 
81
- # Checks whether +domain+ is assigned and allowed,
82
- # without actually parsing it.
79
+ # Checks whether +domain+ is assigned and allowed, without actually parsing it.
83
80
  #
84
81
  # This method doesn't care whether domain is a domain or subdomain.
85
82
  # The validation is performed using the default {PublicSuffix::List}.
86
83
  #
87
- # @param [String, #to_s] domain
88
- # The domain name or fully qualified domain name to validate.
89
- #
90
- # @return [Boolean]
91
- #
92
84
  # @example Validate a valid domain
93
85
  # PublicSuffix.valid?("example.com")
94
86
  # # => true
@@ -97,9 +89,9 @@ module PublicSuffix
97
89
  # PublicSuffix.valid?("www.example.com")
98
90
  # # => true
99
91
  #
100
- # @example Validate a not-assigned domain
101
- # PublicSuffix.valid?("example.qqq")
102
- # # => false
92
+ # @example Validate a not-listed domain
93
+ # PublicSuffix.valid?("example.tldnotlisted")
94
+ # # => true
103
95
  #
104
96
  # @example Validate a not-allowed domain
105
97
  # PublicSuffix.valid?("example.do")
@@ -117,10 +109,62 @@ module PublicSuffix
117
109
  # PublicSuffix.valid?("http://www.example.com")
118
110
  # # => false
119
111
  #
120
- def self.valid?(domain)
121
- domain = domain.to_s.downcase
122
- rule = List.default.find(domain)
123
- !rule.nil? && rule.allow?(domain)
112
+ #
113
+ # @param [String, #to_s] name The domain name or fully qualified domain name to validate.
114
+ # @param [Boolean] ignore_private
115
+ # @return [Boolean]
116
+ def self.valid?(name, list: List.default, default_rule: nil, ignore_private: false)
117
+ what = normalize(name)
118
+ return false if what.is_a?(DomainInvalid)
119
+
120
+ default_rule ||= list.default_rule
121
+ rule = list.find(what, default: default_rule, ignore_private: ignore_private)
122
+
123
+ !rule.nil? && !rule.decompose(what).last.nil?
124
+ end
125
+
126
+ # Attempts to parse the name and returns the domain, if valid.
127
+ #
128
+ # This method doesn't raise. Instead, it returns nil if the domain is not valid for whatever reason.
129
+ #
130
+ # @param [String, #to_s] name The domain name or fully qualified domain name to parse.
131
+ # @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
132
+ # @param [Boolean] ignore_private
133
+ # @return [String, nil]
134
+ def self.domain(name, **options)
135
+ parse(name, **options).domain
136
+ rescue PublicSuffix::Error
137
+ nil
138
+ end
139
+
140
+
141
+ # private
142
+
143
+ def self.decompose(name, rule)
144
+ left, right = rule.decompose(name)
145
+
146
+ parts = left.split(DOT)
147
+ # If we have 0 parts left, there is just a tld and no domain or subdomain
148
+ # If we have 1 part left, there is just a tld and a domain, and no subdomain
149
+ # If we have 2 or more parts left, the last part is the domain, the other parts (combined) are the subdomain
150
+ tld = right
151
+ sld = parts.empty? ? nil : parts.pop
152
+ trd = parts.empty? ? nil : parts.join(DOT)
153
+
154
+ Domain.new(tld, sld, trd)
155
+ end
156
+
157
+ # Pretend we know how to deal with user input.
158
+ def self.normalize(name)
159
+ name = name.to_s.dup
160
+ name.strip!
161
+ name.chomp!(DOT)
162
+ name.downcase!
163
+
164
+ return DomainInvalid.new("Name is blank") if name.empty?
165
+ return DomainInvalid.new("Name starts with a dot") if name.start_with?(DOT)
166
+ return DomainInvalid.new("%s is not expected to contain a scheme" % name) if name.include?("://")
167
+ name
124
168
  end
125
169
 
126
170
  end
@@ -1,37 +1,33 @@
1
- #
2
- # Public Suffix
1
+ # = Public Suffix
3
2
  #
4
3
  # Domain name parser based on the Public Suffix List.
5
4
  #
6
- # Copyright (c) 2009-2015 Simone Carletti <weppos@weppos.net>
7
- #
5
+ # Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
8
6
 
9
7
  module PublicSuffix
10
8
 
9
+ # Domain represents a domain name, composed of a TLD, SLD and TRD.
11
10
  class Domain
12
11
 
13
- # Splits a string into its possible labels
14
- # as a domain in reverse order from the input string.
15
- #
16
- # The input is not validated, but it is assumed to be a valid domain.
12
+ # Splits a string into the labels, that is the dot-separated parts.
17
13
  #
18
- # @param [String, #to_s] domain
19
- # The domain name to split.
20
- #
21
- # @return [Array<String>]
14
+ # The input is not validated, but it is assumed to be a valid domain name.
22
15
  #
23
16
  # @example
24
17
  #
25
- # domain_to_labels('google.com')
26
- # # => ['com', 'google']
18
+ # name_to_labels('example.com')
19
+ # # => ['example', 'com']
27
20
  #
28
- # domain_to_labels('google.co.uk')
29
- # # => ['uk', 'co', 'google']
21
+ # name_to_labels('example.co.uk')
22
+ # # => ['example', 'co', 'uk']
30
23
  #
31
- def self.domain_to_labels(domain)
32
- domain.to_s.split(".").reverse
24
+ # @param name [String, #to_s] The domain name to split.
25
+ # @return [Array<String>]
26
+ def self.name_to_labels(name)
27
+ name.to_s.split(DOT)
33
28
  end
34
29
 
30
+
35
31
  attr_reader :tld, :sld, :trd
36
32
 
37
33
  # Creates and returns a new {PublicSuffix::Domain} instance.
@@ -64,7 +60,7 @@ module PublicSuffix
64
60
  # PublicSuffix::Domain.new("com", "example", "wwww")
65
61
  # # => #<PublicSuffix::Domain @tld="com", @trd=nil, @sld="example">
66
62
  #
67
- def initialize(*args, &block)
63
+ def initialize(*args)
68
64
  @tld, @sld, @trd = args
69
65
  yield(self) if block_given?
70
66
  end
@@ -105,7 +101,7 @@ module PublicSuffix
105
101
  # # => "www.google.com"
106
102
  #
107
103
  def name
108
- [@trd, @sld, @tld].compact.join(".")
104
+ [@trd, @sld, @tld].compact.join(DOT)
109
105
  end
110
106
 
111
107
  # Returns a domain-like representation of this object
@@ -123,10 +119,6 @@ module PublicSuffix
123
119
  # This method doesn't validate the input. It handles the domain
124
120
  # as a valid domain name and simply applies the necessary transformations.
125
121
  #
126
- # # This is an invalid domain
127
- # PublicSuffix::Domain.new("qqq", "google").domain
128
- # # => "google.qqq"
129
- #
130
122
  # This method returns a FQD, not just the domain part.
131
123
  # To get the domain part, use <tt>#sld</tt> (aka second level domain).
132
124
  #
@@ -136,18 +128,15 @@ module PublicSuffix
136
128
  # PublicSuffix::Domain.new("com", "google", "www").sld
137
129
  # # => "google"
138
130
  #
139
- # @return [String]
140
- #
141
131
  # @see #domain?
142
132
  # @see #subdomain
143
133
  #
134
+ # @return [String]
144
135
  def domain
145
- if domain?
146
- [@sld, @tld].join(".")
147
- end
136
+ [@sld, @tld].join(DOT) if domain?
148
137
  end
149
138
 
150
- # Returns a domain-like representation of this object
139
+ # Returns a subdomain-like representation of this object
151
140
  # if the object is a {#subdomain?}, <tt>nil</tt> otherwise.
152
141
  #
153
142
  # PublicSuffix::Domain.new("com").subdomain
@@ -162,11 +151,7 @@ module PublicSuffix
162
151
  # This method doesn't validate the input. It handles the domain
163
152
  # as a valid domain name and simply applies the necessary transformations.
164
153
  #
165
- # # This is an invalid domain
166
- # PublicSuffix::Domain.new("qqq", "google", "www").subdomain
167
- # # => "www.google.qqq"
168
- #
169
- # This method returns a FQD, not just the domain part.
154
+ # This method returns a FQD, not just the subdomain part.
170
155
  # To get the subdomain part, use <tt>#trd</tt> (aka third level domain).
171
156
  #
172
157
  # PublicSuffix::Domain.new("com", "google", "www").subdomain
@@ -175,25 +160,12 @@ module PublicSuffix
175
160
  # PublicSuffix::Domain.new("com", "google", "www").trd
176
161
  # # => "www"
177
162
  #
178
- # @return [String]
179
- #
180
163
  # @see #subdomain?
181
164
  # @see #domain
182
165
  #
166
+ # @return [String]
183
167
  def subdomain
184
- if subdomain?
185
- [@trd, @sld, @tld].join(".")
186
- end
187
- end
188
-
189
- # Returns the rule matching this domain
190
- # in the default {PublicSuffix::List}.
191
- #
192
- # @return [PublicSuffix::Rule::Base, nil]
193
- # The rule instance a rule matches current domain,
194
- # nil if no rule is found.
195
- def rule
196
- List.default.find(name)
168
+ [@trd, @sld, @tld].join(DOT) if subdomain?
197
169
  end
198
170
 
199
171
  # Checks whether <tt>self</tt> looks like a domain.
@@ -204,8 +176,6 @@ module PublicSuffix
204
176
  # If you also want to validate the domain,
205
177
  # use {#valid_domain?} instead.
206
178
  #
207
- # @return [Boolean]
208
- #
209
179
  # @example
210
180
  #
211
181
  # PublicSuffix::Domain.new("com").domain?
@@ -219,11 +189,12 @@ module PublicSuffix
219
189
  #
220
190
  # # This is an invalid domain, but returns true
221
191
  # # because this method doesn't validate the content.
222
- # PublicSuffix::Domain.new("qqq", "google").domain?
192
+ # PublicSuffix::Domain.new("com", "").domain?
223
193
  # # => true
224
194
  #
225
195
  # @see #subdomain?
226
196
  #
197
+ # @return [Boolean]
227
198
  def domain?
228
199
  !(@tld.nil? || @sld.nil?)
229
200
  end
@@ -236,8 +207,6 @@ module PublicSuffix
236
207
  # If you also want to validate the domain,
237
208
  # use {#valid_subdomain?} instead.
238
209
  #
239
- # @return [Boolean]
240
- #
241
210
  # @example
242
211
  #
243
212
  # PublicSuffix::Domain.new("com").subdomain?
@@ -251,115 +220,16 @@ module PublicSuffix
251
220
  #
252
221
  # # This is an invalid domain, but returns true
253
222
  # # because this method doesn't validate the content.
254
- # PublicSuffix::Domain.new("qqq", "google", "www").subdomain?
223
+ # PublicSuffix::Domain.new("com", "example", "").subdomain?
255
224
  # # => true
256
225
  #
257
226
  # @see #domain?
258
227
  #
228
+ # @return [Boolean]
259
229
  def subdomain?
260
230
  !(@tld.nil? || @sld.nil? || @trd.nil?)
261
231
  end
262
232
 
263
- # Checks whether <tt>self</tt> is exclusively a domain,
264
- # and not a subdomain.
265
- #
266
- # @return [Boolean]
267
- def is_a_domain?
268
- domain? && !subdomain?
269
- end
270
-
271
- # Checks whether <tt>self</tt> is exclusively a subdomain.
272
- #
273
- # @return [Boolean]
274
- def is_a_subdomain?
275
- subdomain?
276
- end
277
-
278
- # Checks whether <tt>self</tt> is assigned and allowed
279
- # according to default {List}.
280
- #
281
- # This method triggers a new rule lookup in the default {List},
282
- # which is a quite intensive task.
283
- #
284
- # @return [Boolean]
285
- #
286
- # @example Check a valid domain
287
- # Domain.new("com", "example").valid?
288
- # # => true
289
- #
290
- # @example Check a valid subdomain
291
- # Domain.new("com", "example", "www").valid?
292
- # # => true
293
- #
294
- # @example Check a not-assigned domain
295
- # Domain.new("qqq", "example").valid?
296
- # # => false
297
- #
298
- # @example Check a not-allowed domain
299
- # Domain.new("do", "example").valid?
300
- # # => false
301
- # Domain.new("do", "example", "www").valid?
302
- # # => true
303
- #
304
- def valid?
305
- r = rule
306
- !r.nil? && r.allow?(name)
307
- end
308
-
309
- # Checks whether <tt>self</tt> looks like a domain and validates
310
- # according to default {List}.
311
- #
312
- # @return [Boolean]
313
- #
314
- # @example
315
- #
316
- # PublicSuffix::Domain.new("com").domain?
317
- # # => false
318
- #
319
- # PublicSuffix::Domain.new("com", "google").domain?
320
- # # => true
321
- #
322
- # PublicSuffix::Domain.new("com", "google", "www").domain?
323
- # # => true
324
- #
325
- # # This is an invalid domain
326
- # PublicSuffix::Domain.new("qqq", "google").false?
327
- # # => true
328
- #
329
- # @see #domain?
330
- # @see #valid?
331
- #
332
- def valid_domain?
333
- domain? && valid?
334
- end
335
-
336
- # Checks whether <tt>self</tt> looks like a subdomain and validates
337
- # according to default {List}.
338
- #
339
- # @return [Boolean]
340
- #
341
- # @example
342
- #
343
- # PublicSuffix::Domain.new("com").subdomain?
344
- # # => false
345
- #
346
- # PublicSuffix::Domain.new("com", "google").subdomain?
347
- # # => false
348
- #
349
- # PublicSuffix::Domain.new("com", "google", "www").subdomain?
350
- # # => true
351
- #
352
- # # This is an invalid domain
353
- # PublicSuffix::Domain.new("qqq", "google", "www").subdomain?
354
- # # => false
355
- #
356
- # @see #subdomain?
357
- # @see #valid?
358
- #
359
- def valid_subdomain?
360
- subdomain? && valid?
361
- end
362
-
363
233
  end
364
234
 
365
235
  end