public_suffix 1.5.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,30 @@
1
- #
2
- # Public Suffix
1
+ # = Public Suffix
3
2
  #
4
3
  # Domain name parser based on the Public Suffix List.
5
4
  #
6
- # Copyright (c) 2009-2015 Simone Carletti <weppos@weppos.net>
7
- #
5
+ # Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
8
6
 
9
- require 'public_suffix/domain'
10
- require 'public_suffix/version'
11
- require 'public_suffix/errors'
12
- require 'public_suffix/rule'
13
- require 'public_suffix/list'
7
+ require "public_suffix/domain"
8
+ require "public_suffix/version"
9
+ require "public_suffix/errors"
10
+ require "public_suffix/rule"
11
+ require "public_suffix/list"
14
12
 
13
+ # PublicSuffix is a Ruby domain name parser based on the Public Suffix List.
14
+ #
15
+ # The [Public Suffix List](https://publicsuffix.org) is a cross-vendor initiative
16
+ # to provide an accurate list of domain name suffixes.
17
+ #
18
+ # The Public Suffix List is an initiative of the Mozilla Project,
19
+ # but is maintained as a community resource. It is available for use in any software,
20
+ # but was originally created to meet the needs of browser manufacturers.
15
21
  module PublicSuffix
16
22
 
17
- # Parses +domain+ and returns the
18
- # {PublicSuffix::Domain} instance.
19
- #
20
- # @param [String, #to_s] domain
21
- # The domain name or fully qualified domain name to parse.
22
- # @param [PublicSuffix::List] list
23
- # The rule list to search, defaults to the default {PublicSuffix::List}
24
- #
25
- # @return [PublicSuffix::Domain]
23
+ DOT = ".".freeze
24
+ BANG = "!".freeze
25
+ STAR = "*".freeze
26
+
27
+ # Parses +name+ and returns the {PublicSuffix::Domain} instance.
26
28
  #
27
29
  # @example Parse a valid domain
28
30
  # PublicSuffix.parse("google.com")
@@ -48,47 +50,37 @@ module PublicSuffix
48
50
  # PublicSuffix.parse("http://www.google.com")
49
51
  # # => PublicSuffix::DomainInvalid
50
52
  #
53
+ #
54
+ # @param [String, #to_s] name The domain name or fully qualified domain name to parse.
55
+ # @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
56
+ # @param [Boolean] ignore_private
57
+ # @return [PublicSuffix::Domain]
58
+ #
51
59
  # @raise [PublicSuffix::Error]
52
60
  # If domain is not a valid domain.
53
61
  # @raise [PublicSuffix::DomainNotAllowed]
54
- # If a rule for +domain+ is found, but the rule
55
- # doesn't allow +domain+.
56
- #
57
- def self.parse(domain, list = List.default)
58
- domain = domain.to_s.downcase
59
- rule = list.find(domain)
62
+ # If a rule for +name+ is found, but the rule doesn't allow +name+.
63
+ def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
64
+ what = normalize(name)
65
+ raise what if what.is_a?(DomainInvalid)
66
+
67
+ rule = list.find(what, default: default_rule, ignore_private: ignore_private)
60
68
 
61
69
  if rule.nil?
62
- raise DomainInvalid, "`#{domain}' is not a valid domain"
70
+ raise DomainInvalid, "`#{what}` is not a valid domain"
63
71
  end
64
- if !rule.allow?(domain)
65
- raise DomainNotAllowed, "`#{domain}' is not allowed according to Registry policy"
72
+ if rule.decompose(what).last.nil?
73
+ raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy"
66
74
  end
67
75
 
68
- left, right = rule.decompose(domain)
69
-
70
- parts = left.split(".")
71
- # If we have 0 parts left, there is just a tld and no domain or subdomain
72
- # If we have 1 part left, there is just a tld, domain and not subdomain
73
- # If we have 2 parts left, the last part is the domain, the other parts (combined) are the subdomain
74
- tld = right
75
- sld = parts.empty? ? nil : parts.pop
76
- trd = parts.empty? ? nil : parts.join(".")
77
-
78
- Domain.new(tld, sld, trd)
76
+ decompose(what, rule)
79
77
  end
80
78
 
81
- # Checks whether +domain+ is assigned and allowed,
82
- # without actually parsing it.
79
+ # Checks whether +name+ is assigned and allowed, without actually parsing it.
83
80
  #
84
81
  # This method doesn't care whether domain is a domain or subdomain.
85
82
  # The validation is performed using the default {PublicSuffix::List}.
86
83
  #
87
- # @param [String, #to_s] domain
88
- # The domain name or fully qualified domain name to validate.
89
- #
90
- # @return [Boolean]
91
- #
92
84
  # @example Validate a valid domain
93
85
  # PublicSuffix.valid?("example.com")
94
86
  # # => true
@@ -97,9 +89,9 @@ module PublicSuffix
97
89
  # PublicSuffix.valid?("www.example.com")
98
90
  # # => true
99
91
  #
100
- # @example Validate a not-assigned domain
101
- # PublicSuffix.valid?("example.qqq")
102
- # # => false
92
+ # @example Validate a not-listed domain
93
+ # PublicSuffix.valid?("example.tldnotlisted")
94
+ # # => true
103
95
  #
104
96
  # @example Validate a not-allowed domain
105
97
  # PublicSuffix.valid?("example.do")
@@ -117,10 +109,62 @@ module PublicSuffix
117
109
  # PublicSuffix.valid?("http://www.example.com")
118
110
  # # => false
119
111
  #
120
- def self.valid?(domain)
121
- domain = domain.to_s.downcase
122
- rule = List.default.find(domain)
123
- !rule.nil? && rule.allow?(domain)
112
+ #
113
+ # @param [String, #to_s] name The domain name or fully qualified domain name to validate.
114
+ # @param [Boolean] ignore_private
115
+ # @return [Boolean]
116
+ def self.valid?(name, list: List.default, default_rule: nil, ignore_private: false)
117
+ what = normalize(name)
118
+ return false if what.is_a?(DomainInvalid)
119
+
120
+ default_rule ||= list.default_rule
121
+ rule = list.find(what, default: default_rule, ignore_private: ignore_private)
122
+
123
+ !rule.nil? && !rule.decompose(what).last.nil?
124
+ end
125
+
126
+ # Attempts to parse the name and returns the domain, if valid.
127
+ #
128
+ # This method doesn't raise. Instead, it returns nil if the domain is not valid for whatever reason.
129
+ #
130
+ # @param [String, #to_s] name The domain name or fully qualified domain name to parse.
131
+ # @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
132
+ # @param [Boolean] ignore_private
133
+ # @return [String, nil] The domain, or nil if the name is not valid.
134
+ def self.domain(name, **options)
135
+ parse(name, **options).domain
136
+ rescue PublicSuffix::Error
137
+ nil
138
+ end
139
+
140
+
141
+ # private
142
+
143
+ def self.decompose(name, rule)
144
+ left, right = rule.decompose(name)
145
+
146
+ parts = left.split(DOT)
147
+ # If we have 0 parts left, there is just a tld and no domain or subdomain
148
+ # If we have 1 part left, there is a tld and a domain, but no subdomain
149
+ # If we have 2 or more parts left, the last part is the domain, the other parts (combined) are the subdomain
150
+ tld = right
151
+ sld = parts.empty? ? nil : parts.pop
152
+ trd = parts.empty? ? nil : parts.join(DOT)
153
+
154
+ Domain.new(tld, sld, trd)
155
+ end
156
+
157
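+ # Normalizes user input (strip, drop one trailing dot, downcase). Returns a DomainInvalid instance, without raising it, if the name is blank, starts with a dot or contains a scheme.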
158
+ def self.normalize(name)
159
+ name = name.to_s.dup
160
+ name.strip!
161
+ name.chomp!(DOT)
162
+ name.downcase!
163
+
164
+ return DomainInvalid.new("Name is blank") if name.empty?
165
+ return DomainInvalid.new("Name starts with a dot") if name.start_with?(DOT)
166
+ return DomainInvalid.new("%s is not expected to contain a scheme" % name) if name.include?("://")
167
+ name
124
168
  end
125
169
 
126
170
  end
@@ -1,37 +1,33 @@
1
- #
2
- # Public Suffix
1
+ # = Public Suffix
3
2
  #
4
3
  # Domain name parser based on the Public Suffix List.
5
4
  #
6
- # Copyright (c) 2009-2015 Simone Carletti <weppos@weppos.net>
7
- #
5
+ # Copyright (c) 2009-2016 Simone Carletti <weppos@weppos.net>
8
6
 
9
7
  module PublicSuffix
10
8
 
9
+ # Domain represents a domain name, composed of a TLD, SLD and TRD.
11
10
  class Domain
12
11
 
13
- # Splits a string into its possible labels
14
- # as a domain in reverse order from the input string.
15
- #
16
- # The input is not validated, but it is assumed to be a valid domain.
12
+ # Splits a string into the labels, that is the dot-separated parts.
17
13
  #
18
- # @param [String, #to_s] domain
19
- # The domain name to split.
20
- #
21
- # @return [Array<String>]
14
+ # The input is not validated, but it is assumed to be a valid domain name.
22
15
  #
23
16
  # @example
24
17
  #
25
- # domain_to_labels('google.com')
26
- # # => ['com', 'google']
18
+ # name_to_labels('example.com')
19
+ # # => ['example', 'com']
27
20
  #
28
- # domain_to_labels('google.co.uk')
29
- # # => ['uk', 'co', 'google']
21
+ # name_to_labels('example.co.uk')
22
+ # # => ['example', 'co', 'uk']
30
23
  #
31
- def self.domain_to_labels(domain)
32
- domain.to_s.split(".").reverse
24
+ # @param name [String, #to_s] The domain name to split.
25
+ # @return [Array<String>]
26
+ def self.name_to_labels(name)
27
+ name.to_s.split(DOT)
33
28
  end
34
29
 
30
+
35
31
  attr_reader :tld, :sld, :trd
36
32
 
37
33
  # Creates and returns a new {PublicSuffix::Domain} instance.
@@ -64,7 +60,7 @@ module PublicSuffix
64
60
  # PublicSuffix::Domain.new("com", "example", "wwww")
65
61
  # # => #<PublicSuffix::Domain @tld="com", @trd="wwww", @sld="example">
66
62
  #
67
- def initialize(*args, &block)
63
+ def initialize(*args)
68
64
  @tld, @sld, @trd = args
69
65
  yield(self) if block_given?
70
66
  end
@@ -105,7 +101,7 @@ module PublicSuffix
105
101
  # # => "www.google.com"
106
102
  #
107
103
  def name
108
- [@trd, @sld, @tld].compact.join(".")
104
+ [@trd, @sld, @tld].compact.join(DOT)
109
105
  end
110
106
 
111
107
  # Returns a domain-like representation of this object
@@ -123,10 +119,6 @@ module PublicSuffix
123
119
  # This method doesn't validate the input. It handles the domain
124
120
  # as a valid domain name and simply applies the necessary transformations.
125
121
  #
126
- # # This is an invalid domain
127
- # PublicSuffix::Domain.new("qqq", "google").domain
128
- # # => "google.qqq"
129
- #
130
122
  # This method returns a FQD, not just the domain part.
131
123
  # To get the domain part, use <tt>#sld</tt> (aka second level domain).
132
124
  #
@@ -136,18 +128,15 @@ module PublicSuffix
136
128
  # PublicSuffix::Domain.new("com", "google", "www").sld
137
129
  # # => "google"
138
130
  #
139
- # @return [String]
140
- #
141
131
  # @see #domain?
142
132
  # @see #subdomain
143
133
  #
134
+ # @return [String]
144
135
  def domain
145
- if domain?
146
- [@sld, @tld].join(".")
147
- end
136
+ [@sld, @tld].join(DOT) if domain?
148
137
  end
149
138
 
150
- # Returns a domain-like representation of this object
139
+ # Returns a subdomain-like representation of this object
151
140
  # if the object is a {#subdomain?}, <tt>nil</tt> otherwise.
152
141
  #
153
142
  # PublicSuffix::Domain.new("com").subdomain
@@ -162,11 +151,7 @@ module PublicSuffix
162
151
  # This method doesn't validate the input. It handles the domain
163
152
  # as a valid domain name and simply applies the necessary transformations.
164
153
  #
165
- # # This is an invalid domain
166
- # PublicSuffix::Domain.new("qqq", "google", "www").subdomain
167
- # # => "www.google.qqq"
168
- #
169
- # This method returns a FQD, not just the domain part.
154
+ # This method returns a FQD, not just the subdomain part.
170
155
  # To get the subdomain part, use <tt>#trd</tt> (aka third level domain).
171
156
  #
172
157
  # PublicSuffix::Domain.new("com", "google", "www").subdomain
@@ -175,25 +160,12 @@ module PublicSuffix
175
160
  # PublicSuffix::Domain.new("com", "google", "www").trd
176
161
  # # => "www"
177
162
  #
178
- # @return [String]
179
- #
180
163
  # @see #subdomain?
181
164
  # @see #domain
182
165
  #
166
+ # @return [String]
183
167
  def subdomain
184
- if subdomain?
185
- [@trd, @sld, @tld].join(".")
186
- end
187
- end
188
-
189
- # Returns the rule matching this domain
190
- # in the default {PublicSuffix::List}.
191
- #
192
- # @return [PublicSuffix::Rule::Base, nil]
193
- # The rule instance a rule matches current domain,
194
- # nil if no rule is found.
195
- def rule
196
- List.default.find(name)
168
+ [@trd, @sld, @tld].join(DOT) if subdomain?
197
169
  end
198
170
 
199
171
  # Checks whether <tt>self</tt> looks like a domain.
@@ -204,8 +176,6 @@ module PublicSuffix
204
176
  # If you also want to validate the domain,
205
177
  # use {#valid_domain?} instead.
206
178
  #
207
- # @return [Boolean]
208
- #
209
179
  # @example
210
180
  #
211
181
  # PublicSuffix::Domain.new("com").domain?
@@ -219,11 +189,12 @@ module PublicSuffix
219
189
  #
220
190
  # # This is an invalid domain, but returns true
221
191
  # # because this method doesn't validate the content.
222
- # PublicSuffix::Domain.new("qqq", "google").domain?
192
+ # PublicSuffix::Domain.new("com", "not valid").domain?
223
193
  # # => true
224
194
  #
225
195
  # @see #subdomain?
226
196
  #
197
+ # @return [Boolean]
227
198
  def domain?
228
199
  !(@tld.nil? || @sld.nil?)
229
200
  end
@@ -236,8 +207,6 @@ module PublicSuffix
236
207
  # If you also want to validate the domain,
237
208
  # use {#valid_subdomain?} instead.
238
209
  #
239
- # @return [Boolean]
240
- #
241
210
  # @example
242
211
  #
243
212
  # PublicSuffix::Domain.new("com").subdomain?
@@ -251,115 +220,16 @@ module PublicSuffix
251
220
  #
252
221
  # # This is an invalid domain, but returns true
253
222
  # # because this method doesn't validate the content.
254
- # PublicSuffix::Domain.new("qqq", "google", "www").subdomain?
223
+ # PublicSuffix::Domain.new("com", "example", "not valid").subdomain?
255
224
  # # => true
256
225
  #
257
226
  # @see #domain?
258
227
  #
228
+ # @return [Boolean]
259
229
  def subdomain?
260
230
  !(@tld.nil? || @sld.nil? || @trd.nil?)
261
231
  end
262
232
 
263
- # Checks whether <tt>self</tt> is exclusively a domain,
264
- # and not a subdomain.
265
- #
266
- # @return [Boolean]
267
- def is_a_domain?
268
- domain? && !subdomain?
269
- end
270
-
271
- # Checks whether <tt>self</tt> is exclusively a subdomain.
272
- #
273
- # @return [Boolean]
274
- def is_a_subdomain?
275
- subdomain?
276
- end
277
-
278
- # Checks whether <tt>self</tt> is assigned and allowed
279
- # according to default {List}.
280
- #
281
- # This method triggers a new rule lookup in the default {List},
282
- # which is a quite intensive task.
283
- #
284
- # @return [Boolean]
285
- #
286
- # @example Check a valid domain
287
- # Domain.new("com", "example").valid?
288
- # # => true
289
- #
290
- # @example Check a valid subdomain
291
- # Domain.new("com", "example", "www").valid?
292
- # # => true
293
- #
294
- # @example Check a not-assigned domain
295
- # Domain.new("qqq", "example").valid?
296
- # # => false
297
- #
298
- # @example Check a not-allowed domain
299
- # Domain.new("do", "example").valid?
300
- # # => false
301
- # Domain.new("do", "example", "www").valid?
302
- # # => true
303
- #
304
- def valid?
305
- r = rule
306
- !r.nil? && r.allow?(name)
307
- end
308
-
309
- # Checks whether <tt>self</tt> looks like a domain and validates
310
- # according to default {List}.
311
- #
312
- # @return [Boolean]
313
- #
314
- # @example
315
- #
316
- # PublicSuffix::Domain.new("com").domain?
317
- # # => false
318
- #
319
- # PublicSuffix::Domain.new("com", "google").domain?
320
- # # => true
321
- #
322
- # PublicSuffix::Domain.new("com", "google", "www").domain?
323
- # # => true
324
- #
325
- # # This is an invalid domain
326
- # PublicSuffix::Domain.new("qqq", "google").false?
327
- # # => true
328
- #
329
- # @see #domain?
330
- # @see #valid?
331
- #
332
- def valid_domain?
333
- domain? && valid?
334
- end
335
-
336
- # Checks whether <tt>self</tt> looks like a subdomain and validates
337
- # according to default {List}.
338
- #
339
- # @return [Boolean]
340
- #
341
- # @example
342
- #
343
- # PublicSuffix::Domain.new("com").subdomain?
344
- # # => false
345
- #
346
- # PublicSuffix::Domain.new("com", "google").subdomain?
347
- # # => false
348
- #
349
- # PublicSuffix::Domain.new("com", "google", "www").subdomain?
350
- # # => true
351
- #
352
- # # This is an invalid domain
353
- # PublicSuffix::Domain.new("qqq", "google", "www").subdomain?
354
- # # => false
355
- #
356
- # @see #subdomain?
357
- # @see #valid?
358
- #
359
- def valid_subdomain?
360
- subdomain? && valid?
361
- end
362
-
363
233
  end
364
234
 
365
235
  end