public_suffix 3.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rubocop.yml +36 -0
  4. data/.rubocop_defaults.yml +179 -0
  5. data/.ruby-gemset +1 -0
  6. data/.travis.yml +31 -0
  7. data/.yardopts +1 -0
  8. data/2.0-Upgrade.md +52 -0
  9. data/CHANGELOG.md +353 -0
  10. data/Gemfile +12 -0
  11. data/LICENSE.txt +22 -0
  12. data/README.md +202 -0
  13. data/Rakefile +51 -0
  14. data/bin/console +15 -0
  15. data/data/list.txt +12966 -0
  16. data/lib/public_suffix.rb +179 -0
  17. data/lib/public_suffix/domain.rb +235 -0
  18. data/lib/public_suffix/errors.rb +41 -0
  19. data/lib/public_suffix/list.rb +247 -0
  20. data/lib/public_suffix/rule.rb +350 -0
  21. data/lib/public_suffix/version.rb +13 -0
  22. data/public_suffix.gemspec +25 -0
  23. data/test/.empty +2 -0
  24. data/test/acceptance_test.rb +129 -0
  25. data/test/benchmarks/bm_find.rb +66 -0
  26. data/test/benchmarks/bm_find_all.rb +102 -0
  27. data/test/benchmarks/bm_names.rb +91 -0
  28. data/test/benchmarks/bm_select.rb +26 -0
  29. data/test/benchmarks/bm_select_incremental.rb +25 -0
  30. data/test/benchmarks/bm_valid.rb +101 -0
  31. data/test/profilers/domain_profiler.rb +12 -0
  32. data/test/profilers/find_profiler.rb +12 -0
  33. data/test/profilers/find_profiler_jp.rb +12 -0
  34. data/test/profilers/initialization_profiler.rb +11 -0
  35. data/test/profilers/list_profsize.rb +11 -0
  36. data/test/profilers/object_binsize.rb +57 -0
  37. data/test/psl_test.rb +52 -0
  38. data/test/test_helper.rb +18 -0
  39. data/test/tests.txt +98 -0
  40. data/test/unit/domain_test.rb +106 -0
  41. data/test/unit/errors_test.rb +25 -0
  42. data/test/unit/list_test.rb +241 -0
  43. data/test/unit/public_suffix_test.rb +188 -0
  44. data/test/unit/rule_test.rb +222 -0
  45. metadata +151 -0
@@ -0,0 +1,179 @@
1
+ # frozen_string_literal: true
2
+
3
+ # = Public Suffix
4
+ #
5
+ # Domain name parser based on the Public Suffix List.
6
+ #
7
+ # Copyright (c) 2009-2019 Simone Carletti <weppos@weppos.net>
8
+
9
+ require_relative "public_suffix/domain"
10
+ require_relative "public_suffix/version"
11
+ require_relative "public_suffix/errors"
12
+ require_relative "public_suffix/rule"
13
+ require_relative "public_suffix/list"
14
+
15
+ # PublicSuffix is a Ruby domain name parser based on the Public Suffix List.
16
+ #
17
+ # The [Public Suffix List](https://publicsuffix.org) is a cross-vendor initiative
18
+ # to provide an accurate list of domain name suffixes.
19
+ #
20
+ # The Public Suffix List is an initiative of the Mozilla Project,
21
+ # but is maintained as a community resource. It is available for use in any software,
22
+ # but was originally created to meet the needs of browser manufacturers.
23
+ module PublicSuffix
24
+
25
+ DOT = "."
26
+ BANG = "!"
27
+ STAR = "*"
28
+
29
+ # Parses +name+ and returns the {PublicSuffix::Domain} instance.
30
+ #
31
+ # @example Parse a valid domain
32
+ # PublicSuffix.parse("google.com")
33
+ # # => #<PublicSuffix::Domain:0x007fec2e51e588 @sld="google", @tld="com", @trd=nil>
34
+ #
35
+ # @example Parse a valid subdomain
36
+ # PublicSuffix.parse("www.google.com")
37
+ # # => #<PublicSuffix::Domain:0x007fec276d4cf8 @sld="google", @tld="com", @trd="www">
38
+ #
39
+ # @example Parse a fully qualified domain
40
+ # PublicSuffix.parse("google.com.")
41
+ # # => #<PublicSuffix::Domain:0x007fec257caf38 @sld="google", @tld="com", @trd=nil>
42
+ #
43
+ # @example Parse a fully qualified domain (subdomain)
44
+ # PublicSuffix.parse("www.google.com.")
45
+ # # => #<PublicSuffix::Domain:0x007fec27b6bca8 @sld="google", @tld="com", @trd="www">
46
+ #
47
+ # @example Parse an unlisted domain (accepted, because the default * rule applies)
48
+ # PublicSuffix.parse("x.yz")
49
+ # # => #<PublicSuffix::Domain:0x007fec2f49bec0 @sld="x", @tld="yz", @trd=nil>
50
+ #
51
+ # @example Parse an unlisted domain with strict checking (without applying the default * rule)
52
+ # PublicSuffix.parse("x.yz", default_rule: nil)
53
+ # # => PublicSuffix::DomainInvalid: `x.yz` is not a valid domain
54
+ #
55
+ # @example Parse a URL (not supported, only domains)
56
+ # PublicSuffix.parse("http://www.google.com")
57
+ # # => PublicSuffix::DomainInvalid: http://www.google.com is not expected to contain a scheme
58
+ #
59
+ #
60
+ # @param [String, #to_s] name The domain name or fully qualified domain name to parse.
61
+ # @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
62
+ # @param [Boolean] ignore_private Passed through to +list.find+ as the +ignore_private+ option.
63
+ # @return [PublicSuffix::Domain]
64
+ #
65
+ # @raise [PublicSuffix::DomainInvalid]
66
+ # If +name+ fails normalization (blank, leading dot, scheme) or no rule is found for it.
67
+ # @raise [PublicSuffix::DomainNotAllowed]
68
+ # If a rule for +name+ is found, but the rule doesn't allow +name+.
69
+ def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
70
+ what = normalize(name)
71
+ raise what if what.is_a?(DomainInvalid)
72
+
73
+ rule = list.find(what, default: default_rule, ignore_private: ignore_private)
74
+
75
+ # rubocop:disable Style/IfUnlessModifier
76
+ if rule.nil?
77
+ raise DomainInvalid, "`#{what}` is not a valid domain"
78
+ end
79
+ if rule.decompose(what).last.nil?
80
+ raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy"
81
+ end
82
+
83
+ # rubocop:enable Style/IfUnlessModifier
84
+
85
+ decompose(what, rule)
86
+ end
87
+
88
+ # Checks whether +name+ is assigned and allowed, without building a {PublicSuffix::Domain}.
89
+ #
90
+ # This method doesn't care whether +name+ is a domain or a subdomain.
91
+ # The validation is performed using +list+, which defaults to the default {PublicSuffix::List}.
92
+ #
93
+ # @example Validate a valid domain
94
+ # PublicSuffix.valid?("example.com")
95
+ # # => true
96
+ #
97
+ # @example Validate a valid subdomain
98
+ # PublicSuffix.valid?("www.example.com")
99
+ # # => true
100
+ #
101
+ # @example Validate a not-listed domain
102
+ # PublicSuffix.valid?("example.tldnotlisted")
103
+ # # => true
104
+ #
105
+ # @example Validate a not-listed domain with strict checking (without applying the default * rule)
106
+ # PublicSuffix.valid?("example.tldnotlisted")
107
+ # # => true
108
+ # PublicSuffix.valid?("example.tldnotlisted", default_rule: nil)
109
+ # # => false
110
+ #
111
+ # @example Validate a fully qualified domain
112
+ # PublicSuffix.valid?("google.com.")
113
+ # # => true
114
+ # PublicSuffix.valid?("www.google.com.")
115
+ # # => true
116
+ #
117
+ # @example Check a URL (which is not a valid domain)
118
+ # PublicSuffix.valid?("http://www.example.com")
119
+ # # => false
120
+ #
121
+ #
122
+ # @param [String, #to_s] name The domain name or fully qualified domain name to validate.
123
+ # @param [Boolean] ignore_private Passed through to +list.find+ as the +ignore_private+ option.
124
+ # @return [Boolean]
125
+ def self.valid?(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
126
+ what = normalize(name)
127
+ return false if what.is_a?(DomainInvalid)
128
+
129
+ rule = list.find(what, default: default_rule, ignore_private: ignore_private)
130
+
131
+ !rule.nil? && !rule.decompose(what).last.nil?
132
+ end
133
+
134
+ # Attempts to parse the name and returns the domain, if valid.
135
+ #
136
+ # This method doesn't raise a {PublicSuffix::Error}. Instead, it returns nil if the domain is not valid for whatever reason.
137
+ #
138
+ # @param [String, #to_s] name The domain name or fully qualified domain name to parse.
139
+ # @param [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
140
+ # @param [Boolean] ignore_private Forwarded, like every other option, to {PublicSuffix.parse}.
141
+ # @return [String, nil]
142
+ def self.domain(name, **options)
143
+ parse(name, **options).domain
144
+ rescue PublicSuffix::Error
145
+ nil
146
+ end
147
+
148
+
149
+ # private
150
+
151
+ def self.decompose(name, rule)
152
+ left, right = rule.decompose(name)
153
+
154
+ parts = left.split(DOT)
155
+ # If we have 0 parts left, there is just a tld and neither a domain nor a subdomain
156
+ # If we have 1 part left, there is a tld and a domain, but no subdomain
157
+ # If we have 2 or more parts left, the last part is the domain, the other parts (combined) are the subdomain
158
+ tld = right
159
+ sld = parts.empty? ? nil : parts.pop
160
+ trd = parts.empty? ? nil : parts.join(DOT)
161
+
162
+ Domain.new(tld, sld, trd)
163
+ end
164
+
165
+ # Best-effort cleanup of user input: strips whitespace, drops one trailing dot and downcases; returns (does not raise) a DomainInvalid for blank names, leading dots or schemes.
166
+ def self.normalize(name)
167
+ name = name.to_s.dup
168
+ name.strip!
169
+ name.chomp!(DOT)
170
+ name.downcase!
171
+
172
+ return DomainInvalid.new("Name is blank") if name.empty?
173
+ return DomainInvalid.new("Name starts with a dot") if name.start_with?(DOT)
174
+ return DomainInvalid.new("%s is not expected to contain a scheme" % name) if name.include?("://")
175
+
176
+ name
177
+ end
178
+
179
+ end
@@ -0,0 +1,235 @@
1
+ # frozen_string_literal: true
2
+
3
+ # = Public Suffix
4
+ #
5
+ # Domain name parser based on the Public Suffix List.
6
+ #
7
+ # Copyright (c) 2009-2019 Simone Carletti <weppos@weppos.net>
8
+
9
+ module PublicSuffix
10
+
11
+ # Domain represents a domain name, composed by a TLD, SLD and TRD.
12
+ class Domain
13
+
14
+ # Splits a name into its labels, that is, the dot-separated parts.
15
+ #
16
+ # The input is not validated: it is simply assumed to be a valid domain name.
17
+ #
18
+ # @example
19
+ #
20
+ # name_to_labels('example.com')
21
+ # # => ['example', 'com']
22
+ #
23
+ # name_to_labels('example.co.uk')
24
+ # # => ['example', 'co', 'uk']
25
+ #
26
+ # @param name [String, #to_s] The domain name to split.
27
+ # @return [Array<String>]
28
+ def self.name_to_labels(name)
29
+ name.to_s.split(DOT)
30
+ end
31
+
32
+
33
+ attr_reader :tld, :sld, :trd
34
+
35
+ # Creates and returns a new {PublicSuffix::Domain} instance.
36
+ #
37
+ # @overload initialize(tld)
38
+ # Initializes with a +tld+.
39
+ # @param [String] tld The TLD (extension)
40
+ # @overload initialize(tld, sld)
41
+ # Initializes with a +tld+ and +sld+.
42
+ # @param [String] tld The TLD (extension)
43
+ # @param [String] sld The SLD (domain)
44
+ # @overload initialize(tld, sld, trd)
45
+ # Initializes with a +tld+, +sld+ and +trd+.
46
+ # @param [String] tld The TLD (extension)
47
+ # @param [String] sld The SLD (domain)
48
+ # @param [String] trd The TRD (subdomain)
49
+ #
50
+ # @yield [self] Yields on self.
51
+ # @yieldparam [PublicSuffix::Domain] self The newly created instance
52
+ #
53
+ # @example Initialize with a TLD
54
+ # PublicSuffix::Domain.new("com")
55
+ # # => #<PublicSuffix::Domain @tld="com">
56
+ #
57
+ # @example Initialize with a TLD and SLD
58
+ # PublicSuffix::Domain.new("com", "example")
59
+ # # => #<PublicSuffix::Domain @tld="com", @sld="example", @trd=nil>
60
+ #
61
+ # @example Initialize with a TLD, SLD and TRD
62
+ # PublicSuffix::Domain.new("com", "example", "wwww")
63
+ # # => #<PublicSuffix::Domain @tld="com", @sld="example", @trd="wwww">
64
+ #
65
+ def initialize(*args)
66
+ @tld, @sld, @trd = args
67
+ yield(self) if block_given?
68
+ end
69
+
70
+ # Returns a string representation of this object, that is the full domain name (see {#name}).
71
+ #
72
+ # @return [String]
73
+ def to_s
74
+ name
75
+ end
76
+
77
+ # Returns an array containing the domain parts, ordered as [trd, sld, tld].
78
+ #
79
+ # @return [Array<String, nil>]
80
+ #
81
+ # @example
82
+ #
83
+ # PublicSuffix::Domain.new("com", "google").to_a
84
+ # # => [nil, "google", "com"]
85
+ #
86
+ # PublicSuffix::Domain.new("com", "google", "www").to_a
87
+ # # => ["www", "google", "com"]
88
+ #
89
+ def to_a
90
+ [@trd, @sld, @tld]
91
+ end
92
+
93
+ # Returns the full domain name: the TRD, SLD and TLD joined by dots, skipping any part that is nil.
94
+ #
95
+ # @return [String]
96
+ #
97
+ # @example Gets the domain name of a domain
98
+ # PublicSuffix::Domain.new("com", "google").name
99
+ # # => "google.com"
100
+ #
101
+ # @example Gets the domain name of a subdomain
102
+ # PublicSuffix::Domain.new("com", "google", "www").name
103
+ # # => "www.google.com"
104
+ #
105
+ def name
106
+ [@trd, @sld, @tld].compact.join(DOT)
107
+ end
108
+
109
+ # Returns a domain-like representation of this object
110
+ # if the object is a {#domain?}, <tt>nil</tt> otherwise.
111
+ #
112
+ # PublicSuffix::Domain.new("com").domain
113
+ # # => nil
114
+ #
115
+ # PublicSuffix::Domain.new("com", "google").domain
116
+ # # => "google.com"
117
+ #
118
+ # PublicSuffix::Domain.new("com", "google", "www").domain
119
+ # # => "google.com"
120
+ #
121
+ # This method doesn't validate the input. It handles the domain
122
+ # as a valid domain name and simply applies the necessary transformations.
123
+ #
124
+ # This method returns the SLD and TLD joined by a dot, not just the domain part.
125
+ # To get the domain part, use <tt>#sld</tt> (aka second level domain).
126
+ #
127
+ # PublicSuffix::Domain.new("com", "google", "www").domain
128
+ # # => "google.com"
129
+ #
130
+ # PublicSuffix::Domain.new("com", "google", "www").sld
131
+ # # => "google"
132
+ #
133
+ # @see #domain?
134
+ # @see #subdomain
135
+ #
136
+ # @return [String, nil]
137
+ def domain
138
+ [@sld, @tld].join(DOT) if domain?
139
+ end
140
+
141
+ # Returns a subdomain-like representation of this object
142
+ # if the object is a {#subdomain?}, <tt>nil</tt> otherwise.
143
+ #
144
+ # PublicSuffix::Domain.new("com").subdomain
145
+ # # => nil
146
+ #
147
+ # PublicSuffix::Domain.new("com", "google").subdomain
148
+ # # => nil
149
+ #
150
+ # PublicSuffix::Domain.new("com", "google", "www").subdomain
151
+ # # => "www.google.com"
152
+ #
153
+ # This method doesn't validate the input. It handles the domain
154
+ # as a valid domain name and simply applies the necessary transformations.
155
+ #
156
+ # This method returns the TRD, SLD and TLD joined by dots, not just the subdomain part.
157
+ # To get the subdomain part, use <tt>#trd</tt> (aka third level domain).
158
+ #
159
+ # PublicSuffix::Domain.new("com", "google", "www").subdomain
160
+ # # => "www.google.com"
161
+ #
162
+ # PublicSuffix::Domain.new("com", "google", "www").trd
163
+ # # => "www"
164
+ #
165
+ # @see #subdomain?
166
+ # @see #domain
167
+ #
168
+ # @return [String, nil]
169
+ def subdomain
170
+ [@trd, @sld, @tld].join(DOT) if subdomain?
171
+ end
172
+
173
+ # Checks whether <tt>self</tt> looks like a domain.
174
+ #
175
+ # This method doesn't actually validate the domain.
176
+ # It only checks whether the instance contains
177
+ # a non-nil value for the {#tld} and {#sld} attributes.
178
+ #
179
+ # @example
180
+ #
181
+ # PublicSuffix::Domain.new("com").domain?
182
+ # # => false
183
+ #
184
+ # PublicSuffix::Domain.new("com", "google").domain?
185
+ # # => true
186
+ #
187
+ # PublicSuffix::Domain.new("com", "google", "www").domain?
188
+ # # => true
189
+ #
190
+ # # An empty SLD is not a valid domain, but this returns true
191
+ # # because only nil-ness is checked, not the content.
192
+ # PublicSuffix::Domain.new("com", "").domain?
193
+ # # => true
194
+ #
195
+ # @see #subdomain?
196
+ #
197
+ # @return [Boolean]
198
+ def domain?
199
+ !(@tld.nil? || @sld.nil?)
200
+ end
201
+
202
+ # Checks whether <tt>self</tt> looks like a subdomain.
203
+ #
204
+ # This method doesn't actually validate the subdomain.
205
+ # It only checks whether the instance contains
206
+ # a non-nil value for the {#tld}, {#sld} and {#trd} attributes.
207
+ # If you also want to validate the name,
208
+ # use {PublicSuffix.valid?} instead.
209
+ #
210
+ # @example
211
+ #
212
+ # PublicSuffix::Domain.new("com").subdomain?
213
+ # # => false
214
+ #
215
+ # PublicSuffix::Domain.new("com", "google").subdomain?
216
+ # # => false
217
+ #
218
+ # PublicSuffix::Domain.new("com", "google", "www").subdomain?
219
+ # # => true
220
+ #
221
+ # # An empty TRD is not a valid subdomain, but this returns true
222
+ # # because only nil-ness is checked, not the content.
223
+ # PublicSuffix::Domain.new("com", "example", "").subdomain?
224
+ # # => true
225
+ #
226
+ # @see #domain?
227
+ #
228
+ # @return [Boolean]
229
+ def subdomain?
230
+ !(@tld.nil? || @sld.nil? || @trd.nil?)
231
+ end
232
+
233
+ end
234
+
235
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ # = Public Suffix
4
+ #
5
+ # Domain name parser based on the Public Suffix List.
6
+ #
7
+ # Copyright (c) 2009-2019 Simone Carletti <weppos@weppos.net>
8
+
9
+ module PublicSuffix
10
+
11
+ class Error < StandardError
12
+ end
13
+
14
+ # Raised when trying to parse an invalid name.
15
+ # A name is invalid when it is blank, starts with a dot, contains a scheme, or no rule is found in the definition list.
16
+ #
17
+ # @example
18
+ #
19
+ # PublicSuffix.parse("nic.test", default_rule: nil)
20
+ # # => PublicSuffix::DomainInvalid
21
+ #
22
+ # PublicSuffix.parse("http://www.nic.it")
23
+ # # => PublicSuffix::DomainInvalid
24
+ #
25
+ class DomainInvalid < Error
26
+ end
27
+
28
+ # Raised when trying to parse a name that matches a suffix.
29
+ #
30
+ # @example
31
+ #
32
+ # PublicSuffix.parse("nic.do")
33
+ # # => PublicSuffix::DomainNotAllowed
34
+ #
35
+ # PublicSuffix.parse("www.nic.do")
36
+ # # => PublicSuffix::Domain
37
+ #
38
+ class DomainNotAllowed < DomainInvalid
39
+ end
40
+
41
+ end