public_suffix 3.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rubocop.yml +36 -0
- data/.rubocop_defaults.yml +179 -0
- data/.ruby-gemset +1 -0
- data/.travis.yml +31 -0
- data/.yardopts +1 -0
- data/2.0-Upgrade.md +52 -0
- data/CHANGELOG.md +353 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +202 -0
- data/Rakefile +51 -0
- data/bin/console +15 -0
- data/data/list.txt +12966 -0
- data/lib/public_suffix.rb +179 -0
- data/lib/public_suffix/domain.rb +235 -0
- data/lib/public_suffix/errors.rb +41 -0
- data/lib/public_suffix/list.rb +247 -0
- data/lib/public_suffix/rule.rb +350 -0
- data/lib/public_suffix/version.rb +13 -0
- data/public_suffix.gemspec +25 -0
- data/test/.empty +2 -0
- data/test/acceptance_test.rb +129 -0
- data/test/benchmarks/bm_find.rb +66 -0
- data/test/benchmarks/bm_find_all.rb +102 -0
- data/test/benchmarks/bm_names.rb +91 -0
- data/test/benchmarks/bm_select.rb +26 -0
- data/test/benchmarks/bm_select_incremental.rb +25 -0
- data/test/benchmarks/bm_valid.rb +101 -0
- data/test/profilers/domain_profiler.rb +12 -0
- data/test/profilers/find_profiler.rb +12 -0
- data/test/profilers/find_profiler_jp.rb +12 -0
- data/test/profilers/initialization_profiler.rb +11 -0
- data/test/profilers/list_profsize.rb +11 -0
- data/test/profilers/object_binsize.rb +57 -0
- data/test/psl_test.rb +52 -0
- data/test/test_helper.rb +18 -0
- data/test/tests.txt +98 -0
- data/test/unit/domain_test.rb +106 -0
- data/test/unit/errors_test.rb +25 -0
- data/test/unit/list_test.rb +241 -0
- data/test/unit/public_suffix_test.rb +188 -0
- data/test/unit/rule_test.rb +222 -0
- metadata +151 -0
@@ -0,0 +1,179 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# = Public Suffix
|
4
|
+
#
|
5
|
+
# Domain name parser based on the Public Suffix List.
|
6
|
+
#
|
7
|
+
# Copyright (c) 2009-2019 Simone Carletti <weppos@weppos.net>
|
8
|
+
|
9
|
+
require_relative "public_suffix/domain"
|
10
|
+
require_relative "public_suffix/version"
|
11
|
+
require_relative "public_suffix/errors"
|
12
|
+
require_relative "public_suffix/rule"
|
13
|
+
require_relative "public_suffix/list"
|
14
|
+
|
15
|
+
# PublicSuffix is a Ruby domain name parser based on the Public Suffix List.
|
16
|
+
#
|
17
|
+
# The [Public Suffix List](https://publicsuffix.org) is a cross-vendor initiative
|
18
|
+
# to provide an accurate list of domain name suffixes.
|
19
|
+
#
|
20
|
+
# The Public Suffix List is an initiative of the Mozilla Project,
|
21
|
+
# but is maintained as a community resource. It is available for use in any software,
|
22
|
+
# but was originally created to meet the needs of browser manufacturers.
|
23
|
+
module PublicSuffix

  DOT = "."
  BANG = "!"
  STAR = "*"

  # Parses +name+ and returns the {PublicSuffix::Domain} instance.
  #
  # @example Parse a valid domain
  #   PublicSuffix.parse("google.com")
  #   # => #<PublicSuffix::Domain:0x007fec2e51e588 @sld="google", @tld="com", @trd=nil>
  #
  # @example Parse a valid subdomain
  #   PublicSuffix.parse("www.google.com")
  #   # => #<PublicSuffix::Domain:0x007fec276d4cf8 @sld="google", @tld="com", @trd="www">
  #
  # @example Parse a fully qualified domain
  #   PublicSuffix.parse("google.com.")
  #   # => #<PublicSuffix::Domain:0x007fec257caf38 @sld="google", @tld="com", @trd=nil>
  #
  # @example Parse a fully qualified domain (subdomain)
  #   PublicSuffix.parse("www.google.com.")
  #   # => #<PublicSuffix::Domain:0x007fec27b6bca8 @sld="google", @tld="com", @trd="www">
  #
  # @example Parse an invalid (unlisted) domain
  #   PublicSuffix.parse("x.yz")
  #   # => #<PublicSuffix::Domain:0x007fec2f49bec0 @sld="x", @tld="yz", @trd=nil>
  #
  # @example Parse an invalid (unlisted) domain with strict checking (without applying the default * rule)
  #   PublicSuffix.parse("x.yz", default_rule: nil)
  #   # => PublicSuffix::DomainInvalid: `x.yz` is not a valid domain
  #
  # @example Parse a URL (not supported, only domains)
  #   PublicSuffix.parse("http://www.google.com")
  #   # => PublicSuffix::DomainInvalid: http://www.google.com is not expected to contain a scheme
  #
  #
  # @param  [String, #to_s] name The domain name or fully qualified domain name to parse.
  # @param  [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
  # @param  [Object, nil] default_rule The default rule handed to the list lookup,
  #   defaults to +list.default_rule+. Pass +nil+ for strict checking (see the example above).
  # @param  [Boolean] ignore_private Forwarded as-is to the list lookup.
  # @return [PublicSuffix::Domain]
  #
  # @raise [PublicSuffix::DomainInvalid]
  #   If domain is not a valid domain.
  # @raise [PublicSuffix::DomainNotAllowed]
  #   If a rule for +domain+ is found, but the rule doesn't allow +domain+.
  def self.parse(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
    # normalize returns (does not raise) a DomainInvalid instance for rejected input.
    what = normalize(name)
    raise what if what.is_a?(DomainInvalid)

    rule = list.find(what, default: default_rule, ignore_private: ignore_private)

    # rubocop:disable Style/IfUnlessModifier
    if rule.nil?
      raise DomainInvalid, "`#{what}` is not a valid domain"
    end
    if rule.decompose(what).last.nil?
      raise DomainNotAllowed, "`#{what}` is not allowed according to Registry policy"
    end

    # rubocop:enable Style/IfUnlessModifier

    decompose(what, rule)
  end

  # Checks whether +domain+ is assigned and allowed, without actually parsing it.
  #
  # This method doesn't care whether domain is a domain or subdomain.
  # Unless a different +list+ is given, the validation is performed
  # using the default {PublicSuffix::List}.
  #
  # @example Validate a valid domain
  #   PublicSuffix.valid?("example.com")
  #   # => true
  #
  # @example Validate a valid subdomain
  #   PublicSuffix.valid?("www.example.com")
  #   # => true
  #
  # @example Validate a not-listed domain
  #   PublicSuffix.valid?("example.tldnotlisted")
  #   # => true
  #
  # @example Validate a not-listed domain with strict checking (without applying the default * rule)
  #   PublicSuffix.valid?("example.tldnotlisted")
  #   # => true
  #   PublicSuffix.valid?("example.tldnotlisted", default_rule: nil)
  #   # => false
  #
  # @example Validate a fully qualified domain
  #   PublicSuffix.valid?("google.com.")
  #   # => true
  #   PublicSuffix.valid?("www.google.com.")
  #   # => true
  #
  # @example Check a URL (which is not a valid domain)
  #   PublicSuffix.valid?("http://www.example.com")
  #   # => false
  #
  #
  # @param  [String, #to_s] name The domain name or fully qualified domain name to validate.
  # @param  [PublicSuffix::List] list The rule list to search, defaults to the default {PublicSuffix::List}
  # @param  [Object, nil] default_rule The default rule handed to the list lookup,
  #   defaults to +list.default_rule+. Pass +nil+ for strict checking (see the example above).
  # @param  [Boolean] ignore_private Forwarded as-is to the list lookup.
  # @return [Boolean]
  def self.valid?(name, list: List.default, default_rule: list.default_rule, ignore_private: false)
    what = normalize(name)
    return false if what.is_a?(DomainInvalid)

    rule = list.find(what, default: default_rule, ignore_private: ignore_private)

    !rule.nil? && !rule.decompose(what).last.nil?
  end

  # Attempts to parse the name and returns the domain, if valid.
  #
  # This method doesn't raise. Instead, it returns nil if the domain is not valid for whatever reason.
  #
  # @param  [String, #to_s] name The domain name or fully qualified domain name to parse.
  # @param  [Hash] options The same keyword options accepted by {PublicSuffix.parse}
  #   (+list+, +default_rule+, +ignore_private+); they are forwarded unchanged.
  # @return [String, nil] The value of {PublicSuffix::Domain#domain} for the parsed name,
  #   or +nil+ when parsing raises a {PublicSuffix::Error}.
  def self.domain(name, **options)
    parse(name, **options).domain
  rescue PublicSuffix::Error
    nil
  end


  # private
  #
  # NOTE(review): the helpers below are meant for internal use, but nothing
  # in this module actually restricts their visibility.

  # Builds a {PublicSuffix::Domain} from +name+ using the split computed by +rule+.
  #
  # @param  [String] name The (already normalized) name to decompose.
  # @param  [#decompose] rule An object whose +decompose(name)+ returns a
  #   +[left, right]+ pair, where +right+ becomes the TLD.
  # @return [PublicSuffix::Domain]
  def self.decompose(name, rule)
    left, right = rule.decompose(name)

    parts = left.split(DOT)
    # With 0 parts, there is only a TLD and neither a domain nor a subdomain.
    # With 1 part, there is a TLD and a domain, but no subdomain.
    # With 2 or more parts, the last part is the domain and the remaining
    # parts (joined back with dots) are the subdomain.
    tld = right
    sld = parts.empty? ? nil : parts.pop
    trd = parts.empty? ? nil : parts.join(DOT)

    Domain.new(tld, sld, trd)
  end

  # Cleans up user input before it is looked up in the list.
  #
  # The name is converted to a String, stripped of surrounding whitespace,
  # stripped of one trailing dot, and downcased.
  #
  # @param  [String, #to_s] name The raw name.
  # @return [String, PublicSuffix::DomainInvalid] The normalized name, or a
  #   DomainInvalid instance (returned, not raised) when the name is blank,
  #   starts with a dot, or contains a scheme separator (+://+).
  def self.normalize(name)
    # dup so the bang methods below never mutate the caller's (or a frozen) string.
    name = name.to_s.dup
    name.strip!
    name.chomp!(DOT)
    name.downcase!

    return DomainInvalid.new("Name is blank") if name.empty?
    return DomainInvalid.new("Name starts with a dot") if name.start_with?(DOT)
    return DomainInvalid.new("%s is not expected to contain a scheme" % name) if name.include?("://")

    name
  end

end
|
@@ -0,0 +1,235 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# = Public Suffix
|
4
|
+
#
|
5
|
+
# Domain name parser based on the Public Suffix List.
|
6
|
+
#
|
7
|
+
# Copyright (c) 2009-2019 Simone Carletti <weppos@weppos.net>
|
8
|
+
|
9
|
+
module PublicSuffix

  # Domain represents a domain name, composed by a TLD, SLD and TRD.
  class Domain

    # Splits a string into the labels, that is the dot-separated parts.
    #
    # The input is not validated, but it is assumed to be a valid domain name.
    #
    # @example
    #
    #   name_to_labels('example.com')
    #   # => ['example', 'com']
    #
    #   name_to_labels('example.co.uk')
    #   # => ['example', 'co', 'uk']
    #
    # @param  name [String, #to_s] The domain name to split.
    # @return [Array<String>]
    def self.name_to_labels(name)
      name.to_s.split(DOT)
    end


    attr_reader :tld, :sld, :trd

    # Creates and returns a new {PublicSuffix::Domain} instance.
    #
    # @overload initialize(tld)
    #   Initializes with a +tld+.
    #   @param [String] tld The TLD (extension)
    # @overload initialize(tld, sld)
    #   Initializes with a +tld+ and +sld+.
    #   @param [String] tld The TLD (extension)
    #   @param [String] sld The SLD (domain)
    # @overload initialize(tld, sld, trd)
    #   Initializes with a +tld+, +sld+ and +trd+.
    #   @param [String] tld The TLD (extension)
    #   @param [String] sld The SLD (domain)
    #   @param [String] trd The TRD (subdomain)
    #
    # @yield [self] Yields on self.
    # @yieldparam [PublicSuffix::Domain] self The newly created instance
    #
    # @example Initialize with a TLD
    #   PublicSuffix::Domain.new("com")
    #   # => #<PublicSuffix::Domain @tld="com", @sld=nil, @trd=nil>
    #
    # @example Initialize with a TLD and SLD
    #   PublicSuffix::Domain.new("com", "example")
    #   # => #<PublicSuffix::Domain @tld="com", @sld="example", @trd=nil>
    #
    # @example Initialize with a TLD, SLD and TRD
    #   PublicSuffix::Domain.new("com", "example", "www")
    #   # => #<PublicSuffix::Domain @tld="com", @sld="example", @trd="www">
    #
    def initialize(*args)
      # Missing positions are assigned nil by the multiple assignment.
      @tld, @sld, @trd = args
      yield(self) if block_given?
    end

    # Returns a string representation of this object.
    #
    # @return [String]
    def to_s
      name
    end

    # Returns an array containing the domain parts,
    # in the order TRD, SLD, TLD.
    #
    # @return [Array<String, nil>]
    #
    # @example
    #
    #   PublicSuffix::Domain.new("com", "google").to_a
    #   # => [nil, "google", "com"]
    #
    #   PublicSuffix::Domain.new("com", "google", "www").to_a
    #   # => ["www", "google", "com"]
    #
    def to_a
      [@trd, @sld, @tld]
    end

    # Returns the full domain name.
    #
    # @return [String]
    #
    # @example Gets the domain name of a domain
    #   PublicSuffix::Domain.new("com", "google").name
    #   # => "google.com"
    #
    # @example Gets the domain name of a subdomain
    #   PublicSuffix::Domain.new("com", "google", "www").name
    #   # => "www.google.com"
    #
    def name
      # compact drops the parts that were not provided (nil).
      [@trd, @sld, @tld].compact.join(DOT)
    end

    # Returns a domain-like representation of this object
    # if the object is a {#domain?}, <tt>nil</tt> otherwise.
    #
    #   PublicSuffix::Domain.new("com").domain
    #   # => nil
    #
    #   PublicSuffix::Domain.new("com", "google").domain
    #   # => "google.com"
    #
    #   PublicSuffix::Domain.new("com", "google", "www").domain
    #   # => "google.com"
    #
    # This method doesn't validate the input. It handles the domain
    # as a valid domain name and simply applies the necessary transformations.
    #
    # This method returns the SLD joined with the TLD, not just the domain part.
    # To get the domain part, use <tt>#sld</tt> (aka second level domain).
    #
    #   PublicSuffix::Domain.new("com", "google", "www").domain
    #   # => "google.com"
    #
    #   PublicSuffix::Domain.new("com", "google", "www").sld
    #   # => "google"
    #
    # @see #domain?
    # @see #subdomain
    #
    # @return [String, nil]
    def domain
      [@sld, @tld].join(DOT) if domain?
    end

    # Returns a subdomain-like representation of this object
    # if the object is a {#subdomain?}, <tt>nil</tt> otherwise.
    #
    #   PublicSuffix::Domain.new("com").subdomain
    #   # => nil
    #
    #   PublicSuffix::Domain.new("com", "google").subdomain
    #   # => nil
    #
    #   PublicSuffix::Domain.new("com", "google", "www").subdomain
    #   # => "www.google.com"
    #
    # This method doesn't validate the input. It handles the domain
    # as a valid domain name and simply applies the necessary transformations.
    #
    # This method returns the TRD joined with the SLD and TLD, not just the subdomain part.
    # To get the subdomain part, use <tt>#trd</tt> (aka third level domain).
    #
    #   PublicSuffix::Domain.new("com", "google", "www").subdomain
    #   # => "www.google.com"
    #
    #   PublicSuffix::Domain.new("com", "google", "www").trd
    #   # => "www"
    #
    # @see #subdomain?
    # @see #domain
    #
    # @return [String, nil]
    def subdomain
      [@trd, @sld, @tld].join(DOT) if subdomain?
    end

    # Checks whether <tt>self</tt> looks like a domain.
    #
    # This method doesn't actually validate the domain.
    # It only checks whether the instance contains
    # a value for the {#tld} and {#sld} attributes.
    #
    # @example
    #
    #   PublicSuffix::Domain.new("com").domain?
    #   # => false
    #
    #   PublicSuffix::Domain.new("com", "google").domain?
    #   # => true
    #
    #   PublicSuffix::Domain.new("com", "google", "www").domain?
    #   # => true
    #
    #   # A missing (nil) SLD is the only thing that is detected.
    #   PublicSuffix::Domain.new("com", nil).domain?
    #   # => false
    #
    #   # This is an invalid domain, but returns true
    #   # because this method doesn't validate the content.
    #   PublicSuffix::Domain.new("tldnotlisted", "example").domain?
    #   # => true
    #
    # @see #subdomain?
    #
    # @return [Boolean]
    def domain?
      !(@tld.nil? || @sld.nil?)
    end

    # Checks whether <tt>self</tt> looks like a subdomain.
    #
    # This method doesn't actually validate the subdomain.
    # It only checks whether the instance contains
    # a value for the {#tld}, {#sld} and {#trd} attributes.
    # If you also want to validate the name,
    # use {PublicSuffix.valid?} instead.
    #
    # @example
    #
    #   PublicSuffix::Domain.new("com").subdomain?
    #   # => false
    #
    #   PublicSuffix::Domain.new("com", "google").subdomain?
    #   # => false
    #
    #   PublicSuffix::Domain.new("com", "google", "www").subdomain?
    #   # => true
    #
    #   # A missing (nil) TRD is the only thing that is detected.
    #   PublicSuffix::Domain.new("com", "example", nil).subdomain?
    #   # => false
    #
    #   # This is an invalid domain, but returns true
    #   # because this method doesn't validate the content.
    #   PublicSuffix::Domain.new("tldnotlisted", "example", "www").subdomain?
    #   # => true
    #
    # @see #domain?
    #
    # @return [Boolean]
    def subdomain?
      !(@tld.nil? || @sld.nil? || @trd.nil?)
    end

  end

end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# = Public Suffix
|
4
|
+
#
|
5
|
+
# Domain name parser based on the Public Suffix List.
|
6
|
+
#
|
7
|
+
# Copyright (c) 2009-2019 Simone Carletti <weppos@weppos.net>
|
8
|
+
|
9
|
+
module PublicSuffix

  # Base class for the PublicSuffix errors defined below.
  class Error < StandardError
  end

  # Raised when trying to parse an invalid name.
  # A name is considered invalid when it is blank, starts with a dot,
  # contains a scheme, or when no rule is found in the definition list.
  #
  # @example
  #
  #   PublicSuffix.parse("nic.test", default_rule: nil)
  #   # => PublicSuffix::DomainInvalid
  #
  #   PublicSuffix.parse("http://www.nic.it")
  #   # => PublicSuffix::DomainInvalid
  #
  class DomainInvalid < Error
  end

  # Raised when trying to parse a name that matches a suffix.
  #
  # @example
  #
  #   PublicSuffix.parse("nic.do")
  #   # => PublicSuffix::DomainNotAllowed
  #
  #   PublicSuffix.parse("www.nic.do")
  #   # => PublicSuffix::Domain
  #
  class DomainNotAllowed < DomainInvalid
  end

end
|