public_suffix 1.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gemtest +0 -0
- data/.gitignore +4 -0
- data/.travis.yml +11 -0
- data/.yardopts +2 -0
- data/CHANGELOG.md +134 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +22 -0
- data/LICENSE +22 -0
- data/README.md +151 -0
- data/Rakefile +109 -0
- data/lib/public_suffix.rb +134 -0
- data/lib/public_suffix/definitions.txt +5190 -0
- data/lib/public_suffix/domain.rb +387 -0
- data/lib/public_suffix/errors.rb +57 -0
- data/lib/public_suffix/list.rb +283 -0
- data/lib/public_suffix/rule.rb +373 -0
- data/lib/public_suffix/rule_list.rb +14 -0
- data/lib/public_suffix/version.rb +23 -0
- data/public_suffix.gemspec +37 -0
- data/test/acceptance_test.rb +36 -0
- data/test/test_helper.rb +6 -0
- data/test/unit/domain_test.rb +170 -0
- data/test/unit/errors_test.rb +23 -0
- data/test/unit/list_test.rb +193 -0
- data/test/unit/public_suffix_test.rb +85 -0
- data/test/unit/rule_test.rb +307 -0
- metadata +111 -0
@@ -0,0 +1,373 @@
|
|
1
|
+
#--
|
2
|
+
# Public Suffix
|
3
|
+
#
|
4
|
+
# Domain name parser based on the Public Suffix List.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
|
7
|
+
#++
|
8
|
+
|
9
|
+
|
10
|
+
module PublicSuffix
|
11
|
+
|
12
|
+
# A Rule is a special object which holds a single definition
|
13
|
+
# of the Public Suffix List.
|
14
|
+
#
|
15
|
+
# There are 3 types of rules, each one represented by a specific
|
16
|
+
# subclass within the +PublicSuffix::Rule+ namespace.
|
17
|
+
#
|
18
|
+
# To create a new Rule, use the {PublicSuffix::Rule.factory} method.
|
19
|
+
#
|
20
|
+
# PublicSuffix::Rule.factory("ar")
|
21
|
+
# # => #<PublicSuffix::Rule::Normal>
|
22
|
+
#
|
23
|
+
class Rule

  # Builds the rule object matching the given definition.
  #
  # The rule class is selected from the first character of +name+:
  # <tt>*</tt> creates a Wildcard rule, <tt>!</tt> creates an Exception rule
  # and anything else (including an empty name) creates a Normal rule.
  # The +name+ is handed to the constructor of that class unchanged.
  #
  # @param  [String, #to_s] name The rule definition.
  #
  # @return [PublicSuffix::Rule::*] A rule instance.
  #
  # @example Creates a Normal rule
  #   PublicSuffix::Rule.factory("ar")
  #   # => #<PublicSuffix::Rule::Normal>
  #
  # @example Creates a Wildcard rule
  #   PublicSuffix::Rule.factory("*.ar")
  #   # => #<PublicSuffix::Rule::Wildcard>
  #
  # @example Creates an Exception rule
  #   PublicSuffix::Rule.factory("!congresodelalengua3.ar")
  #   # => #<PublicSuffix::Rule::Exception>
  #
  def self.factory(name)
    klass = case name.to_s[0, 1]
      when "*" then :Wildcard
      when "!" then :Exception
      else          :Normal
    end
    # const_get is sent to the receiver, so :Exception resolves to the
    # Exception class nested in this namespace, not to the core ::Exception.
    const_get(klass).new(name)
  end


  #
  # = Abstract rule class
  #
  # This represents the base class for a Rule definition
  # in the {Public Suffix List}[http://publicsuffix.org].
  #
  # This is intended to be an Abstract class
  # and you shouldn't create a direct instance. The only purpose
  # of this class is to expose a common interface
  # for all the available subclasses.
  #
  # * {PublicSuffix::Rule::Normal}
  # * {PublicSuffix::Rule::Exception}
  # * {PublicSuffix::Rule::Wildcard}
  #
  # == Properties
  #
  # A rule is composed by 4 properties:
  #
  # name    - The name of the rule, corresponding to the rule definition
  #           in the public suffix list
  # value   - The value, a normalized version of the rule name.
  #           The normalization process depends on rule type.
  # type    - The rule type (:normal, :wildcard, :exception)
  # labels  - The canonicalized rule name
  #
  # Here's an example
  #
  #   PublicSuffix::Rule.factory("*.google.com")
  #   #<PublicSuffix::Rule::Wildcard:0x1015c14b0
  #       @labels=["com", "google"],
  #       @name="*.google.com",
  #       @type=:wildcard,
  #       @value="google.com"
  #   >
  #
  # == Rule Creation
  #
  # The best way to create a new rule is passing the rule name
  # to the <tt>PublicSuffix::Rule.factory</tt> method.
  #
  #   PublicSuffix::Rule.factory("com")
  #   # => PublicSuffix::Rule::Normal
  #
  #   PublicSuffix::Rule.factory("*.com")
  #   # => PublicSuffix::Rule::Wildcard
  #
  # This method will detect the rule type and create an instance
  # from the proper rule class.
  #
  # == Rule Usage
  #
  # A rule describes the composition of a domain name
  # and explains how to tokenize the domain name
  # into tld, sld and trd.
  #
  # To use a rule, you first need to be sure the domain you want to tokenize
  # can be handled by the current rule.
  # You can use the <tt>#match?</tt> method.
  #
  #   rule = PublicSuffix::Rule.factory("com")
  #
  #   rule.match?("google.com")
  #   # => true
  #
  #   rule.match?("google.net")
  #   # => false
  #
  # Rule order is significant. A domain can match more than one rule.
  # See the {Public Suffix Documentation}[http://publicsuffix.org/format/]
  # to learn more about rule priority.
  #
  # When you have the right rule, you can use it to tokenize the domain name.
  #
  #   rule = PublicSuffix::Rule.factory("com")
  #
  #   rule.decompose("google.com")
  #   # => ["google", "com"]
  #
  #   rule.decompose("www.google.com")
  #   # => ["www.google", "com"]
  #
  # @abstract
  #
  class Base

    attr_reader :name, :value, :type, :labels

    # Initializes a new rule with name and value.
    # If value is +nil+, name also becomes the value for this rule.
    #
    # The rule type is derived from the last segment of the class name,
    # and the labels are computed from the value by
    # <tt>Domain.domain_to_labels</tt>.
    #
    # @param [String, #to_s] name
    #   The name of the rule
    # @param [String, #to_s] value
    #   The value of the rule. If nil, defaults to +name+.
    #
    def initialize(name, value = nil)
      @name   = name.to_s
      # The value is always stored as a String because #parts splits it.
      @value  = (value || @name).to_s
      @type   = self.class.name.split("::").last.downcase.to_sym
      @labels = Domain.domain_to_labels(@value)
    end

    # Checks whether this rule is equal to <tt>other</tt>.
    #
    # @param [PublicSuffix::Rule::*] other
    #   The rule to compare.
    #
    # @return [Boolean]
    #   Returns true if other is an instance of this rule's class
    #   (or of a subclass) and has the same name, false otherwise.
    def ==(other)
      return false unless other.is_a?(self.class)
      equal?(other) || name == other.name
    end
    alias :eql? :==

    # Computes the hash code from the rule name, the same attribute
    # compared by #==, so that equal rules are interchangeable
    # as Hash keys and collapse in Array#uniq.
    #
    # @return [Integer]
    def hash
      name.hash
    end


    # Checks if this rule matches +domain+.
    #
    # The rule matches when every rule label equals the domain label
    # at the same position.
    #
    # @param [String, #to_s] domain
    #   The domain name to check.
    #
    # @return [Boolean]
    #
    # @example
    #   rule = Rule.factory("com")
    #   # #<PublicSuffix::Rule::Normal>
    #   rule.match?("example.com")
    #   # => true
    #   rule.match?("example.net")
    #   # => false
    #
    def match?(domain)
      odiff(labels, Domain.domain_to_labels(domain)).empty?
    end

    # Checks if this rule allows +domain+.
    #
    # A domain is allowed when #decompose can split it,
    # that is when the last element of the decomposition is not nil.
    #
    # @param [String, #to_s] domain
    #   The domain name to check.
    #
    # @return [Boolean]
    #
    # @example
    #   rule = Rule.factory("*.do")
    #   # => #<PublicSuffix::Rule::Wildcard>
    #   rule.allow?("example.do")
    #   # => false
    #   rule.allow?("www.example.do")
    #   # => true
    #
    def allow?(domain)
      !decompose(domain).last.nil?
    end


    # Gets the length of this rule for comparison.
    # The length usually matches the number of rule +parts+.
    #
    # Subclasses might actually override this method.
    #
    # @return [Integer] The number of parts.
    def length
      parts.length
    end

    #
    # @raise  [NotImplementedError]
    # @abstract
    def parts
      raise NotImplementedError, "#{self.class}#parts is not implemented"
    end

    #
    # @param [String, #to_s] domain
    #   The domain name to decompose.
    #
    # @return [Array<String, nil>]
    #
    # @raise  [NotImplementedError]
    # @abstract
    def decompose(domain)
      raise NotImplementedError, "#{self.class}#decompose is not implemented"
    end


    private

      # Returns the elements of +one+ left over after removing
      # the leading run of elements that +one+ and +two+ share
      # position by position.
      #
      # @param [Array] one
      # @param [Array] two
      #
      # @return [Array] Empty when +two+ starts with all elements of +one+.
      def odiff(one, two)
        shared = one.zip(two).take_while { |left, right| left == right }.length
        one[shared..-1]
      end

      # Joins the rule parts into a regular expression source
      # matching them literally, separated by dots.
      #
      # @return [String]
      def suffix_pattern
        parts.map { |part| Regexp.escape(part) }.join('\.')
      end

      # Splits +domain+ into the portion on the left of the suffix
      # and the suffix itself. A single trailing dot is ignored.
      #
      # The expression is anchored with \A and \z, so the whole
      # string has to match: input containing a line break
      # never matches.
      #
      # @param [String, #to_s] domain
      #   The domain name to split.
      # @param [String] pattern
      #   The regular expression source describing the suffix.
      #
      # @return [Array<String, nil>]
      #   The array with [left, suffix], or [nil, nil]
      #   when the domain doesn't end with the suffix.
      def split_domain(domain, pattern)
        match = /\A(.*)\.(#{pattern})\z/.match(domain.to_s.chomp("."))
        match ? [match[1], match[2]] : [nil, nil]
      end

  end

  class Normal < Base

    # Initializes a new rule with +name+.
    # The name is used as the rule value without changes.
    #
    # @param [String, #to_s] name
    #   The name of this rule.
    #
    def initialize(name)
      super(name, name.to_s)
    end

    # dot-split rule value and returns all rule parts
    # in the order they appear in the value.
    #
    # @return [Array<String>]
    def parts
      @parts ||= @value.split(".")
    end

    # Decomposes the domain according to rule properties.
    #
    # @param [String, #to_s] domain
    #   The domain name to decompose.
    #
    # @return [Array<String, nil>]
    #   The array with [trd + sld, tld],
    #   or [nil, nil] when the domain can't be decomposed.
    #
    def decompose(domain)
      split_domain(domain, suffix_pattern)
    end

  end

  class Wildcard < Base

    # Initializes a new rule with +name+.
    # The rule value is the name without its first two characters
    # (the leading <tt>*.</tt>).
    #
    # @param [String, #to_s] name
    #   The name of this rule.
    #
    def initialize(name)
      super(name, name.to_s[2..-1])
    end

    # dot-split rule value and returns all rule parts
    # in the order they appear in the value.
    #
    # @return [Array<String>]
    def parts
      @parts ||= @value.split(".")
    end

    # Overwrites the default implementation to cope with
    # the +*+ char.
    #
    # @return [Integer] The number of parts.
    def length
      parts.length + 1 # * counts as 1
    end

    # Decomposes the domain according to rule properties.
    # The label matched by the +*+ char belongs to the tld.
    #
    # @param [String, #to_s] domain
    #   The domain name to decompose.
    #
    # @return [Array<String, nil>]
    #   The array with [trd + sld, tld],
    #   or [nil, nil] when the domain can't be decomposed.
    #
    def decompose(domain)
      split_domain(domain, ".*?\\.#{suffix_pattern}")
    end

  end

  class Exception < Base

    # Initializes a new rule with +name+.
    # The rule value is the name without its first character
    # (the leading <tt>!</tt>).
    #
    # @param [String, #to_s] name The name of this rule.
    #
    def initialize(name)
      super(name, name.to_s[1..-1])
    end

    # dot-split rule value and returns all rule parts
    # in the order they appear in the value.
    # The leftmost label is not considered a label.
    #
    # See http://publicsuffix.org/format/:
    # If the prevailing rule is a exception rule,
    # modify it by removing the leftmost label.
    #
    # @return [Array<String>]
    def parts
      @parts ||= @value.split(".")[1..-1]
    end

    # Decomposes the domain according to rule properties.
    #
    # @param [String, #to_s] domain
    #   The domain name to decompose.
    #
    # @return [Array<String, nil>]
    #   The array with [trd + sld, tld],
    #   or [nil, nil] when the domain can't be decomposed.
    #
    def decompose(domain)
      split_domain(domain, suffix_pattern)
    end

  end

end
|
372
|
+
|
373
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
#--
|
2
|
+
# Public Suffix
|
3
|
+
#
|
4
|
+
# Domain name parser based on the Public Suffix List.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
|
7
|
+
#++
|
8
|
+
|
9
|
+
|
10
|
+
# Loading this file prints a deprecation notice on standard error,
# then exposes the list class under its old constant name.
deprecation_notice = "The PublicSuffix::RuleList object has been deprecated and will be removed in PublicSuffix 1.1. Please use PublicSuffix::List instead."
warn(deprecation_notice)

module PublicSuffix
  # Deprecated alias, points to the very same object as List.
  RuleList = List
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#--
|
2
|
+
# Public Suffix
|
3
|
+
#
|
4
|
+
# Domain name parser based on the Public Suffix List.
|
5
|
+
#
|
6
|
+
# Copyright (c) 2009-2011 Simone Carletti <weppos@weppos.net>
|
7
|
+
#++
|
8
|
+
|
9
|
+
|
10
|
+
module PublicSuffix

  # Holds the individual components of the library version number.
  module Version
    MAJOR = 1
    MINOR = 0
    PATCH = 0
    # Pre-release identifier. Frozen, because String constants
    # can otherwise be modified in place.
    BUILD = "rc1".freeze

    # The components joined by dots, skipping the nil ones.
    STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join(".").freeze
  end

  # The full version of the library, the same object as Version::STRING.
  VERSION = Version::STRING

end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
  s.name = "public_suffix"
  s.version = "1.0.0.rc1"

  # Older RubyGems releases don't know this attribute.
  if s.respond_to?(:required_rubygems_version=)
    s.required_rubygems_version = Gem::Requirement.new("> 1.3.1")
  end
  s.authors = ["Simone Carletti"]
  s.date = "2011-12-23"
  s.description = "PublicSuffix can parse and decompose a domain name into top level domain, domain and subdomains."
  s.email = "weppos@weppos.net"
  s.files = [
    ".gemtest",
    ".gitignore",
    ".travis.yml",
    ".yardopts",
    "CHANGELOG.md",
    "Gemfile",
    "Gemfile.lock",
    "LICENSE",
    "README.md",
    "Rakefile",
    "lib/public_suffix.rb",
    "lib/public_suffix/definitions.txt",
    "lib/public_suffix/domain.rb",
    "lib/public_suffix/errors.rb",
    "lib/public_suffix/list.rb",
    "lib/public_suffix/rule.rb",
    "lib/public_suffix/rule_list.rb",
    "lib/public_suffix/version.rb",
    "public_suffix.gemspec",
    "test/acceptance_test.rb",
    "test/test_helper.rb",
    "test/unit/domain_test.rb",
    "test/unit/errors_test.rb",
    "test/unit/list_test.rb",
    "test/unit/public_suffix_test.rb",
    "test/unit/rule_test.rb"
  ]
  s.homepage = "http://www.simonecarletti.com/code/public_suffix_service"
  s.require_paths = ["lib"]
  s.required_ruby_version = Gem::Requirement.new(">= 1.8.7")
  s.rubygems_version = "1.8.11"
  s.summary = "Domain name parser based in the Public Suffix List."
  s.test_files = [
    "test/acceptance_test.rb",
    "test/test_helper.rb",
    "test/unit/domain_test.rb",
    "test/unit/errors_test.rb",
    "test/unit/list_test.rb",
    "test/unit/public_suffix_test.rb",
    "test/unit/rule_test.rb"
  ]

  # Development dependencies are declared as such only when the running
  # RubyGems exposes specification_version and is at least 1.2.0.
  # Otherwise they are registered with add_dependency.
  development_dependencies_supported = false
  if s.respond_to?(:specification_version)
    s.specification_version = 3
    development_dependencies_supported = Gem::Version.new(Gem::VERSION) >= Gem::Version.new("1.2.0")
  end

  %w[rake mocha yard].each do |gem_name|
    if development_dependencies_supported
      s.add_development_dependency(gem_name, [">= 0"])
    else
      s.add_dependency(gem_name, [">= 0"])
    end
  end
end
|