public_suffix_service 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +38 -0
- data/LICENSE.rdoc +25 -0
- data/README.rdoc +99 -0
- data/Rakefile +132 -0
- data/lib/public_suffix_service.rb +90 -0
- data/lib/public_suffix_service/definitions.dat +4449 -0
- data/lib/public_suffix_service/domain.rb +230 -0
- data/lib/public_suffix_service/errors.rb +25 -0
- data/lib/public_suffix_service/rule.rb +294 -0
- data/lib/public_suffix_service/rule_list.rb +246 -0
- data/lib/public_suffix_service/version.rb +30 -0
- data/public_suffix_service.gemspec +33 -0
- data/test/acceptance_test.rb +26 -0
- data/test/public_suffix_service/domain_test.rb +141 -0
- data/test/public_suffix_service/rule_list_test.rb +182 -0
- data/test/public_suffix_service/rule_test.rb +215 -0
- data/test/public_suffix_service_test.rb +62 -0
- data/test/test_helper.rb +9 -0
- metadata +100 -0
@@ -0,0 +1,246 @@
|
|
1
|
+
#
|
2
|
+
# = Public Suffix Service
|
3
|
+
#
|
4
|
+
# Domain Name parser based on the Public Suffix List
|
5
|
+
#
|
6
|
+
#
|
7
|
+
# Category:: Net
|
8
|
+
# Package:: PublicSuffixService
|
9
|
+
# Author:: Simone Carletti <weppos@weppos.net>
|
10
|
+
# License:: MIT License
|
11
|
+
#
|
12
|
+
#--
|
13
|
+
#
|
14
|
+
#++
|
15
|
+
|
16
|
+
|
17
|
+
module PublicSuffixService

  # = Rule List
  #
  # A PublicSuffixService::RuleList is a collection of one or more PublicSuffixService::Rule.
  #
  # Given a RuleList, you can add PublicSuffixService::Rule objects,
  # iterate all items in the list or search for the rule
  # which best matches a specific domain name.
  #
  #   # Create a new list
  #   list =  PublicSuffixService::RuleList.new
  #
  #   # Push two rules to the list
  #   list << PublicSuffixService::Rule.factory("it")
  #   list << PublicSuffixService::Rule.factory("com")
  #
  #   # Get the size of the list
  #   list.size
  #   # => 2
  #
  #   # Search for the rule matching given domain
  #   list.find("example.com")
  #   # => #<PublicSuffixService::Rule::Normal>
  #   list.find("example.org")
  #   # => nil
  #
  # You can create as many PublicSuffixService::RuleList instances as you want.
  # The PublicSuffixService::RuleList.default rule list is used by DomainName
  # to tokenize and validate a domain.
  #
  # PublicSuffixService::RuleList implements the Enumerable module.
  # Note that #find and #select are redefined below to take a domain name
  # instead of a block.
  #
  class RuleList
    include Enumerable

    # Gets the list of rules.
    # Each rule is expected to be a subclass of PublicSuffixService::Rule::Base.
    #
    # Returns an Array of rules.
    attr_reader :list


    # Initializes an empty PublicSuffixService::RuleList.
    # If block is given, yields on self.
    def initialize(&block) # :yields: self
      @list = []
      yield(self) if block_given?
    end

    # Checks whether two lists are equal.
    # RuleList <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
    # <tt>PublicSuffixService::RuleList</tt> and each <tt>PublicSuffixService::Rule::Base</tt>
    # in list <tt>one</tt> is available in list <tt>two</tt>,
    # in the same order.
    #
    # other - The PublicSuffixService::RuleList to compare.
    #
    # Returns true if self is equal to other.
    def ==(other)
      return false unless other.is_a?(RuleList)
      self.equal?(other) ||
      self.list == other.list
    end
    alias :eql? :==

    # Iterates each rule in the list.
    # All arguments and the block are forwarded to Array#each
    # on the underlying list.
    #
    # Returns whatever Array#each returns for the underlying list.
    def each(*args, &block)
      @list.each(*args, &block)
    end

    # Gets the list as Array.
    # The internal Array is returned as is, not a copy.
    #
    # Returns an Array.
    def to_a
      @list
    end

    # Adds the given object to the list.
    #
    # rule - The rule to add to the list.
    #        Expected to be a subclass of PublicSuffixService::Rule::Base.
    #
    # Returns self.
    def add(rule)
      @list << rule
      self
    end
    alias << add

    # Gets the number of elements in the list.
    #
    # Returns an Integer.
    def size
      @list.size
    end
    alias length size

    # Checks whether the list is empty.
    #
    # Returns true if the list contains no elements.
    def empty?
      @list.empty?
    end

    # Removes all elements.
    #
    # Returns self.
    def clear
      @list.clear
      self
    end


    # Returns the most appropriate rule for domain.
    #
    # From the Public Suffix List documentation:
    #
    # * If a hostname matches more than one rule in the file,
    #   the longest matching rule (the one with the most levels) will be used.
    # * An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
    #   An exception rule takes priority over any other matching rule.
    #
    # == Algorithm description
    #
    # * Match domain against all rules and take note of the matching ones.
    # * If no rules match, the prevailing rule is "*".
    # * If more than one rule matches, the prevailing rule is the one which is an exception rule.
    # * If there is no matching exception rule, the prevailing rule is the one with the most labels.
    # * If the prevailing rule is a exception rule, modify it by removing the leftmost label.
    # * The public suffix is the set of labels from the domain
    #   which directly match the labels of the prevailing rule (joined by dots).
    # * The registered domain is the public suffix plus one additional label.
    #
    # Note: This might not be the most efficient algorithm.
    #
    # Note: this method only implements the rule selection steps.
    # When no rule matches it returns nil; the default "*" rule
    # is not applied here.
    #
    # domain - The domain name to match, passed to each rule's match? method.
    #
    # Returns a PublicSuffixService::Rule::Base instance or nil.
    def find(domain)
      rules = select(domain)
      # The first matching exception rule wins over any other match.
      rules.detect { |r| r.type == :exception } ||
      # Otherwise keep the longest rule; on equal length the later one wins.
      rules.inject { |t,r| t.length > r.length ? t : r }
    end

    # Selects all the rules matching given domain.
    #
    # domain - The domain name to match, passed to each rule's match? method.
    #
    # Returns an Array of rules.
    # Each rule is expected to be a subclass of PublicSuffixService::Rule::Base.
    def select(domain)
      @list.select { |rule| rule.match?(domain) }
    end


    @@default = nil

    class << self

      # Gets the default <tt>PublicSuffixService::RuleList</tt>.
      # Initializes a new <tt>PublicSuffixService::RuleList</tt>
      # parsing the content of <tt>PublicSuffixService::RuleList.default_definition</tt> if necessary.
      #
      # Returns an instance of PublicSuffixService::RuleList.
      def default
        @@default ||= begin
          definition = default_definition
          begin
            parse(definition)
          ensure
            # default_definition hands back an open File: release the
            # descriptor once parsed. Objects without #close (e.g. a String)
            # are left untouched.
            if definition.respond_to?(:close) && definition.respond_to?(:closed?)
              definition.close unless definition.closed?
            end
          end
        end
      end

      # Sets the default <tt>PublicSuffixService::RuleList</tt> to <tt>value</tt>.
      #
      # value - The new PublicSuffixService::RuleList.
      #
      # Returns the new PublicSuffixService::RuleList.
      def default=(value)
        @@default = value
      end

      # Sets the default <tt>PublicSuffixService::RuleList</tt> to <tt>nil</tt>.
      #
      # Returns self.
      def clear
        self.default = nil
        self
      end

      # Resets the default <tt>PublicSuffixService::RuleList</tt> and reinitialize it
      # parsing the content of <tt>PublicSuffixService::RuleList.default_definition</tt>.
      #
      # Returns an instance of PublicSuffixService::RuleList.
      def reload
        self.clear.default
      end

      # Gets the default definition list.
      # Can be any <tt>IOStream</tt> including a <tt>File</tt> or a simple <tt>String</tt>.
      # The object must respond to <tt>#each_line</tt>.
      #
      # This implementation opens the "definitions.dat" file located
      # in the same directory as this source file. The caller is
      # responsible for closing the returned File.
      #
      # Returns an object which responds to <tt>#each_line</tt>.
      def default_definition
        File.new(File.join(File.dirname(__FILE__), "definitions.dat"))
      end


      # Parse given <tt>input</tt> treating the content as Public Suffix List.
      # See http://publicsuffix.org/format/ for more details about input format.
      #
      # Blank lines and lines starting with "//" are skipped. Every other
      # line is stripped and converted into a rule with Rule.factory.
      # The lines yielded by <tt>input</tt> are never modified in place.
      #
      # input - An object which responds to <tt>#each_line</tt>.
      #
      # Returns a new PublicSuffixService::RuleList containing the parsed rules.
      def parse(input)
        new do |list|
          input.each_line do |line|
            # Work on a stripped copy so frozen or shared lines are left alone.
            line = line.strip

            # skip blank lines and comments
            next if line.empty? || line =~ %r{\A//}

            # append rule
            list << Rule.factory(line)
          end
        end
      end

    end

  end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#
|
2
|
+
# = Public Suffix Service
|
3
|
+
#
|
4
|
+
# Domain Name parser based on the Public Suffix List
|
5
|
+
#
|
6
|
+
#
|
7
|
+
# Category:: Net
|
8
|
+
# Package:: PublicSuffixService
|
9
|
+
# Author:: Simone Carletti <weppos@weppos.net>
|
10
|
+
# License:: MIT License
|
11
|
+
#
|
12
|
+
#--
|
13
|
+
#
|
14
|
+
#++
|
15
|
+
|
16
|
+
|
17
|
+
module PublicSuffixService

  # Holds the individual components of the library version
  # and the dotted string built from them.
  module Version
    MAJOR = 0
    MINOR = 4
    PATCH = 0
    BUILD = nil

    # Dotted version string; components set to nil (such as BUILD) are omitted.
    STRING = [MAJOR, MINOR, PATCH, BUILD].reject { |part| part.nil? }.map { |part| part.to_s }.join(".")
  end

  # Shortcut to the full version string.
  VERSION = Version::STRING

end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for public_suffix_service.
# The version declared here must match PublicSuffixService::Version::STRING
# (lib/public_suffix_service/version.rb), which is 0.4.0 for this release.
Gem::Specification.new do |s|
  s.name = %q{public_suffix_service}
  s.version = "0.4.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Simone Carletti"]
  s.date = %q{2010-05-31}
  s.description = %q{ Intelligent Domain Name parser based in the Public Suffic List. Domain Name can parse and decompose a domain name into top level domain, domain and subdomains.
}
  s.email = %q{weppos@weppos.net}
  s.extra_rdoc_files = ["CHANGELOG.rdoc", "LICENSE.rdoc", "README.rdoc"]
  s.files = ["Rakefile", "CHANGELOG.rdoc", "LICENSE.rdoc", "README.rdoc", "public_suffix_service.gemspec", "test/acceptance_test.rb", "test/public_suffix_service/domain_test.rb", "test/public_suffix_service/rule_list_test.rb", "test/public_suffix_service/rule_test.rb", "test/public_suffix_service_test.rb", "test/test_helper.rb", "lib/public_suffix_service/definitions.dat", "lib/public_suffix_service/domain.rb", "lib/public_suffix_service/errors.rb", "lib/public_suffix_service/rule.rb", "lib/public_suffix_service/rule_list.rb", "lib/public_suffix_service/version.rb", "lib/public_suffix_service.rb"]
  s.homepage = %q{http://www.simonecarletti.com/code/public-suffix-service}
  s.rdoc_options = ["--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{Domain Name parser based on the Public Suffix List}

  # mocha is only needed to run the test suite. RubyGems versions without
  # development dependencies fall back to a regular dependency.
  if s.respond_to? :specification_version then
    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<mocha>, [">= 0"])
    else
      s.add_dependency(%q<mocha>, [">= 0"])
    end
  else
    s.add_dependency(%q<mocha>, [">= 0"])
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class AcceptanceTest < Test::Unit::TestCase

  # Maps a host name to the parts expected from PublicSuffixService.parse,
  # in the order [trd, sld, tld].
  CASES = {
    "google.com"                => [nil,   "google",       "com"],
    "foo.google.com"            => ["foo", "google",       "com"],

    "verybritish.co.uk"         => [nil,   "verybritish",  "co.uk"],
    "foo.verybritish.co.uk"     => ["foo", "verybritish",  "co.uk"],

    "parliament.uk"             => [nil,   "parliament",   "uk"],
    "foo.parliament.uk"         => ["foo", "parliament",   "uk"],
  }

  # Parses every host name in CASES and checks tld, sld and trd
  # against the expected values.
  def test_all
    CASES.each_pair do |hostname, (expected_trd, expected_sld, expected_tld)|
      parsed = PublicSuffixService.parse(hostname)
      assert_equal expected_tld, parsed.tld, "Invalid tld for '#{hostname}'"
      assert_equal expected_sld, parsed.sld, "Invalid sld for '#{hostname}'"
      assert_equal expected_trd, parsed.trd, "Invalid trd for '#{hostname}'"
    end
  end

end
|
@@ -0,0 +1,141 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Unit tests for PublicSuffixService::Domain.
#
# Domains are built with Domain.new(tld, sld = nil, trd = nil).
# Across these tests "com" is treated as a TLD with a matching rule
# and "zip" as a TLD without one (see test_rule and test_valid_question).
class PublicSuffixService::DomainTest < Test::Unit::TestCase

  def setup
    @klass = PublicSuffixService::Domain
  end


  def test_initialize_with_tld
    domain = @klass.new("com")
    assert_equal "com", domain.tld
    assert_nil domain.sld
    assert_nil domain.trd
  end

  def test_initialize_with_tld_and_sld
    domain = @klass.new("com", "google")
    assert_equal "com",     domain.tld
    assert_equal "google",  domain.sld
    assert_nil domain.trd
  end

  def test_initialize_with_tld_and_sld_and_trd
    domain = @klass.new("com", "google", "www")
    assert_equal "com",     domain.tld
    assert_equal "google",  domain.sld
    assert_equal "www",     domain.trd
  end


  def test_to_s
    assert_equal "com",             @klass.new("com").to_s
    assert_equal "google.com",      @klass.new("com", "google").to_s
    assert_equal "www.google.com",  @klass.new("com", "google", "www").to_s
  end

  def test_to_a
    assert_equal [nil, nil, "com"],         @klass.new("com").to_a
    assert_equal [nil, "google", "com"],    @klass.new("com", "google").to_a
    assert_equal ["www", "google", "com"],  @klass.new("com", "google", "www").to_a
  end


  def test_tld
    assert_equal "com", @klass.new("com", "google", "www").tld
  end

  def test_sld
    assert_equal "google", @klass.new("com", "google", "www").sld
  end

  # Previously named test_tld as well, which silently replaced the
  # real test_tld above so the tld accessor was never exercised.
  def test_trd
    assert_equal "www", @klass.new("com", "google", "www").trd
  end


  def test_name
    assert_equal "com",             @klass.new("com").name
    assert_equal "google.com",      @klass.new("com", "google").name
    assert_equal "www.google.com",  @klass.new("com", "google", "www").name
  end

  def test_domain
    assert_nil @klass.new("com").domain
    assert_nil @klass.new("zip").domain
    assert_equal "google.com", @klass.new("com", "google").domain
    assert_equal "google.zip", @klass.new("zip", "google").domain
    assert_equal "google.com", @klass.new("com", "google", "www").domain
    assert_equal "google.zip", @klass.new("zip", "google", "www").domain
  end

  def test_subdomain
    assert_nil @klass.new("com").subdomain
    assert_nil @klass.new("zip").subdomain
    assert_nil @klass.new("com", "google").subdomain
    assert_nil @klass.new("zip", "google").subdomain
    assert_equal "www.google.com", @klass.new("com", "google", "www").subdomain
    assert_equal "www.google.zip", @klass.new("zip", "google", "www").subdomain
  end

  def test_rule
    assert_nil @klass.new("zip").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com", "google").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com", "google", "www").rule
  end


  def test_domain_question
    assert  @klass.new("com", "google").domain?
    assert  @klass.new("zip", "google").domain?
    assert  @klass.new("com", "google", "www").domain?
    assert !@klass.new("com").domain?
  end

  def test_subdomain_question
    assert  @klass.new("com", "google", "www").subdomain?
    assert  @klass.new("zip", "google", "www").subdomain?
    assert !@klass.new("com").subdomain?
    assert !@klass.new("com", "google").subdomain?
  end

  def test_is_a_domain_question
    assert  @klass.new("com", "google").is_a_domain?
    assert  @klass.new("zip", "google").is_a_domain?
    assert !@klass.new("com", "google", "www").is_a_domain?
    assert !@klass.new("com").is_a_domain?
  end

  def test_is_a_subdomain_question
    assert  @klass.new("com", "google", "www").is_a_subdomain?
    assert  @klass.new("zip", "google", "www").is_a_subdomain?
    assert !@klass.new("com").is_a_subdomain?
    assert !@klass.new("com", "google").is_a_subdomain?
  end

  def test_valid_question
    assert  @klass.new("com").valid?
    assert  @klass.new("com", "google").valid?
    assert  @klass.new("com", "google", "www").valid?
    assert !@klass.new("zip").valid?
    assert !@klass.new("zip", "google").valid?
    assert !@klass.new("zip", "google", "www").valid?
  end

  def test_valid_domain_question
    assert  @klass.new("com", "google").valid_domain?
    assert !@klass.new("zip", "google").valid_domain?
    assert  @klass.new("com", "google", "www").valid_domain?
    assert !@klass.new("com").valid_domain?
  end

  def test_valid_subdomain_question
    assert  @klass.new("com", "google", "www").valid_subdomain?
    assert !@klass.new("zip", "google", "www").valid_subdomain?
    assert !@klass.new("com").valid_subdomain?
    assert !@klass.new("com", "google").valid_subdomain?
  end

end
|