public_suffix_service 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +38 -0
- data/LICENSE.rdoc +25 -0
- data/README.rdoc +99 -0
- data/Rakefile +132 -0
- data/lib/public_suffix_service.rb +90 -0
- data/lib/public_suffix_service/definitions.dat +4449 -0
- data/lib/public_suffix_service/domain.rb +230 -0
- data/lib/public_suffix_service/errors.rb +25 -0
- data/lib/public_suffix_service/rule.rb +294 -0
- data/lib/public_suffix_service/rule_list.rb +246 -0
- data/lib/public_suffix_service/version.rb +30 -0
- data/public_suffix_service.gemspec +33 -0
- data/test/acceptance_test.rb +26 -0
- data/test/public_suffix_service/domain_test.rb +141 -0
- data/test/public_suffix_service/rule_list_test.rb +182 -0
- data/test/public_suffix_service/rule_test.rb +215 -0
- data/test/public_suffix_service_test.rb +62 -0
- data/test/test_helper.rb +9 -0
- metadata +100 -0
@@ -0,0 +1,246 @@
|
|
1
|
+
#
|
2
|
+
# = Public Suffix Service
|
3
|
+
#
|
4
|
+
# Domain Name parser based on the Public Suffix List
|
5
|
+
#
|
6
|
+
#
|
7
|
+
# Category:: Net
|
8
|
+
# Package:: PublicSuffixService
|
9
|
+
# Author:: Simone Carletti <weppos@weppos.net>
|
10
|
+
# License:: MIT License
|
11
|
+
#
|
12
|
+
#--
|
13
|
+
#
|
14
|
+
#++
|
15
|
+
|
16
|
+
|
17
|
+
module PublicSuffixService
|
18
|
+
|
19
|
+
# = Rule List
|
20
|
+
#
|
21
|
+
# A PublicSuffixService::RuleList is a collection of one or more PublicSuffixService::Rule.
|
22
|
+
#
|
23
|
+
# Given a RuleList, you can add or remove PublicSuffixService::Rule,
|
24
|
+
# iterate all items in the list or search for the first rule
|
25
|
+
# which matches a specific domain name.
|
26
|
+
#
|
27
|
+
# # Create a new list
|
28
|
+
# list = PublicSuffixService::RuleList.new
|
29
|
+
#
|
30
|
+
# # Push two rules to the list
|
31
|
+
# list << PublicSuffixService::Rule.factory("it")
|
32
|
+
# list << PublicSuffixService::Rule.factory("com")
|
33
|
+
#
|
34
|
+
# # Get the size of the list
|
35
|
+
# list.size
|
36
|
+
# # => 2
|
37
|
+
#
|
38
|
+
# # Search for the rule matching given domain
|
39
|
+
# list.find("example.com")
|
40
|
+
# # => #<PublicSuffixService::Rule::Normal>
|
41
|
+
# list.find("example.org")
|
42
|
+
# # => nil
|
43
|
+
#
|
44
|
+
# You can create as many PublicSuffixService::RuleList instances as you want.
|
45
|
+
# The PublicSuffixService::RuleList.default rule list is used by DomainName
|
46
|
+
# to tokenize and validate a domain.
|
47
|
+
#
|
48
|
+
# PublicSuffixService::RuleList implements Enumerable module.
|
49
|
+
#
|
50
|
+
class RuleList
  include Enumerable

  # Gets the list of rules.
  # Each rule is expected to be a subclass of PublicSuffixService::Rule::Base.
  #
  # Returns an Array of rules.
  attr_reader :list


  # Initializes an empty PublicSuffixService::RuleList.
  # If block is given, yields on self.
  def initialize(&block) # :yields: self
    @list = []
    yield(self) if block_given?
  end

  # Checks whether two lists are equal.
  # RuleList <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
  # <tt>PublicSuffixService::RuleList</tt> and each <tt>PublicSuffixService::Rule::Base</tt>
  # in list <tt>one</tt> is available in list <tt>two</tt>,
  # in the same order.
  #
  # other - The PublicSuffixService::RuleList to compare.
  #
  # Returns true if self is equal to other.
  def ==(other)
    return false unless other.is_a?(RuleList)
    equal?(other) || list == other.list
  end
  alias :eql? :==

  # Iterates each rule in the list.
  # All arguments and the block are forwarded to the underlying Array#each.
  #
  # Returns whatever Array#each returns for the internal list.
  def each(*args, &block)
    @list.each(*args, &block)
  end

  # Gets the list as Array.
  # Note that this is the internal Array itself, not a copy.
  #
  # Returns an Array.
  def to_a
    @list
  end

  # Adds the given object to the list.
  #
  # rule - The rule to add to the list.
  #        Expected to be a subclass of PublicSuffixService::Rule::Base.
  #
  # Returns self.
  def add(rule)
    @list << rule
    self
  end
  alias << add

  # Gets the number of elements in the list.
  #
  # Returns an Integer.
  def size
    @list.size
  end
  alias length size

  # Checks whether the list is empty.
  #
  # Returns true if the list contains no elements.
  def empty?
    @list.empty?
  end

  # Removes all elements.
  #
  # Returns self.
  def clear
    @list.clear
    self
  end


  # Returns the most appropriate rule for domain.
  #
  # From the Public Suffix List documentation:
  #
  # * If a hostname matches more than one rule in the file,
  #   the longest matching rule (the one with the most levels) will be used.
  # * An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
  #   An exception rule takes priority over any other matching rule.
  #
  # == Algorithm description
  #
  # * Match domain against all rules and take note of the matching ones.
  # * If no rules match, the prevailing rule is "*".
  # * If more than one rule matches, the prevailing rule is the one which is an exception rule.
  # * If there is no matching exception rule, the prevailing rule is the one with the most labels.
  # * If the prevailing rule is a exception rule, modify it by removing the leftmost label.
  # * The public suffix is the set of labels from the domain
  #   which directly match the labels of the prevailing rule (joined by dots).
  # * The registered domain is the public suffix plus one additional label.
  #
  # Note: This might not be the most efficient algorithm.
  #
  # domain - The domain to look up; it is passed as-is to each rule's <tt>match?</tt>.
  #
  # Returns a PublicSuffixService::Rule::Base instance or nil.
  def find(domain)
    rules = select(domain)
    # +rules+ is a plain Array here, so this is Array's own #find,
    # not the domain lookup defined by this class.
    exception = rules.find { |r| r.type == :exception }
    # On equal length the later rule wins.
    exception || rules.inject { |t, r| t.length > r.length ? t : r }
  end

  # Selects all the rules matching given domain.
  # This replaces the block-based Enumerable#select for RuleList instances.
  #
  # Returns an Array of rules.
  # Each rule is expected to be a subclass of PublicSuffixService::Rule::Base.
  def select(domain)
    @list.select { |rule| rule.match?(domain) }
  end


  @@default = nil

  class << self

    # Gets the default <tt>PublicSuffixService::RuleList</tt>.
    # Initializes a new <tt>PublicSuffixService::RuleList</tt>
    # parsing the content of <tt>PublicSuffixService::RuleList.default_definition</tt> if necessary.
    # The definition stream is closed once it has been parsed,
    # so that the underlying file handle is not leaked.
    #
    # Returns an instance of PublicSuffixService::RuleList.
    def default
      @@default ||= begin
        input = default_definition
        begin
          parse(input)
        ensure
          # A String definition has no #close; an IO/File must be released.
          if input.respond_to?(:close) && !(input.respond_to?(:closed?) && input.closed?)
            input.close
          end
        end
      end
    end

    # Sets the default <tt>PublicSuffixService::RuleList</tt> to <tt>value</tt>.
    #
    # value - The new PublicSuffixService::RuleList.
    #
    # Returns the new PublicSuffixService::RuleList.
    def default=(value)
      @@default = value
    end

    # Sets the default <tt>PublicSuffixService::RuleList</tt> to <tt>nil</tt>.
    #
    # Returns self.
    def clear
      self.default = nil
      self
    end

    # Resets the default <tt>PublicSuffixService::RuleList</tt> and reinitialize it
    # parsing the content of <tt>PublicSuffixService::RuleList.default_definition</tt>.
    #
    # Returns an instance of PublicSuffixService::RuleList.
    def reload
      self.clear.default
    end

    # Gets the default definition list.
    # Can be any <tt>IOStream</tt> including a <tt>File</tt> or a simple <tt>String</tt>.
    # The object must respond to <tt>#each_line</tt>.
    #
    # Returns an object which responds to <tt>#each_line</tt>.
    def default_definition
      File.new(File.join(File.dirname(__FILE__), "definitions.dat"))
    end


    # Parse given <tt>input</tt> treating the content as Public Suffix List.
    # See http://publicsuffix.org/format/ for more details about input format.
    #
    # Blank lines and lines starting with <tt>//</tt> are skipped;
    # every other line is turned into a rule via <tt>Rule.factory</tt>.
    #
    # input - An object which responds to <tt>#each_line</tt>.
    #         The yielded lines are never modified.
    #
    # Returns a PublicSuffixService::RuleList.
    def parse(input)
      new do |list|
        input.each_line do |raw|
          # Work on a stripped copy: the yielded line may be frozen
          # or still owned by the caller.
          line = raw.strip

          # strip blank lines and comments
          next if line.empty? || line.start_with?("//")

          # append rule
          list << Rule.factory(line)
        end
      end
    end

  end

end
|
245
|
+
|
246
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#
|
2
|
+
# = Public Suffix Service
|
3
|
+
#
|
4
|
+
# Domain Name parser based on the Public Suffix List
|
5
|
+
#
|
6
|
+
#
|
7
|
+
# Category:: Net
|
8
|
+
# Package:: PublicSuffixService
|
9
|
+
# Author:: Simone Carletti <weppos@weppos.net>
|
10
|
+
# License:: MIT License
|
11
|
+
#
|
12
|
+
#--
|
13
|
+
#
|
14
|
+
#++
|
15
|
+
|
16
|
+
|
17
|
+
module PublicSuffixService

  # Holds the individual components of the library version
  # and the dotted string built from them.
  module Version
    MAJOR = 0
    MINOR = 4
    PATCH = 0
    # Optional build identifier; nil means it is left out of STRING.
    BUILD = nil

    # Dotted version string made of every non-nil component, in order.
    STRING = [MAJOR, MINOR, PATCH, BUILD].reject { |part| part.nil? }.join(".")
  end

  # Shortcut for the full version string.
  VERSION = Version::STRING

end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for the public_suffix_service library.
Gem::Specification.new do |s|
  s.name = %q{public_suffix_service}
  # Must match PublicSuffixService::VERSION (lib/public_suffix_service/version.rb).
  s.version = "0.4.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Simone Carletti"]
  s.date = %q{2010-05-31}
  s.description = %q{ Intelligent Domain Name parser based on the Public Suffix List. Domain Name can parse and decompose a domain name into top level domain, domain and subdomains.
}
  s.email = %q{weppos@weppos.net}
  s.extra_rdoc_files = ["CHANGELOG.rdoc", "LICENSE.rdoc", "README.rdoc"]
  s.files = ["Rakefile", "CHANGELOG.rdoc", "LICENSE.rdoc", "README.rdoc", "public_suffix_service.gemspec", "test/acceptance_test.rb", "test/public_suffix_service/domain_test.rb", "test/public_suffix_service/rule_list_test.rb", "test/public_suffix_service/rule_test.rb", "test/public_suffix_service_test.rb", "test/test_helper.rb", "lib/public_suffix_service/definitions.dat", "lib/public_suffix_service/domain.rb", "lib/public_suffix_service/errors.rb", "lib/public_suffix_service/rule.rb", "lib/public_suffix_service/rule_list.rb", "lib/public_suffix_service/version.rb", "lib/public_suffix_service.rb"]
  s.homepage = %q{http://www.simonecarletti.com/code/public-suffix-service}
  s.rdoc_options = ["--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{Domain Name parser based on the Public Suffix List}

  # mocha is only needed to run the test suite. RubyGems releases without
  # development dependencies (before 1.2.0, or without specification
  # versions at all) fall back to a regular dependency.
  if s.respond_to? :specification_version then
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<mocha>, [">= 0"])
    else
      s.add_dependency(%q<mocha>, [">= 0"])
    end
  else
    s.add_dependency(%q<mocha>, [">= 0"])
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class AcceptanceTest < Test::Unit::TestCase

  # Maps each domain name to the parts expected from parsing it,
  # listed as [trd, sld, tld].
  CASES = {
    "google.com"            => [nil,   "google",      "com"],
    "foo.google.com"        => ["foo", "google",      "com"],

    "verybritish.co.uk"     => [nil,   "verybritish", "co.uk"],
    "foo.verybritish.co.uk" => ["foo", "verybritish", "co.uk"],

    "parliament.uk"         => [nil,   "parliament",  "uk"],
    "foo.parliament.uk"     => ["foo", "parliament",  "uk"],
  }

  # Parses every name in CASES and compares tld, sld and trd
  # of the result with the expected values.
  def test_all
    CASES.each do |name, (trd, sld, tld)|
      parsed = PublicSuffixService.parse(name)
      assert_equal tld, parsed.tld, "Invalid tld for '#{name}'"
      assert_equal sld, parsed.sld, "Invalid sld for '#{name}'"
      assert_equal trd, parsed.trd, "Invalid trd for '#{name}'"
    end
  end

end
|
@@ -0,0 +1,141 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Unit tests for PublicSuffixService::Domain.
#
# Domains are built as <tt>Domain.new(tld, sld, trd)</tt>. In these tests "com"
# is expected to have a matching rule, while "zip" is expected to have none
# (see test_rule and test_valid_question).
class PublicSuffixService::DomainTest < Test::Unit::TestCase

  def setup
    @klass = PublicSuffixService::Domain
  end


  def test_initialize_with_tld
    domain = @klass.new("com")
    assert_equal "com", domain.tld
    assert_nil domain.sld
    assert_nil domain.trd
  end

  def test_initialize_with_tld_and_sld
    domain = @klass.new("com", "google")
    assert_equal "com", domain.tld
    assert_equal "google", domain.sld
    assert_nil domain.trd
  end

  def test_initialize_with_tld_and_sld_and_trd
    domain = @klass.new("com", "google", "www")
    assert_equal "com", domain.tld
    assert_equal "google", domain.sld
    assert_equal "www", domain.trd
  end


  def test_to_s
    assert_equal "com", @klass.new("com").to_s
    assert_equal "google.com", @klass.new("com", "google").to_s
    assert_equal "www.google.com", @klass.new("com", "google", "www").to_s
  end

  def test_to_a
    assert_equal [nil, nil, "com"], @klass.new("com").to_a
    assert_equal [nil, "google", "com"], @klass.new("com", "google").to_a
    assert_equal ["www", "google", "com"], @klass.new("com", "google", "www").to_a
  end


  def test_tld
    assert_equal "com", @klass.new("com", "google", "www").tld
  end

  def test_sld
    assert_equal "google", @klass.new("com", "google", "www").sld
  end

  # Previously defined as a second +test_tld+, which silently replaced
  # the real tld test above so that it never ran.
  def test_trd
    assert_equal "www", @klass.new("com", "google", "www").trd
  end


  def test_name
    assert_equal "com", @klass.new("com").name
    assert_equal "google.com", @klass.new("com", "google").name
    assert_equal "www.google.com", @klass.new("com", "google", "www").name
  end

  def test_domain
    assert_nil @klass.new("com").domain
    assert_nil @klass.new("zip").domain
    assert_equal "google.com", @klass.new("com", "google").domain
    assert_equal "google.zip", @klass.new("zip", "google").domain
    assert_equal "google.com", @klass.new("com", "google", "www").domain
    assert_equal "google.zip", @klass.new("zip", "google", "www").domain
  end

  def test_subdomain
    assert_nil @klass.new("com").subdomain
    assert_nil @klass.new("zip").subdomain
    assert_nil @klass.new("com", "google").subdomain
    assert_nil @klass.new("zip", "google").subdomain
    assert_equal "www.google.com", @klass.new("com", "google", "www").subdomain
    assert_equal "www.google.zip", @klass.new("zip", "google", "www").subdomain
  end

  def test_rule
    assert_nil @klass.new("zip").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com", "google").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com", "google", "www").rule
  end


  def test_domain_question
    assert  @klass.new("com", "google").domain?
    assert  @klass.new("zip", "google").domain?
    assert  @klass.new("com", "google", "www").domain?
    assert !@klass.new("com").domain?
  end

  def test_subdomain_question
    assert  @klass.new("com", "google", "www").subdomain?
    assert  @klass.new("zip", "google", "www").subdomain?
    assert !@klass.new("com").subdomain?
    assert !@klass.new("com", "google").subdomain?
  end

  def test_is_a_domain_question
    assert  @klass.new("com", "google").is_a_domain?
    assert  @klass.new("zip", "google").is_a_domain?
    assert !@klass.new("com", "google", "www").is_a_domain?
    assert !@klass.new("com").is_a_domain?
  end

  def test_is_a_subdomain_question
    assert  @klass.new("com", "google", "www").is_a_subdomain?
    assert  @klass.new("zip", "google", "www").is_a_subdomain?
    assert !@klass.new("com").is_a_subdomain?
    assert !@klass.new("com", "google").is_a_subdomain?
  end

  def test_valid_question
    assert  @klass.new("com").valid?
    assert  @klass.new("com", "google").valid?
    assert  @klass.new("com", "google", "www").valid?
    assert !@klass.new("zip").valid?
    assert !@klass.new("zip", "google").valid?
    assert !@klass.new("zip", "google", "www").valid?
  end

  def test_valid_domain_question
    assert  @klass.new("com", "google").valid_domain?
    assert !@klass.new("zip", "google").valid_domain?
    assert  @klass.new("com", "google", "www").valid_domain?
    assert !@klass.new("com").valid_domain?
  end

  def test_valid_subdomain_question
    assert  @klass.new("com", "google", "www").valid_subdomain?
    assert !@klass.new("zip", "google", "www").valid_subdomain?
    assert !@klass.new("com").valid_subdomain?
    assert !@klass.new("com", "google").valid_subdomain?
  end

end
|