public_suffix_service 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,246 @@
1
+ #
2
+ # = Public Suffix Service
3
+ #
4
+ # Domain Name parser based on the Public Suffix List
5
+ #
6
+ #
7
+ # Category:: Net
8
+ # Package:: PublicSuffixService
9
+ # Author:: Simone Carletti <weppos@weppos.net>
10
+ # License:: MIT License
11
+ #
12
+ #--
13
+ #
14
+ #++
15
+
16
+
17
module PublicSuffixService

  # = Rule List
  #
  # A PublicSuffixService::RuleList is a collection of one or more PublicSuffixService::Rule.
  #
  # Given a RuleList, you can add or remove PublicSuffixService::Rule,
  # iterate all items in the list or search for the first rule
  # which matches a specific domain name.
  #
  #   # Create a new list
  #   list = PublicSuffixService::RuleList.new
  #
  #   # Push two rules to the list
  #   list << PublicSuffixService::Rule.factory("it")
  #   list << PublicSuffixService::Rule.factory("com")
  #
  #   # Get the size of the list
  #   list.size
  #   # => 2
  #
  #   # Search for the rule matching given domain
  #   list.find("example.com")
  #   # => #<PublicSuffixService::Rule::Normal>
  #   list.find("example.org")
  #   # => nil
  #
  # You can create as many PublicSuffixService::RuleList you want.
  # The PublicSuffixService::RuleList.default rule list is used by DomainName
  # to tokenize and validate a domain.
  #
  # PublicSuffixService::RuleList implements Enumerable module.
  # Note that #find and #select are redefined here to take a domain name
  # instead of a block.
  #
  class RuleList
    include Enumerable

    # Gets the list of rules.
    # Each rule is expected to be a subclass of PublicSuffixService::Rule::Base.
    #
    # Returns an Array of rules.
    attr_reader :list


    # Initializes an empty PublicSuffixService::RuleList.
    # If block is given, yields on self.
    def initialize(&block) # :yields: self
      @list = []
      yield(self) if block_given?
    end

    # Checks whether two lists are equal.
    # RuleList <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
    # <tt>PublicSuffixService::RuleList</tt> and each <tt>PublicSuffixService::Rule::Base</tt>
    # in list <tt>one</tt> is available in list <tt>two</tt>,
    # in the same order.
    #
    # other - The PublicSuffixService::RuleList to compare.
    #
    # Returns true if self is equal to other.
    def ==(other)
      return false unless other.is_a?(RuleList)
      self.equal?(other) ||
      self.list == other.list
    end
    alias :eql? :==

    # Iterates each rule in the list.
    # Arguments and block are forwarded to the underlying Array.
    #
    # Returns the result of Array#each on the internal list.
    def each(*args, &block)
      @list.each(*args, &block)
    end

    # Gets the list as Array.
    # The returned object is the internal Array itself, not a copy.
    #
    # Returns an Array.
    def to_a
      @list
    end

    # Adds the given object to the list.
    #
    # rule - The rule to add to the list.
    #        Expected to be a subclass of PublicSuffixService::Rule::Base.
    #
    # Returns self.
    def add(rule)
      @list << rule
      self
    end
    alias << add

    # Gets the number of elements in the list.
    #
    # Returns an Integer.
    def size
      @list.size
    end
    alias length size

    # Checks whether the list is empty.
    #
    # Returns true if the list contains no elements.
    def empty?
      @list.empty?
    end

    # Removes all elements.
    #
    # Returns self.
    def clear
      @list.clear
      self
    end


    # Returns the most appropriate rule for domain.
    #
    # From the Public Suffix List documentation:
    #
    # * If a hostname matches more than one rule in the file,
    #   the longest matching rule (the one with the most levels) will be used.
    # * An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
    #   An exception rule takes priority over any other matching rule.
    #
    # == Algorithm description
    #
    # * Match domain against all rules and take note of the matching ones.
    # * If no rules match, the prevailing rule is "*".
    # * If more than one rule matches, the prevailing rule is the one which is an exception rule.
    # * If there is no matching exception rule, the prevailing rule is the one with the most labels.
    # * If the prevailing rule is a exception rule, modify it by removing the leftmost label.
    # * The public suffix is the set of labels from the domain
    #   which directly match the labels of the prevailing rule (joined by dots).
    # * The registered domain is the public suffix plus one additional label.
    #
    # Note: This might not be the most efficient algorithm.
    #
    # Returns a PublicSuffixService::Rule::Base instance or nil.
    def find(domain)
      rules = select(domain)
      # An exception rule wins outright; otherwise keep the longest rule
      # (on equal length the later rule in the list prevails).
      rules.detect { |r| r.type == :exception } ||
      rules.inject { |t,r| t.length > r.length ? t : r }
    end

    # Selects all the rules matching given domain.
    #
    # Returns an Array of rules.
    # Each rule is expected to be a subclass of PublicSuffixService::Rule::Base.
    def select(domain)
      @list.select { |rule| rule.match?(domain) }
    end


    @@default = nil

    class << self

      # Gets the default <tt>PublicSuffixService::RuleList</tt>.
      # Initializes a new <tt>PublicSuffixService::RuleList</tt>
      # parsing the content of <tt>PublicSuffixService::RuleList.default_definition</tt> if necessary.
      #
      # The definition source is closed after parsing when it responds to
      # <tt>#close</tt>, so the file handle opened by
      # <tt>default_definition</tt> is not leaked.
      #
      # Returns an instance of PublicSuffixService::RuleList.
      def default
        return @@default if @@default

        definition = default_definition
        begin
          @@default = parse(definition)
        ensure
          # A String definition has no #close; a File/IO one must be released.
          definition.close if definition.respond_to?(:close)
        end
      end

      # Sets the default <tt>PublicSuffixService::RuleList</tt> to <tt>value</tt>.
      #
      # value - The new PublicSuffixService::RuleList.
      #
      # Returns the new PublicSuffixService::RuleList.
      def default=(value)
        @@default = value
      end

      # Sets the default <tt>PublicSuffixService::RuleList</tt> to <tt>nil</tt>.
      #
      # Returns self.
      def clear
        self.default = nil
        self
      end

      # Resets the default <tt>PublicSuffixService::RuleList</tt> and reinitialize it
      # parsing the content of <tt>PublicSuffixService::RuleList.default_definition</tt>.
      #
      # Returns an instance of PublicSuffixService::RuleList.
      def reload
        self.clear.default
      end

      # Gets the default definition list.
      # Can be any <tt>IOStream</tt> including a <tt>File</tt> or a simple <tt>String</tt>.
      # The object must respond to <tt>#each_line</tt>.
      #
      # The File returned here is opened on each call; the caller is
      # responsible for closing it.
      #
      # Returns an object which responds to <tt>#each_line</tt>.
      def default_definition
        File.new(File.join(File.dirname(__FILE__), "definitions.dat"))
      end


      # Parse given <tt>input</tt> treating the content as Public Suffix List.
      # See http://publicsuffix.org/format/ for more details about input format.
      #
      # input - An object which responds to <tt>#each_line</tt>.
      #         It is read but not closed by this method.
      #
      # Returns a PublicSuffixService::RuleList holding one rule
      # for each non-blank, non-comment line.
      def parse(input)
        new do |list|
          input.each_line do |line|
            line.strip!

            # strip blank lines
            if line.empty?
              next
            # strip comments
            elsif line =~ %r{^//}
              next
            # append rule
            else
              list << Rule.factory(line)
            end
          end
        end
      end

    end

  end

end
@@ -0,0 +1,30 @@
1
+ #
2
+ # = Public Suffix Service
3
+ #
4
+ # Domain Name parser based on the Public Suffix List
5
+ #
6
+ #
7
+ # Category:: Net
8
+ # Package:: PublicSuffixService
9
+ # Author:: Simone Carletti <weppos@weppos.net>
10
+ # License:: MIT License
11
+ #
12
+ #--
13
+ #
14
+ #++
15
+
16
+
17
module PublicSuffixService

  # Holds the individual components of the library version number.
  module Version
    MAJOR = 0
    MINOR = 4
    PATCH = 0
    BUILD = nil

    # Dotted version string built from the components above;
    # components set to nil (such as an absent BUILD) are left out.
    STRING = [MAJOR, MINOR, PATCH, BUILD].reject { |segment| segment.nil? }.join(".")
  end

  # Shortcut to the full version string.
  VERSION = Version::STRING

end
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for public_suffix_service.
#
# NOTE(review): the version below is kept in sync with
# PublicSuffixService::Version::STRING (0.4.0); the release date was not
# regenerated and may still refer to the previous release - confirm.
Gem::Specification.new do |s|
  s.name = %q{public_suffix_service}
  s.version = "0.4.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Simone Carletti"]
  s.date = %q{2010-05-31}
  s.description = %q{ Intelligent Domain Name parser based on the Public Suffix List. Domain Name can parse and decompose a domain name into top level domain, domain and subdomains.
}
  s.email = %q{weppos@weppos.net}
  s.extra_rdoc_files = ["CHANGELOG.rdoc", "LICENSE.rdoc", "README.rdoc"]
  s.files = ["Rakefile", "CHANGELOG.rdoc", "LICENSE.rdoc", "README.rdoc", "public_suffix_service.gemspec", "test/acceptance_test.rb", "test/public_suffix_service/domain_test.rb", "test/public_suffix_service/rule_list_test.rb", "test/public_suffix_service/rule_test.rb", "test/public_suffix_service_test.rb", "test/test_helper.rb", "lib/public_suffix_service/definitions.dat", "lib/public_suffix_service/domain.rb", "lib/public_suffix_service/errors.rb", "lib/public_suffix_service/rule.rb", "lib/public_suffix_service/rule_list.rb", "lib/public_suffix_service/version.rb", "lib/public_suffix_service.rb"]
  s.homepage = %q{http://www.simonecarletti.com/code/public-suffix-service}
  s.rdoc_options = ["--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{Domain Name parser based on the Public Suffix List}

  # mocha is only needed to run the test suite. RubyGems older than 1.2.0
  # (or without specification_version support) has no notion of development
  # dependencies, so it falls back to a regular dependency.
  if s.respond_to? :specification_version then
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<mocha>, [">= 0"])
    else
      s.add_dependency(%q<mocha>, [">= 0"])
    end
  else
    s.add_dependency(%q<mocha>, [">= 0"])
  end
end
@@ -0,0 +1,26 @@
1
+ require 'test_helper'
2
+
3
# End-to-end checks: parses complete host names and verifies how they are
# split into third-level (trd), second-level (sld) and top-level (tld) parts.
class AcceptanceTest < Test::Unit::TestCase

  # Maps a host name to the expected [trd, sld, tld] triple.
  CASES = {
    "google.com" => [nil, "google", "com"],
    "foo.google.com" => ["foo", "google", "com"],

    "verybritish.co.uk" => [nil, "verybritish", "co.uk"],
    "foo.verybritish.co.uk" => ["foo", "verybritish", "co.uk"],

    "parliament.uk" => [nil, "parliament", "uk"],
    "foo.parliament.uk" => ["foo", "parliament", "uk"],
  }

  # Parses every host name in CASES and compares each part with the
  # expected value, checking tld first, then sld, then trd.
  def test_all
    CASES.each_pair do |name, (trd, sld, tld)|
      parsed = PublicSuffixService.parse(name)
      assert_equal tld, parsed.tld, "Invalid tld for '#{name}'"
      assert_equal sld, parsed.sld, "Invalid sld for '#{name}'"
      assert_equal trd, parsed.trd, "Invalid trd for '#{name}'"
    end
  end

end
@@ -0,0 +1,141 @@
1
+ require 'test_helper'
2
+
3
# Unit tests for PublicSuffixService::Domain.
#
# Domains are built as Domain.new(tld, sld = nil, trd = nil). The "com" TLD is
# used as a suffix that has a rule in the default list, "zip" as one that
# has not (see test_rule and test_valid_question).
class PublicSuffixService::DomainTest < Test::Unit::TestCase

  def setup
    @klass = PublicSuffixService::Domain
  end


  def test_initialize_with_tld
    domain = @klass.new("com")
    assert_equal "com", domain.tld
    assert_nil domain.sld
    assert_nil domain.trd
  end

  def test_initialize_with_tld_and_sld
    domain = @klass.new("com", "google")
    assert_equal "com", domain.tld
    assert_equal "google", domain.sld
    assert_nil domain.trd
  end

  def test_initialize_with_tld_and_sld_and_trd
    domain = @klass.new("com", "google", "www")
    assert_equal "com", domain.tld
    assert_equal "google", domain.sld
    assert_equal "www", domain.trd
  end


  def test_to_s
    assert_equal "com", @klass.new("com").to_s
    assert_equal "google.com", @klass.new("com", "google").to_s
    assert_equal "www.google.com", @klass.new("com", "google", "www").to_s
  end

  def test_to_a
    assert_equal [nil, nil, "com"], @klass.new("com").to_a
    assert_equal [nil, "google", "com"], @klass.new("com", "google").to_a
    assert_equal ["www", "google", "com"], @klass.new("com", "google", "www").to_a
  end


  def test_tld
    assert_equal "com", @klass.new("com", "google", "www").tld
  end

  def test_sld
    assert_equal "google", @klass.new("com", "google", "www").sld
  end

  # Previously named test_tld as well, which silently replaced the real
  # tld test above; it exercises the trd accessor.
  def test_trd
    assert_equal "www", @klass.new("com", "google", "www").trd
  end


  def test_name
    assert_equal "com", @klass.new("com").name
    assert_equal "google.com", @klass.new("com", "google").name
    assert_equal "www.google.com", @klass.new("com", "google", "www").name
  end

  def test_domain
    assert_nil @klass.new("com").domain
    assert_nil @klass.new("zip").domain
    assert_equal "google.com", @klass.new("com", "google").domain
    assert_equal "google.zip", @klass.new("zip", "google").domain
    assert_equal "google.com", @klass.new("com", "google", "www").domain
    assert_equal "google.zip", @klass.new("zip", "google", "www").domain
  end

  def test_subdomain
    assert_nil @klass.new("com").subdomain
    assert_nil @klass.new("zip").subdomain
    assert_nil @klass.new("com", "google").subdomain
    assert_nil @klass.new("zip", "google").subdomain
    assert_equal "www.google.com", @klass.new("com", "google", "www").subdomain
    assert_equal "www.google.zip", @klass.new("zip", "google", "www").subdomain
  end

  def test_rule
    assert_nil @klass.new("zip").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com", "google").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com", "google", "www").rule
  end


  def test_domain_question
    assert  @klass.new("com", "google").domain?
    assert  @klass.new("zip", "google").domain?
    assert  @klass.new("com", "google", "www").domain?
    assert !@klass.new("com").domain?
  end

  def test_subdomain_question
    assert  @klass.new("com", "google", "www").subdomain?
    assert  @klass.new("zip", "google", "www").subdomain?
    assert !@klass.new("com").subdomain?
    assert !@klass.new("com", "google").subdomain?
  end

  def test_is_a_domain_question
    assert  @klass.new("com", "google").is_a_domain?
    assert  @klass.new("zip", "google").is_a_domain?
    assert !@klass.new("com", "google", "www").is_a_domain?
    assert !@klass.new("com").is_a_domain?
  end

  def test_is_a_subdomain_question
    assert  @klass.new("com", "google", "www").is_a_subdomain?
    assert  @klass.new("zip", "google", "www").is_a_subdomain?
    assert !@klass.new("com").is_a_subdomain?
    assert !@klass.new("com", "google").is_a_subdomain?
  end

  def test_valid_question
    assert  @klass.new("com").valid?
    assert  @klass.new("com", "google").valid?
    assert  @klass.new("com", "google", "www").valid?
    assert !@klass.new("zip").valid?
    assert !@klass.new("zip", "google").valid?
    assert !@klass.new("zip", "google", "www").valid?
  end

  def test_valid_domain_question
    assert  @klass.new("com", "google").valid_domain?
    assert !@klass.new("zip", "google").valid_domain?
    assert  @klass.new("com", "google", "www").valid_domain?
    assert !@klass.new("com").valid_domain?
  end

  def test_valid_subdomain_question
    assert  @klass.new("com", "google", "www").valid_subdomain?
    assert !@klass.new("zip", "google", "www").valid_subdomain?
    assert !@klass.new("com").valid_subdomain?
    assert !@klass.new("com", "google").valid_subdomain?
  end

end