public_suffix_service 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,246 @@
1
+ #
2
+ # = Public Suffix Service
3
+ #
4
+ # Domain Name parser based on the Public Suffix List
5
+ #
6
+ #
7
+ # Category:: Net
8
+ # Package:: PublicSuffixService
9
+ # Author:: Simone Carletti <weppos@weppos.net>
10
+ # License:: MIT License
11
+ #
12
+ #--
13
+ #
14
+ #++
15
+
16
+
17
+ module PublicSuffixService
18
+
19
+ # = Rule List
20
+ #
21
+ # A PublicSuffixService::RuleList is a collection of one or more PublicSuffixService::Rule.
22
+ #
23
+ # Given a RuleList, you can add or remove PublicSuffixService::Rule,
24
+ # iterate all items in the list or search for the first rule
25
+ # which matches a specific domain name.
26
+ #
27
+ # # Create a new list
28
+ # list = PublicSuffixService::RuleList.new
29
+ #
30
+ # # Push two rules to the list
31
+ # list << PublicSuffixService::Rule.factory("it")
32
+ # list << PublicSuffixService::Rule.factory("com")
33
+ #
34
+ # # Get the size of the list
35
+ # list.size
36
+ # # => 2
37
+ #
38
+ # # Search for the rule matching given domain
39
+ # list.find("example.com")
40
+ # # => #<PublicSuffixService::Rule::Normal>
41
+ # list.find("example.org")
42
+ # # => nil
43
+ #
44
+ # You can create as many PublicSuffixService::RuleList instances as you want.
45
+ # The PublicSuffixService::RuleList.default rule list is used by DomainName
46
+ # to tokenize and validate a domain.
47
+ #
48
+ # PublicSuffixService::RuleList implements Enumerable module.
49
+ #
50
class RuleList
  include Enumerable

  # Gets the list of rules.
  # Each rule is expected to be a subclass of PublicSuffixService::Rule::Base.
  #
  # Returns the Array of rules backing this list (the Array itself, not a copy).
  attr_reader :list


  # Initializes an empty PublicSuffixService::RuleList.
  # If a block is given, yields self so the caller can populate the list.
  def initialize(&block) # :yields: self
    @list = []
    yield(self) if block_given?
  end

  # Checks whether two lists are equal.
  # RuleList <tt>one</tt> is equal to <tt>two</tt>, if <tt>two</tt> is an instance of
  # <tt>PublicSuffixService::RuleList</tt> and both hold equal rules in the same order.
  #
  # other - The PublicSuffixService::RuleList to compare.
  #
  # Returns true if self is equal to other.
  def ==(other)
    return false unless other.is_a?(RuleList)
    equal?(other) || list == other.list
  end
  alias :eql? :==

  # Iterates each rule in the list.
  # Arguments and block are forwarded unchanged to Array#each.
  #
  # Returns whatever Array#each returns for the underlying list.
  def each(*args, &block)
    @list.each(*args, &block)
  end

  # Gets the list as Array.
  #
  # Returns the underlying Array of rules (the same object as #list).
  def to_a
    @list
  end

  # Adds the given object to the list.
  #
  # rule - The rule to add to the list.
  #        Expected to be a subclass of PublicSuffixService::Rule::Base.
  #
  # Returns self, so calls can be chained.
  def add(rule)
    @list << rule
    self
  end
  alias << add

  # Gets the number of elements in the list.
  #
  # Returns an Integer.
  def size
    @list.size
  end
  alias length size

  # Checks whether the list is empty.
  #
  # Returns true if the list contains no elements.
  def empty?
    @list.empty?
  end

  # Removes all elements.
  #
  # Returns self.
  def clear
    @list.clear
    self
  end


  # Returns the most appropriate rule for domain.
  #
  # From the Public Suffix List documentation:
  #
  # * If a hostname matches more than one rule in the file,
  #   the longest matching rule (the one with the most levels) will be used.
  # * An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule.
  #   An exception rule takes priority over any other matching rule.
  #
  # == What this method does
  #
  # * Collects every rule matching domain (see #select).
  # * If one of them has type <tt>:exception</tt>, the first such rule wins.
  # * Otherwise the rule with the greatest <tt>length</tt> wins;
  #   when several rules share that length, the one added last is returned.
  # * If no rule matches, nil is returned.
  #
  # Note: This might not be the most efficient algorithm.
  #
  # domain - The domain name to look up.
  #
  # Returns a PublicSuffixService::Rule::Base instance or nil.
  def find(domain)
    rules = select(domain)
    rules.detect { |rule| rule.type == :exception } ||
      rules.inject { |best, rule| best.length > rule.length ? best : rule }
  end

  # Selects all the rules matching given domain.
  #
  # Note: this replaces Enumerable#select; it takes a domain, not a block.
  #
  # domain - The domain name passed to each rule's <tt>match?</tt>.
  #
  # Returns an Array of rules.
  # Each rule is expected to be a subclass of PublicSuffixService::Rule::Base.
  def select(domain)
    @list.select { |rule| rule.match?(domain) }
  end


  @@default = nil

  class << self

    # Gets the default <tt>PublicSuffixService::RuleList</tt>.
    # Initializes a new <tt>PublicSuffixService::RuleList</tt>
    # parsing the content of <tt>PublicSuffixService::RuleList.default_definition</tt> if necessary.
    #
    # The definition object is closed once it has been parsed (when it
    # responds to <tt>#close</tt>), so the file handle is not leaked.
    #
    # Returns an instance of PublicSuffixService::RuleList.
    def default
      @@default ||= begin
        definition = default_definition
        begin
          parse(definition)
        ensure
          definition.close if definition.respond_to?(:close)
        end
      end
    end

    # Sets the default <tt>PublicSuffixService::RuleList</tt> to <tt>value</tt>.
    #
    # value - The new PublicSuffixService::RuleList.
    #
    # Returns the new PublicSuffixService::RuleList.
    def default=(value)
      @@default = value
    end

    # Sets the default <tt>PublicSuffixService::RuleList</tt> to <tt>nil</tt>.
    #
    # Returns self.
    def clear
      self.default = nil
      self
    end

    # Resets the default <tt>PublicSuffixService::RuleList</tt> and reinitializes it
    # parsing the content of <tt>PublicSuffixService::RuleList.default_definition</tt>.
    #
    # Returns an instance of PublicSuffixService::RuleList.
    def reload
      clear.default
    end

    # Gets the default definition list.
    # Can be any <tt>IOStream</tt> including a <tt>File</tt> or a simple <tt>String</tt>.
    # The object must respond to <tt>#each_line</tt>.
    #
    # This implementation opens a new File on every call;
    # a caller invoking it directly is responsible for closing the handle.
    #
    # Returns an object which responds to <tt>#each_line</tt>.
    def default_definition
      File.new(File.join(File.dirname(__FILE__), "definitions.dat"))
    end


    # Parses given <tt>input</tt> treating the content as Public Suffix List.
    # See http://publicsuffix.org/format/ for more details about input format.
    #
    # Blank lines and lines starting with <tt>//</tt> are skipped.
    # Every other line is read only up to the first whitespace, as the
    # format requires, and handed to <tt>Rule.factory</tt>.
    #
    # input - An object responding to <tt>#each_line</tt>. Lines are not modified.
    #
    # Returns a new PublicSuffixService::RuleList holding the parsed rules.
    def parse(input)
      new do |list|
        input.each_line do |line|
          # Non-destructive strip: never mutate what the caller handed over.
          entry = line.strip
          next if entry.empty? || entry.start_with?("//")

          list << Rule.factory(entry[/\A\S+/])
        end
      end
    end

  end

end
245
+
246
+ end
@@ -0,0 +1,30 @@
1
+ #
2
+ # = Public Suffix Service
3
+ #
4
+ # Domain Name parser based on the Public Suffix List
5
+ #
6
+ #
7
+ # Category:: Net
8
+ # Package:: PublicSuffixService
9
+ # Author:: Simone Carletti <weppos@weppos.net>
10
+ # License:: MIT License
11
+ #
12
+ #--
13
+ #
14
+ #++
15
+
16
+
17
module PublicSuffixService

  # Version components of the library and the dotted string built from them.
  module Version
    MAJOR, MINOR, PATCH = 0, 4, 0

    # Optional build identifier; left out of STRING while nil.
    BUILD = nil

    STRING = [MAJOR, MINOR, PATCH, BUILD].reject { |part| part.nil? }.join(".")
  end

  # Shortcut to the full version string.
  VERSION = Version::STRING

end
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for public_suffix_service.
Gem::Specification.new do |s|
  s.name = %q{public_suffix_service}
  # Must match PublicSuffixService::VERSION (lib/public_suffix_service/version.rb).
  s.version = "0.4.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Simone Carletti"]
  s.date = %q{2010-05-31}
  s.description = %q{ Intelligent Domain Name parser based on the Public Suffix List. Domain Name can parse and decompose a domain name into top level domain, domain and subdomains.
}
  s.email = %q{weppos@weppos.net}
  s.extra_rdoc_files = ["CHANGELOG.rdoc", "LICENSE.rdoc", "README.rdoc"]
  s.files = ["Rakefile", "CHANGELOG.rdoc", "LICENSE.rdoc", "README.rdoc", "public_suffix_service.gemspec", "test/acceptance_test.rb", "test/public_suffix_service/domain_test.rb", "test/public_suffix_service/rule_list_test.rb", "test/public_suffix_service/rule_test.rb", "test/public_suffix_service_test.rb", "test/test_helper.rb", "lib/public_suffix_service/definitions.dat", "lib/public_suffix_service/domain.rb", "lib/public_suffix_service/errors.rb", "lib/public_suffix_service/rule.rb", "lib/public_suffix_service/rule_list.rb", "lib/public_suffix_service/version.rb", "lib/public_suffix_service.rb"]
  s.homepage = %q{http://www.simonecarletti.com/code/public-suffix-service}
  s.rdoc_options = ["--main", "README.rdoc"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.7}
  s.summary = %q{Domain Name parser based on the Public Suffix List}

  # mocha is only needed to run the test suite. RubyGems releases that do
  # not know about development dependencies fall back to a regular one.
  if s.respond_to? :specification_version then
    s.specification_version = 3

    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<mocha>, [">= 0"])
    else
      s.add_dependency(%q<mocha>, [">= 0"])
    end
  else
    s.add_dependency(%q<mocha>, [">= 0"])
  end
end
@@ -0,0 +1,26 @@
1
+ require 'test_helper'
2
+
3
# Acceptance checks: parses complete names through PublicSuffixService.parse
# and verifies the trd / sld / tld parts of the result.
class AcceptanceTest < Test::Unit::TestCase

  # name => expected [trd, sld, tld]
  CASES = {
    "google.com"            => [nil, "google", "com"],
    "foo.google.com"        => ["foo", "google", "com"],

    "verybritish.co.uk"     => [nil, "verybritish", "co.uk"],
    "foo.verybritish.co.uk" => ["foo", "verybritish", "co.uk"],

    "parliament.uk"         => [nil, "parliament", "uk"],
    "foo.parliament.uk"     => ["foo", "parliament", "uk"],
  }

  # Runs every entry of CASES; the failure message names the offending input.
  def test_all
    CASES.each_pair do |name, (trd, sld, tld)|
      domain = PublicSuffixService.parse(name)

      assert_equal tld, domain.tld, "Invalid tld for '#{name}'"
      assert_equal sld, domain.sld, "Invalid sld for '#{name}'"
      assert_equal trd, domain.trd, "Invalid trd for '#{name}'"
    end
  end

end
@@ -0,0 +1,141 @@
1
+ require 'test_helper'
2
+
3
# Unit tests for PublicSuffixService::Domain.
# Domains are built directly with <tt>new(tld, sld, trd)</tt>; the trailing
# arguments are optional.
class PublicSuffixService::DomainTest < Test::Unit::TestCase

  def setup
    @klass = PublicSuffixService::Domain
  end


  # -- construction ---------------------------------------------------------

  def test_initialize_with_tld
    domain = @klass.new("com")
    assert_equal "com", domain.tld
    assert_equal nil, domain.sld
    assert_equal nil, domain.trd
  end

  def test_initialize_with_tld_and_sld
    domain = @klass.new("com", "google")
    assert_equal "com", domain.tld
    assert_equal "google", domain.sld
    assert_equal nil, domain.trd
  end

  def test_initialize_with_tld_and_sld_and_trd
    domain = @klass.new("com", "google", "www")
    assert_equal "com", domain.tld
    assert_equal "google", domain.sld
    assert_equal "www", domain.trd
  end


  # -- conversions ----------------------------------------------------------

  def test_to_s
    assert_equal "com", @klass.new("com").to_s
    assert_equal "google.com", @klass.new("com", "google").to_s
    assert_equal "www.google.com", @klass.new("com", "google", "www").to_s
  end

  def test_to_a
    assert_equal [nil, nil, "com"], @klass.new("com").to_a
    assert_equal [nil, "google", "com"], @klass.new("com", "google").to_a
    assert_equal ["www", "google", "com"], @klass.new("com", "google", "www").to_a
  end


  # -- part accessors -------------------------------------------------------

  def test_tld
    assert_equal "com", @klass.new("com", "google", "www").tld
  end

  def test_sld
    assert_equal "google", @klass.new("com", "google", "www").sld
  end

  # Was previously also named test_tld, which redefined the method above and
  # kept the tld accessor test from ever running.
  def test_trd
    assert_equal "www", @klass.new("com", "google", "www").trd
  end


  # -- derived names --------------------------------------------------------

  def test_name
    assert_equal "com", @klass.new("com").name
    assert_equal "google.com", @klass.new("com", "google").name
    assert_equal "www.google.com", @klass.new("com", "google", "www").name
  end

  def test_domain
    assert_equal nil, @klass.new("com").domain
    assert_equal nil, @klass.new("zip").domain
    assert_equal "google.com", @klass.new("com", "google").domain
    assert_equal "google.zip", @klass.new("zip", "google").domain
    assert_equal "google.com", @klass.new("com", "google", "www").domain
    assert_equal "google.zip", @klass.new("zip", "google", "www").domain
  end

  def test_subdomain
    assert_equal nil, @klass.new("com").subdomain
    assert_equal nil, @klass.new("zip").subdomain
    assert_equal nil, @klass.new("com", "google").subdomain
    assert_equal nil, @klass.new("zip", "google").subdomain
    assert_equal "www.google.com", @klass.new("com", "google", "www").subdomain
    assert_equal "www.google.zip", @klass.new("zip", "google", "www").subdomain
  end

  # "zip" is expected to have no rule -- presumably because it is absent from
  # the bundled definitions; verify if the definitions are updated.
  def test_rule
    assert_equal nil, @klass.new("zip").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com", "google").rule
    assert_equal PublicSuffixService::Rule.factory("com"), @klass.new("com", "google", "www").rule
  end


  # -- structural predicates (independent of rule validity) -----------------

  def test_domain_question
    assert  @klass.new("com", "google").domain?
    assert  @klass.new("zip", "google").domain?
    assert  @klass.new("com", "google", "www").domain?
    assert !@klass.new("com").domain?
  end

  def test_subdomain_question
    assert  @klass.new("com", "google", "www").subdomain?
    assert  @klass.new("zip", "google", "www").subdomain?
    assert !@klass.new("com").subdomain?
    assert !@klass.new("com", "google").subdomain?
  end

  def test_is_a_domain_question
    assert  @klass.new("com", "google").is_a_domain?
    assert  @klass.new("zip", "google").is_a_domain?
    assert !@klass.new("com", "google", "www").is_a_domain?
    assert !@klass.new("com").is_a_domain?
  end

  def test_is_a_subdomain_question
    assert  @klass.new("com", "google", "www").is_a_subdomain?
    assert  @klass.new("zip", "google", "www").is_a_subdomain?
    assert !@klass.new("com").is_a_subdomain?
    assert !@klass.new("com", "google").is_a_subdomain?
  end


  # -- validity predicates --------------------------------------------------

  def test_valid_question
    assert  @klass.new("com").valid?
    assert  @klass.new("com", "google").valid?
    assert  @klass.new("com", "google", "www").valid?
    assert !@klass.new("zip").valid?
    assert !@klass.new("zip", "google").valid?
    assert !@klass.new("zip", "google", "www").valid?
  end

  def test_valid_domain_question
    assert  @klass.new("com", "google").valid_domain?
    assert !@klass.new("zip", "google").valid_domain?
    assert  @klass.new("com", "google", "www").valid_domain?
    assert !@klass.new("com").valid_domain?
  end

  def test_valid_subdomain_question
    assert  @klass.new("com", "google", "www").valid_subdomain?
    assert !@klass.new("zip", "google", "www").valid_subdomain?
    assert !@klass.new("com").valid_subdomain?
    assert !@klass.new("com", "google").valid_subdomain?
  end

end