public_suffix_service 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,230 @@
1
+ #
2
+ # = Public Suffix Service
3
+ #
4
+ # Domain Name parser based on the Public Suffix List
5
+ #
6
+ #
7
+ # Category:: Net
8
+ # Package:: PublicSuffixService
9
+ # Author:: Simone Carletti <weppos@weppos.net>
10
+ # License:: MIT License
11
+ #
12
+ #--
13
+ #
14
+ #++
15
+
16
+
17
+ module PublicSuffixService
18
+
19
class Domain

  # Gets the Top Level Domain part, aka the extension.
  #
  # Returns a String if tld is set, nil otherwise.
  attr_reader :tld

  # Gets the Second Level Domain part, aka the domain part.
  #
  # Returns a String if sld is set, nil otherwise.
  attr_reader :sld

  # Gets the Third Level Domain part, aka the subdomain part.
  #
  # Returns a String if trd is set, nil otherwise.
  attr_reader :trd

  # Creates a new domain from its parts.
  #
  # args  - Up to three positional values, in this order: tld, sld, trd.
  #         Parts that are not supplied are left nil.
  # block - Optional block; when given it is called with the new instance.
  #
  # Examples
  #
  #   PublicSuffixService::Domain.new("com", "google", "www")
  #
  def initialize(*args, &block)
    @tld, @sld, @trd = args
    yield(self) if block_given?
  end

  # Gets a String representation of this object.
  #
  # Returns a String with the domain name.
  def to_s
    name
  end

  # Gets the parts of this object, from the leftmost to the rightmost.
  #
  # Returns an Array with [trd, sld, tld]; unset parts are nil.
  def to_a
    [trd, sld, tld]
  end

  # Gets the domain name.
  #
  # Examples
  #
  #   PublicSuffixService::Domain.new("com", "google").name
  #   # => "google.com"
  #
  #   PublicSuffixService::Domain.new("com", "google", "www").name
  #   # => "www.google.com"
  #
  # Returns a String with the domain name.
  def name
    # Unset parts are skipped so that no leading or doubled dot is produced.
    [trd, sld, tld].compact.join(".")
  end

  # Returns a domain-like representation of this object
  # if the object is a <tt>domain?</tt>,
  # <tt>nil</tt> otherwise.
  def domain
    return unless domain?
    [sld, tld].join(".")
  end

  # Returns a subdomain-like representation of this object
  # if the object is a <tt>subdomain?</tt>,
  # <tt>nil</tt> otherwise.
  def subdomain
    return unless subdomain?
    [trd, sld, tld].join(".")
  end

  # Gets the rule matching this domain in the default RuleList.
  #
  # The lookup is delegated to <tt>RuleList.default.find</tt>
  # and its result is returned unchanged.
  #
  # Returns the matching rule, or nil if no rule is found.
  def rule
    RuleList.default.find(name)
  end

  # Checks whether <tt>self</tt> looks like a domain.
  #
  # This method doesn't actually validate the domain.
  # It only checks whether the instance contains
  # a value for the <tt>tld</tt> and <tt>sld</tt> attributes.
  # If you also want to validate the domain, use <tt>#valid_domain?</tt> instead.
  #
  # Examples
  #
  #   PublicSuffixService::Domain.new("com").domain?
  #   # => false
  #
  #   PublicSuffixService::Domain.new("com", "google").domain?
  #   # => true
  #
  #   PublicSuffixService::Domain.new("com", "google", "www").domain?
  #   # => true
  #
  #   # The content is not validated: any non-nil tld and sld qualify.
  #   PublicSuffixService::Domain.new("zip", "google").domain?
  #   # => true
  #
  # Returns true if this instance looks like a domain.
  def domain?
    !(tld.nil? || sld.nil?)
  end

  # Checks whether <tt>self</tt> looks like a subdomain.
  #
  # This method doesn't actually validate the subdomain.
  # It only checks whether the instance contains
  # a value for the <tt>tld</tt>, <tt>sld</tt> and <tt>trd</tt> attributes.
  # If you also want to validate the subdomain, use <tt>#valid_subdomain?</tt> instead.
  #
  # Examples
  #
  #   PublicSuffixService::Domain.new("com").subdomain?
  #   # => false
  #
  #   PublicSuffixService::Domain.new("com", "google").subdomain?
  #   # => false
  #
  #   PublicSuffixService::Domain.new("com", "google", "www").subdomain?
  #   # => true
  #
  #   # The content is not validated: any non-nil tld, sld and trd qualify.
  #   PublicSuffixService::Domain.new("zip", "google", "www").subdomain?
  #   # => true
  #
  # Returns true if this instance looks like a subdomain.
  def subdomain?
    !(tld.nil? || sld.nil? || trd.nil?)
  end

  # Checks whether <tt>self</tt> is exclusively a domain,
  # and not a subdomain.
  def is_a_domain?
    domain? && !subdomain?
  end

  # Checks whether <tt>self</tt> is exclusively a subdomain.
  def is_a_subdomain?
    subdomain?
  end

  # Checks whether <tt>self</tt> is valid
  # according to default <tt>RuleList</tt>.
  #
  # Note: this method triggers a new rule lookup in the default RuleList,
  # which is a quite intensive task.
  #
  # Returns true if a rule is found for this instance.
  def valid?
    !rule.nil?
  end

  # Checks whether <tt>self</tt> looks like a domain and validates
  # according to default <tt>RuleList</tt>.
  #
  # See also <tt>#domain?</tt> and <tt>#valid?</tt>.
  #
  # Examples
  #
  #   PublicSuffixService::Domain.new("com").valid_domain?
  #   # => false
  #
  #   # Assuming the default list contains a rule matching "google.com".
  #   PublicSuffixService::Domain.new("com", "google").valid_domain?
  #   # => true
  #
  #   PublicSuffixService::Domain.new("com", "google", "www").valid_domain?
  #   # => true
  #
  #   # Assuming no rule in the default list matches this name.
  #   PublicSuffixService::Domain.new("zip", "google").valid_domain?
  #   # => false
  #
  # Returns true if this instance looks like a domain and is valid.
  def valid_domain?
    domain? && valid?
  end

  # Checks whether <tt>self</tt> looks like a subdomain and validates
  # according to default <tt>RuleList</tt>.
  #
  # See also <tt>#subdomain?</tt> and <tt>#valid?</tt>.
  #
  # Examples
  #
  #   PublicSuffixService::Domain.new("com").valid_subdomain?
  #   # => false
  #
  #   PublicSuffixService::Domain.new("com", "google").valid_subdomain?
  #   # => false
  #
  #   # Assuming the default list contains a rule matching "www.google.com".
  #   PublicSuffixService::Domain.new("com", "google", "www").valid_subdomain?
  #   # => true
  #
  #   # Assuming no rule in the default list matches this name.
  #   PublicSuffixService::Domain.new("zip", "google", "www").valid_subdomain?
  #   # => false
  #
  # Returns true if this instance looks like a subdomain and is valid.
  def valid_subdomain?
    subdomain? && valid?
  end

end
229
+
230
+ end
@@ -0,0 +1,25 @@
1
+ #
2
+ # = Public Suffix Service
3
+ #
4
+ # Domain Name parser based on the Public Suffix List
5
+ #
6
+ #
7
+ # Category:: Net
8
+ # Package:: PublicSuffixService
9
+ # Author:: Simone Carletti <weppos@weppos.net>
10
+ # License:: MIT License
11
+ #
12
+ #--
13
+ #
14
+ #++
15
+
16
+
17
+ module PublicSuffixService
18
+
19
# Root of the library's error hierarchy.
# Rescue this class to catch every error defined in this file.
class Error < StandardError; end

# Specialization of Error for domain-related failures.
# NOTE(review): presumably raised when a domain cannot be parsed
# or validated -- confirm against the callers.
class InvalidDomain < Error; end
24
+
25
+ end
@@ -0,0 +1,294 @@
1
+ #
2
+ # = Public Suffix Service
3
+ #
4
+ # Domain Name parser based on the Public Suffix List
5
+ #
6
+ #
7
+ # Category:: Net
8
+ # Package:: PublicSuffixService
9
+ # Author:: Simone Carletti <weppos@weppos.net>
10
+ # License:: MIT License
11
+ #
12
+ #--
13
+ #
14
+ #++
15
+
16
+
17
+ module PublicSuffixService
18
+
19
class Rule

  # Takes the <tt>name</tt> of the rule, detects the specific rule class
  # and creates a new instance of that class.
  # The <tt>name</tt> becomes the rule value.
  #
  # name - The rule String definition
  #
  # Examples
  #
  #   PublicSuffixService::Rule.factory("ar")
  #   # => #<PublicSuffixService::Rule::Normal>
  #
  #   PublicSuffixService::Rule.factory("*.ar")
  #   # => #<PublicSuffixService::Rule::Wildcard>
  #
  #   PublicSuffixService::Rule.factory("!congresodelalengua3.ar")
  #   # => #<PublicSuffixService::Rule::Exception>
  #
  # Returns a new instance of the detected rule class.
  def self.factory(name)
    # The first character of the definition selects the rule class.
    # Exception here is Rule::Exception, not the core ::Exception.
    klass = case name.to_s[0, 1]
      when "*" then Wildcard
      when "!" then Exception
      else Normal
    end
    klass.new(name)
  end


  #
  # = Abstract rule class
  #
  # This represents the base class for a Rule definition
  # in the {Public Suffix List}[http://publicsuffix.org].
  #
  # This is intended to be an Abstract class
  # and you shouldn't create a direct instance. The only purpose
  # of this class is to expose a common interface
  # for all the available subclasses.
  #
  # * PublicSuffixService::Rule::Normal
  # * PublicSuffixService::Rule::Exception
  # * PublicSuffixService::Rule::Wildcard
  #
  # == Properties
  #
  # A rule is composed by 4 properties:
  #
  # name    - The name of the rule, corresponding to the rule definition
  #           in the public suffix list
  # value   - The value, a normalized version of the rule name.
  #           The normalization process depends on rule type.
  # type    - The rule type (:normal, :wildcard, :exception)
  # labels  - The labels of the rule value, from the rightmost to the leftmost
  #
  # Here's an example
  #
  #   PublicSuffixService::Rule.factory("*.google.com")
  #   #<PublicSuffixService::Rule::Wildcard:0x1015c14b0
  #       @labels=["com", "google"],
  #       @name="*.google.com",
  #       @type=:wildcard,
  #       @value="google.com"
  #   >
  #
  # == Rule Creation
  #
  # The best way to create a new rule is passing the rule name
  # to the <tt>PublicSuffixService::Rule.factory</tt> method.
  #
  #   PublicSuffixService::Rule.factory("com")
  #   # => PublicSuffixService::Rule::Normal
  #
  #   PublicSuffixService::Rule.factory("*.com")
  #   # => PublicSuffixService::Rule::Wildcard
  #
  # This method will detect the rule type and create an instance
  # from the proper rule class.
  #
  # == Rule Usage
  #
  # A rule describes the composition of a domain name
  # and explains how to tokenize the domain name
  # into tld, sld and trd.
  #
  # To use a rule, you first need to be sure the domain you want to tokenize
  # can be handled by the current rule.
  # You can use the <tt>#match?</tt> method.
  #
  #   rule = PublicSuffixService::Rule.factory("com")
  #
  #   rule.match?("google.com")
  #   # => true
  #
  #   rule.match?("google.it")
  #   # => false
  #
  # Rule order is significant. A domain can match more than one rule.
  # See the {Public Suffix Documentation}[http://publicsuffix.org/format/]
  # to learn more about rule priority.
  #
  # When you have the right rule, you can use it to tokenize the domain name.
  #
  #   rule = PublicSuffixService::Rule.factory("com")
  #
  #   rule.decompose("google.com")
  #   # => ["google", "com"]
  #
  #   rule.decompose("www.google.com")
  #   # => ["www.google", "com"]
  #
  class Base

    attr_reader :name, :value, :type, :labels

    # Initializes a new rule with name and value.
    # If value is nil, name also becomes the value for this rule.
    #
    # name  - The rule definition; it is converted to a String.
    # value - The normalized rule String, or nil to reuse the name.
    def initialize(name, value = nil)
      @name   = name.to_s
      @value  = value || @name
      # The type is the demodulized, downcased class name (e.g. :wildcard).
      @type   = self.class.name.split("::").last.downcase.to_sym
      @labels = domain_to_labels(@value)
    end

    # Checks whether this rule is equal to <tt>other</tt>.
    #
    # other - An other PublicSuffixService::Rule::Base to compare.
    #
    # Returns true if other is an instance of this rule's class
    # and has the same name, false otherwise.
    def ==(other)
      return false unless other.is_a?(self.class)
      equal?(other) || name == other.name
    end
    alias :eql? :==

    # Gets the hash code of this rule, derived from its name
    # so that rules which are <tt>eql?</tt> also share the same hash
    # and can be used as Hash keys.
    #
    # Returns an Integer.
    def hash
      name.hash
    end


    # Checks whether this rule matches <tt>domain</tt>.
    #
    # The rule matches when every rule label equals the domain label
    # at the same position, counting from the rightmost label.
    #
    # domain - A string with the domain name to check.
    #
    # Returns true if this rule matches domain,
    # false otherwise.
    def match?(domain)
      odiff(labels, domain_to_labels(domain)).empty?
    end

    # Gets the length of this rule for comparison.
    # The length usually matches the number of rule <tt>parts</tt>.
    # Subclasses might actually override this method.
    #
    # Returns an Integer with the number of parts.
    def length
      parts.length
    end

    # Raises NotImplementedError.
    def parts
      raise NotImplementedError
    end

    # Raises NotImplementedError.
    def decompose(domain)
      raise NotImplementedError
    end


    private

      # Splits domain on dots and returns the labels
      # from the rightmost to the leftmost.
      def domain_to_labels(domain)
        domain.to_s.split(".").reverse
      end

      # Skips the leading elements of one that are equal to the elements
      # of two at the same position and returns the rest of one.
      # An empty result means that two starts with all the elements of one.
      def odiff(one, two)
        one.zip(two).drop_while { |left, right| left == right }.map { |left, _| left }
      end

      # Joins the rule parts into a regular expression source
      # where every label is matched literally.
      def escaped_parts
        parts.map { |part| Regexp.escape(part) }.join('\.')
      end

      # Splits domain into what precedes the suffix and the suffix itself.
      #
      # domain         - The domain name to split; it is converted to a String.
      # suffix_pattern - A String with the regular expression source
      #                  describing the suffix.
      #
      # The whole string must match (\A and \z, not the per-line ^ and $),
      # so input with embedded newlines is never partially decomposed.
      #
      # Returns an Array with [left, suffix], or [nil, nil] without a match.
      def split_on_suffix(domain, suffix_pattern)
        found = /\A(.*)\.(#{suffix_pattern})\z/.match(domain.to_s)
        found ? found.captures : [nil, nil]
      end

  end

  class Normal < Base

    # name - The rule definition; it is also used as the rule value.
    def initialize(name)
      # The value must be a String because #parts splits it.
      super(name, name.to_s)
    end

    # dot-split rule value and returns all rule parts
    # in the order they appear in the value.
    #
    # Returns an Array with the domain parts.
    def parts
      @parts ||= @value.split(".")
    end

    # Decomposes the domain according to rule properties.
    #
    # domain - A String with the domain name to parse
    #
    # Returns an Array with [trd + sld, tld],
    # or [nil, nil] if the domain doesn't end with this rule.
    def decompose(domain)
      split_on_suffix(domain, escaped_parts)
    end

  end

  class Wildcard < Base

    # name - The rule definition; the leading "*." is removed
    #        to compute the rule value.
    def initialize(name)
      super(name, name.to_s[2..-1])
    end

    # dot-split rule value and returns all rule parts
    # in the order they appear in the value.
    #
    # Returns an Array with the domain parts.
    def parts
      @parts ||= @value.split(".")
    end

    # Gets the length of this rule for comparison.
    #
    # Returns an Integer with the number of parts plus one for the wildcard.
    def length
      parts.length + 1 # * counts as 1
    end

    # Decomposes the domain according to rule properties.
    # The label matched by the wildcard belongs to the tld.
    #
    # domain - A String with the domain name to parse
    #
    # Returns an Array with [trd + sld, tld],
    # or [nil, nil] if the domain doesn't match this rule.
    def decompose(domain)
      split_on_suffix(domain, ".*?\\.#{escaped_parts}")
    end

  end

  class Exception < Base

    # name - The rule definition; the leading "!" is removed
    #        to compute the rule value.
    def initialize(name)
      super(name, name.to_s[1..-1])
    end

    # dot-split rule value and returns all rule parts
    # in the order they appear in the value.
    # The leftmost label is not considered a label.
    #
    # See http://publicsuffix.org/format/:
    # If the prevailing rule is a exception rule,
    # modify it by removing the leftmost label.
    #
    # Returns an Array with the domain parts.
    def parts
      @parts ||= @value.split(".")[1..-1]
    end

    # Decomposes the domain according to rule properties.
    #
    # domain - A String with the domain name to parse
    #
    # Returns an Array with [trd + sld, tld],
    # or [nil, nil] if the domain doesn't end with this rule.
    def decompose(domain)
      split_on_suffix(domain, escaped_parts)
    end

  end

end
293
+
294
+ end