public_suffix_service 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,230 @@
1
+ #
2
+ # = Public Suffix Service
3
+ #
4
+ # Domain Name parser based on the Public Suffix List
5
+ #
6
+ #
7
+ # Category:: Net
8
+ # Package:: PublicSuffixService
9
+ # Author:: Simone Carletti <weppos@weppos.net>
10
+ # License:: MIT License
11
+ #
12
+ #--
13
+ #
14
+ #++
15
+
16
+
17
+ module PublicSuffixService
18
+
19
class Domain

  # Gets the Top Level Domain part (aka the extension),
  # the Second Level Domain part (aka the domain part) and
  # the Third Level Domain part (aka the subdomain part).
  #
  # Each reader returns the value given to the constructor,
  # or nil if that part was not set.
  attr_reader :tld, :sld, :trd

  # Creates a new domain from its parts.
  #
  # args - The parts, in the order tld, sld, trd.
  #        Missing trailing parts are left as nil.
  #
  # Yields the new instance if a block is given.
  #
  # Examples
  #
  #   Domain.new("com", "google", "www")
  #
  def initialize(*args, &block)
    @tld, @sld, @trd = args
    yield(self) if block_given?
  end

  # Gets a String representation of this object.
  #
  # Returns a String with the domain name (see #name).
  def to_s
    name
  end

  # Gets the parts of this domain in display order.
  #
  # Returns an Array with [trd, sld, tld]; unset parts are nil.
  def to_a
    [trd, sld, tld]
  end


  # Gets the domain name.
  #
  # Examples
  #
  #   Domain.new("com", "google").name
  #   # => "google.com"
  #
  #   Domain.new("com", "google", "www").name
  #   # => "www.google.com"
  #
  # Returns a String with all the non-nil parts joined by a dot.
  def name
    to_a.compact.join(".")
  end

  # Returns a domain-like representation of this object ("sld.tld")
  # if the object is a <tt>domain?</tt>,
  # <tt>nil</tt> otherwise.
  def domain
    [sld, tld].join(".") if domain?
  end

  # Returns a subdomain-like representation of this object ("trd.sld.tld")
  # if the object is a <tt>subdomain?</tt>,
  # <tt>nil</tt> otherwise.
  def subdomain
    to_a.join(".") if subdomain?
  end

  # Gets the rule matching this domain in the default PublicSuffixService::RuleList.
  #
  # The lookup is performed on every call; the result is not cached here.
  #
  # Returns whatever <tt>RuleList.default.find</tt> returns for #name:
  # a rule instance if a rule matches current domain, nil if no rule is found.
  def rule
    RuleList.default.find(name)
  end


  # Checks whether <tt>self</tt> looks like a domain.
  #
  # This method doesn't actually validate the domain.
  # It only checks whether the instance contains
  # a value for the <tt>tld</tt> and <tt>sld</tt> attributes.
  # If you also want to validate the domain, use <tt>#valid_domain?</tt> instead.
  #
  # Examples
  #
  #   Domain.new("com").domain?
  #   # => false
  #
  #   Domain.new("com", "google").domain?
  #   # => true
  #
  #   Domain.new("com", "google", "www").domain?
  #   # => true
  #
  #   # Returns true even when the tld is not a known suffix,
  #   # because this method doesn't validate the content.
  #   Domain.new("notasuffix", "google").domain?
  #   # => true
  #
  # Returns true if this instance looks like a domain.
  def domain?
    !tld.nil? && !sld.nil?
  end

  # Checks whether <tt>self</tt> looks like a subdomain.
  #
  # This method doesn't actually validate the subdomain.
  # It only checks whether the instance contains
  # a value for the <tt>tld</tt>, <tt>sld</tt> and <tt>trd</tt> attributes.
  # If you also want to validate the domain, use <tt>#valid_subdomain?</tt> instead.
  #
  # Examples
  #
  #   Domain.new("com").subdomain?
  #   # => false
  #
  #   Domain.new("com", "google").subdomain?
  #   # => false
  #
  #   Domain.new("com", "google", "www").subdomain?
  #   # => true
  #
  #   # Returns true even when the tld is not a known suffix,
  #   # because this method doesn't validate the content.
  #   Domain.new("notasuffix", "google", "www").subdomain?
  #   # => true
  #
  # Returns true if this instance looks like a subdomain.
  def subdomain?
    domain? && !trd.nil?
  end

  # Checks whether <tt>self</tt> is exclusively a domain,
  # and not a subdomain.
  def is_a_domain?
    domain? && !subdomain?
  end

  # Checks whether <tt>self</tt> is exclusively a subdomain.
  def is_a_subdomain?
    subdomain?
  end

  # Checks whether <tt>self</tt> is valid
  # according to default <tt>RuleList</tt>.
  #
  # Note: this method triggers a new rule lookup in the default RuleList
  # (see #rule) every time it is called.
  #
  # Returns true if a rule was found for this instance.
  def valid?
    !rule.nil?
  end

  # Checks whether <tt>self</tt> looks like a domain and validates
  # according to default <tt>RuleList</tt>.
  #
  # See also <tt>Domain#domain?</tt> and <tt>Domain#valid?</tt>.
  #
  # Examples
  #
  #   Domain.new("com").valid_domain?
  #   # => false
  #
  #   # The following results assume the default list has a rule for "com"
  #   # and no rule matching "notasuffix".
  #   Domain.new("com", "google").valid_domain?
  #   # => true
  #
  #   Domain.new("com", "google", "www").valid_domain?
  #   # => true
  #
  #   Domain.new("notasuffix", "google").valid_domain?
  #   # => false
  #
  # Returns true if this instance looks like a domain and is valid.
  def valid_domain?
    domain? && valid?
  end

  # Checks whether <tt>self</tt> looks like a subdomain and validates
  # according to default <tt>RuleList</tt>.
  #
  # See also <tt>Domain#subdomain?</tt> and <tt>Domain#valid?</tt>.
  #
  # Examples
  #
  #   Domain.new("com").valid_subdomain?
  #   # => false
  #
  #   Domain.new("com", "google").valid_subdomain?
  #   # => false
  #
  #   # The following results assume the default list has a rule for "com"
  #   # and no rule matching "notasuffix".
  #   Domain.new("com", "google", "www").valid_subdomain?
  #   # => true
  #
  #   Domain.new("notasuffix", "google", "www").valid_subdomain?
  #   # => false
  #
  # Returns true if this instance looks like a subdomain and is valid.
  def valid_subdomain?
    subdomain? && valid?
  end

end
229
+
230
+ end
@@ -0,0 +1,25 @@
1
+ #
2
+ # = Public Suffix Service
3
+ #
4
+ # Domain Name parser based on the Public Suffix List
5
+ #
6
+ #
7
+ # Category:: Net
8
+ # Package:: PublicSuffixService
9
+ # Author:: Simone Carletti <weppos@weppos.net>
10
+ # License:: MIT License
11
+ #
12
+ #--
13
+ #
14
+ #++
15
+
16
+
17
+ module PublicSuffixService
18
+
19
# Root of this library's error hierarchy; InvalidDomain derives from it,
# so rescuing Error also rescues InvalidDomain.
class Error < StandardError; end
21
+
22
# Error subclass reserved for invalid domains.
# NOTE(review): the code that raises it is not visible in this file.
class InvalidDomain < Error; end
24
+
25
+ end
@@ -0,0 +1,294 @@
1
+ #
2
+ # = Public Suffix Service
3
+ #
4
+ # Domain Name parser based on the Public Suffix List
5
+ #
6
+ #
7
+ # Category:: Net
8
+ # Package:: PublicSuffixService
9
+ # Author:: Simone Carletti <weppos@weppos.net>
10
+ # License:: MIT License
11
+ #
12
+ #--
13
+ #
14
+ #++
15
+
16
+
17
+ module PublicSuffixService
18
+
19
class Rule

  # Takes the <tt>name</tt> of the rule, detects the specific rule class
  # from its first character and creates a new instance of that class.
  #
  # name - The rule String definition (converted with to_s).
  #        A leading "*" selects Wildcard, a leading "!" selects Exception,
  #        anything else (including an empty name) selects Normal.
  #
  # Examples
  #
  #   PublicSuffixService::Rule.factory("ar")
  #   # => #<PublicSuffixService::Rule::Normal>
  #
  #   PublicSuffixService::Rule.factory("*.ar")
  #   # => #<PublicSuffixService::Rule::Wildcard>
  #
  #   PublicSuffixService::Rule.factory("!congresodelalengua3.ar")
  #   # => #<PublicSuffixService::Rule::Exception>
  #
  # Returns a new instance of the detected rule class.
  def self.factory(name)
    # The constants below resolve inside Rule, so Exception is
    # Rule::Exception here and not Ruby's top-level ::Exception.
    klass = case name.to_s[0, 1]
            when "*" then Wildcard
            when "!" then Exception
            else Normal
            end
    klass.new(name)
  end


  #
  # = Abstract rule class
  #
  # This represents the base class for a Rule definition
  # in the {Public Suffix List}[http://publicsuffix.org].
  #
  # This is intended to be an Abstract class
  # and you shouldn't create a direct instance. The only purpose
  # of this class is to expose a common interface
  # for all the available subclasses.
  #
  # * PublicSuffixService::Rule::Normal
  # * PublicSuffixService::Rule::Exception
  # * PublicSuffixService::Rule::Wildcard
  #
  # == Properties
  #
  # A rule is composed by 4 properties:
  #
  # name    - The name of the rule, corresponding to the rule definition
  #           in the public suffix list
  # value   - The value, a normalized version of the rule name.
  #           The normalization process depends on rule type.
  # type    - The rule type (:normal, :wildcard, :exception)
  # labels  - The labels of the rule value, in reverse (rightmost first) order
  #
  # Here's an example
  #
  #   PublicSuffixService::Rule.factory("*.google.com")
  #   #<PublicSuffixService::Rule::Wildcard:0x1015c14b0
  #       @labels=["com", "google"],
  #       @name="*.google.com",
  #       @type=:wildcard,
  #       @value="google.com"
  #   >
  #
  # == Rule Creation
  #
  # The best way to create a new rule is passing the rule name
  # to the <tt>PublicSuffixService::Rule.factory</tt> method.
  #
  #   PublicSuffixService::Rule.factory("com")
  #   # => PublicSuffixService::Rule::Normal
  #
  #   PublicSuffixService::Rule.factory("*.com")
  #   # => PublicSuffixService::Rule::Wildcard
  #
  # This method will detect the rule type and create an instance
  # from the proper rule class.
  #
  # == Rule Usage
  #
  # A rule describes the composition of a domain name
  # and explains how to tokenize the domain name
  # into tld, sld and trd.
  #
  # To use a rule, you first need to be sure the domain you want to tokenize
  # can be handled by the current rule.
  # You can use the <tt>#match?</tt> method.
  #
  #   rule = PublicSuffixService::Rule.factory("com")
  #
  #   rule.match?("google.com")
  #   # => true
  #
  #   rule.match?("google.net")
  #   # => false
  #
  # Rule order is significant. A domain can match more than one rule.
  # See the {Public Suffix Documentation}[http://publicsuffix.org/format/]
  # to learn more about rule priority.
  #
  # When you have the right rule, you can use it to tokenize the domain name.
  #
  #   rule = PublicSuffixService::Rule.factory("com")
  #
  #   rule.decompose("google.com")
  #   # => ["google", "com"]
  #
  #   rule.decompose("www.google.com")
  #   # => ["www.google", "com"]
  #
  class Base

    attr_reader :name, :value, :type, :labels

    # Initializes a new rule with name and value.
    # If value is nil, name also becomes the value for this rule.
    #
    # name  - The rule definition (converted with to_s).
    # value - The normalized rule value, or nil to reuse the name.
    #
    # The type is derived from the last segment of the class name,
    # downcased (e.g. Rule::Wildcard => :wildcard).
    def initialize(name, value = nil)
      @name   = name.to_s
      @value  = value || @name
      @type   = self.class.name.split("::").last.downcase.to_sym
      @labels = domain_to_labels(@value)
    end

    # Checks whether this rule is equal to <tt>other</tt>.
    #
    # other - An other PublicSuffixService::Rule::Base to compare.
    #
    # Returns true if other is an instance of this rule's class
    # and has the same name, false otherwise.
    def ==(other)
      return false unless other.is_a?(self.class)
      equal?(other) || name == other.name
    end
    alias :eql? :==

    # Gets the hash code of this rule.
    #
    # Derived from the name only, so that any two rules considered
    # equal by <tt>eql?</tt> also share the same hash code. This keeps
    # Hash keys, Set members and Array#uniq consistent with <tt>==</tt>.
    #
    # Returns an Integer.
    def hash
      name.hash
    end


    # Checks whether this rule matches <tt>domain</tt>.
    #
    # The rule matches when every rule label equals the domain label
    # at the same position, counting from the rightmost label.
    #
    # domain - A string with the domain name to check.
    #
    # Returns a true if this rule matches domain,
    # false otherwise.
    def match?(domain)
      odiff(labels, domain_to_labels(domain)).empty?
    end

    # Gets the length of this rule for comparison.
    # The length usually matches the number of rule <tt>parts</tt>.
    # Subclasses might actually override this method.
    #
    # Returns an Integer with the number of parts.
    def length
      parts.length
    end

    # Abstract: subclasses return the Array of rule parts.
    #
    # Raises NotImplementedError.
    def parts
      raise NotImplementedError
    end

    # Abstract: subclasses split the domain according to the rule.
    #
    # Raises NotImplementedError.
    def decompose(domain)
      raise NotImplementedError
    end


    private

      # Splits a dot-separated name into labels, rightmost label first.
      def domain_to_labels(domain)
        domain.to_s.split(".").reverse
      end

      # Returns the tail of <tt>one</tt> that starts at the first position
      # where <tt>one</tt> and <tt>two</tt> differ (empty when <tt>two</tt>
      # begins with every element of <tt>one</tt>).
      def odiff(one, two)
        shared = one.zip(two).take_while { |left, right| left == right }.size
        one[shared..-1]
      end

      # Dot-splits the rule value into labels, in the order they appear.
      def value_parts
        @value.split(".")
      end

      # Returns the rule parts as a regexp source fragment.
      # Each part is escaped so it can only match itself literally.
      def escaped_parts
        parts.map { |part| Regexp.escape(part) }.join('\.')
      end

      # Matches the whole domain against pattern and returns its two captures.
      #
      # The patterns are anchored with \A and \z (not ^ and $), so a string
      # containing a newline can't match on one of its lines only.
      #
      # Returns an Array with the two captures, [nil, nil] if there's no match.
      def capture_pair(pattern, domain)
        found = pattern.match(domain.to_s)
        found ? [found[1], found[2]] : [nil, nil]
      end

  end

  class Normal < Base

    # name - The rule definition, used as the rule value as it is.
    def initialize(name)
      super(name, name)
    end

    # dot-split rule value and returns all rule parts
    # in the order they appear in the value.
    #
    # Returns an Array with the domain parts.
    def parts
      @parts ||= value_parts
    end

    # Decomposes the domain according to rule properties.
    #
    # domain - A String with the domain name to parse
    #
    # Return an Array with [trd + sld, tld],
    # [nil, nil] if the domain can't be decomposed with this rule.
    def decompose(domain)
      capture_pair(/\A(.*)\.(#{escaped_parts})\z/, domain)
    end

  end

  class Wildcard < Base

    # name - The rule definition. The leading "*." is removed
    #        to obtain the rule value.
    def initialize(name)
      super(name, name.to_s[2..-1])
    end

    # dot-split rule value and returns all rule parts
    # in the order they appear in the value.
    #
    # Returns an Array with the domain parts.
    def parts
      @parts ||= value_parts
    end

    # Returns an Integer with the number of parts, plus one for the wildcard.
    def length
      parts.length + 1 # * counts as 1
    end

    # Decomposes the domain according to rule properties.
    # The label matched by the wildcard belongs to the tld.
    #
    # domain - A String with the domain name to parse
    #
    # Return an Array with [trd + sld, tld],
    # [nil, nil] if the domain can't be decomposed with this rule.
    def decompose(domain)
      capture_pair(/\A(.*)\.(.*?\.#{escaped_parts})\z/, domain)
    end

  end

  class Exception < Base

    # name - The rule definition. The leading "!" is removed
    #        to obtain the rule value.
    def initialize(name)
      super(name, name.to_s[1..-1])
    end

    # dot-split rule value and returns all rule parts
    # in the order they appear in the value.
    # The leftmost label is not considered a label.
    #
    # See http://publicsuffix.org/format/:
    # If the prevailing rule is a exception rule,
    # modify it by removing the leftmost label.
    #
    # Returns an Array with the domain parts
    # (an empty Array when the value has no labels at all).
    def parts
      @parts ||= value_parts.drop(1)
    end

    # Decomposes the domain according to rule properties.
    #
    # domain - A String with the domain name to parse
    #
    # Return an Array with [trd + sld, tld],
    # [nil, nil] if the domain can't be decomposed with this rule.
    def decompose(domain)
      capture_pair(/\A(.*)\.(#{escaped_parts})\z/, domain)
    end

  end

end
293
+
294
+ end