email_address 0.0.3 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +10 -0
- data/Gemfile +0 -1
- data/README.md +451 -197
- data/Rakefile +4 -9
- data/email_address.gemspec +9 -5
- data/lib/email_address.rb +55 -24
- data/lib/email_address/active_record_validator.rb +5 -5
- data/lib/email_address/address.rb +152 -72
- data/lib/email_address/canonical_email_address_type.rb +46 -0
- data/lib/email_address/config.rb +148 -64
- data/lib/email_address/email_address_type.rb +15 -31
- data/lib/email_address/exchanger.rb +31 -34
- data/lib/email_address/host.rb +327 -51
- data/lib/email_address/local.rb +304 -52
- data/lib/email_address/version.rb +1 -1
- data/test/activerecord/test_ar.rb +22 -0
- data/test/activerecord/user.rb +71 -0
- data/test/email_address/test_address.rb +53 -27
- data/test/email_address/test_config.rb +23 -8
- data/test/email_address/test_exchanger.rb +22 -10
- data/test/email_address/test_host.rb +47 -6
- data/test/email_address/test_local.rb +80 -16
- data/test/test_email_address.rb +38 -4
- data/test/test_helper.rb +7 -5
- metadata +68 -34
- data/lib/email_address/domain_matcher.rb +0 -98
- data/lib/email_address/domain_parser.rb +0 -69
- data/lib/email_address/matcher.rb +0 -119
- data/lib/email_address/validator.rb +0 -141
- data/test/email_address/test_domain_matcher.rb +0 -21
- data/test/email_address/test_domain_parser.rb +0 -29
- data/test/email_address/test_matcher.rb +0 -44
- data/test/email_address/test_validator.rb +0 -16
data/lib/email_address/host.rb
CHANGED
@@ -1,82 +1,322 @@
|
|
1
1
|
require 'simpleidn'
|
2
|
+
require 'resolv'
|
3
|
+
require 'netaddr'
|
2
4
|
|
3
5
|
module EmailAddress
|
4
6
|
##############################################################################
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
7
|
+
# The EmailAddress Host is found on the right-hand side of the "@" symbol.
|
8
|
+
# It can be:
|
9
|
+
# * Host name (domain name with optional subdomain)
|
10
|
+
# * International Domain Name, in Unicode (Display) or Punycode (DNS) format
|
11
|
+
# * IP Address format, either IPv4 or IPv6, enclosed in square brackets.
|
12
|
+
# This is not conventionally supported, but is part of the specification.
|
13
|
+
# * It can contain an optional comment, enclosed in parenthesis, either at
|
14
|
+
# beginning or ending of the host name. This is not well defined, so it is not
|
15
|
+
# supported here, except to parse it off, if found.
|
16
|
+
#
|
17
|
+
# For matching and query capabilities, the host name is parsed into these
|
18
|
+
# parts (with example data for "subdomain.example.co.uk"):
|
19
|
+
# * host_name: "subdomain.example.co.uk"
|
20
|
+
# * dns_name: punycode("subdomain.example.co.uk")
|
21
|
+
# * subdomain: "subdomain"
|
22
|
+
# * registration_name: "example"
|
23
|
+
# * domain_name: "example.co.uk"
|
24
|
+
# * tld: "uk"
|
25
|
+
# * tld2: "co.uk" (the 1 or 2 term TLD we could guess)
|
26
|
+
# * ip_address: nil or "ipaddress" used in [ipaddress] syntax
|
27
|
+
#
|
28
|
+
# The provider (Email Service Provider or ESP) is looked up according to the
|
29
|
+
# provider configuration rules, setting the config attribute to values of
|
30
|
+
# that provider.
|
17
31
|
##############################################################################
|
18
32
|
class Host
|
19
|
-
|
20
|
-
|
33
|
+
attr_accessor :host_name, :dns_name, :domain_name, :registration_name,
|
34
|
+
:tld, :tld2, :subdomains, :ip_address, :config, :provider,
|
35
|
+
:comment
|
36
|
+
MAX_HOST_LENGTH = 255
|
37
|
+
|
38
|
+
# Sometimes, you just need a Regexp...
|
39
|
+
DNS_HOST_REGEX = / [\p{L}\p{N}]+ (?: (?: \-{1,2} | \.) [\p{L}\p{N}]+ )*/x
|
40
|
+
|
41
|
+
# The IPv4 and IPv6 were lifted from Resolv::IPv?::Regex and tweaked to not
|
42
|
+
# \A...\z anchor at the edges.
|
43
|
+
IPv6_HOST_REGEX = /\[IPv6:
|
44
|
+
(?: (?:(?x-mi:
|
45
|
+
(?:[0-9A-Fa-f]{1,4}:){7}
|
46
|
+
[0-9A-Fa-f]{1,4}
|
47
|
+
)) |
|
48
|
+
(?:(?x-mi:
|
49
|
+
(?: (?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?) ::
|
50
|
+
(?: (?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)
|
51
|
+
)) |
|
52
|
+
(?:(?x-mi:
|
53
|
+
(?: (?:[0-9A-Fa-f]{1,4}:){6,6})
|
54
|
+
(?: \d+)\.(?: \d+)\.(?: \d+)\.(?: \d+)
|
55
|
+
)) |
|
56
|
+
(?:(?x-mi:
|
57
|
+
(?: (?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?) ::
|
58
|
+
(?: (?:[0-9A-Fa-f]{1,4}:)*)
|
59
|
+
(?: \d+)\.(?: \d+)\.(?: \d+)\.(?: \d+)
|
60
|
+
)))\]/ix
|
61
|
+
|
62
|
+
IPv4_HOST_REGEX = /\[((?x-mi:0
|
63
|
+
|1(?:[0-9][0-9]?)?
|
64
|
+
|2(?:[0-4][0-9]?|5[0-5]?|[6-9])?
|
65
|
+
|[3-9][0-9]?))\.((?x-mi:0
|
66
|
+
|1(?:[0-9][0-9]?)?
|
67
|
+
|2(?:[0-4][0-9]?|5[0-5]?|[6-9])?
|
68
|
+
|[3-9][0-9]?))\.((?x-mi:0
|
69
|
+
|1(?:[0-9][0-9]?)?
|
70
|
+
|2(?:[0-4][0-9]?|5[0-5]?|[6-9])?
|
71
|
+
|[3-9][0-9]?))\.((?x-mi:0
|
72
|
+
|1(?:[0-9][0-9]?)?
|
73
|
+
|2(?:[0-4][0-9]?|5[0-5]?|[6-9])?
|
74
|
+
|[3-9][0-9]?))\]/x
|
75
|
+
|
76
|
+
# Matches conventional host name and punycode: domain.tld, x--punycode.tld
|
77
|
+
CANONICAL_HOST_REGEX = /\A #{DNS_HOST_REGEX} \z/x
|
78
|
+
|
79
|
+
# Matches Host forms: DNS name, IPv4, or IPv6 formats
|
80
|
+
STANDARD_HOST_REGEX = /\A (?: #{DNS_HOST_REGEX}
|
81
|
+
| #{IPv4_HOST_REGEX} | #{IPv6_HOST_REGEX}) \z/ix
|
21
82
|
|
22
83
|
# host name -
|
23
|
-
# *
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
host_name||= ''
|
30
|
-
@host_name = host_name.downcase
|
31
|
-
@host_type = host_type
|
32
|
-
parse_host(@host_name)
|
84
|
+
# * host type - :email for an email host, :mx for exchanger host
|
85
|
+
def initialize(host_name, config={})
|
86
|
+
@original = host_name ||= ''
|
87
|
+
config[:host_type] ||= :email
|
88
|
+
@config = config
|
89
|
+
parse(host_name)
|
33
90
|
end
|
34
91
|
|
35
|
-
|
36
|
-
|
92
|
+
# Returns the String representation of the host name (or IP)
|
93
|
+
def name
|
94
|
+
if self.ipv4?
|
95
|
+
"[#{self.ip_address}]"
|
96
|
+
elsif self.ipv6?
|
97
|
+
"[IPv6:#{self.ip_address}]"
|
98
|
+
elsif @config[:host_encoding] && @config[:host_encoding] == :unicode
|
99
|
+
::SimpleIDN.to_unicode(self.host_name)
|
100
|
+
else
|
101
|
+
self.dns_name
|
102
|
+
end
|
37
103
|
end
|
38
|
-
alias :
|
104
|
+
alias :to_s :name
|
39
105
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
@parts.each { |k,v| instance_variable_set("@#{k}", v) }
|
106
|
+
# The canonical host name is the simplified, DNS host name
|
107
|
+
def canonical
|
108
|
+
self.dns_name
|
44
109
|
end
|
45
110
|
|
46
|
-
#
|
47
|
-
|
48
|
-
|
111
|
+
# Returns the munged version of the name, replacing everything after the
|
112
|
+
# initial two characters with "*****" or the configured "munge_string".
|
113
|
+
def munge
|
114
|
+
self.host_name.sub(/\A(.{1,2}).*/) { |m| $1 + @config[:munge_string] }
|
49
115
|
end
|
50
116
|
|
51
|
-
|
52
|
-
|
117
|
+
############################################################################
|
118
|
+
# Parsing
|
119
|
+
############################################################################
|
120
|
+
|
121
|
+
|
122
|
+
def parse(host) # :nodoc:
|
123
|
+
host = self.parse_comment(host)
|
124
|
+
|
125
|
+
if host =~ /\A\[IPv6:(.+)\]/i
|
126
|
+
self.ip_address = $1
|
127
|
+
elsif host =~ /\A\[(\d{1,3}(\.\d{1,3}){3})\]/ # IPv4
|
128
|
+
self.ip_address = $1
|
129
|
+
else
|
130
|
+
self.host_name = host
|
131
|
+
end
|
53
132
|
end
|
54
133
|
|
55
|
-
|
56
|
-
|
57
|
-
|
134
|
+
def parse_comment(host) # :nodoc:
|
135
|
+
if host =~ /\A\((.+?)\)(.+)/ # (comment)domain.tld
|
136
|
+
self.comment, host = $1, $2
|
137
|
+
end
|
138
|
+
if host =~ /\A(.+)\((.+?)\)\z/ # domain.tld(comment)
|
139
|
+
host, self.comment = $1, $2
|
140
|
+
end
|
141
|
+
host
|
142
|
+
end
|
143
|
+
|
144
|
+
def host_name=(name)
|
145
|
+
@host_name = name = name.strip.downcase.gsub(' ', '').gsub(/\(.*\)/, '')
|
146
|
+
@dns_name = ::SimpleIDN.to_ascii(self.host_name)
|
147
|
+
|
148
|
+
# Subdomain only (root@localhost)
|
149
|
+
if name.index('.').nil?
|
150
|
+
self.subdomains = name
|
151
|
+
|
152
|
+
# Split sub.domain from .tld: *.com, *.xx.cc, *.cc
|
153
|
+
elsif name =~ /\A(.+)\.(\w{3,10})\z/ ||
|
154
|
+
name =~ /\A(.+)\.(\w{1,3}\.\w\w)\z/ ||
|
155
|
+
name =~ /\A(.+)\.(\w\w)\z/
|
156
|
+
|
157
|
+
sub_and_domain, self.tld2 = [$1, $2] # sub+domain, com || co.uk
|
158
|
+
self.tld = self.tld2.sub(/\A.+\./, '') # co.uk => uk
|
159
|
+
if sub_and_domain =~ /\A(.+)\.(.+)\z/ # is subdomain? sub.example [.tld2]
|
160
|
+
self.subdomains = $1
|
161
|
+
self.registration_name = $2
|
162
|
+
else
|
163
|
+
self.registration_name = sub_and_domain
|
164
|
+
#self.domain_name = sub_and_domain + '.' + self.tld2
|
165
|
+
end
|
166
|
+
self.domain_name = self.registration_name + '.' + self.tld2
|
167
|
+
self.find_provider
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
# Determines the Email Service Provider (ESP) for this host and applies its
# configuration through set_provider.
#
# Lookup order:
#   1. An already-assigned provider is returned as-is.
#   2. The first configured provider whose :host_match rules match this host.
#   3. :default when DNS lookups are disabled.
#   4. The provider reported by the host's mail exchangers, when it is not
#      :default.
#   5. :default otherwise.
#
# Returns the provider name.
def find_provider # :nodoc:
  return self.provider if self.provider

  EmailAddress::Config.providers.each do |name, provider_config|
    if provider_config[:host_match] && self.matches?(provider_config[:host_match])
      return self.set_provider(name, provider_config)
    end
  end

  return self.set_provider(:default) unless self.dns_enabled?

  # exchangers returns nil for non-:email host types; treat that as "unknown"
  # instead of calling #provider on nil.
  mx = self.exchangers
  detected = mx ? mx.provider : :default
  if detected != :default
    # Look up the configuration of the provider just detected; self.provider
    # is still nil at this point, so it cannot be used as the lookup key.
    self.set_provider(detected, EmailAddress::Config.provider(detected))
  end

  self.provider ||= self.set_provider(:default)
end
|
190
|
+
|
191
|
+
def set_provider(name, provider_config={}) # :nodoc:
|
192
|
+
self.config = EmailAddress::Config.all_settings(provider_config, @config)
|
193
|
+
self.provider = name
|
194
|
+
end
|
195
|
+
|
196
|
+
# Returns a hash of the parts of the host name after parsing.
|
197
|
+
def parts
|
198
|
+
{ host_name:self.host_name, dns_name:self.dns_name, subdomain:self.subdomains,
|
199
|
+
registration_name:self.registration_name, domain_name:self.domain_name,
|
200
|
+
tld2:self.tld2, tld:self.tld, ip_address:self.ip_address }
|
58
201
|
end
|
59
202
|
|
60
|
-
|
61
|
-
|
62
|
-
|
203
|
+
############################################################################
|
204
|
+
# Access and Queries
|
205
|
+
############################################################################
|
206
|
+
|
207
|
+
# Is this a fully-qualified domain name?
|
208
|
+
def fqdn?
|
209
|
+
self.tld ? true : false
|
210
|
+
end
|
211
|
+
|
212
|
+
def ip?
|
213
|
+
self.ip_address.nil? ? false : true
|
214
|
+
end
|
215
|
+
|
216
|
+
def ipv4?
|
217
|
+
self.ip? && self.ip_address.include?(".")
|
218
|
+
end
|
219
|
+
|
220
|
+
def ipv6?
|
221
|
+
self.ip? && self.ip_address.include?(":")
|
222
|
+
end
|
223
|
+
|
224
|
+
############################################################################
|
225
|
+
# Matching
|
226
|
+
############################################################################
|
227
|
+
|
228
|
+
# Takes a rule or a list of rules; returns the first rule this host matches, or false.
|
229
|
+
# Rules of the following formats are evaluated:
|
230
|
+
# * "example." => registration name
|
231
|
+
# * ".com" => top-level domain name
|
232
|
+
# * "google" => email service provider designation
|
233
|
+
# * "@goog*.com" => Glob match
|
234
|
+
# * IPv4 or IPv6 or CIDR Address
|
235
|
+
def matches?(rules)
|
236
|
+
rules = Array(rules)
|
237
|
+
return false if rules.empty?
|
238
|
+
rules.each do |rule|
|
239
|
+
return rule if rule == self.domain_name || rule == self.dns_name
|
240
|
+
return rule if registration_name_matches?(rule)
|
241
|
+
return rule if tld_matches?(rule)
|
242
|
+
return rule if domain_matches?(rule)
|
243
|
+
return rule if self.provider && provider_matches?(rule)
|
244
|
+
return rule if self.ip_matches?(rule)
|
245
|
+
end
|
246
|
+
false
|
247
|
+
end
|
248
|
+
|
249
|
+
# Does "example." match any tld?
|
250
|
+
# True when the rule has the form "example." and "example" is this host's
# registration name (so the rule matches the name under any TLD).
#
# Hosts without a registration name (IP-address literals, single-label names
# such as "localhost") never match; they return false instead of raising
# on nil.
def registration_name_matches?(rule)
  return false if self.registration_name.nil?
  "#{self.registration_name}." == rule
end
|
253
|
+
|
254
|
+
# Does "sub.example.com" match ".com" and ".example.com" top level names?
|
255
|
+
# Matches TLD (uk) or TLD2 (co.uk)
|
256
|
+
def tld_matches?(rule)
|
257
|
+
rule.match(/\A\.(.+)\z/) && ($1 == self.tld || $1 == self.tld2) ? true : false
|
258
|
+
end
|
259
|
+
|
260
|
+
def provider_matches?(rule)
|
261
|
+
rule.to_s =~ /\A[\w\-]*\z/ && self.provider && self.provider == rule.to_sym
|
63
262
|
end
|
64
263
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
264
|
+
# Does domain == rule or glob matches? (also tests the DNS (punycode) name)
|
265
|
+
# The rule may optionally start with a "@".
|
266
|
+
# Glob-matches the rule against the domain name and the DNS (punycode) name.
# A leading "@" on the rule is ignored.
#
# Returns the rule (without the leading "@") on a match, false otherwise.
# Names that were never parsed (nil, e.g. for IP-address hosts) are skipped
# rather than handed to File.fnmatch?, which raises TypeError on nil.
def domain_matches?(rule)
  rule = $1 if rule =~ /\A@(.+)/
  [self.domain_name, self.dns_name].each do |candidate|
    return rule if candidate && File.fnmatch?(rule, candidate)
  end
  false
end
|
272
|
+
|
273
|
+
# True if the host is an IP Address form, and that address matches
|
274
|
+
# the passed CIDR string ("10.9.8.0/24" or "2001:..../64")
|
275
|
+
def ip_matches?(cidr)
|
276
|
+
return false unless self.ip_address
|
277
|
+
return cidr if !cidr.include?("/") && cidr == self.ip_address
|
278
|
+
|
279
|
+
c = NetAddr::CIDR.create(cidr)
|
280
|
+
if cidr.include?(":") && self.ip_address.include?(":")
|
281
|
+
return cidr if c.matches?(self.ip_address)
|
282
|
+
elsif cidr.include?(".") && self.ip_address.include?(".")
|
283
|
+
return cidr if c.matches?(self.ip_address)
|
69
284
|
end
|
70
|
-
|
285
|
+
false
|
71
286
|
end
|
72
287
|
|
73
|
-
|
74
|
-
|
288
|
+
############################################################################
|
289
|
+
# DNS
|
290
|
+
############################################################################
|
291
|
+
|
292
|
+
# True if the :dns_lookup setting is enabled
|
293
|
+
def dns_enabled?
|
294
|
+
EmailAddress::Config.setting(:dns_lookup)
|
75
295
|
end
|
76
296
|
|
297
|
+
# True if the host name has a DNS A Record
|
298
|
+
def has_dns_a_record?
|
299
|
+
dns_a_record.size > 0 ? true : false
|
300
|
+
end
|
301
|
+
|
302
|
+
# Returns: [official_hostname, alias_hostnames, address_family, *address_list]
|
303
|
+
def dns_a_record
|
304
|
+
@_dns_a_record ||= Socket.gethostbyname(self.dns_name)
|
305
|
+
rescue SocketError # not found, but could also mean network not work
|
306
|
+
@_dns_a_record ||= []
|
307
|
+
end
|
308
|
+
|
309
|
+
# Returns an array of EmailAddress::Exchanger hosts configured in DNS.
|
310
|
+
# The array will be empty if none are configured.
|
311
|
+
def exchangers
|
312
|
+
return nil if @config[:host_type] != :email || !self.dns_enabled?
|
313
|
+
@_exchangers ||= EmailAddress::Exchanger.cached(self.dns_name)
|
314
|
+
end
|
315
|
+
|
316
|
+
# Returns a DNS TXT Record
|
77
317
|
def txt(alternate_host=nil)
|
78
318
|
Resolv::DNS.open do |dns|
|
79
|
-
records = dns.getresources(alternate_host || self.
|
319
|
+
records = dns.getresources(alternate_host || self.dns_name,
|
80
320
|
Resolv::DNS::Resource::IN::TXT)
|
81
321
|
records.empty? ? nil : records.map(&:data).join(" ")
|
82
322
|
end
|
@@ -95,8 +335,44 @@ module EmailAddress
|
|
95
335
|
fields
|
96
336
|
end
|
97
337
|
|
338
|
+
# Returns a hash of the domain's DMARC (https://en.wikipedia.org/wiki/DMARC)
|
339
|
+
# settings.
|
98
340
|
def dmarc
|
99
|
-
self.txt_hash("_dmarc." + self.
|
341
|
+
self.dns_name ? self.txt_hash("_dmarc." + self.dns_name) : {}
|
342
|
+
end
|
343
|
+
|
344
|
+
############################################################################
|
345
|
+
# Validation
|
346
|
+
############################################################################
|
347
|
+
|
348
|
+
# Returns true if the host name is valid according to the current configuration
|
349
|
+
def valid?(rule=@config[:dns_lookup]||:mx)
|
350
|
+
if self.provider != :default # well known
|
351
|
+
true
|
352
|
+
elsif self.ip_address
|
353
|
+
@config[:host_allow_ip] && self.valid_ip?
|
354
|
+
elsif rule == :mx
|
355
|
+
self.exchangers.mx_ips.size > 0
|
356
|
+
elsif rule == :a
|
357
|
+
self.has_dns_a_record?
|
358
|
+
elsif rule == :off
|
359
|
+
self.to_s.size <= MAX_HOST_LENGTH
|
360
|
+
else
|
361
|
+
false
|
362
|
+
end
|
363
|
+
end
|
364
|
+
|
365
|
+
# Returns true if the IP address given in that form of the host name
|
366
|
+
# is a potentially valid IP address. It does not check if the address
|
367
|
+
# is reachable.
|
368
|
+
def valid_ip?
|
369
|
+
if self.ip_address.nil?
|
370
|
+
false
|
371
|
+
elsif self.ip_address.include?(":")
|
372
|
+
self.ip_address =~ Resolv::IPv6::Regex
|
373
|
+
elsif self.ip_address.include?(".")
|
374
|
+
self.ip_address =~ Resolv::IPv4::Regex
|
375
|
+
end
|
100
376
|
end
|
101
377
|
|
102
378
|
end
|
data/lib/email_address/local.rb
CHANGED
@@ -10,7 +10,10 @@ module EmailAddress
|
|
10
10
|
# Quoted: space ( ) , : ; < > @ [ ]
|
11
11
|
# Quoted-Backslash-Escaped: \ "
|
12
12
|
# Quote local part or dot-separated sub-parts x."y".z
|
13
|
+
# RFC-5321 warns "a host that expects to receive mail SHOULD avoid defining mailboxes
|
14
|
+
# where the Local-part requires (or uses) the Quoted-string form".
|
13
15
|
# (comment)mailbox | mailbox(comment)
|
16
|
+
# . can not appear at beginning or end, or appear consecutively
|
14
17
|
# 8-bit/UTF-8: allowed but mail-system defined
|
15
18
|
# RFC 5321 also warns that "a host that expects to receive mail SHOULD avoid
|
16
19
|
# defining mailboxes where the Local-part requires (or uses) the Quoted-string form".
|
@@ -18,90 +21,339 @@ module EmailAddress
|
|
18
21
|
# Case: sensitive, but usually treated as equivalent
|
19
22
|
# Local Parts: comment, mailbox tag
|
20
23
|
# Length: up to 64 characters
|
24
|
+
# Note: gmail does allow ".." against RFC because they are ignored. This will
|
25
|
+
# be fixed by collapsing consecutive punctuation in conventional formats,
|
26
|
+
# and consider them typos.
|
21
27
|
##############################################################################
|
28
|
+
# RFC5322 Rules (Oct 2008):
|
29
|
+
#---------------------------------------------------------------------------
|
30
|
+
# addr-spec = local-part "@" domain
|
31
|
+
# local-part = dot-atom / quoted-string / obs-local-part
|
32
|
+
# domain = dot-atom / domain-literal / obs-domain
|
33
|
+
# domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
|
34
|
+
# dtext = %d33-90 / ; Printable US-ASCII
|
35
|
+
# %d94-126 / ; characters not including
|
36
|
+
# obs-dtext ; "[", "]", or "\"
|
37
|
+
# atext = ALPHA / DIGIT / ; Printable US-ASCII
|
38
|
+
# "!" / "#" / ; characters not including
|
39
|
+
# "$" / "%" / ; specials. Used for atoms.
|
40
|
+
# "&" / "'" /
|
41
|
+
# "*" / "+" /
|
42
|
+
# "-" / "/" /
|
43
|
+
# "=" / "?" /
|
44
|
+
# "^" / "_" /
|
45
|
+
# "`" / "{" /
|
46
|
+
# "|" / "}" /
|
47
|
+
# "~"
|
48
|
+
# atom = [CFWS] 1*atext [CFWS]
|
49
|
+
# dot-atom-text = 1*atext *("." 1*atext)
|
50
|
+
# dot-atom = [CFWS] dot-atom-text [CFWS]
|
51
|
+
# specials = "(" / ")" / ; Special characters that do
|
52
|
+
# "<" / ">" / ; not appear in atext
|
53
|
+
# "[" / "]" /
|
54
|
+
# ":" / ";" /
|
55
|
+
# "@" / "\" /
|
56
|
+
# "," / "." /
|
57
|
+
# DQUOTE
|
58
|
+
# qtext = %d33 / ; Printable US-ASCII
|
59
|
+
# %d35-91 / ; characters not including
|
60
|
+
# %d93-126 / ; "\" or the quote character
|
61
|
+
# obs-qtext
|
62
|
+
# qcontent = qtext / quoted-pair
|
63
|
+
# quoted-string = [CFWS]
|
64
|
+
# DQUOTE *([FWS] qcontent) [FWS] DQUOTE
|
65
|
+
# [CFWS]
|
66
|
+
############################################################################
|
22
67
|
class Local
|
23
|
-
attr_accessor :mailbox, :comment, :tag, :local
|
24
|
-
|
25
|
-
hostmaster usenet news webmaster www uucp ftp)
|
68
|
+
attr_accessor :mailbox, :comment, :tag, :local, :config, :original
|
69
|
+
attr_accessor :syntax
|
26
70
|
|
27
|
-
|
28
|
-
|
29
|
-
|
71
|
+
# RFC-2142: MAILBOX NAMES FOR COMMON SERVICES, ROLES AND FUNCTIONS
|
72
|
+
BUSINESS_MAILBOXES = %w(info marketing sales support)
|
73
|
+
NETWORK_MAILBOXES = %w(abuse noc security)
|
74
|
+
SERVICE_MAILBOXES = %w(postmaster hostmaster usenet news webmaster www uucp ftp)
|
75
|
+
SYSTEM_MAILBOXES = %w(help mailer-daemon root) # Not from RFC-2142
|
76
|
+
ROLE_MAILBOXES = %w(staff office orders billing careers jobs) # Not from RFC-2142
|
77
|
+
SPECIAL_MAILBOXES = BUSINESS_MAILBOXES + NETWORK_MAILBOXES + SERVICE_MAILBOXES +
|
78
|
+
SYSTEM_MAILBOXES + ROLE_MAILBOXES
|
79
|
+
STANDARD_MAX_SIZE = 64
|
80
|
+
|
81
|
+
# Conventional : word([.-+'_]word)*
|
82
|
+
CONVENTIONAL_MAILBOX_REGEX = /\A [\p{L}\p{N}]+ ( [\.\-\+\'_] [\p{L}\p{N}]+ )* \z/x
|
83
|
+
CONVENTIONAL_MAILBOX_WITHIN = /[\p{L}\p{N}]+ ( [\.\-\+\'_] [\p{L}\p{N}]+ )*/x
|
84
|
+
|
85
|
+
# Relaxed: same characters, relaxed order
|
86
|
+
RELAXED_MAILBOX_REGEX = /\A [\p{L}\p{N}]+ ( [\.\-\+\'_]+ [\p{L}\p{N}]+ )* \z/x
|
87
|
+
|
88
|
+
# RFC5322 Token: token."token".token (dot-separated tokens)
|
89
|
+
# Quoted Token can also have: SPACE \" \\ ( ) , : ; < > @ [ \ ] .
|
90
|
+
STANDARD_LOCAL_WITHIN = /
|
91
|
+
( [\p{L}\p{N}\!\#\$\%\&\'\*\+\-\/\=\?\^\_\`\{\|\}\~\(\)]+
|
92
|
+
| \" ( \\[\" \\] | [\x20 \! \x23-\x5B \x5D-\x7E \p{L} \p{N}] )+ \" )
|
93
|
+
( \. ( [\p{L}\p{N}\!\#\$\%\&\'\*\+\-\/\=\?\^\_\`\{\|\}\~\(\)]+
|
94
|
+
| \" ( \\[\" \\] | [\x20 \! \x23-\x5B \x5D-\x7E \p{L} \p{N}] )+ \" ) )* /x
|
95
|
+
STANDARD_LOCAL_REGEX = /\A #{STANDARD_LOCAL_WITHIN} \z/x
|
96
|
+
|
97
|
+
REDACTED_REGEX = /\A \{ [0-9a-f]{40} \} \z/x # {sha1}
|
98
|
+
|
99
|
+
def initialize(local, config={})
|
100
|
+
self.config = config.empty? ? EmailAddress::Config.all_settings : config
|
101
|
+
self.local = local
|
102
|
+
end
|
103
|
+
|
104
|
+
# Assigns the local part: remembers the text as given, normalizes case when
# :local_downcase is unset or true, then parses it into mailbox, tag and
# comment (using the :local_parse Proc when one is configured).
#
# Normalization is done on a copy, so the caller's string and the value kept
# in #original are not altered, and frozen strings are accepted.
def local=(raw)
  self.original = raw
  text = raw.dup
  text.downcase! if @config[:local_downcase].nil? || @config[:local_downcase]
  @local = text

  parser = @config[:local_parse]
  self.mailbox, self.tag, self.comment =
    parser.is_a?(Proc) ? parser.call(text) : self.parse(text)

  self.format
end
|
31
117
|
|
32
|
-
def parse(
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
118
|
+
def parse(raw)
|
119
|
+
if raw =~ /\A\"(.*)\"\z/ # Quoted
|
120
|
+
raw = $1
|
121
|
+
raw.gsub!(/\\(.)/, '\1') # Unescape
|
122
|
+
elsif @config[:local_fix]
|
123
|
+
raw.gsub!(' ','')
|
124
|
+
raw.gsub!(',','.')
|
125
|
+
raw.gsub!(/([^\p{L}\p{N}]{2,10})/) {|s| s[0] } # Stutter punctuation typo
|
126
|
+
end
|
127
|
+
raw, comment = self.parse_comment(raw)
|
128
|
+
mailbox, tag = self.parse_tag(raw)
|
129
|
+
mailbox ||= ""
|
130
|
+
[mailbox, tag, comment]
|
131
|
+
end
|
38
132
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
133
|
+
# "(comment)mailbox" or "mailbox(comment)", only one comment
|
134
|
+
# RFC Doesn't say what to do if 2 comments occur, so last wins
|
135
|
+
# Splits an RFC-style comment off the local part.
#
# Accepts "(comment)mailbox" and "mailbox(comment)". When both a leading and
# a trailing comment are present, the trailing one wins.
#
# Returns [remaining_text, comment]; comment is nil when none was found.
def parse_comment(raw)
  comment = nil
  if raw =~ /\A\((.+?)\)(.+)\z/ # (comment)mailbox: $1 is the comment, $2 the rest
    comment, raw = $1, $2
  end
  if raw =~ /\A(.+)\((.+?)\)\z/ # mailbox(comment)
    raw, comment = $1, $2
  end
  [raw, comment]
end
|
145
|
+
|
146
|
+
def parse_tag(raw)
|
147
|
+
separator = @config[:tag_separator] ||= '+'
|
148
|
+
raw.split(separator, 2)
|
149
|
+
end
|
150
|
+
|
151
|
+
# True if the value contains only Latin characters (7-bit ASCII)
|
152
|
+
def ascii?
|
153
|
+
! self.unicode?
|
154
|
+
end
|
155
|
+
|
156
|
+
# True if the value contains non-Latin Unicode characters
|
157
|
+
def unicode?
|
158
|
+
self.local =~ /[^\p{InBasicLatin}]/ ? true : false
|
159
|
+
end
|
160
|
+
|
161
|
+
# Returns true if the value matches the Redacted format
|
162
|
+
def redacted?
|
163
|
+
self.local =~ REDACTED_REGEX ? true : false
|
164
|
+
end
|
165
|
+
|
166
|
+
# Returns true if the value matches the Redacted format
|
167
|
+
def self.redacted?(local)
|
168
|
+
local =~ REDACTED_REGEX ? true : false
|
169
|
+
end
|
170
|
+
|
171
|
+
# Is the address for a common system or business role account?
|
172
|
+
def special?
|
173
|
+
SPECIAL_MAILBOXES.include?(mailbox)
|
43
174
|
end
|
44
175
|
|
45
176
|
def to_s
|
46
|
-
|
177
|
+
self.format
|
47
178
|
end
|
48
179
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
180
|
+
# Builds the local string according to configurations
|
181
|
+
# Builds the local string in the requested form.
#
# form - :conventional (default), :canonical, :relax / :relaxed, or :standard.
#        Defaults to the :local_format setting. :relaxed is accepted as a
#        synonym of :relax because #valid? names the same format :relaxed.
#
# When the :local_format setting is a Proc, it is called with this object and
# its result is returned regardless of form.
# Returns nil for an unrecognized form.
def format(form=@config[:local_format]||:conventional)
  return @config[:local_format].call(self) if @config[:local_format].is_a?(Proc)

  case form
  when :conventional    then self.conventional
  when :canonical       then self.canonical
  when :relax, :relaxed then self.relax
  when :standard        then self.standard
  end
end
|
55
194
|
|
56
|
-
|
57
|
-
|
195
|
+
# Returns a conventional form of the address
|
196
|
+
def conventional
|
197
|
+
if self.tag
|
198
|
+
[self.mailbox, self.tag].join(@config[:tag_separator])
|
199
|
+
else
|
200
|
+
self.mailbox
|
201
|
+
end
|
58
202
|
end
|
59
203
|
|
204
|
+
# Returns a canonical form of the address
|
60
205
|
def canonical
|
61
|
-
|
62
|
-
|
63
|
-
|
206
|
+
if @config[:mailbox_canonical]
|
207
|
+
@config[:mailbox_canonical].call(self.mailbox)
|
208
|
+
else
|
209
|
+
self.mailbox.downcase
|
64
210
|
end
|
65
|
-
format(m)
|
66
211
|
end
|
67
212
|
|
68
|
-
|
69
|
-
|
213
|
+
# Relaxed format: mailbox and tag, no comment, no extended character set
|
214
|
+
# Relaxed format: mailbox and tag, no comment, with the characters that are
# only legal inside a quoted local part removed.
#
# Returns a new String; the stored mailbox is left untouched (an in-place
# gsub! would modify it whenever there is no tag to append).
def relax
  form = self.mailbox
  form += @config[:tag_separator] + self.tag if self.tag
  form.gsub(/[ \"\(\),:<>@\[\]\\]/, '')
end
|
71
220
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
221
|
+
# Returns a normalized version of the standard address parts.
|
222
|
+
# Returns a normalized RFC-standard rendering of the local part:
# mailbox, optional tag, optional "(comment)".
#
# Backslashes and double quotes are backslash-escaped, and the whole value is
# wrapped in double quotes when it contains a character that is only legal in
# the quoted form: space " ( ) , : ; < > @ [ \ ]
#
# Returns a new String; the stored mailbox is never modified.
def standard
  form = self.mailbox
  form += @config[:tag_separator] + self.tag if self.tag
  form += "(" + self.comment + ")" if self.comment
  # Block form avoids replacement-string backslash parsing, where '\\\1'
  # would produce a literal "\1" instead of "\" followed by the match.
  form = form.gsub(/[\\"]/) { |ch| "\\#{ch}" }
  form = %Q("#{form}") if form =~ /[ "(),:;<>@\[\\\]]/
  form
end
|
79
232
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
end
|
233
|
+
# Sets the part to be the conventional form
|
234
|
+
def conventional!
|
235
|
+
self.local = self.conventional
|
236
|
+
end
|
237
|
+
|
238
|
+
# Sets the part to be the canonical form
|
239
|
+
def canonical!
|
240
|
+
self.local = self.canonical
|
89
241
|
end
|
90
242
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
(@mailbox, @tag) = *parts if parts.size > 1
|
243
|
+
# Drops unusual parts of Standard form to form a relaxed version.
|
244
|
+
def relax!
|
245
|
+
self.local = self.relax
|
95
246
|
end
|
96
247
|
|
97
|
-
#
|
98
|
-
def
|
99
|
-
|
248
|
+
# Returns the munged form of the address, like "ma*****"
|
249
|
+
def munge
|
250
|
+
self.to_s.sub(/\A(.{1,2}).*/) { |m| $1 + @config[:munge_string] }
|
100
251
|
end
|
101
252
|
|
102
253
|
# Mailbox with trailing numbers removed
|
103
254
|
def root_name
|
104
|
-
|
255
|
+
self.mailbox =~ /\A(.+?)\d+\z/ ? $1 : self.mailbox
|
256
|
+
end
|
257
|
+
|
258
|
+
############################################################################
|
259
|
+
# Validations
|
260
|
+
############################################################################
|
261
|
+
|
262
|
+
# True if the part is valid according to the configurations
|
263
|
+
# True if the local part is valid according to the configuration.
#
# format - :conventional (default), :relaxed / :relax, :redacted, :standard,
#          :none, or a Proc called with this object. Defaults to the
#          :local_format setting. :relax is accepted as a synonym of :relaxed
#          because #format and #format? name the same format :relax.
#
# A :mailbox_validator Proc in the configuration takes precedence over the
# format; it is called with the mailbox and the tag.
#
# Raises RuntimeError for an unrecognized format.
def valid?(format=@config[:local_format]||:conventional)
  validator = @config[:mailbox_validator]
  return validator.call(self.mailbox, self.tag) if validator.is_a?(Proc)
  return format.call(self) if format.is_a?(Proc)

  case format
  when :conventional    then self.conventional?
  when :relaxed, :relax then self.relaxed?
  when :redacted        then self.redacted?
  when :standard        then self.standard?
  when :none            then true
  else
    raise "Unknown format #{format}"
  end
end
|
282
|
+
|
283
|
+
# Returns the format of the address
|
284
|
+
def format?
|
285
|
+
# if :custom
|
286
|
+
if self.conventional?
|
287
|
+
:conventional
|
288
|
+
elsif self.relaxed?
|
289
|
+
:relax
|
290
|
+
elsif self.redacted?
|
291
|
+
:redacted
|
292
|
+
elsif self.standard?
|
293
|
+
:standard
|
294
|
+
else
|
295
|
+
:invalid
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
299
|
+
def valid_size?
|
300
|
+
return false if @config[:local_size] && !@config[:local_size].include?(self.local.size)
|
301
|
+
return false if @config[:mailbox_size] && !@config[:mailbox_size].include?(self.mailbox.size)
|
302
|
+
return false if self.local.size > STANDARD_MAX_SIZE
|
303
|
+
true
|
304
|
+
end
|
305
|
+
|
306
|
+
def valid_encoding?(enc=@config[:local_encoding]||:ascii)
|
307
|
+
return false if enc == :ascii && self.unicode?
|
308
|
+
return false if enc == :unicode && self.ascii?
|
309
|
+
true
|
310
|
+
end
|
311
|
+
|
312
|
+
# True if the part matches the conventional format
|
313
|
+
def conventional?
|
314
|
+
self.syntax = :invalid
|
315
|
+
self.local =~ CONVENTIONAL_MAILBOX_REGEX or return false
|
316
|
+
self.valid_size? or return false
|
317
|
+
self.valid_encoding? or return false
|
318
|
+
self.syntax = :conventional
|
319
|
+
true
|
320
|
+
end
|
321
|
+
|
322
|
+
# Relaxed conventional is not so strict about character order.
|
323
|
+
def relaxed?
|
324
|
+
self.syntax = :invalid
|
325
|
+
self.valid_size? or return false
|
326
|
+
self.valid_encoding? or return false
|
327
|
+
if self.local =~ RELAXED_MAILBOX_REGEX
|
328
|
+
self.syntax = :relaxed
|
329
|
+
true
|
330
|
+
else
|
331
|
+
false
|
332
|
+
end
|
333
|
+
end
|
334
|
+
|
335
|
+
# True if the part matches the RFC standard format
|
336
|
+
def standard?
|
337
|
+
self.syntax = :invalid
|
338
|
+
self.valid_size? or return false
|
339
|
+
self.valid_encoding? or return false
|
340
|
+
if self.local =~ STANDARD_LOCAL_REGEX
|
341
|
+
self.syntax = :standard
|
342
|
+
true
|
343
|
+
else
|
344
|
+
false
|
345
|
+
end
|
346
|
+
end
|
347
|
+
|
348
|
+
# Matches configured formatted form against File glob strings given.
|
349
|
+
# Rules must end in @ to distinguish themselves from other email part matches.
|
350
|
+
def matches?(*rules)
|
351
|
+
rules.flatten.each do |r|
|
352
|
+
if r =~ /(.+)@\z/
|
353
|
+
return r if File.fnmatch?($1, self.local)
|
354
|
+
end
|
355
|
+
end
|
356
|
+
false
|
105
357
|
end
|
106
358
|
end
|
107
359
|
end
|