email_address 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +10 -0
- data/Gemfile +0 -1
- data/README.md +451 -197
- data/Rakefile +4 -9
- data/email_address.gemspec +9 -5
- data/lib/email_address.rb +55 -24
- data/lib/email_address/active_record_validator.rb +5 -5
- data/lib/email_address/address.rb +152 -72
- data/lib/email_address/canonical_email_address_type.rb +46 -0
- data/lib/email_address/config.rb +148 -64
- data/lib/email_address/email_address_type.rb +15 -31
- data/lib/email_address/exchanger.rb +31 -34
- data/lib/email_address/host.rb +327 -51
- data/lib/email_address/local.rb +304 -52
- data/lib/email_address/version.rb +1 -1
- data/test/activerecord/test_ar.rb +22 -0
- data/test/activerecord/user.rb +71 -0
- data/test/email_address/test_address.rb +53 -27
- data/test/email_address/test_config.rb +23 -8
- data/test/email_address/test_exchanger.rb +22 -10
- data/test/email_address/test_host.rb +47 -6
- data/test/email_address/test_local.rb +80 -16
- data/test/test_email_address.rb +38 -4
- data/test/test_helper.rb +7 -5
- metadata +68 -34
- data/lib/email_address/domain_matcher.rb +0 -98
- data/lib/email_address/domain_parser.rb +0 -69
- data/lib/email_address/matcher.rb +0 -119
- data/lib/email_address/validator.rb +0 -141
- data/test/email_address/test_domain_matcher.rb +0 -21
- data/test/email_address/test_domain_parser.rb +0 -29
- data/test/email_address/test_matcher.rb +0 -44
- data/test/email_address/test_validator.rb +0 -16
data/lib/email_address/host.rb
CHANGED
@@ -1,82 +1,322 @@
|
|
1
1
|
require 'simpleidn'
|
2
|
+
require 'resolv'
|
3
|
+
require 'netaddr'
|
2
4
|
|
3
5
|
module EmailAddress
|
4
6
|
##############################################################################
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
7
|
+
# The EmailAddress Host is found on the right-hand side of the "@" symbol.
|
8
|
+
# It can be:
|
9
|
+
# * Host name (domain name with optional subdomain)
|
10
|
+
# * International Domain Name, in Unicode (Display) or Punycode (DNS) format
|
11
|
+
# * IP Address format, either IPv4 or IPv6, enclosed in square brackets.
|
12
|
+
# This is not Conventionally supported, but is part of the specification.
|
13
|
+
# * It can contain an optional comment, enclosed in parentheses, either at the
|
14
|
+
# beginning or ending of the host name. This is not well defined, so it is not
|
15
|
+
# supported here, except to parse it off, if found.
|
16
|
+
#
|
17
|
+
# For matching and query capabilities, the host name is parsed into these
|
18
|
+
# parts (with example data for "subdomain.example.co.uk"):
|
19
|
+
# * host_name: "subdomain.example.co.uk"
|
20
|
+
# * dns_name: punycode("subdomain.example.co.uk")
|
21
|
+
# * subdomain: "subdomain"
|
22
|
+
# * registration_name: "example"
|
23
|
+
# * domain_name: "example.co.uk"
|
24
|
+
# * tld: "uk"
|
25
|
+
# * tld2: "co.uk" (the 1 or 2 term TLD we could guess)
|
26
|
+
# * ip_address: nil or "ipaddress" used in [ipaddress] syntax
|
27
|
+
#
|
28
|
+
# The provider (Email Service Provider or ESP) is looked up according to the
|
29
|
+
# provider configuration rules, setting the config attribute to values of
|
30
|
+
# that provider.
|
17
31
|
##############################################################################
|
18
32
|
class Host
|
19
|
-
|
20
|
-
|
33
|
+
attr_accessor :host_name, :dns_name, :domain_name, :registration_name,
|
34
|
+
:tld, :tld2, :subdomains, :ip_address, :config, :provider,
|
35
|
+
:comment
|
36
|
+
MAX_HOST_LENGTH = 255
|
37
|
+
|
38
|
+
# Sometimes, you just need a Regexp...
|
39
|
+
DNS_HOST_REGEX = / [\p{L}\p{N}]+ (?: (?: \-{1,2} | \.) [\p{L}\p{N}]+ )*/x
|
40
|
+
|
41
|
+
# The IPv4 and IPv6 were lifted from Resolv::IPv?::Regex and tweaked to not
|
42
|
+
# \A...\z anchor at the edges.
|
43
|
+
IPv6_HOST_REGEX = /\[IPv6:
|
44
|
+
(?: (?:(?x-mi:
|
45
|
+
(?:[0-9A-Fa-f]{1,4}:){7}
|
46
|
+
[0-9A-Fa-f]{1,4}
|
47
|
+
)) |
|
48
|
+
(?:(?x-mi:
|
49
|
+
(?: (?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?) ::
|
50
|
+
(?: (?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?)
|
51
|
+
)) |
|
52
|
+
(?:(?x-mi:
|
53
|
+
(?: (?:[0-9A-Fa-f]{1,4}:){6,6})
|
54
|
+
(?: \d+)\.(?: \d+)\.(?: \d+)\.(?: \d+)
|
55
|
+
)) |
|
56
|
+
(?:(?x-mi:
|
57
|
+
(?: (?:[0-9A-Fa-f]{1,4}(?::[0-9A-Fa-f]{1,4})*)?) ::
|
58
|
+
(?: (?:[0-9A-Fa-f]{1,4}:)*)
|
59
|
+
(?: \d+)\.(?: \d+)\.(?: \d+)\.(?: \d+)
|
60
|
+
)))\]/ix
|
61
|
+
|
62
|
+
IPv4_HOST_REGEX = /\[((?x-mi:0
|
63
|
+
|1(?:[0-9][0-9]?)?
|
64
|
+
|2(?:[0-4][0-9]?|5[0-5]?|[6-9])?
|
65
|
+
|[3-9][0-9]?))\.((?x-mi:0
|
66
|
+
|1(?:[0-9][0-9]?)?
|
67
|
+
|2(?:[0-4][0-9]?|5[0-5]?|[6-9])?
|
68
|
+
|[3-9][0-9]?))\.((?x-mi:0
|
69
|
+
|1(?:[0-9][0-9]?)?
|
70
|
+
|2(?:[0-4][0-9]?|5[0-5]?|[6-9])?
|
71
|
+
|[3-9][0-9]?))\.((?x-mi:0
|
72
|
+
|1(?:[0-9][0-9]?)?
|
73
|
+
|2(?:[0-4][0-9]?|5[0-5]?|[6-9])?
|
74
|
+
|[3-9][0-9]?))\]/x
|
75
|
+
|
76
|
+
# Matches conventional host name and punycode: domain.tld, x--punycode.tld
|
77
|
+
CANONICAL_HOST_REGEX = /\A #{DNS_HOST_REGEX} \z/x
|
78
|
+
|
79
|
+
# Matches Host forms: DNS name, IPv4, or IPv6 formats
|
80
|
+
STANDARD_HOST_REGEX = /\A (?: #{DNS_HOST_REGEX}
|
81
|
+
| #{IPv4_HOST_REGEX} | #{IPv6_HOST_REGEX}) \z/ix
|
21
82
|
|
22
83
|
# host name -
|
23
|
-
# *
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
host_name||= ''
|
30
|
-
@host_name = host_name.downcase
|
31
|
-
@host_type = host_type
|
32
|
-
parse_host(@host_name)
|
84
|
+
# * host type - :email for an email host, :mx for exchanger host
|
85
|
+
def initialize(host_name, config={})
|
86
|
+
@original = host_name ||= ''
|
87
|
+
config[:host_type] ||= :email
|
88
|
+
@config = config
|
89
|
+
parse(host_name)
|
33
90
|
end
|
34
91
|
|
35
|
-
|
36
|
-
|
92
|
+
# Returns the String representation of the host name (or IP)
|
93
|
+
def name
|
94
|
+
if self.ipv4?
|
95
|
+
"[#{self.ip_address}]"
|
96
|
+
elsif self.ipv6?
|
97
|
+
"[IPv6:#{self.ip_address}]"
|
98
|
+
elsif @config[:host_encoding] && @config[:host_encoding] == :unicode
|
99
|
+
::SimpleIDN.to_unicode(self.host_name)
|
100
|
+
else
|
101
|
+
self.dns_name
|
102
|
+
end
|
37
103
|
end
|
38
|
-
alias :
|
104
|
+
alias :to_s :name
|
39
105
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
@parts.each { |k,v| instance_variable_set("@#{k}", v) }
|
106
|
+
# The canonical host name is the simplified, DNS host name
|
107
|
+
def canonical
|
108
|
+
self.dns_name
|
44
109
|
end
|
45
110
|
|
46
|
-
#
|
47
|
-
|
48
|
-
|
111
|
+
# Returns the munged version of the name, replacing everything after the
|
112
|
+
# initial two characters with "*****" or the configured "munge_string".
|
113
|
+
def munge
|
114
|
+
self.host_name.sub(/\A(.{1,2}).*/) { |m| $1 + @config[:munge_string] }
|
49
115
|
end
|
50
116
|
|
51
|
-
|
52
|
-
|
117
|
+
############################################################################
|
118
|
+
# Parsing
|
119
|
+
############################################################################
|
120
|
+
|
121
|
+
|
122
|
+
def parse(host) # :nodoc:
|
123
|
+
host = self.parse_comment(host)
|
124
|
+
|
125
|
+
if host =~ /\A\[IPv6:(.+)\]/i
|
126
|
+
self.ip_address = $1
|
127
|
+
elsif host =~ /\A\[(\d{1,3}(\.\d{1,3}){3})\]/ # IPv4
|
128
|
+
self.ip_address = $1
|
129
|
+
else
|
130
|
+
self.host_name = host
|
131
|
+
end
|
53
132
|
end
|
54
133
|
|
55
|
-
|
56
|
-
|
57
|
-
|
134
|
+
def parse_comment(host) # :nodoc:
|
135
|
+
if host =~ /\A\((.+?)\)(.+)/ # (comment)domain.tld
|
136
|
+
self.comment, host = $1, $2
|
137
|
+
end
|
138
|
+
if host =~ /\A(.+)\((.+?)\)\z/ # domain.tld(comment)
|
139
|
+
host, self.comment = $1, $2
|
140
|
+
end
|
141
|
+
host
|
142
|
+
end
|
143
|
+
|
144
|
+
def host_name=(name)
|
145
|
+
@host_name = name = name.strip.downcase.gsub(' ', '').gsub(/\(.*\)/, '')
|
146
|
+
@dns_name = ::SimpleIDN.to_ascii(self.host_name)
|
147
|
+
|
148
|
+
# Subdomain only (root@localhost)
|
149
|
+
if name.index('.').nil?
|
150
|
+
self.subdomains = name
|
151
|
+
|
152
|
+
# Split sub.domain from .tld: *.com, *.xx.cc, *.cc
|
153
|
+
elsif name =~ /\A(.+)\.(\w{3,10})\z/ ||
|
154
|
+
name =~ /\A(.+)\.(\w{1,3}\.\w\w)\z/ ||
|
155
|
+
name =~ /\A(.+)\.(\w\w)\z/
|
156
|
+
|
157
|
+
sub_and_domain, self.tld2 = [$1, $2] # sub+domain, com || co.uk
|
158
|
+
self.tld = self.tld2.sub(/\A.+\./, '') # co.uk => uk
|
159
|
+
if sub_and_domain =~ /\A(.+)\.(.+)\z/ # is subdomain? sub.example [.tld2]
|
160
|
+
self.subdomains = $1
|
161
|
+
self.registration_name = $2
|
162
|
+
else
|
163
|
+
self.registration_name = sub_and_domain
|
164
|
+
#self.domain_name = sub_and_domain + '.' + self.tld2
|
165
|
+
end
|
166
|
+
self.domain_name = self.registration_name + '.' + self.tld2
|
167
|
+
self.find_provider
|
168
|
+
end
|
169
|
+
end
|
170
|
+
|
171
|
+
def find_provider # :nodoc:
|
172
|
+
return self.provider if self.provider
|
173
|
+
|
174
|
+
EmailAddress::Config.providers.each do |provider, config|
|
175
|
+
if config[:host_match] && self.matches?(config[:host_match])
|
176
|
+
return self.set_provider(provider, config)
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
return self.set_provider(:default) unless self.dns_enabled?
|
181
|
+
|
182
|
+
provider = self.exchangers.provider
|
183
|
+
if provider != :default
|
184
|
+
self.set_provider(provider,
|
185
|
+
EmailAddress::Config.provider(self.provider))
|
186
|
+
end
|
187
|
+
|
188
|
+
self.provider ||= self.set_provider(:default)
|
189
|
+
end
|
190
|
+
|
191
|
+
def set_provider(name, provider_config={}) # :nodoc:
|
192
|
+
self.config = EmailAddress::Config.all_settings(provider_config, @config)
|
193
|
+
self.provider = name
|
194
|
+
end
|
195
|
+
|
196
|
+
# Returns a hash of the parts of the host name after parsing.
|
197
|
+
def parts
|
198
|
+
{ host_name:self.host_name, dns_name:self.dns_name, subdomain:self.subdomains,
|
199
|
+
registration_name:self.registration_name, domain_name:self.domain_name,
|
200
|
+
tld2:self.tld2, tld:self.tld, ip_address:self.ip_address }
|
58
201
|
end
|
59
202
|
|
60
|
-
|
61
|
-
|
62
|
-
|
203
|
+
############################################################################
|
204
|
+
# Access and Queries
|
205
|
+
############################################################################
|
206
|
+
|
207
|
+
# Is this a fully-qualified domain name?
|
208
|
+
def fqdn?
|
209
|
+
self.tld ? true : false
|
210
|
+
end
|
211
|
+
|
212
|
+
def ip?
|
213
|
+
self.ip_address.nil? ? false : true
|
214
|
+
end
|
215
|
+
|
216
|
+
def ipv4?
|
217
|
+
self.ip? && self.ip_address.include?(".")
|
218
|
+
end
|
219
|
+
|
220
|
+
def ipv6?
|
221
|
+
self.ip? && self.ip_address.include?(":")
|
222
|
+
end
|
223
|
+
|
224
|
+
############################################################################
|
225
|
+
# Matching
|
226
|
+
############################################################################
|
227
|
+
|
228
|
+
# Takes one or more rule strings; returns the first matching rule, or false.
|
229
|
+
# Rules of the following formats are evaluated:
|
230
|
+
# * "example." => registration name
|
231
|
+
# * ".com" => top-level domain name
|
232
|
+
# * "google" => email service provider designation
|
233
|
+
# * "@goog*.com" => Glob match
|
234
|
+
# * IPv4 or IPv6 or CIDR Address
|
235
|
+
def matches?(rules)
|
236
|
+
rules = Array(rules)
|
237
|
+
return false if rules.empty?
|
238
|
+
rules.each do |rule|
|
239
|
+
return rule if rule == self.domain_name || rule == self.dns_name
|
240
|
+
return rule if registration_name_matches?(rule)
|
241
|
+
return rule if tld_matches?(rule)
|
242
|
+
return rule if domain_matches?(rule)
|
243
|
+
return rule if self.provider && provider_matches?(rule)
|
244
|
+
return rule if self.ip_matches?(rule)
|
245
|
+
end
|
246
|
+
false
|
247
|
+
end
|
248
|
+
|
249
|
+
# Does "example." match any tld?
|
250
|
+
def registration_name_matches?(rule)
|
251
|
+
self.registration_name + '.' == rule ? true : false
|
252
|
+
end
|
253
|
+
|
254
|
+
# Does "sub.example.com" match ".com" and ".example.com" top level names?
|
255
|
+
# Matches TLD (uk) or TLD2 (co.uk)
|
256
|
+
def tld_matches?(rule)
|
257
|
+
rule.match(/\A\.(.+)\z/) && ($1 == self.tld || $1 == self.tld2) ? true : false
|
258
|
+
end
|
259
|
+
|
260
|
+
def provider_matches?(rule)
|
261
|
+
rule.to_s =~ /\A[\w\-]*\z/ && self.provider && self.provider == rule.to_sym
|
63
262
|
end
|
64
263
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
264
|
+
# Does domain == rule or glob matches? (also tests the DNS (punycode) name)
|
265
|
+
# The rule may optionally start with a "@".
|
266
|
+
def domain_matches?(rule)
|
267
|
+
rule = $1 if rule =~ /\A@(.+)/
|
268
|
+
return rule if File.fnmatch?(rule, self.domain_name)
|
269
|
+
return rule if File.fnmatch?(rule, self.dns_name)
|
270
|
+
false
|
271
|
+
end
|
272
|
+
|
273
|
+
# True if the host is an IP Address form, and that address matches
|
274
|
+
# the passed CIDR string ("10.9.8.0/24" or "2001:..../64")
|
275
|
+
def ip_matches?(cidr)
|
276
|
+
return false unless self.ip_address
|
277
|
+
return cidr if !cidr.include?("/") && cidr == self.ip_address
|
278
|
+
|
279
|
+
c = NetAddr::CIDR.create(cidr)
|
280
|
+
if cidr.include?(":") && self.ip_address.include?(":")
|
281
|
+
return cidr if c.matches?(self.ip_address)
|
282
|
+
elsif cidr.include?(".") && self.ip_address.include?(".")
|
283
|
+
return cidr if c.matches?(self.ip_address)
|
69
284
|
end
|
70
|
-
|
285
|
+
false
|
71
286
|
end
|
72
287
|
|
73
|
-
|
74
|
-
|
288
|
+
############################################################################
|
289
|
+
# DNS
|
290
|
+
############################################################################
|
291
|
+
|
292
|
+
# True if the :dns_lookup setting is enabled
|
293
|
+
def dns_enabled?
|
294
|
+
EmailAddress::Config.setting(:dns_lookup)
|
75
295
|
end
|
76
296
|
|
297
|
+
# True if the host name has a DNS A Record
|
298
|
+
def has_dns_a_record?
|
299
|
+
dns_a_record.size > 0 ? true : false
|
300
|
+
end
|
301
|
+
|
302
|
+
# Returns: [official_hostname, alias_hostnames, address_family, *address_list]
|
303
|
+
def dns_a_record
|
304
|
+
@_dns_a_record ||= Socket.gethostbyname(self.dns_name)
|
305
|
+
rescue SocketError # not found, but could also mean network not work
|
306
|
+
@_dns_a_record ||= []
|
307
|
+
end
|
308
|
+
|
309
|
+
# Returns an array of EmailAddress::Exchanger hosts configured in DNS.
|
310
|
+
# The array will be empty if none are configured.
|
311
|
+
def exchangers
|
312
|
+
return nil if @config[:host_type] != :email || !self.dns_enabled?
|
313
|
+
@_exchangers ||= EmailAddress::Exchanger.cached(self.dns_name)
|
314
|
+
end
|
315
|
+
|
316
|
+
# Returns a DNS TXT Record
|
77
317
|
def txt(alternate_host=nil)
|
78
318
|
Resolv::DNS.open do |dns|
|
79
|
-
records = dns.getresources(alternate_host || self.
|
319
|
+
records = dns.getresources(alternate_host || self.dns_name,
|
80
320
|
Resolv::DNS::Resource::IN::TXT)
|
81
321
|
records.empty? ? nil : records.map(&:data).join(" ")
|
82
322
|
end
|
@@ -95,8 +335,44 @@ module EmailAddress
|
|
95
335
|
fields
|
96
336
|
end
|
97
337
|
|
338
|
+
# Returns a hash of the domain's DMARC (https://en.wikipedia.org/wiki/DMARC)
|
339
|
+
# settings.
|
98
340
|
def dmarc
|
99
|
-
self.txt_hash("_dmarc." + self.
|
341
|
+
self.dns_name ? self.txt_hash("_dmarc." + self.dns_name) : {}
|
342
|
+
end
|
343
|
+
|
344
|
+
############################################################################
|
345
|
+
# Validation
|
346
|
+
############################################################################
|
347
|
+
|
348
|
+
# Returns true if the host name is valid according to the current configuration
|
349
|
+
def valid?(rule=@config[:dns_lookup]||:mx)
|
350
|
+
if self.provider != :default # well known
|
351
|
+
true
|
352
|
+
elsif self.ip_address
|
353
|
+
@config[:host_allow_ip] && self.valid_ip?
|
354
|
+
elsif rule == :mx
|
355
|
+
self.exchangers.mx_ips.size > 0
|
356
|
+
elsif rule == :a
|
357
|
+
self.has_dns_a_record?
|
358
|
+
elsif rule == :off
|
359
|
+
self.to_s.size <= MAX_HOST_LENGTH
|
360
|
+
else
|
361
|
+
false
|
362
|
+
end
|
363
|
+
end
|
364
|
+
|
365
|
+
# Returns true if the IP address given in that form of the host name
|
366
|
+
# is a potentially valid IP address. It does not check if the address
|
367
|
+
# is reachable.
|
368
|
+
def valid_ip?
|
369
|
+
if self.ip_address.nil?
|
370
|
+
false
|
371
|
+
elsif self.ip_address.include?(":")
|
372
|
+
self.ip_address =~ Resolv::IPv6::Regex
|
373
|
+
elsif self.ip_address.include?(".")
|
374
|
+
self.ip_address =~ Resolv::IPv4::Regex
|
375
|
+
end
|
100
376
|
end
|
101
377
|
|
102
378
|
end
|
data/lib/email_address/local.rb
CHANGED
@@ -10,7 +10,10 @@ module EmailAddress
|
|
10
10
|
# Quoted: space ( ) , : ; < > @ [ ]
|
11
11
|
# Quoted-Backslash-Escaped: \ "
|
12
12
|
# Quote local part or dot-separated sub-parts x."y".z
|
13
|
+
# RFC-5321 warns "a host that expects to receive mail SHOULD avoid defining mailboxes
|
14
|
+
# where the Local-part requires (or uses) the Quoted-string form".
|
13
15
|
# (comment)mailbox | mailbox(comment)
|
16
|
+
# . can not appear at beginning or end, or appear consecutively
|
14
17
|
# 8-bit/UTF-8: allowed but mail-system defined
|
15
18
|
# RFC 5321 also warns that "a host that expects to receive mail SHOULD avoid
|
16
19
|
# defining mailboxes where the Local-part requires (or uses) the Quoted-string form".
|
@@ -18,90 +21,339 @@ module EmailAddress
|
|
18
21
|
# Case: sensitive, but usually treated as equivalent
|
19
22
|
# Local Parts: comment, mailbox tag
|
20
23
|
# Length: up to 64 characters
|
24
|
+
# Note: gmail does allow ".." against RFC because they are ignored. This will
|
25
|
+
# be fixed by collapsing consecutive punctuation in conventional formats,
|
26
|
+
# and considering them typos.
|
21
27
|
##############################################################################
|
28
|
+
# RFC5322 Rules (Oct 2008):
|
29
|
+
#---------------------------------------------------------------------------
|
30
|
+
# addr-spec = local-part "@" domain
|
31
|
+
# local-part = dot-atom / quoted-string / obs-local-part
|
32
|
+
# domain = dot-atom / domain-literal / obs-domain
|
33
|
+
# domain-literal = [CFWS] "[" *([FWS] dtext) [FWS] "]" [CFWS]
|
34
|
+
# dtext = %d33-90 / ; Printable US-ASCII
|
35
|
+
# %d94-126 / ; characters not including
|
36
|
+
# obs-dtext ; "[", "]", or "\"
|
37
|
+
# atext = ALPHA / DIGIT / ; Printable US-ASCII
|
38
|
+
# "!" / "#" / ; characters not including
|
39
|
+
# "$" / "%" / ; specials. Used for atoms.
|
40
|
+
# "&" / "'" /
|
41
|
+
# "*" / "+" /
|
42
|
+
# "-" / "/" /
|
43
|
+
# "=" / "?" /
|
44
|
+
# "^" / "_" /
|
45
|
+
# "`" / "{" /
|
46
|
+
# "|" / "}" /
|
47
|
+
# "~"
|
48
|
+
# atom = [CFWS] 1*atext [CFWS]
|
49
|
+
# dot-atom-text = 1*atext *("." 1*atext)
|
50
|
+
# dot-atom = [CFWS] dot-atom-text [CFWS]
|
51
|
+
# specials = "(" / ")" / ; Special characters that do
|
52
|
+
# "<" / ">" / ; not appear in atext
|
53
|
+
# "[" / "]" /
|
54
|
+
# ":" / ";" /
|
55
|
+
# "@" / "\" /
|
56
|
+
# "," / "." /
|
57
|
+
# DQUOTE
|
58
|
+
# qtext = %d33 / ; Printable US-ASCII
|
59
|
+
# %d35-91 / ; characters not including
|
60
|
+
# %d93-126 / ; "\" or the quote character
|
61
|
+
# obs-qtext
|
62
|
+
# qcontent = qtext / quoted-pair
|
63
|
+
# quoted-string = [CFWS]
|
64
|
+
# DQUOTE *([FWS] qcontent) [FWS] DQUOTE
|
65
|
+
# [CFWS]
|
66
|
+
############################################################################
|
22
67
|
class Local
|
23
|
-
attr_accessor :mailbox, :comment, :tag, :local
|
24
|
-
|
25
|
-
hostmaster usenet news webmaster www uucp ftp)
|
68
|
+
attr_accessor :mailbox, :comment, :tag, :local, :config, :original
|
69
|
+
attr_accessor :syntax
|
26
70
|
|
27
|
-
|
28
|
-
|
29
|
-
|
71
|
+
# RFC-2142: MAILBOX NAMES FOR COMMON SERVICES, ROLES AND FUNCTIONS
|
72
|
+
BUSINESS_MAILBOXES = %w(info marketing sales support)
|
73
|
+
NETWORK_MAILBOXES = %w(abuse noc security)
|
74
|
+
SERVICE_MAILBOXES = %w(postmaster hostmaster usenet news webmaster www uucp ftp)
|
75
|
+
SYSTEM_MAILBOXES = %w(help mailer-daemon root) # Not from RFC-2142
|
76
|
+
ROLE_MAILBOXES = %w(staff office orders billing careers jobs) # Not from RFC-2142
|
77
|
+
SPECIAL_MAILBOXES = BUSINESS_MAILBOXES + NETWORK_MAILBOXES + SERVICE_MAILBOXES +
|
78
|
+
SYSTEM_MAILBOXES + ROLE_MAILBOXES
|
79
|
+
STANDARD_MAX_SIZE = 64
|
80
|
+
|
81
|
+
# Conventional : word([.-+'_]word)*
|
82
|
+
CONVENTIONAL_MAILBOX_REGEX = /\A [\p{L}\p{N}]+ ( [\.\-\+\'_] [\p{L}\p{N}]+ )* \z/x
|
83
|
+
CONVENTIONAL_MAILBOX_WITHIN = /[\p{L}\p{N}]+ ( [\.\-\+\'_] [\p{L}\p{N}]+ )*/x
|
84
|
+
|
85
|
+
# Relaxed: same characters, relaxed order
|
86
|
+
RELAXED_MAILBOX_REGEX = /\A [\p{L}\p{N}]+ ( [\.\-\+\'_]+ [\p{L}\p{N}]+ )* \z/x
|
87
|
+
|
88
|
+
# RFC5322 Token: token."token".token (dot-separated tokens)
|
89
|
+
# Quoted Token can also have: SPACE \" \\ ( ) , : ; < > @ [ \ ] .
|
90
|
+
STANDARD_LOCAL_WITHIN = /
|
91
|
+
( [\p{L}\p{N}\!\#\$\%\&\'\*\+\-\/\=\?\^\_\`\{\|\}\~\(\)]+
|
92
|
+
| \" ( \\[\" \\] | [\x20 \! \x23-\x5B \x5D-\x7E \p{L} \p{N}] )+ \" )
|
93
|
+
( \. ( [\p{L}\p{N}\!\#\$\%\&\'\*\+\-\/\=\?\^\_\`\{\|\}\~\(\)]+
|
94
|
+
| \" ( \\[\" \\] | [\x20 \! \x23-\x5B \x5D-\x7E \p{L} \p{N}] )+ \" ) )* /x
|
95
|
+
STANDARD_LOCAL_REGEX = /\A #{STANDARD_LOCAL_WITHIN} \z/x
|
96
|
+
|
97
|
+
REDACTED_REGEX = /\A \{ [0-9a-f]{40} \} \z/x # {sha1}
|
98
|
+
|
99
|
+
def initialize(local, config={})
|
100
|
+
self.config = config.empty? ? EmailAddress::Config.all_settings : config
|
101
|
+
self.local = local
|
102
|
+
end
|
103
|
+
|
104
|
+
def local=(raw)
|
105
|
+
self.original = raw
|
106
|
+
raw.downcase! if @config[:local_downcase].nil? || @config[:local_downcase]
|
107
|
+
@local = raw
|
108
|
+
|
109
|
+
if @config[:local_parse].is_a?(Proc)
|
110
|
+
self.mailbox, self.tag, self.comment = @config[:local_parse].call(raw)
|
111
|
+
else
|
112
|
+
self.mailbox, self.tag, self.comment = self.parse(raw)
|
113
|
+
end
|
114
|
+
|
115
|
+
self.format
|
30
116
|
end
|
31
117
|
|
32
|
-
def parse(
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
118
|
+
def parse(raw)
|
119
|
+
if raw =~ /\A\"(.*)\"\z/ # Quoted
|
120
|
+
raw = $1
|
121
|
+
raw.gsub!(/\\(.)/, '\1') # Unescape
|
122
|
+
elsif @config[:local_fix]
|
123
|
+
raw.gsub!(' ','')
|
124
|
+
raw.gsub!(',','.')
|
125
|
+
raw.gsub!(/([^\p{L}\p{N}]{2,10})/) {|s| s[0] } # Stutter punctuation typo
|
126
|
+
end
|
127
|
+
raw, comment = self.parse_comment(raw)
|
128
|
+
mailbox, tag = self.parse_tag(raw)
|
129
|
+
mailbox ||= ""
|
130
|
+
[mailbox, tag, comment]
|
131
|
+
end
|
38
132
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
133
|
+
# "(comment)mailbox" or "mailbox(comment)", only one comment
|
134
|
+
# RFC Doesn't say what to do if 2 comments occur, so last wins
|
135
|
+
def parse_comment(raw)
|
136
|
+
c = nil
|
137
|
+
if raw =~ /\A\((.+?)\)(.+)\z/
|
138
|
+
c, raw = [$2, $1]
|
139
|
+
end
|
140
|
+
if raw =~ /\A(.+)\((.+?)\)\z/
|
141
|
+
raw, c = [$1, $2]
|
142
|
+
end
|
143
|
+
[raw, c]
|
144
|
+
end
|
145
|
+
|
146
|
+
def parse_tag(raw)
|
147
|
+
separator = @config[:tag_separator] ||= '+'
|
148
|
+
raw.split(separator, 2)
|
149
|
+
end
|
150
|
+
|
151
|
+
# True if the value contains only Latin characters (7-bit ASCII)
|
152
|
+
def ascii?
|
153
|
+
! self.unicode?
|
154
|
+
end
|
155
|
+
|
156
|
+
# True if the value contains non-Latin Unicode characters
|
157
|
+
def unicode?
|
158
|
+
self.local =~ /[^\p{InBasicLatin}]/ ? true : false
|
159
|
+
end
|
160
|
+
|
161
|
+
# Returns true if the value matches the Redacted format
|
162
|
+
def redacted?
|
163
|
+
self.local =~ REDACTED_REGEX ? true : false
|
164
|
+
end
|
165
|
+
|
166
|
+
# Returns true if the value matches the Redacted format
|
167
|
+
def self.redacted?(local)
|
168
|
+
local =~ REDACTED_REGEX ? true : false
|
169
|
+
end
|
170
|
+
|
171
|
+
# Is the address for a common system or business role account?
|
172
|
+
def special?
|
173
|
+
SPECIAL_MAILBOXES.include?(mailbox)
|
43
174
|
end
|
44
175
|
|
45
176
|
def to_s
|
46
|
-
|
177
|
+
self.format
|
47
178
|
end
|
48
179
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
180
|
+
# Builds the local string according to configurations
|
181
|
+
def format(form=@config[:local_format]||:conventional)
|
182
|
+
if @config[:local_format].is_a?(Proc)
|
183
|
+
@config[:local_format].call(self)
|
184
|
+
elsif form == :conventional
|
185
|
+
self.conventional
|
186
|
+
elsif form == :canonical
|
187
|
+
self.canonical
|
188
|
+
elsif form == :relax
|
189
|
+
self.relax
|
190
|
+
elsif form == :standard
|
191
|
+
self.standard
|
192
|
+
end
|
54
193
|
end
|
55
194
|
|
56
|
-
|
57
|
-
|
195
|
+
# Returns a conventional form of the address
|
196
|
+
def conventional
|
197
|
+
if self.tag
|
198
|
+
[self.mailbox, self.tag].join(@config[:tag_separator])
|
199
|
+
else
|
200
|
+
self.mailbox
|
201
|
+
end
|
58
202
|
end
|
59
203
|
|
204
|
+
# Returns a canonical form of the address
|
60
205
|
def canonical
|
61
|
-
|
62
|
-
|
63
|
-
|
206
|
+
if @config[:mailbox_canonical]
|
207
|
+
@config[:mailbox_canonical].call(self.mailbox)
|
208
|
+
else
|
209
|
+
self.mailbox.downcase
|
64
210
|
end
|
65
|
-
format(m)
|
66
211
|
end
|
67
212
|
|
68
|
-
|
69
|
-
|
213
|
+
# Relaxed format: mailbox and tag, no comment, no extended character set
|
214
|
+
def relax
|
215
|
+
form = self.mailbox
|
216
|
+
form += @config[:tag_separator] + self.tag if self.tag
|
217
|
+
form.gsub!(/[ \"\(\),:<>@\[\]\\]/,'')
|
218
|
+
form
|
70
219
|
end
|
71
220
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
221
|
+
# Returns a normalized version of the standard address parts.
|
222
|
+
def standard
|
223
|
+
form = self.mailbox
|
224
|
+
form += @config[:tag_separator] + self.tag if self.tag
|
225
|
+
form += "(" + self.comment + ")" if self.comment
|
226
|
+
form.gsub!(/([\\\"])/, '\\\1') # Escape \ and "
|
227
|
+
if form =~ /[ \"\(\),:<>@\[\\\]]/ # Space and "(),:;<>@[\]
|
228
|
+
form = %Q("#{form}")
|
76
229
|
end
|
77
|
-
|
230
|
+
form
|
78
231
|
end
|
79
232
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
end
|
233
|
+
# Sets the part to be the conventional form
|
234
|
+
def conventional!
|
235
|
+
self.local = self.conventional
|
236
|
+
end
|
237
|
+
|
238
|
+
# Sets the part to be the canonical form
|
239
|
+
def canonical!
|
240
|
+
self.local = self.canonical
|
89
241
|
end
|
90
242
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
(@mailbox, @tag) = *parts if parts.size > 1
|
243
|
+
# Drops unusual parts of Standard form to form a relaxed version.
|
244
|
+
def relax!
|
245
|
+
self.local = self.relax
|
95
246
|
end
|
96
247
|
|
97
|
-
#
|
98
|
-
def
|
99
|
-
|
248
|
+
# Returns the munged form of the address, like "ma*****"
|
249
|
+
def munge
|
250
|
+
self.to_s.sub(/\A(.{1,2}).*/) { |m| $1 + @config[:munge_string] }
|
100
251
|
end
|
101
252
|
|
102
253
|
# Mailbox with trailing numbers removed
|
103
254
|
def root_name
|
104
|
-
|
255
|
+
self.mailbox =~ /\A(.+?)\d+\z/ ? $1 : self.mailbox
|
256
|
+
end
|
257
|
+
|
258
|
+
############################################################################
|
259
|
+
# Validations
|
260
|
+
############################################################################
|
261
|
+
|
262
|
+
# True if the part is valid according to the configurations
|
263
|
+
def valid?(format=@config[:local_format]||:conventional)
|
264
|
+
if @config[:mailbox_validator].is_a?(Proc)
|
265
|
+
@config[:mailbox_validator].call(self.mailbox, self.tag)
|
266
|
+
elsif format.is_a?(Proc)
|
267
|
+
format.call(self)
|
268
|
+
elsif format == :conventional
|
269
|
+
self.conventional?
|
270
|
+
elsif format == :relaxed
|
271
|
+
self.relaxed?
|
272
|
+
elsif format == :redacted
|
273
|
+
self.redacted?
|
274
|
+
elsif format == :standard
|
275
|
+
self.standard?
|
276
|
+
elsif format == :none
|
277
|
+
true
|
278
|
+
else
|
279
|
+
raise "Unknown format #{format}"
|
280
|
+
end
|
281
|
+
end
|
282
|
+
|
283
|
+
# Returns the format of the address
|
284
|
+
def format?
|
285
|
+
# if :custom
|
286
|
+
if self.conventional?
|
287
|
+
:conventional
|
288
|
+
elsif self.relaxed?
|
289
|
+
:relax
|
290
|
+
elsif self.redacted?
|
291
|
+
:redacted
|
292
|
+
elsif self.standard?
|
293
|
+
:standard
|
294
|
+
else
|
295
|
+
:invalid
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
299
|
+
def valid_size?
|
300
|
+
return false if @config[:local_size] && !@config[:local_size].include?(self.local.size)
|
301
|
+
return false if @config[:mailbox_size] && !@config[:mailbox_size].include?(self.mailbox.size)
|
302
|
+
return false if self.local.size > STANDARD_MAX_SIZE
|
303
|
+
true
|
304
|
+
end
|
305
|
+
|
306
|
+
def valid_encoding?(enc=@config[:local_encoding]||:ascii)
|
307
|
+
return false if enc == :ascii && self.unicode?
|
308
|
+
return false if enc == :unicode && self.ascii?
|
309
|
+
true
|
310
|
+
end
|
311
|
+
|
312
|
+
# True if the part matches the conventional format
|
313
|
+
def conventional?
|
314
|
+
self.syntax = :invalid
|
315
|
+
self.local =~ CONVENTIONAL_MAILBOX_REGEX or return false
|
316
|
+
self.valid_size? or return false
|
317
|
+
self.valid_encoding? or return false
|
318
|
+
self.syntax = :conventional
|
319
|
+
true
|
320
|
+
end
|
321
|
+
|
322
|
+
# Relaxed conventional is not so strict about character order.
|
323
|
+
def relaxed?
|
324
|
+
self.syntax = :invalid
|
325
|
+
self.valid_size? or return false
|
326
|
+
self.valid_encoding? or return false
|
327
|
+
if self.local =~ RELAXED_MAILBOX_REGEX
|
328
|
+
self.syntax = :relaxed
|
329
|
+
true
|
330
|
+
else
|
331
|
+
false
|
332
|
+
end
|
333
|
+
end
|
334
|
+
|
335
|
+
# True if the part matches the RFC standard format
|
336
|
+
def standard?
|
337
|
+
self.syntax = :invalid
|
338
|
+
self.valid_size? or return false
|
339
|
+
self.valid_encoding? or return false
|
340
|
+
if self.local =~ STANDARD_LOCAL_REGEX
|
341
|
+
self.syntax = :standard
|
342
|
+
true
|
343
|
+
else
|
344
|
+
false
|
345
|
+
end
|
346
|
+
end
|
347
|
+
|
348
|
+
# Matches configured formatted form against File glob strings given.
|
349
|
+
# Rules must end in @ to distinguish themselves from other email part matches.
|
350
|
+
def matches?(*rules)
|
351
|
+
rules.flatten.each do |r|
|
352
|
+
if r =~ /(.+)@\z/
|
353
|
+
return r if File.fnmatch?($1, self.local)
|
354
|
+
end
|
355
|
+
end
|
356
|
+
false
|
105
357
|
end
|
106
358
|
end
|
107
359
|
end
|