mail_address 1.1.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -7
- data/lib/mail_address.rb +1 -0
- data/lib/mail_address/mail_address.rb +1 -2
- data/lib/mail_address/simple_parser.rb +133 -0
- data/lib/mail_address/version.rb +1 -1
- data/spec/mail_address_spec.rb +2 -2
- data/spec/simple_parser_spec.rb +86 -0
- metadata +5 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 55a1aa4aa12065d46469e8c35b6041f37ee42cc1
|
|
4
|
+
data.tar.gz: b797ab4c2a23d3ce801beaf16172ed0b0ff4280b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: db93d3bf8aa49be0e99c1932ab0a6b2e226078e2768d97ec542aaa118c5dc65ec203fd9ed781ec17843390fcd78c2968501b6b61cd377953df9b6a079f685f4c
|
|
7
|
+
data.tar.gz: dfba207c8827f687e68401236ae672350d669f5734db8ff4a119bec122f1f0d7f820f5c6ae8d99927ac73b3d893a785009ba384404c59fbf81a3d1093572bcff
|
data/README.md
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
# MailAddress [](https://travis-ci.org/kizashi1122/mail_address) [](https://coveralls.io/r/kizashi1122/mail_address)
|
|
2
2
|
|
|
3
|
-
MailAddress is a
|
|
4
|
-
This library is implemented based on Perl Module Mail::Address and added some improvements.
|
|
3
|
+
MailAddress is a port of Mail::Address from Perl.
|
|
5
4
|
|
|
6
|
-
[mail](https://github.com/mikel/mail) is a great gem library. But some email addresses
|
|
5
|
+
[mail](https://github.com/mikel/mail) is a great gem library, but some email addresses cannot be parsed with it. In Perl, [Mail::Address](http://search.cpan.org/~markov/MailTools-2.14/lib/Mail/Address.pod) is a very common library for parsing email addresses. Conveniently, Mail::Address can parse even non-RFC-compliant email addresses such as:
|
|
7
6
|
|
|
8
7
|
```rb
|
|
9
8
|
# mail gem cannot parse the following addresses
|
|
10
9
|
Ello [Do Not Reply] <do-not-reply@ello.co> # [, ] are not permitted according to RFC5322
|
|
11
|
-
大阪 太郎<osaka@example.com> # no whitespace just before
|
|
10
|
+
大阪 太郎<osaka@example.com> # no whitespace just before `<`
|
|
12
11
|
```
|
|
13
|
-
|
|
14
|
-
So I straightforwardly converted the Perl module Mail::Address to a Ruby gem. Then I revised it because the original Mail::Address also has some bad points. For example:
|
|
12
|
+
But Mail::Address has some bad points (below). These are fixed in MailAddress.
|
|
15
13
|
|
|
16
14
|
- if no ending parenthesis in name part, cannot parse correctly.
|
|
17
15
|
- Modifications of name part are too much.
|
|
18
16
|
|
|
17
|
+
However, the MailAddress module cannot parse whitespace-separated addresses.
|
|
18
|
+
Many people paste email addresses from Excel or other spreadsheets. To support this, MailAddress also includes a parser ported from the [Google Closure Library](https://github.com/google/closure-library/blob/master/closure/goog/format/emailaddress.js).
|
|
19
19
|
|
|
20
20
|
## Installation
|
|
21
21
|
|
|
@@ -60,7 +60,7 @@ p addrs[1].host # "example.jp"
|
|
|
60
60
|
p addrs[1].user # "osaka"
|
|
61
61
|
p addrs[1].original # "大阪 太郎 <osaka@example.jp>"
|
|
62
62
|
```
|
|
63
|
-
`address.name` and `address.phrase` are almost same.
|
|
63
|
+
`address.name` and `address.phrase` are almost the same.
|
|
64
64
|
`address.phrase` keeps outermost double quotes or parentheses.
|
|
65
65
|
|
|
66
66
|
## Contributing
|
data/lib/mail_address.rb
CHANGED
|
@@ -73,13 +73,12 @@ module MailAddress
|
|
|
73
73
|
|
|
74
74
|
def self._tokenize(addresses)
|
|
75
75
|
line = addresses.join(',') # $_
|
|
76
|
-
words
|
|
76
|
+
words = []
|
|
77
77
|
|
|
78
78
|
line.sub!(/\A\s+/, '')
|
|
79
79
|
line.gsub!(/[\r\n]+/,' ')
|
|
80
80
|
|
|
81
81
|
while (line != '')
|
|
82
|
-
field = ''
|
|
83
82
|
tmp = nil
|
|
84
83
|
if (
|
|
85
84
|
line.sub!(/\A("(?:[^"\\]+|\\.)*")(\s*)/, '') || # "..."
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
module MailAddress

  # --------------------------------------------------------------------------------------------------
  # Simple address-list parser that also accepts whitespace-separated addresses.
  #
  # This module is ported from Google Closure JavaScript Library
  #  -> https://github.com/google/closure-library/blob/master/closure/goog/format/emailaddress.js
  # --------------------------------------------------------------------------------------------------

  # Characters that open a grouped token; CLOSERS_ holds the matching closer
  # at the same index.
  OPENERS_ = '"<(['.freeze
  CLOSERS_ = '">)]'.freeze
  # SPECIAL_CHARS = '()<>@:\\\".[]'
  # Characters that always terminate the current address.
  ADDRESS_SEPARATORS_ = ',;'.freeze
  # CHARS_REQUIRE_QUOTES_ = SPECIAL_CHARS + ADDRESS_SEPARATORS_
  ESCAPED_DOUBLE_QUOTES_ = /\\\"/
  ESCAPED_BACKSLASHES_ = /\\\\/
  LOCAL_PART_REGEXP_STR_ = '[+a-zA-Z0-9_.!#$%&\'*\\/=?^`{|}~-]+'.freeze
  DOMAIN_PART_REGEXP_STR_ = '([a-zA-Z0-9-]+\\.)+[a-zA-Z0-9]{2,63}'.freeze
  # Whole-string matcher (anchored with \A and \z) for "local@domain".
  EMAIL_ADDRESS_ = Regexp.new('\\A' + LOCAL_PART_REGEXP_STR_ + '@' + DOMAIN_PART_REGEXP_STR_ + '\\z').freeze

  # Splits +str+ into addresses and parses each one.
  #
  # Addresses are separated by ',' or ';'. A single space also acts as a
  # separator, but only when the text accumulated so far already forms a valid
  # address (see is_valid); otherwise the space is kept as part of the
  # current address (e.g. between a display name and "<addr>").
  #
  # @param str [String] raw address list
  # @return [Array<MailAddress::Address>] one entry per non-blank address
  def self.parse_simple(str)
    result = []
    email = ''.dup # mutable accumulator for the address being collected

    # Normalise newlines and runs of whitespace first so that every
    # whitespace token seen below is exactly one ' '.
    str = collapse_whitespace(str)

    i = 0
    while i < str.length
      token = get_token(str, i)
      if is_address_separator(token) || (token == ' ' && is_valid(parse_internal(email)))
        result.push(parse_internal(email)) unless is_empty_or_whitespace(email)
        email = ''.dup
        i += 1 # a separator is always a single character
        next
      end
      email << token
      i += token.length
    end

    # Add the final address, which has no trailing separator.
    result.push(parse_internal(email)) unless is_empty_or_whitespace(email)
    result
  end

  # Parses one address such as 'John <john@example.com>' or 'john@example.com'.
  #
  # @param addr [String] text of a single address
  # @return [MailAddress::Address] built from (name, address, addr)
  def self.parse_internal(addr)
    name = ''.dup
    address = ''
    i = 0
    while i < addr.length
      token = get_token(addr, i)
      if token[0] == '<' && token.index('>')
        # "<...>" token: the address is whatever sits between the brackets.
        address = token[1, token.index('>') - 1]
      elsif address == ''
        # Everything seen before an address has been found belongs to the name.
        name << token
      end
      i += token.length
    end

    # Check if it's a simple email address of the form "jlim@google.com".
    if address == '' && name.index('@')
      address = name
      name = ''
    end

    name = collapse_whitespace(name)
    # Strip one pair of enclosing single quotes, then one pair of double quotes.
    name = name[1..-2] if name.start_with?('\'') && name.end_with?('\'')
    name = name[1..-2] if name.start_with?('"') && name.end_with?('"')

    # Replace escaped quotes and slashes (quotes first, then backslashes).
    name = name.gsub(ESCAPED_DOUBLE_QUOTES_, '"')
    name = name.gsub(ESCAPED_BACKSLASHES_, '\\')

    MailAddress::Address.new(name, address.strip, addr)
  end

  # Returns the token starting at +pos+.
  #
  # If the character at +pos+ is an unescaped opener ('"', '<', '(' or '[')
  # and its matching closer occurs later in +str+, the token is the whole
  # span from opener to closer inclusive. In every other case the token is the
  # single character at +pos+.
  #
  # @param str [String]
  # @param pos [Integer] index into +str+; must be within bounds
  # @return [String] the token (never empty for an in-bounds +pos+)
  def self.get_token(str, pos)
    ch = str[pos]
    opener_index = OPENERS_.index(ch)
    return ch unless opener_index

    # If an opener is an escaped quote we do not treat it as a real opener
    # and keep accumulating the token.
    return ch if is_escaped_dbl_quote(str, pos)

    closer_char = CLOSERS_[opener_index]
    end_pos = str.index(closer_char, pos + 1)

    # If the closer is a quote we go forward skipping escaped quotes until we
    # hit the real closing one. String#index returns nil (not -1 as in the
    # JavaScript original) when there is no further match, so test for nil.
    while end_pos && is_escaped_dbl_quote(str, end_pos)
      end_pos = str.index(closer_char, end_pos + 1)
    end

    # An unclosed opener degrades to a one-character token.
    end_pos ? str[pos..end_pos] : ch
  end

  # Tells whether the character at +pos+ is a double quote preceded by an odd
  # number of backslashes (i.e. the quote itself is escaped).
  #
  # @param str [String]
  # @param pos [Integer]
  # @return [Boolean]
  def self.is_escaped_dbl_quote(str, pos)
    return false if str[pos] != '"'

    slash_count = 0
    idx = pos - 1
    while idx >= 0 && str[idx] == '\\'
      slash_count += 1
      idx -= 1
    end
    slash_count.odd?
  end

  # Replaces every run of whitespace (including the byte pair \xc2\xa0, the
  # UTF-8 encoding of a no-break space) with one ' ' and strips both ends.
  #
  # @param str [String]
  # @return [String] a new string
  def self.collapse_whitespace(str)
    str.gsub(/[\s\xc2\xa0]+/, ' ').strip
  end

  # Truthy when +str+ is empty or consists only of whitespace.
  #
  # @param str [String]
  # @return [Integer, nil] 0 when blank, nil otherwise (the raw =~ result)
  def self.is_empty_or_whitespace(str)
    /\A[\s\xc2\xa0]*\z/ =~ str
  end

  # Tells whether +ch+ is one of the address separators (',' or ';').
  #
  # @param ch [String] a token returned by get_token
  # @return [Boolean]
  def self.is_address_separator(ch)
    ADDRESS_SEPARATORS_.include?(ch)
  end

  # Truthy when the address part of +address+ matches EMAIL_ADDRESS_.
  #
  # @param address [MailAddress::Address] any object responding to #address
  # @return [Integer, nil] match offset when valid, nil otherwise
  def self.is_valid(address)
    EMAIL_ADDRESS_ =~ address.address
  end

end
|
data/lib/mail_address/version.rb
CHANGED
data/spec/mail_address_spec.rb
CHANGED
|
@@ -445,12 +445,12 @@ describe MailAddress do
|
|
|
445
445
|
line = 'john <john@example.com' # lack of right angle bracket
|
|
446
446
|
expect {
|
|
447
447
|
results = MailAddress.parse(line)
|
|
448
|
-
}.to raise_error(StandardError)
|
|
448
|
+
}.to raise_error(StandardError, "Unmatched '<>' in line")
|
|
449
449
|
|
|
450
450
|
line = 'john <john@example.com> (last' # lack of right parenthesis
|
|
451
451
|
expect {
|
|
452
452
|
results = MailAddress.parse(line)
|
|
453
|
-
}.to raise_error(StandardError)
|
|
453
|
+
}.to raise_error(StandardError, "cannot find end paren")
|
|
454
454
|
end
|
|
455
455
|
|
|
456
456
|
it "unbelievable but existed address" do
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
require 'spec_helper'
|
|
3
|
+
require 'pp'
|
|
4
|
+
|
|
5
|
+
#
|
|
6
|
+
# These tests are almost ports of the following test code:
|
|
7
|
+
#
|
|
8
|
+
# https://github.com/google/closure-library/blob/master/closure/goog/format/emailaddress_test.js
|
|
9
|
+
#
|
|
10
|
+
|
|
11
|
+
describe MailAddress do

  # Parses +input+ with MailAddress.parse_simple and checks that the address
  # part of each result equals the corresponding entry of +expected_list+.
  #
  # @param input [String] raw address list
  # @param expected_list [Array<String, nil>] expected #address per result
  # @param opt_message [String, nil] custom failure message; RSpec's default
  #   message is used when nil
  # @return [Array] the parsed results, for further assertions by the caller
  def assert_parsed_list(input, expected_list, opt_message = nil)
    result = MailAddress.parse_simple input
    expect(result.size).to eq(expected_list.size), opt_message
    expected_list.each_with_index do |expected, index|
      expect(result[index].address).to eq(expected), opt_message
    end
    result
  end

  it "simple parser - empty address" do
    assert_parsed_list('', [])
    assert_parsed_list(',,', [])
  end

  it "simple parser - single address" do
    assert_parsed_list('<foo@gmail.com>', ['foo@gmail.com'])
    assert_parsed_list('<foo@gmail.com>,', ['foo@gmail.com'])
    assert_parsed_list('<foo@gmail.com>, ', ['foo@gmail.com'])
    assert_parsed_list(',<foo@gmail.com>', ['foo@gmail.com'])
    assert_parsed_list(' ,<foo@gmail.com>', ['foo@gmail.com'])
  end

  # Previously shared its description with the example above.
  it "simple parser - multiple addresses" do
    assert_parsed_list('<foo@gmail.com>, <bar@gmail.com>', ['foo@gmail.com', 'bar@gmail.com'])
    assert_parsed_list('<foo@gmail.com>, <bar@gmail.com>,', ['foo@gmail.com', 'bar@gmail.com'])
    assert_parsed_list('<foo@gmail.com>, <bar@gmail.com>, ', ['foo@gmail.com', 'bar@gmail.com'])
    assert_parsed_list(
      'John Doe <john@gmail.com>; Jane Doe <jane@gmail.com>, <jerry@gmail.com>',
      ['john@gmail.com', 'jane@gmail.com', 'jerry@gmail.com']
    )
    # Entries wrapped in quotes, parentheses or square brackets are expected
    # to come back with a nil address.
    assert_parsed_list(
      'aaa@gmail.com, "bbb@gmail.com", <ccc@gmail.com>, (ddd@gmail.com), [eee@gmail.com]',
      ['aaa@gmail.com', nil, 'ccc@gmail.com', nil, nil]
    )
  end

  it "testparseListWithQuotedSpecialChars" do
    res = assert_parsed_list(
      'a\\"b\\"c <d@e.f>,"g\\"h\\"i\\\\" <j@k.l>',
      ['d@e.f', 'j@k.l']
    )
    expect(res[0].phrase).to eq('a"b"c')
    expect(res[1].phrase).to eq('g"h"i\\')
  end

  it "testparseListWithCommaInLocalPart" do
    res = assert_parsed_list(
      '"Doe, John" <doe.john@gmail.com>, <someone@gmail.com>',
      ['doe.john@gmail.com', 'someone@gmail.com']
    )
    expect(res[0].phrase).to eq('Doe, John')
    expect(res[1].phrase).to eq('')
  end

  it "testparseListWithWhitespaceSeparatedEmails" do
    res = assert_parsed_list(
      'a@b.com <c@d.com> e@f.com "G H" <g@h.com> i@j.com',
      ['a@b.com', 'c@d.com', 'e@f.com', 'g@h.com', 'i@j.com']
    )
    expect(res[3].phrase).to eq('G H')
  end

  it "testparseListSystemNewlines" do
    # These Windows newlines can be inserted in IE8, or copied-and-pasted from
    # bad data on a Mac, as seen in bug 11081852.
    assert_parsed_list("a@b.com\r\nc@d.com", ['a@b.com', 'c@d.com'],
                       'Failed to parse Windows newlines')
    assert_parsed_list("a@b.com\nc@d.com", ['a@b.com', 'c@d.com'],
                       'Failed to parse *nix newlines')
    assert_parsed_list("a@b.com\n\rc@d.com", ['a@b.com', 'c@d.com'],
                       'Failed to parse obsolete newlines')
    assert_parsed_list("a@b.com\rc@d.com", ['a@b.com', 'c@d.com'],
                       'Failed to parse pre-OS X Mac newlines')
  end

end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mail_address
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Kizashi Nagata
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-
|
|
11
|
+
date: 2015-05-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -90,9 +90,11 @@ files:
|
|
|
90
90
|
- lib/mail_address.rb
|
|
91
91
|
- lib/mail_address/address.rb
|
|
92
92
|
- lib/mail_address/mail_address.rb
|
|
93
|
+
- lib/mail_address/simple_parser.rb
|
|
93
94
|
- lib/mail_address/version.rb
|
|
94
95
|
- mail_address.gemspec
|
|
95
96
|
- spec/mail_address_spec.rb
|
|
97
|
+
- spec/simple_parser_spec.rb
|
|
96
98
|
- spec/spec_helper.rb
|
|
97
99
|
homepage: https://github.com/kizashi1122/mail_address
|
|
98
100
|
licenses:
|
|
@@ -120,4 +122,5 @@ specification_version: 4
|
|
|
120
122
|
summary: Simple Mail Address Parser
|
|
121
123
|
test_files:
|
|
122
124
|
- spec/mail_address_spec.rb
|
|
125
|
+
- spec/simple_parser_spec.rb
|
|
123
126
|
- spec/spec_helper.rb
|