mailparser 0.5.3 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +6 -6
- data/lib/mailparser.rb +2 -0
- data/lib/mailparser/loose.rb +29 -19
- data/test/test_loose.rb +100 -2
- data/test/test_mailparser.rb +9 -0
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
5
|
-
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: acf77838874db7d6c17b1e85420b5027eaea07c5
|
4
|
+
data.tar.gz: b67ae4ea3374d66de3cb11f40a5fe25074e682d6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 790f81bfd597d4b209efb81a29ffa6356484ab5b8570140b16dac2b0e33982269df2a0fa11f16362c06e198c759eef8b087d17a3a50e2967ef4daea6710d9a97
|
7
|
+
data.tar.gz: 2f2502e6975c84472da8675c4c42d65056e1ec4f3845a2ebe8a5477a3162f4ecaa0f43c37b729da9c189ca5dd12609704bac7c2dba002d095cabe2266ca931ff
|
data/lib/mailparser.rb
CHANGED
@@ -81,6 +81,8 @@ module MailParser
|
|
81
81
|
r = @raw.chomp.gsub(/\r?\n/, '').gsub(/\t/, ' ')
|
82
82
|
if @opt[:decode_mime_header] then
|
83
83
|
@parsed = RFC2047.decode(r, @opt)
|
84
|
+
elsif @opt[:output_charset]
|
85
|
+
@parsed = @opt[:charset_converter].call(@opt[:output_charset], @opt[:output_charset], r)
|
84
86
|
else
|
85
87
|
@parsed = r
|
86
88
|
end
|
data/lib/mailparser/loose.rb
CHANGED
@@ -67,7 +67,8 @@ module MailParser
|
|
67
67
|
rescue
|
68
68
|
t = Time.now
|
69
69
|
end
|
70
|
-
|
70
|
+
year = t.year > 9999 ? 9999 : t.year
|
71
|
+
return RFC2822::DateTime.new(year, t.month, t.day, t.hour, t.min, t.sec, t.zone)
|
71
72
|
end
|
72
73
|
|
73
74
|
# parse From, To,Cc field
|
@@ -91,7 +92,7 @@ module MailParser
|
|
91
92
|
# @param [Hash] opt options
|
92
93
|
# @return [MailParser::RFC2822::MsgId]
|
93
94
|
def parse_msg_id(str, opt={})
|
94
|
-
msg_id_list(str)[0]
|
95
|
+
msg_id_list(str, opt)[0]
|
95
96
|
end
|
96
97
|
|
97
98
|
# parse In-Reply-To, References field
|
@@ -99,7 +100,7 @@ module MailParser
|
|
99
100
|
# @param [Hash] opt options
|
100
101
|
# @return [MailParser::RFC2822::MsgIdList]
|
101
102
|
def parse_msg_id_list(str, opt={})
|
102
|
-
msg_id_list(str)
|
103
|
+
msg_id_list(str, opt)
|
103
104
|
end
|
104
105
|
|
105
106
|
# parse Keywords field
|
@@ -112,7 +113,7 @@ module MailParser
|
|
112
113
|
if opt[:decode_mime_header] then
|
113
114
|
s.map!{|i| RFC2047.decode(i, opt)}
|
114
115
|
end
|
115
|
-
s
|
116
|
+
s.map{|_| _conv(_, opt)}
|
116
117
|
end
|
117
118
|
|
118
119
|
# parse Return-Path field
|
@@ -145,6 +146,9 @@ module MailParser
|
|
145
146
|
i += 1
|
146
147
|
end
|
147
148
|
end
|
149
|
+
name_val.keys.each do |k|
|
150
|
+
name_val[k] = _conv(name_val[k], opt)
|
151
|
+
end
|
148
152
|
RFC2822::Received.new(name_val, date)
|
149
153
|
end
|
150
154
|
|
@@ -158,13 +162,13 @@ module MailParser
|
|
158
162
|
params = {}
|
159
163
|
token.each do |param|
|
160
164
|
pn, pv = param.join.split(/=/, 2)
|
161
|
-
params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/,"")
|
165
|
+
params[pn.to_s] = _conv(pv.to_s.gsub(/\A"|"\z/,""), opt)
|
162
166
|
end
|
163
167
|
type = "text" if type.nil? or type.empty?
|
164
168
|
if subtype.nil? or subtype.empty?
|
165
169
|
subtype = type == "text" ? "plain" : ""
|
166
170
|
end
|
167
|
-
RFC2045::ContentType.new(type, subtype, params)
|
171
|
+
RFC2045::ContentType.new(_conv(type, opt), _conv(subtype, opt), params)
|
168
172
|
end
|
169
173
|
|
170
174
|
# parse Content-Transfer-Encoding field
|
@@ -172,7 +176,7 @@ module MailParser
|
|
172
176
|
# @param [Hash] opt options
|
173
177
|
# @return [MailParser::RFC2045::ContentTransferEncoding]
|
174
178
|
def parse_content_transfer_encoding(str, opt={})
|
175
|
-
RFC2045::ContentTransferEncoding.new(Tokenizer.token(str).first.to_s)
|
179
|
+
RFC2045::ContentTransferEncoding.new(_conv(Tokenizer.token(str).first.to_s, opt))
|
176
180
|
end
|
177
181
|
|
178
182
|
# parse Mime-Version field
|
@@ -180,7 +184,7 @@ module MailParser
|
|
180
184
|
# @param [Hash] opt options
|
181
185
|
# @return [String]
|
182
186
|
def parse_mime_version(str, opt={})
|
183
|
-
Tokenizer.token(str).join
|
187
|
+
_conv(Tokenizer.token(str).join, opt)
|
184
188
|
end
|
185
189
|
|
186
190
|
# parse Content-Disposition field
|
@@ -193,9 +197,9 @@ module MailParser
|
|
193
197
|
params = {}
|
194
198
|
token.each do |param|
|
195
199
|
pn, pv = param.join.split(/=/, 2)
|
196
|
-
params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/,"")
|
200
|
+
params[pn.to_s] = _conv(pv.to_s.gsub(/\A"|"\z/,""), opt)
|
197
201
|
end
|
198
|
-
RFC2183::ContentDisposition.new(type, params)
|
202
|
+
RFC2183::ContentDisposition.new(_conv(type, opt), params)
|
199
203
|
end
|
200
204
|
|
201
205
|
# split arry by delim
|
@@ -229,11 +233,11 @@ module MailParser
|
|
229
233
|
if opt[:decode_mime_header] then
|
230
234
|
display_name = RFC2047.decode(display_name, opt)
|
231
235
|
end
|
232
|
-
mailaddr = m[a1+1..a2-1].join
|
236
|
+
mailaddr = _conv(m[a1+1..a2-1].join, opt)
|
233
237
|
local_part, domain = mailaddr.split(/@/, 2)
|
234
|
-
ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain), display_name)
|
238
|
+
ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain), _conv(display_name, opt))
|
235
239
|
else
|
236
|
-
local_part, domain = m.join.split(/@/, 2)
|
240
|
+
local_part, domain = _conv(m.join, opt).split(/@/, 2)
|
237
241
|
ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain))
|
238
242
|
end
|
239
243
|
end
|
@@ -243,7 +247,7 @@ module MailParser
|
|
243
247
|
# parse MsgId type field
|
244
248
|
# @param [String] str
|
245
249
|
# @return [Array<MailParser::RFC2822::MsgId>]
|
246
|
-
def msg_id_list(str)
|
250
|
+
def msg_id_list(str, opt={})
|
247
251
|
ret = []
|
248
252
|
flag = false
|
249
253
|
msgid = nil
|
@@ -257,14 +261,14 @@ module MailParser
|
|
257
261
|
when ">"
|
258
262
|
if flag
|
259
263
|
flag = false
|
260
|
-
ret << RFC2822::MsgId.new(msgid)
|
264
|
+
ret << RFC2822::MsgId.new(_conv(msgid, opt))
|
261
265
|
end
|
262
266
|
else
|
263
267
|
msgid << m if flag
|
264
268
|
end
|
265
269
|
end
|
266
270
|
if ret.empty?
|
267
|
-
ret = str.split.map{|s| RFC2822::MsgId.new(s)}
|
271
|
+
ret = str.split.map{|s| RFC2822::MsgId.new(_conv(s, opt))}
|
268
272
|
end
|
269
273
|
return ret
|
270
274
|
end
|
@@ -291,9 +295,9 @@ module MailParser
|
|
291
295
|
@ss.pos = pos
|
292
296
|
token << s
|
293
297
|
end
|
294
|
-
elsif s = @ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}]))*\s*\"/o) ||
|
295
|
-
@ss.scan(/\[(\s*(\\[#{TEXT_RE}]|[#{DTEXT_RE}]))*\s*\]/o) ||
|
296
|
-
@ss.scan(/[#{ATEXT_RE}]+/o)
|
298
|
+
elsif s = @ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}\x80-\xff]))*\s*\"/o) ||
|
299
|
+
@ss.scan(/\[(\s*(\\[#{TEXT_RE}]|[#{DTEXT_RE}\x80-\xff]))*\s*\]/o) ||
|
300
|
+
@ss.scan(/[#{ATEXT_RE}\x80-\xff]+/o)
|
297
301
|
token << s
|
298
302
|
else
|
299
303
|
token << @ss.scan(/./)
|
@@ -337,5 +341,11 @@ module MailParser
|
|
337
341
|
end
|
338
342
|
end
|
339
343
|
|
344
|
+
def _conv(str, opt)
|
345
|
+
cv = opt[:charset_converter]
|
346
|
+
cs = opt[:output_charset]
|
347
|
+
cv && cs ? cv.call(cs, cs, str) : str
|
348
|
+
end
|
349
|
+
|
340
350
|
end
|
341
351
|
end
|
data/test/test_loose.rb
CHANGED
@@ -31,6 +31,23 @@ class TC_Loose < Test::Unit::TestCase
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
+
def test_parse_date_y10000()
|
35
|
+
tzbak = ENV["TZ"]
|
36
|
+
begin
|
37
|
+
ENV["TZ"] = "GMT"
|
38
|
+
d = parse_date("Wed, 10 Jan 10000 12:53:55 +0900")
|
39
|
+
assert_equal(9999, d.year)
|
40
|
+
assert_equal(1, d.month)
|
41
|
+
assert_equal(10, d.day)
|
42
|
+
assert_equal(3, d.hour)
|
43
|
+
assert_equal(53, d.min)
|
44
|
+
assert_equal(55, d.sec)
|
45
|
+
assert_equal("+0000", d.zone)
|
46
|
+
ensure
|
47
|
+
ENV["TZ"] = tzbak
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
34
51
|
def test_parse_phrase_list()
|
35
52
|
p = parse_phrase_list("abc def, ghi jkl")
|
36
53
|
assert_equal(2, p.size)
|
@@ -52,6 +69,15 @@ class TC_Loose < Test::Unit::TestCase
|
|
52
69
|
assert_equal("GHI JKL", p[1])
|
53
70
|
end
|
54
71
|
|
72
|
+
if String.method_defined? :force_encoding
|
73
|
+
def test_parse_phrase_list_output_charset_with_raw_utf8
|
74
|
+
p = parse_phrase_list("あいう, えお", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
75
|
+
assert_equal(2, p.size)
|
76
|
+
assert_equal("あいう".force_encoding("utf-8"), p[0])
|
77
|
+
assert_equal("えお".force_encoding("utf-8"), p[1])
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
55
81
|
def test_parse_received()
|
56
82
|
tzbak = ENV["TZ"]
|
57
83
|
begin
|
@@ -150,6 +176,28 @@ class TC_Loose < Test::Unit::TestCase
|
|
150
176
|
end
|
151
177
|
end
|
152
178
|
|
179
|
+
if String.method_defined? :force_encoding
|
180
|
+
def test_parse_received_output_charset_with_raw_utf8
|
181
|
+
tzbak = ENV["TZ"]
|
182
|
+
begin
|
183
|
+
ENV["TZ"] = "GMT"
|
184
|
+
r = parse_received("from ほげ by ふが for ぴよ; Wed, 10 Jan 2007 12:09:55 +0900", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
185
|
+
assert_equal(2007, r.date_time.year)
|
186
|
+
assert_equal(1, r.date_time.month)
|
187
|
+
assert_equal(10, r.date_time.day)
|
188
|
+
assert_equal(3, r.date_time.hour)
|
189
|
+
assert_equal(9, r.date_time.min)
|
190
|
+
assert_equal(55, r.date_time.sec)
|
191
|
+
assert_equal("+0000", r.date_time.zone)
|
192
|
+
assert_equal("ほげ".force_encoding("utf-8"), r.name_val["from"])
|
193
|
+
assert_equal("ふが".force_encoding("utf-8"), r.name_val["by"])
|
194
|
+
assert_equal("ぴよ".force_encoding("utf-8"), r.name_val["for"])
|
195
|
+
ensure
|
196
|
+
ENV["TZ"] = tzbak
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
153
201
|
def test_parse_content_type()
|
154
202
|
ct = parse_content_type("text/plain; charset=iso-2022-jp")
|
155
203
|
assert_equal("text", ct.type)
|
@@ -190,6 +238,15 @@ class TC_Loose < Test::Unit::TestCase
|
|
190
238
|
assert_equal("", ct.subtype)
|
191
239
|
end
|
192
240
|
|
241
|
+
if String.method_defined? :force_encoding
|
242
|
+
def test_parse_content_type_output_charset_with_raw_utf8
|
243
|
+
ct = parse_content_type("text/plain; name=ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
244
|
+
assert_equal("text", ct.type)
|
245
|
+
assert_equal("plain", ct.subtype)
|
246
|
+
assert_equal({"name"=>"ほげ".force_encoding("utf-8")}, ct.params)
|
247
|
+
end
|
248
|
+
end
|
249
|
+
|
193
250
|
def test_parse_content_transfer_encoding
|
194
251
|
cte = parse_content_transfer_encoding("7BIT")
|
195
252
|
assert_equal "7bit", cte.mechanism
|
@@ -200,6 +257,13 @@ class TC_Loose < Test::Unit::TestCase
|
|
200
257
|
assert_equal "", cte.mechanism
|
201
258
|
end
|
202
259
|
|
260
|
+
if String.method_defined? :force_encoding
|
261
|
+
def test_parse_content_transfer_encoding_output_charset_with_raw_utf8
|
262
|
+
cte = parse_content_transfer_encoding("あいう", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
263
|
+
assert_equal "あいう".force_encoding("utf-8"), cte.mechanism
|
264
|
+
end
|
265
|
+
end
|
266
|
+
|
203
267
|
def test_parse_mime_version
|
204
268
|
assert_equal "1.0", parse_mime_version("1.0")
|
205
269
|
assert_equal "1.0", parse_mime_version("1 . 0")
|
@@ -210,6 +274,12 @@ class TC_Loose < Test::Unit::TestCase
|
|
210
274
|
assert_equal "", parse_mime_version("")
|
211
275
|
end
|
212
276
|
|
277
|
+
if String.method_defined? :force_encoding
|
278
|
+
def test_parse_mime_version_output_charset_with_raw_utf8
|
279
|
+
assert_equal "ほげ".force_encoding("utf-8"), parse_mime_version("ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
280
|
+
end
|
281
|
+
end
|
282
|
+
|
213
283
|
def test_parse_content_disposition()
|
214
284
|
c = parse_content_disposition("attachment; filename=hoge.txt")
|
215
285
|
assert_equal("attachment", c.type)
|
@@ -227,6 +297,14 @@ class TC_Loose < Test::Unit::TestCase
|
|
227
297
|
assert_equal "", c.type
|
228
298
|
end
|
229
299
|
|
300
|
+
if String.method_defined? :force_encoding
|
301
|
+
def test_parse_content_disposition_output_charset_with_raw_utf8
|
302
|
+
c = parse_content_disposition("attachment; filename=ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
303
|
+
assert_equal("attachment", c.type)
|
304
|
+
assert_equal({"filename"=>"ほげ".force_encoding("utf-8")}, c.params)
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
230
308
|
def test_parse_other_header
|
231
309
|
s = parse("subject", "=?euc-jp?q?=A4=A2=A4=A4?=")
|
232
310
|
assert_equal "=?euc-jp?q?=A4=A2=A4=A4?=", s
|
@@ -282,8 +360,21 @@ class TC_Loose < Test::Unit::TestCase
|
|
282
360
|
ml = mailbox_list("hoge =?us-ascii?q?hoge?= <hoge.hoge@example.com>", {:decode_mime_header=>true, :output_charset=>"us-ascii", :charset_converter=>proc{|_,_,s| s.upcase}})
|
283
361
|
assert_equal(1, ml.size)
|
284
362
|
assert_equal("HOGE HOGE", ml[0].phrase)
|
285
|
-
assert_equal("hoge.hoge", ml[0].addr_spec.local_part)
|
286
|
-
assert_equal("example.com", ml[0].addr_spec.domain)
|
363
|
+
assert_equal("HOGE.HOGE", ml[0].addr_spec.local_part)
|
364
|
+
assert_equal("EXAMPLE.COM", ml[0].addr_spec.domain)
|
365
|
+
end
|
366
|
+
|
367
|
+
if String.method_defined? :force_encoding
|
368
|
+
def test_mailbox_list_output_charset_with_raw_utf8
|
369
|
+
ml = mailbox_list("ほげ <ほげ@ぴよ>, ふが@ぴよ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
370
|
+
assert_equal(2, ml.size)
|
371
|
+
assert_equal("ほげ".force_encoding("utf-8"), ml[0].phrase)
|
372
|
+
assert_equal("ほげ".force_encoding("utf-8"), ml[0].addr_spec.local_part)
|
373
|
+
assert_equal("ぴよ".force_encoding("utf-8"), ml[0].addr_spec.domain)
|
374
|
+
assert_equal("", ml[1].phrase)
|
375
|
+
assert_equal("ふが".force_encoding("utf-8"), ml[1].addr_spec.local_part)
|
376
|
+
assert_equal("ぴよ".force_encoding("utf-8"), ml[1].addr_spec.domain)
|
377
|
+
end
|
287
378
|
end
|
288
379
|
|
289
380
|
def test_msg_id_list_old_in_reply_to()
|
@@ -317,6 +408,13 @@ class TC_Loose < Test::Unit::TestCase
|
|
317
408
|
assert_equal m, []
|
318
409
|
end
|
319
410
|
|
411
|
+
if String.method_defined? :force_encoding
|
412
|
+
def test_msg_id_output_charset_with_raw_utf8
|
413
|
+
m = msg_id_list "<ほげ>", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8"
|
414
|
+
assert_equal "ほげ".force_encoding("utf-8"), m[0].msg_id
|
415
|
+
end
|
416
|
+
end
|
417
|
+
|
320
418
|
end
|
321
419
|
|
322
420
|
class TC_Loose_Tokenizer < Test::Unit::TestCase
|
data/test/test_mailparser.rb
CHANGED
@@ -344,6 +344,15 @@ EOS
|
|
344
344
|
assert_equal("abcdefg", m.subject)
|
345
345
|
end
|
346
346
|
|
347
|
+
def test_subject_raw_utf8_with_output_charset
|
348
|
+
msg = StringIO.new(<<EOS)
|
349
|
+
Subject: あいうえお
|
350
|
+
|
351
|
+
EOS
|
352
|
+
m = MailParser::Message.new(msg, :output_charset=>"utf-8")
|
353
|
+
assert_equal("あいうえお", m.subject)
|
354
|
+
end
|
355
|
+
|
347
356
|
def test_content_type()
|
348
357
|
msg = StringIO.new(<<EOS)
|
349
358
|
Content-Type: text/plain; charset=us-ascii
|
metadata
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mailparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TOMITA Masahiro
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mmapscanner
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - '>='
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - '>='
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
description: MailParser is a parser for mail message
|
@@ -64,17 +64,17 @@ require_paths:
|
|
64
64
|
- lib
|
65
65
|
required_ruby_version: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
|
-
- -
|
67
|
+
- - '>='
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '0'
|
70
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
71
|
requirements:
|
72
|
-
- -
|
72
|
+
- - '>='
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: '0'
|
75
75
|
requirements: []
|
76
76
|
rubyforge_project:
|
77
|
-
rubygems_version: 2.0.0
|
77
|
+
rubygems_version: 2.0.0
|
78
78
|
signing_key:
|
79
79
|
specification_version: 4
|
80
80
|
summary: Mail Parser
|