mailparser 0.5.3 → 0.5.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +6 -6
- data/lib/mailparser.rb +2 -0
- data/lib/mailparser/loose.rb +29 -19
- data/test/test_loose.rb +100 -2
- data/test/test_mailparser.rb +9 -0
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
5
|
-
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: acf77838874db7d6c17b1e85420b5027eaea07c5
|
4
|
+
data.tar.gz: b67ae4ea3374d66de3cb11f40a5fe25074e682d6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 790f81bfd597d4b209efb81a29ffa6356484ab5b8570140b16dac2b0e33982269df2a0fa11f16362c06e198c759eef8b087d17a3a50e2967ef4daea6710d9a97
|
7
|
+
data.tar.gz: 2f2502e6975c84472da8675c4c42d65056e1ec4f3845a2ebe8a5477a3162f4ecaa0f43c37b729da9c189ca5dd12609704bac7c2dba002d095cabe2266ca931ff
|
data/lib/mailparser.rb
CHANGED
@@ -81,6 +81,8 @@ module MailParser
|
|
81
81
|
r = @raw.chomp.gsub(/\r?\n/, '').gsub(/\t/, ' ')
|
82
82
|
if @opt[:decode_mime_header] then
|
83
83
|
@parsed = RFC2047.decode(r, @opt)
|
84
|
+
elsif @opt[:output_charset]
|
85
|
+
@parsed = @opt[:charset_converter].call(@opt[:output_charset], @opt[:output_charset], r)
|
84
86
|
else
|
85
87
|
@parsed = r
|
86
88
|
end
|
data/lib/mailparser/loose.rb
CHANGED
@@ -67,7 +67,8 @@ module MailParser
|
|
67
67
|
rescue
|
68
68
|
t = Time.now
|
69
69
|
end
|
70
|
-
|
70
|
+
year = t.year > 9999 ? 9999 : t.year
|
71
|
+
return RFC2822::DateTime.new(year, t.month, t.day, t.hour, t.min, t.sec, t.zone)
|
71
72
|
end
|
72
73
|
|
73
74
|
# parse From, To,Cc field
|
@@ -91,7 +92,7 @@ module MailParser
|
|
91
92
|
# @param [Hash] opt options
|
92
93
|
# @return [MailParser::RFC2822::MsgId]
|
93
94
|
def parse_msg_id(str, opt={})
|
94
|
-
msg_id_list(str)[0]
|
95
|
+
msg_id_list(str, opt)[0]
|
95
96
|
end
|
96
97
|
|
97
98
|
# parse In-Reply-To, References field
|
@@ -99,7 +100,7 @@ module MailParser
|
|
99
100
|
# @param [Hash] opt options
|
100
101
|
# @return [MailParser::RFC2822::MsgIdList]
|
101
102
|
def parse_msg_id_list(str, opt={})
|
102
|
-
msg_id_list(str)
|
103
|
+
msg_id_list(str, opt)
|
103
104
|
end
|
104
105
|
|
105
106
|
# parse Keywords field
|
@@ -112,7 +113,7 @@ module MailParser
|
|
112
113
|
if opt[:decode_mime_header] then
|
113
114
|
s.map!{|i| RFC2047.decode(i, opt)}
|
114
115
|
end
|
115
|
-
s
|
116
|
+
s.map{|_| _conv(_, opt)}
|
116
117
|
end
|
117
118
|
|
118
119
|
# parse Return-Path field
|
@@ -145,6 +146,9 @@ module MailParser
|
|
145
146
|
i += 1
|
146
147
|
end
|
147
148
|
end
|
149
|
+
name_val.keys.each do |k|
|
150
|
+
name_val[k] = _conv(name_val[k], opt)
|
151
|
+
end
|
148
152
|
RFC2822::Received.new(name_val, date)
|
149
153
|
end
|
150
154
|
|
@@ -158,13 +162,13 @@ module MailParser
|
|
158
162
|
params = {}
|
159
163
|
token.each do |param|
|
160
164
|
pn, pv = param.join.split(/=/, 2)
|
161
|
-
params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/,"")
|
165
|
+
params[pn.to_s] = _conv(pv.to_s.gsub(/\A"|"\z/,""), opt)
|
162
166
|
end
|
163
167
|
type = "text" if type.nil? or type.empty?
|
164
168
|
if subtype.nil? or subtype.empty?
|
165
169
|
subtype = type == "text" ? "plain" : ""
|
166
170
|
end
|
167
|
-
RFC2045::ContentType.new(type, subtype, params)
|
171
|
+
RFC2045::ContentType.new(_conv(type, opt), _conv(subtype, opt), params)
|
168
172
|
end
|
169
173
|
|
170
174
|
# parse Content-Transfer-Encoding field
|
@@ -172,7 +176,7 @@ module MailParser
|
|
172
176
|
# @param [Hash] opt options
|
173
177
|
# @return [MailParser::RFC2045::ContentTransferEncoding]
|
174
178
|
def parse_content_transfer_encoding(str, opt={})
|
175
|
-
RFC2045::ContentTransferEncoding.new(Tokenizer.token(str).first.to_s)
|
179
|
+
RFC2045::ContentTransferEncoding.new(_conv(Tokenizer.token(str).first.to_s, opt))
|
176
180
|
end
|
177
181
|
|
178
182
|
# parse Mime-Version field
|
@@ -180,7 +184,7 @@ module MailParser
|
|
180
184
|
# @param [Hash] opt options
|
181
185
|
# @return [String]
|
182
186
|
def parse_mime_version(str, opt={})
|
183
|
-
Tokenizer.token(str).join
|
187
|
+
_conv(Tokenizer.token(str).join, opt)
|
184
188
|
end
|
185
189
|
|
186
190
|
# parse Content-Disposition field
|
@@ -193,9 +197,9 @@ module MailParser
|
|
193
197
|
params = {}
|
194
198
|
token.each do |param|
|
195
199
|
pn, pv = param.join.split(/=/, 2)
|
196
|
-
params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/,"")
|
200
|
+
params[pn.to_s] = _conv(pv.to_s.gsub(/\A"|"\z/,""), opt)
|
197
201
|
end
|
198
|
-
RFC2183::ContentDisposition.new(type, params)
|
202
|
+
RFC2183::ContentDisposition.new(_conv(type, opt), params)
|
199
203
|
end
|
200
204
|
|
201
205
|
# split arry by delim
|
@@ -229,11 +233,11 @@ module MailParser
|
|
229
233
|
if opt[:decode_mime_header] then
|
230
234
|
display_name = RFC2047.decode(display_name, opt)
|
231
235
|
end
|
232
|
-
mailaddr = m[a1+1..a2-1].join
|
236
|
+
mailaddr = _conv(m[a1+1..a2-1].join, opt)
|
233
237
|
local_part, domain = mailaddr.split(/@/, 2)
|
234
|
-
ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain), display_name)
|
238
|
+
ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain), _conv(display_name, opt))
|
235
239
|
else
|
236
|
-
local_part, domain = m.join.split(/@/, 2)
|
240
|
+
local_part, domain = _conv(m.join, opt).split(/@/, 2)
|
237
241
|
ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain))
|
238
242
|
end
|
239
243
|
end
|
@@ -243,7 +247,7 @@ module MailParser
|
|
243
247
|
# parse MsgId type field
|
244
248
|
# @param [String] str
|
245
249
|
# @return [Array<MailParser::RFC2822::MsgId>]
|
246
|
-
def msg_id_list(str)
|
250
|
+
def msg_id_list(str, opt={})
|
247
251
|
ret = []
|
248
252
|
flag = false
|
249
253
|
msgid = nil
|
@@ -257,14 +261,14 @@ module MailParser
|
|
257
261
|
when ">"
|
258
262
|
if flag
|
259
263
|
flag = false
|
260
|
-
ret << RFC2822::MsgId.new(msgid)
|
264
|
+
ret << RFC2822::MsgId.new(_conv(msgid, opt))
|
261
265
|
end
|
262
266
|
else
|
263
267
|
msgid << m if flag
|
264
268
|
end
|
265
269
|
end
|
266
270
|
if ret.empty?
|
267
|
-
ret = str.split.map{|s| RFC2822::MsgId.new(s)}
|
271
|
+
ret = str.split.map{|s| RFC2822::MsgId.new(_conv(s, opt))}
|
268
272
|
end
|
269
273
|
return ret
|
270
274
|
end
|
@@ -291,9 +295,9 @@ module MailParser
|
|
291
295
|
@ss.pos = pos
|
292
296
|
token << s
|
293
297
|
end
|
294
|
-
elsif s = @ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}]))*\s*\"/o) ||
|
295
|
-
@ss.scan(/\[(\s*(\\[#{TEXT_RE}]|[#{DTEXT_RE}]))*\s*\]/o) ||
|
296
|
-
@ss.scan(/[#{ATEXT_RE}]+/o)
|
298
|
+
elsif s = @ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}\x80-\xff]))*\s*\"/o) ||
|
299
|
+
@ss.scan(/\[(\s*(\\[#{TEXT_RE}]|[#{DTEXT_RE}\x80-\xff]))*\s*\]/o) ||
|
300
|
+
@ss.scan(/[#{ATEXT_RE}\x80-\xff]+/o)
|
297
301
|
token << s
|
298
302
|
else
|
299
303
|
token << @ss.scan(/./)
|
@@ -337,5 +341,11 @@ module MailParser
|
|
337
341
|
end
|
338
342
|
end
|
339
343
|
|
344
|
+
def _conv(str, opt)
|
345
|
+
cv = opt[:charset_converter]
|
346
|
+
cs = opt[:output_charset]
|
347
|
+
cv && cs ? cv.call(cs, cs, str) : str
|
348
|
+
end
|
349
|
+
|
340
350
|
end
|
341
351
|
end
|
data/test/test_loose.rb
CHANGED
@@ -31,6 +31,23 @@ class TC_Loose < Test::Unit::TestCase
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
|
34
|
+
def test_parse_date_y10000()
|
35
|
+
tzbak = ENV["TZ"]
|
36
|
+
begin
|
37
|
+
ENV["TZ"] = "GMT"
|
38
|
+
d = parse_date("Wed, 10 Jan 10000 12:53:55 +0900")
|
39
|
+
assert_equal(9999, d.year)
|
40
|
+
assert_equal(1, d.month)
|
41
|
+
assert_equal(10, d.day)
|
42
|
+
assert_equal(3, d.hour)
|
43
|
+
assert_equal(53, d.min)
|
44
|
+
assert_equal(55, d.sec)
|
45
|
+
assert_equal("+0000", d.zone)
|
46
|
+
ensure
|
47
|
+
ENV["TZ"] = tzbak
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
34
51
|
def test_parse_phrase_list()
|
35
52
|
p = parse_phrase_list("abc def, ghi jkl")
|
36
53
|
assert_equal(2, p.size)
|
@@ -52,6 +69,15 @@ class TC_Loose < Test::Unit::TestCase
|
|
52
69
|
assert_equal("GHI JKL", p[1])
|
53
70
|
end
|
54
71
|
|
72
|
+
if String.method_defined? :force_encoding
|
73
|
+
def test_parse_phrase_list_output_charset_with_raw_utf8
|
74
|
+
p = parse_phrase_list("あいう, えお", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
75
|
+
assert_equal(2, p.size)
|
76
|
+
assert_equal("あいう".force_encoding("utf-8"), p[0])
|
77
|
+
assert_equal("えお".force_encoding("utf-8"), p[1])
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
55
81
|
def test_parse_received()
|
56
82
|
tzbak = ENV["TZ"]
|
57
83
|
begin
|
@@ -150,6 +176,28 @@ class TC_Loose < Test::Unit::TestCase
|
|
150
176
|
end
|
151
177
|
end
|
152
178
|
|
179
|
+
if String.method_defined? :force_encoding
|
180
|
+
def test_parse_received_output_charset_with_raw_utf8
|
181
|
+
tzbak = ENV["TZ"]
|
182
|
+
begin
|
183
|
+
ENV["TZ"] = "GMT"
|
184
|
+
r = parse_received("from ほげ by ふが for ぴよ; Wed, 10 Jan 2007 12:09:55 +0900", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
185
|
+
assert_equal(2007, r.date_time.year)
|
186
|
+
assert_equal(1, r.date_time.month)
|
187
|
+
assert_equal(10, r.date_time.day)
|
188
|
+
assert_equal(3, r.date_time.hour)
|
189
|
+
assert_equal(9, r.date_time.min)
|
190
|
+
assert_equal(55, r.date_time.sec)
|
191
|
+
assert_equal("+0000", r.date_time.zone)
|
192
|
+
assert_equal("ほげ".force_encoding("utf-8"), r.name_val["from"])
|
193
|
+
assert_equal("ふが".force_encoding("utf-8"), r.name_val["by"])
|
194
|
+
assert_equal("ぴよ".force_encoding("utf-8"), r.name_val["for"])
|
195
|
+
ensure
|
196
|
+
ENV["TZ"] = tzbak
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
153
201
|
def test_parse_content_type()
|
154
202
|
ct = parse_content_type("text/plain; charset=iso-2022-jp")
|
155
203
|
assert_equal("text", ct.type)
|
@@ -190,6 +238,15 @@ class TC_Loose < Test::Unit::TestCase
|
|
190
238
|
assert_equal("", ct.subtype)
|
191
239
|
end
|
192
240
|
|
241
|
+
if String.method_defined? :force_encoding
|
242
|
+
def test_parse_content_type_output_charset_with_raw_utf8
|
243
|
+
ct = parse_content_type("text/plain; name=ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
244
|
+
assert_equal("text", ct.type)
|
245
|
+
assert_equal("plain", ct.subtype)
|
246
|
+
assert_equal({"name"=>"ほげ".force_encoding("utf-8")}, ct.params)
|
247
|
+
end
|
248
|
+
end
|
249
|
+
|
193
250
|
def test_parse_content_transfer_encoding
|
194
251
|
cte = parse_content_transfer_encoding("7BIT")
|
195
252
|
assert_equal "7bit", cte.mechanism
|
@@ -200,6 +257,13 @@ class TC_Loose < Test::Unit::TestCase
|
|
200
257
|
assert_equal "", cte.mechanism
|
201
258
|
end
|
202
259
|
|
260
|
+
if String.method_defined? :force_encoding
|
261
|
+
def test_parse_content_transfer_encoding_output_charset_with_raw_utf8
|
262
|
+
cte = parse_content_transfer_encoding("あいう", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
263
|
+
assert_equal "あいう".force_encoding("utf-8"), cte.mechanism
|
264
|
+
end
|
265
|
+
end
|
266
|
+
|
203
267
|
def test_parse_mime_version
|
204
268
|
assert_equal "1.0", parse_mime_version("1.0")
|
205
269
|
assert_equal "1.0", parse_mime_version("1 . 0")
|
@@ -210,6 +274,12 @@ class TC_Loose < Test::Unit::TestCase
|
|
210
274
|
assert_equal "", parse_mime_version("")
|
211
275
|
end
|
212
276
|
|
277
|
+
if String.method_defined? :force_encoding
|
278
|
+
def test_parse_mime_version_output_charset_with_raw_utf8
|
279
|
+
assert_equal "ほげ".force_encoding("utf-8"), parse_mime_version("ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
280
|
+
end
|
281
|
+
end
|
282
|
+
|
213
283
|
def test_parse_content_disposition()
|
214
284
|
c = parse_content_disposition("attachment; filename=hoge.txt")
|
215
285
|
assert_equal("attachment", c.type)
|
@@ -227,6 +297,14 @@ class TC_Loose < Test::Unit::TestCase
|
|
227
297
|
assert_equal "", c.type
|
228
298
|
end
|
229
299
|
|
300
|
+
if String.method_defined? :force_encoding
|
301
|
+
def test_parse_content_disposition_output_charset_with_raw_utf8
|
302
|
+
c = parse_content_disposition("attachment; filename=ほげ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
303
|
+
assert_equal("attachment", c.type)
|
304
|
+
assert_equal({"filename"=>"ほげ".force_encoding("utf-8")}, c.params)
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
230
308
|
def test_parse_other_header
|
231
309
|
s = parse("subject", "=?euc-jp?q?=A4=A2=A4=A4?=")
|
232
310
|
assert_equal "=?euc-jp?q?=A4=A2=A4=A4?=", s
|
@@ -282,8 +360,21 @@ class TC_Loose < Test::Unit::TestCase
|
|
282
360
|
ml = mailbox_list("hoge =?us-ascii?q?hoge?= <hoge.hoge@example.com>", {:decode_mime_header=>true, :output_charset=>"us-ascii", :charset_converter=>proc{|_,_,s| s.upcase}})
|
283
361
|
assert_equal(1, ml.size)
|
284
362
|
assert_equal("HOGE HOGE", ml[0].phrase)
|
285
|
-
assert_equal("hoge.hoge", ml[0].addr_spec.local_part)
|
286
|
-
assert_equal("example.com", ml[0].addr_spec.domain)
|
363
|
+
assert_equal("HOGE.HOGE", ml[0].addr_spec.local_part)
|
364
|
+
assert_equal("EXAMPLE.COM", ml[0].addr_spec.domain)
|
365
|
+
end
|
366
|
+
|
367
|
+
if String.method_defined? :force_encoding
|
368
|
+
def test_mailbox_list_output_charset_with_raw_utf8
|
369
|
+
ml = mailbox_list("ほげ <ほげ@ぴよ>, ふが@ぴよ", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8")
|
370
|
+
assert_equal(2, ml.size)
|
371
|
+
assert_equal("ほげ".force_encoding("utf-8"), ml[0].phrase)
|
372
|
+
assert_equal("ほげ".force_encoding("utf-8"), ml[0].addr_spec.local_part)
|
373
|
+
assert_equal("ぴよ".force_encoding("utf-8"), ml[0].addr_spec.domain)
|
374
|
+
assert_equal("", ml[1].phrase)
|
375
|
+
assert_equal("ふが".force_encoding("utf-8"), ml[1].addr_spec.local_part)
|
376
|
+
assert_equal("ぴよ".force_encoding("utf-8"), ml[1].addr_spec.domain)
|
377
|
+
end
|
287
378
|
end
|
288
379
|
|
289
380
|
def test_msg_id_list_old_in_reply_to()
|
@@ -317,6 +408,13 @@ class TC_Loose < Test::Unit::TestCase
|
|
317
408
|
assert_equal m, []
|
318
409
|
end
|
319
410
|
|
411
|
+
if String.method_defined? :force_encoding
|
412
|
+
def test_msg_id_output_charset_with_raw_utf8
|
413
|
+
m = msg_id_list "<ほげ>", :charset_converter=>MailParser::ConvCharset.method(:conv_charset), :output_charset=>"utf-8"
|
414
|
+
assert_equal "ほげ".force_encoding("utf-8"), m[0].msg_id
|
415
|
+
end
|
416
|
+
end
|
417
|
+
|
320
418
|
end
|
321
419
|
|
322
420
|
class TC_Loose_Tokenizer < Test::Unit::TestCase
|
data/test/test_mailparser.rb
CHANGED
@@ -344,6 +344,15 @@ EOS
|
|
344
344
|
assert_equal("abcdefg", m.subject)
|
345
345
|
end
|
346
346
|
|
347
|
+
def test_subject_raw_utf8_with_output_charset
|
348
|
+
msg = StringIO.new(<<EOS)
|
349
|
+
Subject: あいうえお
|
350
|
+
|
351
|
+
EOS
|
352
|
+
m = MailParser::Message.new(msg, :output_charset=>"utf-8")
|
353
|
+
assert_equal("あいうえお", m.subject)
|
354
|
+
end
|
355
|
+
|
347
356
|
def test_content_type()
|
348
357
|
msg = StringIO.new(<<EOS)
|
349
358
|
Content-Type: text/plain; charset=us-ascii
|
metadata
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mailparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.3
|
4
|
+
version: 0.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TOMITA Masahiro
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-03-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mmapscanner
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - '>='
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - '>='
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
description: MailParser is a parser for mail message
|
@@ -64,17 +64,17 @@ require_paths:
|
|
64
64
|
- lib
|
65
65
|
required_ruby_version: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
|
-
- -
|
67
|
+
- - '>='
|
68
68
|
- !ruby/object:Gem::Version
|
69
69
|
version: '0'
|
70
70
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
71
|
requirements:
|
72
|
-
- -
|
72
|
+
- - '>='
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: '0'
|
75
75
|
requirements: []
|
76
76
|
rubyforge_project:
|
77
|
-
rubygems_version: 2.0.0
|
77
|
+
rubygems_version: 2.0.0
|
78
78
|
signing_key:
|
79
79
|
specification_version: 4
|
80
80
|
summary: Mail Parser
|