mailparser 0.4.22a
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY +141 -0
- data/README.txt +501 -0
- data/lib/mailparser.rb +558 -0
- data/lib/mailparser/conv_charset.rb +27 -0
- data/lib/mailparser/error.rb +7 -0
- data/lib/mailparser/loose.rb +292 -0
- data/lib/mailparser/obsolete.rb +403 -0
- data/lib/mailparser/rfc2045.rb +54 -0
- data/lib/mailparser/rfc2045/parser.rb +245 -0
- data/lib/mailparser/rfc2045/scanner.rb +54 -0
- data/lib/mailparser/rfc2047.rb +82 -0
- data/lib/mailparser/rfc2183.rb +33 -0
- data/lib/mailparser/rfc2183/parser.rb +186 -0
- data/lib/mailparser/rfc2183/scanner.rb +7 -0
- data/lib/mailparser/rfc2231.rb +57 -0
- data/lib/mailparser/rfc2822.rb +212 -0
- data/lib/mailparser/rfc2822/parser.rb +883 -0
- data/lib/mailparser/rfc2822/scanner.rb +119 -0
- data/test.rb +26 -0
- data/test/test_loose.rb +371 -0
- data/test/test_mailparser.rb +1130 -0
- data/test/test_obsolete.rb +615 -0
- data/test/test_rfc2045.rb +121 -0
- data/test/test_rfc2047.rb +118 -0
- data/test/test_rfc2183.rb +60 -0
- data/test/test_rfc2231.rb +167 -0
- data/test/test_rfc2822.rb +370 -0
- metadata +81 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
# Copyright (C) 2007-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "iconv"
|
5
|
+
require "nkf"
|
6
|
+
|
7
|
+
# Charset conversion helper built on Iconv, with NKF handling ISO-2022-JP.
class MailParser::ConvCharset
  # Aliases of Shift_JIS that are all converted as "cp932".
  CHARSET = {
    "sjis"      => "cp932",
    "x-sjis"    => "cp932",
    "shift_jis" => "cp932",
    "shift-jis" => "cp932",
  }

  # Converts str from charset +from+ to charset +to+.
  # from:: source charset name (String, case-insensitive)
  # to:: destination charset name (String, case-insensitive)
  # str:: text to convert (String)
  # Returns the converted String. ISO-2022-JP on either side is routed
  # through NKF with cp932 as the intermediate encoding.
  def self.conv_charset(from, to, str)
    src_name = from.downcase
    src_name = CHARSET.fetch(src_name, src_name)
    dst_name = to.downcase
    dst_name = CHARSET.fetch(dst_name, dst_name)

    text = str
    if src_name == "iso-2022-jp"
      # NKF turns the JIS input into Shift_JIS, which Iconv reads as cp932.
      text = NKF.nkf("-m0Jxs", str)
      src_name = "cp932"
    end

    return Iconv.iconv(dst_name, src_name, text)[0] unless dst_name == "iso-2022-jp"

    # Destination is ISO-2022-JP: go to cp932 first, then let NKF emit JIS.
    sjis = Iconv.iconv("cp932", src_name, text)[0]
    NKF.nkf("-m0Sxj", sjis)
  end
end
|
@@ -0,0 +1,292 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Copyright (C) 2007-2010 TOMITA Masahiro
|
3
|
+
# mailto:tommy@tmtm.org
|
4
|
+
|
5
|
+
require "time"
|
6
|
+
require "strscan"
|
7
|
+
require "mailparser/rfc2822"
|
8
|
+
require "mailparser/rfc2045"
|
9
|
+
|
10
|
+
module MailParser
|
11
|
+
module Loose
|
12
|
+
# Dispatch table: header field name (lower case) => name of the module
# function that parses that header's body. Frozen so the table cannot be
# modified by accident at run time.
HEADER_PARSER = {
  "date"                      => :parse_date,
  "from"                      => :parse_mailbox_list,
  "sender"                    => :parse_mailbox,
  "reply-to"                  => :parse_mailbox_list,
  "to"                        => :parse_mailbox_list,
  "cc"                        => :parse_mailbox_list,
  "bcc"                       => :parse_mailbox_list,
  "message-id"                => :parse_msg_id,
  "in-reply-to"               => :parse_msg_id_list,
  "references"                => :parse_msg_id_list,
  "keywords"                  => :parse_phrase_list,
  "resent-date"               => :parse_date,
  "resent-from"               => :parse_mailbox_list,
  "resent-sender"             => :parse_mailbox,
  "resent-to"                 => :parse_mailbox_list,
  "resent-cc"                 => :parse_mailbox_list,
  "resent-bcc"                => :parse_mailbox_list,
  "resent-message-id"         => :parse_msg_id,
  "return-path"               => :parse_return_path,
  "received"                  => :parse_received,
  "content-type"              => :parse_content_type,
  "content-transfer-encoding" => :parse_content_transfer_encoding,
  "content-id"                => :parse_msg_id,
  "mime-version"              => :parse_mime_version,
  "content-disposition"       => :parse_content_disposition,
}.freeze
|
39
|
+
|
40
|
+
module_function
|
41
|
+
# Parses one header and returns the resulting object.
# hname:: header name (String); looked up in HEADER_PARSER exactly as given
# hbody:: header body (String)
# opt:: options (Hash)
# Headers without a dedicated parser are returned as a String with every
# whitespace run collapsed to one space, RFC 2047-decoded when
# opt[:decode_mime_header] is set.
def parse(hname, hbody, opt={})
  handler = HEADER_PARSER[hname]
  return send(handler, hbody, opt) if handler

  unfolded = hbody.gsub(/\s+/, " ")
  opt[:decode_mime_header] ? RFC2047.decode(unfolded, opt) : unfolded
end
|
57
|
+
|
58
|
+
# Parses a Date header and returns an RFC2822::DateTime.
# The string is tried as RFC 2822 first, then with the lenient Time.parse;
# if both fail the current time is used instead.
def parse_date(str, opt={})
  time =
    begin
      Time.rfc2822(str)
    rescue StandardError
      begin
        Time.parse(str)
      rescue StandardError
        Time.now
      end
    end
  RFC2822::DateTime.new(time.year, time.month, time.day,
                        time.hour, time.min, time.sec, time.zone)
end
|
67
|
+
|
68
|
+
# Parses From, To, Cc and similar headers.
# Returns whatever mailbox_list builds: an Array of RFC2822::Mailbox.
def parse_mailbox_list(str, opt={})
  return mailbox_list(str, opt)
end
|
72
|
+
|
73
|
+
# Parses a Sender or Resent-Sender header.
# Returns the first RFC2822::Mailbox found, or nil when there is none.
def parse_mailbox(str, opt={})
  mailboxes = mailbox_list(str, opt)
  mailboxes[0]
end
|
77
|
+
|
78
|
+
# Parses a Message-Id or Resent-Message-Id header.
# Returns the first RFC2822::MsgId found, or nil when there is none.
# opt is accepted for a uniform parser signature and is not used.
def parse_msg_id(str, opt={})
  ids = msg_id_list(str)
  ids[0]
end
|
82
|
+
|
83
|
+
# Parses In-Reply-To, References and similar headers.
# Returns the Array of RFC2822::MsgId built by msg_id_list.
# opt is accepted for a uniform parser signature and is not used.
def parse_msg_id_list(str, opt={})
  return msg_id_list(str)
end
|
87
|
+
|
88
|
+
# Parses a Keywords header and returns an Array of Strings.
# The token stream is split on "," and each group is joined with one space;
# each phrase is RFC 2047-decoded when opt[:decode_mime_header] is set.
def parse_phrase_list(str, opt={})
  phrases = split_by(Tokenizer.token(str), ",").map { |words| words.join(" ") }
  return phrases unless opt[:decode_mime_header]

  phrases.map { |phrase| RFC2047.decode(phrase, opt) }
end
|
97
|
+
|
98
|
+
# Parses a Return-Path header.
# Returns the first element produced by mailbox_list (an RFC2822::Mailbox),
# or nil when there is none.
def parse_return_path(str, opt={})
  mailboxes = mailbox_list(str, opt)
  mailboxes[0]
end
|
102
|
+
|
103
|
+
# Parses a Received header and returns an RFC2822::Received.
# The token stream is split on ";". The last section is parsed as the date
# when there is more than one section, otherwise RFC2822::DateTime.now is
# used. In the first section every purely alphanumeric token is taken as a
# name whose value is the next token; other tokens are appended to the most
# recent value.
def parse_received(str, opt={})
  sections = split_by(Tokenizer.token_received(str), ";")
  date = sections.length > 1 ? parse_date(sections.last.join(" ")) : RFC2822::DateTime.now

  name_val = {}
  words = sections.first || []
  value = ""
  idx = 0
  while idx < words.length
    word = words[idx]
    if word =~ /\A[a-z0-9]+\z/ino
      # name token: the following token is its value (consumed here as well)
      value = words[idx + 1]
      name_val[word.downcase] = value
      idx += 2
    else
      # anything else extends the current value in place
      value << word
      idx += 1
    end
  end

  RFC2822::Received.new(name_val, date)
end
|
124
|
+
|
125
|
+
# Parses a Content-Type header and returns an RFC2045::ContentType.
# The token stream is split on ";": the first section is "type/subtype",
# every following section is a "name=value" parameter (surrounding double
# quotes are stripped from the value).
# A missing type defaults to "text"; a missing subtype defaults to "plain"
# for text and to "" otherwise.
def parse_content_type(str, opt={})
  token = split_by(Tokenizer.token(str), ";")
  # Each section is an Array of token strings. Array#to_s is inspect on
  # Ruby 1.9+, so the tokens must be glued together with join.
  type, subtype = Array(token.shift).join.split("/", 2)
  params = {}
  token.each do |param|
    pn, pv = param.join.split(/=/, 2)
    params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/, "")
  end
  type = "text" if type.nil? || type.empty?
  if subtype.nil? || subtype.empty?
    subtype = type == "text" ? "plain" : ""
  end
  RFC2045::ContentType.new(type, subtype, params)
end
|
140
|
+
|
141
|
+
# Parses a Content-Transfer-Encoding header and returns an
# RFC2045::ContentTransferEncoding built from the first token
# ("" when the header has no token).
def parse_content_transfer_encoding(str, opt={})
  mechanism = Tokenizer.token(str).first.to_s
  RFC2045::ContentTransferEncoding.new(mechanism)
end
|
145
|
+
|
146
|
+
# Parses a Mime-Version header and returns a String: all tokens
# concatenated without separators.
def parse_mime_version(str, opt={})
  tokens = Tokenizer.token(str)
  tokens.join
end
|
150
|
+
|
151
|
+
# Parses a Content-Disposition header and returns an
# RFC2183::ContentDisposition.
# The token stream is split on ";": the first section is the disposition
# type, every following section is a "name=value" parameter (surrounding
# double quotes are stripped from the value).
def parse_content_disposition(str, opt={})
  token = split_by(Tokenizer.token(str), ";")
  # Sections are Arrays of token strings. Array#to_s is inspect on
  # Ruby 1.9+, so join is required to rebuild the text.
  type = Array(token.shift).join
  params = {}
  token.each do |param|
    pn, pv = param.join.split(/=/, 2)
    params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/, "")
  end
  RFC2183::ContentDisposition.new(type, params)
end
|
162
|
+
|
163
|
+
# Splits array at every element equal to delim.
# Returns an Array of Arrays. Empty groups before or between delimiters are
# kept; an empty group after the last delimiter is dropped.
def split_by(array, delim)
  groups = [[]]
  array.each do |item|
    if item == delim
      groups << []
    else
      groups.last << item
    end
  end
  groups.pop if groups.last.empty?
  groups
end
|
178
|
+
|
179
|
+
# Returns an Array of RFC2822::Mailbox parsed from str.
# str:: header body (String)
# opt:: options (Hash); :decode_mime_header enables RFC 2047 decoding of
#       the display name
# The token stream is split on ",". An entry of the form
# "display name <local@domain>" yields a mailbox with a display name; any
# other entry is taken as a bare "local@domain".
def mailbox_list(str, opt)
  split_by(Tokenizer.token(str), ",").map do |m|
    a1 = m.index("<")
    a2 = m.rindex(">")
    if a1 && a2 && a2 > a1
      # m[0, a1] is empty when "<" is the first token; the former
      # m[0..a1-1] became m[0..-1] there and returned the whole entry.
      display_name = m[0, a1].join(" ")
      display_name = RFC2047.decode(display_name, opt) if opt[:decode_mime_header]
      # join, not to_s: Array#to_s is inspect on Ruby 1.9+.
      local_part, domain = m[(a1 + 1)...a2].join.split(/@/, 2)
      RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain), display_name)
    else
      local_part, domain = m.join.split(/@/, 2)
      RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain))
    end
  end
end
|
198
|
+
|
199
|
+
# Returns an Array of RFC2822::MsgId parsed from str.
# Tokens between "<" and ">" are concatenated into one id. When no
# bracketed id is found, every whitespace-separated word of str becomes
# an id.
def msg_id_list(str)
  ids = []
  current = nil # text collected since the last "<"; nil while outside brackets
  Tokenizer.token(str).each do |tok|
    if tok == "<"
      current ||= ""
    elsif tok == ">"
      if current
        ids << RFC2822::MsgId.new(current)
        current = nil
      end
    elsif current
      current << tok
    end
  end
  ids = str.split.map { |word| RFC2822::MsgId.new(word) } if ids.empty?
  ids
end
|
225
|
+
|
226
|
+
# Lenient tokenizer for header bodies, built on RFC2822::Scanner.
# cfws and the TEXT_RE / QTEXT_RE / DTEXT_RE / ATEXT_RE character classes are
# not defined here; they presumably come from RFC2822::Scanner - confirm.
class Tokenizer < RFC2822::Scanner
  # str:: header body to tokenize (String)
  def initialize(str)
    @comments = []
    @ss = StringScanner.new(str)
  end

  # Splits the input into tokens (comments are removed).
  # Returns an Array of Strings: quoted strings, bracketed literals, runs of
  # atom characters, and any other single character.
  def token()
    tokens = []
    until @ss.eos?
      next if @ss.scan(/\s+/nmo) # whitespace only separates tokens

      if (paren = @ss.scan(/\(/nmo))
        resume_at = @ss.pos
        begin
          cfws(@ss)
        rescue ParseError
          # not a well-formed comment: keep "(" as an ordinary token
          @ss.pos = resume_at
          tokens << paren
        end
      elsif (word = @ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}]))*\s*\"/nmo) ||
                    @ss.scan(/\[(\s*(\\[#{TEXT_RE}]|[#{DTEXT_RE}]))*\s*\]/nmo) ||
                    @ss.scan(/[#{ATEXT_RE}]+/no))
        tokens << word
      else
        tokens << @ss.scan(/./no)
      end
    end
    tokens
  end

  # Splits the input for a Received header.
  # Returns an Array of Strings: quoted strings, ";" and runs of characters
  # other than whitespace, "(" and ";".
  def token_received()
    words = []
    until @ss.eos?
      next if @ss.scan(/[\s]+/nmo) # ignore blanks

      if (paren = @ss.scan(/\(/nmo))
        resume_at = @ss.pos
        begin
          cfws(@ss)
        rescue ParseError
          # not a well-formed comment: glue "(" onto the previous word
          @ss.pos = resume_at
          words.last << paren unless words.empty?
        end
      elsif (quoted = @ss.scan(/\"([\s]*(\\[#{TEXT_RE}]|[#{QTEXT_RE}]))*[\s]*\"/nmo))
        words << quoted
      elsif (semicolon = @ss.scan(/;/))
        words << semicolon
      else
        words << @ss.scan(/[^\s\(\;]+/nmo)
      end
    end
    words
  end

  class << self
    # Convenience wrapper: tokenizes str with #token.
    def token(str)
      Tokenizer.new(str).token
    end

    # Convenience wrapper: tokenizes str with #token_received.
    def token_received(str)
      Tokenizer.new(str).token_received
    end
  end
end
|
290
|
+
|
291
|
+
end
|
292
|
+
end
|
@@ -0,0 +1,403 @@
|
|
1
|
+
# Copyright (C) 2003-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "nkf"
|
5
|
+
require "date"
|
6
|
+
|
7
|
+
module MailParser
|
8
|
+
|
9
|
+
# Module-wide settings; changed through output_charset=, text_body_only=
# and extract_message_type=.

# Charset that decoded header and body text is converted to.
@@output_charset = "euc-jp"

# When true, a body is only collected for parts whose type is "text" or
# that have no type at all.
@@text_body_only = false

# When true, a part of type "message" is parsed as a nested message.
@@extract_message_type = true
|
12
|
+
|
13
|
+
# Conversion key => module function performing the conversion.
# A key is the source code letter followed by the destination code letter,
# both taken from Charsets.
ConvertMethods = {
  # conversions into EUC-JP
  "JE" => :jistoeuc, "SE" => :sjistoeuc, "UE" => :utf8toeuc,
  # conversions into UTF-8
  "EU" => :euctoutf8, "SU" => :sjistoutf8, "JU" => :jistoutf8,
}
|
21
|
+
|
22
|
+
# Charset name (lower case) => one-letter code used to build ConvertMethods
# keys. "N" (us-ascii) marks text that is never converted.
Charsets = {
  "iso-2022-jp" => "J",
  "euc-jp"      => "E",
  "shift_jis" => "S", "sjis" => "S", "x-sjis" => "S",
  "utf-8"       => "U",
  "us-ascii"    => "N",
}
|
31
|
+
|
32
|
+
module_function
|
33
|
+
|
34
|
+
# Converts EUC-JP text to UTF-8 with NKF.
# s:: EUC-JP encoded String
def euctoutf8(s)
  flags = "-m0Ewx" # nkf flags: no MIME decoding, EUC-JP in, UTF-8 out, keep X0201 kana
  NKF.nkf(flags, s)
end
|
37
|
+
|
38
|
+
# Converts Shift_JIS text to UTF-8 with NKF.
# s:: Shift_JIS encoded String
def sjistoutf8(s)
  flags = "-m0Swx" # nkf flags: no MIME decoding, Shift_JIS in, UTF-8 out, keep X0201 kana
  NKF.nkf(flags, s)
end
|
41
|
+
|
42
|
+
# Converts ISO-2022-JP (JIS) text to UTF-8 with NKF.
# s:: ISO-2022-JP encoded String
def jistoutf8(s)
  flags = "-m0Jwx" # nkf flags: no MIME decoding, JIS in, UTF-8 out, keep X0201 kana
  NKF.nkf(flags, s)
end
|
45
|
+
|
46
|
+
# Converts Shift_JIS text to EUC-JP with NKF.
# s:: Shift_JIS encoded String
def sjistoeuc(s)
  flags = "-m0Sex" # nkf flags: no MIME decoding, Shift_JIS in, EUC-JP out, keep X0201 kana
  NKF.nkf(flags, s)
end
|
49
|
+
|
50
|
+
# Converts ISO-2022-JP (JIS) text to EUC-JP with NKF.
# s:: ISO-2022-JP encoded String
def jistoeuc(s)
  flags = "-m0Jex" # nkf flags: no MIME decoding, JIS in, EUC-JP out, keep X0201 kana
  NKF.nkf(flags, s)
end
|
53
|
+
|
54
|
+
# Converts UTF-8 text to EUC-JP with NKF.
# s:: UTF-8 encoded String
def utf8toeuc(s)
  flags = "-m0Wex" # nkf flags: no MIME decoding, UTF-8 in, EUC-JP out, keep X0201 kana
  NKF.nkf(flags, s)
end
|
57
|
+
|
58
|
+
# Sets the charset that decoded text is converted to.
# c:: charset name (String) looked up in Charsets, or nil to turn
#     conversion off in mdecode_token and decode_body
def output_charset=(c)
  @@output_charset = c
end
|
61
|
+
|
62
|
+
# Sets whether parse_message2 collects a body only for parts whose type
# is "text" or missing.
# f:: true or false
def text_body_only=(f)
  @@text_body_only = f
end
|
65
|
+
|
66
|
+
# Sets whether parse_message2 parses a part of type "message" as a nested
# message.
# f:: true or false
def extract_message_type=(f)
  @@extract_message_type = f
end
|
69
|
+
|
70
|
+
# Decodes the Base64 text of a MIME encoded-word in a header.
# str:: Base64 encoded String
# Returns the decoded String.
def b64_hdecode(str)
  decoded = str.unpack("m")
  decoded.first
end
|
73
|
+
|
74
|
+
# Decodes a Base64 encoded message body.
# str:: Base64 encoded String
# Returns the decoded String.
def b64_decode(str)
  decoded = str.unpack("m")
  decoded.first
end
|
77
|
+
|
78
|
+
# Decodes the "Q" encoded text of a MIME encoded-word in a header.
# "_" becomes a space and "=XX" (two upper-case hex digits) becomes the
# corresponding byte; lower-case hex digits are left untouched.
def qp_hdecode(str)
  spaced = str.tr("_", " ")
  spaced.gsub(/=([0-9A-F][0-9A-F])/no) { Regexp.last_match(1).hex.chr }
end
|
81
|
+
|
82
|
+
# Decodes a quoted-printable message body.
# Trailing spaces and tabs are removed from each line, soft line breaks
# ("=" at the end of a line) are removed, then "=XX" (two upper-case hex
# digits) is replaced by the corresponding byte.
def qp_decode(str)
  trimmed = str.gsub(/[ \t]+$/no, "")
  joined = trimmed.gsub(/=\r?\n/no, "")
  joined.gsub(/=([0-9A-F][0-9A-F])/no) { Regexp.last_match(1).hex.chr }
end
|
86
|
+
|
87
|
+
# Decodes one MIME encoded-word ("=?charset?Q|B?text?=").
# s:: candidate encoded-word (String)
# Returns s unchanged when it is not an encoded-word or its charset is not
# listed in Charsets. Otherwise returns the decoded text, converted to the
# output charset when a conversion function for that pair exists.
def mdecode_token(s)
  return s if s !~ /\A=\?([a-z0-9_-]+)\?(Q|B)\?([^?]+)\?=\Z/nio

  charset, encoding, text = $1, $2, $3
  fc = MailParser::Charsets[charset.downcase]
  return s if fc.nil?

  decoded = encoding.downcase == 'q' ? qp_hdecode(text) : b64_hdecode(text)
  tc = @@output_charset && MailParser::Charsets[@@output_charset.downcase]
  return decoded if fc == "N" || tc.nil? || fc == tc

  # Not every source/destination pair has a converter (for example when the
  # output charset is shift_jis or us-ascii). Return the decoded bytes
  # as they are instead of failing in send(nil).
  converter = MailParser::ConvertMethods[fc + tc]
  return decoded unless converter

  MailParser.send(converter, decoded)
end
|
104
|
+
|
105
|
+
# Decodes every MIME encoded-word in a header value.
# Whitespace runs are collapsed to one space, whitespace between two
# adjacent encoded-words is removed, then each encoded-word is replaced by
# the result of mdecode_token.
def mime_header_decode(str)
  unfolded = str.gsub(/\s+/no, " ")
  adjacent = unfolded.gsub(/\?=\s+=\?/no, "?==?")
  adjacent.gsub(/=\?[a-z0-9_-]+\?(Q|B)\?[^?]+\?=/nio) { |word| mdecode_token word }
end
|
108
|
+
|
109
|
+
# Removes parenthesised comments from a header value.
# v:: header value (String)
# Returns a new String. Backslash-escaped characters and double-quoted
# strings are copied unchanged, so a "(" inside them does not start a
# comment. When a comment is never closed, the text from its "(" onwards is
# returned as it is.
def trunc_comment(v)
  ret = ""
  after = v
  until after.empty?
    # Copy everything up to the next unquoted "(" or unmatchable backslash.
    # The pattern can match the empty string, so it always matches.
    after =~ /^(\\.|\"(\\.|[^\\\"])*\"|[^\\\(])*/no
    ret << $&
    after = $'

    # Only a comment opener at the very start of the rest lets the scan go
    # on. Anything else (a trailing backslash, or a backslash followed by a
    # newline) cannot be consumed and is appended verbatim below; looping
    # again on it would never terminate.
    break unless after[0, 1] == "("

    rest = trunc_comment_sub(after[1..-1])
    return ret + after if rest.nil? # unbalanced comment: keep it as is

    after = rest
  end
  ret + after
end
|
128
|
+
|
129
|
+
# Skips the remainder of a comment whose opening "(" has already been
# consumed, honouring nested comments and backslash escapes.
# orig:: text following the opening "(" (String)
# Returns the text after the matching ")" or nil when the comment is not
# closed.
def trunc_comment_sub(orig)
  rest = orig
  loop do
    # drop ordinary comment text up to the next parenthesis
    rest = $' if rest =~ /^(\\.|[^\\\(\)])*/no
    return rest[1..-1] if rest =~ /^\)/no
    return nil unless rest =~ /^\(/no

    # nested comment: skip it, then continue with the outer one
    rest = trunc_comment_sub(rest[1..-1])
    return nil if rest.nil?
  end
end
|
148
|
+
|
149
|
+
# Splits an address-list header value at top-level commas.
# v:: header value (String); it is not modified
# Returns an Array of Strings with surrounding whitespace stripped. Commas
# inside double-quoted strings do not split. An empty input yields [""].
def split_address(v)
  rest = v # only ever reassigned, so the caller's string stays untouched
  addresses = []
  current = ""
  until rest.empty?
    # \A (not ^) so that the match can only happen at the current position,
    # never at the start of a later line.
    if rest =~ /\A(\s+|[0-9A-Za-z\!\#\$\%\&\'\*\+\-\/\=\?\^\_\`\{\|\}\~]+|\"(\\.|[^\\\"])*\")/
      current << $&
      rest = $'
    elsif rest[0, 1] == ","
      addresses << current.strip
      current = ""
      rest = rest[1..-1]
    else
      # any other single character ("<", ">", "@", ...) belongs to the address
      current << rest[0, 1]
      rest = rest[1..-1]
    end
  end
  addresses << current.strip
  addresses
end
|
167
|
+
|
168
|
+
# Extracts the mail addresses from an address-list header value.
# Comments are removed, the value is split at commas, and for an entry
# ending in "<...>" only the text between the brackets is kept.
# Returns an Array of Strings.
def get_mail_address(v)
  split_address(trunc_comment(v)).map do |entry|
    trimmed = entry.strip
    trimmed =~ /<([^<>]*)>$/ ? $1 : trimmed
  end
end
|
173
|
+
|
174
|
+
# Parses a date of the form "[Day,] 4 Dec 2007 12:34:56 ...".
# A leading three-letter day name followed by a comma is skipped. Only
# day, month, year and time of day are used; the Time is built with
# Time.mktime, so any zone text in s has no effect.
# Returns a Time, or nil when s cannot be parsed.
def get_date(s)
  s = $' if s =~ /^[A-Z][A-Z][A-Z]\s*,\s*/i
  fields = ::DateTime._strptime(s, "%d %b %Y %X")
  return unless fields

  begin
    Time.mktime(*fields.values_at(:year, :mon, :mday, :hour, :min, :sec))
  rescue StandardError
    nil
  end
end
|
182
|
+
|
183
|
+
# Parses a Content-Type style header value ("type/subtype; name=value ...").
# str:: header value (String)
# Returns a Hash with :type and :subtype (lower case, present only when
# found) and :parameter (Hash of lower-case parameter name => value).
# Numbered continuations (name*0, name*1, ...) and charset-tagged,
# percent-encoded values (name*=charset'lang'value) are reassembled; a
# value with a known charset is converted to the output charset when a
# conversion function for that pair exists.
def parse_content_type(str)
  hash = {}
  hash[:parameter] = {}
  if str.strip =~ /^([a-z0-9_-]+)(?:\/([a-z0-9_-]+))?\s*/nio then
    hash[:type] = $1.downcase
    hash[:subtype] = $2.downcase if $2
    params = $'
    # name => list of [order, value, charset, percent_encoded] pieces
    pending = {}
    loop do
      if params =~ /\A\s*;\s*([a-z0-9_-]+)(?:\*(\d+))?\s*=\s*(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
        # name=value or name*N=value (plain continuation piece)
        pn, ord, pv = $1, $2, $3||$4
        params = $'
        if ord then
          (pending[pn] ||= []) << [ord.to_i, pv]
        else
          hash[:parameter][pn.downcase] = pv
        end
      elsif params =~ /\A\s*;\s*([a-z0-9_-]+)\*\s*=\s*([a-z0-9_-]+)?\'(?:[a-z0-9_-]+)?\'(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
        # name*=charset'lang'value
        pn, charset, pv = $1, $2, $3||$4
        params = $'
        pending[pn] = [[0, pv, charset, true]]
      elsif params =~ /\A\s*;\s*([a-z0-9_-]+)\*0\*\s*=\s*([a-z0-9_-]+)?\'(?:[a-z0-9_-]+)?\'(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
        # name*0*=charset'lang'value (first encoded continuation piece)
        pn, charset, pv = $1, $2, $3||$4
        params = $'
        pending[pn] = [[0, pv, charset, true]]
      elsif params =~ /\A\s*;\s*([a-z0-9_-]+)\*(\d+)\*\s*=\s*(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
        # name*N*=value (further encoded continuation piece)
        pn, ord, pv = $1, $2, $3||$4
        params = $'
        (pending[pn] ||= []) << [ord.to_i, pv, nil, true]
      else
        break
      end
    end
    pending.each do |pn, pv|
      pv = pv.sort{|a,b| a[0]<=>b[0]}
      charset = pv[0][2]
      v = pv.map{|a|a[3] ? a[1].gsub(/%([0-9A-F][0-9A-F])/nio){$1.hex.chr} : a[1]}.join
      fc = MailParser::Charsets[charset.downcase] if charset
      tc = @@output_charset && MailParser::Charsets[@@output_charset.downcase]
      # tc is nil when the output charset is unset or unknown, and not every
      # source/destination pair has a converter; in both cases the value is
      # kept unconverted instead of raising a TypeError.
      if fc && tc && fc != "N" && fc != tc
        converter = MailParser::ConvertMethods[fc + tc]
        v = MailParser.send(converter, v) if converter
      end
      hash[:parameter][pn.downcase] = v
    end
  end
  return hash
end
|
232
|
+
|
233
|
+
# Parses a Content-Disposition header value.
# The value is handled exactly like a Content-Type value, so the result is
# the Hash produced by parse_content_type (the disposition type is stored
# under :type).
def parse_content_disposition(str)
  parse_content_type(str)
end
|
236
|
+
|
237
|
+
# Parses a whole mail message.
# msg:: object that responds to gets and returns one line per call (nil at
#       the end); it gets three singleton methods used by the parser
# Returns the Hash built by parse_message2, extended with a to_s that
# renders a short summary of the message.
def parse_message(msg)
  class << msg
    # true when the last _each_with_multiple_delimiter call stopped at a delimiter
    attr_reader :found_boundary

    # Yields each line read with gets until the input ends or a line equal
    # (after chomp) to one of delim is read.
    def _each_with_multiple_delimiter(delim=[])
      @found_boundary = false
      while (@l = gets)
        if delim.include?(@l.chomp)
          @found_boundary = true
          return
        end
        yield @l
      end
    end

    # The most recently read line without its line terminator (nil at end of input).
    def last_line()
      @l && @l.chomp
    end
  end

  m = parse_message2(msg)

  def m.to_s()
    return <<EOS
From: #{self[:from].join(",")}
To: #{self[:to].join(",")}
Subject:#{self[:subject]}
Date: #{self[:date]}

#{self[:body]}

#{if self[:parts] then self[:parts].map{|p| "[#{p[:type]}/#{p[:subtype]}]<#{p[:filename]}>"}.join("\n") end}
EOS
  end

  m
end
|
277
|
+
|
278
|
+
# Parses one message or MIME part from msg.
# msg:: input object prepared by parse_message
# boundary:: delimiter lines of the enclosing multiparts (Array of String)
# Returns the Hash from parse_header, extended with :message (nested
# message), :parts (Array of part Hashes) or :body (decoded body text).
def parse_message2(msg, boundary=[])
  ret = parse_header(msg, boundary)
  return ret if msg.found_boundary # part ended inside its header

  if ret[:type] == "message" && @@extract_message_type
    ret[:message] = parse_message2(msg, boundary)
  elsif ret[:multipart] && ret[:boundary]
    close_delim = "--" + ret[:boundary] + "--"
    open_delim = "--" + ret[:boundary]
    delims = boundary + [close_delim, open_delim]

    msg._each_with_multiple_delimiter(delims) { } # skip preamble
    parts = []
    parts << parse_message2(msg, delims) while msg.last_line == open_delim
    # after the closing delimiter, skip the epilogue up to the outer boundary
    msg._each_with_multiple_delimiter(boundary) { } if msg.last_line == close_delim
    ret[:parts] = parts
  elsif !@@text_body_only || ret[:type] == "text" || ret[:type].nil?
    body = ""
    msg._each_with_multiple_delimiter(boundary) { |l| body << l }
    ret[:body] = decode_body(body, ret[:encoding], ret[:charset])
  else
    # body not wanted: consume it without keeping it
    msg._each_with_multiple_delimiter(boundary) { }
  end
  ret
end
|
311
|
+
|
312
|
+
# Reads and parses the header section of a message or MIME part.
# msg:: input object prepared by parse_message
# boundary:: delimiter lines that end the part early (Array of String)
# Returns a Hash with :from, :to, :cc (Arrays of addresses), :date,
# :subject, :multipart, :boundary, :header (lower-case field name => Array
# of decoded bodies) and :rawheader; :type, :subtype, :charset, :encoding
# and :filename are present only when the headers supply them.
def parse_header(msg, boundary=[])
  ret = {}
  raw = ""
  header = []
  msg._each_with_multiple_delimiter(boundary) do |line|
    line.chomp!
    break if line.empty? # a blank line ends the header section
    raw << line << "\n"
    if line =~ /^\s/no && !header.empty?
      header.last << line # continuation of the previous field
    elsif line.include?(":")
      header << line
    end
    # a line without ":" that is not a continuation is garbage and skipped
  end

  from = []
  to = []
  cc = []
  date = nil
  subject = ""
  encoding = ct = charset = multipart = filename = bd = nil
  h = {}

  header.each do |field|
    hn, hb = field.split(/:\s*/no, 2)
    hn.downcase!
    (h[hn] ||= []) << mime_header_decode(hb)
    case hn
    when "from" then from.concat get_mail_address(hb)
    when "to" then to.concat get_mail_address(hb)
    when "cc" then cc.concat get_mail_address(hb)
    when "date" then date = get_date(hb)
    when "subject" then subject.concat hb
    when "content-type"
      ct = parse_content_type(hb)
      case ct[:type]
      when "text"
        charset = ct[:parameter]["charset"]
      when "multipart"
        multipart = true
        bd = ct[:parameter]["boundary"]
      end
      name = ct[:parameter]["name"]
      filename = mime_header_decode(name) if name
    when "content-disposition"
      cd = parse_content_disposition(hb)
      disp_name = cd[:parameter]["filename"]
      filename = mime_header_decode(disp_name) if disp_name
    when "content-transfer-encoding"
      encoding = hb.strip.downcase
    end
  end

  ret[:from] = from
  ret[:to] = to
  ret[:cc] = cc
  ret[:date] = date
  ret[:subject] = mime_header_decode subject
  if ct
    ret[:type] = ct[:type].downcase if ct[:type]
    ret[:subtype] = ct[:subtype].downcase if ct[:subtype]
    ret[:charset] = charset.downcase if charset
  end
  ret[:encoding] = encoding if encoding
  ret[:multipart] = multipart
  ret[:boundary] = bd
  ret[:filename] = filename if filename
  ret[:header] = h
  ret[:rawheader] = raw
  ret
end
|
388
|
+
|
389
|
+
# Decodes a message body.
# body:: raw body text (String)
# encoding:: Content-Transfer-Encoding value; "base64" and
#            "quoted-printable" are decoded, anything else is left as it is
# charset:: charset of the body (String) or nil
# Returns the decoded body, converted to the output charset when both
# charsets are known, differ, and a conversion function for the pair exists.
def decode_body(body, encoding, charset)
  case encoding
  when "base64" then body = b64_decode(body)
  when "quoted-printable" then body = qp_decode(body)
  end
  return body if charset.nil?

  fc = MailParser::Charsets[charset.downcase]
  return body if fc.nil?

  tc = @@output_charset && MailParser::Charsets[@@output_charset.downcase]
  return body if fc == "N" || tc.nil? || fc == tc

  # Not every source/destination pair has a converter (for example when the
  # output charset is shift_jis or us-ascii). Keep the body unconverted
  # instead of failing in send(nil).
  converter = MailParser::ConvertMethods[fc + tc]
  return body unless converter

  MailParser.send(converter, body)
end
|
403
|
+
end
|