mailparser 0.4.22a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY +141 -0
- data/README.txt +501 -0
- data/lib/mailparser.rb +558 -0
- data/lib/mailparser/conv_charset.rb +27 -0
- data/lib/mailparser/error.rb +7 -0
- data/lib/mailparser/loose.rb +292 -0
- data/lib/mailparser/obsolete.rb +403 -0
- data/lib/mailparser/rfc2045.rb +54 -0
- data/lib/mailparser/rfc2045/parser.rb +245 -0
- data/lib/mailparser/rfc2045/scanner.rb +54 -0
- data/lib/mailparser/rfc2047.rb +82 -0
- data/lib/mailparser/rfc2183.rb +33 -0
- data/lib/mailparser/rfc2183/parser.rb +186 -0
- data/lib/mailparser/rfc2183/scanner.rb +7 -0
- data/lib/mailparser/rfc2231.rb +57 -0
- data/lib/mailparser/rfc2822.rb +212 -0
- data/lib/mailparser/rfc2822/parser.rb +883 -0
- data/lib/mailparser/rfc2822/scanner.rb +119 -0
- data/test.rb +26 -0
- data/test/test_loose.rb +371 -0
- data/test/test_mailparser.rb +1130 -0
- data/test/test_obsolete.rb +615 -0
- data/test/test_rfc2045.rb +121 -0
- data/test/test_rfc2047.rb +118 -0
- data/test/test_rfc2183.rb +60 -0
- data/test/test_rfc2231.rb +167 -0
- data/test/test_rfc2822.rb +370 -0
- metadata +81 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
# Copyright (C) 2007-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "iconv"
|
5
|
+
require "nkf"
|
6
|
+
|
7
|
+
# Charset conversion helper built on Iconv, with NKF used for ISO-2022-JP.
class MailParser::ConvCharset
  # Shift_JIS spellings that are all converted as cp932.
  CHARSET = {
    "sjis"      => "cp932",
    "x-sjis"    => "cp932",
    "shift_jis" => "cp932",
    "shift-jis" => "cp932",
  }

  # Converts +str+ from charset +from+ to charset +to+ and returns the result.
  # from:: source charset name (String, case-insensitive)
  # to:: destination charset name (String, case-insensitive)
  # str:: text to convert (String)
  #
  # ISO-2022-JP on either side goes through NKF with cp932 as the
  # intermediate encoding; every other conversion is handed to Iconv.
  def self.conv_charset(from, to, str)
    src = from.downcase
    src = CHARSET[src] || src
    dst = to.downcase
    dst = CHARSET[dst] || dst

    text = str
    if src == "iso-2022-jp"
      # JIS input -> Shift_JIS output, no MIME decoding, keep X0201 kana
      text = NKF.nkf("-m0Jxs", str)
      src = "cp932"
    end

    if dst == "iso-2022-jp"
      # Shift_JIS input -> JIS output
      NKF.nkf("-m0Sxj", Iconv.iconv("cp932", src, text)[0])
    else
      Iconv.iconv(dst, src, text)[0]
    end
  end
end
|
@@ -0,0 +1,292 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# Copyright (C) 2007-2010 TOMITA Masahiro
|
3
|
+
# mailto:tommy@tmtm.org
|
4
|
+
|
5
|
+
require "time"
|
6
|
+
require "strscan"
|
7
|
+
require "mailparser/rfc2822"
|
8
|
+
require "mailparser/rfc2045"
|
9
|
+
|
10
|
+
module MailParser
|
11
|
+
module Loose
|
12
|
+
# Lower-case header field name => name (Symbol) of the method that +parse+
# calls for that header.
HEADER_PARSER = {
  "date"                      => :parse_date,
  "from"                      => :parse_mailbox_list,
  "sender"                    => :parse_mailbox,
  "reply-to"                  => :parse_mailbox_list,
  "to"                        => :parse_mailbox_list,
  "cc"                        => :parse_mailbox_list,
  "bcc"                       => :parse_mailbox_list,
  "message-id"                => :parse_msg_id,
  "in-reply-to"               => :parse_msg_id_list,
  "references"                => :parse_msg_id_list,
  "keywords"                  => :parse_phrase_list,
  "resent-date"               => :parse_date,
  "resent-from"               => :parse_mailbox_list,
  "resent-sender"             => :parse_mailbox,
  "resent-to"                 => :parse_mailbox_list,
  "resent-cc"                 => :parse_mailbox_list,
  "resent-bcc"                => :parse_mailbox_list,
  "resent-message-id"         => :parse_msg_id,
  "return-path"               => :parse_return_path,
  "received"                  => :parse_received,
  "content-type"              => :parse_content_type,
  "content-transfer-encoding" => :parse_content_transfer_encoding,
  "content-id"                => :parse_msg_id,
  "mime-version"              => :parse_mime_version,
  "content-disposition"       => :parse_content_disposition,
}
|
39
|
+
|
40
|
+
module_function
|
41
|
+
# Parses one header field and returns the resulting object.
# hname:: header name (String); looked up as-is in HEADER_PARSER
# hbody:: header body (String)
# opt:: options (Hash); :decode_mime_header makes headers without a
#       dedicated parser go through RFC2047.decode
#
# Headers without a dedicated parser are returned as a String with every
# whitespace run collapsed to a single space.
def parse(hname, hbody, opt={})
  if HEADER_PARSER.key?(hname)
    return method(HEADER_PARSER[hname]).call(hbody, opt)
  end

  unfolded = hbody.gsub(/\s+/, " ")
  opt[:decode_mime_header] ? RFC2047.decode(unfolded, opt) : unfolded
end
|
57
|
+
|
58
|
+
# Parses a Date header and returns an RFC2822::DateTime.
# str:: header body (String)
# opt:: unused
#
# Time.rfc2822 is tried first, then Time.parse; when both fail the current
# time is used instead of raising.
def parse_date(str, opt={})
  t = begin
        Time.rfc2822(str)
      rescue
        begin
          Time.parse(str)
        rescue
          Time.now
        end
      end
  RFC2822::DateTime.new(t.year, t.month, t.day, t.hour, t.min, t.sec, t.zone)
end
|
67
|
+
|
68
|
+
# Parses an address-list header such as From, To or Cc and returns an Array
# of RFC2822::Mailbox (the result of +mailbox_list+).
# str:: header body (String)
# opt:: options (Hash), passed through to +mailbox_list+
def parse_mailbox_list(str, opt={})
  return mailbox_list(str, opt)
end
|
72
|
+
|
73
|
+
# Parses a Sender / Resent-Sender header and returns the first
# RFC2822::Mailbox found (nil when +mailbox_list+ returns an empty Array).
# str:: header body (String)
# opt:: options (Hash), passed through to +mailbox_list+
def parse_mailbox(str, opt={})
  mailbox_list(str, opt).first
end
|
77
|
+
|
78
|
+
# Parses a Message-Id / Resent-Message-Id header and returns the first
# RFC2822::MsgId found (nil when +msg_id_list+ returns an empty Array).
# str:: header body (String)
# opt:: unused
def parse_msg_id(str, opt={})
  msg_id_list(str).first
end
|
82
|
+
|
83
|
+
# Parses a header such as In-Reply-To or References and returns the Array of
# RFC2822::MsgId produced by +msg_id_list+.
# str:: header body (String)
# opt:: unused
def parse_msg_id_list(str, opt={})
  return msg_id_list(str)
end
|
87
|
+
|
88
|
+
# Parses a Keywords header and returns an Array of Strings, one per
# comma-separated phrase (tokens of a phrase are joined with one space).
# str:: header body (String)
# opt:: options (Hash); with :decode_mime_header every phrase is passed
#       through RFC2047.decode
def parse_phrase_list(str, opt={})
  phrases = split_by(Tokenizer.token(str), ",").map { |words| words.join(" ") }
  return phrases unless opt[:decode_mime_header]
  phrases.map { |phrase| RFC2047.decode(phrase, opt) }
end
|
97
|
+
|
98
|
+
# Parses a Return-Path header. Returns the first element produced by
# +mailbox_list+ (nil when that list is empty).
# str:: header body (String)
# opt:: options (Hash), passed through to +mailbox_list+
def parse_return_path(str, opt={})
  mailbox_list(str, opt).first
end
|
102
|
+
|
103
|
+
# Parses a Received header and returns an RFC2822::Received.
# str:: header body (String)
# opt:: unused
#
# The body is split on ";". When there is more than one section the last one
# is parsed as the date, otherwise RFC2822::DateTime.now is used. In the
# first section every purely alphanumeric token is taken as a name and the
# token after it as its value; other tokens are appended to the most recent
# value.
def parse_received(str, opt={})
  sections = split_by(Tokenizer.token_received(str), ";")
  date = if sections.length > 1
           parse_date(sections.last.join(" "))
         else
           RFC2822::DateTime.now
         end

  name_val = {}
  tokens = sections.empty? ? [] : sections[0]
  value = ""
  idx = 0
  while idx < tokens.length
    tok = tokens[idx]
    if tok =~ /\A[a-z0-9]+\z/ino
      value = tokens[idx + 1]
      name_val[tok.downcase] = value
      idx += 2                  # skip the value token as well
    else
      value << tok              # grows the value already stored in name_val
      idx += 1
    end
  end
  RFC2822::Received.new(name_val, date)
end
|
124
|
+
|
125
|
+
# Parses a Content-Type header and returns an RFC2045::ContentType.
# str:: header body (String)
# opt:: unused
#
# A missing type defaults to "text"; a missing subtype defaults to "plain"
# for text and to "" otherwise. Surrounding double quotes are stripped from
# parameter values.
def parse_content_type(str, opt={})
  sections = split_by(Tokenizer.token(str), ";")
  # Each section is an Array of tokens. Array#join concatenates them on every
  # Ruby version; Array#to_s only did so on 1.8 and returns the inspect form
  # on 1.9 and later.
  type, subtype = (sections.shift || []).join.split("/", 2)
  params = {}
  sections.each do |tokens|
    pn, pv = tokens.join.split(/=/, 2)
    params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/, "")
  end
  type = "text" if type.nil? or type.empty?
  if subtype.nil? or subtype.empty?
    subtype = type == "text" ? "plain" : ""
  end
  RFC2045::ContentType.new(type, subtype, params)
end
|
140
|
+
|
141
|
+
# Parses a Content-Transfer-Encoding header and returns an
# RFC2045::ContentTransferEncoding built from the first token of the body
# ("" when the body has no token).
# str:: header body (String)
# opt:: unused
def parse_content_transfer_encoding(str, opt={})
  first_token = Tokenizer.token(str)[0]
  RFC2045::ContentTransferEncoding.new(first_token.to_s)
end
|
145
|
+
|
146
|
+
# Parses a Mime-Version header and returns a String: all tokens of the body
# concatenated without separators.
# str:: header body (String)
# opt:: unused
def parse_mime_version(str, opt={})
  tokens = Tokenizer.token(str)
  tokens.join
end
|
150
|
+
|
151
|
+
# Parses a Content-Disposition header and returns an
# RFC2183::ContentDisposition.
# str:: header body (String)
# opt:: unused
#
# Surrounding double quotes are stripped from parameter values.
def parse_content_disposition(str, opt={})
  sections = split_by(Tokenizer.token(str), ";")
  # Sections are Arrays of tokens; join them explicitly. Array#to_s returns
  # the inspect form on Ruby 1.9 and later, which corrupted type and
  # parameters.
  type = (sections.shift || []).join
  params = {}
  sections.each do |tokens|
    pn, pv = tokens.join.split(/=/, 2)
    params[pn.to_s] = pv.to_s.gsub(/\A"|"\z/, "")
  end
  RFC2183::ContentDisposition.new(type, params)
end
|
162
|
+
|
163
|
+
# Splits +array+ at every element equal to +delim+ and returns an Array of
# Arrays. The delimiters themselves are dropped. An empty group between or
# before delimiters is kept; an empty trailing group is not.
# array:: elements to split (Array)
# delim:: delimiter element, compared with ==
def split_by(array, delim)
  groups = [[]]
  array.each do |item|
    if item == delim
      groups << []
    else
      groups.last << item
    end
  end
  groups.pop if groups.last.empty?
  groups
end
|
178
|
+
|
179
|
+
# Returns an Array of RFC2822::Mailbox, one per comma-separated address.
# str:: header body (String)
# opt:: options (Hash); with :decode_mime_header the display name is passed
#       through RFC2047.decode
#
# For "name <local@domain>" the tokens before "<" form the display name and
# the tokens between "<" and the last ">" form the address. Anything else is
# treated as a bare address without a display name.
def mailbox_list(str, opt)
  ret = []
  split_by(Tokenizer.token(str), ",").each do |m|
    a1 = m.index("<")
    a2 = a1 && m.rindex(">")
    if a1 and a2 and a2 > a1
      # m[0, a1] is empty when "<" is the first token; the former
      # m[0..a1-1] became m[0..-1] there and used the whole address as the
      # display name.
      display_name = m[0, a1].join(" ")
      if opt[:decode_mime_header]
        display_name = RFC2047.decode(display_name, opt)
      end
      # Array#join concatenates the tokens; Array#to_s returns the inspect
      # form on Ruby 1.9 and later.
      local_part, domain = m[a1+1..a2-1].join.split(/@/, 2)
      ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain), display_name)
    else
      local_part, domain = m.join.split(/@/, 2)
      ret << RFC2822::Mailbox.new(RFC2822::AddrSpec.new(local_part, domain))
    end
  end
  ret
end
|
198
|
+
|
199
|
+
# Returns an Array of RFC2822::MsgId.
# str:: header body (String)
#
# Tokens between "<" and the next ">" are concatenated into one id; tokens
# outside angle brackets are ignored. When no bracketed id is found at all,
# every whitespace-separated word of +str+ becomes an id.
def msg_id_list(str)
  ids = []
  current = nil                 # non-nil only while inside <...>
  Tokenizer.token(str).each do |tok|
    if tok == "<"
      current ||= ""
    elsif tok == ">"
      if current
        ids << RFC2822::MsgId.new(current)
        current = nil
      end
    elsif current
      current << tok
    end
  end
  ids = str.split.map { |word| RFC2822::MsgId.new(word) } if ids.empty?
  ids
end
|
225
|
+
|
226
|
+
# Lenient tokenizer for header bodies, built on RFC2822::Scanner.
class Tokenizer < RFC2822::Scanner
  # str:: header body to tokenize (String)
  def initialize(str)
    @ss = StringScanner.new(str)
    @comments = []
  end

  # Splits the input into tokens (comment parts are removed).
  # Returns an Array of Strings: quoted strings, domain literals, runs of
  # atext characters, and any other single character. When +cfws+ raises
  # ParseError after a "(", the scan position is restored and "(" is kept
  # as a token.
  def token()
    tokens = []
    while @ss.rest?
      if @ss.scan(/\s+/nmo)
        next                      # whitespace between tokens is ignored
      elsif (paren = @ss.scan(/\(/nmo))
        resume_at = @ss.pos
        begin
          cfws(@ss)
        rescue ParseError
          @ss.pos = resume_at
          tokens << paren
        end
      else
        tokens << (@ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}]))*\s*\"/nmo) ||
                   @ss.scan(/\[(\s*(\\[#{TEXT_RE}]|[#{DTEXT_RE}]))*\s*\]/nmo) ||
                   @ss.scan(/[#{ATEXT_RE}]+/no) ||
                   @ss.scan(/./no))
      end
    end
    tokens
  end

  # Splits the input for a Received header.
  # Returns an Array of Strings: quoted strings, ";" and runs of characters
  # other than whitespace, "(" and ";". When +cfws+ raises ParseError after
  # a "(", the scan position is restored and "(" is appended to the
  # previous token (dropped when there is none).
  def token_received()
    tokens = []
    while @ss.rest?
      if @ss.scan(/[\s]+/nmo)
        next                      # ignore blank
      elsif (paren = @ss.scan(/\(/nmo))
        resume_at = @ss.pos
        begin
          cfws(@ss)
        rescue ParseError
          @ss.pos = resume_at
          tokens.last << paren unless tokens.empty?
        end
      else
        tokens << (@ss.scan(/\"([\s]*(\\[#{TEXT_RE}]|[#{QTEXT_RE}]))*[\s]*\"/nmo) ||
                   @ss.scan(/;/) ||
                   @ss.scan(/[^\s\(\;]+/nmo))
      end
    end
    tokens
  end

  # Convenience wrapper: tokenizes +str+ with #token.
  def self.token(str)
    new(str).token
  end

  # Convenience wrapper: tokenizes +str+ with #token_received.
  def self.token_received(str)
    new(str).token_received
  end
end
|
290
|
+
|
291
|
+
end
|
292
|
+
end
|
@@ -0,0 +1,403 @@
|
|
1
|
+
# Copyright (C) 2003-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "nkf"
|
5
|
+
require "date"
|
6
|
+
|
7
|
+
module MailParser
|
8
|
+
|
9
|
+
# Charset name that decoded text is converted to (looked up in Charsets).
@@output_charset       = "euc-jp"
# When true, parse_message2 keeps a body only for parts whose type is "text"
# or that have no Content-Type.
@@text_body_only       = false
# When true, parse_message2 parses a part of type "message" recursively and
# stores the result under :message.
@@extract_message_type = true
|
12
|
+
|
13
|
+
# "<source code><destination code>" (codes as in Charsets) => name of the
# conversion method. Pairs that are not listed have no converter.
ConvertMethods = {
  "JE" => :jistoeuc,
  "SE" => :sjistoeuc,
  "UE" => :utf8toeuc,
  "EU" => :euctoutf8,
  "SU" => :sjistoutf8,
  "JU" => :jistoutf8,
}
|
21
|
+
|
22
|
+
# Lower-case charset name => one-letter code used to build ConvertMethods
# keys. "N" (us-ascii) is treated by the decoders as needing no conversion.
Charsets = {
  "iso-2022-jp" => "J",
  "euc-jp"      => "E",
  "shift_jis"   => "S",
  "sjis"        => "S",
  "x-sjis"      => "S",
  "utf-8"       => "U",
  "us-ascii"    => "N",
}
|
31
|
+
|
32
|
+
module_function
|
33
|
+
|
34
|
+
# Converts an EUC-JP string to UTF-8 with NKF
# (-m0: no MIME decoding, -E: EUC-JP input, -w: UTF-8 output, -x: keep
# half-width kana).
def euctoutf8(s)
  nkf_options = "-m0Ewx"
  NKF.nkf(nkf_options, s)
end
|
37
|
+
|
38
|
+
# Converts a Shift_JIS string to UTF-8 with NKF
# (-m0: no MIME decoding, -S: Shift_JIS input, -w: UTF-8 output, -x: keep
# half-width kana).
def sjistoutf8(s)
  nkf_options = "-m0Swx"
  NKF.nkf(nkf_options, s)
end
|
41
|
+
|
42
|
+
# Converts an ISO-2022-JP string to UTF-8 with NKF
# (-m0: no MIME decoding, -J: ISO-2022-JP input, -w: UTF-8 output, -x: keep
# half-width kana).
def jistoutf8(s)
  nkf_options = "-m0Jwx"
  NKF.nkf(nkf_options, s)
end
|
45
|
+
|
46
|
+
# Converts a Shift_JIS string to EUC-JP with NKF
# (-m0: no MIME decoding, -S: Shift_JIS input, -e: EUC-JP output, -x: keep
# half-width kana).
def sjistoeuc(s)
  nkf_options = "-m0Sex"
  NKF.nkf(nkf_options, s)
end
|
49
|
+
|
50
|
+
# Converts an ISO-2022-JP string to EUC-JP with NKF
# (-m0: no MIME decoding, -J: ISO-2022-JP input, -e: EUC-JP output, -x: keep
# half-width kana).
def jistoeuc(s)
  nkf_options = "-m0Jex"
  NKF.nkf(nkf_options, s)
end
|
53
|
+
|
54
|
+
# Converts a UTF-8 string to EUC-JP with NKF
# (-m0: no MIME decoding, -W: UTF-8 input, -e: EUC-JP output, -x: keep
# half-width kana).
def utf8toeuc(s)
  nkf_options = "-m0Wex"
  NKF.nkf(nkf_options, s)
end
|
57
|
+
|
58
|
+
# Sets the charset that decoded headers, parameters and bodies are
# converted to.
# c:: charset name (String) looked up in Charsets; with nil, or a name that
#     is not in Charsets, mdecode_token and decode_body return text
#     unconverted
def output_charset=(c)
  @@output_charset = c
end
|
61
|
+
|
62
|
+
# Sets whether parse_message2 keeps bodies only for parts whose type is
# "text" or that have no Content-Type.
# f:: true to skip the body of every other part
def text_body_only=(f)
  @@text_body_only = f
end
|
65
|
+
|
66
|
+
# Sets whether parse_message2 parses parts of type "message" recursively
# (result stored under :message) instead of treating them as a plain body.
# f:: true to parse embedded messages
def extract_message_type=(f)
  @@extract_message_type = f
end
|
69
|
+
|
70
|
+
# Base64-decodes +str+; mdecode_token uses it for "B" encoded-words.
# Returns the decoded String.
def b64_hdecode(str)
  str.unpack("m").first
end
|
73
|
+
|
74
|
+
# Base64-decodes +str+; decode_body uses it for "base64" bodies.
# Returns the decoded String.
def b64_decode(str)
  str.unpack("m").first
end
|
77
|
+
|
78
|
+
# Decodes the "Q" encoding used in encoded-words: "_" becomes a space and
# "=XX" (upper-case hex digits only) becomes the character with that code.
def qp_hdecode(str)
  spaced = str.gsub("_", " ")
  spaced.gsub(/=([0-9A-F][0-9A-F])/no) { $1.hex.chr }
end
|
81
|
+
|
82
|
+
# Decodes a quoted-printable body: trailing spaces/tabs at line ends are
# removed, soft line breaks ("=" before a line end) are joined, and "=XX"
# (upper-case hex digits only) becomes the character with that code.
def qp_decode(str)
  trimmed = str.gsub(/[ \t]+$/no, "")
  joined = trimmed.gsub(/=\r?\n/no, "")
  joined.gsub(/=([0-9A-F][0-9A-F])/no) { $1.hex.chr }
end
|
86
|
+
|
87
|
+
# Decodes a single MIME encoded-word ("=?charset?Q-or-B?text?=").
# s:: candidate encoded-word (String)
#
# Returns +s+ unchanged when it is not an encoded-word or its charset is not
# in Charsets. Otherwise returns the decoded text, converted to the output
# charset when a converter exists for that pair. The decoded text is
# returned unconverted when the source is us-ascii, the output charset is
# unset/unknown, both charsets are the same, or no converter is registered.
def mdecode_token(s)
  return s if s !~ /\A=\?([a-z0-9_-]+)\?(Q|B)\?([^?]+)\?=\Z/nio

  charset, encoding, text = $1, $2, $3
  fc = MailParser::Charsets[charset.downcase]
  return s if fc.nil?

  decoded = encoding.downcase == 'q' ? qp_hdecode(text) : b64_hdecode(text)

  tc = @@output_charset && MailParser::Charsets[@@output_charset.downcase]
  return decoded if fc == "N" or tc.nil? or fc == tc

  # ConvertMethods has no entry for pairs such as "ES" or "UJ"; calling
  # send(nil, ...) raised TypeError there.
  converter = MailParser::ConvertMethods[fc+tc]
  return decoded if converter.nil?
  MailParser.send(converter, decoded)
end
|
104
|
+
|
105
|
+
# Decodes every MIME encoded-word in a header value.
# str:: raw header value (String)
#
# Whitespace runs are collapsed to one space, whitespace between two
# adjacent encoded-words is removed, and each encoded-word is replaced by
# the result of mdecode_token.
def mime_header_decode(str)
  unfolded = str.gsub(/\s+/no, " ")
  adjoined = unfolded.gsub(/\?=\s+=\?/no, "?==?")
  adjoined.gsub(/=\?[a-z0-9_-]+\?(Q|B)\?[^?]+\?=/nio) { mdecode_token $& }
end
|
108
|
+
|
109
|
+
# Returns +v+ with parenthesised comments removed.
# v:: header value (String)
#
# Backslash escapes and double-quoted strings are copied through untouched,
# so "(" inside them does not start a comment. When a comment is never
# closed, the text from its "(" onward is returned as-is.
def trunc_comment(v)
  ret = ""
  after = v
  while not after.empty? and after =~ /^(\\.|\"(\\.|[^\\\"])*\"|[^\\\(])*/no do
    consumed = $&
    ret << consumed
    after = $'
    if after =~ /^\(/no then
      a = trunc_comment_sub(after[1..-1])
      return ret+after if a.nil?
      after = a
    elsif consumed.empty?
      # Nothing was consumed and no comment starts here: the text begins
      # with a backslash that escapes nothing (end of string or a newline).
      # The original only stopped for a lone trailing backslash and looped
      # forever on a backslash followed by a newline.
      break
    end
  end
  ret+after
end
|
128
|
+
|
129
|
+
# Skips the remainder of a comment, handling nested comments recursively.
# orig:: text that follows the opening "(" of the comment (String)
#
# Returns the text after the ")" that closes the comment, or nil when the
# comment is not closed.
def trunc_comment_sub(orig)
  rest = orig
  loop do
    # drop escaped characters and anything that is not a parenthesis
    rest = $' if rest =~ /^(\\.|[^\\\(\)])*/no
    return rest[1..-1] if rest =~ /^\)/no
    return nil unless rest =~ /^\(/no
    rest = trunc_comment_sub(rest[1..-1])   # nested comment
    return nil if rest.nil?
  end
end
|
148
|
+
|
149
|
+
# Splits an address-list header value at top-level commas.
# v:: header value (String); it is not modified
#
# Returns an Array of Strings with surrounding whitespace stripped. Commas
# inside double-quoted strings do not split. An empty input yields [""].
def split_address(v)
  # Work on a copy: the scan below consumes the string destructively, and
  # the original consumed the caller's own String (raising on frozen input).
  rest = v.dup
  a = []
  r = ""
  until rest.empty?
    # \A (not ^) so the match can only start at the current position; with ^
    # a match on a later line made everything before it disappear.
    if rest =~ /\A(\s+|[0-9A-Za-z\!\#\$\%\&\'\*\+\-\/\=\?\^\_\`\{\|\}\~]+|\"(\\.|[^\\\"])*\")/
      r << $&
      rest = $'
    else
      ch = rest.slice!(0, 1)
      if ch == ","
        a << r.strip
        r = ""
      else
        r << ch
      end
    end
  end
  a << r.strip
  a
end
|
167
|
+
|
168
|
+
# Extracts the mail addresses from an address-list header value.
# v:: header value (String)
#
# Comments are removed with trunc_comment and the value is split with
# split_address. For an entry ending in "<...>" the text inside the brackets
# is returned; any other entry is returned stripped.
def get_mail_address(v)
  split_address(trunc_comment(v)).map do |entry|
    stripped = entry.strip
    stripped =~ /<([^<>]*)>$/ ? $1 : stripped
  end
end
|
173
|
+
|
174
|
+
# Parses a Date header value of the form "[Www, ]DD Mon YYYY HH:MM:SS ...".
# s:: header value (String)
#
# Returns a Time built with Time.mktime from the parsed fields (anything
# after the time, such as a zone, is not used), or nil when the value cannot
# be parsed or Time.mktime raises.
def get_date(s)
  # drop a leading three-letter day name and its comma
  body = (s =~ /^[A-Z][A-Z][A-Z]\s*,\s*/i) ? $' : s
  parts = ::DateTime._strptime(body, "%d %b %Y %X")
  return unless parts
  begin
    Time.mktime(*parts.values_at(:year, :mon, :mday, :hour, :min, :sec))
  rescue
    nil
  end
end
|
182
|
+
|
183
|
+
# Parses a Content-Type style header value ("type[/subtype]; name=value ...").
# str:: header value (String)
#
# Returns a Hash with :parameter (Hash of lower-case name => value) and,
# when the value starts with a type, :type and optionally :subtype (both
# lower-case). Parameter continuations ("name*0", "name*1", ...) are joined
# in numeric order; extended values ("name*", "name*0*", "name*N*") are
# percent-decoded and, when a known charset is given, converted to the
# output charset if a converter exists for that pair.
def parse_content_type(str)
  hash = {}
  hash[:parameter] = {}
  if str.strip =~ /^([a-z0-9_-]+)(?:\/([a-z0-9_-]+))?\s*/nio then
    hash[:type] = $1.downcase
    hash[:subtype] = $2.downcase if $2
    params = $'
    # name => list of [order, value, charset, percent_encoded?] segments
    pending = {}
    loop do
      if params =~ /\A\s*;\s*([a-z0-9_-]+)(?:\*(\d+))?\s*=\s*(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
        # plain parameter, or plain continuation segment name*N=value
        pn, ord, pv = $1, $2, $3||$4
        params = $'
        if ord then
          pending[pn] = [] unless pending.key? pn
          pending[pn] << [ord.to_i, pv]
        else
          hash[:parameter][pn.downcase] = pv
        end
      elsif params =~ /\A\s*;\s*([a-z0-9_-]+)\*\s*=\s*([a-z0-9_-]+)?\'(?:[a-z0-9_-]+)?\'(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
        # single extended value name*=charset'lang'value
        pn, charset, pv = $1, $2, $3||$4
        params = $'
        pending[pn] = [[0, pv, charset, true]]
      elsif params =~ /\A\s*;\s*([a-z0-9_-]+)\*0\*\s*=\s*([a-z0-9_-]+)?\'(?:[a-z0-9_-]+)?\'(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
        # first extended segment name*0*=charset'lang'value
        pn, charset, pv = $1, $2, $3||$4
        params = $'
        pending[pn] = [[0, pv, charset, true]]
      elsif params =~ /\A\s*;\s*([a-z0-9_-]+)\*(\d+)\*\s*=\s*(?:\"((?:\\\"|[^\"])*)\"|([^\s\(\)\<\>\@\,\;\:\\\"\/\[\]\?\=]*))\s*/nio then
        # later extended segment name*N*=value
        pn, ord, pv = $1, $2, $3||$4
        params = $'
        pending[pn] = [] unless pending.key? pn
        pending[pn] << [ord.to_i, pv, nil, true]
      else
        break
      end
    end
    pending.each do |pn, pv|
      pv = pv.sort{|a,b| a[0]<=>b[0]}
      charset = pv[0][2]
      v = pv.map{|a|a[3] ? a[1].gsub(/%([0-9A-F][0-9A-F])/nio){$1.hex.chr} : a[1]}.join
      fc = charset && MailParser::Charsets[charset.downcase]
      tc = @@output_charset && MailParser::Charsets[@@output_charset.downcase]
      # tc is nil when the output charset is unset or unknown, and
      # ConvertMethods lacks some pairs; both cases raised TypeError before.
      # The value is now left unconverted, as mdecode_token does.
      if fc and fc != "N" and tc and fc != tc then
        converter = MailParser::ConvertMethods[fc+tc]
        v = MailParser.send(converter, v) if converter
      end
      hash[:parameter][pn.downcase] = v
    end
  end
  return hash
end
|
232
|
+
|
233
|
+
# Parses a Content-Disposition header value. The syntax is handled exactly
# like Content-Type, so this delegates to parse_content_type and returns its
# Hash.
# str:: header value (String)
def parse_content_disposition(str)
  parse_content_type(str)
end
|
236
|
+
|
237
|
+
# Parses a whole mail message.
# msg:: object that responds to +gets+ and returns one line per call (nil at
#       end of input); singleton helper methods are added to it
#
# Returns the Hash built by parse_message2, extended with a +to_s+ that
# renders a short summary (From, To, Subject, Date, body and one line per
# part).
def parse_message(msg)
  class << msg
    # Yields each following line until end of input or until a line
    # (without its line terminator) equals one of +delim+. found_boundary
    # tells which of the two happened.
    def _each_with_multiple_delimiter(delim=[])
      @found_boundary = false
      while (@l = gets)
        if delim.include?(@l.chomp)
          @found_boundary = true
          return
        end
        yield @l
      end
    end

    # Last line read, without its line terminator (nil at end of input or
    # before anything was read).
    def last_line()
      @l && @l.chomp
    end

    attr_reader :found_boundary
  end

  parsed = parse_message2(msg)
  class << parsed
    def to_s()
      return <<EOS
From: #{self[:from].join(",")}
To: #{self[:to].join(",")}
Subject:#{self[:subject]}
Date: #{self[:date]}

#{self[:body]}

#{if self[:parts] then self[:parts].map{|p| "[#{p[:type]}/#{p[:subtype]}]<#{p[:filename]}>"}.join("\n") end}
EOS
    end
  end
  parsed
end
|
277
|
+
|
278
|
+
# Parses one entity (header plus body or nested parts) from +msg+.
# msg:: input prepared by parse_message (provides
#       _each_with_multiple_delimiter, last_line and found_boundary)
# boundary:: delimiter lines of the enclosing multiparts (Array of String)
#
# Returns the Hash from parse_header, extended with :message (embedded
# message), :parts (Array of part Hashes) or :body (decoded body).
def parse_message2(msg, boundary=[])
  ret = parse_header(msg, boundary)
  # an enclosing boundary was hit while reading the header: no body follows
  return ret if msg.found_boundary

  if ret[:type] == "message" && @@extract_message_type
    ret[:message] = parse_message2(msg, boundary)
  elsif ret[:multipart] && ret[:boundary]
    open_delim = "--" + ret[:boundary]
    close_delim = open_delim + "--"
    delims = boundary + [close_delim, open_delim]
    msg._each_with_multiple_delimiter(delims) { }   # skip preamble
    parts = []
    parts << parse_message2(msg, delims) while msg.last_line == open_delim
    if msg.last_line == close_delim
      msg._each_with_multiple_delimiter(boundary) { }   # skip epilogue
    end
    ret[:parts] = parts
  elsif !@@text_body_only || ret[:type] == "text" || ret[:type].nil?
    body = ""
    msg._each_with_multiple_delimiter(boundary) { |line| body << line }
    ret[:body] = decode_body(body, ret[:encoding], ret[:charset])
  else
    msg._each_with_multiple_delimiter(boundary) { }   # body not wanted
  end
  ret
end
|
311
|
+
|
312
|
+
# Reads header lines from +msg+ up to the first empty line (or an enclosing
# boundary / end of input) and returns a Hash describing them.
# msg:: input prepared by parse_message
# boundary:: delimiter lines of the enclosing multiparts (Array of String)
#
# Keys always present: :from, :to, :cc (Arrays of addresses), :date,
# :subject, :multipart, :boundary, :header (lower-case name => Array of
# decoded values) and :rawheader. :type, :subtype, :charset, :encoding and
# :filename are set only when the corresponding header supplies them.
def parse_header(msg, boundary=[])
  raw = ""
  fields = []
  msg._each_with_multiple_delimiter(boundary) do |line|
    line.chomp!
    break if line.empty?
    raw << line+"\n"
    if line =~ /^\s/no and not fields.empty?
      fields[-1] << line            # folded continuation of previous field
    elsif line.include? ":"
      fields << line
    end                             # lines without ":" are garbage: skipped
  end

  from = []
  to = []
  cc = []
  date = nil
  subject = ""
  encoding = ct = charset = multipart = filename = bd = nil
  h = {}

  fields.each do |field|
    hn, hb = field.split(/:\s*/no, 2)
    hn.downcase!
    (h[hn] ||= []) << mime_header_decode(hb)
    case hn
    when "from"    then from.concat get_mail_address(hb)
    when "to"      then to.concat get_mail_address(hb)
    when "cc"      then cc.concat get_mail_address(hb)
    when "date"    then date = get_date(hb)
    when "subject" then subject.concat hb
    when "content-type"
      ct = parse_content_type(hb)
      if ct[:type] == "text"
        charset = ct[:parameter]["charset"]
      elsif ct[:type] == "multipart"
        multipart = true
        bd = ct[:parameter]["boundary"]
      end
      name = ct[:parameter]["name"]
      filename = mime_header_decode(name) if name
    when "content-disposition"
      cd = parse_content_disposition(hb)
      name = cd[:parameter]["filename"]
      filename = mime_header_decode(name) if name
    when "content-transfer-encoding"
      encoding = hb.strip.downcase
    end
  end

  ret = {}
  ret[:from] = from
  ret[:to] = to
  ret[:cc] = cc
  ret[:date] = date
  ret[:subject] = mime_header_decode subject
  if ct
    ret[:type] = ct[:type].downcase if ct[:type]
    ret[:subtype] = ct[:subtype].downcase if ct[:subtype]
    ret[:charset] = charset.downcase if charset
  end
  ret[:encoding] = encoding if encoding
  ret[:multipart] = multipart
  ret[:boundary] = bd
  ret[:filename] = filename if filename
  ret[:header] = h
  ret[:rawheader] = raw
  ret
end
|
388
|
+
|
389
|
+
# Decodes a message body.
# body:: raw body (String)
# encoding:: Content-Transfer-Encoding value, lower-case ("base64" and
#            "quoted-printable" are decoded; anything else is left alone)
# charset:: charset name of the body, or nil
#
# Returns the transfer-decoded body, converted to the output charset when a
# converter exists for that pair. The body is returned unconverted when the
# charset is nil, unknown or us-ascii, the output charset is unset/unknown,
# both charsets are the same, or no converter is registered.
def decode_body(body, encoding, charset)
  case encoding
  when "base64"
    body = b64_decode body
  when "quoted-printable"
    body = qp_decode body
  end
  return body if charset.nil?

  fc = MailParser::Charsets[charset.downcase]
  return body if fc.nil?

  tc = @@output_charset && MailParser::Charsets[@@output_charset.downcase]
  return body if fc == "N" or tc.nil? or fc == tc

  # ConvertMethods has no entry for pairs such as "ES" or "UJ"; calling
  # send(nil, ...) raised TypeError there.
  converter = MailParser::ConvertMethods[fc+tc]
  return body if converter.nil?
  MailParser.send(converter, body)
end
|
403
|
+
end
|