mailparser 0.4.22a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY +141 -0
- data/README.txt +501 -0
- data/lib/mailparser.rb +558 -0
- data/lib/mailparser/conv_charset.rb +27 -0
- data/lib/mailparser/error.rb +7 -0
- data/lib/mailparser/loose.rb +292 -0
- data/lib/mailparser/obsolete.rb +403 -0
- data/lib/mailparser/rfc2045.rb +54 -0
- data/lib/mailparser/rfc2045/parser.rb +245 -0
- data/lib/mailparser/rfc2045/scanner.rb +54 -0
- data/lib/mailparser/rfc2047.rb +82 -0
- data/lib/mailparser/rfc2183.rb +33 -0
- data/lib/mailparser/rfc2183/parser.rb +186 -0
- data/lib/mailparser/rfc2183/scanner.rb +7 -0
- data/lib/mailparser/rfc2231.rb +57 -0
- data/lib/mailparser/rfc2822.rb +212 -0
- data/lib/mailparser/rfc2822/parser.rb +883 -0
- data/lib/mailparser/rfc2822/scanner.rb +119 -0
- data/test.rb +26 -0
- data/test/test_loose.rb +371 -0
- data/test/test_mailparser.rb +1130 -0
- data/test/test_obsolete.rb +615 -0
- data/test/test_rfc2045.rb +121 -0
- data/test/test_rfc2047.rb +118 -0
- data/test/test_rfc2183.rb +60 -0
- data/test/test_rfc2231.rb +167 -0
- data/test/test_rfc2822.rb +370 -0
- metadata +81 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
# Copyright (C) 2006-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "mailparser/error"
|
5
|
+
require "mailparser/rfc2822"
|
6
|
+
require "mailparser/rfc2045/parser"
|
7
|
+
|
8
|
+
# RFC 2045 (MIME) header support: a header-name dispatch table, small value
# objects for parsed headers, and transfer-encoding decode helpers.
module MailParser::RFC2045
  # Lower-cased header name => start symbol handed to this module's Parser,
  # or a [module, start symbol] pair when that module's Parser is used instead.
  HEADER_TYPE = {
    "content-type"              => :CONTENT_TYPE,
#    "content-description"       => :UNSTRUCTURED,
    "content-transfer-encoding" => :CONTENT_TRANSFER_ENCODING,
    "content-id"                => [MailParser::RFC2822, :MSG_ID],
    "mime-version"              => :MIME_VERSION,
  }

  # Parsed Content-Type value. type and subtype are stored lower-cased;
  # params is stored exactly as given.
  class ContentType
    attr_reader :type, :subtype, :params

    def initialize(type, subtype, params)
      lowered_type    = type.downcase
      lowered_subtype = subtype.downcase
      @type    = lowered_type
      @subtype = lowered_subtype
      @params  = params
    end
  end

  # Parsed Content-Transfer-Encoding value; the mechanism is stored lower-cased.
  class ContentTransferEncoding
    attr_reader :mechanism

    def initialize(mechanism)
      @mechanism = mechanism.downcase
    end
  end

  module_function

  # Parse the header +value+ according to the header +name+.
  # name::  header field name (matched case-insensitively against HEADER_TYPE)
  # value:: raw header field body
  # opt::   accepted for interface compatibility; not used here
  # Returns value.chomp when the header is not listed in HEADER_TYPE,
  # otherwise whatever the selected Parser returns.
  def parse(name, value, opt={})
    htype = HEADER_TYPE[name.downcase]
    return value.chomp unless htype

    case htype
    when Array
      # [module, start symbol]: delegate to that module's Parser.
      owner, start = htype
      owner::Parser.new.parse(start, value)
    else
      Parser.new.parse(htype, value)
    end
  end

  # Decode a quoted-printable string. Whitespace between a trailing "=" and
  # the end of a line is dropped first so the "=" acts as a soft line break.
  def qp_decode(str)
    normalized = str.gsub(/=\s*?$/,"=")
    return normalized.unpack("M").first
  end

  # Decode a base64 string, ignoring every character outside the base64
  # alphabet and "=".
  def b64_decode(str)
    cleaned = str.gsub(/[^A-Z0-9\+\/=]/i,"")
    return cleaned.unpack("m").first
  end
end
|
@@ -0,0 +1,245 @@
|
|
1
|
+
#
|
2
|
+
# DO NOT MODIFY!!!!
|
3
|
+
# This file is automatically generated by racc 1.4.5
|
4
|
+
# from racc grammar file "lib/mailparser/rfc2045/parser.y".
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'racc/parser'
|
8
|
+
|
9
|
+
|
10
|
+
module MailParser
|
11
|
+
|
12
|
+
module RFC2045
|
13
|
+
|
14
|
+
class Parser < Racc::Parser
|
15
|
+
|
16
|
+
module_eval <<'..end lib/mailparser/rfc2045/parser.y modeval..idcdf01fc9ea', 'lib/mailparser/rfc2045/parser.y', 62
|
17
|
+
|
18
|
+
require "mailparser/rfc2045/scanner"
|
19
|
+
|
20
|
+
# Parse +value+ as a header of kind +header_type+ (a grammar start token).
# Returns the object built by the grammar, extended with a +comments+
# accessor that carries whatever the scanner reports via Scanner#comments.
def parse(header_type, value)
  @header_type, @value = header_type, value
  @scanner = Scanner.new(header_type, value)
  parsed = yyparse(self, :parse_sub)
  # Attach the accessor to this one result object only (singleton class).
  class << parsed
    attr_accessor :comments
  end
  parsed.comments = @scanner.comments
  parsed
end
|
31
|
+
|
32
|
+
# Token source handed to yyparse by #parse: first emits the header type as a
# token (with a nil value) so the grammar can pick the matching rule, then
# lets the scanner feed the remaining tokens to the same block.
def parse_sub(&block)
  yield(@header_type, nil)
  @scanner.scan(&block)
end
|
36
|
+
|
37
|
+
# Racc error hook: report a syntax error as MailParser::ParseError whose
# message is the offending token value followed by the unscanned input.
#
# t::      racc token id of the offending token (unused)
# val::    value of the offending token; nil when the error occurs at end of
#          input, because the scanner terminates its stream with a bare
#          "yield nil"
# vstack:: racc value stack (unused)
#
# Raises MailParser::ParseError in every case.
#
# NOTE: this method is copied from the inner section of
# lib/mailparser/rfc2045/parser.y; apply the same change there so that it
# survives regeneration.
def on_error(t, val, vstack)
#  p t, val, vstack
#  p racc_token2str(t)
  # val.to_s: a nil token value must still yield ParseError, not NoMethodError.
  raise MailParser::ParseError, val.to_s+@scanner.rest
end
|
42
|
+
..end lib/mailparser/rfc2045/parser.y modeval..idcdf01fc9ea
|
43
|
+
|
44
|
+
##### racc 1.4.5 generates ###
|
45
|
+
|
46
|
+
racc_reduce_table = [
|
47
|
+
0, 0, :racc_error,
|
48
|
+
2, 13, :_reduce_1,
|
49
|
+
2, 13, :_reduce_2,
|
50
|
+
2, 13, :_reduce_3,
|
51
|
+
4, 14, :_reduce_4,
|
52
|
+
1, 15, :_reduce_5,
|
53
|
+
3, 16, :_reduce_6,
|
54
|
+
1, 20, :_reduce_none,
|
55
|
+
1, 17, :_reduce_none,
|
56
|
+
1, 18, :_reduce_none,
|
57
|
+
0, 19, :_reduce_10,
|
58
|
+
3, 19, :_reduce_11,
|
59
|
+
3, 21, :_reduce_12,
|
60
|
+
1, 22, :_reduce_none,
|
61
|
+
1, 23, :_reduce_none,
|
62
|
+
1, 23, :_reduce_none ]
|
63
|
+
|
64
|
+
racc_reduce_n = 16
|
65
|
+
|
66
|
+
racc_shift_n = 29
|
67
|
+
|
68
|
+
racc_action_table = [
|
69
|
+
28, 8, 10, 26, 1, 3, 4, 13, 14, 15,
|
70
|
+
7, 18, 19, 21, 23, 25, 16 ]
|
71
|
+
|
72
|
+
racc_action_check = [
|
73
|
+
25, 2, 3, 25, 0, 0, 0, 4, 6, 8,
|
74
|
+
1, 14, 16, 20, 21, 22, 13 ]
|
75
|
+
|
76
|
+
racc_action_pointer = [
|
77
|
+
2, 2, 1, -6, 1, nil, 3, nil, 9, nil,
|
78
|
+
nil, nil, nil, 9, 3, nil, 6, nil, nil, nil,
|
79
|
+
4, 6, 5, nil, nil, -8, nil, nil, nil ]
|
80
|
+
|
81
|
+
racc_action_default = [
|
82
|
+
-16, -16, -16, -16, -16, -1, -16, -8, -16, -2,
|
83
|
+
-7, -5, -3, -16, -16, 29, -16, -10, -9, -6,
|
84
|
+
-4, -16, -16, -13, -11, -16, -15, -12, -14 ]
|
85
|
+
|
86
|
+
racc_goto_table = [
|
87
|
+
2, 5, 9, 12, 6, 17, 20, 11, 24, 22,
|
88
|
+
27 ]
|
89
|
+
|
90
|
+
racc_goto_check = [
|
91
|
+
1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
|
92
|
+
11 ]
|
93
|
+
|
94
|
+
racc_goto_pointer = [
|
95
|
+
nil, 0, 0, -1, -1, 3, -9, -11, 4, -13,
|
96
|
+
-12, -15 ]
|
97
|
+
|
98
|
+
racc_goto_default = [
|
99
|
+
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
|
100
|
+
nil, nil ]
|
101
|
+
|
102
|
+
racc_token_table = {
|
103
|
+
false => 0,
|
104
|
+
Object.new => 1,
|
105
|
+
:CONTENT_TYPE => 2,
|
106
|
+
:CONTENT_TRANSFER_ENCODING => 3,
|
107
|
+
:MIME_VERSION => 4,
|
108
|
+
"/" => 5,
|
109
|
+
:DIGIT => 6,
|
110
|
+
"." => 7,
|
111
|
+
:TOKEN => 8,
|
112
|
+
";" => 9,
|
113
|
+
"=" => 10,
|
114
|
+
:QUOTED_STRING => 11 }
|
115
|
+
|
116
|
+
racc_use_result_var = false
|
117
|
+
|
118
|
+
racc_nt_base = 12
|
119
|
+
|
120
|
+
Racc_arg = [
|
121
|
+
racc_action_table,
|
122
|
+
racc_action_check,
|
123
|
+
racc_action_default,
|
124
|
+
racc_action_pointer,
|
125
|
+
racc_goto_table,
|
126
|
+
racc_goto_check,
|
127
|
+
racc_goto_default,
|
128
|
+
racc_goto_pointer,
|
129
|
+
racc_nt_base,
|
130
|
+
racc_reduce_table,
|
131
|
+
racc_token_table,
|
132
|
+
racc_shift_n,
|
133
|
+
racc_reduce_n,
|
134
|
+
racc_use_result_var ]
|
135
|
+
|
136
|
+
Racc_token_to_s_table = [
|
137
|
+
'$end',
|
138
|
+
'error',
|
139
|
+
'CONTENT_TYPE',
|
140
|
+
'CONTENT_TRANSFER_ENCODING',
|
141
|
+
'MIME_VERSION',
|
142
|
+
'"/"',
|
143
|
+
'DIGIT',
|
144
|
+
'"."',
|
145
|
+
'TOKEN',
|
146
|
+
'";"',
|
147
|
+
'"="',
|
148
|
+
'QUOTED_STRING',
|
149
|
+
'$start',
|
150
|
+
'all',
|
151
|
+
'content_type',
|
152
|
+
'content_transfer_encoding',
|
153
|
+
'mime_version',
|
154
|
+
'type',
|
155
|
+
'subtype',
|
156
|
+
'parameter_list',
|
157
|
+
'mechanism',
|
158
|
+
'parameter',
|
159
|
+
'attribute',
|
160
|
+
'value']
|
161
|
+
|
162
|
+
Racc_debug_parser = false
|
163
|
+
|
164
|
+
##### racc system variables end #####
|
165
|
+
|
166
|
+
# reduce 0 omitted
|
167
|
+
|
168
|
+
module_eval <<'.,.,', 'lib/mailparser/rfc2045/parser.y', 9
|
169
|
+
def _reduce_1( val, _values)
|
170
|
+
val[1]
|
171
|
+
end
|
172
|
+
.,.,
|
173
|
+
|
174
|
+
module_eval <<'.,.,', 'lib/mailparser/rfc2045/parser.y', 11
|
175
|
+
def _reduce_2( val, _values)
|
176
|
+
val[1]
|
177
|
+
end
|
178
|
+
.,.,
|
179
|
+
|
180
|
+
module_eval <<'.,.,', 'lib/mailparser/rfc2045/parser.y', 13
|
181
|
+
def _reduce_3( val, _values)
|
182
|
+
val[1]
|
183
|
+
end
|
184
|
+
.,.,
|
185
|
+
|
186
|
+
module_eval <<'.,.,', 'lib/mailparser/rfc2045/parser.y', 19
|
187
|
+
def _reduce_4( val, _values)
|
188
|
+
ContentType.new(val[0], val[2], val[3])
|
189
|
+
end
|
190
|
+
.,.,
|
191
|
+
|
192
|
+
module_eval <<'.,.,', 'lib/mailparser/rfc2045/parser.y', 24
|
193
|
+
def _reduce_5( val, _values)
|
194
|
+
ContentTransferEncoding.new(val[0])
|
195
|
+
end
|
196
|
+
.,.,
|
197
|
+
|
198
|
+
module_eval <<'.,.,', 'lib/mailparser/rfc2045/parser.y', 31
|
199
|
+
def _reduce_6( val, _values)
|
200
|
+
val.join
|
201
|
+
end
|
202
|
+
.,.,
|
203
|
+
|
204
|
+
# reduce 7 omitted
|
205
|
+
|
206
|
+
# reduce 8 omitted
|
207
|
+
|
208
|
+
# reduce 9 omitted
|
209
|
+
|
210
|
+
module_eval <<'.,.,', 'lib/mailparser/rfc2045/parser.y', 42
|
211
|
+
def _reduce_10( val, _values)
|
212
|
+
{}
|
213
|
+
end
|
214
|
+
.,.,
|
215
|
+
|
216
|
+
module_eval <<'.,.,', 'lib/mailparser/rfc2045/parser.y', 49
|
217
|
+
def _reduce_11( val, _values)
|
218
|
+
pn, pv = val[2]
|
219
|
+
pv = $1 if pv =~ /\A\"(.*)\"\Z/m
|
220
|
+
val[0][pn] = pv.gsub(/\s*\n\s*/, " ")
|
221
|
+
val[0]
|
222
|
+
end
|
223
|
+
.,.,
|
224
|
+
|
225
|
+
module_eval <<'.,.,', 'lib/mailparser/rfc2045/parser.y', 54
|
226
|
+
def _reduce_12( val, _values)
|
227
|
+
[val[0].downcase, val[2]]
|
228
|
+
end
|
229
|
+
.,.,
|
230
|
+
|
231
|
+
# reduce 13 omitted
|
232
|
+
|
233
|
+
# reduce 14 omitted
|
234
|
+
|
235
|
+
# reduce 15 omitted
|
236
|
+
|
237
|
+
def _reduce_none( val, _values)
|
238
|
+
val[0]
|
239
|
+
end
|
240
|
+
|
241
|
+
end # class Parser
|
242
|
+
|
243
|
+
end # module RFC2045
|
244
|
+
|
245
|
+
end # module MailParser
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# Copyright (C) 2006-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "mailparser/rfc2822"
|
5
|
+
|
6
|
+
# Tokenizer for RFC 2045 header values, built on the RFC 2822 scanner.
# NOTE(review): @ss, @header_type, cfws, TEXT_RE and QTEXT_RE are not defined
# in this class; presumably they come from MailParser::RFC2822::Scanner
# (@ss looks like a StringScanner) -- confirm against that file.
class MailParser::RFC2045::Scanner < MailParser::RFC2822::Scanner
  # Character-class body (for use inside [...]) of the bytes accepted in a token.
  TOKEN_RE = '\x21\x23-\x27\x2a\x2b\x2d\x2e\x30-\x39\x41-\x5a\x5e-\x7f'

  # Yield [token, value] pairs for the header being scanned, choosing the
  # tokenizer by header type: MIME-Version has its own, everything else is
  # scanned as a generic structured value.
  def scan(&block)
    if :MIME_VERSION == @header_type
      scan_mime_version(&block)
    else
      scan_structured(&block)
    end
  end

  # Tokenize a structured value into :QUOTED_STRING, :TOKEN and
  # single-character tokens (the character serves as both token and value).
  # A bare nil is yielded once the input is exhausted.
  def scan_structured()
    until @ss.eos?
      if s = @ss.scan(/\s*\(/nmo)
        # Opening parenthesis of a comment: hand the rest to cfws; no token.
        s << cfws(@ss)
      elsif @ss.scan(/\s+/nmo)
        # Plain whitespace produces no token.
      elsif s = @ss.scan(/\"(\s*(\\[#{TEXT_RE}]|[#{QTEXT_RE}]))*\s*\"/nmo)
        yield :QUOTED_STRING, s
      elsif s = @ss.scan(/[#{TOKEN_RE}]+/no)
        yield :TOKEN, s
      elsif s = @ss.scan(/./no)
        yield s, s
      end
    end
    yield nil
  end

  # Tokenize a MIME-Version value into :DIGIT (a run of digits) and
  # single-character tokens. A bare nil is yielded once the input is exhausted.
  def scan_mime_version()
    until @ss.eos?
      if s = @ss.scan(/\s*\(/nmo)
        # Opening parenthesis of a comment: hand the rest to cfws; no token.
        s << cfws(@ss)
      elsif @ss.scan(/\s+/nmo)
        # Plain whitespace produces no token.
      elsif s = @ss.scan(/\d+/no)
        yield :DIGIT, s
      elsif s = @ss.scan(/./no)
        yield s, s
      end
    end
    yield nil
  end

end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# Copyright (C) 2006-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "strscan"
|
5
|
+
require "iconv"
|
6
|
+
require "nkf"
|
7
|
+
require "mailparser/conv_charset"
|
8
|
+
|
9
|
+
module MailParser
end

# Decoding of RFC 2047 encoded-words ("=?charset?Q|B?text?=") found in
# header text.
module MailParser::RFC2047

  # A decoded text fragment that remembers where it came from.
  # charset:: lower-cased charset named by the encoded-word, or nil for
  #           text that was not an encoded-word
  # raw::     original encoded-word text (defaults to the string itself)
  class String < ::String
    # Default converter, called as (from_charset, to_charset, string).
    @@charset_converter = Proc.new{|f,t,s| MailParser::ConvCharset.conv_charset(f,t,s)}

    def initialize(str, charset=nil, raw=nil, charset_converter=nil)
      super(str)
      @charset = charset
      @raw = raw || str
      @charset_converter = charset_converter || @@charset_converter
    end
    attr_reader :charset
    attr_reader :raw

    # Convert this fragment to +to_charset+ through the converter.
    # Returns self unchanged when either the source or the target charset
    # is unknown (nil).
    def conv_charset(to_charset)
      if @charset and to_charset
        @charset_converter.call @charset, to_charset, self
      else
        self
      end
    end
  end

  module_function

  # Decode every encoded-word in +str+ and return the joined text.
  # opt:: either the output charset, or a Hash with :output_charset and
  #       :charset_converter (a callable taking from, to, string)
  # Adjacent encoded-words are joined without a separator; a single space
  # is inserted wherever plain text (or a fragment whose conversion failed)
  # is involved. When conversion raises Iconv::Failure the raw encoded-word
  # is emitted instead.
  def decode(str, opt=nil)
    if opt.is_a? Hash
      charset = opt[:output_charset]
      charset_converter = opt[:charset_converter]
    else
      charset = opt
    end
    last_charset = nil
    ret = ""
    split_decode(str, charset_converter).each do |s|
      begin
        s2 = charset && s.charset ? s.conv_charset(charset) : s
        cs = s.charset
      rescue Iconv::Failure
        s2 = s.raw
        cs = nil
      end
      ret << " " if last_charset.nil? or cs.nil?
      ret << s2
      last_charset = cs
    end
    return ret.strip
  end

  # Split +str+ into an Array of RFC2047::String fragments: decoded
  # encoded-words (with charset and raw set) interleaved with the stripped
  # plain text between them. Text that is empty or whitespace-only is
  # omitted, both between encoded-words and after the last one.
  def split_decode(str, charset_converter=nil)
    ret = []
    while str =~ /\=\?([^\(\)\<\>\@\,\;\:\"\/\[\]\?\.\=]+)\?([QB])\?([^\? ]+)\?\=/ni do
      raw = $&
      pre, charset, encoding, enc_text, after = $`, $1.downcase, $2.downcase, $3, $'
      leading = pre.strip
      ret << String.new(leading) unless leading.empty?
      s = encoding == "q" ? q_decode(enc_text) : b_decode(enc_text)
      ret << String.new(s, charset, raw, charset_converter)
      str = after
    end
    # Strip before testing so a whitespace-only remainder does not add an
    # empty fragment (same rule as for the text preceding an encoded-word).
    trailing = str.strip
    ret << String.new(trailing) unless trailing.empty?
    return ret
  end

  # Decode "Q" encoded text: "_" stands for a space, the rest is
  # quoted-printable.
  def q_decode(str)
    return str.gsub(/_/," ").gsub(/=\s*?$/,"=").unpack("M")[0]
  end

  # Decode "B" (base64) encoded text, ignoring characters outside the
  # base64 alphabet and "=".
  def b_decode(str)
    return str.gsub(/[^A-Z0-9\+\/=]/i,"").unpack("m")[0]
  end

end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Copyright (C) 2006-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "mailparser/error"
|
5
|
+
require "mailparser/rfc2183/parser"
|
6
|
+
|
7
|
+
# RFC 2183 (Content-Disposition) header support.
module MailParser::RFC2183
  # Lower-cased header name => start symbol handed to this module's Parser.
  HEADER_TYPE = {
    "content-disposition" => :CONTENT_DISPOSITION,
  }

  # Parsed Content-Disposition value. type is stored lower-cased; params is
  # stored exactly as given.
  class ContentDisposition
    attr_reader :type, :params

    def initialize(type, params)
      lowered_type = type.downcase
      @type   = lowered_type
      @params = params
    end
  end

  module_function

  # Parse the header +value+ according to the header +name+.
  # name::  header field name (matched case-insensitively against HEADER_TYPE)
  # value:: raw header field body
  # opt::   accepted for interface compatibility; not used here
  # Returns value.chomp when the header is not listed in HEADER_TYPE,
  # otherwise whatever the selected Parser returns.
  def parse(name, value, opt={})
    htype = HEADER_TYPE[name.downcase]
    return value.chomp unless htype

    case htype
    when Array
      # [module, start symbol]: delegate to that module's Parser.
      owner, start = htype
      owner::Parser.new.parse(start, value)
    else
      Parser.new.parse(htype, value)
    end
  end
end
|