mailparser 0.4.22a
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY +141 -0
- data/README.txt +501 -0
- data/lib/mailparser.rb +558 -0
- data/lib/mailparser/conv_charset.rb +27 -0
- data/lib/mailparser/error.rb +7 -0
- data/lib/mailparser/loose.rb +292 -0
- data/lib/mailparser/obsolete.rb +403 -0
- data/lib/mailparser/rfc2045.rb +54 -0
- data/lib/mailparser/rfc2045/parser.rb +245 -0
- data/lib/mailparser/rfc2045/scanner.rb +54 -0
- data/lib/mailparser/rfc2047.rb +82 -0
- data/lib/mailparser/rfc2183.rb +33 -0
- data/lib/mailparser/rfc2183/parser.rb +186 -0
- data/lib/mailparser/rfc2183/scanner.rb +7 -0
- data/lib/mailparser/rfc2231.rb +57 -0
- data/lib/mailparser/rfc2822.rb +212 -0
- data/lib/mailparser/rfc2822/parser.rb +883 -0
- data/lib/mailparser/rfc2822/scanner.rb +119 -0
- data/test.rb +26 -0
- data/test/test_loose.rb +371 -0
- data/test/test_mailparser.rb +1130 -0
- data/test/test_obsolete.rb +615 -0
- data/test/test_rfc2045.rb +121 -0
- data/test/test_rfc2047.rb +118 -0
- data/test/test_rfc2183.rb +60 -0
- data/test/test_rfc2231.rb +167 -0
- data/test/test_rfc2822.rb +370 -0
- metadata +81 -0
@@ -0,0 +1,186 @@
|
|
1
|
+
#
|
2
|
+
# DO NOT MODIFY!!!!
|
3
|
+
# This file is automatically generated by Racc 1.4.7
|
4
|
+
# from Racc grammer file "".
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'racc/parser.rb'
|
8
|
+
module MailParser
|
9
|
+
module RFC2183
|
10
|
+
class Parser < Racc::Parser
|
11
|
+
|
12
|
+
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 42)
|
13
|
+
|
14
|
+
require "mailparser/rfc2183/scanner"
|
15
|
+
|
16
|
+
# Parse +value+ as a header of kind +header_type+.
#
# A Scanner is built over the value and the Racc driver (yyparse) is run
# with #parse_sub as its token source.  The object produced by the grammar
# is returned after being given a singleton +comments+ accessor, which is
# filled with whatever the scanner reports through Scanner#comments.
def parse(header_type, value)
  @header_type, @value = header_type, value
  @scanner = Scanner.new(header_type, value)
  result = yyparse(self, :parse_sub)
  # The accessor is attached to this one object only, not to its class.
  class << result
    attr_accessor :comments
  end
  result.comments = @scanner.comments
  result
end
|
27
|
+
|
28
|
+
# Token source handed to yyparse by #parse.
#
# The first token yielded is the header type itself (with a nil value);
# after that the scanner feeds its own tokens to the same block.
def parse_sub(&block)
  yield(@header_type, nil)
  @scanner.scan(&block)
end
|
32
|
+
|
33
|
+
# Racc error hook: turn a syntax error into MailParser::ParseError.
#
# t::      id of the offending token (unused here)
# val::    semantic value of the offending token; may be nil, e.g. when the
#          error is detected at end of input
# vstack:: parser value stack (unused here)
#
# The message is the offending value followed by the input the scanner has
# not consumed yet.  Interpolation is used instead of String#+ so that a nil
# value still produces a ParseError rather than a NoMethodError.
#
# NOTE: this code originates from the inner section of the Racc grammar
# (parser.y); the same change should be made there so it survives
# regeneration.
def on_error(t, val, vstack)
  raise MailParser::ParseError, "#{val}#{@scanner.rest}"
end
|
38
|
+
...end parser.y/module_eval...
|
39
|
+
##### State transition tables begin ###
|
40
|
+
|
41
|
+
# Parser tables emitted by Racc.  The numbers are generator output and must
# not be edited by hand; only the layout has been compacted.

racc_action_table   = [15, 9, 6, 16, 7, 3, 12, 13, 2]
racc_action_check   = [13, 8, 2, 13, 3, 1, 9, 11, 0]
racc_action_pointer = [6, 5, -1, 4, nil, nil, nil, nil, -3, 3, nil, 2, nil, -3, nil, nil, nil]
racc_action_default = [-10, -10, -10, -10, -1, -4, -3, 17, -2, -10, -5, -10, -7, -10, -6, -8, -9]

racc_goto_table   = [1, 4, 5, 8, 10, 11, 14]
racc_goto_check   = [1, 2, 3, 4, 5, 6, 7]
racc_goto_pointer = [nil, 0, -1, 0, -2, -5, -4, -7]
racc_goto_default = [nil, nil, nil, nil, nil, nil, nil, nil]

# Flat list of triples: number of right-hand-side symbols, left-hand-side
# nonterminal id, name of the reduction method.
racc_reduce_table = [
  0, 0,  :racc_error,
  2, 8,  :_reduce_1,
  2, 9,  :_reduce_2,
  1, 10, :_reduce_none,
  0, 11, :_reduce_4,
  3, 11, :_reduce_5,
  3, 12, :_reduce_6,
  1, 13, :_reduce_none,
  1, 14, :_reduce_none,
  1, 14, :_reduce_none ]

racc_reduce_n = 10

racc_shift_n = 17

# Token (as yielded by the scanner) => terminal id.
racc_token_table = {
  false                => 0,
  :error               => 1,
  :CONTENT_DISPOSITION => 2,
  :TOKEN               => 3,
  ";"                  => 4,
  "="                  => 5,
  :QUOTED_STRING       => 6 }

racc_nt_base = 7

racc_use_result_var = false

# Bundle read by Racc::Parser; presumably positional, so do not reorder.
Racc_arg = [
  racc_action_table,
  racc_action_check,
  racc_action_default,
  racc_action_pointer,
  racc_goto_table,
  racc_goto_check,
  racc_goto_default,
  racc_goto_pointer,
  racc_nt_base,
  racc_reduce_table,
  racc_token_table,
  racc_shift_n,
  racc_reduce_n,
  racc_use_result_var ]

# Printable name of every terminal (ids 0-6) and nonterminal (ids 7-14).
Racc_token_to_s_table = [
  "$end",
  "error",
  "CONTENT_DISPOSITION",
  "TOKEN",
  "\";\"",
  "\"=\"",
  "QUOTED_STRING",
  "$start",
  "all",
  "content_disposition",
  "type",
  "parameter_list",
  "parameter",
  "attribute",
  "value" ]

Racc_debug_parser = false
|
130
|
+
|
131
|
+
##### State transition tables end #####
|
132
|
+
|
133
|
+
# reduce 0 omitted
|
134
|
+
|
135
|
+
module_eval(<<'.,.,', 'parser.y', 9)
|
136
|
+
# Reduction for a two-symbol production: the result is the semantic value
# of the second symbol; the first one is discarded.
def _reduce_1(val, _values)
  _leading, payload = val
  payload
end
|
139
|
+
.,.,
|
140
|
+
|
141
|
+
module_eval(<<'.,.,', 'parser.y', 13)
|
142
|
+
# Reduction for a two-symbol production: wraps both semantic values, in
# order, in a new ContentDisposition.
def _reduce_2(val, _values)
  first, second = val
  ContentDisposition.new(first, second)
end
|
146
|
+
.,.,
|
147
|
+
|
148
|
+
# reduce 3 omitted
|
149
|
+
|
150
|
+
module_eval(<<'.,.,', 'parser.y', 20)
|
151
|
+
# Reduction for an empty (zero-symbol) production: starts a fresh, empty
# Hash that later reductions fill in.
def _reduce_4(val, _values)
  Hash.new
end
|
155
|
+
.,.,
|
156
|
+
|
157
|
+
module_eval(<<'.,.,', 'parser.y', 24)
|
158
|
+
# Reduction for a three-symbol production: val[0] is the Hash collected so
# far and val[2] is a [name, value] pair.  The pair is stored in the Hash
# and the same Hash is returned.
#
# A value wrapped in double quotes loses the quotes, and every line break
# together with its surrounding whitespace is collapsed to one space.
def _reduce_5(val, _values)
  collected = val[0]
  name, text = val[2]
  text = Regexp.last_match(1) if text =~ /\A\"(.*)\"\Z/m
  collected[name] = text.gsub(/\s*\n\s*/, " ")
  collected
end
|
165
|
+
.,.,
|
166
|
+
|
167
|
+
module_eval(<<'.,.,', 'parser.y', 32)
|
168
|
+
# Reduction for a three-symbol production: returns a two-element Array of
# the first symbol's value, lower-cased, and the third symbol's value.
# The middle symbol is dropped.
def _reduce_6(val, _values)
  key, _separator, content = val
  [key.downcase, content]
end
|
172
|
+
.,.,
|
173
|
+
|
174
|
+
# reduce 7 omitted
|
175
|
+
|
176
|
+
# reduce 8 omitted
|
177
|
+
|
178
|
+
# reduce 9 omitted
|
179
|
+
|
180
|
+
# Default reduction used by productions without an action: passes the
# first symbol's semantic value through unchanged.
def _reduce_none(val, _values)
  val.first
end
|
183
|
+
|
184
|
+
end # class Parser
|
185
|
+
end # module RFC2183
|
186
|
+
end # module MailParser
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Copyright (C) 2006-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "mailparser/error"
|
5
|
+
|
6
|
+
module MailParser::RFC2231
|
7
|
+
module_function
|
8
|
+
# Decode parameters written with the extended / continued syntax handled by
# this module (name*, name*N, name*N*).
#
# params:: Hash of raw parameter name => raw value.
# opt::    Hash of options; any non-Hash value is taken as the :strict flag.
#          :strict::            when true, an extended value that lacks the
#                               charset'language'text form raises.  Note that
#                               passing a Hash without :strict means
#                               non-strict.
#          :output_charset::    when set, values that carried a charset are
#                               converted to this charset.
#          :charset_converter:: callable taking (from, to, string); defaults
#                               to ConvCharset.conv_charset.
#
# Returns a new Hash keyed by the bare parameter name.  Every value gets
# singleton +charset+ and +language+ accessors; they are set only for values
# that came from an extended (name* / name*0*) parameter.
#
# Raises MailParser::ParseError in strict mode for a malformed extended value.
def parse_param(params, opt={:strict=>true})
  opt = {:strict=>opt} unless opt.is_a? Hash
  newparams = {}
  segments = Hash.new{|hash, name| hash[name] = []}
  char_lang = {}
  params.each do |key, value|
    case key
    when /^([^\*]+)(\*0)?\*$/no
      # name* or name*0*: value is charset'language'percent-encoded-text
      name, ord = $1, $2
      char, lang, v = value.split(/\'/, 3)
      char_lang[name] = [char, lang]
      if v.nil?
        raise MailParser::ParseError, "#{key}=#{value}" if opt[:strict]
        # Fewer than two quotes: fall back to the last piece present.  An
        # empty value yields no pieces at all, hence the final "".
        v = lang || char || ""
      end
      v = v.gsub(/%([0-9A-F][0-9A-F])/ni){$1.hex.chr}
      if ord
        segments[name] << [0, v]
      else
        newparams[name] = v
      end
    when /^([^\*]+)\*([1-9]\d*)\*$/no
      # name*N*: percent-encoded continuation segment
      name, ord = $1, $2.to_i
      v = value.gsub(/%([0-9A-F][0-9A-F])/ni){$1.hex.chr}
      segments[name] << [ord, v]
    when /^([^\*]+)\*([0-9]\d*)$/no
      # name*N: literal continuation segment
      name, ord = $1, $2.to_i
      segments[name] << [ord, value]
    else
      newparams[key] = value
    end
  end
  # Reassemble continued parameters in segment-number order.
  segments.each do |name, parts|
    newparams[name] = parts.sort{|a,b| a[0]<=>b[0]}.map{|a| a[1]}.join
  end
  newparams.keys.each do |k|
    v = newparams[k]
    # Singleton accessors cannot be added to a frozen String; work on a copy.
    v = v.dup if v.frozen?
    if char_lang.key?(k) && opt[:output_charset]
      charset_converter = opt[:charset_converter] || Proc.new{|f,t,s| ConvCharset.conv_charset(f,t,s)}
      begin
        v.replace charset_converter.call(char_lang[k][0], opt[:output_charset], v)
      rescue StandardError
        # Best effort: keep the unconverted value when conversion fails.
      end
    end
    class << v
      attr_accessor :charset, :language
    end
    v.charset, v.language = char_lang[k] if char_lang.key? k
    newparams[k] = v
  end
  return newparams
end
|
57
|
+
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
# Copyright (C) 2006-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "mailparser/error"
|
5
|
+
require "mailparser/rfc2822/parser"
|
6
|
+
|
7
|
+
module MailParser::RFC2822
|
8
|
+
# Lower-cased header field name => token type passed to the parser.
# Field names that are not listed are returned by parse as plain text.
HEADER_TYPE = {
  # originator / destination / identification fields
  "date"              => :DATE_TIME,
  "from"              => :MAILBOX_LIST,
  "sender"            => :MAILBOX,
  "reply-to"          => :ADDRESS_LIST,
  "to"                => :ADDRESS_LIST,
  "cc"                => :ADDRESS_LIST,
  "bcc"               => :ADDRESS_LIST_BCC,
  "message-id"        => :MSG_ID,
  "in-reply-to"       => :PHRASE_MSG_ID_LIST,
  "references"        => :PHRASE_MSG_ID_LIST,
  # informational fields (unstructured ones are deliberately left out)
# "subject"           => :UNSTRUCTURED,
# "comments"          => :UNSTRUCTURED,
  "keywords"          => :PHRASE_LIST,
  # resent fields
  "resent-date"       => :DATE_TIME,
  "resent-from"       => :MAILBOX_LIST,
  "resent-sender"     => :MAILBOX,
  "resent-to"         => :ADDRESS_LIST,
  "resent-cc"         => :ADDRESS_LIST,
  "resent-bcc"        => :ADDRESS_LIST_BCC,
  "resent-message-id" => :MSG_ID,
  # trace fields
  "return-path"       => :RETURN_PATH,
  "received"          => :RECEIVED,
}
|
32
|
+
|
33
|
+
# Alphabetic zone name (upper case) => numeric "+hhmm" / "-hhmm" offset.
# DateTime#initialize consults this table for any zone that is not already
# numeric.  Each key appears exactly once: the literal used to list "EDT",
# "EST" and "CDT" twice, which made Ruby warn about duplicated keys.
ZONE = {
  "UT"  => "+0000",
  "GMT" => "+0000",
  "EDT" => "-0400",
  "EST" => "-0500",
  "CDT" => "-0500",
  "CST" => "-0600",
  "MDT" => "-0600",
  "MST" => "-0700",
  "PDT" => "-0700",
  "PST" => "-0800",
  # single-letter zones ("J" is not listed)
  "A"   => "+0100",
  "B"   => "+0200",
  "C"   => "+0300",
  "D"   => "+0400",
  "E"   => "+0500",
  "F"   => "+0600",
  "G"   => "+0700",
  "H"   => "+0800",
  "I"   => "+0900",
  "K"   => "+1000",
  "L"   => "+1100",
  "M"   => "+1200",
  "N"   => "-0100",
  "O"   => "-0200",
  "P"   => "-0300",
  "Q"   => "-0400",
  "R"   => "-0500",
  "S"   => "-0600",
  "T"   => "-0700",
  "U"   => "-0800",
  "V"   => "-0900",
  "W"   => "-1000",
  "X"   => "-1100",
  "Y"   => "-1200",
  "Z"   => "+0000",
  "JST" => "+0900",
}
|
74
|
+
|
75
|
+
# The local_part@domain portion of an address.
class AddrSpec
  attr_reader :local_part, :domain

  # local_part:: text left of the "@"
  # domain::     text right of the "@"
  def initialize(local_part, domain)
    @local_part, @domain = local_part, domain
  end

  # Renders "local_part@domain".
  def to_s
    "#{local_part}@#{domain}"
  end
end
|
85
|
+
|
86
|
+
# An address (addr_spec) with an optional display name and a list of
# comments.
class Mailbox
  attr_reader :addr_spec, :display_name
  attr_accessor :comments
  alias :phrase :display_name

  # addr_spec::    object answering local_part, domain and to_s
  # display_name:: optional name; nil is stored as ""
  def initialize(addr_spec, display_name=nil)
    @addr_spec    = addr_spec
    @display_name = display_name || ""
    @comments     = []
  end

  # Local part, delegated to the addr_spec.
  def local_part
    @addr_spec.local_part
  end

  # Domain, delegated to the addr_spec.
  def domain
    @addr_spec.domain
  end

  # "<addr>" when there is no display name, otherwise "name <addr>".
  def to_s
    bracketed = "<#{@addr_spec}>"
    return bracketed if display_name.empty?
    "#{@display_name} #{bracketed}"
  end
end
|
109
|
+
|
110
|
+
# A named group of mailboxes.
class Group
  attr_reader :mailbox_list, :display_name
  alias :phrase :display_name

  # mailbox_list:: collection of members (must answer join)
  # display_name:: name of the group
  def initialize(mailbox_list, display_name)
    @mailbox_list, @display_name = mailbox_list, display_name
  end

  # Renders "name:member,member;".
  def to_s
    members = @mailbox_list.join(",")
    "#{@display_name}:#{members};"
  end
end
|
121
|
+
|
122
|
+
# Value of a return-path header: wraps an addr_spec, which may be absent.
class ReturnPath
  attr_reader :addr_spec

  # addr_spec:: the address, or nil when none was given
  def initialize(addr_spec=nil)
    @addr_spec = addr_spec
  end
end
|
128
|
+
|
129
|
+
# Array of message ids that renders as a space-separated list.
class MsgIdList < Array
  # val:: optional first element; nothing is added when it is nil or false
  def initialize(val=nil)
    push(val) if val
  end

  # Elements converted with to_s and joined by single spaces.
  def to_s
    map { |id| id.to_s }.join(" ")
  end
end
|
137
|
+
|
138
|
+
# A single message id; stored as given and rendered inside angle brackets.
class MsgId
  attr_reader :msg_id

  # msg_id:: the id text
  def initialize(msg_id)
    @msg_id = msg_id
  end

  # Renders "<msg_id>".
  def to_s
    "<" + "#{@msg_id}" + ">"
  end
end
|
147
|
+
|
148
|
+
# Parsed value of a received header: its name/value part plus its date-time.
class Received
  attr_reader :name_val, :date_time

  # name_val::  the name/value portion as produced by the parser
  # date_time:: the date-time portion
  def initialize(name_val, date_time)
    @name_val  = name_val
    @date_time = date_time
  end
end
|
154
|
+
|
155
|
+
# A validated calendar date and time of day together with a numeric zone
# offset ("+hhmm" / "-hhmm").
class DateTime
  # Current local time.  The numeric UTC offset (strftime "%z") is passed
  # rather than Time#zone: abbreviations such as "CET" or "UTC" are not in
  # ZONE and would silently become "-0000", making #time wrong.
  def self.now
    t = Time.now
    self.new(t.year, t.month, t.day, t.hour, t.min, t.sec, t.strftime("%z"))
  end

  # year .. sec:: anything answering to_i
  # zone::        "+hhmm" / "-hhmm", or a name looked up in ZONE
  #               (case-insensitive); an unknown or nil zone becomes "-0000"
  #
  # Raises ArgumentError when a field is out of range (year 0..9999, real
  # calendar day including leap years, hour 0..23, minute 0..59,
  # second 0..60, zone minutes 0..59).
  def initialize(year, month, day, hour, min, sec, zone)
    y, m, d, h, mi, s = year.to_i, month.to_i, day.to_i, hour.to_i, min.to_i, sec.to_i
    raise ArgumentError, "invalid year" if y < 0 || 9999 < y
    raise ArgumentError, "invalid month" if m < 1 || 12 < m
    raise ArgumentError, "invalid day of the month" if d < 1 || days_in_month(y, m) < d
    raise ArgumentError, "invalid hour" if h < 0 || 23 < h
    raise ArgumentError, "invalid minute" if mi < 0 || 59 < mi
    raise ArgumentError, "invalid second" if s < 0 || 60 < s  # 60 is allowed
    zone = zone.to_s
    if zone =~ /^[+-]\d\d(\d\d)$/
      raise ArgumentError, "invalid zone" if $1.to_i > 59
    else
      zone = ZONE[zone.upcase] || "-0000"
    end
    @year, @month, @day, @hour, @min, @sec, @zone = y, m, d, h, mi, s, zone
    # Offset in seconds: hhmm -> hh*3600 + mm*60, negated for "-" zones.
    z = zone[1,4].to_i
    @zone_sec = z/100*3600 + z%100*60
    @zone_sec = -@zone_sec if zone[0,1] == "-"
  end

  attr_reader :year, :month, :day, :hour, :min, :sec, :zone

  # The instant as a Time: the fields are read as UTC and then shifted by
  # the zone offset.
  def time()
    t = Time.utc(@year, @month, @day, @hour, @min, @sec)
    Time.at(t.to_i - @zone_sec)
  end

  private

  # Number of days in month m of year y (Gregorian leap-year rule).
  def days_in_month(y, m)
    return 31 if [1,3,5,7,8,10,12].include? m
    return 30 if [4,6,9,11].include? m
    leap = y%4 == 0 && (y%100 != 0 || y%400 == 0)
    leap ? 29 : 28
  end
end
|
198
|
+
|
199
|
+
module_function
|
200
|
+
|
201
|
+
# Parse the value of the header field +name+.
#
# name::  header field name; looked up lower-cased in HEADER_TYPE
# value:: raw header value
# opt::   options handed unchanged to the parser's constructor
#
# A field that is not in HEADER_TYPE is returned as +value+ with its
# trailing line terminator removed.  Otherwise the value is parsed: when the
# table entry is an Array [mod, type], mod::Parser is used with that type;
# when it is anything else, this module's Parser is used with the entry
# itself as the type.
def parse(name, value, opt={})
  htype = HEADER_TYPE[name.downcase]
  return value.chomp unless htype
  return Parser.new(opt).parse(htype, value) unless htype.is_a? Array
  htype[0]::Parser.new(opt).parse(htype[1], value)
end
|
212
|
+
end
|