mailparser 0.4.22a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY +141 -0
- data/README.txt +501 -0
- data/lib/mailparser.rb +558 -0
- data/lib/mailparser/conv_charset.rb +27 -0
- data/lib/mailparser/error.rb +7 -0
- data/lib/mailparser/loose.rb +292 -0
- data/lib/mailparser/obsolete.rb +403 -0
- data/lib/mailparser/rfc2045.rb +54 -0
- data/lib/mailparser/rfc2045/parser.rb +245 -0
- data/lib/mailparser/rfc2045/scanner.rb +54 -0
- data/lib/mailparser/rfc2047.rb +82 -0
- data/lib/mailparser/rfc2183.rb +33 -0
- data/lib/mailparser/rfc2183/parser.rb +186 -0
- data/lib/mailparser/rfc2183/scanner.rb +7 -0
- data/lib/mailparser/rfc2231.rb +57 -0
- data/lib/mailparser/rfc2822.rb +212 -0
- data/lib/mailparser/rfc2822/parser.rb +883 -0
- data/lib/mailparser/rfc2822/scanner.rb +119 -0
- data/test.rb +26 -0
- data/test/test_loose.rb +371 -0
- data/test/test_mailparser.rb +1130 -0
- data/test/test_obsolete.rb +615 -0
- data/test/test_rfc2045.rb +121 -0
- data/test/test_rfc2047.rb +118 -0
- data/test/test_rfc2183.rb +60 -0
- data/test/test_rfc2231.rb +167 -0
- data/test/test_rfc2822.rb +370 -0
- metadata +81 -0
@@ -0,0 +1,186 @@
|
|
1
|
+
#
|
2
|
+
# DO NOT MODIFY!!!!
|
3
|
+
# This file is automatically generated by Racc 1.4.7
|
4
|
+
# from Racc grammar file "".
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'racc/parser.rb'
|
8
|
+
module MailParser
|
9
|
+
module RFC2183
|
10
|
+
class Parser < Racc::Parser
|
11
|
+
|
12
|
+
module_eval(<<'...end parser.y/module_eval...', 'parser.y', 42)
|
13
|
+
|
14
|
+
require "mailparser/rfc2183/scanner"
|
15
|
+
|
16
|
+
# Parses +value+, the raw text of a header of type +header_type+.
#
# A Scanner is created over the value and the Racc engine (yyparse) is run
# with parse_sub as its token source. Whatever @scanner.comments returns is
# attached to the parse result through a singleton +comments+ accessor, and
# the result is returned.
def parse(header_type, value)
  @header_type, @value = header_type, value
  @scanner = Scanner.new(header_type, value)
  parsed = yyparse(self, :parse_sub)
  # The accessor is defined on this one object only, not on its class.
  class << parsed
    attr_accessor :comments
  end
  parsed.comments = @scanner.comments
  parsed
end
|
27
|
+
|
28
|
+
# Token source passed to yyparse by #parse.
#
# Emits the header type as the first token (with a nil value) and then
# delegates to the scanner, which yields the remaining tokens to the same
# block. Returns whatever @scanner.scan returns.
def parse_sub(&block)
  yield(@header_type, nil)
  @scanner.scan(&block)
end
|
32
|
+
|
33
|
+
# Racc error hook: turns a syntax error into MailParser::ParseError.
#
# t::      the offending token id (unused here)
# val::    the offending token's value; may be nil
# vstack:: the parser value stack (unused here)
#
# The message is the offending token value followed by the scanner's
# unconsumed input.
def on_error(t, val, vstack)
#  p t, val, vstack
#  p racc_token2str(t)
  # Tokens can carry a nil value (parse_sub emits the header-type token
  # that way), so coerce both parts before concatenating. Otherwise a
  # NoMethodError would escape instead of the ParseError callers rescue.
  raise MailParser::ParseError, val.to_s + @scanner.rest.to_s
end
|
38
|
+
...end parser.y/module_eval...
|
39
|
+
##### State transition tables begin ###
|
40
|
+
|
41
|
+
racc_action_table = [
|
42
|
+
15, 9, 6, 16, 7, 3, 12, 13, 2 ]
|
43
|
+
|
44
|
+
racc_action_check = [
|
45
|
+
13, 8, 2, 13, 3, 1, 9, 11, 0 ]
|
46
|
+
|
47
|
+
racc_action_pointer = [
|
48
|
+
6, 5, -1, 4, nil, nil, nil, nil, -3, 3,
|
49
|
+
nil, 2, nil, -3, nil, nil, nil ]
|
50
|
+
|
51
|
+
racc_action_default = [
|
52
|
+
-10, -10, -10, -10, -1, -4, -3, 17, -2, -10,
|
53
|
+
-5, -10, -7, -10, -6, -8, -9 ]
|
54
|
+
|
55
|
+
racc_goto_table = [
|
56
|
+
1, 4, 5, 8, 10, 11, 14 ]
|
57
|
+
|
58
|
+
racc_goto_check = [
|
59
|
+
1, 2, 3, 4, 5, 6, 7 ]
|
60
|
+
|
61
|
+
racc_goto_pointer = [
|
62
|
+
nil, 0, -1, 0, -2, -5, -4, -7 ]
|
63
|
+
|
64
|
+
racc_goto_default = [
|
65
|
+
nil, nil, nil, nil, nil, nil, nil, nil ]
|
66
|
+
|
67
|
+
racc_reduce_table = [
|
68
|
+
0, 0, :racc_error,
|
69
|
+
2, 8, :_reduce_1,
|
70
|
+
2, 9, :_reduce_2,
|
71
|
+
1, 10, :_reduce_none,
|
72
|
+
0, 11, :_reduce_4,
|
73
|
+
3, 11, :_reduce_5,
|
74
|
+
3, 12, :_reduce_6,
|
75
|
+
1, 13, :_reduce_none,
|
76
|
+
1, 14, :_reduce_none,
|
77
|
+
1, 14, :_reduce_none ]
|
78
|
+
|
79
|
+
racc_reduce_n = 10
|
80
|
+
|
81
|
+
racc_shift_n = 17
|
82
|
+
|
83
|
+
racc_token_table = {
|
84
|
+
false => 0,
|
85
|
+
:error => 1,
|
86
|
+
:CONTENT_DISPOSITION => 2,
|
87
|
+
:TOKEN => 3,
|
88
|
+
";" => 4,
|
89
|
+
"=" => 5,
|
90
|
+
:QUOTED_STRING => 6 }
|
91
|
+
|
92
|
+
racc_nt_base = 7
|
93
|
+
|
94
|
+
racc_use_result_var = false
|
95
|
+
|
96
|
+
Racc_arg = [
|
97
|
+
racc_action_table,
|
98
|
+
racc_action_check,
|
99
|
+
racc_action_default,
|
100
|
+
racc_action_pointer,
|
101
|
+
racc_goto_table,
|
102
|
+
racc_goto_check,
|
103
|
+
racc_goto_default,
|
104
|
+
racc_goto_pointer,
|
105
|
+
racc_nt_base,
|
106
|
+
racc_reduce_table,
|
107
|
+
racc_token_table,
|
108
|
+
racc_shift_n,
|
109
|
+
racc_reduce_n,
|
110
|
+
racc_use_result_var ]
|
111
|
+
|
112
|
+
Racc_token_to_s_table = [
|
113
|
+
"$end",
|
114
|
+
"error",
|
115
|
+
"CONTENT_DISPOSITION",
|
116
|
+
"TOKEN",
|
117
|
+
"\";\"",
|
118
|
+
"\"=\"",
|
119
|
+
"QUOTED_STRING",
|
120
|
+
"$start",
|
121
|
+
"all",
|
122
|
+
"content_disposition",
|
123
|
+
"type",
|
124
|
+
"parameter_list",
|
125
|
+
"parameter",
|
126
|
+
"attribute",
|
127
|
+
"value" ]
|
128
|
+
|
129
|
+
Racc_debug_parser = false
|
130
|
+
|
131
|
+
##### State transition tables end #####
|
132
|
+
|
133
|
+
# reduce 0 omitted
|
134
|
+
|
135
|
+
module_eval(<<'.,.,', 'parser.y', 9)
|
136
|
+
# Reduction 1 (two symbols, per racc_reduce_table): the result is the value
# of the second matched symbol.
def _reduce_1(val, _values)
  _first, second = val
  second
end
|
139
|
+
.,.,
|
140
|
+
|
141
|
+
module_eval(<<'.,.,', 'parser.y', 13)
|
142
|
+
# Reduction 2 (two symbols, per racc_reduce_table): wraps the two matched
# values in a ContentDisposition.
def _reduce_2(val, _values)
  first, second = val[0], val[1]
  ContentDisposition.new(first, second)
end
|
146
|
+
.,.,
|
147
|
+
|
148
|
+
# reduce 3 omitted
|
149
|
+
|
150
|
+
module_eval(<<'.,.,', 'parser.y', 20)
|
151
|
+
# Reduction 4 (zero symbols, per racc_reduce_table): yields a fresh, empty
# Hash.
def _reduce_4(val, _values)
  Hash.new
end
|
155
|
+
.,.,
|
156
|
+
|
157
|
+
module_eval(<<'.,.,', 'parser.y', 24)
|
158
|
+
# Reduction 5 (three symbols, per racc_reduce_table): stores one
# [name, value] pair (val[2]) into the hash accumulated so far (val[0]).
#
# Surrounding double quotes are removed from the value, and every newline
# together with its adjacent whitespace is collapsed into a single space.
# The same hash object is returned.
def _reduce_5(val, _values)
  params = val[0]
  name, raw = val[2]
  quoted = /\A\"(.*)\"\Z/m.match(raw)
  raw = quoted[1] if quoted
  params[name] = raw.gsub(/\s*\n\s*/, " ")
  params
end
|
165
|
+
.,.,
|
166
|
+
|
167
|
+
module_eval(<<'.,.,', 'parser.y', 32)
|
168
|
+
# Reduction 6 (three symbols, per racc_reduce_table): builds a two-element
# pair from the first symbol, lower-cased, and the third symbol.
def _reduce_6(val, _values)
  attribute = val[0]
  value = val[2]
  [attribute.downcase, value]
end
|
172
|
+
.,.,
|
173
|
+
|
174
|
+
# reduce 7 omitted
|
175
|
+
|
176
|
+
# reduce 8 omitted
|
177
|
+
|
178
|
+
# reduce 9 omitted
|
179
|
+
|
180
|
+
# Default reduction used by the rules marked :_reduce_none in
# racc_reduce_table: passes the first matched value through unchanged.
def _reduce_none(val, _values)
  val.first
end
|
183
|
+
|
184
|
+
end # class Parser
|
185
|
+
end # module RFC2183
|
186
|
+
end # module MailParser
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Copyright (C) 2006-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "mailparser/error"
|
5
|
+
|
6
|
+
module MailParser::RFC2231
|
7
|
+
module_function
|
8
|
+
# Decodes RFC 2231 style parameters: extended values ("name*"),
# continuations ("name*0", "name*1", ...) and their combination
# ("name*0*", "name*1*", ...).
#
# params:: enumerable of raw parameter name => value pairs
# opt::    a Hash of options, or any other value, which is then used as the
#          :strict flag
#          :strict::            raise MailParser::ParseError when an extended
#                               value lacks the charset'language'value form
#          :output_charset::    when set, values that declared a charset are
#                               converted to this charset (best effort)
#          :charset_converter:: callable taking (from, to, string); defaults
#                               to ConvCharset.conv_charset
#
# Returns a new Hash of decoded name => value pairs. Every value responds to
# #charset and #language; both are nil unless the parameter declared them.
# Values that are not frozen are the caller's own String objects with the
# accessors added; frozen values are duplicated first.
def parse_param(params, opt={:strict=>true})
  opt = {:strict=>opt} unless opt.is_a? Hash
  # Replaces %XX escapes with the byte they denote.
  unescape = lambda{|s| s.gsub(/%([0-9A-F][0-9A-F])/ni){$1.hex.chr}}
  newparams = {}
  sections = Hash.new{|hash, name| hash[name] = []}   # name => [[ordinal, text], ...]
  char_lang = {}                                      # name => [charset, language]
  params.each do |key, value|
    case key
    when /^([^\*]+)(\*0)?\*$/no
      # "name*" or "name*0*": carries charset'language'encoded-value
      name, ord = $1, $2
      char, lang, v = value.split(/\'/, 3)
      char_lang[name] = [char, lang]
      if v.nil?
        raise MailParser::ParseError, "#{key}=#{value}" if opt[:strict]
        # Lenient mode: use whatever text is present. An empty value splits
        # into nothing at all, so fall back to "" instead of leaving nil.
        v = lang || char || ""
      end
      v = unescape.call(v)
      if ord
        sections[name] << [0, v]
      else
        newparams[name] = v
      end
    when /^([^\*]+)\*([1-9]\d*)\*$/no
      # "name*N*": percent-encoded continuation section
      name, ord = $1, $2.to_i
      sections[name] << [ord, unescape.call(value)]
    when /^([^\*]+)\*([0-9]\d*)$/no
      # "name*N": literal continuation section
      name, ord = $1, $2.to_i
      sections[name] << [ord, value]
    else
      newparams[key] = value
    end
  end
  # Reassemble continuations in ordinal order.
  sections.each do |name, parts|
    newparams[name] = parts.sort{|a,b| a[0]<=>b[0]}.map{|a| a[1]}.join
  end
  newparams.keys.each do |k|
    v = newparams[k]
    # A frozen string can neither be replaced in place nor given singleton
    # accessors, so work on a copy in that case.
    v = v.dup if v.frozen?
    if char_lang.key?(k) && opt[:output_charset]
      # NOTE(review): ConvCharset is resolved from this method's lexical
      # scope - confirm it is reachable when no :charset_converter is given.
      charset_converter = opt[:charset_converter] || Proc.new{|f,t,s| ConvCharset.conv_charset(f,t,s)}
      begin
        v.replace charset_converter.call(char_lang[k][0], opt[:output_charset], v)
      rescue StandardError
        # Conversion is best effort: keep the unconverted value on failure.
      end
    end
    class << v
      attr_accessor :charset, :language
    end
    v.charset, v.language = char_lang[k] if char_lang.key? k
    newparams[k] = v
  end
  newparams
end
|
57
|
+
end
|
@@ -0,0 +1,212 @@
|
|
1
|
+
# Copyright (C) 2006-2010 TOMITA Masahiro
|
2
|
+
# mailto:tommy@tmtm.org
|
3
|
+
|
4
|
+
require "mailparser/error"
|
5
|
+
require "mailparser/rfc2822/parser"
|
6
|
+
|
7
|
+
module MailParser::RFC2822
|
8
|
+
# Lower-cased header field name => token type given to the parser.
# Names that are absent here are returned unparsed by .parse.
HEADER_TYPE = {
  "date"              => :DATE_TIME,
  "from"              => :MAILBOX_LIST,
  "sender"            => :MAILBOX,
  "reply-to"          => :ADDRESS_LIST,
  "to"                => :ADDRESS_LIST,
  "cc"                => :ADDRESS_LIST,
  "bcc"               => :ADDRESS_LIST_BCC,
  "message-id"        => :MSG_ID,
  "in-reply-to"       => :PHRASE_MSG_ID_LIST,
  "references"        => :PHRASE_MSG_ID_LIST,
# "subject"           => :UNSTRUCTURED,
# "comments"          => :UNSTRUCTURED,
  "keywords"          => :PHRASE_LIST,
  "resent-date"       => :DATE_TIME,
  "resent-from"       => :MAILBOX_LIST,
  "resent-sender"     => :MAILBOX,
  "resent-to"         => :ADDRESS_LIST,
  "resent-cc"         => :ADDRESS_LIST,
  "resent-bcc"        => :ADDRESS_LIST_BCC,
  "resent-message-id" => :MSG_ID,
  "return-path"       => :RETURN_PATH,
  "received"          => :RECEIVED,
}
|
32
|
+
|
33
|
+
# Zone name => numeric "+hhmm"/"-hhmm" offset, used by DateTime to resolve
# named zones. Each key appears exactly once: the original listed "EDT",
# "EST" and "CDT" twice, which makes Ruby warn about duplicated hash keys.
ZONE = {
  "UT"  => "+0000",
  "GMT" => "+0000",
  "EDT" => "-0400",
  "EST" => "-0500",
  "CDT" => "-0500",
  "CST" => "-0600",
  "MDT" => "-0600",
  "MST" => "-0700",
  "PDT" => "-0700",
  "PST" => "-0800",
  # single-letter zones
  "A"   => "+0100",
  "B"   => "+0200",
  "C"   => "+0300",
  "D"   => "+0400",
  "E"   => "+0500",
  "F"   => "+0600",
  "G"   => "+0700",
  "H"   => "+0800",
  "I"   => "+0900",
  "K"   => "+1000",
  "L"   => "+1100",
  "M"   => "+1200",
  "N"   => "-0100",
  "O"   => "-0200",
  "P"   => "-0300",
  "Q"   => "-0400",
  "R"   => "-0500",
  "S"   => "-0600",
  "T"   => "-0700",
  "U"   => "-0800",
  "V"   => "-0900",
  "W"   => "-1000",
  "X"   => "-1100",
  "Y"   => "-1200",
  "Z"   => "+0000",
  "JST" => "+0900",
}
|
74
|
+
|
75
|
+
# An address split into its local part and its domain.
class AddrSpec
  attr_reader :local_part, :domain

  def initialize(local_part, domain)
    @local_part, @domain = local_part, domain
  end

  # Renders as local_part@domain.
  def to_s
    "#{@local_part}@#{@domain}"
  end
end
|
85
|
+
|
86
|
+
# A mailbox: an addr-spec with an optional display name and a list of
# comments.
class Mailbox
  attr_reader :addr_spec, :display_name
  attr_accessor :comments
  alias :phrase :display_name

  # A missing display name is stored as the empty string; comments start
  # out as an empty array.
  def initialize(addr_spec, display_name=nil)
    @addr_spec = addr_spec
    @display_name = display_name || ""
    @comments = []
  end

  # Local part of the wrapped addr-spec.
  def local_part
    @addr_spec.local_part
  end

  # Domain of the wrapped addr-spec.
  def domain
    @addr_spec.domain
  end

  # "<addr>" without a display name, "name <addr>" otherwise.
  def to_s
    return "<#{@addr_spec}>" if display_name.empty?
    "#{@display_name} <#{@addr_spec}>"
  end
end
|
109
|
+
|
110
|
+
# A named group of mailboxes.
class Group
  attr_reader :mailbox_list, :display_name
  alias :phrase :display_name

  def initialize(mailbox_list, display_name)
    @mailbox_list, @display_name = mailbox_list, display_name
  end

  # Renders as "name:member,member;".
  def to_s
    members = @mailbox_list.join(",")
    "#{@display_name}:#{members};"
  end
end
|
121
|
+
|
122
|
+
# Value of a Return-Path field; addr_spec is nil when none was given.
class ReturnPath
  attr_reader :addr_spec

  def initialize(addr_spec=nil)
    @addr_spec = addr_spec
  end
end
|
128
|
+
|
129
|
+
# An Array of message ids that prints as a space-separated list.
class MsgIdList < Array
  # Starts empty, or with +val+ as the only element when it is truthy.
  def initialize(val=nil)
    push(val) if val
  end

  # String forms of all elements joined by single spaces.
  def to_s
    collect{|msg_id| msg_id.to_s}.join(" ")
  end
end
|
137
|
+
|
138
|
+
# A single message id, stored without its angle brackets.
class MsgId
  attr_reader :msg_id

  def initialize(msg_id)
    @msg_id = msg_id
  end

  # The id wrapped in angle brackets.
  def to_s
    "<#{@msg_id}>"
  end
end
|
147
|
+
|
148
|
+
# Value of a Received field: its name/value part plus its date-time.
class Received
  attr_reader :name_val, :date_time

  def initialize(name_val, date_time)
    @name_val = name_val
    @date_time = date_time
  end
end
|
154
|
+
|
155
|
+
# A date and time taken from a header field. The zone is always kept as a
# numeric "+hhmm"/"-hhmm" string.
class DateTime
  # Builds an instance for the current local time (Time.now).
  def self.now
    t = Time.now
    # Pass the numeric UTC offset rather than t.zone: an abbreviation that
    # is missing from ZONE would be stored as "-0000", and #time would then
    # treat the local wall-clock fields as UTC.
    offset = t.utc_offset
    sign = offset < 0 ? "-" : "+"
    minutes = offset.abs / 60
    zone = "%s%02d%02d" % [sign, minutes / 60, minutes % 60]
    self.new(t.year, t.month, t.day, t.hour, t.min, t.sec, zone)
  end

  # year..sec:: anything responding to to_i
  # zone::      "+hhmm"/"-hhmm", or a zone name looked up in ZONE without
  #             regard to case; any other value (including nil) is stored
  #             as "-0000"
  #
  # Raises ArgumentError when a field is out of range. A second of 60 is
  # accepted.
  def initialize(year, month, day, hour, min, sec, zone)
    y, m, d, h, mi, s = year.to_i, month.to_i, day.to_i, hour.to_i, min.to_i, sec.to_i
    raise ArgumentError, "invalid year" if y < 0 or 9999 < y
    raise ArgumentError, "invalid month" if m < 1 or 12 < m
    raise ArgumentError, "invalid day of the month" if d < 1 or days_in_month(y, m) < d
    # Lower bounds are checked too, so a negative field fails here rather
    # than later inside #time.
    raise ArgumentError, "invalid hour" if h < 0 or 23 < h
    raise ArgumentError, "invalid minute" if mi < 0 or 59 < mi
    raise ArgumentError, "invalid second" if s < 0 or 60 < s
    zone = zone.to_s    # tolerate nil and non-String zones
    if zone =~ /^[+-]\d\d(\d\d)$/ then
      raise ArgumentError, "invalid zone" if $1.to_i > 59
    else
      zone = ZONE[zone.upcase] || "-0000"
    end
    @year, @month, @day, @hour, @min, @sec, @zone = y, m, d, h, mi, s, zone
    # Convert the hhmm digits into a signed offset in seconds.
    z = zone[1,4].to_i
    @zone_sec = z/100*3600 + z%100*60
    @zone_sec = -@zone_sec if zone[0,1] == "-"
  end

  attr_reader :year, :month, :day, :hour, :min, :sec, :zone

  # The instant as a Time: the stored fields are read as UTC and the zone
  # offset is then subtracted.
  def time()
    t = Time.utc(@year, @month, @day, @hour, @min, @sec)
    Time.at(t.to_i - @zone_sec)
  end

  private

  # Number of days in month +m+ of year +y+ (Gregorian leap-year rule).
  def days_in_month(y, m)
    return 31 if [1,3,5,7,8,10,12].include? m
    return 30 if [4,6,9,11].include? m
    (y%4 == 0 and (y%100 != 0 or y%400 == 0)) ? 29 : 28
  end
end
|
198
|
+
|
199
|
+
module_function
|
200
|
+
|
201
|
+
# Parses the header field +name+ with raw text +value+.
#
# The token type is looked up in HEADER_TYPE by the lower-cased name.
# Unknown names return the value with its trailing line terminator removed.
# An Array entry is treated as [namespace, token type] and is parsed by
# namespace::Parser; any other entry is parsed by this module's Parser.
# +opt+ is passed to the parser's constructor.
def parse(name, value, opt={})
  htype = HEADER_TYPE[name.downcase]
  return value.chomp unless htype
  return Parser.new(opt).parse(htype, value) unless htype.is_a? Array
  htype[0]::Parser.new(opt).parse(htype[1], value)
end
|
212
|
+
end
|