rmail 0.17
Sign up to get free protection for your applications and to get access to all the features.
- data/NEWS +309 -0
- data/NOTES +14 -0
- data/README +83 -0
- data/THANKS +25 -0
- data/TODO +112 -0
- data/guide/Intro.txt +122 -0
- data/guide/MIME.txt +6 -0
- data/guide/TableOfContents.txt +13 -0
- data/install.rb +1023 -0
- data/lib/rmail.rb +50 -0
- data/lib/rmail/address.rb +829 -0
- data/lib/rmail/header.rb +987 -0
- data/lib/rmail/mailbox.rb +62 -0
- data/lib/rmail/mailbox/mboxreader.rb +182 -0
- data/lib/rmail/message.rb +201 -0
- data/lib/rmail/parser.rb +412 -0
- data/lib/rmail/parser/multipart.rb +217 -0
- data/lib/rmail/parser/pushbackreader.rb +173 -0
- data/lib/rmail/serialize.rb +190 -0
- data/lib/rmail/utils.rb +59 -0
- data/rmail.gemspec +17 -0
- data/tests/addrgrammar.txt +113 -0
- data/tests/data/mbox.odd +4 -0
- data/tests/data/mbox.simple +8 -0
- data/tests/data/multipart/data.1 +5 -0
- data/tests/data/multipart/data.10 +1 -0
- data/tests/data/multipart/data.11 +9 -0
- data/tests/data/multipart/data.12 +9 -0
- data/tests/data/multipart/data.13 +3 -0
- data/tests/data/multipart/data.14 +3 -0
- data/tests/data/multipart/data.15 +3 -0
- data/tests/data/multipart/data.16 +3 -0
- data/tests/data/multipart/data.17 +0 -0
- data/tests/data/multipart/data.2 +5 -0
- data/tests/data/multipart/data.3 +2 -0
- data/tests/data/multipart/data.4 +3 -0
- data/tests/data/multipart/data.5 +1 -0
- data/tests/data/multipart/data.6 +2 -0
- data/tests/data/multipart/data.7 +3 -0
- data/tests/data/multipart/data.8 +5 -0
- data/tests/data/multipart/data.9 +4 -0
- data/tests/data/parser.badmime1 +4 -0
- data/tests/data/parser.badmime2 +6 -0
- data/tests/data/parser.nested-multipart +75 -0
- data/tests/data/parser.nested-simple +12 -0
- data/tests/data/parser.nested-simple2 +16 -0
- data/tests/data/parser.nested-simple3 +21 -0
- data/tests/data/parser.rfc822 +65 -0
- data/tests/data/parser.simple-mime +24 -0
- data/tests/data/parser/multipart.1 +8 -0
- data/tests/data/parser/multipart.10 +4 -0
- data/tests/data/parser/multipart.11 +12 -0
- data/tests/data/parser/multipart.12 +12 -0
- data/tests/data/parser/multipart.13 +6 -0
- data/tests/data/parser/multipart.14 +6 -0
- data/tests/data/parser/multipart.15 +6 -0
- data/tests/data/parser/multipart.16 +6 -0
- data/tests/data/parser/multipart.2 +8 -0
- data/tests/data/parser/multipart.3 +5 -0
- data/tests/data/parser/multipart.4 +6 -0
- data/tests/data/parser/multipart.5 +4 -0
- data/tests/data/parser/multipart.6 +5 -0
- data/tests/data/parser/multipart.7 +6 -0
- data/tests/data/parser/multipart.8 +8 -0
- data/tests/data/parser/multipart.9 +7 -0
- data/tests/data/transparency/absolute.1 +5 -0
- data/tests/data/transparency/absolute.2 +1 -0
- data/tests/data/transparency/absolute.3 +2 -0
- data/tests/data/transparency/absolute.4 +3 -0
- data/tests/data/transparency/absolute.5 +4 -0
- data/tests/data/transparency/absolute.6 +49 -0
- data/tests/data/transparency/message.1 +73 -0
- data/tests/data/transparency/message.2 +34 -0
- data/tests/data/transparency/message.3 +63 -0
- data/tests/data/transparency/message.4 +5 -0
- data/tests/data/transparency/message.5 +15 -0
- data/tests/data/transparency/message.6 +1185 -0
- data/tests/runtests.rb +35 -0
- data/tests/testaddress.rb +1192 -0
- data/tests/testbase.rb +207 -0
- data/tests/testheader.rb +1207 -0
- data/tests/testmailbox.rb +47 -0
- data/tests/testmboxreader.rb +161 -0
- data/tests/testmessage.rb +257 -0
- data/tests/testparser.rb +634 -0
- data/tests/testparsermultipart.rb +205 -0
- data/tests/testpushbackreader.rb +40 -0
- data/tests/testserialize.rb +264 -0
- data/tests/testtestbase.rb +112 -0
- data/tests/testtranspparency.rb +105 -0
- metadata +143 -0
@@ -0,0 +1,62 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#--
|
3
|
+
# Copyright (c) 2002, 2003 Matt Armstrong. All rights reserved.
|
4
|
+
#
|
5
|
+
# Redistribution and use in source and binary forms, with or without
|
6
|
+
# modification, are permitted provided that the following conditions are met:
|
7
|
+
#
|
8
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
9
|
+
# this list of conditions and the following disclaimer.
|
10
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
11
|
+
# notice, this list of conditions and the following disclaimer in the
|
12
|
+
# documentation and/or other materials provided with the distribution.
|
13
|
+
# 3. The name of the author may not be used to endorse or promote products
|
14
|
+
# derived from this software without specific prior written permission.
|
15
|
+
#
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
17
|
+
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
18
|
+
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
|
19
|
+
# NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
20
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
21
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
22
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
23
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
24
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
25
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
26
|
+
#
|
27
|
+
#++
|
28
|
+
# Implements the RMail::Mailbox module.
|
29
|
+
|
30
|
+
module RMail

  # The RMail::Mailbox module contains a few methods that are useful
  # for working with mailboxes.
  module Mailbox

    class << self

      # Parse a Unix mbox style mailbox.  These mailboxes separate
      # individual messages with a line beginning with the string
      # "From ".
      #
      # Both +input+ and +line_separator+ are handed unchanged to
      # RMail::Mailbox::MBoxReader.new; +line_separator+ defaults to
      # $/.
      #
      # If a block is given, each raw message (a string) is yielded to
      # the block and nil is returned.  Otherwise an array of the raw
      # message strings is returned.
      def parse_mbox(input, line_separator = $/)
        # The reader is loaded on first use rather than when this file
        # is loaded.
        require 'rmail/mailbox/mboxreader'
        mbox = RMail::Mailbox::MBoxReader.new(input, line_separator)

        if block_given?
          mbox.each_message { |reader| yield reader.read(nil) }
          nil
        else
          messages = []
          mbox.each_message { |reader| messages << reader.read(nil) }
          messages
        end
      end

    end
  end
end
|
@@ -0,0 +1,182 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (c) 2002, 2003 Matt Armstrong. All rights reserved.
|
3
|
+
#
|
4
|
+
# Redistribution and use in source and binary forms, with or without
|
5
|
+
# modification, are permitted provided that the following conditions are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer.
|
9
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
10
|
+
# notice, this list of conditions and the following disclaimer in the
|
11
|
+
# documentation and/or other materials provided with the distribution.
|
12
|
+
# 3. The name of the author may not be used to endorse or promote products
|
13
|
+
# derived from this software without specific prior written permission.
|
14
|
+
#
|
15
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
16
|
+
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
17
|
+
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
|
18
|
+
# NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
19
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
20
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
21
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
22
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
23
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
24
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
25
|
+
#
|
26
|
+
#++
|
27
|
+
# Implements the RMail::Mailbox::MBoxReader class.
|
28
|
+
|
29
|
+
require 'rmail/parser/pushbackreader'
|
30
|
+
|
31
|
+
module RMail
  module Mailbox

    # Class that can parse Unix mbox style mailboxes.  These mailboxes
    # separate individual messages with a line beginning with the
    # string "From ".
    #
    # Typical usage:
    #
    #  File.open("file.mbox") { |file|
    #    RMail::Mailbox::MBoxReader.new(file).each_message { |input|
    #      message = RMail::Parser.read(input)
    #      # do something with the message
    #    }
    #  }
    #
    # Or see RMail::Mailbox.parse_mbox for a more convenient
    # interface.
    #
    class MBoxReader < RMail::Parser::PushbackReader

      # Creates a new MBoxReader that reads from `input' with lines
      # that end with `line_separator'.
      #
      # `input' can either be an IO source (an object that responds to
      # the "read" method in the same way as a standard IO object) or
      # a String.
      #
      # `line_separator' defaults to $/, and useful values are
      # probably limited to "\n" (Unix) and "\r\n" (DOS/Windows).
      def initialize(input, line_separator = $/)
        super(input)
        @end_of_message = false
        @chunk_minsize = 0
        @sep = line_separator
        # Trailing bytes of the data returned so far for the current
        # message; nil until something has been returned.
        @tail = nil

        # This regexp will match a From_ header, or some prefix.
        re_string = RMail::Parser::PushbackReader.
          maybe_contains_re("#{@sep}From ")
        @partial_from_re = Regexp.new(re_string)

        # This regexp will match an entire From_ header.  The separator
        # is escaped so that it is matched literally, the same way the
        # rest of this class treats it (@sep.length, != @sep).
        sep_re = Regexp.escape(@sep.to_s)
        @entire_from_re = /\A#{sep_re}From .*?#{sep_re}/
      end

      alias_method :parent_read_chunk, :read_chunk

      # Reads some data from the current message and returns it.  The
      # `size' argument is just a suggestion, and the returned string
      # can be larger or smaller.  When `size' is nil, then the entire
      # message is returned.
      #
      # Once all data from the current message has been read, #read
      # returns nil and #next must be called to begin reading from the
      # next message.  You can use #eof to tell if there is any more
      # data to be read from the input source.
      #
      # When the data returned for a message did not end with the line
      # separator, one extra chunk holding just the separator is
      # returned before nil.
      def read_chunk(size)
        chunk = read_chunk_low(size)
        if chunk
          # Remember how the data returned so far ends.
          if chunk.length > @sep.length
            @tail = chunk[-@sep.length .. -1]
          else
            @tail ||= ''
            @tail << chunk
          end
        elsif @tail
          # End of this message: supply the separator if it is missing.
          if @tail[-@sep.length .. -1] != @sep
            chunk = @sep
          end
          @tail = nil
        end
        chunk
      end

      # Advances to the next message to be read.  Call this after
      # #read returns nil.
      #
      # Note: Once #read returns nil, you can call #eof before or
      # after calling #next to tell if there actually is a next
      # message to read.
      def next
        @end_of_message = false
        @tail = nil
      end

      alias_method :parent_eof, :eof

      # Returns true if the next call to read_chunk will return nil.
      def eof
        parent_eof && @tail.nil?
      end

      # Yield self until eof, calling next after each yield.
      #
      # This method makes it simple to read messages successively out
      # of the mailbox.  See the class description for a code example.
      def each_message
        until eof
          yield self
          self.next
        end
      end

      private

      # Returns the next chunk of the current message, or nil once a
      # complete From_ line has been seen (until #next is called) or
      # the underlying reader has no more data.
      def read_chunk_low(size)
        return nil if @end_of_message
        if chunk = parent_read_chunk(size)
          # Read at least @chunk_minsize bytes.
          while chunk.length < @chunk_minsize && more = parent_read_chunk(size)
            chunk << more
          end
          if match = @partial_from_re.match(chunk)
            # We matched what might be a From_ separator.  Separate
            # the chunk into what came before and what came after it.
            mbegin = match.begin(0)
            rest = chunk[mbegin .. -1]

            if @entire_from_re =~ rest
              # We've got a full From_ line, so set the end of message
              # flag and get rid of the line separator present just
              # before the From_.
              @end_of_message = true
              @chunk_minsize = 0
              rest[0, @sep.length] = "" # painful
            else
              # Make sure that next time we read more than just the
              # pushback.
              @chunk_minsize = rest.length + 1
            end

            # Return the whole chunk with a partially matched From_
            # when there is nothing further to read.
            unless !@end_of_message && parent_eof
              # Otherwise, push back the From_ and return the
              # pre-match.
              pushback(rest)
              if mbegin == 0 && @end_of_message
                chunk = nil
              else
                chunk = chunk[0, mbegin]
              end
            end

          end
        end
        return chunk
      end
    end
  end
end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (C) 2001, 2002, 2003 Matt Armstrong. All rights
|
3
|
+
# reserved.
|
4
|
+
#
|
5
|
+
# Redistribution and use in source and binary forms, with or without
|
6
|
+
# modification, are permitted provided that the following conditions are met:
|
7
|
+
#
|
8
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
9
|
+
# this list of conditions and the following disclaimer.
|
10
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
11
|
+
# notice, this list of conditions and the following disclaimer in the
|
12
|
+
# documentation and/or other materials provided with the distribution.
|
13
|
+
# 3. The name of the author may not be used to endorse or promote products
|
14
|
+
# derived from this software without specific prior written permission.
|
15
|
+
#
|
16
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
17
|
+
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
18
|
+
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
|
19
|
+
# NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
20
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
21
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
22
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
23
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
24
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
25
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
26
|
+
#
|
27
|
+
#++
|
28
|
+
# Implements the RMail::Message class.
|
29
|
+
|
30
|
+
require 'rmail/header.rb'
|
31
|
+
|
32
|
+
module RMail

  # The RMail::Message is an object representation of a standard
  # Internet email message, including MIME multipart messages.
  #
  # An RMail::Message object represents a message header (held in the
  # contained RMail::Header object) and a message body.  The message
  # body may either be a single String for single part messages or an
  # Array of RMail::Message objects for MIME multipart messages.
  class Message

    # Create a new, empty, RMail::Message.
    def initialize
      @header = RMail::Header.new
      @body = nil
      @epilogue = nil
      @preamble = nil
    end

    # Test if this message is structured exactly the same as the other
    # message.  This is useful mainly for testing.
    #
    # Returns false when +other+ does not offer the preamble, epilogue,
    # header and body readers (for example nil or a String).
    def ==(other)
      comparable = [:preamble, :epilogue, :header, :body].all? { |reader|
        other.respond_to?(reader)
      }
      return false unless comparable

      @preamble == other.preamble &&
        @epilogue == other.epilogue &&
        @header == other.header &&
        @body == other.body
    end

    # Returns the body of the message as a String or Array.
    #
    # If #multipart? returns true, it will be an array of
    # RMail::Message objects.  Otherwise it will be a String.
    #
    # See also #header.
    def body
      return @body
    end

    # Sets the body of the message to the given value.  It should
    # either be a String or an Array of RMail::Message objects.
    def body=(s)
      @body = s
    end

    # Returns the RMail::Header object.
    #
    # See also #body.
    def header()
      return @header
    end

    # Return true if the message consists of multiple parts.
    def multipart?
      @body.is_a?(Array)
    end

    # Add a part to the message.  After this method is called, the
    # #multipart? method will return true and the #body method will
    # return an array of parts.
    #
    # An existing non-array body is kept as the first element of the
    # new array.
    def add_part(part)
      if @body.nil?
        @body = [part]
      elsif @body.is_a?(Array)
        @body.push(part)
      else
        @body = [@body, part]
      end
    end

    # Decode the body of this message.
    #
    # If the body of this message is encoded with
    # <tt>quoted-printable</tt> or <tt>base64</tt>, this function will
    # decode the data into its original form and return it as a
    # String.  If the body is not encoded, it is returned unaltered.
    #
    # This only works when the message is not a multipart; a TypeError
    # is raised otherwise.  The
    # <tt>Content-Transfer-Encoding:</tt> header field is consulted to
    # determine the encoding of the body part.
    def decode
      raise TypeError, "Can not decode a multipart message." if multipart?
      case header.fetch('content-transfer-encoding', '7bit').strip.downcase
      when 'quoted-printable'
        Utils.quoted_printable_decode(@body)
      when 'base64'
        Utils.base64_decode(@body)
      else
        @body
      end
    end

    # Get the indicated part from a multipart message.  Raises a
    # TypeError when the message is not a multipart.
    def part(i)
      raise TypeError,
        "Can not get part on a single part message." unless multipart?
      @body[i]
    end

    # Access the epilogue string for this message.  The epilogue
    # string is relevant only for multipart messages.  It is the text
    # that occurs after all parts of the message and is generally nil.
    attr_accessor :epilogue

    # Access the preamble string for this message.  The preamble
    # string is relevant only for multipart messages.  It is the text
    # that occurs just before the first part of the message, and is
    # generally nil or simple English text describing the nature of
    # the message.
    attr_accessor :preamble

    # Returns the entire message in a single string.  This uses the
    # RMail::Serialize class.
    def to_s()
      # Loaded on first use rather than when this file is loaded.
      require 'rmail/serialize'
      RMail::Serialize.new('').serialize(self)
    end

    # Return each part of this message.  Raises a TypeError when the
    # message is not a multipart.
    #
    # FIXME: not tested
    def each_part
      raise TypeError, "not a multipart message" unless multipart?
      @body.each do |part|
        yield part
      end
    end

    # Call the supplied block for each line of the message.  Each line
    # will contain a trailing newline (<tt>\n</tt>).
    def each()
      # FIXME: this is incredibly inefficient!  The only users of this
      # is RMail::Deliver -- get them to use a RMail::Serialize object.
      #
      # String#each no longer exists in current Ruby; each_line is the
      # equivalent call.
      to_s.each_line("\n") { |line|
        yield line
      }
    end

    # This is used by the RMail::Parser to set the MIME multipart
    # delimiter strings found in the message.  These delimiters are
    # then used when serializing the message again.
    #
    # +delimiters+ must hold exactly one more entry than the body has
    # parts, otherwise an ArgumentError is raised.
    #
    # Normal uses of RMail::Message will never use this method, and so
    # it is left undocumented.
    def set_delimiters(delimiters, boundary) # :nodoc:
      raise TypeError, "not a multipart message" unless multipart?
      raise ArgumentError, "delimiter array wrong size" unless
        delimiters.length == @body.length + 1
      @delimiters = delimiters.to_ary
      @delimiters_boundary = boundary.to_str
    end

    # This is used by the serializing functions to retrieve the MIME
    # multipart delimiter strings found while parsing the message.
    # These delimiters are then used when serializing the message
    # again.
    #
    # The stored delimiters are discarded (and [nil, nil] returned)
    # when they no longer fit the message: it is not a multipart, the
    # number of parts changed, or the content-type boundary parameter
    # differs from the one recorded.
    #
    # Normal uses of RMail::Message will never use this method, and so
    # it is left undocumented.
    def get_delimiters # :nodoc:
      unless multipart? && @delimiters && @delimiters_boundary &&
          @delimiters.length == @body.length + 1 &&
          header.param('content-type', 'boundary') == @delimiters_boundary
        @delimiters = nil
        @delimiters_boundary = nil
      end
      [ @delimiters, @delimiters_boundary ]
    end

  end
end
|
data/lib/rmail/parser.rb
ADDED
@@ -0,0 +1,412 @@
|
|
1
|
+
#--
|
2
|
+
# Copyright (C) 2002, 2003, 2004 Matt Armstrong. All rights reserved.
|
3
|
+
#
|
4
|
+
# Redistribution and use in source and binary forms, with or without
|
5
|
+
# modification, are permitted provided that the following conditions are met:
|
6
|
+
#
|
7
|
+
# 1. Redistributions of source code must retain the above copyright notice,
|
8
|
+
# this list of conditions and the following disclaimer.
|
9
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
10
|
+
# notice, this list of conditions and the following disclaimer in the
|
11
|
+
# documentation and/or other materials provided with the distribution.
|
12
|
+
# 3. The name of the author may not be used to endorse or promote products
|
13
|
+
# derived from this software without specific prior written permission.
|
14
|
+
#
|
15
|
+
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
16
|
+
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
17
|
+
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
|
18
|
+
# NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
19
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
20
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
21
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
22
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
23
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
24
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
25
|
+
#
|
26
|
+
#++
|
27
|
+
# Implements the RMail::Parser, RMail::StreamParser and
|
28
|
+
# RMail::StreamHandler classes.
|
29
|
+
|
30
|
+
require 'rmail/message'
|
31
|
+
require 'rmail/parser/multipart'
|
32
|
+
|
33
|
+
module RMail
|
34
|
+
|
35
|
+
# = Overview
|
36
|
+
#
|
37
|
+
# An RMail::StreamHandler documents the set of methods a
|
38
|
+
# RMail::StreamParser handler must implement. See
|
39
|
+
# RMail::StreamParser.parse. This is a low level interface to the
|
40
|
+
# RMail message parser.
|
41
|
+
#
|
42
|
+
# = Order of Method Calls (Grammar)
|
43
|
+
#
|
44
|
+
# Calls to the methods of this class follow a specific grammar,
|
45
|
+
# described informally below. The words in all caps are productions
|
46
|
+
# in the grammar, while the lower case words are method calls to
|
47
|
+
# this object.
|
48
|
+
#
|
49
|
+
# MESSAGE:: [ #mbox_from ] *( #header_field )
|
50
|
+
# ( BODY / MULTIPART_BODY )
|
51
|
+
#
|
52
|
+
# BODY:: #body_begin *( #body_chunk ) #body_end
|
53
|
+
#
|
54
|
+
# MULTIPART_BODY:: #multipart_body_begin
|
55
|
+
# *( #preamble_chunk )
|
56
|
+
# *( #part_begin MESSAGE #part_end)
|
57
|
+
# *( #epilogue_chunk )
|
58
|
+
# #multipart_body_end
|
59
|
+
#
|
60
|
+
# = Order of Method Calls (English)
|
61
|
+
#
|
62
|
+
# If the grammar above is not clear, here is a description in English.
|
63
|
+
#
|
64
|
+
# The parser begins calling #header_field, possibly calling
|
65
|
+
# #mbox_from for the first line. Then it determines if the message
|
66
|
+
# was a MIME multipart message.
|
67
|
+
#
|
68
|
+
# If the message is not a MIME multipart, the parser calls
|
69
|
+
# #body_begin once, then #body_chunk any number of times, then
|
70
|
+
# #body_end.
|
71
|
+
#
|
72
|
+
# If the message header is a MIME multipart message, then
|
73
|
+
# #multipart_body_begin is called, followed by any number of calls
|
74
|
+
# to #preamble_chunk. Then for each part parsed, #part_begin is
|
75
|
+
# called, followed by a recursive set of calls described by the
|
76
|
+
# "MESSAGE" production above, and then #part_end. After all parts
|
77
|
+
# are parsed, any number of calls to #epilogue_chunk are followed by
|
78
|
+
# a single call to #multipart_body_end.
|
79
|
+
#
|
80
|
+
# The recursive nature of MIME multipart messages is represented by
|
81
|
+
# the recursive invocation of the "MESSAGE" production in the
|
82
|
+
# grammar above.
|
83
|
+
class StreamHandler

  # Called with the text of a Unix MBOX "From " line found in the
  # message header.
  def mbox_from(line); end

  # Called when a header field is parsed.  The +field+ is the full
  # text of the field, the +name+ is the name of the field and the
  # +value+ is the field's value with leading and trailing whitespace
  # removed.  Note that both +field+ and +value+ may be multi-line
  # strings.
  def header_field(field, name, value); end

  # Called before a non-multipart message body is about to be parsed.
  def body_begin; end

  # Called with a string chunk of data from a non-multipart message
  # body.  The string does not necessarily begin or end on any
  # particular boundary.
  def body_chunk(chunk); end

  # Called after all of the non-multipart message body has been
  # parsed.
  def body_end; end

  # Called before a multipart message body is about to be parsed.
  def multipart_body_begin; end

  # Called with a chunk of data from a multipart message body's
  # preamble.  The preamble is any text that appears before the first
  # part of the multipart message body.
  def preamble_chunk(chunk); end

  # Called when a part of a multipart body begins.
  def part_begin; end

  # Called when a part of a multipart body ends.
  def part_end; end

  # Called with a chunk of data from a multipart message body's
  # epilogue.  The epilogue is any text that appears after the last
  # part of the multipart message body.
  def epilogue_chunk(chunk); end

  # Called after a multipart message body has been completely parsed.
  #
  # The +delimiters+ is an Array of strings, one for each boundary
  # string found in the multipart body.  The +boundary+ is the
  # boundary string used to delimit each part in the multipart body.
  # You can normally ignore both +delimiters+ and +boundary+ if you
  # are concerned only about message content.
  def multipart_body_end(delimiters, boundary); end
end
|
150
|
+
|
151
|
+
# The RMail::StreamParser is a low level message parsing API. It is
|
152
|
+
# useful when you are interested in serially examining all message
|
153
|
+
# content but are not interested in a full object representation of
|
154
|
+
# the message. See StreamParser.parse.
|
155
|
+
class StreamParser
|
156
|
+
|
157
|
+
class << self
|
158
|
+
|
159
|
+
# Parse a message from an input source. This method returns
|
160
|
+
# nothing. Instead, the supplied +handler+ is expected to
|
161
|
+
# implement the same methods as RMail::StreamHandler. The
|
162
|
+
# message structure can be inferred from the methods called on
|
163
|
+
# the +handler+. The +input+ can be any Ruby IO source or a
|
164
|
+
# String.
|
165
|
+
#
|
166
|
+
# This is a low level parsing API. For a message parser that
|
167
|
+
# returns an RMail::Message object, see the RMail::Parser class.
|
168
|
+
# RMail::Parser is implemented using RMail::StreamParser.
|
169
|
+
def parse(input, handler)
|
170
|
+
RMail::StreamParser.new(input, handler).parse
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
def initialize(input, handler) # :nodoc:
|
175
|
+
@input = input
|
176
|
+
@handler = handler
|
177
|
+
@chunk_size = nil
|
178
|
+
end
|
179
|
+
|
180
|
+
def parse # :nodoc:
|
181
|
+
input = RMail::Parser::PushbackReader.new(@input)
|
182
|
+
input.chunk_size = @chunk_size if @chunk_size
|
183
|
+
parse_low(input, 0)
|
184
|
+
return nil
|
185
|
+
end
|
186
|
+
|
187
|
+
# Change the chunk size used to read the message. This is useful
|
188
|
+
# mostly for testing, so we don't document it.
|
189
|
+
attr_accessor :chunk_size # :nodoc:
|
190
|
+
|
191
|
+
private
|
192
|
+
|
193
|
+
def parse_low(input, depth)
|
194
|
+
multipart_boundary = parse_header(input, depth)
|
195
|
+
if multipart_boundary
|
196
|
+
parse_multipart_body(input, depth, multipart_boundary)
|
197
|
+
else
|
198
|
+
parse_singlepart_body(input, depth)
|
199
|
+
end
|
200
|
+
end
|
201
|
+
|
202
|
+
# Reads and parses the header of the entity at the front of +input+.
#
# Chunks are read from +input+ and accumulated until either a leading
# newline or a blank line ("\n\n") has been seen, or +input+ is
# exhausted.  Whatever follows the header is pushed back onto +input+
# (nil is pushed back when nothing follows).
#
# Every header field is reported through @handler.header_field, and
# every line starting with "From " through @handler.mbox_from.
#
# Returns the +boundary+ parameter of the last Content-Type field
# seen, but only when the header also contains a MIME-Version field
# mentioning 1.0 or when +depth+ is greater than zero.  Returns nil in
# every other case.
def parse_header(input, depth)
  data = nil
  header = nil
  rest = nil
  boundary = nil

  while (chunk = input.read)
    data ||= ''
    data << chunk
    if data[0] == ?\n
      # A leading newline in the message is seen when parsing the
      # parts of a multipart message.  It means there are no
      # headers.  The body part starts directly after this
      # newline.
      rest = data[1..-1]
    else
      # rest stays nil until the blank line ending the header has
      # been read, which keeps the loop going.
      header, rest = data.split(/\n\n/, 2)
    end
    break if rest
  end
  input.pushback(rest)

  return nil unless header

  mime = false
  # Split on newlines that are not followed by whitespace so folded
  # (continued) field lines stay attached to their field.
  fields = header.split(/\n(?!\s)/)
  if fields.first =~ /^From /
    @handler.mbox_from(fields.first)
    fields.shift
  end

  fields.each do |field|
    if field =~ /^From /
      @handler.mbox_from(field)
    else
      name, value = RMail::Header::Field.parse(field)
      case name.downcase
      when 'mime-version'
        mime = true if value =~ /\b1\.0\b/
      when 'content-type'
        # FIXME: would be nice to have a procedural equivalent
        # to RMail::Header#param.
        content_type = RMail::Header.new
        content_type['content-type'] = value
        boundary = content_type.param('content-type', 'boundary')
      end
      @handler.header_field(field, name, value)
    end
  end

  # A boundary on a top-level entity is honoured only when the header
  # declared MIME-Version 1.0.
  boundary = nil unless mime || depth > 0
  boundary
end
|
255
|
+
|
256
|
+
# Parses a multipart body from +input+, whose parts are separated by
# +boundary+.
#
# +input+ is wrapped in an RMail::Parser::MultipartReader, which is
# then walked part by part.  Preamble and epilogue text is forwarded
# to the handler chunk by chunk; every other part is parsed
# recursively with parse_low at +depth+ + 1, bracketed by the
# handler's part_begin and part_end callbacks.
#
# The delimiter reported by the reader after each non-epilogue part
# ("" when it reports none) is collected and passed to the handler's
# multipart_body_end together with +boundary+.
def parse_multipart_body(input, depth, boundary)
  reader = RMail::Parser::MultipartReader.new(input, boundary)
  reader.chunk_size = @chunk_size if @chunk_size

  @handler.multipart_body_begin

  # Read each part, adding it to this entity as appropriate.
  delimiters = []
  while reader.next_part
    if reader.preamble?
      while (chunk = reader.read)
        @handler.preamble_chunk(chunk)
      end
    elsif reader.epilogue?
      while (chunk = reader.read)
        @handler.epilogue_chunk(chunk)
      end
    else
      @handler.part_begin
      parse_low(reader, depth + 1)
      @handler.part_end
    end
    delimiters << (reader.delimiter || "") unless reader.epilogue?
  end
  @handler.multipart_body_end(delimiters, boundary)
end
|
282
|
+
|
283
|
+
# Parses a non-multipart body: everything left in +input+ is read
# chunk by chunk and forwarded to the handler's body_chunk callback,
# bracketed by body_begin and body_end.  +depth+ is not used here.
#
# Returns whatever the handler's body_end returns.
def parse_singlepart_body(input, depth)
  @handler.body_begin
  while (chunk = input.read)
    @handler.body_chunk(chunk)
  end
  @handler.body_end
end
|
290
|
+
|
291
|
+
end
|
292
|
+
|
293
|
+
# The RMail::Parser class creates RMail::Message objects from Ruby
|
294
|
+
# IO objects or strings.
|
295
|
+
#
|
296
|
+
# To parse from a string:
|
297
|
+
# message = RMail::Parser.read(the_string)
|
298
|
+
#
|
299
|
+
# To parse from an IO object:
|
300
|
+
# message = File.open('my-message') { |f|
|
301
|
+
# RMail::Parser.read(f)
|
302
|
+
# }
|
303
|
+
#
|
304
|
+
# You can also parse from STDIN, etc.
|
305
|
+
# message = RMail::Parser.read(STDIN)
|
306
|
+
#
|
307
|
+
# In all cases, the parser consumes all input.
|
308
|
+
class Parser

  # This exception class is raised when the parser encounters an
  # error.
  #
  # Note: the parser tries hard to never raise exceptions -- this
  # error is raised only when the API is used incorrectly and not on
  # invalid input.
  class Error < StandardError; end

  # Creates a new parser.  The parser creates RMail::Message
  # objects.
  def initialize
    @chunk_size = nil
  end

  # Parse a message from the IO object +input+ and return a new
  # message.  The +input+ object can also be a string.
  def parse(input)
    handler = RMail::Parser::Handler.new
    parser = RMail::StreamParser.new(input, handler)
    parser.chunk_size = @chunk_size if @chunk_size
    parser.parse
    handler.message
  end

  # Change the chunk size used to read the message.  This is useful
  # mostly for testing.
  attr_accessor :chunk_size

  # Parse a message from the IO object +input+ and return a new
  # message.  The +input+ object can also be a string.  This is just
  # shorthand for:
  #
  #   RMail::Parser.new.parse(input)
  def self.read(input)
    Parser.new.parse(input)
  end

  # Stream handler that assembles the parser's callbacks into a tree
  # of RMail::Message objects.
  class Handler < RMail::StreamHandler # :nodoc:
    def initialize
      # Stack of messages under construction: the first entry is the
      # top-level message, the last is the entity currently being
      # filled in.
      @parts = [ RMail::Message.new ]
      # One slot per multipart body currently open.
      @preambles = []
      @epilogues = []
    end

    def mbox_from(field)
      @parts.last.header.mbox_from = field
    end

    def header_field(field, name, value)
      @parts.last.header.add_raw(field)
    end

    def body_begin
      @body = nil
    end

    def body_chunk(chunk)
      if @body
        @body << chunk
      else
        # Copy the first chunk: later chunks are appended in place
        # and the caller's string must not be modified.
        @body = chunk.dup
      end
    end

    def body_end
      @parts.last.body = @body
    end

    def multipart_body_begin
      @preambles.push(nil)
      @epilogues.push(nil)
    end

    def preamble_chunk(chunk)
      if @preambles.last
        @preambles.last << chunk
      else
        # Copy for the same reason as in body_chunk.
        @preambles[-1] = chunk.dup
      end
    end

    def epilogue_chunk(chunk)
      if @epilogues.last
        @epilogues.last << chunk
      else
        # Copy for the same reason as in body_chunk.
        @epilogues[-1] = chunk.dup
      end
    end

    def multipart_body_end(delimiters, boundary)
      @parts.last.preamble = @preambles.pop
      @parts.last.epilogue = @epilogues.pop
      # A multipart entity that received no parts still gets an
      # (empty) array body.
      if @parts.last.body.nil?
        @parts.last.body = []
      end
      @parts.last.set_delimiters(delimiters, boundary)
    end

    def part_begin
      @parts << RMail::Message.new
    end

    def part_end
      part = @parts.pop
      @parts.last.add_part(part)
    end

    # The top-level message built so far.
    def message
      @parts.first
    end
  end

end
|
412
|
+
end
|