rmail 0.17

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. data/NEWS +309 -0
  2. data/NOTES +14 -0
  3. data/README +83 -0
  4. data/THANKS +25 -0
  5. data/TODO +112 -0
  6. data/guide/Intro.txt +122 -0
  7. data/guide/MIME.txt +6 -0
  8. data/guide/TableOfContents.txt +13 -0
  9. data/install.rb +1023 -0
  10. data/lib/rmail.rb +50 -0
  11. data/lib/rmail/address.rb +829 -0
  12. data/lib/rmail/header.rb +987 -0
  13. data/lib/rmail/mailbox.rb +62 -0
  14. data/lib/rmail/mailbox/mboxreader.rb +182 -0
  15. data/lib/rmail/message.rb +201 -0
  16. data/lib/rmail/parser.rb +412 -0
  17. data/lib/rmail/parser/multipart.rb +217 -0
  18. data/lib/rmail/parser/pushbackreader.rb +173 -0
  19. data/lib/rmail/serialize.rb +190 -0
  20. data/lib/rmail/utils.rb +59 -0
  21. data/rmail.gemspec +17 -0
  22. data/tests/addrgrammar.txt +113 -0
  23. data/tests/data/mbox.odd +4 -0
  24. data/tests/data/mbox.simple +8 -0
  25. data/tests/data/multipart/data.1 +5 -0
  26. data/tests/data/multipart/data.10 +1 -0
  27. data/tests/data/multipart/data.11 +9 -0
  28. data/tests/data/multipart/data.12 +9 -0
  29. data/tests/data/multipart/data.13 +3 -0
  30. data/tests/data/multipart/data.14 +3 -0
  31. data/tests/data/multipart/data.15 +3 -0
  32. data/tests/data/multipart/data.16 +3 -0
  33. data/tests/data/multipart/data.17 +0 -0
  34. data/tests/data/multipart/data.2 +5 -0
  35. data/tests/data/multipart/data.3 +2 -0
  36. data/tests/data/multipart/data.4 +3 -0
  37. data/tests/data/multipart/data.5 +1 -0
  38. data/tests/data/multipart/data.6 +2 -0
  39. data/tests/data/multipart/data.7 +3 -0
  40. data/tests/data/multipart/data.8 +5 -0
  41. data/tests/data/multipart/data.9 +4 -0
  42. data/tests/data/parser.badmime1 +4 -0
  43. data/tests/data/parser.badmime2 +6 -0
  44. data/tests/data/parser.nested-multipart +75 -0
  45. data/tests/data/parser.nested-simple +12 -0
  46. data/tests/data/parser.nested-simple2 +16 -0
  47. data/tests/data/parser.nested-simple3 +21 -0
  48. data/tests/data/parser.rfc822 +65 -0
  49. data/tests/data/parser.simple-mime +24 -0
  50. data/tests/data/parser/multipart.1 +8 -0
  51. data/tests/data/parser/multipart.10 +4 -0
  52. data/tests/data/parser/multipart.11 +12 -0
  53. data/tests/data/parser/multipart.12 +12 -0
  54. data/tests/data/parser/multipart.13 +6 -0
  55. data/tests/data/parser/multipart.14 +6 -0
  56. data/tests/data/parser/multipart.15 +6 -0
  57. data/tests/data/parser/multipart.16 +6 -0
  58. data/tests/data/parser/multipart.2 +8 -0
  59. data/tests/data/parser/multipart.3 +5 -0
  60. data/tests/data/parser/multipart.4 +6 -0
  61. data/tests/data/parser/multipart.5 +4 -0
  62. data/tests/data/parser/multipart.6 +5 -0
  63. data/tests/data/parser/multipart.7 +6 -0
  64. data/tests/data/parser/multipart.8 +8 -0
  65. data/tests/data/parser/multipart.9 +7 -0
  66. data/tests/data/transparency/absolute.1 +5 -0
  67. data/tests/data/transparency/absolute.2 +1 -0
  68. data/tests/data/transparency/absolute.3 +2 -0
  69. data/tests/data/transparency/absolute.4 +3 -0
  70. data/tests/data/transparency/absolute.5 +4 -0
  71. data/tests/data/transparency/absolute.6 +49 -0
  72. data/tests/data/transparency/message.1 +73 -0
  73. data/tests/data/transparency/message.2 +34 -0
  74. data/tests/data/transparency/message.3 +63 -0
  75. data/tests/data/transparency/message.4 +5 -0
  76. data/tests/data/transparency/message.5 +15 -0
  77. data/tests/data/transparency/message.6 +1185 -0
  78. data/tests/runtests.rb +35 -0
  79. data/tests/testaddress.rb +1192 -0
  80. data/tests/testbase.rb +207 -0
  81. data/tests/testheader.rb +1207 -0
  82. data/tests/testmailbox.rb +47 -0
  83. data/tests/testmboxreader.rb +161 -0
  84. data/tests/testmessage.rb +257 -0
  85. data/tests/testparser.rb +634 -0
  86. data/tests/testparsermultipart.rb +205 -0
  87. data/tests/testpushbackreader.rb +40 -0
  88. data/tests/testserialize.rb +264 -0
  89. data/tests/testtestbase.rb +112 -0
  90. data/tests/testtranspparency.rb +105 -0
  91. metadata +143 -0
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright (c) 2002, 2003 Matt Armstrong. All rights reserved.
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright
11
+ # notice, this list of conditions and the following disclaimer in the
12
+ # documentation and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
19
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ #
27
+ #++
28
+ # Implements the RMail::Mailbox module.
29
+
30
+ module RMail
31
+
32
# Helper methods for working with mailboxes.
module Mailbox

  class << self

    # Split a Unix mbox style mailbox into its raw messages.  In this
    # format individual messages are separated by a line beginning
    # with the string "From ".
    #
    # +input+ and +line_separator+ are handed unchanged to
    # RMail::Mailbox::MBoxReader.new.
    #
    # With a block, each raw message (a String) is yielded in turn and
    # nil is returned.  Without a block, an Array holding every raw
    # message String is returned.
    def parse_mbox(input, line_separator = $/)
      # Loaded lazily so the reader is only pulled in when it is needed.
      require 'rmail/mailbox/mboxreader'
      mbox = RMail::Mailbox::MBoxReader.new(input, line_separator)
      if block_given?
        mbox.each_message { |message| yield message.read(nil) }
        nil
      else
        messages = []
        mbox.each_message { |message| messages << message.read(nil) }
        messages
      end
    end

  end
end
62
+ end
@@ -0,0 +1,182 @@
1
+ #--
2
+ # Copyright (c) 2002, 2003 Matt Armstrong. All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without
5
+ # modification, are permitted provided that the following conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright notice,
8
+ # this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above copyright
10
+ # notice, this list of conditions and the following disclaimer in the
11
+ # documentation and/or other materials provided with the distribution.
12
+ # 3. The name of the author may not be used to endorse or promote products
13
+ # derived from this software without specific prior written permission.
14
+ #
15
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
18
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
20
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+ #
26
+ #++
27
+ # Implements the RMail::Mailbox::MBoxReader class.
28
+
29
+ require 'rmail/parser/pushbackreader'
30
+
31
+ module RMail
32
+ module Mailbox
33
+
34
# Class that can parse Unix mbox style mailboxes.  These mailboxes
# separate individual messages with a line beginning with the
# string "From ".
#
# Typical usage:
#
#  File.open("file.mbox") { |file|
#    RMail::Mailbox::MBoxReader.new(file).each_message { |input|
#      message = RMail::Parser.read(input)
#      # do something with the message
#    }
#  }
#
# Or see RMail::Mailbox.parse_mbox for a more convenient
# interface.
#
class MBoxReader < RMail::Parser::PushbackReader

  # Creates a new MBoxReader that reads from `input' with lines
  # that end with `line_separator'.
  #
  # `input' can either be an IO source (an object that responds to
  # the "read" method in the same way as a standard IO object) or
  # a String.
  #
  # `line_separator' defaults to $/, and useful values are
  # probably limited to "\n" (Unix) and "\r\n" (DOS/Windows).
  def initialize(input, line_separator = $/)
    super(input)
    @end_of_message = false
    @chunk_minsize = 0
    @sep = line_separator
    @tail = nil

    # This regexp will match a From_ header, or some prefix.
    re_string = RMail::Parser::PushbackReader.
      maybe_contains_re("#{@sep}From ")
    @partial_from_re = Regexp.new(re_string)

    # This regexp will match an entire From_ header.  The separator is
    # escaped so that it is always matched literally, even if it
    # contains characters that are special inside a regexp.
    quoted_sep = Regexp.escape(@sep.to_s)
    @entire_from_re = /\A#{quoted_sep}From .*?#{quoted_sep}/
  end

  alias_method :parent_read_chunk, :read_chunk

  # Reads some data from the current message and returns it.  The
  # `size' argument is just a suggestion, and the returned string
  # can be larger or smaller.  When `size' is nil, then the entire
  # message is returned.
  #
  # Once all data from the current message has been read, #read
  # returns nil and #next must be called to begin reading from the
  # next message.  You can use #eof to tell if there is any more
  # data to be read from the input source.
  #
  # The last few characters returned are remembered in @tail.  When
  # the message runs out and that tail is not the line separator, one
  # extra chunk holding just the separator is returned, so every
  # message read through this method ends with the separator.
  def read_chunk(size)
    chunk = read_chunk_low(size)
    if chunk
      if chunk.length > @sep.length
        @tail = chunk[-@sep.length .. -1]
      else
        @tail ||= ''
        @tail << chunk
      end
    elsif @tail
      if @tail[-@sep.length .. -1] != @sep
        chunk = @sep
      end
      @tail = nil
    end
    chunk
  end

  # Advances to the next message to be read.  Call this after
  # #read returns nil.
  #
  # Note: Once #read returns nil, you can call #eof before or
  # after calling #next to tell if there actually is a next
  # message to read.
  def next
    @end_of_message = false
    @tail = nil
  end

  alias_method :parent_eof, :eof

  # Returns true if the next call to read_chunk will return nil.
  def eof
    parent_eof && @tail.nil?
  end

  # Yield self until eof, calling next after each yield.
  #
  # This method makes it simple to read messages successively out
  # of the mailbox.  See the class description for a code example.
  def each_message
    until eof
      yield self
      self.next
    end
  end

  private

  # Reads the next chunk of the current message, stopping in front of
  # the From_ line that starts the following message.  Returns nil
  # once the end of the current message has been reached.
  def read_chunk_low(size)
    return nil if @end_of_message
    if chunk = parent_read_chunk(size)
      # Read at least @chunk_minsize bytes.
      while chunk.length < @chunk_minsize && more = parent_read_chunk(size)
        chunk << more
      end
      if match = @partial_from_re.match(chunk)
        # We matched what might be a From_ separator.  Separate
        # the chunk into what came before and what came after it.
        mbegin = match.begin(0)
        rest = chunk[mbegin .. -1]

        if @entire_from_re =~ rest
          # We've got a full From_ line, so set the end of message
          # flag and get rid of the line separator present just
          # before the From_.
          @end_of_message = true
          @chunk_minsize = 0
          rest[0, @sep.length] = "" # painful
        else
          # Make sure that next time we read more than just the
          # pushback.
          @chunk_minsize = rest.length + 1
        end

        # Return the whole chunk with a partially matched From_
        # when there is nothing further to read.
        unless !@end_of_message && parent_eof
          # Otherwise, push back the From_ and return the
          # pre-match.
          pushback(rest)
          if mbegin == 0 && @end_of_message
            chunk = nil
          else
            chunk = chunk[0, mbegin]
          end
        end

      end
    end
    return chunk
  end
end
181
+ end
182
+ end
@@ -0,0 +1,201 @@
1
+ #--
2
+ # Copyright (C) 2001, 2002, 2003 Matt Armstrong. All rights
3
+ # reserved.
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright
11
+ # notice, this list of conditions and the following disclaimer in the
12
+ # documentation and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
19
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ #
27
+ #++
28
+ # Implements the RMail::Message class.
29
+
30
+ require 'rmail/header.rb'
31
+
32
+ module RMail
33
+
34
# The RMail::Message is an object representation of a standard
# Internet email message, including MIME multipart messages.
#
# An RMail::Message object represents a message header (held in the
# contained RMail::Header object) and a message body.  The message
# body may either be a single String for single part messages or an
# Array of RMail::Message objects for MIME multipart messages.
class Message

  # Create a new, empty, RMail::Message.  The header starts out as a
  # fresh RMail::Header; body, preamble and epilogue start out nil.
  def initialize
    @header = RMail::Header.new
    @body = nil
    @epilogue = nil
    @preamble = nil
  end

  # Test if this message is structured exactly the same as the other
  # message.  This is useful mainly for testing.
  #
  # Returns false (rather than raising NoMethodError) when +other+
  # does not provide the preamble, epilogue, header and body readers,
  # e.g. when a message is compared with nil.
  def ==(other)
    comparable = [:preamble, :epilogue, :header, :body].all? { |name|
      other.respond_to?(name)
    }
    return false unless comparable
    @preamble == other.preamble &&
      @epilogue == other.epilogue &&
      @header == other.header &&
      @body == other.body
  end

  # Returns the body of the message as a String or Array.
  #
  # If #multipart? returns true, it will be an array of
  # RMail::Message objects.  Otherwise it will be a String.
  #
  # See also #header.
  def body
    @body
  end

  # Sets the body of the message to the given value.  It should
  # either be a String or an Array of RMail::Message objects.
  def body=(s)
    @body = s
  end

  # Returns the RMail::Header object.
  #
  # See also #body.
  def header
    @header
  end

  # Return true if the message consists of multiple parts, i.e. the
  # body is an Array.
  def multipart?
    @body.is_a?(Array)
  end

  # Add a part to the message.  After this method is called, the
  # #multipart? method will return true and the #body method will
  # return an array of parts.  An existing non-Array body becomes the
  # first part.
  def add_part(part)
    if @body.nil?
      @body = [part]
    elsif @body.is_a?(Array)
      @body.push(part)
    else
      @body = [@body, part]
    end
  end

  # Decode the body of this message.
  #
  # If the body of this message is encoded with
  # <tt>quoted-printable</tt> or <tt>base64</tt>, this function will
  # decode the data into its original form and return it as a
  # String.  If the body is not encoded, it is returned unaltered.
  #
  # This only works when the message is not a multipart; a TypeError
  # is raised otherwise.  The <tt>Content-Transfer-Encoding:</tt>
  # header field is consulted to determine the encoding of the body
  # part, and <tt>7bit</tt> is assumed when the field is absent.
  def decode
    raise TypeError, "Can not decode a multipart message." if multipart?
    case header.fetch('content-transfer-encoding', '7bit').strip.downcase
    when 'quoted-printable'
      Utils.quoted_printable_decode(@body)
    when 'base64'
      Utils.base64_decode(@body)
    else
      @body
    end
  end

  # Get the indicated part from a multipart message.  Raises a
  # TypeError when the message is not a multipart.
  def part(i)
    raise TypeError,
      "Can not get part on a single part message." unless multipart?
    @body[i]
  end

  # Access the epilogue string for this message.  The epilogue
  # string is relevant only for multipart messages.  It is the text
  # that occurs after all parts of the message and is generally nil.
  attr_accessor :epilogue

  # Access the preamble string for this message.  The preamble
  # string is relevant only for multipart messages.  It is the text
  # that occurs just before the first part of the message, and is
  # generally nil or simple English text describing the nature of
  # the message.
  attr_accessor :preamble

  # Returns the entire message in a single string.  This uses the
  # RMail::Serialize class, which is loaded on first use.
  def to_s
    require 'rmail/serialize'
    RMail::Serialize.new('').serialize(self)
  end

  # Yield each part of this message in turn.  Raises a TypeError when
  # the message is not a multipart.
  #
  # FIXME: not tested
  def each_part
    raise TypeError, "not a multipart message" unless multipart?
    @body.each do |part|
      yield part
    end
  end

  # Call the supplied block for each line of the message.  Each line
  # will contain a trailing newline (<tt>\n</tt>).
  def each
    # FIXME: this is incredibly inefficient!  The only users of this
    # is RMail::Deliver -- get them to use a RMail::Serialize object.
    #
    # String#each no longer exists in Ruby 1.9 and later, so the
    # equivalent String#each_line is used.
    to_s.each_line("\n") { |line|
      yield line
    }
  end

  # This is used by the RMail::Parser to set the MIME multipart
  # delimiter strings found in the message.  These delimiters are
  # then used when serializing the message again.
  #
  # Raises a TypeError when the message is not a multipart, and an
  # ArgumentError unless there is exactly one more delimiter than
  # there are parts.
  #
  # Normal uses of RMail::Message will never use this method, and so
  # it is left undocumented.
  def set_delimiters(delimiters, boundary) # :nodoc:
    raise TypeError, "not a multipart message" unless multipart?
    raise ArgumentError, "delimiter array wrong size" unless
      delimiters.length == @body.length + 1
    @delimiters = delimiters.to_ary
    @delimiters_boundary = boundary.to_str
  end

  # This is used by the serializing functions to retrieve the MIME
  # multipart delimiter strings found while parsing the message.
  # These delimiters are then used when serializing the message
  # again.
  #
  # The stored delimiters are discarded when they no longer fit the
  # message: it is not a multipart, the number of parts has changed,
  # or the boundary parameter of the content-type header differs from
  # the boundary they were recorded with.  Returns the two element
  # Array [ delimiters, boundary ], both nil when nothing usable is
  # stored.
  #
  # Normal uses of RMail::Message will never use this method, and so
  # it is left undocumented.
  def get_delimiters # :nodoc:
    unless multipart? && @delimiters && @delimiters_boundary &&
        @delimiters.length == @body.length + 1 &&
        header.param('content-type', 'boundary') == @delimiters_boundary
      @delimiters = nil
      @delimiters_boundary = nil
    end
    [ @delimiters, @delimiters_boundary ]
  end

end
201
+ end
@@ -0,0 +1,412 @@
1
+ #--
2
+ # Copyright (C) 2002, 2003, 2004 Matt Armstrong. All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without
5
+ # modification, are permitted provided that the following conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright notice,
8
+ # this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above copyright
10
+ # notice, this list of conditions and the following disclaimer in the
11
+ # documentation and/or other materials provided with the distribution.
12
+ # 3. The name of the author may not be used to endorse or promote products
13
+ # derived from this software without specific prior written permission.
14
+ #
15
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
18
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
20
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+ #
26
+ #++
27
+ # Implements the RMail::Parser, RMail::StreamParser and
28
+ # RMail::StreamHandler classes.
29
+
30
+ require 'rmail/message'
31
+ require 'rmail/parser/multipart'
32
+
33
+ module RMail
34
+
35
+ # = Overview
36
+ #
37
+ # An RMail::StreamHandler documents the set of methods a
38
+ # RMail::StreamParser handler must implement. See
39
+ # RMail::StreamParser.parse. This is a low level interface to the
40
+ # RMail message parser.
41
+ #
42
+ # = Order of Method Calls (Grammar)
43
+ #
44
+ # Calls to the methods of this class follow a specific grammar,
45
+ # described informally below. The words in all caps are productions
46
+ # in the grammar, while the lower case words are method calls to
47
+ # this object.
48
+ #
49
+ # MESSAGE:: [ #mbox_from ] *( #header_field )
50
+ # ( BODY / MULTIPART_BODY )
51
+ #
52
+ # BODY:: #body_begin *( #body_chunk ) #body_end
53
+ #
54
+ # MULTIPART_BODY:: #multipart_body_begin
55
+ # *( #preamble_chunk )
56
+ # *( #part_begin MESSAGE #part_end)
57
+ # *( #epilogue_chunk )
58
+ # #multipart_body_end
59
+ #
60
+ # = Order of Method Calls (English)
61
+ #
62
+ # If the grammar above is not clear, here is a description in English.
63
+ #
64
+ # The parser begins calling #header_field, possibly calling
65
+ # #mbox_from for the first line. Then it determines if the message
66
+ # was a MIME multipart message.
67
+ #
68
+ # If the message is not a MIME multipart, the parser calls
69
+ # #body_begin once, then #body_chunk any number of times, then
70
+ # #body_end.
71
+ #
72
+ # If the message header is a MIME multipart message, then
73
+ # #multipart_body_begin is called, followed by any number of calls
74
+ # to #preamble_chunk. Then for each part parsed, #part_begin is
75
+ # called, followed by a recursive set of calls described by the
76
+ # "MESSAGE" production above, and then #part_end. After all parts
77
+ # are parsed, any number of calls to #epilogue_chunk are followed by
78
+ # a single call to #multipart_body_end.
79
+ #
80
+ # The recursive nature of MIME multipart messages is represented by
81
+ # the recursive invocation of the "MESSAGE" production in the
82
+ # grammar above.
83
class StreamHandler

  # Called with the text of a Unix MBOX "From " line found in the
  # message header.
  def mbox_from(line); end

  # Called for every header field that is parsed.  +field+ is the
  # full text of the field, +name+ is the field's name and +value+ is
  # the field's value with leading and trailing whitespace removed.
  # Both +field+ and +value+ may be multi-line strings.
  def header_field(field, name, value); end

  # Called just before a non-multipart message body is parsed.
  def body_begin; end

  # Called with a string chunk of data from a non-multipart message
  # body.  The string does not necessarily begin or end on any
  # particular boundary.
  def body_chunk(chunk); end

  # Called once all of a non-multipart message body has been parsed.
  def body_end; end

  # Called just before a multipart message body is parsed.
  def multipart_body_begin; end

  # Called with a chunk of data from a multipart message body's
  # preamble.  The preamble is any text that appears before the first
  # part of the multipart message body.
  def preamble_chunk(chunk); end

  # Called when a part of a multipart body begins.
  def part_begin; end

  # Called when a part of a multipart body ends.
  def part_end; end

  # Called with a chunk of data from a multipart message body's
  # epilogue.  The epilogue is any text that appears after the last
  # part of the multipart message body.
  def epilogue_chunk(chunk); end

  # Called after a multipart message body has been completely parsed.
  #
  # +delimiters+ is an Array of strings, one for each boundary string
  # found in the multipart body.  +boundary+ is the boundary string
  # used to delimit each part in the multipart body.  Both can
  # normally be ignored if you are concerned only about message
  # content.
  def multipart_body_end(delimiters, boundary); end
end
150
+
151
# RMail::StreamParser is the low level, callback driven message
# parser.  Use it when you want to look at all of a message's content
# serially without building a full object representation of the
# message.  See StreamParser.parse.
class StreamParser

  # Parse a message from +input+, which can be any Ruby IO source or
  # a String.  Nothing useful is returned; instead the message
  # structure is reported through calls on +handler+, which is
  # expected to implement the same methods as RMail::StreamHandler.
  #
  # This is a low level parsing API.  RMail::Parser is implemented on
  # top of it and returns an RMail::Message object.
  def self.parse(input, handler)
    RMail::StreamParser.new(input, handler).parse
  end

  def initialize(input, handler) # :nodoc:
    @input = input
    @handler = handler
    @chunk_size = nil
  end

  # Wraps the input in a pushback reader, parses one whole message
  # from it and returns nil.
  def parse # :nodoc:
    reader = RMail::Parser::PushbackReader.new(@input)
    reader.chunk_size = @chunk_size if @chunk_size
    parse_low(reader, 0)
    nil
  end

  # The chunk size used to read the message.  Changing it is useful
  # mostly for testing, so it is not documented.
  attr_accessor :chunk_size # :nodoc:

  private

  # Parses one entity: its header, then either a multipart or a
  # single part body depending on whether the header yielded a
  # boundary.
  def parse_low(input, depth)
    boundary = parse_header(input, depth)
    return parse_singlepart_body(input, depth) unless boundary
    parse_multipart_body(input, depth, boundary)
  end

  # Reads and reports the header, pushes the unread remainder back
  # onto +input+ and returns the multipart boundary string, or nil
  # when the entity is to be treated as a single part.
  def parse_header(input, depth)
    data = nil
    header = nil
    rest = nil
    boundary = nil

    # Accumulate chunks until the end of the header has been seen.
    while (chunk = input.read)
      data ||= ''
      data << chunk
      if data[0] == ?\n
        # A leading newline in the message is seen when parsing the
        # parts of a multipart message.  It means there are no
        # headers.  The body part starts directly after this
        # newline.
        rest = data[1..-1]
      else
        header, rest = data.split(/\n\n/, 2)
      end
      break if rest
    end
    input.pushback(rest)
    return boundary unless header

    mime = false
    fields = header.split(/\n(?!\s)/)
    @handler.mbox_from(fields.shift) if fields.first =~ /^From /

    fields.each do |field|
      if field =~ /^From /
        @handler.mbox_from(field)
        next
      end

      name, value = RMail::Header::Field.parse(field)
      case name.downcase
      when 'mime-version'
        mime = true if value =~ /\b1\.0\b/
      when 'content-type'
        # FIXME: would be nice to have a procedural equivalent
        # to RMail::Header#param.
        scratch = RMail::Header.new
        scratch['content-type'] = value
        boundary = scratch.param('content-type', 'boundary')
      end
      @handler.header_field(field, name, value)
    end

    # A boundary only counts at the top level when a MIME-Version
    # 1.0 field was seen; nested parts (depth > 0) always keep it.
    boundary = nil unless mime || depth > 0
    boundary
  end

  # Parses a multipart body delimited by +boundary+, reporting the
  # preamble, each part (recursively) and the epilogue.
  def parse_multipart_body(input, depth, boundary)
    parts = RMail::Parser::MultipartReader.new(input, boundary)
    parts.chunk_size = @chunk_size if @chunk_size

    @handler.multipart_body_begin

    # Read each part, handing it to the handler as appropriate.
    delimiters = []
    while parts.next_part
      if parts.preamble?
        while (chunk = parts.read)
          @handler.preamble_chunk(chunk)
        end
      elsif parts.epilogue?
        while (chunk = parts.read)
          @handler.epilogue_chunk(chunk)
        end
      else
        @handler.part_begin
        parse_low(parts, depth + 1)
        @handler.part_end
      end
      delimiters << (parts.delimiter || "") unless parts.epilogue?
    end
    @handler.multipart_body_end(delimiters, boundary)
  end

  # Reports everything left on +input+ as the chunks of a single
  # part body.
  def parse_singlepart_body(input, depth)
    @handler.body_begin
    while (chunk = input.read)
      @handler.body_chunk(chunk)
    end
    @handler.body_end
  end

end
292
+
293
# RMail::Parser builds RMail::Message objects from Ruby IO objects or
# strings.
#
# To parse from a string:
#  message = RMail::Parser.read(the_string)
#
# To parse from an IO object:
#  message = File.open('my-message') { |f|
#    RMail::Parser.read(f)
#  }
#
# You can also parse from STDIN, etc.
#  message = RMail::Parser.read(STDIN)
#
# In all cases, the parser consumes all input.
class Parser

  # This exception class is thrown when the parser encounters an
  # error.
  #
  # Note: the parser tries hard to never throw exceptions -- this
  # error is thrown only when the API is used incorrectly and not on
  # invalid input.
  class Error < StandardError; end

  # Creates a new parser.  The chunk size starts out unset.
  def initialize
    @chunk_size = nil
  end

  # Parse a message from the IO object +input+ and return a new
  # message.  +input+ can also be a string.
  def parse(input)
    handler = RMail::Parser::Handler.new
    stream = RMail::StreamParser.new(input, handler)
    stream.chunk_size = @chunk_size if @chunk_size
    stream.parse
    handler.message
  end

  # The chunk size used to read the message.  Changing it is useful
  # mostly for testing.
  attr_accessor :chunk_size

  # Parse a message from the IO object +input+ and return a new
  # message.  +input+ can also be a string.  This is just shorthand
  # for:
  #
  #   RMail::Parser.new.parse(input)
  def self.read(input)
    Parser.new.parse(input)
  end

  # Stream handler that assembles the callbacks made by
  # RMail::StreamParser into a tree of RMail::Message objects.  The
  # messages currently being built are kept on a stack (@parts), as
  # are the preambles and epilogues of the enclosing multiparts.
  class Handler < RMail::StreamHandler # :nodoc:
    def initialize
      @parts = [RMail::Message.new]
      @preambles = []
      @epilogues = []
    end

    def mbox_from(field)
      current.header.mbox_from = field
    end

    def header_field(field, name, value)
      current.header.add_raw(field)
    end

    def body_begin
      @body = nil
    end

    # The first chunk becomes the body; later chunks are appended.
    def body_chunk(chunk)
      return @body = chunk unless @body
      @body << chunk
    end

    def body_end
      current.body = @body
    end

    def multipart_body_begin
      @preambles.push(nil)
      @epilogues.push(nil)
    end

    def preamble_chunk(chunk)
      collected = @preambles.last
      if collected
        collected << chunk
      else
        @preambles[-1] = chunk
      end
    end

    def epilogue_chunk(chunk)
      collected = @epilogues.last
      if collected
        collected << chunk
      else
        @epilogues[-1] = chunk
      end
    end

    def multipart_body_end(delimiters, boundary)
      message = current
      message.preamble = @preambles.pop
      message.epilogue = @epilogues.pop
      # A multipart without any parts still gets an Array body.
      message.body = [] if message.body.nil?
      message.set_delimiters(delimiters, boundary)
    end

    def part_begin
      @parts << RMail::Message.new
    end

    # The finished part is removed from the stack before it is added
    # to the message that is then on top.
    def part_end
      finished = @parts.pop
      current.add_part(finished)
    end

    # The outermost message, i.e. the result of the parse.
    def message
      @parts.first
    end

    private

    # The message currently being built.
    def current
      @parts.last
    end
  end

end
412
+ end