rmail 0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. data/NEWS +309 -0
  2. data/NOTES +14 -0
  3. data/README +83 -0
  4. data/THANKS +25 -0
  5. data/TODO +112 -0
  6. data/guide/Intro.txt +122 -0
  7. data/guide/MIME.txt +6 -0
  8. data/guide/TableOfContents.txt +13 -0
  9. data/install.rb +1023 -0
  10. data/lib/rmail.rb +50 -0
  11. data/lib/rmail/address.rb +829 -0
  12. data/lib/rmail/header.rb +987 -0
  13. data/lib/rmail/mailbox.rb +62 -0
  14. data/lib/rmail/mailbox/mboxreader.rb +182 -0
  15. data/lib/rmail/message.rb +201 -0
  16. data/lib/rmail/parser.rb +412 -0
  17. data/lib/rmail/parser/multipart.rb +217 -0
  18. data/lib/rmail/parser/pushbackreader.rb +173 -0
  19. data/lib/rmail/serialize.rb +190 -0
  20. data/lib/rmail/utils.rb +59 -0
  21. data/rmail.gemspec +17 -0
  22. data/tests/addrgrammar.txt +113 -0
  23. data/tests/data/mbox.odd +4 -0
  24. data/tests/data/mbox.simple +8 -0
  25. data/tests/data/multipart/data.1 +5 -0
  26. data/tests/data/multipart/data.10 +1 -0
  27. data/tests/data/multipart/data.11 +9 -0
  28. data/tests/data/multipart/data.12 +9 -0
  29. data/tests/data/multipart/data.13 +3 -0
  30. data/tests/data/multipart/data.14 +3 -0
  31. data/tests/data/multipart/data.15 +3 -0
  32. data/tests/data/multipart/data.16 +3 -0
  33. data/tests/data/multipart/data.17 +0 -0
  34. data/tests/data/multipart/data.2 +5 -0
  35. data/tests/data/multipart/data.3 +2 -0
  36. data/tests/data/multipart/data.4 +3 -0
  37. data/tests/data/multipart/data.5 +1 -0
  38. data/tests/data/multipart/data.6 +2 -0
  39. data/tests/data/multipart/data.7 +3 -0
  40. data/tests/data/multipart/data.8 +5 -0
  41. data/tests/data/multipart/data.9 +4 -0
  42. data/tests/data/parser.badmime1 +4 -0
  43. data/tests/data/parser.badmime2 +6 -0
  44. data/tests/data/parser.nested-multipart +75 -0
  45. data/tests/data/parser.nested-simple +12 -0
  46. data/tests/data/parser.nested-simple2 +16 -0
  47. data/tests/data/parser.nested-simple3 +21 -0
  48. data/tests/data/parser.rfc822 +65 -0
  49. data/tests/data/parser.simple-mime +24 -0
  50. data/tests/data/parser/multipart.1 +8 -0
  51. data/tests/data/parser/multipart.10 +4 -0
  52. data/tests/data/parser/multipart.11 +12 -0
  53. data/tests/data/parser/multipart.12 +12 -0
  54. data/tests/data/parser/multipart.13 +6 -0
  55. data/tests/data/parser/multipart.14 +6 -0
  56. data/tests/data/parser/multipart.15 +6 -0
  57. data/tests/data/parser/multipart.16 +6 -0
  58. data/tests/data/parser/multipart.2 +8 -0
  59. data/tests/data/parser/multipart.3 +5 -0
  60. data/tests/data/parser/multipart.4 +6 -0
  61. data/tests/data/parser/multipart.5 +4 -0
  62. data/tests/data/parser/multipart.6 +5 -0
  63. data/tests/data/parser/multipart.7 +6 -0
  64. data/tests/data/parser/multipart.8 +8 -0
  65. data/tests/data/parser/multipart.9 +7 -0
  66. data/tests/data/transparency/absolute.1 +5 -0
  67. data/tests/data/transparency/absolute.2 +1 -0
  68. data/tests/data/transparency/absolute.3 +2 -0
  69. data/tests/data/transparency/absolute.4 +3 -0
  70. data/tests/data/transparency/absolute.5 +4 -0
  71. data/tests/data/transparency/absolute.6 +49 -0
  72. data/tests/data/transparency/message.1 +73 -0
  73. data/tests/data/transparency/message.2 +34 -0
  74. data/tests/data/transparency/message.3 +63 -0
  75. data/tests/data/transparency/message.4 +5 -0
  76. data/tests/data/transparency/message.5 +15 -0
  77. data/tests/data/transparency/message.6 +1185 -0
  78. data/tests/runtests.rb +35 -0
  79. data/tests/testaddress.rb +1192 -0
  80. data/tests/testbase.rb +207 -0
  81. data/tests/testheader.rb +1207 -0
  82. data/tests/testmailbox.rb +47 -0
  83. data/tests/testmboxreader.rb +161 -0
  84. data/tests/testmessage.rb +257 -0
  85. data/tests/testparser.rb +634 -0
  86. data/tests/testparsermultipart.rb +205 -0
  87. data/tests/testpushbackreader.rb +40 -0
  88. data/tests/testserialize.rb +264 -0
  89. data/tests/testtestbase.rb +112 -0
  90. data/tests/testtranspparency.rb +105 -0
  91. metadata +143 -0
@@ -0,0 +1,62 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright (c) 2002, 2003 Matt Armstrong. All rights reserved.
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright
11
+ # notice, this list of conditions and the following disclaimer in the
12
+ # documentation and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
19
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ #
27
+ #++
28
+ # Implements the RMail::Mailbox module.
29
+
30
+ module RMail
31
+
32
# The RMail::Mailbox module collects a few helper methods that are
# useful when working with mailboxes.
module Mailbox
  class << self
    # Splits a Unix mbox style mailbox into its raw messages.  These
    # mailboxes separate individual messages with a line beginning
    # with the string "From ".
    #
    # +input+ and +line_separator+ are passed unchanged to
    # RMail::Mailbox::MBoxReader.new.
    #
    # When a block is given, each raw message (a string) is yielded to
    # the block in turn and nil is returned.  Without a block, an
    # array of the raw message strings is returned.
    def parse_mbox(input, line_separator = $/)
      # The reader is required lazily, only when this method is used.
      require 'rmail/mailbox/mboxreader'

      # Only accumulate results when the caller did not supply a block.
      collected = block_given? ? nil : []
      mbox = RMail::Mailbox::MBoxReader.new(input, line_separator)
      mbox.each_message do |message_reader|
        raw = message_reader.read(nil)
        if collected
          collected << raw
        else
          yield raw
        end
      end
      collected
    end
  end
end
62
+ end
@@ -0,0 +1,182 @@
1
+ #--
2
+ # Copyright (c) 2002, 2003 Matt Armstrong. All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without
5
+ # modification, are permitted provided that the following conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright notice,
8
+ # this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above copyright
10
+ # notice, this list of conditions and the following disclaimer in the
11
+ # documentation and/or other materials provided with the distribution.
12
+ # 3. The name of the author may not be used to endorse or promote products
13
+ # derived from this software without specific prior written permission.
14
+ #
15
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
18
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
20
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+ #
26
+ #++
27
+ # Implements the RMail::Mailbox::MBoxReader class.
28
+
29
+ require 'rmail/parser/pushbackreader'
30
+
31
+ module RMail
32
+ module Mailbox
33
+
34
# Class that can parse Unix mbox style mailboxes.  These mailboxes
# separate individual messages with a line beginning with the
# string "From ".
#
# Typical usage:
#
#  File.open("file.mbox") { |file|
#    RMail::Mailbox::MBoxReader.new(file).each_message { |input|
#      message = RMail::Parser.read(input)
#      # do something with the message
#    }
#  }
#
# Or see RMail::Mailbox.parse_mbox for a more convenient
# interface.
class MBoxReader < RMail::Parser::PushbackReader

  # Creates a new MBoxReader that reads from `input' with lines
  # that end with `line_separator'.
  #
  # `input' is handed to the superclass constructor; it can either
  # be an IO source (an object that responds to the "read" method in
  # the same way as a standard IO object) or a String.
  #
  # `line_separator' defaults to $/, and useful values are
  # probably limited to "\n" (Unix) and "\r\n" (DOS/Windows).
  def initialize(input, line_separator = $/)
    super(input)
    @end_of_message = false   # true once a complete From_ line was found
    @chunk_minsize = 0        # minimum bytes read_chunk_low must gather
    @sep = line_separator
    @tail = nil               # trailing data of what was returned so far

    # This regexp will match a From_ header, or some prefix.
    re_string = RMail::Parser::PushbackReader.
      maybe_contains_re("#{@sep}From ")
    @partial_from_re = Regexp.new(re_string)

    # This regexp will match an entire From_ header.  The separator
    # is escaped so that it is always matched literally, even if it
    # contains characters that are special in a regexp.
    quoted_sep = Regexp.escape(@sep)
    @entire_from_re = /\A#{quoted_sep}From .*?#{quoted_sep}/
  end

  # Keep access to the inherited read_chunk, which is overridden below.
  alias_method :parent_read_chunk, :read_chunk

  # Reads some data from the current message and returns it.  The
  # `size' argument is passed on to the inherited read_chunk; the
  # returned string can be larger or smaller.
  #
  # Once all data from the current message has been read, nil is
  # returned and #next must be called to begin reading from the
  # next message.  You can use #eof to tell if there is any more
  # data to be read from the input source.
  #
  # If the data of a message does not end with the line separator,
  # one line separator is returned as a final chunk before nil.
  def read_chunk(size)
    chunk = read_chunk_low(size)
    if chunk
      if chunk.length > @sep.length
        # Remember just enough to tell whether the message ends
        # with the line separator.
        @tail = chunk[-@sep.length .. -1]
      else
        # Short chunk: accumulate it (without mutating a literal).
        @tail = (@tail || '') + chunk
      end
    elsif @tail
      # End of message: supply a missing trailing separator once.
      if @tail[-@sep.length .. -1] != @sep
        chunk = @sep
      end
      @tail = nil
    end
    chunk
  end

  # Advances to the next message to be read.  Call this after
  # #read returns nil.
  #
  # Note: Once #read returns nil, you can call #eof before or
  # after calling #next to tell if there actually is a next
  # message to read.
  def next
    @end_of_message = false
    @tail = nil
  end

  # Keep access to the inherited eof, which is overridden below.
  alias_method :parent_eof, :eof

  # Returns true when the inherited eof is true and no tail data is
  # being tracked, i.e. the next call to read_chunk will return nil.
  def eof
    parent_eof && @tail.nil?
  end

  # Yield self until eof, calling next after each yield.
  #
  # This method makes it simple to read messages successively out
  # of the mailbox.  See the class description for a code example.
  def each_message
    until eof
      yield self
      self.next
    end
  end

  private

  # Reads a chunk through the inherited read_chunk and stops at a
  # From_ separator line.  Returns nil once the end of the current
  # message has been reached (until #next is called).
  def read_chunk_low(size)
    return nil if @end_of_message
    if chunk = parent_read_chunk(size)
      # Read at least @chunk_minsize bytes.
      while chunk.length < @chunk_minsize && more = parent_read_chunk(size)
        chunk << more
      end
      if match = @partial_from_re.match(chunk)
        # We matched what might be a From_ separator.  Separate
        # the chunk into what came before and what came after it.
        mbegin = match.begin(0)
        rest = chunk[mbegin .. -1]

        if @entire_from_re =~ rest
          # We've got a full From_ line, so set the end of message
          # flag and get rid of the line separator present just
          # before the From_.
          @end_of_message = true
          @chunk_minsize = 0
          rest[0, @sep.length] = "" # painful
        else
          # Make sure that next time we read more than just the
          # pushback.
          @chunk_minsize = rest.length + 1
        end

        # Return the whole chunk with a partially matched From_
        # when there is nothing further to read.  Otherwise, push
        # back the From_ and return the pre-match.
        if @end_of_message || !parent_eof
          pushback(rest)
          if mbegin == 0 && @end_of_message
            chunk = nil
          else
            chunk = chunk[0, mbegin]
          end
        end

      end
    end
    return chunk
  end
end
181
+ end
182
+ end
@@ -0,0 +1,201 @@
1
+ #--
2
+ # Copyright (C) 2001, 2002, 2003 Matt Armstrong. All rights
3
+ # reserved.
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright
11
+ # notice, this list of conditions and the following disclaimer in the
12
+ # documentation and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
19
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ #
27
+ #++
28
+ # Implements the RMail::Message class.
29
+
30
+ require 'rmail/header.rb'
31
+
32
+ module RMail
33
+
34
# The RMail::Message is an object representation of a standard
# Internet email message, including MIME multipart messages.
#
# An RMail::Message object represents a message header (held in the
# contained RMail::Header object) and a message body.  The message
# body may either be a single String for single part messages or an
# Array of RMail::Message objects for MIME multipart messages.
class Message

  # Create a new, empty, RMail::Message.
  def initialize
    @header = RMail::Header.new
    @body = nil
    @epilogue = nil
    @preamble = nil
    # Set by #set_delimiters; initialised here so that reading them
    # in #get_delimiters never touches an undefined variable.
    @delimiters = nil
    @delimiters_boundary = nil
  end

  # Test if this message is structured exactly the same as the other
  # message.  This is useful mainly for testing.
  #
  # Returns false (instead of raising) when +other+ is not a message.
  def ==(other)
    return false unless other.is_a?(Message)
    @preamble == other.preamble &&
      @epilogue == other.epilogue &&
      @header == other.header &&
      @body == other.body
  end

  # Returns the body of the message as a String or Array.
  #
  # If #multipart? returns true, it will be an array of
  # RMail::Message objects.  Otherwise it will be a String (or nil
  # when no body has been set).
  #
  # See also #header.
  def body
    return @body
  end

  # Sets the body of the message to the given value.  It should
  # either be a String or an Array of RMail::Message objects.
  def body=(s)
    @body = s
  end

  # Returns the RMail::Header object.
  #
  # See also #body.
  def header()
    return @header
  end

  # Return true if the message consists of multiple parts, i.e. the
  # body is an Array.
  def multipart?
    @body.is_a?(Array)
  end

  # Add a part to the message.  After this method is called, the
  # #multipart? method will return true and the #body method will
  # return an array of parts.  An existing non-array body becomes
  # the first element of that array.
  def add_part(part)
    if @body.nil?
      @body = [part]
    elsif @body.is_a?(Array)
      @body.push(part)
    else
      @body = [@body, part]
    end
  end

  # Decode the body of this message.
  #
  # If the body of this message is encoded with
  # <tt>quoted-printable</tt> or <tt>base64</tt>, this function will
  # decode the data into its original form and return it as a
  # String.  If the body is not encoded, it is returned unaltered.
  #
  # This only works when the message is not a multipart; a TypeError
  # is raised otherwise.  The <tt>Content-Transfer-Encoding:</tt>
  # header field is consulted to determine the encoding of the body
  # part; a missing field is treated as <tt>7bit</tt>.
  def decode
    raise TypeError, "Can not decode a multipart message." if multipart?
    case header.fetch('content-transfer-encoding', '7bit').strip.downcase
    when 'quoted-printable'
      Utils.quoted_printable_decode(@body)
    when 'base64'
      Utils.base64_decode(@body)
    else
      @body
    end
  end

  # Get the indicated part from a multipart message.  Raises a
  # TypeError when the message is not a multipart.
  def part(i)
    raise TypeError,
      "Can not get part on a single part message." unless multipart?
    @body[i]
  end

  # Access the epilogue string for this message.  The epilogue
  # string is relevant only for multipart messages.  It is the text
  # that occurs after all parts of the message and is generally nil.
  attr_accessor :epilogue

  # Access the preamble string for this message.  The preamble
  # string is relevant only for multipart messages.  It is the text
  # that occurs just before the first part of the message, and is
  # generally nil or simple English text describing the nature of
  # the message.
  attr_accessor :preamble

  # Returns the entire message in a single string.  This uses the
  # RMail::Serialize class, which is required lazily here.
  def to_s()
    require 'rmail/serialize'
    RMail::Serialize.new('').serialize(self)
  end

  # Yields each part of this message.  Raises a TypeError when the
  # message is not a multipart.
  #
  # FIXME: not tested
  def each_part
    raise TypeError, "not a multipart message" unless multipart?
    @body.each do |part|
      yield part
    end
  end

  # Call the supplied block for each line of the message, as split
  # on <tt>\n</tt>.  Each line keeps its trailing newline when the
  # serialized text has one at that point.
  def each()
    # FIXME: this is incredibly inefficient!  The only users of this
    # is RMail::Deliver -- get them to use a RMail::Serialize object.
    #
    # String#each no longer exists in current Ruby versions, so the
    # equivalent String#each_line is used.
    to_s.each_line("\n") { |line|
      yield line
    }
  end

  # This is used by the RMail::Parser to set the MIME multipart
  # delimiter strings found in the message.  These delimiters are
  # then used when serializing the message again.
  #
  # Raises a TypeError for non-multipart messages and an
  # ArgumentError unless there is exactly one more delimiter than
  # there are parts.
  #
  # Normal uses of RMail::Message will never use this method, and so
  # it is left undocumented.
  def set_delimiters(delimiters, boundary) # :nodoc:
    raise TypeError, "not a multipart message" unless multipart?
    raise ArgumentError, "delimiter array wrong size" unless
      delimiters.length == @body.length + 1
    @delimiters = delimiters.to_ary
    @delimiters_boundary = boundary.to_str
  end

  # This is used by the serializing functions to retrieve the MIME
  # multipart delimiter strings found while parsing the message.
  # These delimiters are then used when serializing the message
  # again.
  #
  # The stored values are discarded (and [nil, nil] is returned)
  # when they no longer fit the message: it is not a multipart, the
  # number of parts changed, or the boundary parameter of the
  # Content-Type header differs from the stored boundary.
  #
  # Normal uses of RMail::Message will never use this method, and so
  # it is left undocumented.
  def get_delimiters # :nodoc:
    unless multipart? && @delimiters && @delimiters_boundary &&
        @delimiters.length == @body.length + 1 &&
        header.param('content-type', 'boundary') == @delimiters_boundary
      @delimiters = nil
      @delimiters_boundary = nil
    end
    [ @delimiters, @delimiters_boundary ]
  end

end
201
+ end
@@ -0,0 +1,412 @@
1
+ #--
2
+ # Copyright (C) 2002, 2003, 2004 Matt Armstrong. All rights reserved.
3
+ #
4
+ # Redistribution and use in source and binary forms, with or without
5
+ # modification, are permitted provided that the following conditions are met:
6
+ #
7
+ # 1. Redistributions of source code must retain the above copyright notice,
8
+ # this list of conditions and the following disclaimer.
9
+ # 2. Redistributions in binary form must reproduce the above copyright
10
+ # notice, this list of conditions and the following disclaimer in the
11
+ # documentation and/or other materials provided with the distribution.
12
+ # 3. The name of the author may not be used to endorse or promote products
13
+ # derived from this software without specific prior written permission.
14
+ #
15
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
18
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
20
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+ #
26
+ #++
27
+ # Implements the RMail::Parser, RMail::StreamParser and
28
+ # RMail::StreamHandler classes.
29
+
30
+ require 'rmail/message'
31
+ require 'rmail/parser/multipart'
32
+
33
+ module RMail
34
+
35
# = Overview
#
# RMail::StreamHandler documents the set of methods that a handler
# passed to RMail::StreamParser.parse must implement.  This is a
# low level interface to the RMail message parser.  Every method
# here is an empty hook that does nothing.
#
# = Order of Method Calls (Grammar)
#
# Calls to the methods of this class follow a specific grammar,
# described informally below.  The words in all caps are productions
# in the grammar, while the lower case words are method calls to
# this object.
#
# MESSAGE::         [ #mbox_from ] *( #header_field )
#                   ( BODY / MULTIPART_BODY )
#
# BODY::            #body_begin *( #body_chunk ) #body_end
#
# MULTIPART_BODY::  #multipart_body_begin
#                   *( #preamble_chunk )
#                   *( #part_begin MESSAGE #part_end )
#                   *( #epilogue_chunk )
#                   #multipart_body_end
#
# = Order of Method Calls (English)
#
# If the grammar above is not clear, here is a description in
# English.
#
# The parser begins by calling #header_field for each header field,
# possibly calling #mbox_from for the first line.  Then it
# determines whether the message is a MIME multipart message.
#
# If the message is not a MIME multipart, the parser calls
# #body_begin once, then #body_chunk any number of times, then
# #body_end.
#
# If the message is a MIME multipart message, then
# #multipart_body_begin is called, followed by any number of calls
# to #preamble_chunk.  Then for each part parsed, #part_begin is
# called, followed by a recursive set of calls described by the
# "MESSAGE" production above, and then #part_end.  After all parts
# are parsed, any number of calls to #epilogue_chunk are followed by
# a single call to #multipart_body_end.
#
# The recursive nature of MIME multipart messages is represented by
# the recursive invocation of the "MESSAGE" production in the
# grammar above.
class StreamHandler
  # Called with the text of a Unix MBOX "From " line found in the
  # message header.
  def mbox_from(line); end

  # Called when a header field has been parsed.  +field+ is the full
  # text of the field, +name+ is the name of the field and +value+
  # is the field's value with leading and trailing whitespace
  # removed.  Both +field+ and +value+ may be multi-line strings.
  def header_field(field, name, value); end

  # Called just before the body of a non-multipart message is
  # parsed.
  def body_begin; end

  # Called with a string chunk of data from a non-multipart message
  # body.  The string does not necessarily begin or end on any
  # particular boundary.
  def body_chunk(chunk); end

  # Called once the whole body of a non-multipart message has been
  # parsed.
  def body_end; end

  # Called just before the body of a multipart message is parsed.
  def multipart_body_begin; end

  # Called with a chunk of data from the preamble of a multipart
  # message body.  The preamble is any text that appears before the
  # first part of the multipart message body.
  def preamble_chunk(chunk); end

  # Called when a part of a multipart body begins.
  def part_begin; end

  # Called when a part of a multipart body ends.
  def part_end; end

  # Called with a chunk of data from the epilogue of a multipart
  # message body.  The epilogue is any text that appears after the
  # last part of the multipart message body.
  def epilogue_chunk(chunk); end

  # Called after a multipart message body has been completely
  # parsed.
  #
  # +delimiters+ is an Array of strings, one for each boundary
  # string found in the multipart body.  +boundary+ is the boundary
  # string used to delimit each part in the multipart body.  Both
  # can normally be ignored if you are concerned only about message
  # content.
  def multipart_body_end(delimiters, boundary); end
end
150
+
151
# RMail::StreamParser is the low level, event driven message
# parsing API.  Use it when you want to examine all message content
# serially without building a full object representation of the
# message.  See StreamParser.parse.
class StreamParser

  # Parse a message from an input source.  Nothing useful is
  # returned (the result is nil); instead, the methods documented by
  # RMail::StreamHandler are called on the supplied +handler+, and
  # the message structure can be inferred from those calls.  The
  # +input+ can be any Ruby IO source or a String.
  #
  # This is a low level parsing API.  For a message parser that
  # returns an RMail::Message object, see the RMail::Parser class,
  # which is implemented using RMail::StreamParser.
  def self.parse(input, handler)
    RMail::StreamParser.new(input, handler).parse
  end

  def initialize(input, handler) # :nodoc:
    @input = input
    @handler = handler
    @chunk_size = nil
  end

  def parse # :nodoc:
    reader = RMail::Parser::PushbackReader.new(@input)
    reader.chunk_size = @chunk_size if @chunk_size
    parse_low(reader, 0)
    nil
  end

  # The chunk size used to read the message.  Changing it is useful
  # mostly for testing, so it is not documented.
  attr_accessor :chunk_size # :nodoc:

  private

  # Parses one entity (header plus body) from +input+.  +depth+ is 0
  # for the outermost message and grows by one per nested part.
  def parse_low(input, depth)
    boundary = parse_header(input, depth)
    return parse_singlepart_body(input, depth) unless boundary
    parse_multipart_body(input, depth, boundary)
  end

  # Reads the header from +input+, reports its fields to the handler
  # and pushes the data following the header back onto +input+.
  # Returns the multipart boundary string when the entity is to be
  # parsed as a multipart, nil otherwise.
  def parse_header(input, depth)
    buffer = nil
    header_text = nil
    remainder = nil
    boundary = nil

    # Accumulate chunks until the end of the header has been seen.
    while (chunk = input.read)
      buffer ||= ''
      buffer << chunk
      if buffer[0] == ?\n
        # A leading newline is seen when parsing the parts of a
        # multipart message.  It means there are no headers; the
        # body part starts directly after this newline.
        remainder = buffer[1..-1]
      else
        header_text, remainder = buffer.split(/\n\n/, 2)
      end
      break if remainder
    end
    input.pushback(remainder)
    return boundary unless header_text

    mime = false
    # Split into fields; lines starting with whitespace continue the
    # previous field.
    fields = header_text.split(/\n(?!\s)/)
    @handler.mbox_from(fields.shift) if fields.first =~ /^From /

    fields.each do |field|
      if field =~ /^From /
        @handler.mbox_from(field)
        next
      end

      name, value = RMail::Header::Field.parse(field)
      case name.downcase
      when 'mime-version'
        mime = true if value =~ /\b1\.0\b/
      when 'content-type'
        # FIXME: would be nice to have a procedural equivalent
        # to RMail::Header#param.
        content_type = RMail::Header.new
        content_type['content-type'] = value
        boundary = content_type.param('content-type', 'boundary')
      end
      @handler.header_field(field, name, value)
    end

    # At the top level a boundary only counts when a MIME-Version
    # field containing 1.0 was seen; nested parts need no such field.
    boundary = nil unless mime || depth > 0
    boundary
  end

  # Parses a multipart body delimited by +boundary+, reporting the
  # preamble, every part (recursively) and the epilogue to the
  # handler.
  def parse_multipart_body(input, depth, boundary)
    reader = RMail::Parser::MultipartReader.new(input, boundary)
    reader.chunk_size = @chunk_size if @chunk_size

    @handler.multipart_body_begin

    # Read each part, collecting the delimiter that ended it.
    delimiters = []
    while reader.next_part
      if reader.preamble?
        drain_chunks(reader) { |chunk| @handler.preamble_chunk(chunk) }
      elsif reader.epilogue?
        drain_chunks(reader) { |chunk| @handler.epilogue_chunk(chunk) }
      else
        @handler.part_begin
        parse_low(reader, depth + 1)
        @handler.part_end
      end
      delimiters << (reader.delimiter || "") unless reader.epilogue?
    end
    @handler.multipart_body_end(delimiters, boundary)
  end

  # Parses a non-multipart body, handing every chunk read from
  # +input+ to the handler.
  def parse_singlepart_body(input, depth)
    @handler.body_begin
    drain_chunks(input) { |chunk| @handler.body_chunk(chunk) }
    @handler.body_end
  end

  # Yields successive chunks read from +reader+ until it returns nil.
  def drain_chunks(reader)
    while (chunk = reader.read)
      yield chunk
    end
  end

end
292
+
293
# The RMail::Parser class builds RMail::Message objects from Ruby
# IO objects or strings.
#
# To parse from a string:
#  message = RMail::Parser.read(the_string)
#
# To parse from an IO object:
#  message = File.open('my-message') { |f|
#    RMail::Parser.read(f)
#  }
#
# You can also parse from STDIN, etc.
#  message = RMail::Parser.read(STDIN)
#
# In all cases, the parser consumes all input.
class Parser

  # This exception class is thrown when the parser encounters an
  # error.
  #
  # Note: the parser tries hard to never throw exceptions -- this
  # error is thrown only when the API is used incorrectly and not on
  # invalid input.
  class Error < StandardError; end

  # The chunk size used to read the message.  Changing it is useful
  # mostly for testing.
  attr_accessor :chunk_size

  # Shorthand for:
  #
  #  RMail::Parser.new.parse(io)
  #
  # Parses a message from +input+ (an IO object or a string) and
  # returns the new message.
  def self.read(input)
    Parser.new.parse(input)
  end

  # Creates a new parser with no explicit chunk size.
  def initialize()
    @chunk_size = nil
  end

  # Parse a message from +input+ and return the new message.
  # +input+ can be an IO object or a string.
  def parse(input)
    handler = RMail::Parser::Handler.new
    stream_parser = RMail::StreamParser.new(input, handler)
    stream_parser.chunk_size = @chunk_size if @chunk_size
    stream_parser.parse
    handler.message
  end

  # Stream handler that assembles RMail::Message objects from the
  # events reported by RMail::StreamParser.  @parts is a stack of
  # the messages currently being built, outermost first; @preambles
  # and @epilogues are stacks with one entry per open multipart
  # body.
  class Handler < RMail::StreamHandler # :nodoc:
    def initialize
      @parts = [ RMail::Message.new ]
      @preambles = []
      @epilogues = []
    end

    def mbox_from(field)
      current.header.mbox_from = field
    end

    def header_field(field, name, value)
      current.header.add_raw(field)
    end

    def body_begin
      @body = nil
    end

    def body_chunk(chunk)
      return @body = chunk unless @body
      @body << chunk
    end

    def body_end
      current.body = @body
    end

    def multipart_body_begin
      @preambles.push(nil)
      @epilogues.push(nil)
    end

    def preamble_chunk(chunk)
      append_to_top(@preambles, chunk)
    end

    def epilogue_chunk(chunk)
      append_to_top(@epilogues, chunk)
    end

    def multipart_body_end(delimiters, boundary)
      message = current
      message.preamble = @preambles.pop
      message.epilogue = @epilogues.pop
      # A multipart without any parts still gets an (empty) array body.
      message.body = [] if message.body.nil?
      message.set_delimiters(delimiters, boundary)
    end

    def part_begin
      @parts << RMail::Message.new
    end

    def part_end
      finished = @parts.pop
      current.add_part(finished)
    end

    # The outermost message, i.e. the result of the parse.
    def message
      @parts.first
    end

    private

    # The message currently being built.
    def current
      @parts.last
    end

    # Appends +chunk+ to the string on top of +stack+, or stores
    # +chunk+ there when the top entry is still nil.
    def append_to_top(stack, chunk)
      top = stack.last
      if top
        top << chunk
      else
        stack[-1] = chunk
      end
    end
  end

end
412
+ end