rmail 0.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. data/NEWS +309 -0
  2. data/NOTES +14 -0
  3. data/README +83 -0
  4. data/THANKS +25 -0
  5. data/TODO +112 -0
  6. data/guide/Intro.txt +122 -0
  7. data/guide/MIME.txt +6 -0
  8. data/guide/TableOfContents.txt +13 -0
  9. data/install.rb +1023 -0
  10. data/lib/rmail.rb +50 -0
  11. data/lib/rmail/address.rb +829 -0
  12. data/lib/rmail/header.rb +987 -0
  13. data/lib/rmail/mailbox.rb +62 -0
  14. data/lib/rmail/mailbox/mboxreader.rb +182 -0
  15. data/lib/rmail/message.rb +201 -0
  16. data/lib/rmail/parser.rb +412 -0
  17. data/lib/rmail/parser/multipart.rb +217 -0
  18. data/lib/rmail/parser/pushbackreader.rb +173 -0
  19. data/lib/rmail/serialize.rb +190 -0
  20. data/lib/rmail/utils.rb +59 -0
  21. data/rmail.gemspec +17 -0
  22. data/tests/addrgrammar.txt +113 -0
  23. data/tests/data/mbox.odd +4 -0
  24. data/tests/data/mbox.simple +8 -0
  25. data/tests/data/multipart/data.1 +5 -0
  26. data/tests/data/multipart/data.10 +1 -0
  27. data/tests/data/multipart/data.11 +9 -0
  28. data/tests/data/multipart/data.12 +9 -0
  29. data/tests/data/multipart/data.13 +3 -0
  30. data/tests/data/multipart/data.14 +3 -0
  31. data/tests/data/multipart/data.15 +3 -0
  32. data/tests/data/multipart/data.16 +3 -0
  33. data/tests/data/multipart/data.17 +0 -0
  34. data/tests/data/multipart/data.2 +5 -0
  35. data/tests/data/multipart/data.3 +2 -0
  36. data/tests/data/multipart/data.4 +3 -0
  37. data/tests/data/multipart/data.5 +1 -0
  38. data/tests/data/multipart/data.6 +2 -0
  39. data/tests/data/multipart/data.7 +3 -0
  40. data/tests/data/multipart/data.8 +5 -0
  41. data/tests/data/multipart/data.9 +4 -0
  42. data/tests/data/parser.badmime1 +4 -0
  43. data/tests/data/parser.badmime2 +6 -0
  44. data/tests/data/parser.nested-multipart +75 -0
  45. data/tests/data/parser.nested-simple +12 -0
  46. data/tests/data/parser.nested-simple2 +16 -0
  47. data/tests/data/parser.nested-simple3 +21 -0
  48. data/tests/data/parser.rfc822 +65 -0
  49. data/tests/data/parser.simple-mime +24 -0
  50. data/tests/data/parser/multipart.1 +8 -0
  51. data/tests/data/parser/multipart.10 +4 -0
  52. data/tests/data/parser/multipart.11 +12 -0
  53. data/tests/data/parser/multipart.12 +12 -0
  54. data/tests/data/parser/multipart.13 +6 -0
  55. data/tests/data/parser/multipart.14 +6 -0
  56. data/tests/data/parser/multipart.15 +6 -0
  57. data/tests/data/parser/multipart.16 +6 -0
  58. data/tests/data/parser/multipart.2 +8 -0
  59. data/tests/data/parser/multipart.3 +5 -0
  60. data/tests/data/parser/multipart.4 +6 -0
  61. data/tests/data/parser/multipart.5 +4 -0
  62. data/tests/data/parser/multipart.6 +5 -0
  63. data/tests/data/parser/multipart.7 +6 -0
  64. data/tests/data/parser/multipart.8 +8 -0
  65. data/tests/data/parser/multipart.9 +7 -0
  66. data/tests/data/transparency/absolute.1 +5 -0
  67. data/tests/data/transparency/absolute.2 +1 -0
  68. data/tests/data/transparency/absolute.3 +2 -0
  69. data/tests/data/transparency/absolute.4 +3 -0
  70. data/tests/data/transparency/absolute.5 +4 -0
  71. data/tests/data/transparency/absolute.6 +49 -0
  72. data/tests/data/transparency/message.1 +73 -0
  73. data/tests/data/transparency/message.2 +34 -0
  74. data/tests/data/transparency/message.3 +63 -0
  75. data/tests/data/transparency/message.4 +5 -0
  76. data/tests/data/transparency/message.5 +15 -0
  77. data/tests/data/transparency/message.6 +1185 -0
  78. data/tests/runtests.rb +35 -0
  79. data/tests/testaddress.rb +1192 -0
  80. data/tests/testbase.rb +207 -0
  81. data/tests/testheader.rb +1207 -0
  82. data/tests/testmailbox.rb +47 -0
  83. data/tests/testmboxreader.rb +161 -0
  84. data/tests/testmessage.rb +257 -0
  85. data/tests/testparser.rb +634 -0
  86. data/tests/testparsermultipart.rb +205 -0
  87. data/tests/testpushbackreader.rb +40 -0
  88. data/tests/testserialize.rb +264 -0
  89. data/tests/testtestbase.rb +112 -0
  90. data/tests/testtranspparency.rb +105 -0
  91. metadata +143 -0
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright (c) 2003 Matt Armstrong. All rights reserved.
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright
11
+ # notice, this list of conditions and the following disclaimer in the
12
+ # documentation and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
19
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ #
27
+ #++
28
+ # This module allows you to simply
29
+ # require 'rmail'
30
+ # in your ruby scripts and have all of the RMail module required.
31
+ # This provides maximum convenience when the startup time of your
32
+ # script is not crucial.
33
+
34
+ # The RMail module contains all of the RubyMail classes, but has no
35
+ # useful API of its own.
36
+ #
37
+ # See guide/Intro.txt for a general overview of RubyMail.
38
# Namespace that holds every RubyMail class; it exposes no API of its own.
module RMail; end
40
+
41
+ require 'rmail/address'
42
+ require 'rmail/header'
43
+ require 'rmail/mailbox'
44
+ require 'rmail/message'
45
+ require 'rmail/parser'
46
+ require 'rmail/serialize'
47
+ require 'rmail/utils'
48
+ require 'rmail/mailbox/mboxreader'
49
+ require 'rmail/parser/multipart'
50
+ require 'rmail/parser/pushbackreader'
@@ -0,0 +1,829 @@
1
+ #--
2
+ # Copyright (C) 2001, 2002, 2003 Matt Armstrong. All rights
3
+ # reserved.
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright
11
+ # notice, this list of conditions and the following disclaimer in the
12
+ # documentation and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
19
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ #
27
+ #++
28
+ # Implements the RMail::Address, RMail::Address::List, and
29
+ # RMail::Address::Parser classes. Together, these classes allow you
30
+ # to robustly parse, manipulate, and generate RFC2822 email addresses
31
+ # and address lists.
32
+
33
+ module RMail
34
+
35
+ # This class provides the following functionality:
36
+ #
37
+ # * Parses RFC2822 address lists into a list of Address
38
+ # objects (see #parse).
39
+ #
40
+ # * Format Address objects as appropriate for insertion into email
41
+ # messages (see #format).
42
+ #
43
+ # * Allows manipulation of the various parts of the address (see
44
+ # #local=, #domain=, #display_name=, #comments=).
45
+ class Address
46
+
47
# Regexp source (a String) matching a run of RFC2822 "atext" characters.
# The hyphen is listed last so that it is a literal '-'.  Written as
# '+-?' it formed the range '+'..'?' and wrongly admitted specials such
# as ',', '.', ':', ';', '<' and '>'.  '/' is listed explicitly because
# it is valid atext and was previously only matched through that range.
ATEXT = '[\w=!#$%&\'*+\/?^\`{|}~-]+'
48
+
49
+ # Create a new address. If the +string+ argument is not nil, it
50
+ # is parsed for mail addresses and if one is found, it is used to
51
+ # initialize this object.
52
# Build an address.  With no argument (or nil) every field stays nil.
# With a String, the first address parsed out of it (if any) supplies the
# local part, domain, comments and display name.  Any other argument type
# raises ArgumentError.
def initialize(string = nil)
  @local = nil
  @domain = nil
  @comments = nil
  @display_name = nil

  unless string.kind_of?(String)
    raise ArgumentError unless string.nil?
    return
  end

  parsed = Address.parse(string)
  return if parsed.length == 0

  first = parsed[0]
  @local = first.local
  @domain = first.domain
  @comments = first.comments
  @display_name = first.display_name
end
68
+
69
+ # Compare this address with another based on the email address
70
+ # portion only (any display name and comments are ignored). If
71
+ # the other object is not an RMail::Address, it is coerced into a
72
+ # string with its to_str method and then parsed into an
73
+ # RMail::Address object.
74
# Order two addresses by local part, then by domain; display name and
# comments play no role.  A non-Address argument is converted with
# to_str and parsed into an RMail::Address first.  Missing parts compare
# as the empty string.
def <=>(other)
  other = RMail::Address.new(other.to_str) unless other.kind_of?(RMail::Address)
  by_local = (@local || '') <=> (other.local || '')
  return by_local unless by_local == 0
  (@domain || '') <=> (other.domain || '')
end
84
+ include Comparable
85
+
86
+ # Return a hash value for this address. This is based solely on
87
+ # the email address portion (any display name and comments are
88
+ # ignored).
89
# Hash value derived only from the bare e-mail address (see #address),
# so display name and comments do not influence it.
def hash
  bare = address
  bare.hash
end
92
+
93
+ # Return true if the two objects are equal. Do this based solely
94
+ # on the email address portion (any display name and comments are
95
+ # ignored). Fails if the other object is not an RMail::Address
96
+ # object.
97
# True when both the local part and the domain are eql? to those of
# +other+.  Raises TypeError when +other+ is not an RMail::Address.
def eql?(other)
  raise TypeError unless other.kind_of?(RMail::Address)
  return false unless @local.eql?(other.local)
  @domain.eql?(other.domain)
end
101
+
102
+ # Retrieve the local portion of the mail address. This is the
103
+ # portion that precedes the <tt>@</tt> sign.
104
# Reader for the part of the address in front of the "@" (nil if unset).
def local
  return @local
end
107
+
108
+ # Assign the local portion of the mail address. This is the
109
+ # portion that precedes the <tt>@</tt> sign.
110
# Writer for the part of the address in front of the "@".  Accepts a
# String or nil; anything else raises ArgumentError.
def local=(l)
  acceptable = l.nil? || l.kind_of?(String)
  raise ArgumentError unless acceptable
  @local = l
end
114
+
115
+ # Returns the display name of this address. The display name is
116
+ # present only for "angle addr" style addresses such as:
117
+ #
118
+ # John Doe <johnd@example.net>
119
+ #
120
+ # In this case, the display name will be "John Doe". In
121
+ # particular this old style address has no display name:
122
+ #
123
+ # bobs@example.net (Bob Smith)
124
+ #
125
+ # See also display_name=, #name
126
# Reader for the explicit display name (nil when the address has none).
def display_name
  return @display_name
end
129
+
130
+ # Assign a display name to this address. See display_name for a
131
+ # definition of what this is.
132
+ #
133
+ # See also display_name
134
# Writer for the display name.  Accepts a String or nil (otherwise
# ArgumentError is raised); the empty string is stored as nil.
def display_name=(str)
  raise ArgumentError, 'not a string' if !str.nil? && !str.kind_of?(String)
  @display_name = (str == '' ? nil : str)
  nil
end
141
+
142
+ # Returns a best guess at a display name for this email address.
143
+ # This function first checks if the address has a true display
144
+ # name (see display_name) and returns it if so. Otherwise, if the
145
+ # address has any comments, the last comment will be returned.
146
+ #
147
+ # In most cases, this will behave reasonably. For example, it
148
+ # will return "Bob Smith" for this address:
149
+ #
150
+ # bobs@example.net (Bob Smith)
151
+ #
152
+ # See also display_name, #comments, #comments=
153
# Best-effort human name: the display name when there is one, otherwise
# the last comment (nil when there are no comments either).
def name
  return @display_name if @display_name
  @comments && @comments.last
end
156
+
157
+ # Returns the comments in this address as an array of strings.
158
# Reader for the comment list (an array of strings, or nil).
def comments
  return @comments
end
161
+
162
+ # Set the comments for this address. The +comments+ argument can
163
+ # be a string, or an array of strings. In either case, any
164
+ # existing comments are replaced.
165
+ #
166
+ # See also #comments, #name
167
# Replace the comments of this address.
#
# +comments+ may be nil (clears the comments), a String (stored as a
# one-element array) or an Array of strings.  The stored array is frozen.
# A copy of a passed Array is stored, so the caller's own array is not
# frozen as a side effect.
#
# Raises TypeError for any other argument.  The message is built with
# Object#class; the previously used Object#type no longer exists and made
# this path raise NoMethodError instead.
def comments=(comments)
  @comments =
    case comments
    when nil    then nil
    when Array  then comments.dup.freeze
    when String then [comments].freeze
    else
      raise TypeError, "Argument to RMail::Address#comments= must be " +
        "String, Array or nil, was #{comments.class}."
    end
end
181
+
182
+ # Retrieve the domain portion of the mail address. This is the
183
+ # portion after the <tt>@</tt> sign.
184
# Reader for the part of the address after the "@" (nil if unset).
def domain
  return @domain
end
187
+
188
+ # Assign a domain name to this address. This is the portion after
189
+ # the <tt>@</tt> sign. Any existing domain name will be changed.
190
# Writer for the part of the address after the "@".  nil and the empty
# string clear the domain; a String is stored with surrounding whitespace
# stripped; any other value raises ArgumentError.
def domain=(domain)
  if domain.nil? || domain == ''
    @domain = nil
  elsif domain.kind_of?(String)
    @domain = domain.strip
  else
    raise ArgumentError
  end
end
198
+
199
+ # Returns the email address portion of the address (i.e. without a
200
+ # display name, angle addresses, or comments).
201
+ #
202
+ # The string returned is not suitable for insertion into an
203
+ # e-mail. RFC2822 quoting rules are not followed. The raw
204
+ # address is returned instead.
205
+ #
206
+ # For example, if the local part requires quoting, this function
207
+ # will not perform the quoting (see #format for that). So this
208
+ # function can return strings such as:
209
+ #
210
+ # "address with no quoting@example.net"
211
+ #
212
+ # See also #format
213
# Return the raw "local@domain" string with no RFC2822 quoting applied.
#
# Without a domain only the local part is returned (nil when that is
# unset too).  With a domain but no local part, the local part is treated
# as empty -- the same convention #<=> uses -- instead of raising
# NoMethodError on nil, which also made #hash fail for such addresses.
def address
  return @local if @domain.nil?
  "#{@local}@#{@domain}"
end
220
+
221
+ # Return this address as a String formatted as appropriate for
222
+ # insertion into a mail message.
223
# Return this address as a String suitable for insertion into a mail
# message: an optional display name, the (angle-bracketed when needed)
# address, and any comments in parentheses, joined by single spaces.
#
# Display names and local parts containing characters outside the atom
# set are emitted as quoted strings; domains that are not plain dotted
# atoms are emitted as domain literals in square brackets.
#
# The validity checks are anchored with \A and \z.  With ^ and $ a value
# containing a newline passed as soon as any single line of it looked
# clean, so multi-line values were emitted unquoted.
#
# The local part must be set; a nil @local raises NoMethodError.
def format
  display_name =
    if @display_name.nil?
      nil
    elsif @display_name =~ /\A[-\/\w=!#\$%&'*+?^`{|}~ ]+\z/
      @display_name
    else
      '"' + @display_name.gsub(/["\\]/, '\\\\\&') + '"'
    end

  local =
    if @local !~ /\A[-\w=!#\$%&'*+?^`{|}~\.\/]+\z/ ||
       @local =~ /\A\./ ||
       @local =~ /\.\z/ ||
       @local =~ /\.\./
      '"' + @local.gsub(/["\\]/, '\\\\\&') + '"'
    else
      @local
    end

  domain =
    if !@domain.nil? &&
       (@domain !~ /\A[-\w=!#\$%&'*+?^`{|}~\.\/]+\z/ ||
        @domain =~ /\A\./ ||
        @domain =~ /\.\z/ ||
        @domain =~ /\.\./)
      # Re-use the inside of an existing [literal]; otherwise wrap the
      # whole value.  Brackets and backslashes inside are escaped.
      inner = (@domain =~ /\A\[(.*)\]\z/) ? $1 : @domain
      '[' + inner.gsub(/[\[\]\\]/, '\\\\\&') + ']'
    else
      @domain
    end

  address =
    if domain.nil?
      local
    elsif !display_name.nil? || domain[-1] == ?]
      # Angle brackets are required after a display name and are also
      # used around domain literals.
      '<' + local + '@' + domain + '>'
    else
      local + '@' + domain
    end

  comments =
    unless @comments.nil?
      @comments.collect { |c| '(' + c.gsub(/[()\\]/, '\\\\\&') + ')' }.join(' ')
    end

  [display_name, address, comments].compact.join(' ')
end
268
+
269
+ # Addresses can be converted into strings.
270
+ alias :to_str :format
271
+
272
+ # This class provides a facility to parse a string containing one
273
+ # or more RFC2822 addresses into an array of RMail::Address
274
+ # objects. You can use it directly, but it is more conveniently
275
+ # used with the RMail::Address.parse method.
276
+ class Parser
277
+
278
+ # Create a RMail::Address::Parser object that will parse
279
+ # +string+. See also the RMail::Address.parse method.
280
# Remember +string+ as the text that #parse will tokenize.
def initialize(string)
  @string = string
end
283
+
284
+ # This function attempts to extract mailing addresses from the
285
+ # string passed to #new. The function returns an
286
+ # RMail::Address::List of RMail::Address objects
287
+ # (RMail::Address::List is a subclass of Array). A malformed
288
+ # input string will not generate an exception. Instead, the
289
+ # array returned will simply not contain the malformed
290
+ # addresses.
291
+ #
292
+ # The string is expected to be in a valid format as documented
293
+ # in RFC2822's mailbox-list grammar. This will work for lists
294
+ # of addresses in the <tt>To:</tt>, <tt>From:</tt>, etc. headers
295
+ # in email.
296
# Run the parser over the string given to #new and return the resulting
# RMail::Address::List.  Entries that ended up without a local part or
# without a domain are removed from the list before it is returned.
def parse
  @lexemes = []
  @tokens = []
  @errors = 0
  @addresses = RMail::Address::List.new

  new_address
  get
  address_list
  reset_errors

  @addresses.delete_if do |addr|
    !(addr.local && addr.domain)
  end
end
309
+
310
+ private
311
+
312
# Token kinds produced by the tokenizer and stored in @sym.
SYM_ATOM           = :atom
SYM_ATOM_NON_ASCII = :atom_non_ascii
SYM_QTEXT          = :qtext
SYM_COMMA          = :comma
SYM_LESS_THAN      = :less_than
SYM_GREATER_THAN   = :greater_than
SYM_AT_SIGN        = :at_sign
SYM_PERIOD         = :period
SYM_COLON          = :colon
SYM_SEMI_COLON     = :semi_colon
SYM_DOMAIN_LITERAL = :domain_literal
323
+
324
# If any error was counted since the last reset, drop the most recently
# pushed address and clear the error counter.
def reset_errors
  return unless @errors > 0
  @addresses.pop
  @errors = 0
end
330
+
331
# Settle any pending errors (see reset_errors), then append a blank
# Address that subsequent parsing fills in.
def new_address
  reset_errors
  blank = Address.new
  @addresses.push(blank)
end
335
+
336
+ # Get the text that has been saved up to this point.
337
# Join the saved lexemes into one string and clear the saved list.
# Lexemes are separated by a single space, except that a "." is glued
# directly to both of its neighbours.
def get_text
  joined = ''
  glue = ''
  @lexemes.each do |piece|
    if piece == '.'
      glue = ''
    else
      joined << glue
      glue = ' '
    end
    joined << piece
  end
  @lexemes = []
  joined
end
353
+
354
+ # Save the current lexeme away for later retrieval with
355
+ # get_text.
356
# Append the current lexeme to the list that get_text later joins.
def save_text
  @lexemes.push(@lexeme)
end
359
+
360
+ # Parse this:
361
+ # address_list = ([address] SYNC ",") {[address] SYNC "," } [address] .
362
# Parse this:
#   address_list = ([address] SYNC ",") {[address] SYNC "," } [address] .
#
# Each iteration parses an optional address, skips ahead to the next
# comma (counting an error for every token skipped), consumes the comma
# and starts a fresh address.  Parsing stops at end of input or at a
# token that can neither start an address nor be a comma.
#
# The original ended with a further "[address]" branch that could never
# run, because the loop only exits when the current token cannot start an
# address; it is dropped.  The start-of-address test, previously spelled
# out three times (once without SYM_ATOM_NON_ASCII), now lives in one
# helper.
def address_list
  loop do
    address if address_start?
    sync(SYM_COMMA)
    return if @sym.nil?
    expect(SYM_COMMA)
    new_address
    break unless address_start? || @sym == SYM_COMMA
  end
end

# True when the current token can begin an address.
def address_start?
  @sym == SYM_ATOM ||
    @sym == SYM_ATOM_NON_ASCII ||
    @sym == SYM_QTEXT ||
    @sym == SYM_LESS_THAN
end
393
+
394
+ # Parses ahead through a local-part or display-name until no
395
+ # longer looking at a word or "." and returns the next symbol.
396
# Peek past a run of words and periods and report the kind of the token
# that follows it.  Every token read while peeking is pushed back, so on
# return the parser is positioned on the same token as on entry.
def address_lookahead
  consumed = []
  while [SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT, SYM_PERIOD].include?(@sym)
    consumed << [@sym, @lexeme]
    get
  end
  following = @sym
  # Push back in reverse reading order: the terminator first, then the
  # run itself, so the next get yields the first token of the run again.
  putback(following, @lexeme)
  putback_array(consumed)
  get
  following
end
411
+
412
+ # Parse this:
413
+ # address = mailbox | group
414
# Parse this:
#   address = mailbox | group
#
# The leading words may be a local-part or a display-name; which one is
# decided by the token that follows them:
#
#   ... '@'  -> local part of local-part@domain
#   ... '<'  -> display name of a mailbox
#   ... ':'  -> display name of a group
#
# Any other following token is invalid input and is left to mailbox.
def address
  upcoming = address_lookahead
  return group if upcoming == SYM_COLON
  mailbox(upcoming)
end
436
+
437
+ # Parse this:
438
+ # mailbox = angleAddr |
439
+ # word {word | "."} angleAddr |
440
+ # word {"." word} "@" domain .
441
+ #
442
+ # lookahead will be set to the return value of
443
+ # address_lookahead, which will be '@' or '<' (or another value
444
+ # for invalid input)
445
# Parse this:
#   mailbox = angleAddr |
#             word {word | "."} angleAddr |
#             word {"." word} "@" domain .
#
# +lookahead+ is the value returned by address_lookahead: '<' selects the
# display-name form; anything else is handled as local-part "@" domain.
def mailbox(lookahead)
  if @sym == SYM_LESS_THAN
    angle_addr
  elsif lookahead == SYM_LESS_THAN
    display_name_word
    loop do
      case @sym
      when SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT
        display_name_word
      when SYM_PERIOD
        save_text
        get
      else
        break
      end
    end
    @addresses.last.display_name = get_text
    angle_addr
  else
    word
    until @sym != SYM_PERIOD
      save_text
      get
      word
    end
    @addresses.last.local = get_text
    expect(SYM_AT_SIGN)
    domain
  end
end
477
+
478
+ # Parse this:
479
+ # group = word {word | "."} SYNC ":" [mailbox_list] SYNC ";"
480
# Parse this:
#   group = word {word | "."} SYNC ":" [mailbox_list] SYNC ";"
#
# The group's name is discarded, and any comments collected so far on the
# current address are cleared before the member list is parsed.
def group
  word
  loop do
    case @sym
    when SYM_ATOM, SYM_QTEXT
      word
    when SYM_PERIOD
      save_text
      get
    else
      break
    end
  end
  sync(SYM_COLON)
  expect(SYM_COLON)
  get_text # the group name is not kept
  @addresses.last.comments = nil
  if [SYM_ATOM, SYM_QTEXT, SYM_COMMA, SYM_LESS_THAN].include?(@sym)
    mailbox_list
  end
  sync(SYM_SEMI_COLON)
  expect(SYM_SEMI_COLON)
end
501
+
502
+ # Parse this:
503
+ # word = atom | atom_non_ascii | quotedString
504
# Parse this:
#   word = atom | atom_non_ascii | quotedString
#
# Saves the token's text and advances; any other token counts an error
# and is left in place.
def display_name_word
  unless @sym == SYM_ATOM || @sym == SYM_ATOM_NON_ASCII || @sym == SYM_QTEXT
    return error("expected word, got #{@sym.inspect}")
  end
  save_text
  get
end
512
+
513
+ # Parse this:
514
+ # word = atom | quotedString
515
# Parse this:
#   word = atom | quotedString
#
# Saves the token's text and advances; any other token counts an error
# and is left in place.
def word
  unless @sym == SYM_ATOM || @sym == SYM_QTEXT
    return error("expected word, got #{@sym.inspect}")
  end
  save_text
  get
end
523
+
524
+ # Parse a mailbox list.
525
# Parse one mailbox followed by any number of ", mailbox" repetitions,
# starting a fresh address for each repetition.
def mailbox_list
  mailbox(address_lookahead)
  until @sym != SYM_COMMA
    get
    new_address
    mailbox(address_lookahead)
  end
end
533
+
534
+ # Parse this:
535
+ # angleAddr = SYNC "<" [obsRoute] addrSpec SYNC ">"
536
# Parse this:
#   angleAddr = SYNC "<" [obsRoute] addrSpec SYNC ">"
#
# An "@" directly after the "<" introduces an obsolete route.
def angle_addr
  expect(SYM_LESS_THAN)
  obs_route if @sym == SYM_AT_SIGN
  addr_spec
  expect(SYM_GREATER_THAN)
end
544
+
545
+ # Parse this:
546
+ # domain = domainLiteral | obsDomain
547
# Parse this:
#   domain = domainLiteral | obsDomain
#
# Stores the parsed text as the domain of the current address.  A token
# that can start neither form counts an error.
def domain
  case @sym
  when SYM_DOMAIN_LITERAL
    save_text
    @addresses.last.domain = get_text
    get
  when SYM_ATOM
    obs_domain
    @addresses.last.domain = get_text
  else
    error "expected start of domain, got #{@sym.inspect}"
  end
end
559
+
560
+ # Parse this:
561
+ # addrSpec = localPart "@" domain
562
# Parse this:
#   addrSpec = localPart "@" domain
def addr_spec
  local_part
  expect(SYM_AT_SIGN)
  domain
end
567
+
568
+ # Parse this:
569
+ # local_part = word *( "." word )
570
# Parse this:
#   local_part = word *( "." word )
#
# The collected text becomes the local part of the current address.
def local_part
  word
  until @sym != SYM_PERIOD
    save_text
    get
    word
  end
  @addresses.last.local = get_text
end
579
+
580
+ # Parse this:
581
+ # obs_domain = atom *( "." atom ) .
582
# Parse this:
#   obs_domain = atom *( "." atom ) .
#
# Every atom and period is saved for a later get_text.
def obs_domain
  expect_save(SYM_ATOM)
  until @sym != SYM_PERIOD
    save_text
    get
    expect_save(SYM_ATOM)
  end
end
590
+
591
+ # Parse this:
592
+ # obs_route = obs_domain_list ":"
593
# Parse this:
#   obs_route = obs_domain_list ":"
def obs_route
  obs_domain_list
  expect(SYM_COLON)
end
597
+
598
+ # Parse this:
599
+ # obs_domain_list = "@" domain *( *( "," ) "@" domain )
600
# Parse this:
#   obs_domain_list = "@" domain *( *( "," ) "@" domain )
#
# Runs of commas between the routed domains are skipped.
def obs_domain_list
  expect(SYM_AT_SIGN)
  domain
  while @sym == SYM_COMMA || @sym == SYM_AT_SIGN
    get while @sym == SYM_COMMA
    expect(SYM_AT_SIGN)
    domain
  end
end
611
+
612
+ # Put a token back into the input stream. This token will be
613
+ # retrieved by the next call to get.
614
# Push one token back; the next call to get returns it before reading
# anything further from the string.
def putback(sym, lexeme)
  @tokens << [sym, lexeme]
end
617
+
618
+ # Put back an array of tokens into the input stream.
619
# Push back a list of [sym, lexeme] pairs.  They are pushed last-first so
# that subsequent calls to get return them in their original order.
def putback_array(a)
  a.reverse_each do |pair|
    putback(*pair)
  end
end
624
+
625
+ # Get a single token from the string or from the @tokens array
626
+ # if somebody used putback.
627
# Advance to the next token: a pushed-back token if there is one,
# otherwise a fresh token read from the string.
def get
  if @tokens.empty?
    get_tokenize
  else
    @sym, @lexeme = @tokens.pop
  end
end
634
+
635
+ # Get a single token from the string
636
+ def get_tokenize # Read one token from the head of @string into @sym/@lexeme, skipping whitespace, comments and empty quoted strings.
637
+ @lexeme = nil # stays nil unless a branch below computes an unescaped lexeme
638
+ loop { # each pass strips one item from @string; only branches that produce a token break out
639
+ case @string
640
+ when nil # the end
641
+ @sym = nil
642
+ break
643
+ when "" # the end
644
+ @sym = nil
645
+ break
646
+ when /\A[\r\n\t ]+/m # skip whitespace
647
+ @string = $'
648
+ when /\A\(/m # skip comment
649
+ comment # consumes the comment text from @string and records it on the current address
650
+ when /\A""/ # skip empty quoted text
651
+ @string = $'
652
+ when /\A[\w!$%&\'*+\/=?^_\`{\}|~#-]+/m
653
+ @string = $'
654
+ @sym = SYM_ATOM
655
+ break
656
+ when /\A"(.*?([^\\]|\\\\))"/m
657
+ @string = $'
658
+ @sym = SYM_QTEXT
659
+ @lexeme = $1.gsub(/\\(.)/, '\1')
660
+ break
661
+ when /\A</
662
+ @string = $'
663
+ @sym = SYM_LESS_THAN
664
+ break
665
+ when /\A>/
666
+ @string = $'
667
+ @sym = SYM_GREATER_THAN
668
+ break
669
+ when /\A@/
670
+ @string = $'
671
+ @sym = SYM_AT_SIGN
672
+ break
673
+ when /\A,/
674
+ @string = $'
675
+ @sym = SYM_COMMA
676
+ break
677
+ when /\A:/
678
+ @string = $'
679
+ @sym = SYM_COLON
680
+ break
681
+ when /\A;/
682
+ @string = $'
683
+ @sym = SYM_SEMI_COLON
684
+ break
685
+ when /\A\./
686
+ @string = $'
687
+ @sym = SYM_PERIOD
688
+ break
689
+ when /\A(\[.*?([^\\]|\\\\)\])/m
690
+ @string = $'
691
+ @sym = SYM_DOMAIN_LITERAL
692
+ @lexeme = $1.gsub(/(^|[^\\])[\r\n\t ]+/, '\1').gsub(/\\(.)/, '\1')
693
+ break
694
+ when /\A[\200-\377\w!$%&\'*+\/=?^_\`{\}|~#-]+/m
695
+ # This is just like SYM_ATOM, but also accepts characters with
696
+ # the high bit set (\200-\377).  Such tokens are allowed in
697
+ # the display name portion of an address even though that
698
+ # violates the RFCs.  NOTE(review): the byte range assumes a byte-oriented regexp -- confirm on multibyte-aware Ruby.
699
+ @string = $'
700
+ @sym = SYM_ATOM_NON_ASCII
701
+ break
702
+ when /\A./
703
+ @string = $' # garbage
704
+ error('garbage character in string')
705
+ else # nothing above matched
706
+ raise "internal error, @string is #{@string.inspect}"
707
+ end
708
+ }
709
+ if @sym # a token was produced (not end of input)
710
+ @lexeme ||= $& # default lexeme: the text matched by the last successful regexp above
711
+ end
712
+ end
713
+
714
+ def comment # Consume a parenthesized comment (nesting allowed) from the head of @string and append its text to the current address's comments.
715
+ depth = 0 # number of currently open parentheses
716
+ comment = ''
717
+ catch(:done) { # left via throw once the outermost parenthesis has been closed
718
+ while @string =~ /\A(\(([^\(\)\\]|\\.)*)/m
719
+ @string = $'
720
+ comment += $1
721
+ depth += 1 # consumed "(" plus following text up to the next parenthesis
722
+ while @string =~ /\A(([^\(\)\\]|\\.)*\))/m
723
+ @string = $'
724
+ comment += $1
725
+ depth -= 1 # consumed text up to and including one ")"
726
+ throw :done if depth == 0
727
+ if @string =~ /\A(([^\(\)\\]|\\.)+)/
728
+ @string = $'
729
+ comment += $1
730
+ end
731
+ end
732
+ end # NOTE(review): if the parentheses never balance, both loops simply stop matching and the text gathered so far is still recorded below
733
+ }
734
+ comment = comment.gsub(/[\r\n\t ]+/m, ' ').
735
+ sub(/\A\((.*)\)$/m, '\1').
736
+ gsub(/\\(.)/, '\1')
737
+ @addresses.last.comments = # the setter replaces the list, so build a new array containing the old entries plus this one
738
+ (@addresses.last.comments || []) + [comment]
739
+ end
740
+
741
# Consume the current token when it is +token+; otherwise count an error
# and leave the current token in place.
def expect(token)
  return get if @sym == token
  error("expected #{token.inspect} but got #{@sym.inspect}")
end
748
+
749
# Like expect, but first saves the current lexeme when the token matches.
def expect_save(token)
  save_text if @sym == token
  expect(token)
end
755
+
756
# Skip tokens until the current one is +token+ or the input is exhausted,
# counting one error for every token skipped.
def sync(token)
  until !@sym || @sym == token
    error "expected #{token.inspect} but got #{@sym.inspect}"
    get
  end
end
762
+
763
# Count one parse error.  The message +s+ is accepted but not recorded.
def error(s)
  @errors = @errors + 1
end
766
+ end
767
+
768
+ # Given a string, this function attempts to extract mailing
769
+ # addresses from it and returns an RMail::Address::List of those
770
+ # addresses (RMail::Address::List is a subclass of Array).
771
+ #
772
+ # This is identical to using a RMail::Address::Parser directly like
773
+ # this:
774
+ #
775
+ # RMail::Address::Parser.new(string).parse
776
# Convenience wrapper: parse +string+ with a new RMail::Address::Parser
# and return the resulting list of addresses.
def Address.parse(string)
  parser = Parser.new(string)
  parser.parse
end
779
+
780
+ # RMail::Address::List is a simple subclass of the Array class
781
+ # that provides convenience methods for accessing the
782
+ # RMail::Address objects it contains.
783
# An Array of RMail::Address objects with shortcuts that gather one
# attribute from every element.
class List < Array

  # The #local of every element, in order.
  def locals
    map { |addr| addr.local }
  end

  # The #display_name of every element, in order.
  def display_names
    map { |addr| addr.display_name }
  end

  # The #name of every element, in order.
  def names
    map { |addr| addr.name }
  end

  # The #domain of every element, in order.
  def domains
    map { |addr| addr.domain }
  end

  # The #address of every element, in order.
  def addresses
    map { |addr| addr.address }
  end

  # The #format of every element, in order.
  def format
    map { |addr| addr.format }
  end

end
822
+
823
+ end
824
+ end
825
+
826
# When this file is run directly, parse a sample group address and print
# the resulting list.
if __FILE__ == $0
  demo = RMail::Address::Parser.new('A Group:a@b.c,d@e.f;')
  p demo.parse
end