rmail 0.17

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. data/NEWS +309 -0
  2. data/NOTES +14 -0
  3. data/README +83 -0
  4. data/THANKS +25 -0
  5. data/TODO +112 -0
  6. data/guide/Intro.txt +122 -0
  7. data/guide/MIME.txt +6 -0
  8. data/guide/TableOfContents.txt +13 -0
  9. data/install.rb +1023 -0
  10. data/lib/rmail.rb +50 -0
  11. data/lib/rmail/address.rb +829 -0
  12. data/lib/rmail/header.rb +987 -0
  13. data/lib/rmail/mailbox.rb +62 -0
  14. data/lib/rmail/mailbox/mboxreader.rb +182 -0
  15. data/lib/rmail/message.rb +201 -0
  16. data/lib/rmail/parser.rb +412 -0
  17. data/lib/rmail/parser/multipart.rb +217 -0
  18. data/lib/rmail/parser/pushbackreader.rb +173 -0
  19. data/lib/rmail/serialize.rb +190 -0
  20. data/lib/rmail/utils.rb +59 -0
  21. data/rmail.gemspec +17 -0
  22. data/tests/addrgrammar.txt +113 -0
  23. data/tests/data/mbox.odd +4 -0
  24. data/tests/data/mbox.simple +8 -0
  25. data/tests/data/multipart/data.1 +5 -0
  26. data/tests/data/multipart/data.10 +1 -0
  27. data/tests/data/multipart/data.11 +9 -0
  28. data/tests/data/multipart/data.12 +9 -0
  29. data/tests/data/multipart/data.13 +3 -0
  30. data/tests/data/multipart/data.14 +3 -0
  31. data/tests/data/multipart/data.15 +3 -0
  32. data/tests/data/multipart/data.16 +3 -0
  33. data/tests/data/multipart/data.17 +0 -0
  34. data/tests/data/multipart/data.2 +5 -0
  35. data/tests/data/multipart/data.3 +2 -0
  36. data/tests/data/multipart/data.4 +3 -0
  37. data/tests/data/multipart/data.5 +1 -0
  38. data/tests/data/multipart/data.6 +2 -0
  39. data/tests/data/multipart/data.7 +3 -0
  40. data/tests/data/multipart/data.8 +5 -0
  41. data/tests/data/multipart/data.9 +4 -0
  42. data/tests/data/parser.badmime1 +4 -0
  43. data/tests/data/parser.badmime2 +6 -0
  44. data/tests/data/parser.nested-multipart +75 -0
  45. data/tests/data/parser.nested-simple +12 -0
  46. data/tests/data/parser.nested-simple2 +16 -0
  47. data/tests/data/parser.nested-simple3 +21 -0
  48. data/tests/data/parser.rfc822 +65 -0
  49. data/tests/data/parser.simple-mime +24 -0
  50. data/tests/data/parser/multipart.1 +8 -0
  51. data/tests/data/parser/multipart.10 +4 -0
  52. data/tests/data/parser/multipart.11 +12 -0
  53. data/tests/data/parser/multipart.12 +12 -0
  54. data/tests/data/parser/multipart.13 +6 -0
  55. data/tests/data/parser/multipart.14 +6 -0
  56. data/tests/data/parser/multipart.15 +6 -0
  57. data/tests/data/parser/multipart.16 +6 -0
  58. data/tests/data/parser/multipart.2 +8 -0
  59. data/tests/data/parser/multipart.3 +5 -0
  60. data/tests/data/parser/multipart.4 +6 -0
  61. data/tests/data/parser/multipart.5 +4 -0
  62. data/tests/data/parser/multipart.6 +5 -0
  63. data/tests/data/parser/multipart.7 +6 -0
  64. data/tests/data/parser/multipart.8 +8 -0
  65. data/tests/data/parser/multipart.9 +7 -0
  66. data/tests/data/transparency/absolute.1 +5 -0
  67. data/tests/data/transparency/absolute.2 +1 -0
  68. data/tests/data/transparency/absolute.3 +2 -0
  69. data/tests/data/transparency/absolute.4 +3 -0
  70. data/tests/data/transparency/absolute.5 +4 -0
  71. data/tests/data/transparency/absolute.6 +49 -0
  72. data/tests/data/transparency/message.1 +73 -0
  73. data/tests/data/transparency/message.2 +34 -0
  74. data/tests/data/transparency/message.3 +63 -0
  75. data/tests/data/transparency/message.4 +5 -0
  76. data/tests/data/transparency/message.5 +15 -0
  77. data/tests/data/transparency/message.6 +1185 -0
  78. data/tests/runtests.rb +35 -0
  79. data/tests/testaddress.rb +1192 -0
  80. data/tests/testbase.rb +207 -0
  81. data/tests/testheader.rb +1207 -0
  82. data/tests/testmailbox.rb +47 -0
  83. data/tests/testmboxreader.rb +161 -0
  84. data/tests/testmessage.rb +257 -0
  85. data/tests/testparser.rb +634 -0
  86. data/tests/testparsermultipart.rb +205 -0
  87. data/tests/testpushbackreader.rb +40 -0
  88. data/tests/testserialize.rb +264 -0
  89. data/tests/testtestbase.rb +112 -0
  90. data/tests/testtranspparency.rb +105 -0
  91. metadata +143 -0
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright (c) 2003 Matt Armstrong. All rights reserved.
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright
11
+ # notice, this list of conditions and the following disclaimer in the
12
+ # documentation and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
19
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ #
27
+ #++
28
+ # This module allows you to simply
29
+ # require 'rmail'
30
+ # in your ruby scripts and have all of the RMail module required.
31
+ # This provides maximum convenience when the startup time of your
32
+ # script is not crucial.
33
+
34
+ # The RMail module contains all of the RubyMail classes, but has no
35
+ # useful API of its own.
36
+ #
37
+ # See guide/Intro.txt for a general overview of RubyMail.
38
+ module RMail # namespace only: no methods or constants are defined in this file
39
+ end
40
+
41
+ require 'rmail/address'
42
+ require 'rmail/header'
43
+ require 'rmail/mailbox'
44
+ require 'rmail/message'
45
+ require 'rmail/parser'
46
+ require 'rmail/serialize'
47
+ require 'rmail/utils'
48
+ require 'rmail/mailbox/mboxreader'
49
+ require 'rmail/parser/multipart'
50
+ require 'rmail/parser/pushbackreader'
@@ -0,0 +1,829 @@
1
+ #--
2
+ # Copyright (C) 2001, 2002, 2003 Matt Armstrong. All rights
3
+ # reserved.
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright
11
+ # notice, this list of conditions and the following disclaimer in the
12
+ # documentation and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
19
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ #
27
+ #++
28
+ # Implements the RMail::Address, RMail::Address::List, and
29
+ # RMail::Address::Parser classes. Together, these classes allow you
30
+ # to robustly parse, manipulate, and generate RFC2822 email addresses
31
+ # and address lists.
32
+
33
+ module RMail
34
+
35
+ # This class provides the following functionality:
36
+ #
37
+ # * Parses RFC2822 address lists into a list of Address
38
+ # objects (see #parse).
39
+ #
40
+ # * Format Address objects as appropriate for insertion into email
41
+ # messages (see #format).
42
+ #
43
+ # * Allows manipulation of the various parts of the address (see
44
+ # #local=, #domain=, #display_name=, #comments=).
45
+ class Address
46
+
47
+ ATEXT = '[\w=!#$%&\'*+-?^\`{|}~]+'
48
+
49
+ # Create a new address. If the +string+ argument is not nil, it
50
+ # is parsed for mail addresses and if one is found, it is used to
51
+ # initialize this object.
52
# Start with every field unset; when +string+ is a String, copy the fields
# of the first address parsed out of it (if any).
#
# Raises ArgumentError when +string+ is neither nil nor a String.
def initialize(string = nil)
  @local = nil
  @domain = nil
  @comments = nil
  @display_name = nil
  return if string.nil?
  raise ArgumentError unless string.kind_of?(String)

  first = Address.parse(string)[0]
  return if first.nil?

  @local = first.local
  @domain = first.domain
  @comments = first.comments
  @display_name = first.display_name
end
68
+
69
+ # Compare this address with another based on the email address
70
+ # portion only (any display name and comments are ignored). If
71
+ # the other object is not an RMail::Address, it is coerced into a
72
+ # string with its to_str method and then parsed into an
73
+ # RMail::Address object.
74
# Order two addresses by local part, then by domain; display names and
# comments are ignored and a nil part compares as the empty string.
#
# +other+ may be an RMail::Address or anything responding to to_str, in
# which case it is parsed into an RMail::Address first.
#
# Returns -1, 0 or 1, or nil when +other+ is neither of those. Returning
# nil (rather than letting the to_str call raise NoMethodError) keeps the
# Comparable operators well-behaved: == answers false and the ordering
# operators raise ArgumentError.
def <=>(other)
  unless other.kind_of?(RMail::Address)
    return nil unless other.respond_to?(:to_str)
    other = RMail::Address.new(other.to_str)
  end
  cmp = (@local || '') <=> (other.local || '')
  cmp = (@domain || '') <=> (other.domain || '') if cmp == 0
  cmp
end
84
+ include Comparable
85
+
86
+ # Return a hash value for this address. This is based solely on
87
+ # the email address portion (any display name and comments are
88
+ # ignored).
89
# Hash of the bare email address string, so display name and comments do
# not influence the value.
def hash
  key = address
  key.hash
end
92
+
93
+ # Return true if the two objects are equal. Do this based solely
94
+ # on the email address portion (any display name and comments are
95
+ # ignored). Fails if the other object is not an RMail::Address
96
+ # object.
97
# True when both the local part and the domain are eql? to those of
# +other+. Raises TypeError unless +other+ is an RMail::Address.
def eql?(other)
  unless RMail::Address === other
    raise TypeError
  end
  return false unless @local.eql?(other.local)
  @domain.eql?(other.domain)
end
101
+
102
+ # Retrieve the local portion of the mail address. This is the
103
+ # portion that precedes the <tt>@</tt> sign.
104
# Reader for the stored local part; nil when none has been set.
def local
  return @local
end
107
+
108
+ # Assign the local portion of the mail address. This is the
109
+ # portion that precedes the <tt>@</tt> sign.
110
# Store +l+ as the local part. Only nil or a String is accepted; anything
# else raises ArgumentError and leaves the current value untouched.
def local=(l)
  if !l.nil? && !l.kind_of?(String)
    raise ArgumentError
  end
  @local = l
end
114
+
115
+ # Returns the display name of this address. The display name is
116
+ # present only for "angle addr" style addresses such as:
117
+ #
118
+ # John Doe <johnd@example.net>
119
+ #
120
+ # In this case, the display name will be "John Doe". In
121
+ # particular this old style address has no display name:
122
+ #
123
+ # bobs@example.net (Bob Smith)
124
+ #
125
+ # See also display_name=, #name
126
# Reader for the stored display name; nil when none has been set.
def display_name
  return @display_name
end
129
+
130
+ # Assign a display name to this address. See display_name for a
131
+ # definition of what this is.
132
+ #
133
+ # See also display_name
134
# Store +str+ as the display name. An empty string is stored as nil.
# Raises ArgumentError ('not a string') unless +str+ is nil or a String.
def display_name=(str)
  raise ArgumentError, 'not a string' if !str.nil? && !str.kind_of?(String)
  @display_name = (str == '' ? nil : str)
  nil
end
141
+
142
+ # Returns a best guess at a display name for this email address.
143
+ # This function first checks if the address has a true display
144
+ # name (see display_name) and returns it if so. Otherwise, if the
145
+ # address has any comments, the last comment will be returned.
146
+ #
147
+ # In most cases, this will behave reasonably. For example, it
148
+ # will return "Bob Smith" for this address:
149
+ #
150
+ # bobs@example.net (Bob Smith)
151
+ #
152
+ # See also display_name, #comments, #comments=
153
# Prefer the real display name; otherwise fall back to the last comment,
# or nil when there are no comments either.
def name
  return @display_name if @display_name
  @comments && @comments.last
end
156
+
157
+ # Returns the comments in this address as an array of strings.
158
# Reader for the stored comments (an array of strings, or nil).
def comments
  return @comments
end
161
+
162
+ # Set the comments for this address. The +comments+ argument can
163
+ # be a string, or an array of strings. In either case, any
164
+ # existing comments are replaced.
165
+ #
166
+ # See also #comments, #name
167
# Replace the comments of this address.
#
# +comments+ may be nil (no comments), a String (stored as a one-element
# array) or an Array of strings. The stored array is frozen. An Array
# argument is copied first so the caller's own array is not frozen as a
# side effect.
#
# Raises TypeError for any other argument. The message names the
# argument's class via Object#class (Object#type no longer exists in
# Ruby 1.9 and later, where calling it raised NoMethodError instead).
def comments=(comments)
  @comments =
    case comments
    when nil
      nil
    when Array
      comments.dup.freeze
    when String
      [comments].freeze
    else
      raise TypeError, "Argument to RMail::Address#comments= must be " +
        "String, Array or nil, was #{comments.class}."
    end
end
181
+
182
+ # Retrieve the domain portion of the mail address. This is the
183
+ # portion after the <tt>@</tt> sign.
184
# Reader for the stored domain; nil when none has been set.
def domain
  return @domain
end
187
+
188
+ # Assign a domain name to this address. This is the portion after
189
+ # the <tt>@</tt> sign. Any existing domain name will be changed.
190
# Store +domain+ with surrounding whitespace stripped. nil and the empty
# string both clear the domain. Any other non-String raises ArgumentError
# and leaves the current value untouched.
def domain=(domain)
  if domain.nil? || domain == ''
    @domain = nil
  else
    raise ArgumentError unless domain.kind_of?(String)
    @domain = domain.strip
  end
end
198
+
199
+ # Returns the email address portion of the address (i.e. without a
200
+ # display name, angle addresses, or comments).
201
+ #
202
+ # The string returned is not suitable for insertion into an
203
+ # e-mail. RFC2822 quoting rules are not followed. The raw
204
+ # address is returned instead.
205
+ #
206
+ # For example, if the local part requires quoting, this function
207
+ # will not perform the quoting (see #format for that). So this
208
+ # function can return strings such as:
209
+ #
210
+ # "address with no quoting@example.net"
211
+ #
212
+ # See also #format
213
# The raw, unquoted address: just the local part when there is no domain,
# otherwise local part, "@" and domain joined together.
def address
  return @local if @domain.nil?
  @local + '@' + @domain
end
220
+
221
+ # Return this address as a String formatted as appropriate for
222
+ # insertion into a mail message.
223
# Build the header-ready form: optional display name, the address itself,
# then each comment in parentheses, joined by single spaces.
#
# The display name and the local part are wrapped in double quotes (with
# embedded quotes and backslashes escaped) when they contain characters
# outside the unquoted set. A domain that fails the equivalent check is
# written as a bracketed domain literal. The address is put in angle
# brackets when there is a display name or the domain is a literal.
#
# The checks are anchored with \A and \z so the whole value is examined.
# ^ and $ match at every line boundary, so a value containing a newline
# used to be emitted unquoted whenever one of its lines looked clean.
#
# Raises NoMethodError when the local part is nil.
def format
  display_name =
    if @display_name.nil?
      nil
    elsif @display_name =~ /\A[-\/\w=!#\$%&'*+?^`{|}~ ]+\z/
      @display_name
    else
      '"' + @display_name.gsub(/["\\]/, '\\\\\&') + '"'
    end

  local =
    if @local !~ /\A[-\w=!#\$%&'*+?^`{|}~\.\/]+\z/ ||
       @local =~ /\A\./ ||
       @local =~ /\.\z/ ||
       @local =~ /\.\./
      '"' + @local.gsub(/["\\]/, '\\\\\&') + '"'
    else
      @local
    end

  domain =
    if !@domain.nil? &&
       (@domain !~ /\A[-\w=!#\$%&'*+?^`{|}~\.\/]+\z/ ||
        @domain =~ /\A\./ ||
        @domain =~ /\.\z/ ||
        @domain =~ /\.\./)
      # Drop one pair of enclosing brackets, if present, before escaping
      # and re-wrapping, so "[1.2.3.4]" is not bracketed twice.
      literal = (@domain =~ /\A\[(.*)\]\z/) ? $1 : @domain
      '[' + literal.gsub(/[\[\]\\]/, '\\\\\&') + ']'
    else
      @domain
    end

  address =
    if domain.nil?
      local
    elsif !display_name.nil? || domain =~ /\]\z/
      '<' + local + '@' + domain + '>'
    else
      local + '@' + domain
    end

  comments =
    unless @comments.nil?
      @comments.collect { |c|
        '(' + c.gsub(/[()\\]/, '\\\\\&') + ')'
      }.join(' ')
    end

  [display_name, address, comments].compact.join(' ')
end
268
+
269
+ # Addresses can be converted into strings.
270
+ alias :to_str :format
271
+
272
+ # This class provides a facility to parse a string containing one
273
+ # or more RFC2822 addresses into an array of RMail::Address
274
+ # objects. You can use it directly, but it is more conveniently
275
+ # used with the RMail::Address.parse method.
276
+ class Parser
277
+
278
+ # Create a RMail::Address::Parser object that will parse
279
+ # +string+. See also the RMail::Address.parse method.
280
# Remember +string+ as the text this parser will tokenize.
def initialize(string)
  @string = string
  return
end
283
+
284
+ # This function attempts to extract mailing addresses from the
285
+ # string passed to #new. The function returns an
286
+ # RMail::Address::List of RMail::Address objects
287
+ # (RMail::Address::List is a subclass of Array). A malformed
288
+ # input string will not generate an exception. Instead, the
289
+ # array returned will simply not contain the malformed
290
+ # addresses.
291
+ #
292
+ # The string is expected to be in a valid format as documented
293
+ # in RFC2822's mailbox-list grammar. This will work for lists
294
+ # of addresses in the <tt>To:</tt>, <tt>From:</tt>, etc. headers
295
+ # in email.
296
# Run the parser over the string given to #new and return the resulting
# RMail::Address::List. Entries that ended up without a local part or
# without a domain are removed from the list.
#
# The tokenizer consumes @string as it scans, so the original input is
# put back when parsing finishes (even on an exception). Without this a
# second call to #parse on the same parser would see no input and return
# an empty list.
def parse
  source = @string
  @lexemes = []
  @tokens = []
  @addresses = RMail::Address::List.new
  @errors = 0
  new_address
  get
  address_list
  reset_errors
  @addresses.delete_if { |a|
    !a.local || !a.domain
  }
ensure
  @string = source
end
309
+
310
+ private
311
+
312
+ SYM_ATOM = :atom
313
+ SYM_ATOM_NON_ASCII = :atom_non_ascii
314
+ SYM_QTEXT = :qtext
315
+ SYM_COMMA = :comma
316
+ SYM_LESS_THAN = :less_than
317
+ SYM_GREATER_THAN = :greater_than
318
+ SYM_AT_SIGN = :at_sign
319
+ SYM_PERIOD = :period
320
+ SYM_COLON = :colon
321
+ SYM_SEMI_COLON = :semi_colon
322
+ SYM_DOMAIN_LITERAL = :domain_literal
323
+
324
# If any errors were counted, drop the most recently pushed address and
# zero the error counter; otherwise do nothing.
def reset_errors
  return unless @errors > 0
  @addresses.pop
  @errors = 0
end
330
+
331
# Settle any pending errors for the current address, then append a fresh,
# empty Address for the parser to fill in.
def new_address
  reset_errors
  @addresses << Address.new
end
335
+
336
+ # Get the text that has been saved up to this point.
337
# Join the saved lexemes into one string and clear the saved list.
# Lexemes are separated by a single space, except that no space is placed
# on either side of a "." lexeme.
def get_text
  joined = ''
  previous = nil
  @lexemes.each do |piece|
    needs_space = !(piece == '.' || previous.nil? || previous == '.')
    joined << ' ' if needs_space
    joined << piece
    previous = piece
  end
  @lexemes = []
  joined
end
353
+
354
+ # Save the current lexeme away for later retrieval with
355
+ # get_text.
356
# Append the current lexeme to the saved list that get_text later joins.
def save_text
  @lexemes.push(@lexeme)
end
359
+
360
+ # Parse this:
361
+ # address_list = ([address] SYNC ",") {[address] SYNC "," } [address] .
362
# Parse this:
#   address_list = ([address] SYNC ",") {[address] SYNC "," } [address] .
#
# Each pass parses one optional address, skips ahead to the next comma,
# consumes it and opens a new address slot. Parsing stops at end of input,
# or when the symbol after a comma can neither start an address nor be
# another comma.
#
# The first pass used to be written out separately before the loop, and a
# trailing "[address]" check followed it. That check could never run: the
# loop only ends when @sym is none of the symbols the check looked for.
def address_list
  starters = [SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT, SYM_LESS_THAN]
  loop do
    address if starters.include?(@sym)
    sync(SYM_COMMA)
    return if @sym.nil?
    expect(SYM_COMMA)
    new_address
    break unless starters.include?(@sym) || @sym == SYM_COMMA
  end
end
393
+
394
+ # Parses ahead through a local-part or display-name until no
395
+ # longer looking at a word or "." and returns the next symbol.
396
# Peek past a run of word and "." tokens and report the symbol that
# follows it, leaving the token stream positioned where it started.
def address_lookahead
  passable = [SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT, SYM_PERIOD]
  skipped = []
  while passable.include?(@sym)
    skipped.push([@sym, @lexeme])
    get
  end
  following = @sym
  # Push back the stopping token first, then the skipped run, so that the
  # get below re-reads the first token of the run.
  putback(following, @lexeme)
  putback_array(skipped)
  get
  following
end
411
+
412
+ # Parse this:
413
+ # address = mailbox | group
414
# Parse this:
#   address = mailbox | group
def address
  # The leading words may be a local-part or a display-name; the symbol
  # that follows them decides which:
  #
  #   words '@' -> local-part of an addr-spec
  #   words '<' -> display-name of a mailbox
  #   words ':' -> display-name of a group
  #
  # Any other symbol means invalid input and is handed to mailbox.
  following = address_lookahead
  return group if following == SYM_COLON
  mailbox(following)
end
436
+
437
+ # Parse this:
438
+ # mailbox = angleAddr |
439
+ # word {word | "."} angleAddr |
440
+ # word {"." word} "@" domain .
441
+ #
442
+ # lookahead will be set to the return value of
443
+ # address_lookahead, which will be '@' or '<' (or another value
444
+ # for invalid input)
445
# Parse this:
#   mailbox = angleAddr |
#             word {word | "."} angleAddr |
#             word {"." word} "@" domain .
#
# +lookahead+ is the value returned by address_lookahead.
def mailbox(lookahead)
  if @sym == SYM_LESS_THAN
    angle_addr
  elsif lookahead == SYM_LESS_THAN
    name_tokens = [SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT, SYM_PERIOD]
    display_name_word
    while name_tokens.include?(@sym)
      if @sym == SYM_PERIOD
        save_text
        get
      else
        display_name_word
      end
    end
    @addresses.last.display_name = get_text
    angle_addr
  else
    # word {"." word} "@" domain is exactly what addr_spec parses.
    addr_spec
  end
end
477
+
478
+ # Parse this:
479
+ # group = word {word | "."} SYNC ":" [mailbox_list] SYNC ";"
480
# Parse this:
#   group = word {word | "."} SYNC ":" [mailbox_list] SYNC ";"
def group
  word
  loop do
    case @sym
    when SYM_ATOM, SYM_QTEXT
      word
    when SYM_PERIOD
      save_text
      get
    else
      break
    end
  end
  sync(SYM_COLON)
  expect(SYM_COLON)
  get_text # throw away group name
  @addresses.last.comments = nil
  if [SYM_ATOM, SYM_QTEXT, SYM_COMMA, SYM_LESS_THAN].include?(@sym)
    mailbox_list
  end
  sync(SYM_SEMI_COLON)
  expect(SYM_SEMI_COLON)
end
501
+
502
+ # Parse this:
503
+ # word = atom | atom_non_ascii | quotedString
504
# Parse this:
#   word = atom | atom_non_ascii | quotedString
#
# Saves the lexeme and advances on a match; records an error otherwise.
def display_name_word
  case @sym
  when SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT
    save_text
    get
  else
    error "expected word, got #{@sym.inspect}"
  end
end
512
+
513
+ # Parse this:
514
+ # word = atom | quotedString
515
# Parse this:
#   word = atom | quotedString
#
# Saves the lexeme and advances on a match; records an error otherwise.
def word
  case @sym
  when SYM_ATOM, SYM_QTEXT
    save_text
    get
  else
    error "expected word, got #{@sym.inspect}"
  end
end
523
+
524
+ # Parse a mailbox list.
525
# Parse one mailbox, then another for every comma that follows, opening
# a new address slot before each additional mailbox.
def mailbox_list
  loop do
    mailbox(address_lookahead)
    break unless @sym == SYM_COMMA
    get
    new_address
  end
end
533
+
534
+ # Parse this:
535
+ # angleAddr = SYNC "<" [obsRoute] addrSpec SYNC ">"
536
# Parse this:
#   angleAddr = SYNC "<" [obsRoute] addrSpec SYNC ">"
def angle_addr
  expect(SYM_LESS_THAN)
  obs_route if @sym == SYM_AT_SIGN # a leading "@" starts an obsolete route
  addr_spec
  expect(SYM_GREATER_THAN)
end
544
+
545
+ # Parse this:
546
+ # domain = domainLiteral | obsDomain
547
# Parse this:
#   domain = domainLiteral | obsDomain
#
# Stores the parsed text as the domain of the address being built.
def domain
  case @sym
  when SYM_DOMAIN_LITERAL
    save_text
    @addresses.last.domain = get_text
    get
  when SYM_ATOM
    obs_domain
    @addresses.last.domain = get_text
  else
    error "expected start of domain, got #{@sym.inspect}"
  end
end
559
+
560
+ # Parse this:
561
+ # addrSpec = localPart "@" domain
562
# Parse this:
#   addrSpec = localPart "@" domain
def addr_spec
  local_part
  expect(SYM_AT_SIGN)
  domain
end
567
+
568
+ # Parse this:
569
+ # local_part = word *( "." word )
570
# Parse this:
#   local_part = word *( "." word )
#
# Stores the joined text as the local part of the address being built.
def local_part
  loop do
    word
    break unless @sym == SYM_PERIOD
    save_text
    get
  end
  @addresses.last.local = get_text
end
579
+
580
+ # Parse this:
581
+ # obs_domain = atom *( "." atom ) .
582
# Parse this:
#   obs_domain = atom *( "." atom ) .
def obs_domain
  loop do
    expect_save(SYM_ATOM)
    break unless @sym == SYM_PERIOD
    save_text
    get
  end
end
590
+
591
+ # Parse this:
592
+ # obs_route = obs_domain_list ":"
593
# Parse this:
#   obs_route = obs_domain_list ":"
def obs_route
  obs_domain_list
  expect(SYM_COLON)
end
597
+
598
+ # Parse this:
599
+ # obs_domain_list = "@" domain *( *( "," ) "@" domain )
600
# Parse this:
#   obs_domain_list = "@" domain *( *( "," ) "@" domain )
def obs_domain_list
  loop do
    expect(SYM_AT_SIGN)
    domain
    break unless @sym == SYM_COMMA || @sym == SYM_AT_SIGN
    get while @sym == SYM_COMMA
  end
end
611
+
612
+ # Put a token back into the input stream. This token will be
613
+ # retrieved by the next call to get.
614
# Push a [sym, lexeme] pair onto the pending-token stack; the next get
# pops it instead of reading from the string.
def putback(sym, lexeme)
  @tokens << [sym, lexeme]
end
617
+
618
+ # Put back an array of tokens into the input stream.
619
# Put back every [sym, lexeme] pair in +a+, last element first, so the
# first element of +a+ is the next one returned by get.
def putback_array(a)
  a.reverse_each do |pair|
    putback(*pair)
  end
end
624
+
625
+ # Get a single token from the string or from the @tokens array
626
+ # if somebody used putback.
627
# Advance to the next token: take the most recently put-back token if
# there is one, otherwise scan a new token from the string.
def get
  if @tokens.empty?
    get_tokenize
  else
    @sym, @lexeme = @tokens.pop
  end
end
634
+
635
+ # Get a single token from the string
636
+ def get_tokenize # scans the next token off the front of @string into @sym and @lexeme
637
+ @lexeme = nil # cleared so the final "||=" only fills it in when no branch set it
638
+ loop { # repeats until a branch breaks; the skip and garbage branches rescan
639
+ case @string
640
+ when nil # the end
641
+ @sym = nil
642
+ break
643
+ when "" # the end
644
+ @sym = nil
645
+ break
646
+ when /\A[\r\n\t ]+/m # skip whitespace
647
+ @string = $'
648
+ when /\A\(/m # skip comment
649
+ comment
650
+ when /\A""/ # skip empty quoted text
651
+ @string = $'
652
+ when /\A[\w!$%&\'*+\/=?^_\`{\}|~#-]+/m
653
+ @string = $'
654
+ @sym = SYM_ATOM
655
+ break
656
+ when /\A"(.*?([^\\]|\\\\))"/m
657
+ @string = $'
658
+ @sym = SYM_QTEXT # the lexeme set below is the quoted text with backslash escapes removed
659
+ @lexeme = $1.gsub(/\\(.)/, '\1')
660
+ break
661
+ when /\A</
662
+ @string = $'
663
+ @sym = SYM_LESS_THAN
664
+ break
665
+ when /\A>/
666
+ @string = $'
667
+ @sym = SYM_GREATER_THAN
668
+ break
669
+ when /\A@/
670
+ @string = $'
671
+ @sym = SYM_AT_SIGN
672
+ break
673
+ when /\A,/
674
+ @string = $'
675
+ @sym = SYM_COMMA
676
+ break
677
+ when /\A:/
678
+ @string = $'
679
+ @sym = SYM_COLON
680
+ break
681
+ when /\A;/
682
+ @string = $'
683
+ @sym = SYM_SEMI_COLON
684
+ break
685
+ when /\A\./
686
+ @string = $'
687
+ @sym = SYM_PERIOD
688
+ break
689
+ when /\A(\[.*?([^\\]|\\\\)\])/m
690
+ @string = $'
691
+ @sym = SYM_DOMAIN_LITERAL # the lexeme set below drops whitespace not preceded by a backslash, then removes backslash escapes
692
+ @lexeme = $1.gsub(/(^|[^\\])[\r\n\t ]+/, '\1').gsub(/\\(.)/, '\1')
693
+ break
694
+ when /\A[\200-\377\w!$%&\'*+\/=?^_\`{\}|~#-]+/m
695
+ # This is just like SYM_ATOM, but includes all characters
696
+ # with high bits. This is so we can allow such tokens in
697
+ # the display name portion of an address even though it
698
+ # violates the RFCs.
699
+ @string = $'
700
+ @sym = SYM_ATOM_NON_ASCII # NOTE(review): the \200-\377 byte range above looks Ruby 1.8-specific; confirm it parses on the target Ruby
701
+ break
702
+ when /\A./
703
+ @string = $' # garbage
704
+ error('garbage character in string')
705
+ else
706
+ raise "internal error, @string is #{@string.inspect}"
707
+ end
708
+ }
709
+ if @sym # a token was found; @sym is nil only at end of input
710
+ @lexeme ||= $& # default lexeme: the text matched by the pattern that produced the token
711
+ end
712
+ end
713
+
714
+ def comment # consumes one (possibly nested) parenthesized comment from the front of @string
715
+ depth = 0 # number of parentheses currently open
716
+ comment = ''
717
+ catch(:done) { # left via throw once the outermost parenthesis has been closed
718
+ while @string =~ /\A(\(([^\(\)\\]|\\.)*)/m
719
+ @string = $'
720
+ comment += $1
721
+ depth += 1 # the match above consumed one opening parenthesis
722
+ while @string =~ /\A(([^\(\)\\]|\\.)*\))/m
723
+ @string = $'
724
+ comment += $1
725
+ depth -= 1 # the match above consumed one closing parenthesis
726
+ throw :done if depth == 0
727
+ if @string =~ /\A(([^\(\)\\]|\\.)+)/
728
+ @string = $'
729
+ comment += $1
730
+ end
731
+ end
732
+ end
733
+ }
734
+ comment = comment.gsub(/[\r\n\t ]+/m, ' ').
735
+ sub(/\A\((.*)\)$/m, '\1').
736
+ gsub(/\\(.)/, '\1')
737
+ @addresses.last.comments = # appends to any comments already recorded for the current address
738
+ (@addresses.last.comments || []) + [comment]
739
+ end
740
+
741
# Advance past the current token when it is +token+; otherwise record an
# error and leave the token stream where it is.
def expect(token)
  return get if @sym == token
  error("expected #{token.inspect} but got #{@sym.inspect}")
end
748
+
749
# Like expect, but first saves the current lexeme when the token matches.
def expect_save(token)
  save_text if @sym == token
  expect(token)
end
755
+
756
# Skip tokens, recording an error for each, until the current token is
# +token+ or the input is exhausted.
def sync(token)
  until !@sym || @sym == token
    error "expected #{token.inspect} but got #{@sym.inspect}"
    get
  end
end
762
+
763
# Count one parse error. The message +s+ is accepted but not used.
def error(s)
  @errors = @errors + 1
end
766
+ end
767
+
768
+ # Given a string, this function attempts to extract mailing
769
+ # addresses from it and returns an RMail::Address::List of those
770
+ # addresses (RMail::Address::List is a subclass of Array).
771
+ #
772
+ # This is identical to using a RMail::Address::Parser directly like
773
+ # this:
774
+ #
775
+ # RMail::Address::Parser.new(string).parse
776
# Convenience wrapper: build a Parser for +string+ and return the list of
# addresses it parses.
def Address.parse(string)
  parser = Parser.new(string)
  parser.parse
end
779
+
780
+ # RMail::Address::List is a simple subclass of the Array class
781
+ # that provides convenience methods for accessing the
782
+ # RMail::Address objects it contains.
783
class List < Array

  # Array of the #local value of every element.
  def locals
    map { |addr| addr.local }
  end

  # Array of the #display_name value of every element.
  def display_names
    map { |addr| addr.display_name }
  end

  # Array of the #name value of every element.
  def names
    map { |addr| addr.name }
  end

  # Array of the #domain value of every element.
  def domains
    map { |addr| addr.domain }
  end

  # Array of the #address value of every element.
  def addresses
    map { |addr| addr.address }
  end

  # Array of the #format value of every element.
  def format
    map { |addr| addr.format }
  end

end
822
+
823
+ end
824
+ end
825
+
826
# When this file is run directly, parse a sample group address and print
# the resulting list.
if __FILE__ == $0
  p RMail::Address::Parser.new('A Group:a@b.c,d@e.f;').parse
end