rmail-sup 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. checksums.yaml +7 -0
  2. data/NEWS +323 -0
  3. data/NOTES +14 -0
  4. data/README +83 -0
  5. data/Rakefile +184 -0
  6. data/THANKS +25 -0
  7. data/TODO +115 -0
  8. data/guide/Intro.txt +122 -0
  9. data/guide/MIME.txt +6 -0
  10. data/guide/TableOfContents.txt +13 -0
  11. data/install.rb +1023 -0
  12. data/lib/rmail.rb +50 -0
  13. data/lib/rmail/address.rb +841 -0
  14. data/lib/rmail/header.rb +981 -0
  15. data/lib/rmail/mailbox.rb +62 -0
  16. data/lib/rmail/mailbox/mboxreader.rb +182 -0
  17. data/lib/rmail/message.rb +201 -0
  18. data/lib/rmail/parser.rb +412 -0
  19. data/lib/rmail/parser/multipart.rb +217 -0
  20. data/lib/rmail/parser/pushbackreader.rb +173 -0
  21. data/lib/rmail/serialize.rb +190 -0
  22. data/lib/rmail/utils.rb +59 -0
  23. data/test/addrgrammar.txt +113 -0
  24. data/test/data/mbox.odd +4 -0
  25. data/test/data/mbox.simple +8 -0
  26. data/test/data/multipart/data.1 +5 -0
  27. data/test/data/multipart/data.10 +1 -0
  28. data/test/data/multipart/data.11 +9 -0
  29. data/test/data/multipart/data.12 +9 -0
  30. data/test/data/multipart/data.13 +3 -0
  31. data/test/data/multipart/data.14 +3 -0
  32. data/test/data/multipart/data.15 +3 -0
  33. data/test/data/multipart/data.16 +3 -0
  34. data/test/data/multipart/data.17 +0 -0
  35. data/test/data/multipart/data.2 +5 -0
  36. data/test/data/multipart/data.3 +2 -0
  37. data/test/data/multipart/data.4 +3 -0
  38. data/test/data/multipart/data.5 +1 -0
  39. data/test/data/multipart/data.6 +2 -0
  40. data/test/data/multipart/data.7 +3 -0
  41. data/test/data/multipart/data.8 +5 -0
  42. data/test/data/multipart/data.9 +4 -0
  43. data/test/data/parser.badmime1 +4 -0
  44. data/test/data/parser.badmime2 +6 -0
  45. data/test/data/parser.nested-multipart +75 -0
  46. data/test/data/parser.nested-simple +12 -0
  47. data/test/data/parser.nested-simple2 +16 -0
  48. data/test/data/parser.nested-simple3 +21 -0
  49. data/test/data/parser.rfc822 +65 -0
  50. data/test/data/parser.simple-mime +24 -0
  51. data/test/data/parser/multipart.1 +8 -0
  52. data/test/data/parser/multipart.10 +4 -0
  53. data/test/data/parser/multipart.11 +12 -0
  54. data/test/data/parser/multipart.12 +12 -0
  55. data/test/data/parser/multipart.13 +6 -0
  56. data/test/data/parser/multipart.14 +6 -0
  57. data/test/data/parser/multipart.15 +6 -0
  58. data/test/data/parser/multipart.16 +6 -0
  59. data/test/data/parser/multipart.2 +8 -0
  60. data/test/data/parser/multipart.3 +5 -0
  61. data/test/data/parser/multipart.4 +6 -0
  62. data/test/data/parser/multipart.5 +4 -0
  63. data/test/data/parser/multipart.6 +5 -0
  64. data/test/data/parser/multipart.7 +6 -0
  65. data/test/data/parser/multipart.8 +8 -0
  66. data/test/data/parser/multipart.9 +7 -0
  67. data/test/data/transparency/absolute.1 +5 -0
  68. data/test/data/transparency/absolute.2 +1 -0
  69. data/test/data/transparency/absolute.3 +2 -0
  70. data/test/data/transparency/absolute.4 +3 -0
  71. data/test/data/transparency/absolute.5 +4 -0
  72. data/test/data/transparency/absolute.6 +49 -0
  73. data/test/data/transparency/message.1 +73 -0
  74. data/test/data/transparency/message.2 +34 -0
  75. data/test/data/transparency/message.3 +63 -0
  76. data/test/data/transparency/message.4 +5 -0
  77. data/test/data/transparency/message.5 +15 -0
  78. data/test/data/transparency/message.6 +1185 -0
  79. data/test/runtests.rb +35 -0
  80. data/test/testaddress.rb +1204 -0
  81. data/test/testbase.rb +204 -0
  82. data/test/testheader.rb +1225 -0
  83. data/test/testmailbox.rb +47 -0
  84. data/test/testmboxreader.rb +161 -0
  85. data/test/testmessage.rb +257 -0
  86. data/test/testparser.rb +634 -0
  87. data/test/testparsermultipart.rb +205 -0
  88. data/test/testpushbackreader.rb +40 -0
  89. data/test/testserialize.rb +264 -0
  90. data/test/testtestbase.rb +116 -0
  91. data/test/testtranspparency.rb +105 -0
  92. data/version +1 -0
  93. metadata +149 -0
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env ruby
2
+ #--
3
+ # Copyright (c) 2003 Matt Armstrong. All rights reserved.
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright
11
+ # notice, this list of conditions and the following disclaimer in the
12
+ # documentation and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
19
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ #
27
+ #++
28
+ # This module allows you to simply
29
+ # require 'rmail'
30
+ # in your ruby scripts and have all of the RMail module required.
31
+ # This provides maximum convenience when the startup time of your
32
+ # script is not crucial.
33
+
34
# Namespace that holds every RubyMail class.  The module defines no
# methods or constants of its own here; it exists so the individual
# library files have a common place to add their classes.
#
# See guide/Intro.txt for a general overview of RubyMail.
module RMail; end
40
+
41
+ require 'rmail/address'
42
+ require 'rmail/header'
43
+ require 'rmail/mailbox'
44
+ require 'rmail/message'
45
+ require 'rmail/parser'
46
+ require 'rmail/serialize'
47
+ require 'rmail/utils'
48
+ require 'rmail/mailbox/mboxreader'
49
+ require 'rmail/parser/multipart'
50
+ require 'rmail/parser/pushbackreader'
@@ -0,0 +1,841 @@
1
+ #--
2
+ # Copyright (C) 2001, 2002, 2003, 2008 Matt Armstrong. All rights
3
+ # reserved.
4
+ #
5
+ # Redistribution and use in source and binary forms, with or without
6
+ # modification, are permitted provided that the following conditions are met:
7
+ #
8
+ # 1. Redistributions of source code must retain the above copyright notice,
9
+ # this list of conditions and the following disclaimer.
10
+ # 2. Redistributions in binary form must reproduce the above copyright
11
+ # notice, this list of conditions and the following disclaimer in the
12
+ # documentation and/or other materials provided with the distribution.
13
+ # 3. The name of the author may not be used to endorse or promote products
14
+ # derived from this software without specific prior written permission.
15
+ #
16
+ # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17
+ # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18
+ # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
19
+ # NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
+ #
27
+ #++
28
+ # Implements the RMail::Address, RMail::Address::List, and
29
+ # RMail::Address::Parser classes. Together, these classes allow you
30
+ # to robustly parse, manipulate, and generate RFC2822 email addresses
31
+ # and address lists.
32
+
33
+ module RMail
34
+
35
+ # This class provides the following functionality:
36
+ #
37
+ # * Parses RFC2822 address lists into a list of Address
38
+ # objects (see #parse).
39
+ #
40
+ # * Formats Address objects as appropriate for insertion into email
41
+ # messages (see #format).
42
+ #
43
+ # * Allows manipulation of the various parts of the address (see
44
+ # #local=, #domain=, #display_name=, #comments=).
45
+ class Address
46
+
47
# Pattern source (a String, not a Regexp) matching a run of one or more
# RFC 2822 "atext" characters.
#
# The hyphen is written last inside the bracket expression so that it is
# taken literally.  Written as "+-?" it formed a character range
# (0x2B..0x3F) that also admitted specials such as "," "." ":" ";" "<"
# and ">".  The "/" that the range used to cover is listed explicitly.
ATEXT = '[\w=!#$%&\'*+/?^\`{|}~-]+'
48
+
49
# Builds an address, optionally seeded from +string+.
#
# With +nil+ (the default) every field starts out as +nil+.  With a
# String, the text is handed to Address.parse and the first address in
# the result, if there is one, supplies the local part, domain,
# comments and display name.  Any other kind of argument raises
# ArgumentError.
def initialize(string = nil)
  @local = nil
  @domain = nil
  @comments = nil
  @display_name = nil

  unless string.kind_of?(String)
    raise ArgumentError unless string.nil?
    return
  end

  parsed = Address.parse(string)
  return unless parsed.length > 0

  first = parsed[0]
  @local = first.local
  @domain = first.domain
  @comments = first.comments
  @display_name = first.display_name
end
67
+
68
# Orders two addresses by their email address alone: the local parts
# are compared first and the domains break a tie.  Display names and
# comments play no part, and a missing local part or domain is treated
# as an empty string.
#
# When +other+ is not an RMail::Address it is converted with +to_str+
# and parsed into one before the comparison.
def <=>(other)
  other = RMail::Address.new(other.to_str) unless other.kind_of?(RMail::Address)

  by_local = (@local || '') <=> (other.local || '')
  return by_local unless by_local == 0

  (@domain || '') <=> (other.domain || '')
end
83
+ include Comparable
84
+
85
# Hash code for this address.  It is the hash of the string returned
# by #address, so the display name and comments do not influence it.
def hash
  email = address
  email.hash
end
91
+
92
# Strict equality on the email address portion only: true when both
# the local part and the domain are +eql?+ to those of +other+.
# Display names and comments are ignored.
#
# Raises TypeError when +other+ is not an RMail::Address.
def eql?(other)
  raise TypeError unless other.kind_of?(RMail::Address)
  return false unless @local.eql?(other.local)

  @domain.eql?(other.domain)
end
100
+
101
# Reader for the local part of the mail address, i.e. the text that
# comes before the <tt>@</tt> sign.  Returns the stored value as is.
def local
  return @local
end
106
+
107
# Writer for the local part of the mail address, i.e. the text that
# comes before the <tt>@</tt> sign.  Accepts a String or +nil+;
# anything else raises ArgumentError.
def local=(l)
  raise ArgumentError if !l.nil? && !l.kind_of?(String)
  @local = l
end
113
+
114
# Reader for the display name.  A display name exists only for
# "angle addr" style addresses, for example:
#
#   John Doe <johnd@example.net>
#
# whose display name is "John Doe".  The older comment style carries
# no display name at all:
#
#   bobs@example.net (Bob Smith)
#
# See also display_name=, #name
def display_name
  return @display_name
end
128
+
129
# Writer for the display name (see display_name for what that is).
# Accepts a String or +nil+; an empty string is stored as +nil+.
# Any other argument raises ArgumentError with the message
# 'not a string'.
#
# See also display_name
def display_name=(str)
  raise ArgumentError, 'not a string' if !str.nil? && !str.kind_of?(String)

  @display_name = str
  @display_name = nil if str == ''
end
140
+
141
# Best-effort human name for this address.  The real display name wins
# when there is one (see display_name); otherwise the last comment is
# used, and +nil+ is returned when there are no comments either.
#
# For this address the result is "Bob Smith":
#
#   bobs@example.net (Bob Smith)
#
# See also display_name, #comments, #comments=
def name
  return @display_name if @display_name

  @comments && @comments.last
end
155
+
156
# Reader for the comments attached to this address: the stored array
# of strings, or +nil+ when none has been set.
def comments
  return @comments
end
160
+
161
# Replaces the comments of this address.
#
# +comments+ may be +nil+ (no comments), a String (stored as a
# one-element array) or an Array of strings.  The stored array is
# frozen.  An Array argument is copied first, so the caller's own
# array is not frozen as a side effect.
#
# Raises TypeError for any other kind of argument.  The message names
# the offending class via +class+; the former call to Object#type no
# longer exists in Ruby 1.9 and later and raised NoMethodError instead.
#
# See also #comments, #name
def comments=(comments)
  @comments =
    case comments
    when nil
      nil
    when Array
      comments.dup.freeze
    when String
      [comments].freeze
    else
      raise TypeError, "Argument to RMail::Address#comments= must be " +
        "String, Array or nil, was #{comments.class}."
    end
end
180
+
181
# Reader for the domain portion of the mail address, i.e. the text
# that comes after the <tt>@</tt> sign.  Returns the stored value as is.
def domain
  return @domain
end
186
+
187
# Writer for the domain portion of the address (the text after the
# <tt>@</tt> sign).  Any existing domain is replaced.
#
# +nil+ clears the domain.  A String is stripped of surrounding
# whitespace before it is stored; if nothing is left after stripping
# (empty or whitespace-only input) the domain is cleared as well, so a
# blank domain is never stored as an empty string.
#
# Raises ArgumentError when +domain+ is neither +nil+ nor a String.
def domain=(domain)
  if domain.nil?
    @domain = nil
  else
    raise ArgumentError unless domain.kind_of?(String)
    trimmed = domain.strip
    @domain = trimmed.empty? ? nil : trimmed
  end
end
197
+
198
# The bare email address: local part, then <tt>@</tt> and the domain
# when a domain is set.  No display name, angle brackets or comments
# are included.
#
# The result is the raw address and is NOT suitable for insertion into
# an e-mail, because no RFC2822 quoting is applied (use #format for
# that).  This method can therefore return strings such as:
#
#   "address with no quoting@example.net"
#
# See also #format
def address
  return @local if @domain.nil?

  @local + '@' + @domain
end
219
+
220
# Returns this address as a String formatted for insertion into a mail
# message.
#
# * The display name is emitted as is when it consists only of atom
#   characters and spaces; otherwise it is wrapped in double quotes
#   with embedded quotes and backslashes escaped.
# * The local part is quoted the same way when it contains anything
#   other than atom characters, "." and "/", or when it starts with,
#   ends with, or contains two consecutive periods.
# * A domain that fails the same test is emitted as a bracketed domain
#   literal, with brackets and backslashes inside it escaped.
# * Angle brackets surround the address when there is a display name
#   or the domain is a literal.
# * Each comment is appended in parentheses, with parentheses and
#   backslashes escaped.
#
# The validity checks are anchored to the whole string (\A and \z).
# The earlier ^ and $ anchors matched per line, so a multi-line value
# with one clean line slipped through unquoted.
#
# An address with neither local part nor domain contributes nothing to
# the result; a missing local part next to a domain is formatted as an
# empty quoted string rather than raising NoMethodError.
def format
  display_name =
    if @display_name.nil?
      nil
    elsif @display_name =~ /\A[-\/\w=!#\$%&'*+?^`{|}~ ]+\z/
      @display_name
    else
      '"' + @display_name.gsub(/["\\]/, '\\\\\&') + '"'
    end

  local =
    if @local.nil? && @domain.nil?
      nil
    else
      raw_local = @local.to_s
      if raw_local !~ /\A[-\w=!#\$%&'*+?^`{|}~\.\/]+\z/ ||
         raw_local.start_with?('.') ||
         raw_local.end_with?('.') ||
         raw_local.include?('..')
        '"' + raw_local.gsub(/["\\]/, '\\\\\&') + '"'
      else
        raw_local
      end
    end

  domain =
    if @domain.nil?
      nil
    elsif @domain !~ /\A[-\w=!#\$%&'*+?^`{|}~\.\/]+\z/ ||
          @domain.start_with?('.') ||
          @domain.end_with?('.') ||
          @domain.include?('..')
      # Strip one pair of surrounding brackets, if present, so an
      # existing domain literal is not bracketed twice.
      literal = (@domain =~ /\A\[(.*)\]\z/) ? $1 : @domain
      '[' + literal.gsub(/[\[\]\\]/, '\\\\\&') + ']'
    else
      @domain
    end

  address =
    if domain.nil?
      local
    elsif !display_name.nil? || domain.end_with?(']')
      '<' + local + '@' + domain + '>'
    else
      local + '@' + domain
    end

  comments =
    unless @comments.nil?
      @comments.collect { |c|
        '(' + c.gsub(/[()\\]/, '\\\\\&') + ')'
      }.join(' ')
    end

  [display_name, address, comments].compact.join(' ')
end
267
+
268
+ # Addresses can be converted into strings.
269
+ alias :to_str :format
270
+
271
+ # This class provides a facility to parse a string containing one
272
+ # or more RFC2822 addresses into an array of RMail::Address
273
+ # objects. You can use it directly, but it is more conveniently
274
+ # used with the RMail::Address.parse method.
275
+ class Parser
276
+
277
# Sets up a parser over +string+.  The text is only stored here; no
# parsing happens until #parse is called.  See also the
# RMail::Address.parse method.
def initialize(string)
  @string = string
end
282
+
283
# Extracts the mail addresses from the string given to #new and
# returns them as an RMail::Address::List of RMail::Address objects
# (RMail::Address::List is a subclass of Array).
#
# Malformed input does not raise an exception.  Instead, the returned
# list simply does not contain the malformed addresses: entries that
# were being built when an error was recorded are dropped, and so is
# any entry left without both a local part and a domain.
#
# The string is expected to follow RFC2822's mailbox-list grammar,
# which covers the address lists found in the <tt>To:</tt>,
# <tt>From:</tt>, etc. headers of email.
def parse
  # Fresh parser state for this run.
  @lexemes = []
  @tokens = []
  @addresses = RMail::Address::List.new
  @errors = 0

  new_address     # placeholder the first address is parsed into
  get             # prime @sym/@lexeme with the first token
  address_list
  reset_errors    # drop the final entry if it ended in an error

  @addresses.delete_if { |addr| !(addr.local && addr.domain) }
end
308
+
309
+ private
310
+
311
+ SYM_ATOM = :atom
312
+ SYM_ATOM_NON_ASCII = :atom_non_ascii
313
+ SYM_QTEXT = :qtext
314
+ SYM_COMMA = :comma
315
+ SYM_LESS_THAN = :less_than
316
+ SYM_GREATER_THAN = :greater_than
317
+ SYM_AT_SIGN = :at_sign
318
+ SYM_PERIOD = :period
319
+ SYM_COLON = :colon
320
+ SYM_SEMI_COLON = :semi_colon
321
+ SYM_DOMAIN_LITERAL = :domain_literal
322
+
323
# If any error has been counted since the last reset, discards the
# most recently pushed address and sets the error count back to zero.
# Does nothing when the count is already zero.
def reset_errors
  return unless @errors > 0

  @addresses.pop
  @errors = 0
end
329
+
330
# Starts a new address: first settles any pending errors through
# reset_errors, then appends an empty Address to the result list for
# the following parse steps to fill in.
def new_address
  reset_errors
  @addresses << Address.new
end
334
+
335
# Returns the text saved so far and clears the saved lexemes.
#
# The lexemes are joined with single spaces, except around a "."
# lexeme, which is attached directly to its neighbours on both sides.
def get_text
  after_word = false
  joined = @lexemes.inject('') do |acc, piece|
    if piece == '.'
      after_word = false
    else
      acc << ' ' if after_word
      after_word = true
    end
    acc << piece
  end
  @lexemes = []
  joined
end
352
+
353
# Appends the current lexeme to the list of saved lexemes so that a
# later get_text call can return it.
def save_text
  @lexemes.push(@lexeme)
end
358
+
359
# Parse this:
#   address_list = ([address] SYNC ",") {[address] SYNC "," } [address] .
#
# Each pass parses an optional address, skips ahead to the next comma
# (sync counts everything it skips as an error), consumes the comma
# and opens a fresh address entry.  Parsing stops when the input is
# exhausted or the next token can neither start an address nor be a
# comma.
#
# The first pass used to be written out separately from the loop, and
# a trailing "[address]" branch after the loop could never run: the
# loop only ended once none of the symbols that branch tested for was
# current.  Both are folded into the single loop below; the sequence
# of calls is unchanged.
def address_list
  starters = [SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT, SYM_LESS_THAN]
  loop do
    address if starters.include?(@sym)
    sync(SYM_COMMA)
    return if @sym.nil?
    expect(SYM_COMMA)
    new_address
    break unless starters.include?(@sym) || @sym == SYM_COMMA
  end
end
392
+
393
# Peeks past a local-part or display-name: reads tokens for as long as
# they are words or "." and reports the first symbol that is neither.
# Every token read, including the one reported, is put back, and the
# first of them is fetched again, so the parser ends up positioned
# where it started.
def address_lookahead
  word_syms = [SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT, SYM_PERIOD]
  consumed = []
  while word_syms.include?(@sym)
    consumed << [@sym, @lexeme]
    get
  end
  upcoming = @sym
  # Put the terminating token back first so that it is handed out last.
  putback(@sym, @lexeme)
  putback_array(consumed)
  get
  upcoming
end
410
+
411
# Parse this:
#   address = mailbox | group
#
# The current token may begin a display-name, an angle addr or a
# local-part, and what looks like a local-part may in fact be a
# display-name.  The symbol that follows the run of words settles it:
#
#   local-part '@'   -> local part of a local-part @ domain
#   local-part '<'   -> display-name of a mailbox
#   local-part ':'   -> display-name of a group
#   display-name '<' -> display-name of a mailbox
#   display-name ':' -> display-name of a group
#
# A ':' selects the group rule; any other symbol (including those
# produced by invalid input) is passed on to the mailbox rule.
def address
  next_sym = address_lookahead
  return group if next_sym == SYM_COLON

  mailbox(next_sym)
end
434
+
435
# Parse this:
#   mailbox = angleAddr |
#             word {word | "."} angleAddr |
#             word {"." word} "@" domain .
#
# +lookahead+ is the value returned by address_lookahead: '@' or '<'
# for well-formed input, possibly something else for invalid input.
def mailbox(lookahead)
  # Bare angle address, no display name.
  return angle_addr if @sym == SYM_LESS_THAN

  if lookahead == SYM_LESS_THAN
    # Display name followed by an angle address.
    display_name_word
    while [SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT, SYM_PERIOD].include?(@sym)
      if @sym == SYM_PERIOD
        save_text
        get
      else
        display_name_word
      end
    end
    @addresses.last.display_name = get_text
    return angle_addr
  end

  # Plain local-part "@" domain.
  word
  while @sym == SYM_PERIOD
    save_text
    get
    word
  end
  @addresses.last.local = get_text
  expect(SYM_AT_SIGN)
  domain
  return unless @sym == SYM_LESS_THAN

  # Workaround for invalid input.  Treat 'foo@bar <foo@bar>' as if it
  # were '"foo@bar" <foo@bar>'.  The domain rule has consumed 'bar'
  # and stopped at '<', so what was parsed as an address is really the
  # display name: move it there and parse the angle address.
  latest = @addresses.last
  latest.display_name = format("%s@%s", latest.local, latest.domain)
  latest.local = nil
  latest.domain = nil
  angle_addr
end
489
+
490
# Parse this:
#   group = word {word | "."} SYNC ":" [mailbox_list] SYNC ";"
#
# The group name is read and thrown away, and any comments gathered
# while reading it are cleared.  The member mailboxes, if present, are
# parsed by mailbox_list.
def group
  word
  while @sym == SYM_ATOM || @sym == SYM_QTEXT || @sym == SYM_PERIOD
    if @sym == SYM_PERIOD
      save_text
      get
    else
      word
    end
  end
  sync(SYM_COLON)
  expect(SYM_COLON)
  get_text # throw away group name
  @addresses.last.comments = nil
  mailbox_list if [SYM_ATOM, SYM_QTEXT, SYM_COMMA, SYM_LESS_THAN].include?(@sym)
  sync(SYM_SEMI_COLON)
  expect(SYM_SEMI_COLON)
end
513
+
514
# Parse this:
#   word = atom | atom_non_ascii | quotedString
#
# Saves the current lexeme and advances when the current token is one
# of those; otherwise records an error and consumes nothing.
def display_name_word
  unless [SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT].include?(@sym)
    return error("expected word, got #{@sym.inspect}")
  end

  save_text
  get
end
524
+
525
# Parse this:
#   word = atom | quotedString
#
# Saves the current lexeme and advances when the current token is an
# atom or quoted text; otherwise records an error and consumes nothing.
def word
  unless [SYM_ATOM, SYM_QTEXT].include?(@sym)
    return error("expected word, got #{@sym.inspect}")
  end

  save_text
  get
end
535
+
536
# Parses a comma separated list of mailboxes.  At least one mailbox is
# always attempted; after each comma a new address entry is opened and
# the next mailbox is parsed into it.
def mailbox_list
  loop do
    mailbox(address_lookahead)
    break unless @sym == SYM_COMMA
    get
    new_address
  end
end
545
+
546
# Parse this:
#   angleAddr = SYNC "<" [obsRoute] addrSpec SYNC ">"
#
# An "@" directly after the opening bracket introduces an obsolete
# route, which is parsed before the address itself.
def angle_addr
  expect(SYM_LESS_THAN)
  obs_route if @sym == SYM_AT_SIGN
  addr_spec
  expect(SYM_GREATER_THAN)
end
556
+
557
# Parse this:
#   domain = domainLiteral | obsDomain
#
# Stores the parsed domain on the address currently being built.  Any
# token other than a domain literal or an atom is recorded as an error
# and left unconsumed.
def domain
  case @sym
  when SYM_DOMAIN_LITERAL
    save_text
    @addresses.last.domain = get_text
    get
  when SYM_ATOM
    obs_domain
    @addresses.last.domain = get_text
  else
    error "expected start of domain, got #{@sym.inspect}"
  end
end
571
+
572
# Parse this:
#   addrSpec = localPart "@" domain
#
# Runs the three sub-rules in order: local part, the "@" sign, domain.
def addr_spec
  local_part
  expect(SYM_AT_SIGN)
  domain
end
579
+
580
# Parse this:
#   local_part = word *( "." word )
#
# The words and the periods between them are saved as they are read,
# and the joined text becomes the local part of the address currently
# being built.
def local_part
  word
  until @sym != SYM_PERIOD
    save_text
    get
    word
  end
  @addresses.last.local = get_text
end
591
+
592
# Parse this:
#   obs_domain = atom *( "." atom ) .
#
# Every atom and separating period is saved for a later get_text call.
def obs_domain
  expect_save(SYM_ATOM)
  until @sym != SYM_PERIOD
    save_text
    get
    expect_save(SYM_ATOM)
  end
end
602
+
603
# Parse this:
#   obs_route = obs_domain_list ":"
#
# Parses the obsolete routing prefix and the colon that ends it.
def obs_route
  obs_domain_list
  expect(SYM_COLON)
end
609
+
610
# Parse this:
#   obs_domain_list = "@" domain *( *( "," ) "@" domain )
#
# After the first "@" domain pair, further pairs are read for as long
# as the next token is a comma or an "@"; any run of commas before a
# pair is skipped.
def obs_domain_list
  expect(SYM_AT_SIGN)
  domain
  while [SYM_COMMA, SYM_AT_SIGN].include?(@sym)
    get while @sym == SYM_COMMA
    expect(SYM_AT_SIGN)
    domain
  end
end
623
+
624
# Returns one token to the input stream by pushing its symbol and
# lexeme onto the pending-token stack.  The next call to get pops it
# from there instead of reading the string.
def putback(sym, lexeme)
  @tokens << [sym, lexeme]
end
629
+
630
# Returns a whole array of [symbol, lexeme] pairs to the input stream.
# The pairs are put back last to first, so the first pair of +a+ is
# the next one handed out.
def putback_array(a)
  a.reverse_each do |pair|
    putback(*pair)
  end
end
636
+
637
# Advances to the next token.  Tokens handed back through putback are
# served first (most recently put back first); only when none is
# pending is a new token read from the string.
def get
  return get_tokenize if @tokens.empty?

  @sym, @lexeme = @tokens.pop
end
646
+
647
# Reads a single token from the front of the string, leaving its kind
# in @sym and its text in @lexeme.
#
# Whitespace, comments and empty quoted strings are skipped.  A
# character that fits no token is dropped and counted as an error.
# At the end of the input (nil or empty string) @sym and @lexeme are
# both nil.  For quoted text and domain literals the lexeme has its
# backslash escapes removed; for every other token it is the matched
# text itself.
def get_tokenize
  @lexeme = nil
  loop do
    case @string
    when nil, ""
      # the end
      @sym = nil
      break
    when /\A[\r\n\t ]+/m
      # skip whitespace
      @string = $~.post_match
    when /\A\(/m
      # skip comment (it is recorded on the current address)
      comment
    when /\A""/
      # skip empty quoted text
      @string = $~.post_match
    when /\A[\w!$%&\'*+\/=?^_\`{\}|~#-]+/m
      @string = $~.post_match
      @sym = SYM_ATOM
      break
    when /\A"(.*?([^\\]|\\\\))"/m
      quoted = $~
      @string = quoted.post_match
      @sym = SYM_QTEXT
      @lexeme = quoted[1].gsub(/\\(.)/, '\1')
      break
    when /\A[<>@,:;.]/
      # Single-character punctuation tokens.
      @string = $~.post_match
      @sym = case $&
             when '<' then SYM_LESS_THAN
             when '>' then SYM_GREATER_THAN
             when '@' then SYM_AT_SIGN
             when ',' then SYM_COMMA
             when ':' then SYM_COLON
             when ';' then SYM_SEMI_COLON
             else SYM_PERIOD
             end
      break
    when /\A(\[.*?([^\\]|\\\\)\])/m
      literal = $~
      @string = literal.post_match
      @sym = SYM_DOMAIN_LITERAL
      @lexeme = literal[1].gsub(/(^|[^\\])[\r\n\t ]+/, '\1').gsub(/\\(.)/, '\1')
      break
    when /\A[\200-\377\w!$%&\'*+\/=?^_\`{\}|~#-]+/nm
      # This is just like SYM_ATOM, but includes all characters
      # with high bits.  This is so we can allow such tokens in
      # the display name portion of an address even though it
      # violates the RFCs.
      @string = $~.post_match
      @sym = SYM_ATOM_NON_ASCII
      break
    when /\A./
      @string = $~.post_match # garbage
      error('garbage character in string')
    else
      raise "internal error, @string is #{@string.inspect}"
    end
  end
  # Tokens that did not set a lexeme themselves take the text matched
  # by the `when` pattern that recognised them.
  @lexeme ||= $& if @sym
end
725
+
726
# Consumes one parenthesised comment, which may contain nested
# comments, from the front of the string and appends its text to the
# comments of the address currently being built.
#
# Scanning stops as soon as the parentheses balance, or when no
# further piece of comment can be matched (unterminated input).  In
# the stored text runs of whitespace are collapsed to one space, the
# outermost pair of parentheses is removed and backslash escapes are
# resolved.
def comment
  depth = 0
  raw = ''
  balanced = false
  while !balanced && @string =~ /\A(\(([^\(\)\\]|\\.)*)/m
    # An opening parenthesis plus the text up to the next parenthesis.
    @string = $'
    raw << $1
    depth += 1
    while @string =~ /\A(([^\(\)\\]|\\.)*\))/m
      # Text up to and including a closing parenthesis.
      @string = $'
      raw << $1
      depth -= 1
      if depth == 0
        balanced = true
        break
      end
      # Still nested: take any text that follows the closed comment.
      if @string =~ /\A(([^\(\)\\]|\\.)+)/
        @string = $'
        raw << $1
      end
    end
  end
  cleaned = raw.gsub(/[\r\n\t ]+/m, ' ')
  cleaned = cleaned.sub(/\A\((.*)\)$/m, '\1')
  cleaned = cleaned.gsub(/\\(.)/, '\1')
  current = @addresses.last
  current.comments = (current.comments || []) + [cleaned]
end
752
+
753
# Requires the current token to be +token+.  On a match the parser
# advances to the next token; otherwise an error is recorded and the
# current token is left in place.
def expect(token)
  return get if @sym == token

  error("expected #{token.inspect} but got #{@sym.inspect}")
end
760
+
761
# Like expect, but when the current token matches +token+ its lexeme
# is first saved for a later get_text call.
def expect_save(token)
  save_text if @sym == token
  expect(token)
end
767
+
768
# Resynchronises the parser on +token+: tokens are skipped, each one
# recorded as an error, until the current token is +token+ or the
# input is exhausted.
def sync(token)
  until !@sym || @sym == token
    error "expected #{token.inspect} but got #{@sym.inspect}"
    get
  end
end
774
+
775
# Records one parse error by incrementing the error counter.  The
# message +s+ is accepted but not used.
def error(s)
  @errors = @errors + 1
end
778
+ end
779
+
780
# Extracts the mailing addresses found in +string+ and returns them as
# an RMail::Address::List (a subclass of Array).
#
# This is shorthand for using a RMail::Address::Parser directly:
#
#   RMail::Address::Parser.new(string).parse
def self.parse(string)
  parser = Parser.new(string)
  parser.parse
end
791
+
792
# RMail::Address::List is an Array subclass with convenience methods
# that read one attribute from every RMail::Address object it holds.
class List < Array

  # Array with the result of RMail::Address#local for each element.
  def locals
    map(&:local)
  end

  # Array with the result of RMail::Address#display_name for each
  # element.
  def display_names
    map(&:display_name)
  end

  # Array with the result of RMail::Address#name for each element.
  def names
    map(&:name)
  end

  # Array with the result of RMail::Address#domain for each element.
  def domains
    map(&:domain)
  end

  # Array with the result of RMail::Address#address for each element.
  def addresses
    map(&:address)
  end

  # Array with the result of RMail::Address#format for each element.
  def format
    map(&:format)
  end

end
834
+
835
+ end
836
+ end
837
+
838
# When this file is run directly instead of being required, parse a
# sample group address and print the resulting address list.
if __FILE__ == $0
  sample = RMail::Address::Parser.new('A Group:a@b.c,d@e.f;')
  p sample.parse
end