net-imap 0.3.7 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of net-imap might be problematic. Click here for more details.

Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/pages.yml +46 -0
  3. data/.github/workflows/test.yml +5 -12
  4. data/.gitignore +1 -0
  5. data/Gemfile +3 -0
  6. data/README.md +15 -4
  7. data/Rakefile +0 -7
  8. data/lib/net/imap/authenticators.rb +26 -57
  9. data/lib/net/imap/command_data.rb +13 -6
  10. data/lib/net/imap/deprecated_client_options.rb +139 -0
  11. data/lib/net/imap/errors.rb +20 -0
  12. data/lib/net/imap/response_data.rb +92 -47
  13. data/lib/net/imap/response_parser/parser_utils.rb +240 -0
  14. data/lib/net/imap/response_parser.rb +1265 -986
  15. data/lib/net/imap/sasl/anonymous_authenticator.rb +69 -0
  16. data/lib/net/imap/sasl/authentication_exchange.rb +107 -0
  17. data/lib/net/imap/sasl/authenticators.rb +118 -0
  18. data/lib/net/imap/sasl/client_adapter.rb +72 -0
  19. data/lib/net/imap/{authenticators/cram_md5.rb → sasl/cram_md5_authenticator.rb} +21 -11
  20. data/lib/net/imap/sasl/digest_md5_authenticator.rb +180 -0
  21. data/lib/net/imap/sasl/external_authenticator.rb +83 -0
  22. data/lib/net/imap/sasl/gs2_header.rb +80 -0
  23. data/lib/net/imap/{authenticators/login.rb → sasl/login_authenticator.rb} +25 -16
  24. data/lib/net/imap/sasl/oauthbearer_authenticator.rb +199 -0
  25. data/lib/net/imap/sasl/plain_authenticator.rb +101 -0
  26. data/lib/net/imap/sasl/protocol_adapters.rb +45 -0
  27. data/lib/net/imap/sasl/scram_algorithm.rb +58 -0
  28. data/lib/net/imap/sasl/scram_authenticator.rb +287 -0
  29. data/lib/net/imap/sasl/stringprep.rb +6 -66
  30. data/lib/net/imap/sasl/xoauth2_authenticator.rb +106 -0
  31. data/lib/net/imap/sasl.rb +144 -43
  32. data/lib/net/imap/sasl_adapter.rb +21 -0
  33. data/lib/net/imap/stringprep/nameprep.rb +70 -0
  34. data/lib/net/imap/stringprep/saslprep.rb +69 -0
  35. data/lib/net/imap/stringprep/saslprep_tables.rb +96 -0
  36. data/lib/net/imap/stringprep/tables.rb +146 -0
  37. data/lib/net/imap/stringprep/trace.rb +85 -0
  38. data/lib/net/imap/stringprep.rb +159 -0
  39. data/lib/net/imap.rb +993 -609
  40. data/net-imap.gemspec +4 -3
  41. data/rakelib/benchmarks.rake +98 -0
  42. data/rakelib/saslprep.rake +4 -4
  43. data/rakelib/string_prep_tables_generator.rb +82 -60
  44. metadata +29 -13
  45. data/benchmarks/stringprep.yml +0 -65
  46. data/benchmarks/table-regexps.yml +0 -39
  47. data/lib/net/imap/authenticators/digest_md5.rb +0 -115
  48. data/lib/net/imap/authenticators/plain.rb +0 -41
  49. data/lib/net/imap/authenticators/xoauth2.rb +0 -20
  50. data/lib/net/imap/sasl/saslprep.rb +0 -55
  51. data/lib/net/imap/sasl/saslprep_tables.rb +0 -98
  52. data/lib/net/imap/sasl/stringprep_tables.rb +0 -153
@@ -1,12 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "response_parser/parser_utils"
4
5
 
5
6
  module Net
6
7
  class IMAP < Protocol
7
8
 
8
9
  # Parses an \IMAP server response.
9
10
  class ResponseParser
11
+ include ParserUtils
12
+ extend ParserUtils::Generator
13
+
10
14
  # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
11
15
  def initialize
12
16
  @str = nil
@@ -33,216 +37,734 @@ module Net
33
37
 
34
38
  # :stopdoc:
35
39
 
36
- EXPR_BEG = :EXPR_BEG
37
- EXPR_DATA = :EXPR_DATA
38
- EXPR_TEXT = :EXPR_TEXT
39
- EXPR_RTEXT = :EXPR_RTEXT
40
- EXPR_CTEXT = :EXPR_CTEXT
41
-
42
- T_SPACE = :SPACE
43
- T_NIL = :NIL
44
- T_NUMBER = :NUMBER
45
- T_ATOM = :ATOM
46
- T_QUOTED = :QUOTED
47
- T_LPAR = :LPAR
48
- T_RPAR = :RPAR
49
- T_BSLASH = :BSLASH
50
- T_STAR = :STAR
51
- T_LBRA = :LBRA
52
- T_RBRA = :RBRA
53
- T_LITERAL = :LITERAL
54
- T_PLUS = :PLUS
55
- T_PERCENT = :PERCENT
56
- T_CRLF = :CRLF
57
- T_EOF = :EOF
58
- T_TEXT = :TEXT
59
-
40
+ EXPR_BEG = :EXPR_BEG # the default, used in most places
41
+ EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
42
+
43
+ T_SPACE = :SPACE # atom special
44
+ T_ATOM = :ATOM # atom (subset of astring chars)
45
+ T_NIL = :NIL # subset of atom and label
46
+ T_NUMBER = :NUMBER # subset of atom
47
+ T_LBRA = :LBRA # subset of atom
48
+ T_PLUS = :PLUS # subset of atom; tag special
49
+ T_RBRA = :RBRA # atom special; resp_special; valid astring char
50
+ T_QUOTED = :QUOTED # starts/ends with atom special
51
+ T_BSLASH = :BSLASH # atom special; quoted special
52
+ T_LPAR = :LPAR # atom special; paren list delimiter
53
+ T_RPAR = :RPAR # atom special; paren list delimiter
54
+ T_STAR = :STAR # atom special; list wildcard
55
+ T_PERCENT = :PERCENT # atom special; list wildcard
56
+ T_LITERAL = :LITERAL # starts with atom special
57
+ T_CRLF = :CRLF # atom special; text special; quoted special
58
+ T_TEXT = :TEXT # any char except CRLF
59
+ T_EOF = :EOF # end of response string
60
+
61
+ module ResponseConditions
62
+ OK = "OK"
63
+ NO = "NO"
64
+ BAD = "BAD"
65
+ BYE = "BYE"
66
+ PREAUTH = "PREAUTH"
67
+
68
+ RESP_COND_STATES = [OK, NO, BAD ].freeze
69
+ RESP_DATA_CONDS = [OK, NO, BAD, BYE, ].freeze
70
+ AUTH_CONDS = [OK, PREAUTH].freeze
71
+ GREETING_CONDS = [OK, BYE, PREAUTH].freeze
72
+ RESP_CONDS = [OK, NO, BAD, BYE, PREAUTH].freeze
73
+ end
74
+ include ResponseConditions
75
+
76
+ module Patterns
77
+
78
+ module CharClassSubtraction
79
+ refine Regexp do
80
+ def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end
81
+ end
82
+ end
83
+ using CharClassSubtraction
84
+
85
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
86
+ # >>>
87
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
88
+ # CHAR = %x01-7F
89
+ # CRLF = CR LF
90
+ # ; Internet standard newline
91
+ # CTL = %x00-1F / %x7F
92
+ # ; controls
93
+ # DIGIT = %x30-39
94
+ # ; 0-9
95
+ # DQUOTE = %x22
96
+ # ; " (Double Quote)
97
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
98
+ # OCTET = %x00-FF
99
+ # SP = %x20
100
+ module RFC5234
101
+ ALPHA = /[A-Za-z]/n
102
+ CHAR = /[\x01-\x7f]/n
103
+ CRLF = /\r\n/n
104
+ CTL = /[\x00-\x1F\x7F]/n
105
+ DIGIT = /\d/n
106
+ DQUOTE = /"/n
107
+ HEXDIG = /\h/
108
+ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
109
+ SP = / /n
110
+ end
111
+
112
+ # UTF-8, a transformation format of ISO 10646
113
+ # >>>
114
+ # UTF8-1 = %x00-7F
115
+ # UTF8-tail = %x80-BF
116
+ # UTF8-2 = %xC2-DF UTF8-tail
117
+ # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
118
+ # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
119
+ # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
120
+ # %xF4 %x80-8F 2( UTF8-tail )
121
+ # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
122
+ # UTF8-octets = *( UTF8-char )
123
+ #
124
+ # n.b. String * Integer is used for repetition, rather than /x{3}/,
125
+ # because ruby 3.2's linear-time cache-based optimization doesn't work
126
+ # with "bounded or fixed times repetition nesting in another repetition
127
+ # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
128
+ # believe it is hard to support this case correctly."
129
+ # See https://bugs.ruby-lang.org/issues/19104
130
+ module RFC3629
131
+ UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
132
+ UTF8_TAIL = /[\x80-\xBF]/n
133
+ UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
134
+ UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
135
+ /\xED[\x80-\x9F]#{UTF8_TAIL}/n,
136
+ /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
137
+ /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
138
+ UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
139
+ /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
140
+ /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
141
+ UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
142
+ UTF8_OCTETS = /#{UTF8_CHAR}*/n
143
+ end
144
+
145
+ include RFC5234
146
+ include RFC3629
147
+
148
+ # CHAR8 = %x01-ff
149
+ # ; any OCTET except NUL, %x00
150
+ CHAR8 = /[\x01-\xff]/n
151
+
152
+ # list-wildcards = "%" / "*"
153
+ LIST_WILDCARDS = /[%*]/n
154
+ # quoted-specials = DQUOTE / "\"
155
+ QUOTED_SPECIALS = /["\\]/n
156
+ # resp-specials = "]"
157
+ RESP_SPECIALS = /[\]]/n
158
+
159
+ # atomish = 1*<any ATOM-CHAR except "[">
160
+ # ; We use "atomish" for msg-att and section, in order
161
+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
162
+ #
163
+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
164
+ # quoted-specials / resp-specials
165
+ # ATOM-CHAR = <any CHAR except atom-specials>
166
+ # atom = 1*ATOM-CHAR
167
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
168
+ # tag = 1*<any ASTRING-CHAR except "+">
169
+
170
+ ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
171
+ ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
172
+
173
+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
174
+ ATOM_CHAR = CHAR - ATOM_SPECIALS
175
+
176
+ ATOM = /#{ATOM_CHAR}+/n
177
+ ASTRING_CHARS = /#{ASTRING_CHAR}+/n
178
+ ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
179
+ TAG = /#{ASTRING_CHAR - /[+]/ }+/
180
+
181
+ # TEXT-CHAR = <any CHAR except CR and LF>
182
+ TEXT_CHAR = CHAR - /[\r\n]/
183
+
184
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
185
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
186
+ CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
187
+
188
+ # flag = "\Answered" / "\Flagged" / "\Deleted" /
189
+ # "\Seen" / "\Draft" / flag-keyword / flag-extension
190
+ # ; Does not include "\Recent"
191
+ # flag-extension = "\" atom
192
+ # ; Future expansion. Client implementations
193
+ # ; MUST accept flag-extension flags. Server
194
+ # ; implementations MUST NOT generate
195
+ # ; flag-extension flags except as defined by
196
+ # ; a future Standard or Standards Track
197
+ # ; revisions of this specification.
198
+ # flag-keyword = "$MDNSent" / "$Forwarded" / "$Junk" /
199
+ # "$NotJunk" / "$Phishing" / atom
200
+ # flag-perm = flag / "\*"
201
+ #
202
+ # Not checking for max one mbx-list-sflag in the parser.
203
+ # >>>
204
+ # mbx-list-oflag = "\Noinferiors" / child-mbox-flag /
205
+ # "\Subscribed" / "\Remote" / flag-extension
206
+ # ; Other flags; multiple from this list are
207
+ # ; possible per LIST response, but each flag
208
+ # ; can only appear once per LIST response
209
+ # mbx-list-sflag = "\NonExistent" / "\Noselect" / "\Marked" /
210
+ # "\Unmarked"
211
+ # ; Selectability flags; only one per LIST response
212
+ # child-mbox-flag = "\HasChildren" / "\HasNoChildren"
213
+ # ; attributes for the CHILDREN return option, at most
214
+ # ; one possible per LIST response
215
+ FLAG = /\\?#{ATOM}/n
216
+ FLAG_EXTENSION = /\\#{ATOM}/n
217
+ FLAG_KEYWORD = ATOM
218
+ FLAG_PERM = Regexp.union(FLAG, "\\*")
219
+ MBX_FLAG = FLAG_EXTENSION
220
+
221
+ # flag-list = "(" [flag *(SP flag)] ")"
222
+ #
223
+ # part of resp-text-code:
224
+ # >>>
225
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")"
226
+ #
227
+ # parens from mailbox-list are included in the regexp:
228
+ # >>>
229
+ # mbx-list-flags = *(mbx-list-oflag SP) mbx-list-sflag
230
+ # *(SP mbx-list-oflag) /
231
+ # mbx-list-oflag *(SP mbx-list-oflag)
232
+ FLAG_LIST = /\G\((#{FLAG }(?:#{SP}#{FLAG })*|)\)/ni
233
+ FLAG_PERM_LIST = /\G\((#{FLAG_PERM}(?:#{SP}#{FLAG_PERM})*|)\)/ni
234
+ MBX_LIST_FLAGS = /\G\((#{MBX_FLAG }(?:#{SP}#{MBX_FLAG })*|)\)/ni
235
+
236
+ # RFC3501:
237
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
238
+ # "\" quoted-specials
239
+ # RFC9051:
240
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
241
+ # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
242
+ # RFC3501 & RFC9051:
243
+ # quoted = DQUOTE *QUOTED-CHAR DQUOTE
244
+ QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
245
+ QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
246
+ QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
247
+ QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
248
+ UTF8_2, UTF8_3, UTF8_4)
249
+ QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
250
+ QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
251
+
252
+ # RFC3501:
253
+ # text = 1*TEXT-CHAR
254
+ # RFC9051:
255
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
256
+ # ; Non-ASCII text can only be returned
257
+ # ; after ENABLE IMAP4rev2 command
258
+ TEXT_rev1 = /#{TEXT_CHAR}+/
259
+ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
260
+
261
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
262
+ TAGGED_LABEL_FCHAR = /[a-zA-Z\-_.]/n
263
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
264
+ TAGGED_LABEL_CHAR = /[a-zA-Z\-_.0-9:]*/n
265
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
266
+ # ; Is a valid RFC 3501 "atom".
267
+ TAGGED_EXT_LABEL = /#{TAGGED_LABEL_FCHAR}#{TAGGED_LABEL_CHAR}*/n
268
+
269
+ # RFC3501:
270
+ # literal = "{" number "}" CRLF *CHAR8
271
+ # ; Number represents the number of CHAR8s
272
+ # RFC9051:
273
+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
274
+ # ; <number64> represents the number of CHAR8s.
275
+ # ; A non-synchronizing literal is distinguished
276
+ # ; from a synchronizing literal by the presence of
277
+ # ; "+" before the closing "}".
278
+ # ; Non-synchronizing literals are not allowed when
279
+ # ; sent from server to the client.
280
+ LITERAL = /\{(\d+)\}\r\n/n
281
+
282
+ module_function
283
+
284
+ def unescape_quoted!(quoted)
285
+ quoted
286
+ &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
287
+ &.force_encoding("UTF-8")
288
+ end
289
+
290
+ def unescape_quoted(quoted)
291
+ quoted
292
+ &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
293
+ &.force_encoding("UTF-8")
294
+ end
295
+
296
+ end
297
+
298
+ # the default, used in most places
60
299
  BEG_REGEXP = /\G(?:\
61
- (?# 1: SPACE )( +)|\
62
- (?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
63
- (?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
64
- (?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\
65
- (?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
66
- (?# 6: LPAR )(\()|\
67
- (?# 7: RPAR )(\))|\
68
- (?# 8: BSLASH )(\\)|\
69
- (?# 9: STAR )(\*)|\
70
- (?# 10: LBRA )(\[)|\
71
- (?# 11: RBRA )(\])|\
72
- (?# 12: LITERAL )\{(\d+)\}\r\n|\
73
- (?# 13: PLUS )(\+)|\
74
- (?# 14: PERCENT )(%)|\
75
- (?# 15: CRLF )(\r\n)|\
76
- (?# 16: EOF )(\z))/ni
77
-
300
+ (?# 1: SPACE )( )|\
301
+ (?# 2: ATOM prefixed with a compatible subtype)\
302
+ ((?:\
303
+ (?# 3: NIL )(NIL)|\
304
+ (?# 4: NUMBER )(\d+)|\
305
+ (?# 5: PLUS )(\+))\
306
+ (?# 6: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
307
+ (?# This enables greedy alternation without lookahead, in linear time.)\
308
+ )|\
309
+ (?# Also need to check for ATOM without a subtype prefix.)\
310
+ (?# 7: ATOM )(#{Patterns::ATOMISH})|\
311
+ (?# 8: QUOTED )#{Patterns::QUOTED_rev2}|\
312
+ (?# 9: LPAR )(\()|\
313
+ (?# 10: RPAR )(\))|\
314
+ (?# 11: BSLASH )(\\)|\
315
+ (?# 12: STAR )(\*)|\
316
+ (?# 13: LBRA )(\[)|\
317
+ (?# 14: RBRA )(\])|\
318
+ (?# 15: LITERAL )#{Patterns::LITERAL}|\
319
+ (?# 16: PERCENT )(%)|\
320
+ (?# 17: CRLF )(\r\n)|\
321
+ (?# 18: EOF )(\z))/ni
322
+
323
+ # envelope, body(structure), namespaces
78
324
  DATA_REGEXP = /\G(?:\
79
325
  (?# 1: SPACE )( )|\
80
326
  (?# 2: NIL )(NIL)|\
81
327
  (?# 3: NUMBER )(\d+)|\
82
- (?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
83
- (?# 5: LITERAL )\{(\d+)\}\r\n|\
328
+ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
329
+ (?# 5: LITERAL )#{Patterns::LITERAL}|\
84
330
  (?# 6: LPAR )(\()|\
85
331
  (?# 7: RPAR )(\)))/ni
86
332
 
87
- TEXT_REGEXP = /\G(?:\
88
- (?# 1: TEXT )([^\x00\r\n]*))/ni
89
-
90
- RTEXT_REGEXP = /\G(?:\
91
- (?# 1: LBRA )(\[)|\
92
- (?# 2: TEXT )([^\x00\r\n]*))/ni
333
+ # text, after 'resp-text-code "]"'
334
+ TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
93
335
 
94
- CTEXT_REGEXP = /\G(?:\
95
- (?# 1: TEXT )([^\x00\r\n\]]*))/ni
336
+ # resp-text-code, after 'atom SP'
337
+ CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
96
338
 
97
339
  Token = Struct.new(:symbol, :value)
98
340
 
99
- def response
100
- token = lookahead
101
- case token.symbol
102
- when T_PLUS
103
- result = continue_req
104
- when T_STAR
105
- result = response_untagged
106
- else
107
- result = response_tagged
108
- end
109
- while lookahead.symbol == T_SPACE
110
- # Ignore trailing space for Microsoft Exchange Server
111
- shift_token
112
- end
113
- match(T_CRLF)
114
- match(T_EOF)
115
- return result
341
+ def_char_matchers :SP, " ", :T_SPACE
342
+ def_char_matchers :PLUS, "+", :T_PLUS
343
+ def_char_matchers :STAR, "*", :T_STAR
344
+
345
+ def_char_matchers :lpar, "(", :T_LPAR
346
+ def_char_matchers :rpar, ")", :T_RPAR
347
+
348
+ def_char_matchers :lbra, "[", :T_LBRA
349
+ def_char_matchers :rbra, "]", :T_RBRA
350
+
351
+ # valid number ranges are not enforced by parser
352
+ # number = 1*DIGIT
353
+ # ; Unsigned 32-bit integer
354
+ # ; (0 <= n < 4,294,967,296)
355
+ def_token_matchers :number, T_NUMBER, coerce: Integer
356
+
357
+ def_token_matchers :quoted, T_QUOTED
358
+
359
+ # string = quoted / literal
360
+ def_token_matchers :string, T_QUOTED, T_LITERAL
361
+
362
+ # use where string represents "LABEL" values
363
+ def_token_matchers :case_insensitive__string,
364
+ T_QUOTED, T_LITERAL,
365
+ send: :upcase
366
+
367
+ # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
368
+ # NIL? returns nil when it does *not* match
369
+ def_token_matchers :NIL, T_NIL
370
+
371
+ # In addition to explicit uses of +tagged-ext-label+, use this to match
372
+ # keywords when the grammar has not provided any extension syntax.
373
+ #
374
+ # Do *not* use this for labels where the grammar specifies extensions
375
+ # can be +atom+, even if all currently defined labels would match. For
376
+ # example response codes in +resp-text-code+.
377
+ #
378
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
379
+ # ; Is a valid RFC 3501 "atom".
380
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
381
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
382
+ #
383
+ # TODO: add to lexer and only match tagged-ext-label
384
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
385
+
386
+ def_token_matchers :CRLF, T_CRLF
387
+ def_token_matchers :EOF, T_EOF
388
+
389
+ # atom = 1*ATOM-CHAR
390
+ # ATOM-CHAR = <any CHAR except atom-specials>
391
+ ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
392
+
393
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
394
+ # resp-specials = "]"
395
+ ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
396
+
397
+ ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
398
+
399
+ # tag = 1*<any ASTRING-CHAR except "+">
400
+ TAG_TOKENS = (ASTRING_CHARS_TOKENS - [T_PLUS]).freeze
401
+
402
+ # TODO: handle atom, astring_chars, and tag entirely inside the lexer
403
+ def atom; combine_adjacent(*ATOM_TOKENS) end
404
+ def astring_chars; combine_adjacent(*ASTRING_CHARS_TOKENS) end
405
+ def tag; combine_adjacent(*TAG_TOKENS) end
406
+
407
+ # the #accept version of #atom
408
+ def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
409
+
410
+ # Returns <tt>atom.upcase</tt>
411
+ def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
412
+
413
+ # Returns <tt>atom?&.upcase</tt>
414
+ def case_insensitive__atom?
415
+ -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
116
416
  end
117
417
 
118
- def continue_req
119
- match(T_PLUS)
120
- token = lookahead
121
- if token.symbol == T_SPACE
122
- shift_token
123
- return ContinuationRequest.new(resp_text, @str)
124
- else
125
- return ContinuationRequest.new(ResponseText.new(nil, ""), @str)
126
- end
418
+ # astring = 1*ASTRING-CHAR / string
419
+ def astring
420
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
127
421
  end
128
422
 
129
- def response_untagged
130
- match(T_STAR)
131
- match(T_SPACE)
132
- token = lookahead
133
- if token.symbol == T_NUMBER
134
- return numeric_response
135
- elsif token.symbol == T_ATOM
136
- case token.value
137
- when /\A(?:OK|NO|BAD|BYE|PREAUTH)\z/ni
138
- return response_cond
139
- when /\A(?:FLAGS)\z/ni
140
- return flags_response
141
- when /\A(?:ID)\z/ni
142
- return id_response
143
- when /\A(?:LIST|LSUB|XLIST)\z/ni
144
- return list_response
145
- when /\A(?:NAMESPACE)\z/ni
146
- return namespace_response
147
- when /\A(?:QUOTA)\z/ni
148
- return getquota_response
149
- when /\A(?:QUOTAROOT)\z/ni
150
- return getquotaroot_response
151
- when /\A(?:ACL)\z/ni
152
- return getacl_response
153
- when /\A(?:SEARCH|SORT)\z/ni
154
- return search_response
155
- when /\A(?:THREAD)\z/ni
156
- return thread_response
157
- when /\A(?:STATUS)\z/ni
158
- return status_response
159
- when /\A(?:CAPABILITY)\z/ni
160
- return capability_response
161
- when /\A(?:NOOP)\z/ni
162
- return ignored_response
163
- else
164
- return text_response
423
+ def astring?
424
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
425
+ end
426
+
427
+ # Use #label or #label_in to assert specific known labels
428
+ # (+tagged-ext-label+ only, not +atom+).
429
+ def label(word)
430
+ (val = tagged_ext_label) == word and return val
431
+ parse_error("unexpected atom %p, expected %p instead", val, word)
432
+ end
433
+
434
+ # Use #label or #label_in to assert specific known labels
435
+ # (+tagged-ext-label+ only, not +atom+).
436
+ def label_in(*labels)
437
+ lbl = tagged_ext_label and labels.include?(lbl) and return lbl
438
+ parse_error("unexpected atom %p, expected one of %s instead",
439
+ lbl, labels.join(" or "))
440
+ end
441
+
442
+ # expects "OK" or "PREAUTH" and raises InvalidResponseError on failure
443
+ def resp_cond_auth__name
444
+ lbl = tagged_ext_label and AUTH_CONDS.include? lbl and return lbl
445
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
446
+ lbl, AUTH_CONDS.join(" or ")
447
+ ]
448
+ end
449
+
450
+ # expects "OK" or "NO" or "BAD" and raises InvalidResponseError on failure
451
+ def resp_cond_state__name
452
+ lbl = tagged_ext_label and RESP_COND_STATES.include? lbl and return lbl
453
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
454
+ lbl, RESP_COND_STATES.join(" or ")
455
+ ]
456
+ end
457
+
458
+ # nstring = string / nil
459
+ def nstring
460
+ NIL? ? nil : string
461
+ end
462
+
463
+ def nquoted
464
+ NIL? ? nil : quoted
465
+ end
466
+
467
+ # use where nstring represents "LABEL" values
468
+ def case_insensitive__nstring
469
+ NIL? ? nil : case_insensitive__string
470
+ end
471
+
472
+ # valid number ranges are not enforced by parser
473
+ # number64 = 1*DIGIT
474
+ # ; Unsigned 63-bit integer
475
+ # ; (0 <= n <= 9,223,372,036,854,775,807)
476
+ alias number64 number
477
+ alias number64? number?
478
+
479
+ # valid number ranges are not enforced by parser
480
+ # nz-number = digit-nz *DIGIT
481
+ # ; Non-zero unsigned 32-bit integer
482
+ # ; (0 < n < 4,294,967,296)
483
+ alias nz_number number
484
+ alias nz_number? number?
485
+
486
+ # valid number ranges are not enforced by parser
487
+ # nz-number64 = digit-nz *DIGIT
488
+ # ; Unsigned 63-bit integer
489
+ # ; (0 < n <= 9,223,372,036,854,775,807)
490
+ alias nz_number64 nz_number
491
+
492
+ # valid number ranges are not enforced by parser
493
+ # uniqueid = nz-number
494
+ # ; Strictly ascending
495
+ alias uniqueid nz_number
496
+
497
+ # [RFC3501 & RFC9051:]
498
+ # response = *(continue-req / response-data) response-done
499
+ #
500
+ # For simplicity, response isn't interpreted as the combination of the
501
+ # three response types, but instead represents any individual server
502
+ # response. Our simplified interpretation is defined as:
503
+ # response = continue-req / response-data / response-tagged
504
+ #
505
+ # n.b: our "response-tagged" definition parses "greeting" too.
506
+ def response
507
+ resp = case lookahead!(T_PLUS, T_STAR, *TAG_TOKENS).symbol
508
+ when T_PLUS then continue_req
509
+ when T_STAR then response_data
510
+ else response_tagged
511
+ end
512
+ accept_spaces # QUIRKY: Ignore trailing space (MS Exchange Server?)
513
+ CRLF!
514
+ EOF!
515
+ resp
516
+ end
517
+
518
+ # RFC3501 & RFC9051:
519
+ # continue-req = "+" SP (resp-text / base64) CRLF
520
+ #
521
+ # n.b: base64 is valid resp-text. And in the spirit of RFC9051 Appx E 23
522
+ # (and to workaround existing servers), we use the following grammar:
523
+ #
524
+ # continue-req = "+" [SP resp-text] CRLF
525
+ def continue_req
526
+ PLUS!
527
+ ContinuationRequest.new(SP? ? resp_text : ResponseText::EMPTY, @str)
528
+ end
529
+
530
+ RE_RESPONSE_TYPE = /\G(?:\d+ )?(?<type>#{Patterns::TAGGED_EXT_LABEL})/n
531
+
532
+ # [RFC3501:]
533
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
534
+ # mailbox-data / message-data / capability-data) CRLF
535
+ # [RFC4466:]
536
+ # response-data = "*" SP response-payload CRLF
537
+ # response-payload = resp-cond-state / resp-cond-bye /
538
+ # mailbox-data / message-data / capability-data
539
+ # RFC5161 (ENABLE capability):
540
+ # response-data =/ "*" SP enable-data CRLF
541
+ # RFC5255 (LANGUAGE capability)
542
+ # response-payload =/ language-data
543
+ # RFC5255 (I18NLEVEL=1 and I18NLEVEL=2 capabilities)
544
+ # response-payload =/ comparator-data
545
+ # [RFC9051:]
546
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
547
+ # mailbox-data / message-data / capability-data /
548
+ # enable-data) CRLF
549
+ #
550
+ # [merging in greeting and response-fatal:]
551
+ # greeting = "*" SP (resp-cond-auth / resp-cond-bye) CRLF
552
+ # response-fatal = "*" SP resp-cond-bye CRLF
553
+ # response-data =/ "*" SP (resp-cond-auth / resp-cond-bye) CRLF
554
+ # [removing duplicates, this is simply]
555
+ # response-payload =/ resp-cond-auth
556
+ #
557
+ # TODO: remove resp-cond-auth and handle greeting separately
558
+ def response_data
559
+ STAR!; SP!
560
+ m = peek_re(RE_RESPONSE_TYPE) or parse_error("unparsable response")
561
+ case m["type"].upcase
562
+ when "OK" then resp_cond_state__untagged # RFC3501, RFC9051
563
+ when "FETCH" then message_data__fetch # RFC3501, RFC9051
564
+ when "EXPUNGE" then message_data__expunge # RFC3501, RFC9051
565
+ when "EXISTS" then mailbox_data__exists # RFC3501, RFC9051
566
+ when "ESEARCH" then esearch_response # RFC4731, RFC9051, etc
567
+ when "VANISHED" then expunged_resp # RFC7162
568
+ when "UIDFETCH" then uidfetch_resp # (draft) UIDONLY
569
+ when "SEARCH" then mailbox_data__search # RFC3501 (obsolete)
570
+ when "CAPABILITY" then capability_data__untagged # RFC3501, RFC9051
571
+ when "FLAGS" then mailbox_data__flags # RFC3501, RFC9051
572
+ when "LIST" then mailbox_data__list # RFC3501, RFC9051
573
+ when "STATUS" then mailbox_data__status # RFC3501, RFC9051
574
+ when "NAMESPACE" then namespace_response # RFC2342, RFC9051
575
+ when "ENABLED" then enable_data # RFC5161, RFC9051
576
+ when "BAD" then resp_cond_state__untagged # RFC3501, RFC9051
577
+ when "NO" then resp_cond_state__untagged # RFC3501, RFC9051
578
+ when "PREAUTH" then resp_cond_auth # RFC3501, RFC9051
579
+ when "BYE" then resp_cond_bye # RFC3501, RFC9051
580
+ when "RECENT" then mailbox_data__recent # RFC3501 (obsolete)
581
+ when "SORT" then sort_data # RFC5256, RFC7162
582
+ when "THREAD" then thread_data # RFC5256
583
+ when "QUOTA" then quota_response # RFC2087, RFC9208
584
+ when "QUOTAROOT" then quotaroot_response # RFC2087, RFC9208
585
+ when "ID" then id_response # RFC2971
586
+ when "ACL" then acl_data # RFC4314
587
+ when "LISTRIGHTS" then listrights_data # RFC4314
588
+ when "MYRIGHTS" then myrights_data # RFC4314
589
+ when "METADATA" then metadata_resp # RFC5464
590
+ when "LANGUAGE" then language_data # RFC5255
591
+ when "COMPARATOR" then comparator_data # RFC5255
592
+ when "CONVERTED" then message_data__converted # RFC5259
593
+ when "LSUB" then mailbox_data__lsub # RFC3501 (obsolete)
594
+ when "XLIST" then mailbox_data__xlist # deprecated
595
+ when "NOOP" then response_data__noop
596
+ else response_data__unhandled
597
+ end
598
+ end
599
+
600
+ def response_data__unhandled(klass = UntaggedResponse)
601
+ num = number?; SP?
602
+ type = tagged_ext_label; SP?
603
+ text = remaining_unparsed
604
+ data =
605
+ if num && text then UnparsedNumericResponseData.new(num, text)
606
+ elsif text then UnparsedData.new(text)
607
+ else num
165
608
  end
166
- else
167
- parse_error("unexpected token %s", token.symbol)
168
- end
609
+ klass.new(type, data, @str)
610
+ end
611
+
612
+ # reads all the way up until CRLF
613
+ def remaining_unparsed
614
+ str = @str[@pos...-2] and @pos += str.bytesize
615
+ str&.empty? ? nil : str
169
616
  end
170
617
 
618
+ def response_data__ignored; response_data__unhandled(IgnoredResponse) end
619
+ alias response_data__noop response_data__ignored
620
+
621
+ alias esearch_response response_data__unhandled
622
+ alias expunged_resp response_data__unhandled
623
+ alias uidfetch_resp response_data__unhandled
624
+ alias listrights_data response_data__unhandled
625
+ alias myrights_data response_data__unhandled
626
+ alias metadata_resp response_data__unhandled
627
+ alias language_data response_data__unhandled
628
+ alias comparator_data response_data__unhandled
629
+ alias message_data__converted response_data__unhandled
630
+
631
+ # RFC3501 & RFC9051:
632
+ # response-tagged = tag SP resp-cond-state CRLF
633
+ #
634
+ # resp-cond-state = ("OK" / "NO" / "BAD") SP resp-text
635
+ # ; Status condition
636
+ #
637
+ # tag = 1*<any ASTRING-CHAR except "+">
171
638
  def response_tagged
172
- tag = astring_chars
173
- match(T_SPACE)
174
- token = match(T_ATOM)
175
- name = token.value.upcase
176
- match(T_SPACE)
177
- return TaggedResponse.new(tag, name, resp_text, @str)
639
+ tag = tag(); SP!
640
+ name = resp_cond_state__name; SP!
641
+ TaggedResponse.new(tag, name, resp_text, @str)
178
642
  end
179
643
 
180
- def response_cond
181
- token = match(T_ATOM)
182
- name = token.value.upcase
183
- match(T_SPACE)
184
- return UntaggedResponse.new(name, resp_text, @str)
644
+ # RFC3501 & RFC9051:
645
+ # resp-cond-state = ("OK" / "NO" / "BAD") SP resp-text
646
+ def resp_cond_state__untagged
647
+ name = resp_cond_state__name; SP!
648
+ UntaggedResponse.new(name, resp_text, @str)
185
649
  end
186
650
 
187
- def numeric_response
188
- n = number
189
- match(T_SPACE)
190
- token = match(T_ATOM)
191
- name = token.value.upcase
192
- case name
193
- when "EXISTS", "RECENT", "EXPUNGE"
194
- return UntaggedResponse.new(name, n, @str)
195
- when "FETCH"
196
- shift_token
197
- match(T_SPACE)
198
- data = FetchData.new(n, msg_att(n))
199
- return UntaggedResponse.new(name, data, @str)
200
- end
651
+ # resp-cond-auth = ("OK" / "PREAUTH") SP resp-text
652
+ def resp_cond_auth
653
+ name = resp_cond_auth__name; SP!
654
+ UntaggedResponse.new(name, resp_text, @str)
655
+ end
656
+
657
+ # resp-cond-bye = "BYE" SP resp-text
658
+ def resp_cond_bye
659
+ name = label(BYE); SP!
660
+ UntaggedResponse.new(name, resp_text, @str)
201
661
  end
202
662
 
663
+ # message-data = nz-number SP ("EXPUNGE" / ("FETCH" SP msg-att))
664
+ def message_data__fetch
665
+ seq = nz_number; SP!
666
+ name = label "FETCH"; SP!
667
+ data = FetchData.new(seq, msg_att(seq))
668
+ UntaggedResponse.new(name, data, @str)
669
+ end
670
+
671
+ def response_data__simple_numeric
672
+ data = nz_number; SP!
673
+ name = tagged_ext_label
674
+ UntaggedResponse.new(name, data, @str)
675
+ end
676
+
677
+ alias message_data__expunge response_data__simple_numeric
678
+ alias mailbox_data__exists response_data__simple_numeric
679
+ alias mailbox_data__recent response_data__simple_numeric
680
+
681
+ # RFC3501 & RFC9051:
682
+ # msg-att = "(" (msg-att-dynamic / msg-att-static)
683
+ # *(SP (msg-att-dynamic / msg-att-static)) ")"
684
+ #
685
+ # msg-att-dynamic = "FLAGS" SP "(" [flag-fetch *(SP flag-fetch)] ")"
686
+ # RFC5257 (ANNOTATE extension):
687
+ # msg-att-dynamic =/ "ANNOTATION" SP
688
+ # ( "(" entry-att *(SP entry-att) ")" /
689
+ # "(" entry *(SP entry) ")" )
690
+ # RFC7162 (CONDSTORE extension):
691
+ # msg-att-dynamic =/ fetch-mod-resp
692
+ # fetch-mod-resp = "MODSEQ" SP "(" permsg-modsequence ")"
693
+ # RFC8970 (PREVIEW extension):
694
+ # msg-att-dynamic =/ "PREVIEW" SP nstring
695
+ #
696
+ # RFC3501:
697
+ # msg-att-static = "ENVELOPE" SP envelope /
698
+ # "INTERNALDATE" SP date-time /
699
+ # "RFC822" [".HEADER" / ".TEXT"] SP nstring /
700
+ # "RFC822.SIZE" SP number /
701
+ # "BODY" ["STRUCTURE"] SP body /
702
+ # "BODY" section ["<" number ">"] SP nstring /
703
+ # "UID" SP uniqueid
704
+ # RFC3516 (BINARY extension):
705
+ # msg-att-static =/ "BINARY" section-binary SP (nstring / literal8)
706
+ # / "BINARY.SIZE" section-binary SP number
707
+ # RFC8514 (SAVEDATE extension):
708
+ # msg-att-static =/ "SAVEDATE" SP (date-time / nil)
709
+ # RFC8474 (OBJECTID extension):
710
+ # msg-att-static =/ fetch-emailid-resp / fetch-threadid-resp
711
+ # fetch-emailid-resp = "EMAILID" SP "(" objectid ")"
712
+ # fetch-threadid-resp = "THREADID" SP ( "(" objectid ")" / nil )
713
+ # RFC9051:
714
+ # msg-att-static = "ENVELOPE" SP envelope /
715
+ # "INTERNALDATE" SP date-time /
716
+ # "RFC822.SIZE" SP number64 /
717
+ # "BODY" ["STRUCTURE"] SP body /
718
+ # "BODY" section ["<" number ">"] SP nstring /
719
+ # "BINARY" section-binary SP (nstring / literal8) /
720
+ # "BINARY.SIZE" section-binary SP number /
721
+ # "UID" SP uniqueid
722
+ #
723
+ # Re https://www.rfc-editor.org/errata/eid7246, I'm adding "offset" to the
724
+ # official "BINARY" ABNF, like so:
725
+ #
726
+ # msg-att-static =/ "BINARY" section-binary ["<" number ">"] SP
727
+ # (nstring / literal8)
203
728
  def msg_att(n)
204
- match(T_LPAR)
729
+ lpar
205
730
  attr = {}
206
731
  while true
207
- token = lookahead
208
- case token.symbol
209
- when T_RPAR
210
- shift_token
211
- break
212
- when T_SPACE
213
- shift_token
214
- next
215
- end
216
- case token.value
217
- when /\A(?:ENVELOPE)\z/ni
218
- name, val = envelope_data
219
- when /\A(?:FLAGS)\z/ni
220
- name, val = flags_data
221
- when /\A(?:INTERNALDATE)\z/ni
222
- name, val = internaldate_data
223
- when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
224
- name, val = rfc822_text
225
- when /\A(?:RFC822\.SIZE)\z/ni
226
- name, val = rfc822_size
227
- when /\A(?:BODY(?:STRUCTURE)?)\z/ni
228
- name, val = body_data
229
- when /\A(?:UID)\z/ni
230
- name, val = uid_data
231
- when /\A(?:MODSEQ)\z/ni
232
- name, val = modseq_data
233
- else
234
- parse_error("unknown attribute `%s' for {%d}", token.value, n)
235
- end
732
+ name = msg_att__label; SP!
733
+ val =
734
+ case name
735
+ when "UID" then uniqueid
736
+ when "FLAGS" then flag_list
737
+ when "BODY" then body
738
+ when /\ABODY\[/ni then nstring
739
+ when "BODYSTRUCTURE" then body
740
+ when "ENVELOPE" then envelope
741
+ when "INTERNALDATE" then date_time
742
+ when "RFC822.SIZE" then number64
743
+ when "RFC822" then nstring # not in rev2
744
+ when "RFC822.HEADER" then nstring # not in rev2
745
+ when "RFC822.TEXT" then nstring # not in rev2
746
+ when "MODSEQ" then parens__modseq # CONDSTORE
747
+ else parse_error("unknown attribute `%s' for {%d}", name, n)
748
+ end
236
749
  attr[name] = val
750
+ break unless SP?
751
+ break if lookahead_rpar?
237
752
  end
238
- return attr
753
+ rpar
754
+ attr
239
755
  end
240
756
 
241
- def envelope_data
242
- token = match(T_ATOM)
243
- name = token.value.upcase
244
- match(T_SPACE)
245
- return name, envelope
757
+ # appends "[section]" and "<partial>" to the base label
758
+ def msg_att__label
759
+ case (name = tagged_ext_label)
760
+ when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
761
+ # ignoring "[]" fixes https://bugs.ruby-lang.org/issues/5620
762
+ lbra? and rbra
763
+ when "BODY"
764
+ peek_lbra? and name << section and
765
+ peek_str?("<") and name << atom # partial
766
+ end
767
+ name
246
768
  end
247
769
 
248
770
  def envelope
@@ -280,482 +802,351 @@ module Net
280
802
  return result
281
803
  end
282
804
 
283
- def flags_data
284
- token = match(T_ATOM)
285
- name = token.value.upcase
286
- match(T_SPACE)
287
- return name, flag_list
288
- end
289
-
290
- def internaldate_data
291
- token = match(T_ATOM)
292
- name = token.value.upcase
293
- match(T_SPACE)
294
- token = match(T_QUOTED)
295
- return name, token.value
296
- end
297
-
298
- def rfc822_text
299
- token = match(T_ATOM)
300
- name = token.value.upcase
301
- token = lookahead
302
- if token.symbol == T_LBRA
303
- shift_token
304
- match(T_RBRA)
305
- end
306
- match(T_SPACE)
307
- return name, nstring
308
- end
309
-
310
- def rfc822_size
311
- token = match(T_ATOM)
312
- name = token.value.upcase
313
- match(T_SPACE)
314
- return name, number
315
- end
316
-
317
- def body_data
318
- token = match(T_ATOM)
319
- name = token.value.upcase
320
- token = lookahead
321
- if token.symbol == T_SPACE
322
- shift_token
323
- return name, body
324
- end
325
- name.concat(section)
326
- token = lookahead
327
- if token.symbol == T_ATOM
328
- name.concat(token.value)
329
- shift_token
330
- end
331
- match(T_SPACE)
332
- data = nstring
333
- return name, data
334
- end
805
+ # date-time = DQUOTE date-day-fixed "-" date-month "-" date-year
806
+ # SP time SP zone DQUOTE
807
+ alias date_time quoted
808
+ alias ndatetime nquoted
335
809
 
810
+ # RFC-3501 & RFC-9051:
811
+ # body = "(" (body-type-1part / body-type-mpart) ")"
336
812
  def body
337
813
  @lex_state = EXPR_DATA
338
- token = lookahead
339
- if token.symbol == T_NIL
340
- shift_token
341
- result = nil
342
- else
343
- match(T_LPAR)
344
- token = lookahead
345
- if token.symbol == T_LPAR
346
- result = body_type_mpart
347
- else
348
- result = body_type_1part
349
- end
350
- match(T_RPAR)
351
- end
814
+ lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar
815
+ result
816
+ ensure
352
817
  @lex_state = EXPR_BEG
353
- return result
354
818
  end
819
+ alias lookahead_body? lookahead_lpar?
355
820
 
821
+ # RFC-3501 & RFC9051:
822
+ # body-type-1part = (body-type-basic / body-type-msg / body-type-text)
823
+ # [SP body-ext-1part]
356
824
  def body_type_1part
357
- token = lookahead
358
- case token.value
359
- when /\A(?:TEXT)\z/ni
360
- return body_type_text
361
- when /\A(?:MESSAGE)\z/ni
362
- return body_type_msg
363
- when /\A(?:ATTACHMENT)\z/ni
364
- return body_type_attachment
365
- when /\A(?:MIXED)\z/ni
366
- return body_type_mixed
367
- else
368
- return body_type_basic
369
- end
370
- end
371
-
825
+ # This regexp peek is a performance optimization.
826
+ # The lookahead fallback would work fine too.
827
+ m = peek_re(/\G(?:
828
+ (?<TEXT> "TEXT" \s "[^"]+" )
829
+ |(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
830
+ |(?<BASIC> "[^"]+" \s "[^"]+" )
831
+ |(?<MIXED> "MIXED" )
832
+ )/nix)
833
+ choice = m&.named_captures&.compact&.keys&.first
834
+ # In practice, the following line should never be used. But the ABNF
835
+ # *does* allow literals, and this will handle them.
836
+ choice ||= lookahead_case_insensitive__string!
837
+ case choice
838
+ when "BASIC" then body_type_basic # => BodyTypeBasic
839
+ when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
840
+ when "TEXT" then body_type_text # => BodyTypeText
841
+ when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
842
+ else body_type_basic # might be a bug; server's or ours?
843
+ end
844
+ end
845
+
846
+ # RFC-3501 & RFC9051:
847
+ # body-type-basic = media-basic SP body-fields
372
848
  def body_type_basic
373
- mtype, msubtype = media_type
374
- token = lookahead
375
- if token.symbol == T_RPAR
376
- return BodyTypeBasic.new(mtype, msubtype)
377
- end
378
- match(T_SPACE)
379
- param, content_id, desc, enc, size = body_fields
380
- md5, disposition, language, extension = body_ext_1part
381
- return BodyTypeBasic.new(mtype, msubtype,
382
- param, content_id,
383
- desc, enc, size,
384
- md5, disposition, language, extension)
849
+ type = media_basic # n.b. "basic" type isn't enforced here
850
+ if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid
851
+ SP!; flds = body_fields
852
+ SP? and exts = body_ext_1part
853
+ BodyTypeBasic.new(*type, *flds, *exts)
385
854
  end
386
855
 
856
+ # RFC-3501 & RFC-9051:
857
+ # body-type-text = media-text SP body-fields SP body-fld-lines
387
858
  def body_type_text
388
- mtype, msubtype = media_type
389
- match(T_SPACE)
390
- param, content_id, desc, enc, size = body_fields
391
- match(T_SPACE)
392
- lines = number
393
- md5, disposition, language, extension = body_ext_1part
394
- return BodyTypeText.new(mtype, msubtype,
395
- param, content_id,
396
- desc, enc, size,
397
- lines,
398
- md5, disposition, language, extension)
859
+ type = media_text
860
+ SP!; flds = body_fields
861
+ SP!; lines = body_fld_lines
862
+ SP? and exts = body_ext_1part
863
+ BodyTypeText.new(*type, *flds, lines, *exts)
399
864
  end
400
865
 
866
+ # RFC-3501 & RFC-9051:
867
+ # body-type-msg = media-message SP body-fields SP envelope
868
+ # SP body SP body-fld-lines
401
869
  def body_type_msg
402
- mtype, msubtype = media_type
403
- match(T_SPACE)
404
- param, content_id, desc, enc, size = body_fields
405
-
406
- token = lookahead
407
- if token.symbol == T_RPAR
408
- # If this is not message/rfc822, we shouldn't apply the RFC822
409
- # spec to it. We should handle anything other than
410
- # message/rfc822 using multipart extension data [rfc3501] (i.e.
411
- # the data itself won't be returned, we would have to retrieve it
412
- # with BODYSTRUCTURE instead of with BODY
413
-
414
- # Also, sometimes a message/rfc822 is included as a large
415
- # attachment instead of having all of the other details
416
- # (e.g. attaching a .eml file to an email)
417
- if msubtype == "RFC822"
418
- return BodyTypeMessage.new(mtype, msubtype, param, content_id,
419
- desc, enc, size, nil, nil, nil, nil,
420
- nil, nil, nil)
421
- else
422
- return BodyTypeExtension.new(mtype, msubtype,
423
- param, content_id,
424
- desc, enc, size)
425
- end
426
- end
427
-
428
- match(T_SPACE)
429
- env = envelope
430
- match(T_SPACE)
431
- b = body
432
- match(T_SPACE)
433
- lines = number
434
- md5, disposition, language, extension = body_ext_1part
435
- return BodyTypeMessage.new(mtype, msubtype,
436
- param, content_id,
437
- desc, enc, size,
438
- env, b, lines,
439
- md5, disposition, language, extension)
440
- end
441
-
442
- def body_type_attachment
443
- mtype = case_insensitive_string
444
- match(T_SPACE)
445
- param = body_fld_param
446
- return BodyTypeAttachment.new(mtype, nil, param)
447
- end
448
-
870
+ # n.b. "message/rfc822" type isn't enforced here
871
+ type = media_message
872
+ SP!; flds = body_fields
873
+
874
+ # Sometimes servers send body-type-basic when body-type-msg should be.
875
+ # E.g: when a message/rfc822 part has "Content-Disposition: attachment".
876
+ #
877
+ # * SP "(" --> SP envelope --> continue as body-type-msg
878
+ # * ")" --> no body-ext-1part --> completed body-type-basic
879
+ # * SP nstring --> SP body-fld-md5
880
+ # --> SP body-ext-1part --> continue as body-type-basic
881
+ #
882
+ # It's probably better to return BodyTypeBasic---even for
883
+ # "message/rfc822"---than BodyTypeMessage with invalid fields.
884
+ unless peek_str?(" (")
885
+ SP? and exts = body_ext_1part
886
+ return BodyTypeBasic.new(*type, *flds, *exts)
887
+ end
888
+
889
+ SP!; env = envelope
890
+ SP!; bdy = body
891
+ SP!; lines = body_fld_lines
892
+ SP? and exts = body_ext_1part
893
+ BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts)
894
+ end
895
+
896
+ # This is a malformed body-type-mpart with no subparts.
449
897
  def body_type_mixed
450
- mtype = "MULTIPART"
451
- msubtype = case_insensitive_string
452
- param, disposition, language, extension = body_ext_mpart
453
- return BodyTypeBasic.new(mtype, msubtype, param, nil, nil, nil, nil, nil, disposition, language, extension)
898
+ # warn "malformed body-type-mpart: multipart/mixed with no parts."
899
+ type = media_subtype # => "MIXED"
900
+ SP? and exts = body_ext_mpart
901
+ BodyTypeMultipart.new("MULTIPART", type, nil, *exts)
454
902
  end
455
903
 
904
+ # RFC-3501 & RFC-9051:
905
+ # body-type-mpart = 1*body SP media-subtype
906
+ # [SP body-ext-mpart]
456
907
  def body_type_mpart
457
- parts = []
458
- while true
459
- token = lookahead
460
- if token.symbol == T_SPACE
461
- shift_token
462
- break
463
- end
464
- parts.push(body)
465
- end
466
- mtype = "MULTIPART"
467
- msubtype = case_insensitive_string
468
- param, disposition, language, extension = body_ext_mpart
469
- return BodyTypeMultipart.new(mtype, msubtype, parts,
470
- param, disposition, language,
471
- extension)
908
+ parts = [body]; parts << body until SP?; msubtype = media_subtype
909
+ SP? and exts = body_ext_mpart
910
+ BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts)
472
911
  end
473
912
 
913
+ # n.b. this handles both type and subtype
914
+ #
915
+ # RFC-3501 vs RFC-9051:
916
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
917
+ # "MESSAGE" /
918
+ # "VIDEO") DQUOTE) / string) SP media-subtype
919
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
920
+ # "FONT" / "MESSAGE" / "MODEL" /
921
+ # "VIDEO") DQUOTE) / string) SP media-subtype
922
+ #
923
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
924
+ # DQUOTE "RFC822" DQUOTE
925
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
926
+ # DQUOTE ("RFC822" / "GLOBAL") DQUOTE
927
+ #
928
+ # RFC-3501 & RFC-9051:
929
+ # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
930
+ # media-subtype = string
474
931
  def media_type
475
- mtype = case_insensitive_string
476
- token = lookahead
477
- if token.symbol != T_SPACE
478
- return mtype, nil
479
- end
480
- match(T_SPACE)
481
- msubtype = case_insensitive_string
932
+ mtype = case_insensitive__string
933
+ SP? or return mtype, nil # ??? quirky!
934
+ msubtype = media_subtype
482
935
  return mtype, msubtype
483
936
  end
484
937
 
938
+ # TODO: check types
939
+ alias media_basic media_type # */* --- catchall
940
+ alias media_message media_type # message/rfc822, message/global
941
+ alias media_text media_type # text/*
942
+
943
+ alias media_subtype case_insensitive__string
944
+
945
+ # RFC-3501 & RFC-9051:
946
+ # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
947
+ # body-fld-enc SP body-fld-octets
485
948
  def body_fields
486
- param = body_fld_param
487
- match(T_SPACE)
488
- content_id = nstring
489
- match(T_SPACE)
490
- desc = nstring
491
- match(T_SPACE)
492
- enc = case_insensitive_string
493
- match(T_SPACE)
494
- size = number
495
- return param, content_id, desc, enc, size
949
+ fields = []
950
+ fields << body_fld_param; SP!
951
+ fields << body_fld_id; SP!
952
+ fields << body_fld_desc; SP!
953
+ fields << body_fld_enc; SP!
954
+ fields << body_fld_octets
955
+ fields
496
956
  end
497
957
 
958
+ # RFC3501, RFC9051:
959
+ # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
498
960
  def body_fld_param
499
- token = lookahead
500
- if token.symbol == T_NIL
501
- shift_token
502
- return nil
503
- end
504
- match(T_LPAR)
961
+ return if NIL?
505
962
  param = {}
506
- while true
507
- token = lookahead
508
- case token.symbol
509
- when T_RPAR
510
- shift_token
511
- break
512
- when T_SPACE
513
- shift_token
514
- end
515
- name = case_insensitive_string
516
- match(T_SPACE)
517
- val = string
518
- param[name] = val
519
- end
520
- return param
521
- end
522
-
963
+ lpar
964
+ name = case_insensitive__string; SP!; param[name] = string
965
+ while SP?
966
+ name = case_insensitive__string; SP!; param[name] = string
967
+ end
968
+ rpar
969
+ param
970
+ end
971
+
972
+ # RFC2060
973
+ # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
974
+ # [SPACE body_fld_lang
975
+ # [SPACE 1#body_extension]]]
976
+ # ;; MUST NOT be returned on non-extensible
977
+ # ;; "BODY" fetch
978
+ # RFC3501 & RFC9051
979
+ # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
980
+ # [SP body-fld-loc *(SP body-extension)]]]
981
+ # ; MUST NOT be returned on non-extensible
982
+ # ; "BODY" fetch
523
983
  def body_ext_1part
524
- token = lookahead
525
- if token.symbol == T_SPACE
526
- shift_token
527
- else
528
- return nil
529
- end
530
- md5 = nstring
531
-
532
- token = lookahead
533
- if token.symbol == T_SPACE
534
- shift_token
535
- else
536
- return md5
537
- end
538
- disposition = body_fld_dsp
539
-
540
- token = lookahead
541
- if token.symbol == T_SPACE
542
- shift_token
543
- else
544
- return md5, disposition
545
- end
546
- language = body_fld_lang
547
-
548
- token = lookahead
549
- if token.symbol == T_SPACE
550
- shift_token
551
- else
552
- return md5, disposition, language
553
- end
554
-
555
- extension = body_extensions
556
- return md5, disposition, language, extension
557
- end
558
-
559
- def body_ext_mpart
560
- token = lookahead
561
- if token.symbol == T_SPACE
562
- shift_token
563
- else
564
- return nil
565
- end
566
- param = body_fld_param
567
-
568
- token = lookahead
569
- if token.symbol == T_SPACE
570
- shift_token
571
- else
572
- return param
573
- end
574
- disposition = body_fld_dsp
575
-
576
- token = lookahead
577
- if token.symbol == T_SPACE
578
- shift_token
579
- else
580
- return param, disposition
581
- end
582
- language = body_fld_lang
583
-
584
- token = lookahead
585
- if token.symbol == T_SPACE
586
- shift_token
587
- else
588
- return param, disposition, language
589
- end
590
-
591
- extension = body_extensions
592
- return param, disposition, language, extension
593
- end
594
-
595
- def body_fld_dsp
596
- token = lookahead
597
- if token.symbol == T_NIL
598
- shift_token
599
- return nil
600
- end
601
- match(T_LPAR)
602
- dsp_type = case_insensitive_string
603
- match(T_SPACE)
604
- param = body_fld_param
605
- match(T_RPAR)
606
- return ContentDisposition.new(dsp_type, param)
607
- end
608
-
609
- def body_fld_lang
610
- token = lookahead
611
- if token.symbol == T_LPAR
612
- shift_token
613
- result = []
614
- while true
615
- token = lookahead
616
- case token.symbol
617
- when T_RPAR
618
- shift_token
619
- return result
620
- when T_SPACE
621
- shift_token
622
- end
623
- result.push(case_insensitive_string)
624
- end
625
- else
626
- lang = nstring
627
- if lang
628
- return lang.upcase
629
- else
630
- return lang
631
- end
632
- end
633
- end
634
-
635
- def body_extensions
636
- result = []
637
- while true
638
- token = lookahead
639
- case token.symbol
640
- when T_RPAR
641
- return result
642
- when T_SPACE
643
- shift_token
644
- end
645
- result.push(body_extension)
646
- end
647
- end
648
-
649
- def body_extension
650
- token = lookahead
651
- case token.symbol
652
- when T_LPAR
653
- shift_token
654
- result = body_extensions
655
- match(T_RPAR)
656
- return result
657
- when T_NUMBER
658
- return number
659
- else
660
- return nstring
661
- end
662
- end
663
-
664
- def section
665
- str = String.new
666
- token = match(T_LBRA)
667
- str.concat(token.value)
668
- token = match(T_ATOM, T_NUMBER, T_RBRA)
669
- if token.symbol == T_RBRA
670
- str.concat(token.value)
671
- return str
672
- end
673
- str.concat(token.value)
674
- token = lookahead
675
- if token.symbol == T_SPACE
676
- shift_token
677
- str.concat(token.value)
678
- token = match(T_LPAR)
679
- str.concat(token.value)
680
- while true
681
- token = lookahead
682
- case token.symbol
683
- when T_RPAR
684
- str.concat(token.value)
685
- shift_token
686
- break
687
- when T_SPACE
688
- shift_token
689
- str.concat(token.value)
690
- end
691
- str.concat(format_string(astring))
692
- end
693
- end
694
- token = match(T_RBRA)
695
- str.concat(token.value)
696
- return str
697
- end
698
-
699
- def format_string(str)
700
- case str
701
- when ""
702
- return '""'
703
- when /[\x80-\xff\r\n]/n
704
- # literal
705
- return "{" + str.bytesize.to_s + "}" + CRLF + str
706
- when /[(){ \x00-\x1f\x7f%*"\\]/n
707
- # quoted string
708
- return '"' + str.gsub(/["\\]/n, "\\\\\\&") + '"'
709
- else
710
- # atom
711
- return str
712
- end
984
+ fields = []; fields << body_fld_md5
985
+ SP? or return fields; fields << body_fld_dsp
986
+ SP? or return fields; fields << body_fld_lang
987
+ SP? or return fields; fields << body_fld_loc
988
+ SP? or return fields; fields << body_extensions
989
+ fields
990
+ end
991
+
992
+ # RFC-2060:
993
+ # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
994
+ # [SP 1#body_extension]]
995
+ # ;; MUST NOT be returned on non-extensible
996
+ # ;; "BODY" fetch
997
+ # RFC-3501 & RFC-9051:
998
+ # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
999
+ # [SP body-fld-loc *(SP body-extension)]]]
1000
+ # ; MUST NOT be returned on non-extensible
1001
+ # ; "BODY" fetch
1002
+ def body_ext_mpart
1003
+ fields = []; fields << body_fld_param
1004
+ SP? or return fields; fields << body_fld_dsp
1005
+ SP? or return fields; fields << body_fld_lang
1006
+ SP? or return fields; fields << body_fld_loc
1007
+ SP? or return fields; fields << body_extensions
1008
+ fields
1009
+ end
1010
+
1011
+ alias body_fld_desc nstring
1012
+ alias body_fld_id nstring
1013
+ alias body_fld_loc nstring
1014
+ alias body_fld_lines number64 # number in 3501, number64 in 9051
1015
+ alias body_fld_md5 nstring
1016
+ alias body_fld_octets number
1017
+
1018
+ # RFC-3501 & RFC-9051:
1019
+ # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
1020
+ # "QUOTED-PRINTABLE") DQUOTE) / string
1021
+ alias body_fld_enc case_insensitive__string
1022
+
1023
+ # body-fld-dsp = "(" string SP body-fld-param ")" / nil
1024
+ def body_fld_dsp
1025
+ return if NIL?
1026
+ lpar; dsp_type = case_insensitive__string
1027
+ SP!; param = body_fld_param
1028
+ rpar
1029
+ ContentDisposition.new(dsp_type, param)
713
1030
  end
714
1031
 
715
- def uid_data
716
- token = match(T_ATOM)
717
- name = token.value.upcase
718
- match(T_SPACE)
719
- return name, number
1032
+ # body-fld-lang = nstring / "(" string *(SP string) ")"
1033
+ def body_fld_lang
1034
+ if lpar?
1035
+ result = [case_insensitive__string]
1036
+ result << case_insensitive__string while SP?
1037
+ rpar
1038
+ result
1039
+ else
1040
+ case_insensitive__nstring
1041
+ end
720
1042
  end
721
1043
 
722
- def modseq_data
723
- token = match(T_ATOM)
724
- name = token.value.upcase
725
- match(T_SPACE)
726
- match(T_LPAR)
727
- modseq = number
728
- match(T_RPAR)
729
- return name, modseq
1044
+ # body-extension *(SP body-extension)
1045
+ def body_extensions
1046
+ result = []
1047
+ result << body_extension; while SP? do result << body_extension end
1048
+ result
730
1049
  end
731
1050
 
732
- def ignored_response
733
- while lookahead.symbol != T_CRLF
734
- shift_token
1051
+ # body-extension = nstring / number / number64 /
1052
+ # "(" body-extension *(SP body-extension) ")"
1053
+ # ; Future expansion. Client implementations
1054
+ # ; MUST accept body-extension fields. Server
1055
+ # ; implementations MUST NOT generate
1056
+ # ; body-extension fields except as defined by
1057
+ # ; future Standard or Standards Track
1058
+ # ; revisions of this specification.
1059
+ def body_extension
1060
+ if (uint = number64?) then uint
1061
+ elsif lpar? then exts = body_extensions; rpar; exts
1062
+ else nstring
735
1063
  end
736
- return IgnoredResponse.new(@str)
737
1064
  end
738
1065
 
739
- def text_response
740
- token = match(T_ATOM)
741
- name = token.value.upcase
742
- match(T_SPACE)
743
- return UntaggedResponse.new(name, text)
1066
+ # section = "[" [section-spec] "]"
1067
+ def section
1068
+ str = +lbra
1069
+ str << section_spec unless peek_rbra?
1070
+ str << rbra
1071
+ end
1072
+
1073
+ # section-spec = section-msgtext / (section-part ["." section-text])
1074
+ # section-msgtext = "HEADER" /
1075
+ # "HEADER.FIELDS" [".NOT"] SP header-list /
1076
+ # "TEXT"
1077
+ # ; top-level or MESSAGE/RFC822 or
1078
+ # ; MESSAGE/GLOBAL part
1079
+ # section-part = nz-number *("." nz-number)
1080
+ # ; body part reference.
1081
+ # ; Allows for accessing nested body parts.
1082
+ # section-text = section-msgtext / "MIME"
1083
+ # ; text other than actual body part (headers,
1084
+ # ; etc.)
1085
+ #
1086
+ # n.b: we could "cheat" here and just grab all text inside the brackets,
1087
+ # but literals would need special treatment.
1088
+ def section_spec
1089
+ str = "".b
1090
+ str << atom # grabs everything up to "SP header-list" or "]"
1091
+ str << " " << header_list if SP?
1092
+ str
744
1093
  end
745
1094
 
746
- def flags_response
747
- token = match(T_ATOM)
748
- name = token.value.upcase
749
- match(T_SPACE)
750
- return UntaggedResponse.new(name, flag_list, @str)
1095
+ # header-list = "(" header-fld-name *(SP header-fld-name) ")"
1096
+ def header_list
1097
+ str = +""
1098
+ str << lpar << header_fld_name
1099
+ str << " " << header_fld_name while SP?
1100
+ str << rpar
751
1101
  end
752
1102
 
753
- def list_response
754
- token = match(T_ATOM)
755
- name = token.value.upcase
756
- match(T_SPACE)
757
- return UntaggedResponse.new(name, mailbox_list, @str)
758
- end
1103
+ # RFC3501 & RFC9051:
1104
+ # header-fld-name = astring
1105
+ #
1106
+ # NOTE: Previously, Net::IMAP recreated the raw original source string.
1107
+ # Now, it grabs the raw encoded value using @str and @pos. A future
1108
+ # version may simply return the decoded astring value. Although that is
1109
+ # technically incompatible, it should almost never make a difference: all
1110
+ # standard header field names are valid atoms:
1111
+ #
1112
+ # https://www.iana.org/assignments/message-headers/message-headers.xhtml
1113
+ #
1114
+ # Although RFC3501 allows any astring, RFC5322-valid header names are one
1115
+ # or more of the printable US-ASCII characters, except SP and colon. So
1116
+ # empty string isn't valid, and literals aren't needed and should not be
1117
+ # used. This is explicitly unchanged by [I18N-HDRS] (RFC6532).
1118
+ #
1119
+ # RFC5322:
1120
+ # optional-field = field-name ":" unstructured CRLF
1121
+ # field-name = 1*ftext
1122
+ # ftext = %d33-57 / ; Printable US-ASCII
1123
+ # %d59-126 ; characters not including
1124
+ # ; ":".
1125
+ def header_fld_name
1126
+ assert_no_lookahead
1127
+ start = @pos
1128
+ astring
1129
+ @str[start...@pos - 1]
1130
+ end
1131
+
1132
+ # mailbox-data = "FLAGS" SP flag-list / "LIST" SP mailbox-list /
1133
+ # "LSUB" SP mailbox-list / "SEARCH" *(SP nz-number) /
1134
+ # "STATUS" SP mailbox SP "(" [status-att-list] ")" /
1135
+ # number SP "EXISTS" / number SP "RECENT"
1136
+
1137
+ def mailbox_data__flags
1138
+ name = label("FLAGS")
1139
+ SP!
1140
+ UntaggedResponse.new(name, flag_list, @str)
1141
+ end
1142
+
1143
+ def mailbox_data__list
1144
+ name = label_in("LIST", "LSUB", "XLIST")
1145
+ SP!
1146
+ UntaggedResponse.new(name, mailbox_list, @str)
1147
+ end
1148
+ alias mailbox_data__lsub mailbox_data__list
1149
+ alias mailbox_data__xlist mailbox_data__list
759
1150
 
760
1151
  def mailbox_list
761
1152
  attr = flag_list
@@ -821,7 +1212,8 @@ module Net
821
1212
  return UntaggedResponse.new(name, data, @str)
822
1213
  end
823
1214
 
824
- def getacl_response
1215
+ # acl-data = "ACL" SP mailbox *(SP identifier SP rights)
1216
+ def acl_data
825
1217
  token = match(T_ATOM)
826
1218
  name = token.value.upcase
827
1219
  match(T_SPACE)
@@ -847,7 +1239,21 @@ module Net
847
1239
  return UntaggedResponse.new(name, data, @str)
848
1240
  end
849
1241
 
850
- def search_response
1242
+ # RFC3501:
1243
+ # mailbox-data = "SEARCH" *(SP nz-number) / ...
1244
+ # RFC5256: SORT
1245
+ # sort-data = "SORT" *(SP nz-number)
1246
+ # RFC7162: CONDSTORE, QRESYNC
1247
+ # mailbox-data =/ "SEARCH" [1*(SP nz-number) SP
1248
+ # search-sort-mod-seq]
1249
+ # sort-data = "SORT" [1*(SP nz-number) SP
1250
+ # search-sort-mod-seq]
1251
+ # ; Updates the SORT response from RFC 5256.
1252
+ # search-sort-mod-seq = "(" "MODSEQ" SP mod-sequence-value ")"
1253
+ # RFC9051:
1254
+ # mailbox-data = obsolete-search-response / ...
1255
+ # obsolete-search-response = "SEARCH" *(SP nz-number)
1256
+ def mailbox_data__search
851
1257
  token = match(T_ATOM)
852
1258
  name = token.value.upcase
853
1259
  token = lookahead
@@ -877,8 +1283,9 @@ module Net
877
1283
  end
878
1284
  return UntaggedResponse.new(name, data, @str)
879
1285
  end
1286
+ alias sort_data mailbox_data__search
880
1287
 
881
- def thread_response
1288
+ def thread_data
882
1289
  token = match(T_ATOM)
883
1290
  name = token.value.upcase
884
1291
  token = lookahead
@@ -940,7 +1347,7 @@ module Net
940
1347
  return rootmember
941
1348
  end
942
1349
 
943
- def status_response
1350
+ def mailbox_data__status
944
1351
  token = match(T_ATOM)
945
1352
  name = token.value.upcase
946
1353
  match(T_SPACE)
@@ -967,29 +1374,40 @@ module Net
967
1374
  return UntaggedResponse.new(name, data, @str)
968
1375
  end
969
1376
 
970
- def capability_response
971
- token = match(T_ATOM)
972
- name = token.value.upcase
973
- match(T_SPACE)
974
- UntaggedResponse.new(name, capability_data, @str)
1377
+ # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
1378
+ # The grammar rule is used by both response-data and resp-text-code.
1379
+ # But this method only returns UntaggedResponse (response-data).
1380
+ #
1381
+ # RFC3501:
1382
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1383
+ # *(SP capability)
1384
+ # RFC9051:
1385
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1386
+ # *(SP capability)
1387
+ def capability_data__untagged
1388
+ UntaggedResponse.new label("CAPABILITY"), capability__list, @str
975
1389
  end
976
1390
 
977
- def capability_data
978
- data = []
979
- while true
980
- token = lookahead
981
- case token.symbol
982
- when T_CRLF, T_RBRA
983
- break
984
- when T_SPACE
985
- shift_token
986
- next
987
- end
988
- data.push(atom.upcase)
989
- end
990
- data
1391
+ # enable-data = "ENABLED" *(SP capability)
1392
+ def enable_data
1393
+ UntaggedResponse.new label("ENABLED"), capability__list, @str
1394
+ end
1395
+
1396
+ # As a workaround for buggy servers, allow a trailing SP:
1397
+ # *(SP capability) [SP]
1398
+ def capability__list
1399
+ list = []; while SP? && (capa = capability?) do list << capa end; list
991
1400
  end
992
1401
 
1402
+ alias resp_code__capability capability__list
1403
+
1404
+ # capability = ("AUTH=" auth-type) / atom
1405
+ # ; New capabilities MUST begin with "X" or be
1406
+ # ; registered with IANA as standard or
1407
+ # ; standards-track
1408
+ alias capability case_insensitive__atom
1409
+ alias capability? case_insensitive__atom?
1410
+
993
1411
  def id_response
994
1412
  token = match(T_ATOM)
995
1413
  name = token.value.upcase
@@ -1019,147 +1437,177 @@ module Net
1019
1437
  end
1020
1438
  end
1021
1439
 
1440
+ # namespace-response = "NAMESPACE" SP namespace
1441
+ # SP namespace SP namespace
1442
+ # ; The first Namespace is the Personal Namespace(s).
1443
+ # ; The second Namespace is the Other Users'
1444
+ # ; Namespace(s).
1445
+ # ; The third Namespace is the Shared Namespace(s).
1022
1446
  def namespace_response
1447
+ name = label("NAMESPACE")
1023
1448
  @lex_state = EXPR_DATA
1024
- token = lookahead
1025
- token = match(T_ATOM)
1026
- name = token.value.upcase
1027
- match(T_SPACE)
1028
- personal = namespaces
1029
- match(T_SPACE)
1030
- other = namespaces
1031
- match(T_SPACE)
1032
- shared = namespaces
1449
+ data = Namespaces.new((SP!; namespace),
1450
+ (SP!; namespace),
1451
+ (SP!; namespace))
1452
+ UntaggedResponse.new(name, data, @str)
1453
+ ensure
1033
1454
  @lex_state = EXPR_BEG
1034
- data = Namespaces.new(personal, other, shared)
1035
- return UntaggedResponse.new(name, data, @str)
1036
- end
1037
-
1038
- def namespaces
1039
- token = lookahead
1040
- # empty () is not allowed, so nil is functionally identical to empty.
1041
- data = []
1042
- if token.symbol == T_NIL
1043
- shift_token
1044
- else
1045
- match(T_LPAR)
1046
- loop do
1047
- data << namespace
1048
- break unless lookahead.symbol == T_SPACE
1049
- shift_token
1050
- end
1051
- match(T_RPAR)
1052
- end
1053
- data
1054
1455
  end
1055
1456
 
1457
+ # namespace = nil / "(" 1*namespace-descr ")"
1056
1458
  def namespace
1057
- match(T_LPAR)
1058
- prefix = match(T_QUOTED, T_LITERAL).value
1059
- match(T_SPACE)
1060
- delimiter = string
1459
+ NIL? and return []
1460
+ lpar
1461
+ list = [namespace_descr]
1462
+ list << namespace_descr until rpar?
1463
+ list
1464
+ end
1465
+
1466
+ # namespace-descr = "(" string SP
1467
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil)
1468
+ # [namespace-response-extensions] ")"
1469
+ def namespace_descr
1470
+ lpar
1471
+ prefix = string; SP!
1472
+ delimiter = nquoted # n.b: should only accept single char
1061
1473
  extensions = namespace_response_extensions
1062
- match(T_RPAR)
1474
+ rpar
1063
1475
  Namespace.new(prefix, delimiter, extensions)
1064
1476
  end
1065
1477
 
1478
+ # namespace-response-extensions = *namespace-response-extension
1479
+ # namespace-response-extension = SP string SP
1480
+ # "(" string *(SP string) ")"
1066
1481
  def namespace_response_extensions
1067
1482
  data = {}
1068
- token = lookahead
1069
- if token.symbol == T_SPACE
1070
- shift_token
1071
- name = match(T_QUOTED, T_LITERAL).value
1483
+ while SP?
1484
+ name = string; SP!
1485
+ lpar
1072
1486
  data[name] ||= []
1073
- match(T_SPACE)
1074
- match(T_LPAR)
1075
- loop do
1076
- data[name].push match(T_QUOTED, T_LITERAL).value
1077
- break unless lookahead.symbol == T_SPACE
1078
- shift_token
1079
- end
1080
- match(T_RPAR)
1487
+ data[name] << string
1488
+ data[name] << string while SP?
1489
+ rpar
1081
1490
  end
1082
1491
  data
1083
1492
  end
1084
1493
 
1085
- # text = 1*TEXT-CHAR
1086
- # TEXT-CHAR = <any CHAR except CR and LF>
1494
+ # TEXT-CHAR = <any CHAR except CR and LF>
1495
+ # RFC3501:
1496
+ # text = 1*TEXT-CHAR
1497
+ # RFC9051:
1498
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
1499
+ # ; Non-ASCII text can only be returned
1500
+ # ; after ENABLE IMAP4rev2 command
1087
1501
  def text
1088
- match(T_TEXT, lex_state: EXPR_TEXT).value
1502
+ match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8")
1503
+ end
1504
+
1505
+ # an "accept" version of #text
1506
+ def text?
1507
+ accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8")
1089
1508
  end
1090
1509
 
1091
- # resp-text = ["[" resp-text-code "]" SP] text
1510
+ # RFC3501:
1511
+ # resp-text = ["[" resp-text-code "]" SP] text
1512
+ # RFC9051:
1513
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1514
+ #
1515
+ # We leniently re-interpret this as
1516
+ # resp-text = ["[" resp-text-code "]" [SP [text]] / [text]
1092
1517
  def resp_text
1093
- token = match(T_LBRA, T_TEXT, lex_state: EXPR_RTEXT)
1094
- case token.symbol
1095
- when T_LBRA
1096
- code = resp_text_code
1097
- match(T_RBRA)
1098
- accept_space # violating RFC
1099
- ResponseText.new(code, text)
1100
- when T_TEXT
1101
- ResponseText.new(nil, token.value)
1518
+ if lbra?
1519
+ code = resp_text_code; rbra
1520
+ ResponseText.new(code, SP? && text? || "")
1521
+ else
1522
+ ResponseText.new(nil, text? || "")
1102
1523
  end
1103
1524
  end
1104
1525
 
1105
- # See https://www.rfc-editor.org/errata/rfc3501
1526
+ # RFC3501 (See https://www.rfc-editor.org/errata/rfc3501):
1527
+ # resp-text-code = "ALERT" /
1528
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1529
+ # capability-data / "PARSE" /
1530
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1531
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1532
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1533
+ # "UNSEEN" SP nz-number /
1534
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1535
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1536
+ # *(SP capability)
1106
1537
  #
1107
- # resp-text-code = "ALERT" /
1108
- # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1109
- # capability-data / "PARSE" /
1110
- # "PERMANENTFLAGS" SP "("
1111
- # [flag-perm *(SP flag-perm)] ")" /
1112
- # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1113
- # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1114
- # "UNSEEN" SP nz-number /
1115
- # atom [SP 1*<any TEXT-CHAR except "]">]
1538
+ # RFC5530:
1539
+ # resp-text-code =/ "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1540
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1541
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1542
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1543
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1544
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1545
+ # "NONEXISTENT"
1546
+ # RFC9051:
1547
+ # resp-text-code = "ALERT" /
1548
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1549
+ # capability-data / "PARSE" /
1550
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1551
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1552
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1553
+ # resp-code-apnd / resp-code-copy / "UIDNOTSTICKY" /
1554
+ # "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1555
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1556
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1557
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1558
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1559
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1560
+ # "NONEXISTENT" / "NOTSAVED" / "HASCHILDREN" /
1561
+ # "CLOSED" /
1562
+ # "UNKNOWN-CTE" /
1563
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1564
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1565
+ # *(SP capability)
1116
1566
  #
1117
- # +UIDPLUS+ ABNF:: https://www.rfc-editor.org/rfc/rfc4315.html#section-4
1118
- # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1567
+ # RFC4315 (UIDPLUS), RFC9051 (IMAP4rev2):
1568
+ # resp-code-apnd = "APPENDUID" SP nz-number SP append-uid
1569
+ # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1570
+ # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1571
+ #
1572
+ # RFC7162 (CONDSTORE):
1573
+ # resp-text-code =/ "HIGHESTMODSEQ" SP mod-sequence-value /
1574
+ # "NOMODSEQ" /
1575
+ # "MODIFIED" SP sequence-set
1119
1576
  def resp_text_code
1120
- token = match(T_ATOM)
1121
- name = token.value.upcase
1122
- case name
1123
- when /\A(?:ALERT|PARSE|READ-ONLY|READ-WRITE|TRYCREATE|NOMODSEQ)\z/n
1124
- result = ResponseCode.new(name, nil)
1125
- when /\A(?:BADCHARSET)\z/n
1126
- result = ResponseCode.new(name, charset_list)
1127
- when /\A(?:CAPABILITY)\z/ni
1128
- result = ResponseCode.new(name, capability_data)
1129
- when /\A(?:PERMANENTFLAGS)\z/n
1130
- match(T_SPACE)
1131
- result = ResponseCode.new(name, flag_list)
1132
- when /\A(?:UIDVALIDITY|UIDNEXT|UNSEEN)\z/n
1133
- match(T_SPACE)
1134
- result = ResponseCode.new(name, number)
1135
- when /\A(?:APPENDUID)\z/n
1136
- result = ResponseCode.new(name, resp_code_apnd__data)
1137
- when /\A(?:COPYUID)\z/n
1138
- result = ResponseCode.new(name, resp_code_copy__data)
1139
- else
1140
- token = lookahead
1141
- if token.symbol == T_SPACE
1142
- shift_token
1143
- token = match(T_TEXT, lex_state: EXPR_CTEXT)
1144
- result = ResponseCode.new(name, token.value)
1577
+ name = resp_text_code__name
1578
+ data =
1579
+ case name
1580
+ when "CAPABILITY" then resp_code__capability
1581
+ when "PERMANENTFLAGS" then SP? ? flag_perm__list : []
1582
+ when "UIDNEXT" then SP!; nz_number
1583
+ when "UIDVALIDITY" then SP!; nz_number
1584
+ when "UNSEEN" then SP!; nz_number # rev1 only
1585
+ when "APPENDUID" then SP!; resp_code_apnd__data # rev2, UIDPLUS
1586
+ when "COPYUID" then SP!; resp_code_copy__data # rev2, UIDPLUS
1587
+ when "BADCHARSET" then SP? ? charset__list : []
1588
+ when "ALERT", "PARSE", "READ-ONLY", "READ-WRITE", "TRYCREATE",
1589
+ "UNAVAILABLE", "AUTHENTICATIONFAILED", "AUTHORIZATIONFAILED",
1590
+ "EXPIRED", "PRIVACYREQUIRED", "CONTACTADMIN", "NOPERM", "INUSE",
1591
+ "EXPUNGEISSUED", "CORRUPTION", "SERVERBUG", "CLIENTBUG", "CANNOT",
1592
+ "LIMIT", "OVERQUOTA", "ALREADYEXISTS", "NONEXISTENT", "CLOSED",
1593
+ "NOTSAVED", "UIDNOTSTICKY", "UNKNOWN-CTE", "HASCHILDREN"
1594
+ when "NOMODSEQ" # CONDSTORE
1145
1595
  else
1146
- result = ResponseCode.new(name, nil)
1596
+ SP? and text_chars_except_rbra
1147
1597
  end
1148
- end
1149
- return result
1598
+ ResponseCode.new(name, data)
1150
1599
  end
1151
1600
 
1152
- def charset_list
1153
- result = []
1154
- if accept(T_SPACE)
1155
- match(T_LPAR)
1156
- result << charset
1157
- while accept(T_SPACE)
1158
- result << charset
1159
- end
1160
- match(T_RPAR)
1161
- end
1162
- result
1601
+ alias resp_text_code__name case_insensitive__atom
1602
+
1603
+ # 1*<any TEXT-CHAR except "]">
1604
+ def text_chars_except_rbra
1605
+ match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')[0]
1606
+ end
1607
+
1608
+ # "(" charset *(SP charset) ")"
1609
+ def charset__list
1610
+ lpar; list = [charset]; while SP? do list << charset end; rpar; list
1163
1611
  end
1164
1612
 
1165
1613
  # already matched: "APPENDUID"
@@ -1175,8 +1623,8 @@ module Net
1175
1623
  # match uid_set even if that returns a single-member array.
1176
1624
  #
1177
1625
  def resp_code_apnd__data
1178
- match(T_SPACE); validity = number
1179
- match(T_SPACE); dst_uids = uid_set # uniqueid ⊂ uid-set
1626
+ validity = number; SP!
1627
+ dst_uids = uid_set # uniqueid ⊂ uid-set
1180
1628
  UIDPlusData.new(validity, nil, dst_uids)
1181
1629
  end
1182
1630
 
@@ -1184,9 +1632,9 @@ module Net
1184
1632
  #
1185
1633
  # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1186
1634
  def resp_code_copy__data
1187
- match(T_SPACE); validity = number
1188
- match(T_SPACE); src_uids = uid_set
1189
- match(T_SPACE); dst_uids = uid_set
1635
+ validity = number; SP!
1636
+ src_uids = uid_set; SP!
1637
+ dst_uids = uid_set
1190
1638
  UIDPlusData.new(validity, src_uids, dst_uids)
1191
1639
  end
1192
1640
 
@@ -1230,9 +1678,7 @@ module Net
1230
1678
  mailbox = $3
1231
1679
  host = $4
1232
1680
  for s in [name, route, mailbox, host]
1233
- if s
1234
- s.gsub!(/\\(["\\])/n, "\\1")
1235
- end
1681
+ Patterns.unescape_quoted! s
1236
1682
  end
1237
1683
  else
1238
1684
  name = nstring
@@ -1247,124 +1693,56 @@ module Net
1247
1693
  return Address.new(name, route, mailbox, host)
1248
1694
  end
1249
1695
 
1250
- FLAG_REGEXP = /\
1251
- (?# FLAG )\\([^\x80-\xff(){ \x00-\x1f\x7f%"\\]+)|\
1252
- (?# ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\]+)/n
1253
-
1696
+ # flag-list = "(" [flag *(SP flag)] ")"
1254
1697
  def flag_list
1255
- if @str.index(/\(([^)]*)\)/ni, @pos)
1256
- @pos = $~.end(0)
1257
- return $1.scan(FLAG_REGEXP).collect { |flag, atom|
1258
- if atom
1259
- atom
1260
- else
1261
- flag.capitalize.intern
1262
- end
1263
- }
1264
- else
1265
- parse_error("invalid flag list")
1266
- end
1267
- end
1268
-
1269
- def nstring
1270
- token = lookahead
1271
- if token.symbol == T_NIL
1272
- shift_token
1273
- return nil
1274
- else
1275
- return string
1276
- end
1277
- end
1278
-
1279
- def astring
1280
- token = lookahead
1281
- if string_token?(token)
1282
- return string
1283
- else
1284
- return astring_chars
1285
- end
1286
- end
1287
-
1288
- def string
1289
- token = lookahead
1290
- if token.symbol == T_NIL
1291
- shift_token
1292
- return nil
1293
- end
1294
- token = match(T_QUOTED, T_LITERAL)
1295
- return token.value
1296
- end
1297
-
1298
- STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1299
-
1300
- def string_token?(token)
1301
- return STRING_TOKENS.include?(token.symbol)
1302
- end
1303
-
1304
- def case_insensitive_string
1305
- token = lookahead
1306
- if token.symbol == T_NIL
1307
- shift_token
1308
- return nil
1309
- end
1310
- token = match(T_QUOTED, T_LITERAL)
1311
- return token.value.upcase
1312
- end
1313
-
1314
- # atom = 1*ATOM-CHAR
1315
- # ATOM-CHAR = <any CHAR except atom-specials>
1316
- ATOM_TOKENS = [
1317
- T_ATOM,
1318
- T_NUMBER,
1319
- T_NIL,
1320
- T_LBRA,
1321
- T_PLUS
1322
- ]
1323
-
1324
- def atom
1325
- -combine_adjacent(*ATOM_TOKENS)
1326
- end
1327
-
1328
- # ASTRING-CHAR = ATOM-CHAR / resp-specials
1329
- # resp-specials = "]"
1330
- ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
1331
-
1332
- def astring_chars
1333
- combine_adjacent(*ASTRING_CHARS_TOKENS)
1334
- end
1335
-
1336
- def combine_adjacent(*tokens)
1337
- result = "".b
1338
- while token = accept(*tokens)
1339
- result << token.value
1340
- end
1341
- if result.empty?
1342
- parse_error('unexpected token %s (expected %s)',
1343
- lookahead.symbol, args.join(" or "))
1344
- end
1345
- result
1698
+ match_re(Patterns::FLAG_LIST, "flag-list")[1]
1699
+ .split(nil)
1700
+ .map! { _1.start_with?("\\") ? _1[1..].capitalize.to_sym : _1 }
1701
+ end
1702
+
1703
+ # "(" [flag-perm *(SP flag-perm)] ")"
1704
+ def flag_perm__list
1705
+ match_re(Patterns::FLAG_PERM_LIST, "PERMANENTFLAGS flag-perm list")[1]
1706
+ .split(nil)
1707
+ .map! { _1.start_with?("\\") ? _1[1..].capitalize.to_sym : _1 }
1708
+ end
1709
+
1710
+ # Not checking for max one mbx-list-sflag in the parser.
1711
+ # >>>
1712
+ # mbx-list-flags = *(mbx-list-oflag SP) mbx-list-sflag
1713
+ # *(SP mbx-list-oflag) /
1714
+ # mbx-list-oflag *(SP mbx-list-oflag)
1715
+ # mbx-list-oflag = "\Noinferiors" / child-mbox-flag /
1716
+ # "\Subscribed" / "\Remote" / flag-extension
1717
+ # ; Other flags; multiple from this list are
1718
+ # ; possible per LIST response, but each flag
1719
+ # ; can only appear once per LIST response
1720
+ # mbx-list-sflag = "\NonExistent" / "\Noselect" / "\Marked" /
1721
+ # "\Unmarked"
1722
+ # ; Selectability flags; only one per LIST response
1723
+ def parens__mbx_list_flags
1724
+ match_re(Patterns::MBX_LIST_FLAGS, "mbx-list-flags")[1]
1725
+ .split(nil).map! { _1.capitalize.to_sym }
1346
1726
  end
1347
1727
 
1348
1728
  # See https://www.rfc-editor.org/errata/rfc3501
1349
1729
  #
1350
1730
  # charset = atom / quoted
1351
- def charset
1352
- if token = accept(T_QUOTED)
1353
- token.value
1354
- else
1355
- atom
1356
- end
1357
- end
1731
+ def charset; quoted? || atom end
1358
1732
 
1359
- def number
1360
- token = lookahead
1361
- if token.symbol == T_NIL
1362
- shift_token
1363
- return nil
1364
- end
1365
- token = match(T_NUMBER)
1366
- return token.value.to_i
1367
- end
1733
+ # RFC7162:
1734
+ # mod-sequence-value = 1*DIGIT
1735
+ # ;; Positive unsigned 63-bit integer
1736
+ # ;; (mod-sequence)
1737
+ # ;; (1 <= n <= 9,223,372,036,854,775,807).
1738
+ alias mod_sequence_value nz_number64
1739
+
1740
+ # RFC7162:
1741
+ # permsg-modsequence = mod-sequence-value
1742
+ # ;; Per-message mod-sequence.
1743
+ alias permsg_modsequence mod_sequence_value
1744
+
1745
+ def parens__modseq; lpar; _ = permsg_modsequence; rpar; _ end
1368
1746
 
1369
1747
  # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1370
1748
  # uid-set = (uniqueid / uid-range) *("," uid-set)
@@ -1393,64 +1771,15 @@ module Net
1393
1771
 
1394
1772
  SPACES_REGEXP = /\G */n
1395
1773
 
1396
- # This advances @pos directly so it's safe before changing @lex_state.
1397
- def accept_space
1398
- if @token
1399
- shift_token if @token.symbol == T_SPACE
1400
- elsif @str[@pos] == " "
1401
- @pos += 1
1402
- end
1403
- end
1404
-
1405
1774
  # The RFC is very strict about this and usually we should be too.
1406
1775
  # But skipping spaces is usually a safe workaround for buggy servers.
1407
1776
  #
1408
1777
  # This advances @pos directly so it's safe before changing @lex_state.
1409
1778
  def accept_spaces
1410
- shift_token if @token&.symbol == T_SPACE
1411
- if @str.index(SPACES_REGEXP, @pos)
1779
+ return false unless SP?
1780
+ @str.index(SPACES_REGEXP, @pos) and
1412
1781
  @pos = $~.end(0)
1413
- end
1414
- end
1415
-
1416
- def match(*args, lex_state: @lex_state)
1417
- if @token && lex_state != @lex_state
1418
- parse_error("invalid lex_state change to %s with unconsumed token",
1419
- lex_state)
1420
- end
1421
- begin
1422
- @lex_state, original_lex_state = lex_state, @lex_state
1423
- token = lookahead
1424
- unless args.include?(token.symbol)
1425
- parse_error('unexpected token %s (expected %s)',
1426
- token.symbol.id2name,
1427
- args.collect {|i| i.id2name}.join(" or "))
1428
- end
1429
- shift_token
1430
- return token
1431
- ensure
1432
- @lex_state = original_lex_state
1433
- end
1434
- end
1435
-
1436
- # like match, but does not raise error on failure.
1437
- #
1438
- # returns and shifts token on successful match
1439
- # returns nil and leaves @token unshifted on no match
1440
- def accept(*args)
1441
- token = lookahead
1442
- if args.include?(token.symbol)
1443
- shift_token
1444
- token
1445
- end
1446
- end
1447
-
1448
- def lookahead
1449
- @token ||= next_token
1450
- end
1451
-
1452
- def shift_token
1453
- @token = nil
1782
+ true
1454
1783
  end
1455
1784
 
1456
1785
  def next_token
@@ -1460,39 +1789,42 @@ module Net
1460
1789
  @pos = $~.end(0)
1461
1790
  if $1
1462
1791
  return Token.new(T_SPACE, $+)
1463
- elsif $2
1464
- return Token.new(T_NIL, $+)
1792
+ elsif $2 && $6
1793
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
1794
+ return Token.new(T_ATOM, $2)
1465
1795
  elsif $3
1466
- return Token.new(T_NUMBER, $+)
1796
+ return Token.new(T_NIL, $+)
1467
1797
  elsif $4
1468
- return Token.new(T_ATOM, $+)
1798
+ return Token.new(T_NUMBER, $+)
1469
1799
  elsif $5
1470
- return Token.new(T_QUOTED,
1471
- $+.gsub(/\\(["\\])/n, "\\1"))
1472
- elsif $6
1473
- return Token.new(T_LPAR, $+)
1800
+ return Token.new(T_PLUS, $+)
1474
1801
  elsif $7
1475
- return Token.new(T_RPAR, $+)
1802
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
1803
+ return Token.new(T_ATOM, $+)
1476
1804
  elsif $8
1477
- return Token.new(T_BSLASH, $+)
1805
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1478
1806
  elsif $9
1479
- return Token.new(T_STAR, $+)
1807
+ return Token.new(T_LPAR, $+)
1480
1808
  elsif $10
1481
- return Token.new(T_LBRA, $+)
1809
+ return Token.new(T_RPAR, $+)
1482
1810
  elsif $11
1483
- return Token.new(T_RBRA, $+)
1811
+ return Token.new(T_BSLASH, $+)
1484
1812
  elsif $12
1813
+ return Token.new(T_STAR, $+)
1814
+ elsif $13
1815
+ return Token.new(T_LBRA, $+)
1816
+ elsif $14
1817
+ return Token.new(T_RBRA, $+)
1818
+ elsif $15
1485
1819
  len = $+.to_i
1486
1820
  val = @str[@pos, len]
1487
1821
  @pos += len
1488
1822
  return Token.new(T_LITERAL, val)
1489
- elsif $13
1490
- return Token.new(T_PLUS, $+)
1491
- elsif $14
1823
+ elsif $16
1492
1824
  return Token.new(T_PERCENT, $+)
1493
- elsif $15
1825
+ elsif $17
1494
1826
  return Token.new(T_CRLF, $+)
1495
- elsif $16
1827
+ elsif $18
1496
1828
  return Token.new(T_EOF, $+)
1497
1829
  else
1498
1830
  parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
@@ -1511,8 +1843,7 @@ module Net
1511
1843
  elsif $3
1512
1844
  return Token.new(T_NUMBER, $+)
1513
1845
  elsif $4
1514
- return Token.new(T_QUOTED,
1515
- $+.gsub(/\\(["\\])/n, "\\1"))
1846
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1516
1847
  elsif $5
1517
1848
  len = $+.to_i
1518
1849
  val = @str[@pos, len]
@@ -1529,63 +1860,11 @@ module Net
1529
1860
  @str.index(/\S*/n, @pos)
1530
1861
  parse_error("unknown token - %s", $&.dump)
1531
1862
  end
1532
- when EXPR_TEXT
1533
- if @str.index(TEXT_REGEXP, @pos)
1534
- @pos = $~.end(0)
1535
- if $1
1536
- return Token.new(T_TEXT, $+)
1537
- else
1538
- parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid")
1539
- end
1540
- else
1541
- @str.index(/\S*/n, @pos)
1542
- parse_error("unknown token - %s", $&.dump)
1543
- end
1544
- when EXPR_RTEXT
1545
- if @str.index(RTEXT_REGEXP, @pos)
1546
- @pos = $~.end(0)
1547
- if $1
1548
- return Token.new(T_LBRA, $+)
1549
- elsif $2
1550
- return Token.new(T_TEXT, $+)
1551
- else
1552
- parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid")
1553
- end
1554
- else
1555
- @str.index(/\S*/n, @pos)
1556
- parse_error("unknown token - %s", $&.dump)
1557
- end
1558
- when EXPR_CTEXT
1559
- if @str.index(CTEXT_REGEXP, @pos)
1560
- @pos = $~.end(0)
1561
- if $1
1562
- return Token.new(T_TEXT, $+)
1563
- else
1564
- parse_error("[Net::IMAP BUG] CTEXT_REGEXP is invalid")
1565
- end
1566
- else
1567
- @str.index(/\S*/n, @pos) #/
1568
- parse_error("unknown token - %s", $&.dump)
1569
- end
1570
1863
  else
1571
1864
  parse_error("invalid @lex_state - %s", @lex_state.inspect)
1572
1865
  end
1573
1866
  end
1574
1867
 
1575
- def parse_error(fmt, *args)
1576
- if IMAP.debug
1577
- $stderr.printf("@str: %s\n", @str.dump)
1578
- $stderr.printf("@pos: %d\n", @pos)
1579
- $stderr.printf("@lex_state: %s\n", @lex_state)
1580
- if @token
1581
- $stderr.printf("@token.symbol: %s\n", @token.symbol)
1582
- $stderr.printf("@token.value: %s\n", @token.value.inspect)
1583
- end
1584
- end
1585
- raise ResponseParseError, format(fmt, *args)
1586
- end
1587
1868
  end
1588
-
1589
1869
  end
1590
-
1591
1870
  end