net-imap 0.3.7 → 0.4.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/pages.yml +46 -0
  3. data/.github/workflows/test.yml +5 -12
  4. data/.gitignore +2 -0
  5. data/Gemfile +3 -0
  6. data/README.md +15 -4
  7. data/Rakefile +0 -7
  8. data/docs/styles.css +0 -12
  9. data/lib/net/imap/authenticators.rb +26 -57
  10. data/lib/net/imap/command_data.rb +13 -6
  11. data/lib/net/imap/data_encoding.rb +14 -2
  12. data/lib/net/imap/deprecated_client_options.rb +139 -0
  13. data/lib/net/imap/errors.rb +20 -0
  14. data/lib/net/imap/fetch_data.rb +518 -0
  15. data/lib/net/imap/response_data.rb +116 -252
  16. data/lib/net/imap/response_parser/parser_utils.rb +240 -0
  17. data/lib/net/imap/response_parser.rb +1696 -1196
  18. data/lib/net/imap/sasl/anonymous_authenticator.rb +69 -0
  19. data/lib/net/imap/sasl/authentication_exchange.rb +107 -0
  20. data/lib/net/imap/sasl/authenticators.rb +118 -0
  21. data/lib/net/imap/sasl/client_adapter.rb +72 -0
  22. data/lib/net/imap/{authenticators/cram_md5.rb → sasl/cram_md5_authenticator.rb} +21 -11
  23. data/lib/net/imap/sasl/digest_md5_authenticator.rb +180 -0
  24. data/lib/net/imap/sasl/external_authenticator.rb +83 -0
  25. data/lib/net/imap/sasl/gs2_header.rb +80 -0
  26. data/lib/net/imap/{authenticators/login.rb → sasl/login_authenticator.rb} +25 -16
  27. data/lib/net/imap/sasl/oauthbearer_authenticator.rb +199 -0
  28. data/lib/net/imap/sasl/plain_authenticator.rb +101 -0
  29. data/lib/net/imap/sasl/protocol_adapters.rb +45 -0
  30. data/lib/net/imap/sasl/scram_algorithm.rb +58 -0
  31. data/lib/net/imap/sasl/scram_authenticator.rb +287 -0
  32. data/lib/net/imap/sasl/stringprep.rb +6 -66
  33. data/lib/net/imap/sasl/xoauth2_authenticator.rb +106 -0
  34. data/lib/net/imap/sasl.rb +144 -43
  35. data/lib/net/imap/sasl_adapter.rb +21 -0
  36. data/lib/net/imap/sequence_set.rb +67 -0
  37. data/lib/net/imap/stringprep/nameprep.rb +70 -0
  38. data/lib/net/imap/stringprep/saslprep.rb +69 -0
  39. data/lib/net/imap/stringprep/saslprep_tables.rb +96 -0
  40. data/lib/net/imap/stringprep/tables.rb +146 -0
  41. data/lib/net/imap/stringprep/trace.rb +85 -0
  42. data/lib/net/imap/stringprep.rb +159 -0
  43. data/lib/net/imap.rb +1061 -612
  44. data/net-imap.gemspec +5 -3
  45. data/rakelib/benchmarks.rake +91 -0
  46. data/rakelib/saslprep.rake +4 -4
  47. data/rakelib/string_prep_tables_generator.rb +82 -60
  48. metadata +33 -14
  49. data/benchmarks/stringprep.yml +0 -65
  50. data/benchmarks/table-regexps.yml +0 -39
  51. data/lib/net/imap/authenticators/digest_md5.rb +0 -115
  52. data/lib/net/imap/authenticators/plain.rb +0 -41
  53. data/lib/net/imap/authenticators/xoauth2.rb +0 -20
  54. data/lib/net/imap/sasl/saslprep.rb +0 -55
  55. data/lib/net/imap/sasl/saslprep_tables.rb +0 -98
  56. data/lib/net/imap/sasl/stringprep_tables.rb +0 -153
@@ -1,12 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "response_parser/parser_utils"
4
5
 
5
6
  module Net
6
7
  class IMAP < Protocol
7
8
 
8
9
  # Parses an \IMAP server response.
9
10
  class ResponseParser
11
+ include ParserUtils
12
+ extend ParserUtils::Generator
13
+
10
14
  # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
11
15
  def initialize
12
16
  @str = nil
@@ -33,742 +37,1333 @@ module Net
33
37
 
34
38
  # :stopdoc:
35
39
 
36
- EXPR_BEG = :EXPR_BEG
37
- EXPR_DATA = :EXPR_DATA
38
- EXPR_TEXT = :EXPR_TEXT
39
- EXPR_RTEXT = :EXPR_RTEXT
40
- EXPR_CTEXT = :EXPR_CTEXT
41
-
42
- T_SPACE = :SPACE
43
- T_NIL = :NIL
44
- T_NUMBER = :NUMBER
45
- T_ATOM = :ATOM
46
- T_QUOTED = :QUOTED
47
- T_LPAR = :LPAR
48
- T_RPAR = :RPAR
49
- T_BSLASH = :BSLASH
50
- T_STAR = :STAR
51
- T_LBRA = :LBRA
52
- T_RBRA = :RBRA
53
- T_LITERAL = :LITERAL
54
- T_PLUS = :PLUS
55
- T_PERCENT = :PERCENT
56
- T_CRLF = :CRLF
57
- T_EOF = :EOF
58
- T_TEXT = :TEXT
59
-
40
+ EXPR_BEG = :EXPR_BEG # the default, used in most places
41
+ EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
42
+
43
+ T_SPACE = :SPACE # atom special
44
+ T_ATOM = :ATOM # atom (subset of astring chars)
45
+ T_NIL = :NIL # subset of atom and label
46
+ T_NUMBER = :NUMBER # subset of atom
47
+ T_LBRA = :LBRA # subset of atom
48
+ T_PLUS = :PLUS # subset of atom; tag special
49
+ T_RBRA = :RBRA # atom special; resp_special; valid astring char
50
+ T_QUOTED = :QUOTED # starts/end with atom special
51
+ T_BSLASH = :BSLASH # atom special; quoted special
52
+ T_LPAR = :LPAR # atom special; paren list delimiter
53
+ T_RPAR = :RPAR # atom special; paren list delimiter
54
+ T_STAR = :STAR # atom special; list wildcard
55
+ T_PERCENT = :PERCENT # atom special; list wildcard
56
+ T_LITERAL = :LITERAL # starts with atom special
57
+ T_LITERAL8 = :LITERAL8 # starts with atom char "~"
58
+ T_CRLF = :CRLF # atom special; text special; quoted special
59
+ T_TEXT = :TEXT # any char except CRLF
60
+ T_EOF = :EOF # end of response string
61
+
62
+ module ResponseConditions
63
+ OK = "OK"
64
+ NO = "NO"
65
+ BAD = "BAD"
66
+ BYE = "BYE"
67
+ PREAUTH = "PREAUTH"
68
+
69
+ RESP_COND_STATES = [OK, NO, BAD ].freeze
70
+ RESP_DATA_CONDS = [OK, NO, BAD, BYE, ].freeze
71
+ AUTH_CONDS = [OK, PREAUTH].freeze
72
+ GREETING_CONDS = [OK, BYE, PREAUTH].freeze
73
+ RESP_CONDS = [OK, NO, BAD, BYE, PREAUTH].freeze
74
+ end
75
+ include ResponseConditions
76
+
77
+ module Patterns
78
+
79
+ module CharClassSubtraction
80
+ refine Regexp do
81
+ def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end
82
+ end
83
+ end
84
+ using CharClassSubtraction
85
+
86
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
87
+ # >>>
88
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
89
+ # CHAR = %x01-7F
90
+ # CRLF = CR LF
91
+ # ; Internet standard newline
92
+ # CTL = %x00-1F / %x7F
93
+ # ; controls
94
+ # DIGIT = %x30-39
95
+ # ; 0-9
96
+ # DQUOTE = %x22
97
+ # ; " (Double Quote)
98
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
99
+ # OCTET = %x00-FF
100
+ # SP = %x20
101
+ module RFC5234
102
+ ALPHA = /[A-Za-z]/n
103
+ CHAR = /[\x01-\x7f]/n
104
+ CRLF = /\r\n/n
105
+ CTL = /[\x00-\x1F\x7F]/n
106
+ DIGIT = /\d/n
107
+ DQUOTE = /"/n
108
+ HEXDIG = /\h/
109
+ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
110
+ SP = / /n
111
+ end
112
+
113
+ # From RFC3629, "UTF-8, a transformation format of ISO 10646"
114
+ # >>>
115
+ # UTF8-1 = %x00-7F
116
+ # UTF8-tail = %x80-BF
117
+ # UTF8-2 = %xC2-DF UTF8-tail
118
+ # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
119
+ # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
120
+ # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
121
+ # %xF4 %x80-8F 2( UTF8-tail )
122
+ # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
123
+ # UTF8-octets = *( UTF8-char )
124
+ #
125
+ # n.b. String * Integer is used for repetition, rather than /x{3}/,
126
+ # because ruby 3.2's linear-time cache-based optimization doesn't work
127
+ # with "bounded or fixed times repetition nesting in another repetition
128
+ # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
129
+ # believe it is hard to support this case correctly."
130
+ # See https://bugs.ruby-lang.org/issues/19104
131
+ module RFC3629
132
+ UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
133
+ UTF8_TAIL = /[\x80-\xBF]/n
134
+ UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
135
+ UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
136
+ /\xED[\x80-\x9F]#{UTF8_TAIL}/n,
137
+ /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
138
+ /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
139
+ UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
140
+ /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
141
+ /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
142
+ UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
143
+ UTF8_OCTETS = /#{UTF8_CHAR}*/n
144
+ end
145
+
146
+ include RFC5234
147
+ include RFC3629
148
+
149
+ # CHAR8 = %x01-ff
150
+ # ; any OCTET except NUL, %x00
151
+ CHAR8 = /[\x01-\xff]/n
152
+
153
+ # list-wildcards = "%" / "*"
154
+ LIST_WILDCARDS = /[%*]/n
155
+ # quoted-specials = DQUOTE / "\"
156
+ QUOTED_SPECIALS = /["\\]/n
157
+ # resp-specials = "]"
158
+ RESP_SPECIALS = /[\]]/n
159
+
160
+ # atomish = 1*<any ATOM-CHAR except "[">
161
+ # ; We use "atomish" for msg-att and section, in order
162
+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
163
+ #
164
+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
165
+ # quoted-specials / resp-specials
166
+ # ATOM-CHAR = <any CHAR except atom-specials>
167
+ # atom = 1*ATOM-CHAR
168
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
169
+ # tag = 1*<any ASTRING-CHAR except "+">
170
+
171
+ ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
172
+ ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
173
+
174
+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
175
+ ATOM_CHAR = CHAR - ATOM_SPECIALS
176
+
177
+ ATOM = /#{ATOM_CHAR}+/n
178
+ ASTRING_CHARS = /#{ASTRING_CHAR}+/n
179
+ ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
180
+ TAG = /#{ASTRING_CHAR - /[+]/ }+/
181
+
182
+ # TEXT-CHAR = <any CHAR except CR and LF>
183
+ TEXT_CHAR = CHAR - /[\r\n]/
184
+
185
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
186
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
187
+ CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
188
+
189
+ # flag = "\Answered" / "\Flagged" / "\Deleted" /
190
+ # "\Seen" / "\Draft" / flag-keyword / flag-extension
191
+ # ; Does not include "\Recent"
192
+ # flag-extension = "\" atom
193
+ # ; Future expansion. Client implementations
194
+ # ; MUST accept flag-extension flags. Server
195
+ # ; implementations MUST NOT generate
196
+ # ; flag-extension flags except as defined by
197
+ # ; a future Standard or Standards Track
198
+ # ; revisions of this specification.
199
+ # flag-keyword = "$MDNSent" / "$Forwarded" / "$Junk" /
200
+ # "$NotJunk" / "$Phishing" / atom
201
+ #
202
+ # flag-perm = flag / "\*"
203
+ #
204
+ # Not checking for max one mbx-list-sflag in the parser.
205
+ # >>>
206
+ # mbx-list-oflag = "\Noinferiors" / child-mbox-flag /
207
+ # "\Subscribed" / "\Remote" / flag-extension
208
+ # ; Other flags; multiple from this list are
209
+ # ; possible per LIST response, but each flag
210
+ # ; can only appear once per LIST response
211
+ # mbx-list-sflag = "\NonExistent" / "\Noselect" / "\Marked" /
212
+ # "\Unmarked"
213
+ # ; Selectability flags; only one per LIST response
214
+ # child-mbox-flag = "\HasChildren" / "\HasNoChildren"
215
+ # ; attributes for the CHILDREN return option, at most
216
+ # ; one possible per LIST response
217
+ FLAG = /\\?#{ATOM}/n
218
+ FLAG_EXTENSION = /\\#{ATOM}/n
219
+ FLAG_KEYWORD = ATOM
220
+ FLAG_PERM = Regexp.union(FLAG, "\\*")
221
+ MBX_FLAG = FLAG_EXTENSION
222
+
223
+ # flag-list = "(" [flag *(SP flag)] ")"
224
+ # resp-text-code =/ "PERMANENTFLAGS" SP
225
+ # "(" [flag-perm *(SP flag-perm)] ")"
226
+ # mbx-list-flags = *(mbx-list-oflag SP) mbx-list-sflag
227
+ # *(SP mbx-list-oflag) /
228
+ # mbx-list-oflag *(SP mbx-list-oflag)
229
+ # (Not checking for max one mbx-list-sflag in the parser.)
230
+ FLAG_LIST = /\G\((#{FLAG }(?:#{SP}#{FLAG })*|)\)/ni
231
+ FLAG_PERM_LIST = /\G\((#{FLAG_PERM}(?:#{SP}#{FLAG_PERM})*|)\)/ni
232
+ MBX_LIST_FLAGS = /\G (#{MBX_FLAG }(?:#{SP}#{MBX_FLAG })*) /nix
233
+
234
+ # RFC3501:
235
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
236
+ # "\" quoted-specials
237
+ # RFC9051:
238
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
239
+ # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
240
+ # RFC3501 & RFC9051:
241
+ # quoted = DQUOTE *QUOTED-CHAR DQUOTE
242
+ QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
243
+ QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
244
+ QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
245
+ QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
246
+ UTF8_2, UTF8_3, UTF8_4)
247
+ QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
248
+ QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
249
+
250
+ # RFC3501:
251
+ # text = 1*TEXT-CHAR
252
+ # RFC9051:
253
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
254
+ # ; Non-ASCII text can only be returned
255
+ # ; after ENABLE IMAP4rev2 command
256
+ TEXT_rev1 = /#{TEXT_CHAR}+/
257
+ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
258
+
259
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
260
+ TAGGED_LABEL_FCHAR = /[a-zA-Z\-_.]/n
261
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
262
+ TAGGED_LABEL_CHAR = /[a-zA-Z\-_.0-9:]*/n
263
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
264
+ # ; Is a valid RFC 3501 "atom".
265
+ TAGGED_EXT_LABEL = /#{TAGGED_LABEL_FCHAR}#{TAGGED_LABEL_CHAR}*/n
266
+
267
+ # nz-number = digit-nz *DIGIT
268
+ # ; Non-zero unsigned 32-bit integer
269
+ # ; (0 < n < 4,294,967,296)
270
+ NZ_NUMBER = /[1-9]\d*/n
271
+
272
+ # seq-number = nz-number / "*"
273
+ # ; message sequence number (COPY, FETCH, STORE
274
+ # ; commands) or unique identifier (UID COPY,
275
+ # ; UID FETCH, UID STORE commands).
276
+ # ; * represents the largest number in use. In
277
+ # ; the case of message sequence numbers, it is
278
+ # ; the number of messages in a non-empty mailbox.
279
+ # ; In the case of unique identifiers, it is the
280
+ # ; unique identifier of the last message in the
281
+ # ; mailbox or, if the mailbox is empty, the
282
+ # ; mailbox's current UIDNEXT value.
283
+ # ; The server should respond with a tagged BAD
284
+ # ; response to a command that uses a message
285
+ # ; sequence number greater than the number of
286
+ # ; messages in the selected mailbox. This
287
+ # ; includes "*" if the selected mailbox is empty.
288
+ SEQ_NUMBER = /#{NZ_NUMBER}|\*/n
289
+
290
+ # seq-range = seq-number ":" seq-number
291
+ # ; two seq-number values and all values between
292
+ # ; these two regardless of order.
293
+ # ; Example: 2:4 and 4:2 are equivalent and
294
+ # ; indicate values 2, 3, and 4.
295
+ # ; Example: a unique identifier sequence range of
296
+ # ; 3291:* includes the UID of the last message in
297
+ # ; the mailbox, even if that value is less than
298
+ # ; 3291.
299
+ SEQ_RANGE = /#{SEQ_NUMBER}:#{SEQ_NUMBER}/n
300
+
301
+ # sequence-set = (seq-number / seq-range) ["," sequence-set]
302
+ # ; set of seq-number values, regardless of order.
303
+ # ; Servers MAY coalesce overlaps and/or execute
304
+ # ; the sequence in any order.
305
+ # ; Example: a message sequence number set of
306
+ # ; 2,4:7,9,12:* for a mailbox with 15 messages is
307
+ # ; equivalent to 2,4,5,6,7,9,12,13,14,15
308
+ # ; Example: a message sequence number set of
309
+ # ; *:4,5:7 for a mailbox with 10 messages is
310
+ # ; equivalent to 10,9,8,7,6,5,4,5,6,7 and MAY
311
+ # ; be reordered and overlap coalesced to be
312
+ # ; 4,5,6,7,8,9,10.
313
+ SEQUENCE_SET_ITEM = /#{SEQ_NUMBER}|#{SEQ_RANGE}/n
314
+ SEQUENCE_SET = /#{SEQUENCE_SET_ITEM}(?:,#{SEQUENCE_SET_ITEM})*/n
315
+ SEQUENCE_SET_STR = /\A#{SEQUENCE_SET}\z/n
316
+
317
+ # RFC3501:
318
+ # literal = "{" number "}" CRLF *CHAR8
319
+ # ; Number represents the number of CHAR8s
320
+ # RFC9051:
321
+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
322
+ # ; <number64> represents the number of CHAR8s.
323
+ # ; A non-synchronizing literal is distinguished
324
+ # ; from a synchronizing literal by the presence of
325
+ # ; "+" before the closing "}".
326
+ # ; Non-synchronizing literals are not allowed when
327
+ # ; sent from server to the client.
328
+ LITERAL = /\{(\d+)\}\r\n/n
329
+
330
+ # RFC3516 (BINARY):
331
+ # literal8 = "~{" number "}" CRLF *OCTET
332
+ # ; <number> represents the number of OCTETs
333
+ # ; in the response string.
334
+ # RFC9051:
335
+ # literal8 = "~{" number64 "}" CRLF *OCTET
336
+ # ; <number64> represents the number of OCTETs
337
+ # ; in the response string.
338
+ LITERAL8 = /~\{(\d+)\}\r\n/n
339
+
340
+ module_function
341
+
342
+ def unescape_quoted!(quoted)
343
+ quoted
344
+ &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
345
+ &.force_encoding("UTF-8")
346
+ end
347
+
348
+ def unescape_quoted(quoted)
349
+ quoted
350
+ &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
351
+ &.force_encoding("UTF-8")
352
+ end
353
+
354
+ end
355
+
356
+ # the default, used in most places
60
357
  BEG_REGEXP = /\G(?:\
61
- (?# 1: SPACE )( +)|\
62
- (?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
63
- (?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
64
- (?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\
65
- (?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
66
- (?# 6: LPAR )(\()|\
67
- (?# 7: RPAR )(\))|\
68
- (?# 8: BSLASH )(\\)|\
69
- (?# 9: STAR )(\*)|\
70
- (?# 10: LBRA )(\[)|\
71
- (?# 11: RBRA )(\])|\
72
- (?# 12: LITERAL )\{(\d+)\}\r\n|\
73
- (?# 13: PLUS )(\+)|\
74
- (?# 14: PERCENT )(%)|\
75
- (?# 15: CRLF )(\r\n)|\
76
- (?# 16: EOF )(\z))/ni
77
-
358
+ (?# 1: SPACE )( )|\
359
+ (?# 2: LITERAL8)#{Patterns::LITERAL8}|\
360
+ (?# 3: ATOM prefixed with a compatible subtype)\
361
+ ((?:\
362
+ (?# 4: NIL )(NIL)|\
363
+ (?# 5: NUMBER )(\d+)|\
364
+ (?# 6: PLUS )(\+))\
365
+ (?# 7: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
366
+ (?# This enables greedy alternation without lookahead, in linear time.)\
367
+ )|\
368
+ (?# Also need to check for ATOM without a subtype prefix.)\
369
+ (?# 8: ATOM )(#{Patterns::ATOMISH})|\
370
+ (?# 9: QUOTED )#{Patterns::QUOTED_rev2}|\
371
+ (?# 10: LPAR )(\()|\
372
+ (?# 11: RPAR )(\))|\
373
+ (?# 12: BSLASH )(\\)|\
374
+ (?# 13: STAR )(\*)|\
375
+ (?# 14: LBRA )(\[)|\
376
+ (?# 15: RBRA )(\])|\
377
+ (?# 16: LITERAL )#{Patterns::LITERAL}|\
378
+ (?# 17: PERCENT )(%)|\
379
+ (?# 18: CRLF )(\r\n)|\
380
+ (?# 19: EOF )(\z))/ni
381
+
382
+ # envelope, body(structure), namespaces
78
383
  DATA_REGEXP = /\G(?:\
79
384
  (?# 1: SPACE )( )|\
80
385
  (?# 2: NIL )(NIL)|\
81
386
  (?# 3: NUMBER )(\d+)|\
82
- (?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
83
- (?# 5: LITERAL )\{(\d+)\}\r\n|\
387
+ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
388
+ (?# 5: LITERAL )#{Patterns::LITERAL}|\
84
389
  (?# 6: LPAR )(\()|\
85
390
  (?# 7: RPAR )(\)))/ni
86
391
 
87
- TEXT_REGEXP = /\G(?:\
88
- (?# 1: TEXT )([^\x00\r\n]*))/ni
89
-
90
- RTEXT_REGEXP = /\G(?:\
91
- (?# 1: LBRA )(\[)|\
92
- (?# 2: TEXT )([^\x00\r\n]*))/ni
392
+ # text, after 'resp-text-code "]"'
393
+ TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
93
394
 
94
- CTEXT_REGEXP = /\G(?:\
95
- (?# 1: TEXT )([^\x00\r\n\]]*))/ni
395
+ # resp-text-code, after 'atom SP'
396
+ CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
96
397
 
97
398
  Token = Struct.new(:symbol, :value)
98
399
 
99
- def response
100
- token = lookahead
101
- case token.symbol
102
- when T_PLUS
103
- result = continue_req
104
- when T_STAR
105
- result = response_untagged
106
- else
107
- result = response_tagged
108
- end
109
- while lookahead.symbol == T_SPACE
110
- # Ignore trailing space for Microsoft Exchange Server
111
- shift_token
112
- end
113
- match(T_CRLF)
114
- match(T_EOF)
115
- return result
116
- end
400
+ def_char_matchers :SP, " ", :T_SPACE
401
+ def_char_matchers :PLUS, "+", :T_PLUS
402
+ def_char_matchers :STAR, "*", :T_STAR
117
403
 
118
- def continue_req
119
- match(T_PLUS)
120
- token = lookahead
121
- if token.symbol == T_SPACE
122
- shift_token
123
- return ContinuationRequest.new(resp_text, @str)
404
+ def_char_matchers :lpar, "(", :T_LPAR
405
+ def_char_matchers :rpar, ")", :T_RPAR
406
+
407
+ def_char_matchers :lbra, "[", :T_LBRA
408
+ def_char_matchers :rbra, "]", :T_RBRA
409
+
410
+ # valid number ranges are not enforced by parser
411
+ # number = 1*DIGIT
412
+ # ; Unsigned 32-bit integer
413
+ # ; (0 <= n < 4,294,967,296)
414
+ def_token_matchers :number, T_NUMBER, coerce: Integer
415
+
416
+ def_token_matchers :quoted, T_QUOTED
417
+
418
+ # string = quoted / literal
419
+ def_token_matchers :string, T_QUOTED, T_LITERAL
420
+
421
+ # used by nstring8 = nstring / literal8
422
+ def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8
423
+
424
+ # use where string represents "LABEL" values
425
+ def_token_matchers :case_insensitive__string,
426
+ T_QUOTED, T_LITERAL,
427
+ send: :upcase
428
+
429
+ # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
430
+ # NIL? returns nil when it does *not* match
431
+ def_token_matchers :NIL, T_NIL
432
+
433
+ # In addition to explicitly uses of +tagged-ext-label+, use this to match
434
+ # keywords when the grammar has not provided any extension syntax.
435
+ #
436
+ # Do *not* use this for labels where the grammar specifies extensions
437
+ # can be +atom+, even if all currently defined labels would match. For
438
+ # example response codes in +resp-text-code+.
439
+ #
440
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
441
+ # ; Is a valid RFC 3501 "atom".
442
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
443
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
444
+ #
445
+ # TODO: add to lexer and only match tagged-ext-label
446
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
447
+
448
+ def_token_matchers :CRLF, T_CRLF
449
+ def_token_matchers :EOF, T_EOF
450
+
451
+ # atom = 1*ATOM-CHAR
452
+ # ATOM-CHAR = <any CHAR except atom-specials>
453
+ ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
454
+
455
+ SEQUENCE_SET_TOKENS = [T_ATOM, T_NUMBER, T_STAR]
456
+
457
+ # sequence-set = (seq-number / seq-range) ["," sequence-set]
458
+ # sequence-set =/ seq-last-command
459
+ # ; Allow for "result of the last command"
460
+ # ; indicator.
461
+ # seq-last-command = "$"
462
+ #
463
+ # *note*: doesn't match seq-last-command
464
+ def sequence_set
465
+ str = combine_adjacent(*SEQUENCE_SET_TOKENS)
466
+ if Patterns::SEQUENCE_SET_STR.match?(str)
467
+ SequenceSet.new(str)
124
468
  else
125
- return ContinuationRequest.new(ResponseText.new(nil, ""), @str)
469
+ parse_error("unexpected atom %p, expected sequence-set", str)
126
470
  end
127
471
  end
128
472
 
129
- def response_untagged
130
- match(T_STAR)
131
- match(T_SPACE)
132
- token = lookahead
133
- if token.symbol == T_NUMBER
134
- return numeric_response
135
- elsif token.symbol == T_ATOM
136
- case token.value
137
- when /\A(?:OK|NO|BAD|BYE|PREAUTH)\z/ni
138
- return response_cond
139
- when /\A(?:FLAGS)\z/ni
140
- return flags_response
141
- when /\A(?:ID)\z/ni
142
- return id_response
143
- when /\A(?:LIST|LSUB|XLIST)\z/ni
144
- return list_response
145
- when /\A(?:NAMESPACE)\z/ni
146
- return namespace_response
147
- when /\A(?:QUOTA)\z/ni
148
- return getquota_response
149
- when /\A(?:QUOTAROOT)\z/ni
150
- return getquotaroot_response
151
- when /\A(?:ACL)\z/ni
152
- return getacl_response
153
- when /\A(?:SEARCH|SORT)\z/ni
154
- return search_response
155
- when /\A(?:THREAD)\z/ni
156
- return thread_response
157
- when /\A(?:STATUS)\z/ni
158
- return status_response
159
- when /\A(?:CAPABILITY)\z/ni
160
- return capability_response
161
- when /\A(?:NOOP)\z/ni
162
- return ignored_response
163
- else
164
- return text_response
165
- end
166
- else
167
- parse_error("unexpected token %s", token.symbol)
168
- end
473
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
474
+ # resp-specials = "]"
475
+ ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
476
+
477
+ ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
478
+
479
+ # tag = 1*<any ASTRING-CHAR except "+">
480
+ TAG_TOKENS = (ASTRING_CHARS_TOKENS - [T_PLUS]).freeze
481
+
482
+ # TODO: handle atom, astring_chars, and tag entirely inside the lexer
483
+ def atom; combine_adjacent(*ATOM_TOKENS) end
484
+ def astring_chars; combine_adjacent(*ASTRING_CHARS_TOKENS) end
485
+ def tag; combine_adjacent(*TAG_TOKENS) end
486
+
487
+ # the #accept version of #atom
488
+ def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
489
+
490
+ # Returns <tt>atom.upcase</tt>
491
+ def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
492
+
493
+ # Returns <tt>atom?&.upcase</tt>
494
+ def case_insensitive__atom?
495
+ -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
169
496
  end
170
497
 
171
- def response_tagged
172
- tag = astring_chars
173
- match(T_SPACE)
174
- token = match(T_ATOM)
175
- name = token.value.upcase
176
- match(T_SPACE)
177
- return TaggedResponse.new(tag, name, resp_text, @str)
498
+ # astring = 1*ASTRING-CHAR / string
499
+ def astring
500
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
178
501
  end
179
502
 
180
- def response_cond
181
- token = match(T_ATOM)
182
- name = token.value.upcase
183
- match(T_SPACE)
184
- return UntaggedResponse.new(name, resp_text, @str)
503
+ def astring?
504
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
185
505
  end
186
506
 
187
- def numeric_response
188
- n = number
189
- match(T_SPACE)
190
- token = match(T_ATOM)
191
- name = token.value.upcase
192
- case name
193
- when "EXISTS", "RECENT", "EXPUNGE"
194
- return UntaggedResponse.new(name, n, @str)
195
- when "FETCH"
196
- shift_token
197
- match(T_SPACE)
198
- data = FetchData.new(n, msg_att(n))
199
- return UntaggedResponse.new(name, data, @str)
200
- end
507
+ # Use #label or #label_in to assert specific known labels
508
+ # (+tagged-ext-label+ only, not +atom+).
509
+ def label(word)
510
+ (val = tagged_ext_label) == word and return val
511
+ parse_error("unexpected atom %p, expected %p instead", val, word)
201
512
  end
202
513
 
203
- def msg_att(n)
204
- match(T_LPAR)
205
- attr = {}
206
- while true
207
- token = lookahead
208
- case token.symbol
209
- when T_RPAR
210
- shift_token
211
- break
212
- when T_SPACE
213
- shift_token
214
- next
215
- end
216
- case token.value
217
- when /\A(?:ENVELOPE)\z/ni
218
- name, val = envelope_data
219
- when /\A(?:FLAGS)\z/ni
220
- name, val = flags_data
221
- when /\A(?:INTERNALDATE)\z/ni
222
- name, val = internaldate_data
223
- when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
224
- name, val = rfc822_text
225
- when /\A(?:RFC822\.SIZE)\z/ni
226
- name, val = rfc822_size
227
- when /\A(?:BODY(?:STRUCTURE)?)\z/ni
228
- name, val = body_data
229
- when /\A(?:UID)\z/ni
230
- name, val = uid_data
231
- when /\A(?:MODSEQ)\z/ni
232
- name, val = modseq_data
233
- else
234
- parse_error("unknown attribute `%s' for {%d}", token.value, n)
235
- end
236
- attr[name] = val
237
- end
238
- return attr
514
+ # Use #label or #label_in to assert specific known labels
515
+ # (+tagged-ext-label+ only, not +atom+).
516
+ def label_in(*labels)
517
+ lbl = tagged_ext_label and labels.include?(lbl) and return lbl
518
+ parse_error("unexpected atom %p, expected one of %s instead",
519
+ lbl, labels.join(" or "))
239
520
  end
240
521
 
241
- def envelope_data
242
- token = match(T_ATOM)
243
- name = token.value.upcase
244
- match(T_SPACE)
245
- return name, envelope
522
+ # expects "OK" or "PREAUTH" and raises InvalidResponseError on failure
523
+ def resp_cond_auth__name
524
+ lbl = tagged_ext_label and AUTH_CONDS.include? lbl and return lbl
525
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
526
+ lbl, AUTH_CONDS.join(" or ")
527
+ ]
246
528
  end
247
529
 
248
- def envelope
249
- @lex_state = EXPR_DATA
250
- token = lookahead
251
- if token.symbol == T_NIL
252
- shift_token
253
- result = nil
254
- else
255
- match(T_LPAR)
256
- date = nstring
257
- match(T_SPACE)
258
- subject = nstring
259
- match(T_SPACE)
260
- from = address_list
261
- match(T_SPACE)
262
- sender = address_list
263
- match(T_SPACE)
264
- reply_to = address_list
265
- match(T_SPACE)
266
- to = address_list
267
- match(T_SPACE)
268
- cc = address_list
269
- match(T_SPACE)
270
- bcc = address_list
271
- match(T_SPACE)
272
- in_reply_to = nstring
273
- match(T_SPACE)
274
- message_id = nstring
275
- match(T_RPAR)
276
- result = Envelope.new(date, subject, from, sender, reply_to,
277
- to, cc, bcc, in_reply_to, message_id)
278
- end
279
- @lex_state = EXPR_BEG
280
- return result
530
+ # expects "OK" or "NO" or "BAD" and raises InvalidResponseError on failure
531
+ def resp_cond_state__name
532
+ lbl = tagged_ext_label and RESP_COND_STATES.include? lbl and return lbl
533
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
534
+ lbl, RESP_COND_STATES.join(" or ")
535
+ ]
281
536
  end
282
537
 
283
- def flags_data
284
- token = match(T_ATOM)
285
- name = token.value.upcase
286
- match(T_SPACE)
287
- return name, flag_list
538
+ # nstring = string / nil
539
+ def nstring
540
+ NIL? ? nil : string
288
541
  end
289
542
 
290
- def internaldate_data
291
- token = match(T_ATOM)
292
- name = token.value.upcase
293
- match(T_SPACE)
294
- token = match(T_QUOTED)
295
- return name, token.value
543
+ def nstring8
544
+ NIL? ? nil : string8
296
545
  end
297
546
 
298
- def rfc822_text
299
- token = match(T_ATOM)
300
- name = token.value.upcase
301
- token = lookahead
302
- if token.symbol == T_LBRA
303
- shift_token
304
- match(T_RBRA)
305
- end
306
- match(T_SPACE)
307
- return name, nstring
547
+ def nquoted
548
+ NIL? ? nil : quoted
308
549
  end
309
550
 
310
- def rfc822_size
311
- token = match(T_ATOM)
312
- name = token.value.upcase
313
- match(T_SPACE)
314
- return name, number
551
+ # use where nstring represents "LABEL" values
552
+ def case_insensitive__nstring
553
+ NIL? ? nil : case_insensitive__string
315
554
  end
316
555
 
317
- def body_data
318
- token = match(T_ATOM)
319
- name = token.value.upcase
320
- token = lookahead
321
- if token.symbol == T_SPACE
322
- shift_token
323
- return name, body
324
- end
325
- name.concat(section)
326
- token = lookahead
327
- if token.symbol == T_ATOM
328
- name.concat(token.value)
329
- shift_token
556
+ # tagged-ext-comp = astring /
557
+ # tagged-ext-comp *(SP tagged-ext-comp) /
558
+ # "(" tagged-ext-comp ")"
559
+ # ; Extensions that follow this general
560
+ # ; syntax should use nstring instead of
561
+ # ; astring when appropriate in the context
562
+ # ; of the extension.
563
+ # ; Note that a message set or a "number"
564
+ # ; can always be represented as an "atom".
565
+ # ; A URL should be represented as
566
+ # ; a "quoted" string.
567
+ def tagged_ext_comp
568
+ vals = []
569
+ while true
570
+ vals << case lookahead!(*ASTRING_TOKENS, T_LPAR).symbol
571
+ when T_LPAR then lpar; ary = tagged_ext_comp; rpar; ary
572
+ when T_NUMBER then number
573
+ else astring
574
+ end
575
+ SP? or break
330
576
  end
331
- match(T_SPACE)
332
- data = nstring
333
- return name, data
577
+ vals
334
578
  end
335
579
 
336
- def body
337
- @lex_state = EXPR_DATA
338
- token = lookahead
339
- if token.symbol == T_NIL
340
- shift_token
341
- result = nil
580
+ # tagged-ext-simple is a subset of atom
581
+ # TODO: recognize sequence-set in the lexer
582
+ #
583
+ # tagged-ext-simple = sequence-set / number / number64
584
+ def tagged_ext_simple
585
+ number? || sequence_set
586
+ end
587
+
588
+ # tagged-ext-val = tagged-ext-simple /
589
+ # "(" [tagged-ext-comp] ")"
590
+ def tagged_ext_val
591
+ if lpar?
592
+ _ = peek_rpar? ? [] : tagged_ext_comp
593
+ rpar
594
+ _
342
595
  else
343
- match(T_LPAR)
344
- token = lookahead
345
- if token.symbol == T_LPAR
346
- result = body_type_mpart
347
- else
348
- result = body_type_1part
596
+ tagged_ext_simple
597
+ end
598
+ end
599
+
600
+ # mailbox = "INBOX" / astring
601
+ # ; INBOX is case-insensitive. All case variants of
602
+ # ; INBOX (e.g., "iNbOx") MUST be interpreted as INBOX
603
+ # ; not as an astring. An astring which consists of
604
+ # ; the case-insensitive sequence "I" "N" "B" "O" "X"
605
+ # ; is considered to be INBOX and not an astring.
606
+ # ; Refer to section 5.1 for further
607
+ # ; semantic details of mailbox names.
608
+ alias mailbox astring
609
+
610
+ # valid number ranges are not enforced by parser
611
+ # number64 = 1*DIGIT
612
+ # ; Unsigned 63-bit integer
613
+ # ; (0 <= n <= 9,223,372,036,854,775,807)
614
+ alias number64 number
615
+ alias number64? number?
616
+
617
+ # valid number ranges are not enforced by parser
618
+ # nz-number = digit-nz *DIGIT
619
+ # ; Non-zero unsigned 32-bit integer
620
+ # ; (0 < n < 4,294,967,296)
621
+ alias nz_number number
622
+ alias nz_number? number?
623
+
624
+ # valid number ranges are not enforced by parser
625
+ # nz-number64 = digit-nz *DIGIT
626
+ # ; Unsigned 63-bit integer
627
+ # ; (0 < n <= 9,223,372,036,854,775,807)
628
+ alias nz_number64 nz_number
629
+
630
+ # valid number ranges are not enforced by parser
631
+ # uniqueid = nz-number
632
+ # ; Strictly ascending
633
+ alias uniqueid nz_number
634
+
635
+ # valid number ranges are not enforced by parser
636
+ #
637
+ # The Gmail ID is a 64-bit unsigned integer and is the decimal equivalent for the ID hex
638
+ # string used in the web interface and the Gmail API.
639
+ alias x_gm_id number
640
+
641
+ # [RFC3501 & RFC9051:]
642
+ # response = *(continue-req / response-data) response-done
643
+ #
644
+ # For simplicity, response isn't interpreted as the combination of the
645
+ # three response types, but instead represents any individual server
646
+ # response. Our simplified interpretation is defined as:
647
+ # response = continue-req | response_data | response-tagged
648
+ #
649
+ # n.b: our "response-data" definition parses "greeting" too.
650
+ def response
651
+ resp = case lookahead!(T_PLUS, T_STAR, *TAG_TOKENS).symbol
652
+ when T_PLUS then continue_req
653
+ when T_STAR then response_data
654
+ else response_tagged
655
+ end
656
+ accept_spaces # QUIRKY: Ignore trailing space (MS Exchange Server?)
657
+ CRLF!
658
+ EOF!
659
+ resp
660
+ end
661
+
662
+ # RFC3501 & RFC9051:
663
+ # continue-req = "+" SP (resp-text / base64) CRLF
664
+ #
665
+ # n.b: base64 is valid resp-text. And in the spirit of RFC9051 Appx E 23
666
+ # (and to workaround existing servers), we use the following grammar:
667
+ #
668
+ # continue-req = "+" [SP resp-text] CRLF
669
+ def continue_req
670
+ PLUS!
671
+ ContinuationRequest.new(SP? ? resp_text : ResponseText::EMPTY, @str)
672
+ end
673
+
674
+ RE_RESPONSE_TYPE = /\G(?:\d+ )?(?<type>#{Patterns::TAGGED_EXT_LABEL})/n
675
+
676
+ # [RFC3501:]
677
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
678
+ # mailbox-data / message-data / capability-data) CRLF
679
+ # [RFC4466:]
680
+ # response-data = "*" SP response-payload CRLF
681
+ # response-payload = resp-cond-state / resp-cond-bye /
682
+ # mailbox-data / message-data / capability-data
683
+ # RFC5161 (ENABLE capability):
684
+ # response-data =/ "*" SP enable-data CRLF
685
+ # RFC5255 (LANGUAGE capability)
686
+ # response-payload =/ language-data
687
+ # RFC5255 (I18NLEVEL=1 and I18NLEVEL=2 capabilities)
688
+ # response-payload =/ comparator-data
689
+ # [RFC9051:]
690
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
691
+ # mailbox-data / message-data / capability-data /
692
+ # enable-data) CRLF
693
+ #
694
+ # [merging in greeting and response-fatal:]
695
+ # greeting = "*" SP (resp-cond-auth / resp-cond-bye) CRLF
696
+ # response-fatal = "*" SP resp-cond-bye CRLF
697
+ # response-data =/ "*" SP (resp-cond-auth / resp-cond-bye) CRLF
698
+ # [removing duplicates, this is simply]
699
+ # response-payload =/ resp-cond-auth
700
+ #
701
+ # TODO: remove resp-cond-auth and handle greeting separately
702
+ def response_data
703
+ STAR!; SP!
704
+ m = peek_re(RE_RESPONSE_TYPE) or parse_error("unparsable response")
705
+ case m["type"].upcase
706
+ when "OK" then resp_cond_state__untagged # RFC3501, RFC9051
707
+ when "FETCH" then message_data__fetch # RFC3501, RFC9051
708
+ when "EXPUNGE" then message_data__expunge # RFC3501, RFC9051
709
+ when "EXISTS" then mailbox_data__exists # RFC3501, RFC9051
710
+ when "ESEARCH" then esearch_response # RFC4731, RFC9051, etc
711
+ when "VANISHED" then expunged_resp # RFC7162
712
+ when "UIDFETCH" then uidfetch_resp # (draft) UIDONLY
713
+ when "SEARCH" then mailbox_data__search # RFC3501 (obsolete)
714
+ when "CAPABILITY" then capability_data__untagged # RFC3501, RFC9051
715
+ when "FLAGS" then mailbox_data__flags # RFC3501, RFC9051
716
+ when "LIST" then mailbox_data__list # RFC3501, RFC9051
717
+ when "STATUS" then mailbox_data__status # RFC3501, RFC9051
718
+ when "NAMESPACE" then namespace_response # RFC2342, RFC9051
719
+ when "ENABLED" then enable_data # RFC5161, RFC9051
720
+ when "BAD" then resp_cond_state__untagged # RFC3501, RFC9051
721
+ when "NO" then resp_cond_state__untagged # RFC3501, RFC9051
722
+ when "PREAUTH" then resp_cond_auth # RFC3501, RFC9051
723
+ when "BYE" then resp_cond_bye # RFC3501, RFC9051
724
+ when "RECENT" then mailbox_data__recent # RFC3501 (obsolete)
725
+ when "SORT" then sort_data # RFC5256, RFC7162
726
+ when "THREAD" then thread_data # RFC5256
727
+ when "QUOTA" then quota_response # RFC2087, RFC9208
728
+ when "QUOTAROOT" then quotaroot_response # RFC2087, RFC9208
729
+ when "ID" then id_response # RFC2971
730
+ when "ACL" then acl_data # RFC4314
731
+ when "LISTRIGHTS" then listrights_data # RFC4314
732
+ when "MYRIGHTS" then myrights_data # RFC4314
733
+ when "METADATA" then metadata_resp # RFC5464
734
+ when "LANGUAGE" then language_data # RFC5255
735
+ when "COMPARATOR" then comparator_data # RFC5255
736
+ when "CONVERTED" then message_data__converted # RFC5259
737
+ when "LSUB" then mailbox_data__lsub # RFC3501 (obsolete)
738
+ when "XLIST" then mailbox_data__xlist # deprecated
739
+ when "NOOP" then response_data__noop
740
+ else response_data__unhandled
741
+ end
742
+ end
743
+
744
+ def response_data__unhandled(klass = UntaggedResponse)
745
+ num = number?; SP?
746
+ type = tagged_ext_label; SP?
747
+ text = remaining_unparsed
748
+ data =
749
+ if num && text then UnparsedNumericResponseData.new(num, text)
750
+ elsif text then UnparsedData.new(text)
751
+ else num
349
752
  end
350
- match(T_RPAR)
351
- end
352
- @lex_state = EXPR_BEG
353
- return result
753
+ klass.new(type, data, @str)
354
754
  end
355
755
 
356
- def body_type_1part
357
- token = lookahead
358
- case token.value
359
- when /\A(?:TEXT)\z/ni
360
- return body_type_text
361
- when /\A(?:MESSAGE)\z/ni
362
- return body_type_msg
363
- when /\A(?:ATTACHMENT)\z/ni
364
- return body_type_attachment
365
- when /\A(?:MIXED)\z/ni
366
- return body_type_mixed
367
- else
368
- return body_type_basic
369
- end
756
+ # reads all the way up until CRLF
757
+ def remaining_unparsed
758
+ str = @str[@pos...-2] and @pos += str.bytesize
759
+ str&.empty? ? nil : str
370
760
  end
371
761
 
372
- def body_type_basic
373
- mtype, msubtype = media_type
374
- token = lookahead
375
- if token.symbol == T_RPAR
376
- return BodyTypeBasic.new(mtype, msubtype)
377
- end
378
- match(T_SPACE)
379
- param, content_id, desc, enc, size = body_fields
380
- md5, disposition, language, extension = body_ext_1part
381
- return BodyTypeBasic.new(mtype, msubtype,
382
- param, content_id,
383
- desc, enc, size,
384
- md5, disposition, language, extension)
762
+ def response_data__ignored; response_data__unhandled(IgnoredResponse) end
763
+ alias response_data__noop response_data__ignored
764
+
765
+ alias esearch_response response_data__unhandled
766
+ alias expunged_resp response_data__unhandled
767
+ alias uidfetch_resp response_data__unhandled
768
+ alias listrights_data response_data__unhandled
769
+ alias myrights_data response_data__unhandled
770
+ alias metadata_resp response_data__unhandled
771
+ alias language_data response_data__unhandled
772
+ alias comparator_data response_data__unhandled
773
+ alias message_data__converted response_data__unhandled
774
+
775
+ # RFC3501 & RFC9051:
776
+ # response-tagged = tag SP resp-cond-state CRLF
777
+ def response_tagged
778
+ TaggedResponse.new(tag, *(SP!; resp_cond_state), @str)
385
779
  end
386
780
 
387
- def body_type_text
388
- mtype, msubtype = media_type
389
- match(T_SPACE)
390
- param, content_id, desc, enc, size = body_fields
391
- match(T_SPACE)
392
- lines = number
393
- md5, disposition, language, extension = body_ext_1part
394
- return BodyTypeText.new(mtype, msubtype,
395
- param, content_id,
396
- desc, enc, size,
397
- lines,
398
- md5, disposition, language, extension)
781
+ # RFC3501 & RFC9051:
782
+ # resp-cond-state = ("OK" / "NO" / "BAD") SP resp-text
783
+ #
784
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
785
+ # servers), we don't require a final SP and instead parse this as:
786
+ #
787
+ # resp-cond-state = ("OK" / "NO" / "BAD") [SP resp-text]
788
+ def resp_cond_state
789
+ [resp_cond_state__name, SP? ? resp_text : ResponseText::EMPTY]
399
790
  end
400
791
 
401
- def body_type_msg
402
- mtype, msubtype = media_type
403
- match(T_SPACE)
404
- param, content_id, desc, enc, size = body_fields
792
+ def resp_cond_state__untagged
793
+ UntaggedResponse.new(*resp_cond_state, @str)
794
+ end
405
795
 
406
- token = lookahead
407
- if token.symbol == T_RPAR
408
- # If this is not message/rfc822, we shouldn't apply the RFC822
409
- # spec to it. We should handle anything other than
410
- # message/rfc822 using multipart extension data [rfc3501] (i.e.
411
- # the data itself won't be returned, we would have to retrieve it
412
- # with BODYSTRUCTURE instead of with BODY
413
-
414
- # Also, sometimes a message/rfc822 is included as a large
415
- # attachment instead of having all of the other details
416
- # (e.g. attaching a .eml file to an email)
417
- if msubtype == "RFC822"
418
- return BodyTypeMessage.new(mtype, msubtype, param, content_id,
419
- desc, enc, size, nil, nil, nil, nil,
420
- nil, nil, nil)
421
- else
422
- return BodyTypeExtension.new(mtype, msubtype,
423
- param, content_id,
424
- desc, enc, size)
425
- end
426
- end
796
+ # resp-cond-auth = ("OK" / "PREAUTH") SP resp-text
797
+ #
798
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
799
+ # servers), we don't require a final SP and instead parse this as:
800
+ #
801
+ # resp-cond-auth = ("OK" / "PREAUTH") [SP resp-text]
802
+ def resp_cond_auth
803
+ UntaggedResponse.new(resp_cond_auth__name,
804
+ SP? ? resp_text : ResponseText::EMPTY,
805
+ @str)
806
+ end
427
807
 
428
- match(T_SPACE)
429
- env = envelope
430
- match(T_SPACE)
431
- b = body
432
- match(T_SPACE)
433
- lines = number
434
- md5, disposition, language, extension = body_ext_1part
435
- return BodyTypeMessage.new(mtype, msubtype,
436
- param, content_id,
437
- desc, enc, size,
438
- env, b, lines,
439
- md5, disposition, language, extension)
808
+ # resp-cond-bye = "BYE" SP resp-text
809
+ #
810
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
811
+ # servers), we don't require a final SP and instead parse this as:
812
+ #
813
+ # resp-cond-bye = "BYE" [SP resp-text]
814
+ def resp_cond_bye
815
+ UntaggedResponse.new(label(BYE),
816
+ SP? ? resp_text : ResponseText::EMPTY,
817
+ @str)
440
818
  end
441
819
 
442
- def body_type_attachment
443
- mtype = case_insensitive_string
444
- match(T_SPACE)
445
- param = body_fld_param
446
- return BodyTypeAttachment.new(mtype, nil, param)
820
+ # message-data = nz-number SP ("EXPUNGE" / ("FETCH" SP msg-att))
821
+ def message_data__fetch
822
+ seq = nz_number; SP!
823
+ name = label "FETCH"; SP!
824
+ data = FetchData.new(seq, msg_att(seq))
825
+ UntaggedResponse.new(name, data, @str)
447
826
  end
448
827
 
449
- def body_type_mixed
450
- mtype = "MULTIPART"
451
- msubtype = case_insensitive_string
452
- param, disposition, language, extension = body_ext_mpart
453
- return BodyTypeBasic.new(mtype, msubtype, param, nil, nil, nil, nil, nil, disposition, language, extension)
828
+ def response_data__simple_numeric
829
+ data = nz_number; SP!
830
+ name = tagged_ext_label
831
+ UntaggedResponse.new(name, data, @str)
454
832
  end
455
833
 
456
- def body_type_mpart
457
- parts = []
834
+ alias message_data__expunge response_data__simple_numeric
835
+ alias mailbox_data__exists response_data__simple_numeric
836
+ alias mailbox_data__recent response_data__simple_numeric
837
+
838
+ # RFC3501 & RFC9051:
839
+ # msg-att = "(" (msg-att-dynamic / msg-att-static)
840
+ # *(SP (msg-att-dynamic / msg-att-static)) ")"
841
+ #
842
+ # msg-att-dynamic = "FLAGS" SP "(" [flag-fetch *(SP flag-fetch)] ")"
843
+ # RFC5257 (ANNOTATE extension):
844
+ # msg-att-dynamic =/ "ANNOTATION" SP
845
+ # ( "(" entry-att *(SP entry-att) ")" /
846
+ # "(" entry *(SP entry) ")" )
847
+ # RFC7162 (CONDSTORE extension):
848
+ # msg-att-dynamic =/ fetch-mod-resp
849
+ # fetch-mod-resp = "MODSEQ" SP "(" permsg-modsequence ")"
850
+ # RFC8970 (PREVIEW extension):
851
+ # msg-att-dynamic =/ "PREVIEW" SP nstring
852
+ #
853
+ # RFC3501:
854
+ # msg-att-static = "ENVELOPE" SP envelope /
855
+ # "INTERNALDATE" SP date-time /
856
+ # "RFC822" [".HEADER" / ".TEXT"] SP nstring /
857
+ # "RFC822.SIZE" SP number /
858
+ # "BODY" ["STRUCTURE"] SP body /
859
+ # "BODY" section ["<" number ">"] SP nstring /
860
+ # "UID" SP uniqueid
861
+ # RFC3516 (BINARY extension):
862
+ # msg-att-static =/ "BINARY" section-binary SP (nstring / literal8)
863
+ # / "BINARY.SIZE" section-binary SP number
864
+ # RFC8514 (SAVEDATE extension):
865
+ # msg-att-static =/ "SAVEDATE" SP (date-time / nil)
866
+ # RFC8474 (OBJECTID extension):
867
+ # msg-att-static =/ fetch-emailid-resp / fetch-threadid-resp
868
+ # fetch-emailid-resp = "EMAILID" SP "(" objectid ")"
869
+ # fetch-threadid-resp = "THREADID" SP ( "(" objectid ")" / nil )
870
+ # RFC9051:
871
+ # msg-att-static = "ENVELOPE" SP envelope /
872
+ # "INTERNALDATE" SP date-time /
873
+ # "RFC822.SIZE" SP number64 /
874
+ # "BODY" ["STRUCTURE"] SP body /
875
+ # "BODY" section ["<" number ">"] SP nstring /
876
+ # "BINARY" section-binary SP (nstring / literal8) /
877
+ # "BINARY.SIZE" section-binary SP number /
878
+ # "UID" SP uniqueid
879
+ #
880
+ # Re https://www.rfc-editor.org/errata/eid7246, I'm adding "offset" to the
881
+ # official "BINARY" ABNF, like so:
882
+ #
883
+ # msg-att-static =/ "BINARY" section-binary ["<" number ">"] SP
884
+ # (nstring / literal8)
885
+ def msg_att(n)
886
+ lpar
887
+ attr = {}
458
888
  while true
459
- token = lookahead
460
- if token.symbol == T_SPACE
461
- shift_token
462
- break
463
- end
464
- parts.push(body)
889
+ name = msg_att__label; SP!
890
+ val =
891
+ case name
892
+ when "UID" then uniqueid
893
+ when "FLAGS" then flag_list
894
+ when "BODY" then body
895
+ when /\ABODY\[/ni then nstring
896
+ when "BODYSTRUCTURE" then body
897
+ when "ENVELOPE" then envelope
898
+ when "INTERNALDATE" then date_time
899
+ when "RFC822.SIZE" then number64
900
+ when /\ABINARY\[/ni then nstring8 # BINARY, IMAP4rev2
901
+ when /\ABINARY\.SIZE\[/ni then number # BINARY, IMAP4rev2
902
+ when "RFC822" then nstring # not in rev2
903
+ when "RFC822.HEADER" then nstring # not in rev2
904
+ when "RFC822.TEXT" then nstring # not in rev2
905
+ when "MODSEQ" then parens__modseq # CONDSTORE
906
+ when "EMAILID" then parens__objectid # OBJECTID
907
+ when "THREADID" then nparens__objectid # OBJECTID
908
+ when "X-GM-MSGID" then x_gm_id # GMail
909
+ when "X-GM-THRID" then x_gm_id # GMail
910
+ when "X-GM-LABELS" then x_gm_labels # GMail
911
+ else parse_error("unknown attribute `%s' for {%d}", name, n)
912
+ end
913
+ attr[name] = val
914
+ break unless SP?
915
+ break if lookahead_rpar?
465
916
  end
466
- mtype = "MULTIPART"
467
- msubtype = case_insensitive_string
468
- param, disposition, language, extension = body_ext_mpart
469
- return BodyTypeMultipart.new(mtype, msubtype, parts,
470
- param, disposition, language,
471
- extension)
917
+ rpar
918
+ attr
472
919
  end
473
920
 
474
- def media_type
475
- mtype = case_insensitive_string
476
- token = lookahead
477
- if token.symbol != T_SPACE
478
- return mtype, nil
921
+ # appends "[section]" and "<partial>" to the base label
922
+ def msg_att__label
923
+ case (name = tagged_ext_label)
924
+ when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
925
+ # ignoring "[]" fixes https://bugs.ruby-lang.org/issues/5620
926
+ lbra? and rbra
927
+ when "BODY"
928
+ peek_lbra? and name << section and
929
+ peek_str?("<") and name << gt__number__lt # partial
930
+ when "BINARY", "BINARY.SIZE"
931
+ name << section_binary
932
+ # see https://www.rfc-editor.org/errata/eid7246 and the note above
933
+ peek_str?("<") and name << gt__number__lt # partial
479
934
  end
480
- match(T_SPACE)
481
- msubtype = case_insensitive_string
482
- return mtype, msubtype
935
+ name
483
936
  end
484
937
 
485
- def body_fields
486
- param = body_fld_param
487
- match(T_SPACE)
488
- content_id = nstring
489
- match(T_SPACE)
490
- desc = nstring
491
- match(T_SPACE)
492
- enc = case_insensitive_string
493
- match(T_SPACE)
494
- size = number
495
- return param, content_id, desc, enc, size
496
- end
938
+ # this represents the partial offset (origin octet, "<" number ">") for BODY or BINARY
939
+ alias gt__number__lt atom
497
940
 
498
- def body_fld_param
499
- token = lookahead
500
- if token.symbol == T_NIL
501
- shift_token
502
- return nil
503
- end
504
- match(T_LPAR)
505
- param = {}
506
- while true
507
- token = lookahead
508
- case token.symbol
509
- when T_RPAR
510
- shift_token
511
- break
512
- when T_SPACE
513
- shift_token
514
- end
515
- name = case_insensitive_string
516
- match(T_SPACE)
517
- val = string
518
- param[name] = val
519
- end
520
- return param
941
+ # RFC3501 & RFC9051:
942
+ # envelope = "(" env-date SP env-subject SP env-from SP
943
+ # env-sender SP env-reply-to SP env-to SP env-cc SP
944
+ # env-bcc SP env-in-reply-to SP env-message-id ")"
945
+ def envelope
946
+ @lex_state = EXPR_DATA
947
+ lpar; date = env_date
948
+ SP!; subject = env_subject
949
+ SP!; from = env_from
950
+ SP!; sender = env_sender
951
+ SP!; reply_to = env_reply_to
952
+ SP!; to = env_to
953
+ SP!; cc = env_cc
954
+ SP!; bcc = env_bcc
955
+ SP!; in_reply_to = env_in_reply_to
956
+ SP!; message_id = env_message_id
957
+ rpar
958
+ Envelope.new(date, subject, from, sender, reply_to,
959
+ to, cc, bcc, in_reply_to, message_id)
960
+ ensure
961
+ @lex_state = EXPR_BEG
521
962
  end
522
963
 
523
- def body_ext_1part
524
- token = lookahead
525
- if token.symbol == T_SPACE
526
- shift_token
527
- else
528
- return nil
529
- end
530
- md5 = nstring
964
+ # env-date = nstring
965
+ # env-subject = nstring
966
+ # env-in-reply-to = nstring
967
+ # env-message-id = nstring
968
+ alias env_date nstring
969
+ alias env_subject nstring
970
+ alias env_in_reply_to nstring
971
+ alias env_message_id nstring
972
+
973
+ # env-from = "(" 1*address ")" / nil
974
+ # env-sender = "(" 1*address ")" / nil
975
+ # env-reply-to = "(" 1*address ")" / nil
976
+ # env-to = "(" 1*address ")" / nil
977
+ # env-cc = "(" 1*address ")" / nil
978
+ # env-bcc = "(" 1*address ")" / nil
979
+ def nlist__address
980
+ return if NIL?
981
+ lpar; list = [address]; list << address until (quirky_SP?; rpar?)
982
+ list
983
+ end
984
+
985
+ alias env_from nlist__address
986
+ alias env_sender nlist__address
987
+ alias env_reply_to nlist__address
988
+ alias env_to nlist__address
989
+ alias env_cc nlist__address
990
+ alias env_bcc nlist__address
991
+
992
+ # Used when servers erroneously send an extra SP.
993
+ #
994
+ # As of 2023-11-28, Outlook.com (still) sends SP
995
+ # between +address+ in <tt>env-*</tt> lists.
996
+ alias quirky_SP? SP?
531
997
 
532
- token = lookahead
533
- if token.symbol == T_SPACE
534
- shift_token
535
- else
536
- return md5
537
- end
538
- disposition = body_fld_dsp
998
+ # date-time = DQUOTE date-day-fixed "-" date-month "-" date-year
999
+ # SP time SP zone DQUOTE
1000
+ alias date_time quoted
1001
+ alias ndatetime nquoted
539
1002
 
540
- token = lookahead
541
- if token.symbol == T_SPACE
542
- shift_token
543
- else
544
- return md5, disposition
545
- end
546
- language = body_fld_lang
1003
+ # RFC-3501 & RFC-9051:
1004
+ # body = "(" (body-type-1part / body-type-mpart) ")"
1005
+ def body
1006
+ @lex_state = EXPR_DATA
1007
+ lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar
1008
+ result
1009
+ ensure
1010
+ @lex_state = EXPR_BEG
1011
+ end
1012
+ alias lookahead_body? lookahead_lpar?
547
1013
 
548
- token = lookahead
549
- if token.symbol == T_SPACE
550
- shift_token
551
- else
552
- return md5, disposition, language
553
- end
1014
+ # RFC-3501 & RFC9051:
1015
+ # body-type-1part = (body-type-basic / body-type-msg / body-type-text)
1016
+ # [SP body-ext-1part]
1017
+ def body_type_1part
1018
+ # This regexp peek is a performance optimization.
1019
+ # The lookahead fallback would work fine too.
1020
+ m = peek_re(/\G(?:
1021
+ (?<TEXT> "TEXT" \s "[^"]+" )
1022
+ |(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
1023
+ |(?<BASIC> "[^"]+" \s "[^"]+" )
1024
+ |(?<MIXED> "MIXED" )
1025
+ )/nix)
1026
+ choice = m&.named_captures&.compact&.keys&.first
1027
+ # In practice, the following line should never be used. But the ABNF
1028
+ # *does* allow literals, and this will handle them.
1029
+ choice ||= lookahead_case_insensitive__string!
1030
+ case choice
1031
+ when "BASIC" then body_type_basic # => BodyTypeBasic
1032
+ when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
1033
+ when "TEXT" then body_type_text # => BodyTypeText
1034
+ when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
1035
+ else body_type_basic # might be a bug; server's or ours?
1036
+ end
1037
+ end
1038
+
1039
+ # RFC-3501 & RFC9051:
1040
+ # body-type-basic = media-basic SP body-fields
1041
+ def body_type_basic
1042
+ type = media_basic # n.b. "basic" type isn't enforced here
1043
+ if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid
1044
+ SP!; flds = body_fields
1045
+ SP? and exts = body_ext_1part
1046
+ BodyTypeBasic.new(*type, *flds, *exts)
1047
+ end
554
1048
 
555
- extension = body_extensions
556
- return md5, disposition, language, extension
1049
+ # RFC-3501 & RFC-9051:
1050
+ # body-type-text = media-text SP body-fields SP body-fld-lines
1051
+ def body_type_text
1052
+ type = media_text
1053
+ SP!; flds = body_fields
1054
+ SP!; lines = body_fld_lines
1055
+ SP? and exts = body_ext_1part
1056
+ BodyTypeText.new(*type, *flds, lines, *exts)
557
1057
  end
558
1058
 
559
- def body_ext_mpart
560
- token = lookahead
561
- if token.symbol == T_SPACE
562
- shift_token
563
- else
564
- return nil
565
- end
566
- param = body_fld_param
1059
+ # RFC-3501 & RFC-9051:
1060
+ # body-type-msg = media-message SP body-fields SP envelope
1061
+ # SP body SP body-fld-lines
1062
+ def body_type_msg
1063
+ # n.b. "message/rfc822" type isn't enforced here
1064
+ type = media_message
1065
+ SP!; flds = body_fields
1066
+
1067
+ # Sometimes servers send body-type-basic when they should send body-type-msg.
1068
+ # E.g: when a message/rfc822 part has "Content-Disposition: attachment".
1069
+ #
1070
+ # * SP "(" --> SP envelope --> continue as body-type-msg
1071
+ # * ")" --> no body-ext-1part --> completed body-type-basic
1072
+ # * SP nstring --> SP body-fld-md5
1073
+ # --> SP body-ext-1part --> continue as body-type-basic
1074
+ #
1075
+ # It's probably better to return BodyTypeBasic---even for
1076
+ # "message/rfc822"---than BodyTypeMessage with invalid fields.
1077
+ unless peek_str?(" (")
1078
+ SP? and exts = body_ext_1part
1079
+ return BodyTypeBasic.new(*type, *flds, *exts)
1080
+ end
1081
+
1082
+ SP!; env = envelope
1083
+ SP!; bdy = body
1084
+ SP!; lines = body_fld_lines
1085
+ SP? and exts = body_ext_1part
1086
+ BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts)
1087
+ end
1088
+
1089
+ # This is a malformed body-type-mpart with no subparts.
1090
+ def body_type_mixed
1091
+ # warn "malformed body-type-mpart: multipart/mixed with no parts."
1092
+ type = media_subtype # => "MIXED"
1093
+ SP? and exts = body_ext_mpart
1094
+ BodyTypeMultipart.new("MULTIPART", type, nil, *exts)
1095
+ end
567
1096
 
568
- token = lookahead
569
- if token.symbol == T_SPACE
570
- shift_token
571
- else
572
- return param
573
- end
574
- disposition = body_fld_dsp
1097
+ # RFC-3501 & RFC-9051:
1098
+ # body-type-mpart = 1*body SP media-subtype
1099
+ # [SP body-ext-mpart]
1100
+ def body_type_mpart
1101
+ parts = [body]; parts << body until SP?; msubtype = media_subtype
1102
+ SP? and exts = body_ext_mpart
1103
+ BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts)
1104
+ end
575
1105
 
576
- token = lookahead
577
- if token.symbol == T_SPACE
578
- shift_token
579
- else
580
- return param, disposition
581
- end
582
- language = body_fld_lang
1106
+ # n.b. this handles both type and subtype
1107
+ #
1108
+ # RFC-3501 vs RFC-9051:
1109
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
1110
+ # "MESSAGE" /
1111
+ # "VIDEO") DQUOTE) / string) SP media-subtype
1112
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
1113
+ # "FONT" / "MESSAGE" / "MODEL" /
1114
+ # "VIDEO") DQUOTE) / string) SP media-subtype
1115
+ #
1116
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
1117
+ # DQUOTE "RFC822" DQUOTE
1118
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
1119
+ # DQUOTE ("RFC822" / "GLOBAL") DQUOTE
1120
+ #
1121
+ # RFC-3501 & RFC-9051:
1122
+ # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
1123
+ # media-subtype = string
1124
+ def media_type
1125
+ mtype = case_insensitive__string
1126
+ SP? or return mtype, nil # ??? quirky!
1127
+ msubtype = media_subtype
1128
+ return mtype, msubtype
1129
+ end
583
1130
 
584
- token = lookahead
585
- if token.symbol == T_SPACE
586
- shift_token
587
- else
588
- return param, disposition, language
589
- end
1131
+ # TODO: check types
1132
+ alias media_basic media_type # */* --- catchall
1133
+ alias media_message media_type # message/rfc822, message/global
1134
+ alias media_text media_type # text/*
1135
+
1136
+ alias media_subtype case_insensitive__string
590
1137
 
591
- extension = body_extensions
592
- return param, disposition, language, extension
1138
+ # RFC-3501 & RFC-9051:
1139
+ # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
1140
+ # body-fld-enc SP body-fld-octets
1141
+ def body_fields
1142
+ fields = []
1143
+ fields << body_fld_param; SP!
1144
+ fields << body_fld_id; SP!
1145
+ fields << body_fld_desc; SP!
1146
+ fields << body_fld_enc; SP!
1147
+ fields << body_fld_octets
1148
+ fields
593
1149
  end
594
1150
 
1151
+ # RFC3501, RFC9051:
1152
+ # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
1153
+ def body_fld_param
1154
+ return if NIL?
1155
+ param = {}
1156
+ lpar
1157
+ name = case_insensitive__string; SP!; param[name] = string
1158
+ while SP?
1159
+ name = case_insensitive__string; SP!; param[name] = string
1160
+ end
1161
+ rpar
1162
+ param
1163
+ end
1164
+
1165
+ # RFC2060
1166
+ # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
1167
+ # [SPACE body_fld_lang
1168
+ # [SPACE 1#body_extension]]]
1169
+ # ;; MUST NOT be returned on non-extensible
1170
+ # ;; "BODY" fetch
1171
+ # RFC3501 & RFC9051
1172
+ # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
1173
+ # [SP body-fld-loc *(SP body-extension)]]]
1174
+ # ; MUST NOT be returned on non-extensible
1175
+ # ; "BODY" fetch
1176
+ def body_ext_1part
1177
+ fields = []; fields << body_fld_md5
1178
+ SP? or return fields; fields << body_fld_dsp
1179
+ SP? or return fields; fields << body_fld_lang
1180
+ SP? or return fields; fields << body_fld_loc
1181
+ SP? or return fields; fields << body_extensions
1182
+ fields
1183
+ end
1184
+
1185
+ # RFC-2060:
1186
+ # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
1187
+ # [SP 1#body_extension]]
1188
+ # ;; MUST NOT be returned on non-extensible
1189
+ # ;; "BODY" fetch
1190
+ # RFC-3501 & RFC-9051:
1191
+ # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
1192
+ # [SP body-fld-loc *(SP body-extension)]]]
1193
+ # ; MUST NOT be returned on non-extensible
1194
+ # ; "BODY" fetch
1195
+ def body_ext_mpart
1196
+ fields = []; fields << body_fld_param
1197
+ SP? or return fields; fields << body_fld_dsp
1198
+ SP? or return fields; fields << body_fld_lang
1199
+ SP? or return fields; fields << body_fld_loc
1200
+ SP? or return fields; fields << body_extensions
1201
+ fields
1202
+ end
1203
+
1204
+ alias body_fld_desc nstring
1205
+ alias body_fld_id nstring
1206
+ alias body_fld_loc nstring
1207
+ alias body_fld_lines number64 # number in 3501, number64 in 9051
1208
+ alias body_fld_md5 nstring
1209
+ alias body_fld_octets number
1210
+
1211
+ # RFC-3501 & RFC-9051:
1212
+ # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
1213
+ # "QUOTED-PRINTABLE") DQUOTE) / string
1214
+ alias body_fld_enc case_insensitive__string
1215
+
1216
+ # body-fld-dsp = "(" string SP body-fld-param ")" / nil
595
1217
  def body_fld_dsp
596
- token = lookahead
597
- if token.symbol == T_NIL
598
- shift_token
599
- return nil
600
- end
601
- match(T_LPAR)
602
- dsp_type = case_insensitive_string
603
- match(T_SPACE)
604
- param = body_fld_param
605
- match(T_RPAR)
606
- return ContentDisposition.new(dsp_type, param)
1218
+ return if NIL?
1219
+ lpar; dsp_type = case_insensitive__string
1220
+ SP!; param = body_fld_param
1221
+ rpar
1222
+ ContentDisposition.new(dsp_type, param)
607
1223
  end
608
1224
 
1225
+ # body-fld-lang = nstring / "(" string *(SP string) ")"
609
1226
  def body_fld_lang
610
- token = lookahead
611
- if token.symbol == T_LPAR
612
- shift_token
613
- result = []
614
- while true
615
- token = lookahead
616
- case token.symbol
617
- when T_RPAR
618
- shift_token
619
- return result
620
- when T_SPACE
621
- shift_token
622
- end
623
- result.push(case_insensitive_string)
624
- end
1227
+ if lpar?
1228
+ result = [case_insensitive__string]
1229
+ result << case_insensitive__string while SP?
1230
+ rpar
1231
+ result
625
1232
  else
626
- lang = nstring
627
- if lang
628
- return lang.upcase
629
- else
630
- return lang
631
- end
1233
+ case_insensitive__nstring
632
1234
  end
633
1235
  end
634
1236
 
1237
+ # body-extension *(SP body-extension)
635
1238
  def body_extensions
636
1239
  result = []
637
- while true
638
- token = lookahead
639
- case token.symbol
640
- when T_RPAR
641
- return result
642
- when T_SPACE
643
- shift_token
644
- end
645
- result.push(body_extension)
646
- end
1240
+ result << body_extension; while SP? do result << body_extension end
1241
+ result
647
1242
  end
648
1243
 
1244
+ # body-extension = nstring / number / number64 /
1245
+ # "(" body-extension *(SP body-extension) ")"
1246
+ # ; Future expansion. Client implementations
1247
+ # ; MUST accept body-extension fields. Server
1248
+ # ; implementations MUST NOT generate
1249
+ # ; body-extension fields except as defined by
1250
+ # ; future Standard or Standards Track
1251
+ # ; revisions of this specification.
649
1252
  def body_extension
650
- token = lookahead
651
- case token.symbol
652
- when T_LPAR
653
- shift_token
654
- result = body_extensions
655
- match(T_RPAR)
656
- return result
657
- when T_NUMBER
658
- return number
659
- else
660
- return nstring
1253
+ if (uint = number64?) then uint
1254
+ elsif lpar? then exts = body_extensions; rpar; exts
1255
+ else nstring
661
1256
  end
662
1257
  end
663
1258
 
1259
+ # section = "[" [section-spec] "]"
664
1260
  def section
665
- str = String.new
666
- token = match(T_LBRA)
667
- str.concat(token.value)
668
- token = match(T_ATOM, T_NUMBER, T_RBRA)
669
- if token.symbol == T_RBRA
670
- str.concat(token.value)
671
- return str
672
- end
673
- str.concat(token.value)
674
- token = lookahead
675
- if token.symbol == T_SPACE
676
- shift_token
677
- str.concat(token.value)
678
- token = match(T_LPAR)
679
- str.concat(token.value)
680
- while true
681
- token = lookahead
682
- case token.symbol
683
- when T_RPAR
684
- str.concat(token.value)
685
- shift_token
686
- break
687
- when T_SPACE
688
- shift_token
689
- str.concat(token.value)
690
- end
691
- str.concat(format_string(astring))
692
- end
693
- end
694
- token = match(T_RBRA)
695
- str.concat(token.value)
696
- return str
697
- end
698
-
699
- def format_string(str)
700
- case str
701
- when ""
702
- return '""'
703
- when /[\x80-\xff\r\n]/n
704
- # literal
705
- return "{" + str.bytesize.to_s + "}" + CRLF + str
706
- when /[(){ \x00-\x1f\x7f%*"\\]/n
707
- # quoted string
708
- return '"' + str.gsub(/["\\]/n, "\\\\\\&") + '"'
709
- else
710
- # atom
711
- return str
712
- end
713
- end
714
-
715
- def uid_data
716
- token = match(T_ATOM)
717
- name = token.value.upcase
718
- match(T_SPACE)
719
- return name, number
720
- end
721
-
722
- def modseq_data
723
- token = match(T_ATOM)
724
- name = token.value.upcase
725
- match(T_SPACE)
726
- match(T_LPAR)
727
- modseq = number
728
- match(T_RPAR)
729
- return name, modseq
730
- end
731
-
732
- def ignored_response
733
- while lookahead.symbol != T_CRLF
734
- shift_token
735
- end
736
- return IgnoredResponse.new(@str)
737
- end
738
-
739
- def text_response
740
- token = match(T_ATOM)
741
- name = token.value.upcase
742
- match(T_SPACE)
743
- return UntaggedResponse.new(name, text)
744
- end
745
-
746
- def flags_response
747
- token = match(T_ATOM)
748
- name = token.value.upcase
749
- match(T_SPACE)
750
- return UntaggedResponse.new(name, flag_list, @str)
751
- end
752
-
753
- def list_response
754
- token = match(T_ATOM)
755
- name = token.value.upcase
756
- match(T_SPACE)
757
- return UntaggedResponse.new(name, mailbox_list, @str)
758
- end
759
-
1261
+ str = +lbra
1262
+ str << section_spec unless peek_rbra?
1263
+ str << rbra
1264
+ end
1265
+
1266
+ # section-binary = "[" [section-part] "]"
1267
+ def section_binary
1268
+ str = +lbra
1269
+ str << section_part unless peek_rbra?
1270
+ str << rbra
1271
+ end
1272
+
1273
+ # section-spec = section-msgtext / (section-part ["." section-text])
1274
+ # section-msgtext = "HEADER" /
1275
+ # "HEADER.FIELDS" [".NOT"] SP header-list /
1276
+ # "TEXT"
1277
+ # ; top-level or MESSAGE/RFC822 or
1278
+ # ; MESSAGE/GLOBAL part
1279
+ # section-part = nz-number *("." nz-number)
1280
+ # ; body part reference.
1281
+ # ; Allows for accessing nested body parts.
1282
+ # section-text = section-msgtext / "MIME"
1283
+ # ; text other than actual body part (headers,
1284
+ # ; etc.)
1285
+ #
1286
+ # n.b: we could "cheat" here and just grab all text inside the brackets,
1287
+ # but literals would need special treatment.
1288
+ def section_spec
1289
+ str = "".b
1290
+ str << atom # grabs everything up to "SP header-list" or "]"
1291
+ str << " " << header_list if SP?
1292
+ str
1293
+ end
1294
+
1295
+ # header-list = "(" header-fld-name *(SP header-fld-name) ")"
1296
+ def header_list
1297
+ str = +""
1298
+ str << lpar << header_fld_name
1299
+ str << " " << header_fld_name while SP?
1300
+ str << rpar
1301
+ end
1302
+
1303
+ # section-part = nz-number *("." nz-number)
1304
+ # ; body part reference.
1305
+ # ; Allows for accessing nested body parts.
1306
+ alias section_part atom
1307
+
1308
+ # RFC3501 & RFC9051:
1309
+ # header-fld-name = astring
1310
+ #
1311
+ # NOTE: Previously, Net::IMAP recreated the raw original source string.
1312
+ # Now, it grabs the raw encoded value using @str and @pos. A future
1313
+ # version may simply return the decoded astring value. Although that is
1314
+ # technically incompatible, it should almost never make a difference: all
1315
+ # standard header field names are valid atoms:
1316
+ #
1317
+ # https://www.iana.org/assignments/message-headers/message-headers.xhtml
1318
+ #
1319
+ # Although RFC3501 allows any astring, RFC5322-valid header names are one
1320
+ # or more of the printable US-ASCII characters, except SP and colon. So
1321
+ # empty string isn't valid, and literals aren't needed and should not be
1322
+ # used. This is explicitly unchanged by [I18N-HDRS] (RFC6532).
1323
+ #
1324
+ # RFC5322:
1325
+ # optional-field = field-name ":" unstructured CRLF
1326
+ # field-name = 1*ftext
1327
+ # ftext = %d33-57 / ; Printable US-ASCII
1328
+ # %d59-126 ; characters not including
1329
+ # ; ":".
1330
+ def header_fld_name
1331
+ assert_no_lookahead
1332
+ start = @pos
1333
+ astring
1334
+ @str[start...@pos - 1]
1335
+ end
1336
+
1337
+ # mailbox-data = "FLAGS" SP flag-list / "LIST" SP mailbox-list /
1338
+ # "LSUB" SP mailbox-list / "SEARCH" *(SP nz-number) /
1339
+ # "STATUS" SP mailbox SP "(" [status-att-list] ")" /
1340
+ # number SP "EXISTS" / number SP "RECENT"
1341
+
1342
+ def mailbox_data__flags
1343
+ name = label("FLAGS")
1344
+ SP!
1345
+ UntaggedResponse.new(name, flag_list, @str)
1346
+ end
1347
+
1348
+ def mailbox_data__list
1349
+ name = label_in("LIST", "LSUB", "XLIST")
1350
+ SP!
1351
+ UntaggedResponse.new(name, mailbox_list, @str)
1352
+ end
1353
+ alias mailbox_data__lsub mailbox_data__list
1354
+ alias mailbox_data__xlist mailbox_data__list
1355
+
1356
+ # mailbox-list = "(" [mbx-list-flags] ")" SP
1357
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil) SP mailbox
1358
+ # [SP mbox-list-extended]
1359
+ # ; This is the list information pointed to by the ABNF
1360
+ # ; item "mailbox-data", which is defined above
760
1361
  def mailbox_list
761
- attr = flag_list
762
- match(T_SPACE)
763
- token = match(T_QUOTED, T_NIL)
764
- if token.symbol == T_NIL
765
- delim = nil
766
- else
767
- delim = token.value
768
- end
769
- match(T_SPACE)
770
- name = astring
771
- return MailboxList.new(attr, delim, name)
1362
+ lpar; attr = peek_rpar? ? [] : mbx_list_flags; rpar
1363
+ SP!; delim = nquoted
1364
+ SP!; name = mailbox
1365
+ # TODO: mbox-list-extended
1366
+ MailboxList.new(attr, delim, name)
772
1367
  end
773
1368
 
774
1369
  def getquota_response
@@ -813,183 +1408,229 @@ module Net
813
1408
  quotaroots = []
814
1409
  while true
815
1410
  token = lookahead
816
- break unless token.symbol == T_SPACE
817
- shift_token
818
- quotaroots.push(astring)
819
- end
820
- data = MailboxQuotaRoot.new(mailbox, quotaroots)
821
- return UntaggedResponse.new(name, data, @str)
822
- end
823
-
824
- def getacl_response
825
- token = match(T_ATOM)
826
- name = token.value.upcase
827
- match(T_SPACE)
828
- mailbox = astring
829
- data = []
830
- token = lookahead
831
- if token.symbol == T_SPACE
832
- shift_token
833
- while true
834
- token = lookahead
835
- case token.symbol
836
- when T_CRLF
837
- break
838
- when T_SPACE
839
- shift_token
840
- end
841
- user = astring
842
- match(T_SPACE)
843
- rights = astring
844
- data.push(MailboxACLItem.new(user, rights, mailbox))
845
- end
846
- end
847
- return UntaggedResponse.new(name, data, @str)
848
- end
849
-
850
- def search_response
851
- token = match(T_ATOM)
852
- name = token.value.upcase
853
- token = lookahead
854
- if token.symbol == T_SPACE
855
- shift_token
856
- data = []
857
- while true
858
- token = lookahead
859
- case token.symbol
860
- when T_CRLF
861
- break
862
- when T_SPACE
863
- shift_token
864
- when T_NUMBER
865
- data.push(number)
866
- when T_LPAR
867
- # TODO: include the MODSEQ value in a response
868
- shift_token
869
- match(T_ATOM)
870
- match(T_SPACE)
871
- match(T_NUMBER)
872
- match(T_RPAR)
873
- end
874
- end
875
- else
876
- data = []
877
- end
878
- return UntaggedResponse.new(name, data, @str)
879
- end
880
-
881
- def thread_response
882
- token = match(T_ATOM)
883
- name = token.value.upcase
884
- token = lookahead
885
-
886
- if token.symbol == T_SPACE
887
- threads = []
888
-
889
- while true
890
- shift_token
891
- token = lookahead
892
-
893
- case token.symbol
894
- when T_LPAR
895
- threads << thread_branch(token)
896
- when T_CRLF
897
- break
898
- end
899
- end
900
- else
901
- # no member
902
- threads = []
903
- end
904
-
905
- return UntaggedResponse.new(name, threads, @str)
906
- end
907
-
908
- def thread_branch(token)
909
- rootmember = nil
910
- lastmember = nil
911
-
912
- while true
913
- shift_token # ignore first T_LPAR
914
- token = lookahead
915
-
916
- case token.symbol
917
- when T_NUMBER
918
- # new member
919
- newmember = ThreadMember.new(number, [])
920
- if rootmember.nil?
921
- rootmember = newmember
922
- else
923
- lastmember.children << newmember
924
- end
925
- lastmember = newmember
926
- when T_SPACE
927
- # do nothing
928
- when T_LPAR
929
- if rootmember.nil?
930
- # dummy member
931
- lastmember = rootmember = ThreadMember.new(nil, [])
932
- end
933
-
934
- lastmember.children << thread_branch(token)
935
- when T_RPAR
936
- break
937
- end
938
- end
939
-
940
- return rootmember
941
- end
942
-
943
- def status_response
944
- token = match(T_ATOM)
945
- name = token.value.upcase
946
- match(T_SPACE)
947
- mailbox = astring
948
- match(T_SPACE)
949
- match(T_LPAR)
950
- attr = {}
951
- while true
952
- token = lookahead
953
- case token.symbol
954
- when T_RPAR
955
- shift_token
956
- break
957
- when T_SPACE
958
- shift_token
959
- end
960
- token = match(T_ATOM)
961
- key = token.value.upcase
962
- match(T_SPACE)
963
- val = number
964
- attr[key] = val
1411
+ break unless token.symbol == T_SPACE
1412
+ shift_token
1413
+ quotaroots.push(astring)
965
1414
  end
966
- data = StatusData.new(mailbox, attr)
1415
+ data = MailboxQuotaRoot.new(mailbox, quotaroots)
967
1416
  return UntaggedResponse.new(name, data, @str)
968
1417
  end
969
1418
 
970
- def capability_response
1419
+ # acl-data = "ACL" SP mailbox *(SP identifier SP rights)
1420
+ def acl_data
971
1421
  token = match(T_ATOM)
972
1422
  name = token.value.upcase
973
1423
  match(T_SPACE)
974
- UntaggedResponse.new(name, capability_data, @str)
1424
+ mailbox = astring
1425
+ data = []
1426
+ token = lookahead
1427
+ if token.symbol == T_SPACE
1428
+ shift_token
1429
+ while true
1430
+ token = lookahead
1431
+ case token.symbol
1432
+ when T_CRLF
1433
+ break
1434
+ when T_SPACE
1435
+ shift_token
1436
+ end
1437
+ user = astring
1438
+ match(T_SPACE)
1439
+ rights = astring
1440
+ data.push(MailboxACLItem.new(user, rights, mailbox))
1441
+ end
1442
+ end
1443
+ return UntaggedResponse.new(name, data, @str)
975
1444
  end
976
1445
 
977
- def capability_data
1446
+ # RFC3501:
1447
+ # mailbox-data = "SEARCH" *(SP nz-number) / ...
1448
+ # RFC5256: SORT
1449
+ # sort-data = "SORT" *(SP nz-number)
1450
+ # RFC7162: CONDSTORE, QRESYNC
1451
+ # mailbox-data =/ "SEARCH" [1*(SP nz-number) SP
1452
+ # search-sort-mod-seq]
1453
+ # sort-data = "SORT" [1*(SP nz-number) SP
1454
+ # search-sort-mod-seq]
1455
+ # ; Updates the SORT response from RFC 5256.
1456
+ # search-sort-mod-seq = "(" "MODSEQ" SP mod-sequence-value ")"
1457
+ # RFC9051:
1458
+ # mailbox-data = obsolete-search-response / ...
1459
+ # obsolete-search-response = "SEARCH" *(SP nz-number)
1460
+ def mailbox_data__search
1461
+ name = label_in("SEARCH", "SORT")
978
1462
  data = []
979
- while true
980
- token = lookahead
981
- case token.symbol
982
- when T_CRLF, T_RBRA
983
- break
984
- when T_SPACE
985
- shift_token
986
- next
1463
+ while _ = SP? && nz_number? do data << _ end
1464
+ if lpar?
1465
+ label("MODSEQ"); SP!
1466
+ mod_sequence_value
1467
+ rpar
1468
+ end
1469
+ UntaggedResponse.new(name, data, @str)
1470
+ end
1471
+ alias sort_data mailbox_data__search
1472
+
1473
+ # RFC5256: THREAD
1474
+ # thread-data = "THREAD" [SP 1*thread-list]
1475
+ def thread_data
1476
+ name = label("THREAD")
1477
+ threads = []
1478
+ if SP?
1479
+ threads << thread_list while lookahead_thread_list?
1480
+ end
1481
+ UntaggedResponse.new(name, threads, @str)
1482
+ end
1483
+
1484
+ alias lookahead_thread_list? lookahead_lpar?
1485
+ alias lookahead_thread_nested? lookahead_thread_list?
1486
+
1487
+ # RFC5256: THREAD
1488
+ # thread-list = "(" (thread-members / thread-nested) ")"
1489
+ def thread_list
1490
+ lpar
1491
+ thread = if lookahead_thread_nested?
1492
+ ThreadMember.new(nil, thread_nested)
1493
+ else
1494
+ thread_members
1495
+ end
1496
+ rpar
1497
+ thread
1498
+ end
1499
+
1500
+ # RFC5256: THREAD
1501
+ # thread-members = nz-number *(SP nz-number) [SP thread-nested]
1502
+ def thread_members
1503
+ members = []
1504
+ members << nz_number # thread root
1505
+ while SP?
1506
+ case lookahead!(T_NUMBER, T_LPAR).symbol
1507
+ when T_NUMBER then members << nz_number
1508
+ else nested = thread_nested; break
987
1509
  end
988
- data.push(atom.upcase)
989
1510
  end
990
- data
1511
+ members.reverse.inject(nested || []) {|subthreads, number|
1512
+ [ThreadMember.new(number, subthreads)]
1513
+ }.first
1514
+ end
1515
+
1516
+ # RFC5256: THREAD
1517
+ # thread-nested = 2*thread-list
1518
+ def thread_nested
1519
+ nested = [thread_list, thread_list]
1520
+ while lookahead_thread_list? do nested << thread_list end
1521
+ nested
1522
+ end
1523
+
1524
+ # mailbox-data =/ "STATUS" SP mailbox SP "(" [status-att-list] ")"
1525
+ def mailbox_data__status
1526
+ resp_name = label("STATUS"); SP!
1527
+ mbox_name = mailbox; SP!
1528
+ lpar; attr = status_att_list; rpar
1529
+ UntaggedResponse.new(resp_name, StatusData.new(mbox_name, attr), @str)
1530
+ end
1531
+
1532
+ # RFC3501
1533
+ # status-att-list = status-att SP number *(SP status-att SP number)
1534
+ # RFC4466, RFC9051, and RFC3501 Errata
1535
+ # status-att-list = status-att-val *(SP status-att-val)
1536
+ def status_att_list
1537
+ attrs = [status_att_val]
1538
+ while SP? do attrs << status_att_val end
1539
+ attrs.to_h
1540
+ end
1541
+
1542
+ # RFC3501 Errata:
1543
+ # status-att-val = ("MESSAGES" SP number) / ("RECENT" SP number) /
1544
+ # ("UIDNEXT" SP nz-number) / ("UIDVALIDITY" SP nz-number) /
1545
+ # ("UNSEEN" SP number)
1546
+ # RFC4466:
1547
+ # status-att-val = ("MESSAGES" SP number) /
1548
+ # ("RECENT" SP number) /
1549
+ # ("UIDNEXT" SP nz-number) /
1550
+ # ("UIDVALIDITY" SP nz-number) /
1551
+ # ("UNSEEN" SP number)
1552
+ # ;; Extensions to the STATUS responses
1553
+ # ;; should extend this production.
1554
+ # ;; Extensions should use the generic
1555
+ # ;; syntax defined by tagged-ext.
1556
+ # RFC9051:
1557
+ # status-att-val = ("MESSAGES" SP number) /
1558
+ # ("UIDNEXT" SP nz-number) /
1559
+ # ("UIDVALIDITY" SP nz-number) /
1560
+ # ("UNSEEN" SP number) /
1561
+ # ("DELETED" SP number) /
1562
+ # ("SIZE" SP number64)
1563
+ # ; Extensions to the STATUS responses
1564
+ # ; should extend this production.
1565
+ # ; Extensions should use the generic
1566
+ # ; syntax defined by tagged-ext.
1567
+ # RFC7162:
1568
+ # status-att-val =/ "HIGHESTMODSEQ" SP mod-sequence-valzer
1569
+ # ;; Extends non-terminal defined in [RFC4466].
1570
+ # ;; Value 0 denotes that the mailbox doesn't
1571
+ # ;; support persistent mod-sequences
1572
+ # ;; as described in Section 3.1.2.2.
1573
+ # RFC7889:
1574
+ # status-att-val =/ "APPENDLIMIT" SP (number / nil)
1575
+ # ;; status-att-val is defined in RFC 4466
1576
+ # RFC8438:
1577
+ # status-att-val =/ "SIZE" SP number64
1578
+ # RFC8474:
1579
+ # status-att-val =/ "MAILBOXID" SP "(" objectid ")"
1580
+ # ; follows tagged-ext production from [RFC4466]
1581
+ def status_att_val
1582
+ key = tagged_ext_label
1583
+ SP!
1584
+ val =
1585
+ case key
1586
+ when "MESSAGES" then number # RFC3501, RFC9051
1587
+ when "UNSEEN" then number # RFC3501, RFC9051
1588
+ when "DELETED" then number # RFC9051
1589
+ when "UIDNEXT" then nz_number # RFC3501, RFC9051
1590
+ when "UIDVALIDITY" then nz_number # RFC3501, RFC9051
1591
+ when "RECENT" then number # RFC3501 (obsolete)
1592
+ when "SIZE" then number64 # RFC8438, RFC9051
1593
+ when "MAILBOXID" then parens__objectid # RFC8474
1594
+ else
1595
+ number? || ExtensionData.new(tagged_ext_val)
1596
+ end
1597
+ [key, val]
1598
+ end
1599
+
1600
+ # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
1601
+ # The grammar rule is used by both response-data and resp-text-code.
1602
+ # But this method only returns UntaggedResponse (response-data).
1603
+ #
1604
+ # RFC3501:
1605
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1606
+ # *(SP capability)
1607
+ # RFC9051:
1608
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1609
+ # *(SP capability)
1610
+ def capability_data__untagged
1611
+ UntaggedResponse.new label("CAPABILITY"), capability__list, @str
991
1612
  end
992
1613
 
1614
+ # enable-data = "ENABLED" *(SP capability)
1615
+ def enable_data
1616
+ UntaggedResponse.new label("ENABLED"), capability__list, @str
1617
+ end
1618
+
1619
+ # As a workaround for buggy servers, allow a trailing SP:
1620
+ # *(SP capability) [SP]
1621
+ def capability__list
1622
+ list = []; while SP? && (capa = capability?) do list << capa end; list
1623
+ end
1624
+
1625
+ alias resp_code__capability capability__list
1626
+
1627
+ # capability = ("AUTH=" auth-type) / atom
1628
+ # ; New capabilities MUST begin with "X" or be
1629
+ # ; registered with IANA as standard or
1630
+ # ; standards-track
1631
+ alias capability case_insensitive__atom
1632
+ alias capability? case_insensitive__atom?
1633
+
993
1634
  def id_response
994
1635
  token = match(T_ATOM)
995
1636
  name = token.value.upcase
@@ -1019,147 +1660,181 @@ module Net
1019
1660
  end
1020
1661
  end
1021
1662
 
1663
+ # namespace-response = "NAMESPACE" SP namespace
1664
+ # SP namespace SP namespace
1665
+ # ; The first Namespace is the Personal Namespace(s).
1666
+ # ; The second Namespace is the Other Users'
1667
+ # ; Namespace(s).
1668
+ # ; The third Namespace is the Shared Namespace(s).
1022
1669
  def namespace_response
1670
+ name = label("NAMESPACE")
1023
1671
  @lex_state = EXPR_DATA
1024
- token = lookahead
1025
- token = match(T_ATOM)
1026
- name = token.value.upcase
1027
- match(T_SPACE)
1028
- personal = namespaces
1029
- match(T_SPACE)
1030
- other = namespaces
1031
- match(T_SPACE)
1032
- shared = namespaces
1672
+ data = Namespaces.new((SP!; namespace),
1673
+ (SP!; namespace),
1674
+ (SP!; namespace))
1675
+ UntaggedResponse.new(name, data, @str)
1676
+ ensure
1033
1677
  @lex_state = EXPR_BEG
1034
- data = Namespaces.new(personal, other, shared)
1035
- return UntaggedResponse.new(name, data, @str)
1036
- end
1037
-
1038
- def namespaces
1039
- token = lookahead
1040
- # empty () is not allowed, so nil is functionally identical to empty.
1041
- data = []
1042
- if token.symbol == T_NIL
1043
- shift_token
1044
- else
1045
- match(T_LPAR)
1046
- loop do
1047
- data << namespace
1048
- break unless lookahead.symbol == T_SPACE
1049
- shift_token
1050
- end
1051
- match(T_RPAR)
1052
- end
1053
- data
1054
1678
  end
1055
1679
 
1680
+ # namespace = nil / "(" 1*namespace-descr ")"
1056
1681
  def namespace
1057
- match(T_LPAR)
1058
- prefix = match(T_QUOTED, T_LITERAL).value
1059
- match(T_SPACE)
1060
- delimiter = string
1682
+ NIL? and return []
1683
+ lpar
1684
+ list = [namespace_descr]
1685
+ list << namespace_descr until rpar?
1686
+ list
1687
+ end
1688
+
1689
+ # namespace-descr = "(" string SP
1690
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil)
1691
+ # [namespace-response-extensions] ")"
1692
+ def namespace_descr
1693
+ lpar
1694
+ prefix = string; SP!
1695
+ delimiter = nquoted # n.b: should only accept single char
1061
1696
  extensions = namespace_response_extensions
1062
- match(T_RPAR)
1697
+ rpar
1063
1698
  Namespace.new(prefix, delimiter, extensions)
1064
1699
  end
1065
1700
 
1701
+ # namespace-response-extensions = *namespace-response-extension
1702
+ # namespace-response-extension = SP string SP
1703
+ # "(" string *(SP string) ")"
1066
1704
  def namespace_response_extensions
1067
1705
  data = {}
1068
- token = lookahead
1069
- if token.symbol == T_SPACE
1070
- shift_token
1071
- name = match(T_QUOTED, T_LITERAL).value
1706
+ while SP?
1707
+ name = string; SP!
1708
+ lpar
1072
1709
  data[name] ||= []
1073
- match(T_SPACE)
1074
- match(T_LPAR)
1075
- loop do
1076
- data[name].push match(T_QUOTED, T_LITERAL).value
1077
- break unless lookahead.symbol == T_SPACE
1078
- shift_token
1079
- end
1080
- match(T_RPAR)
1710
+ data[name] << string
1711
+ data[name] << string while SP?
1712
+ rpar
1081
1713
  end
1082
1714
  data
1083
1715
  end
1084
1716
 
1085
- # text = 1*TEXT-CHAR
1086
- # TEXT-CHAR = <any CHAR except CR and LF>
1717
+ # TEXT-CHAR = <any CHAR except CR and LF>
1718
+ # RFC3501:
1719
+ # text = 1*TEXT-CHAR
1720
+ # RFC9051:
1721
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
1722
+ # ; Non-ASCII text can only be returned
1723
+ # ; after ENABLE IMAP4rev2 command
1087
1724
  def text
1088
- match(T_TEXT, lex_state: EXPR_TEXT).value
1725
+ match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8")
1089
1726
  end
1090
1727
 
1091
- # resp-text = ["[" resp-text-code "]" SP] text
1728
+ # an "accept" version of #text
1729
+ def text?
1730
+ accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8")
1731
+ end
1732
+
1733
+ # RFC3501:
1734
+ # resp-text = ["[" resp-text-code "]" SP] text
1735
+ # RFC9051:
1736
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1737
+ #
1738
+ # We leniently re-interpret this as
1739
+ # resp-text = ("[" resp-text-code "]" [SP [text]]) / [text]
1092
1740
  def resp_text
1093
- token = match(T_LBRA, T_TEXT, lex_state: EXPR_RTEXT)
1094
- case token.symbol
1095
- when T_LBRA
1096
- code = resp_text_code
1097
- match(T_RBRA)
1098
- accept_space # violating RFC
1099
- ResponseText.new(code, text)
1100
- when T_TEXT
1101
- ResponseText.new(nil, token.value)
1741
+ if lbra?
1742
+ code = resp_text_code; rbra
1743
+ ResponseText.new(code, SP? && text? || "")
1744
+ else
1745
+ ResponseText.new(nil, text? || "")
1102
1746
  end
1103
1747
  end
1104
1748
 
1105
- # See https://www.rfc-editor.org/errata/rfc3501
1749
+ # RFC3501 (See https://www.rfc-editor.org/errata/rfc3501):
1750
+ # resp-text-code = "ALERT" /
1751
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1752
+ # capability-data / "PARSE" /
1753
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1754
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1755
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1756
+ # "UNSEEN" SP nz-number /
1757
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1758
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1759
+ # *(SP capability)
1106
1760
  #
1107
- # resp-text-code = "ALERT" /
1108
- # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1109
- # capability-data / "PARSE" /
1110
- # "PERMANENTFLAGS" SP "("
1111
- # [flag-perm *(SP flag-perm)] ")" /
1112
- # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1113
- # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1114
- # "UNSEEN" SP nz-number /
1115
- # atom [SP 1*<any TEXT-CHAR except "]">]
1761
+ # RFC5530:
1762
+ # resp-text-code =/ "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1763
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1764
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1765
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1766
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1767
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1768
+ # "NONEXISTENT"
1769
+ # RFC9051:
1770
+ # resp-text-code = "ALERT" /
1771
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1772
+ # capability-data / "PARSE" /
1773
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1774
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1775
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1776
+ # resp-code-apnd / resp-code-copy / "UIDNOTSTICKY" /
1777
+ # "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1778
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1779
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1780
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1781
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1782
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1783
+ # "NONEXISTENT" / "NOTSAVED" / "HASCHILDREN" /
1784
+ # "CLOSED" /
1785
+ # "UNKNOWN-CTE" /
1786
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1787
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1788
+ # *(SP capability)
1116
1789
  #
1117
- # +UIDPLUS+ ABNF:: https://www.rfc-editor.org/rfc/rfc4315.html#section-4
1118
- # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1790
+ # RFC4315 (UIDPLUS), RFC9051 (IMAP4rev2):
1791
+ # resp-code-apnd = "APPENDUID" SP nz-number SP append-uid
1792
+ # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1793
+ # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1794
+ #
1795
+ # RFC7162 (CONDSTORE):
1796
+ # resp-text-code =/ "HIGHESTMODSEQ" SP mod-sequence-value /
1797
+ # "NOMODSEQ" /
1798
+ # "MODIFIED" SP sequence-set
1799
+ #
1800
+ # RFC8474: OBJECTID
1801
+ # resp-text-code =/ "MAILBOXID" SP "(" objectid ")"
1119
1802
  def resp_text_code
1120
- token = match(T_ATOM)
1121
- name = token.value.upcase
1122
- case name
1123
- when /\A(?:ALERT|PARSE|READ-ONLY|READ-WRITE|TRYCREATE|NOMODSEQ)\z/n
1124
- result = ResponseCode.new(name, nil)
1125
- when /\A(?:BADCHARSET)\z/n
1126
- result = ResponseCode.new(name, charset_list)
1127
- when /\A(?:CAPABILITY)\z/ni
1128
- result = ResponseCode.new(name, capability_data)
1129
- when /\A(?:PERMANENTFLAGS)\z/n
1130
- match(T_SPACE)
1131
- result = ResponseCode.new(name, flag_list)
1132
- when /\A(?:UIDVALIDITY|UIDNEXT|UNSEEN)\z/n
1133
- match(T_SPACE)
1134
- result = ResponseCode.new(name, number)
1135
- when /\A(?:APPENDUID)\z/n
1136
- result = ResponseCode.new(name, resp_code_apnd__data)
1137
- when /\A(?:COPYUID)\z/n
1138
- result = ResponseCode.new(name, resp_code_copy__data)
1139
- else
1140
- token = lookahead
1141
- if token.symbol == T_SPACE
1142
- shift_token
1143
- token = match(T_TEXT, lex_state: EXPR_CTEXT)
1144
- result = ResponseCode.new(name, token.value)
1803
+ name = resp_text_code__name
1804
+ data =
1805
+ case name
1806
+ when "CAPABILITY" then resp_code__capability
1807
+ when "PERMANENTFLAGS" then SP? ? flag_perm__list : []
1808
+ when "UIDNEXT" then SP!; nz_number
1809
+ when "UIDVALIDITY" then SP!; nz_number
1810
+ when "UNSEEN" then SP!; nz_number # rev1 only
1811
+ when "APPENDUID" then SP!; resp_code_apnd__data # rev2, UIDPLUS
1812
+ when "COPYUID" then SP!; resp_code_copy__data # rev2, UIDPLUS
1813
+ when "BADCHARSET" then SP? ? charset__list : []
1814
+ when "ALERT", "PARSE", "READ-ONLY", "READ-WRITE", "TRYCREATE",
1815
+ "UNAVAILABLE", "AUTHENTICATIONFAILED", "AUTHORIZATIONFAILED",
1816
+ "EXPIRED", "PRIVACYREQUIRED", "CONTACTADMIN", "NOPERM", "INUSE",
1817
+ "EXPUNGEISSUED", "CORRUPTION", "SERVERBUG", "CLIENTBUG", "CANNOT",
1818
+ "LIMIT", "OVERQUOTA", "ALREADYEXISTS", "NONEXISTENT", "CLOSED",
1819
+ "NOTSAVED", "UIDNOTSTICKY", "UNKNOWN-CTE", "HASCHILDREN"
1820
+ when "NOMODSEQ" # CONDSTORE
1821
+ when "MAILBOXID" then SP!; parens__objectid # RFC8474: OBJECTID
1145
1822
  else
1146
- result = ResponseCode.new(name, nil)
1823
+ SP? and text_chars_except_rbra
1147
1824
  end
1148
- end
1149
- return result
1825
+ ResponseCode.new(name, data)
1150
1826
  end
1151
1827
 
1152
- def charset_list
1153
- result = []
1154
- if accept(T_SPACE)
1155
- match(T_LPAR)
1156
- result << charset
1157
- while accept(T_SPACE)
1158
- result << charset
1159
- end
1160
- match(T_RPAR)
1161
- end
1162
- result
1828
+ alias resp_text_code__name case_insensitive__atom
1829
+
1830
+ # 1*<any TEXT-CHAR except "]">
1831
+ def text_chars_except_rbra
1832
+ match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')[0]
1833
+ end
1834
+
1835
+ # "(" charset *(SP charset) ")"
1836
+ def charset__list
1837
+ lpar; list = [charset]; while SP? do list << charset end; rpar; list
1163
1838
  end
1164
1839
 
1165
1840
  # already matched: "APPENDUID"
@@ -1175,8 +1850,8 @@ module Net
1175
1850
  # match uid_set even if that returns a single-member array.
1176
1851
  #
1177
1852
  def resp_code_apnd__data
1178
- match(T_SPACE); validity = number
1179
- match(T_SPACE); dst_uids = uid_set # uniqueid ⊂ uid-set
1853
+ validity = number; SP!
1854
+ dst_uids = uid_set # uniqueid ⊂ uid-set
1180
1855
  UIDPlusData.new(validity, nil, dst_uids)
1181
1856
  end
1182
1857
 
@@ -1184,187 +1859,106 @@ module Net
1184
1859
  #
1185
1860
  # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1186
1861
  def resp_code_copy__data
1187
- match(T_SPACE); validity = number
1188
- match(T_SPACE); src_uids = uid_set
1189
- match(T_SPACE); dst_uids = uid_set
1862
+ validity = number; SP!
1863
+ src_uids = uid_set; SP!
1864
+ dst_uids = uid_set
1190
1865
  UIDPlusData.new(validity, src_uids, dst_uids)
1191
1866
  end
1192
1867
 
1193
- def address_list
1194
- token = lookahead
1195
- if token.symbol == T_NIL
1196
- shift_token
1197
- return nil
1198
- else
1199
- result = []
1200
- match(T_LPAR)
1201
- while true
1202
- token = lookahead
1203
- case token.symbol
1204
- when T_RPAR
1205
- shift_token
1206
- break
1207
- when T_SPACE
1208
- shift_token
1209
- end
1210
- result.push(address)
1211
- end
1212
- return result
1213
- end
1214
- end
1215
-
1216
- ADDRESS_REGEXP = /\G\
1217
- (?# 1: NAME )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1218
- (?# 2: ROUTE )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1219
- (?# 3: MAILBOX )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1220
- (?# 4: HOST )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)")\
1221
- \)/ni
1222
-
1868
+ ADDRESS_REGEXP = /\G
1869
+ \( (?: NIL | #{Patterns::QUOTED_rev2} ) # 1: NAME
1870
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 2: ROUTE
1871
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 3: MAILBOX
1872
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 4: HOST
1873
+ \)
1874
+ /nix
1875
+
1876
+ # address = "(" addr-name SP addr-adl SP addr-mailbox SP
1877
+ # addr-host ")"
1878
+ # addr-adl = nstring
1879
+ # addr-host = nstring
1880
+ # addr-mailbox = nstring
1881
+ # addr-name = nstring
1223
1882
  def address
1224
- match(T_LPAR)
1225
- if @str.index(ADDRESS_REGEXP, @pos)
1226
- # address does not include literal.
1227
- @pos = $~.end(0)
1228
- name = $1
1229
- route = $2
1230
- mailbox = $3
1231
- host = $4
1232
- for s in [name, route, mailbox, host]
1233
- if s
1234
- s.gsub!(/\\(["\\])/n, "\\1")
1235
- end
1236
- end
1237
- else
1238
- name = nstring
1239
- match(T_SPACE)
1240
- route = nstring
1241
- match(T_SPACE)
1242
- mailbox = nstring
1243
- match(T_SPACE)
1244
- host = nstring
1245
- match(T_RPAR)
1246
- end
1247
- return Address.new(name, route, mailbox, host)
1248
- end
1249
-
1250
- FLAG_REGEXP = /\
1251
- (?# FLAG )\\([^\x80-\xff(){ \x00-\x1f\x7f%"\\]+)|\
1252
- (?# ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\]+)/n
1253
-
1883
+ if (match = accept_re(ADDRESS_REGEXP))
1884
+ # note that "NIL" isn't captured by the regexp
1885
+ name, route, mailbox, host = match.captures
1886
+ .map { Patterns.unescape_quoted _1 }
1887
+ else # address may include literals
1888
+ lpar; name = addr_name
1889
+ SP!; route = addr_adl
1890
+ SP!; mailbox = addr_mailbox
1891
+ SP!; host = addr_host
1892
+ rpar
1893
+ end
1894
+ Address.new(name, route, mailbox, host)
1895
+ end
1896
+
1897
+ alias addr_adl nstring
1898
+ alias addr_host nstring
1899
+ alias addr_mailbox nstring
1900
+ alias addr_name nstring
1901
+
1902
+ # flag-list = "(" [flag *(SP flag)] ")"
1254
1903
  def flag_list
1255
- if @str.index(/\(([^)]*)\)/ni, @pos)
1256
- @pos = $~.end(0)
1257
- return $1.scan(FLAG_REGEXP).collect { |flag, atom|
1258
- if atom
1259
- atom
1260
- else
1261
- flag.capitalize.intern
1262
- end
1263
- }
1264
- else
1265
- parse_error("invalid flag list")
1266
- end
1267
- end
1268
-
1269
- def nstring
1270
- token = lookahead
1271
- if token.symbol == T_NIL
1272
- shift_token
1273
- return nil
1274
- else
1275
- return string
1276
- end
1904
+ match_re(Patterns::FLAG_LIST, "flag-list")[1]
1905
+ .split(nil)
1906
+ .map! { _1.start_with?("\\") ? _1[1..].capitalize.to_sym : _1 }
1277
1907
  end
1278
1908
 
1279
- def astring
1280
- token = lookahead
1281
- if string_token?(token)
1282
- return string
1283
- else
1284
- return astring_chars
1285
- end
1909
+ # "(" [flag-perm *(SP flag-perm)] ")"
1910
+ def flag_perm__list
1911
+ match_re(Patterns::FLAG_PERM_LIST, "PERMANENTFLAGS flag-perm list")[1]
1912
+ .split(nil)
1913
+ .map! { _1.start_with?("\\") ? _1[1..].capitalize.to_sym : _1 }
1286
1914
  end
1287
1915
 
1288
- def string
1289
- token = lookahead
1290
- if token.symbol == T_NIL
1291
- shift_token
1292
- return nil
1293
- end
1294
- token = match(T_QUOTED, T_LITERAL)
1295
- return token.value
1916
+ # See Patterns::MBX_LIST_FLAGS
1917
+ def mbx_list_flags
1918
+ match_re(Patterns::MBX_LIST_FLAGS, "mbx-list-flags")[1]
1919
+ .split(nil).map! { _1[1..].capitalize.to_sym }
1296
1920
  end
1297
1921
 
1298
- STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1299
-
1300
- def string_token?(token)
1301
- return STRING_TOKENS.include?(token.symbol)
1302
- end
1922
+ # See https://developers.google.com/gmail/imap/imap-extensions
1923
+ def x_gm_label; accept(T_BSLASH) ? atom.capitalize.to_sym : astring end
1303
1924
 
1304
- def case_insensitive_string
1305
- token = lookahead
1306
- if token.symbol == T_NIL
1307
- shift_token
1308
- return nil
1309
- end
1310
- token = match(T_QUOTED, T_LITERAL)
1311
- return token.value.upcase
1925
+ # See https://developers.google.com/gmail/imap/imap-extensions
1926
+ def x_gm_labels
1927
+ lpar; return [] if rpar?
1928
+ labels = []
1929
+ labels << x_gm_label
1930
+ labels << x_gm_label while SP?
1931
+ rpar
1932
+ labels
1312
1933
  end
1313
1934
 
1314
- # atom = 1*ATOM-CHAR
1315
- # ATOM-CHAR = <any CHAR except atom-specials>
1316
- ATOM_TOKENS = [
1317
- T_ATOM,
1318
- T_NUMBER,
1319
- T_NIL,
1320
- T_LBRA,
1321
- T_PLUS
1322
- ]
1323
-
1324
- def atom
1325
- -combine_adjacent(*ATOM_TOKENS)
1326
- end
1935
+ # See https://www.rfc-editor.org/errata/rfc3501
1936
+ #
1937
+ # charset = atom / quoted
1938
+ def charset; quoted? || atom end
1327
1939
 
1328
- # ASTRING-CHAR = ATOM-CHAR / resp-specials
1329
- # resp-specials = "]"
1330
- ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
1940
+ # RFC7162:
1941
+ # mod-sequence-value = 1*DIGIT
1942
+ # ;; Positive unsigned 63-bit integer
1943
+ # ;; (mod-sequence)
1944
+ # ;; (1 <= n <= 9,223,372,036,854,775,807).
1945
+ alias mod_sequence_value nz_number64
1331
1946
 
1332
- def astring_chars
1333
- combine_adjacent(*ASTRING_CHARS_TOKENS)
1334
- end
1947
+ # RFC7162:
1948
+ # permsg-modsequence = mod-sequence-value
1949
+ # ;; Per-message mod-sequence.
1950
+ alias permsg_modsequence mod_sequence_value
1335
1951
 
1336
- def combine_adjacent(*tokens)
1337
- result = "".b
1338
- while token = accept(*tokens)
1339
- result << token.value
1340
- end
1341
- if result.empty?
1342
- parse_error('unexpected token %s (expected %s)',
1343
- lookahead.symbol, args.join(" or "))
1344
- end
1345
- result
1346
- end
1952
+ def parens__modseq; lpar; _ = permsg_modsequence; rpar; _ end
1347
1953
 
1348
- # See https://www.rfc-editor.org/errata/rfc3501
1349
- #
1350
- # charset = atom / quoted
1351
- def charset
1352
- if token = accept(T_QUOTED)
1353
- token.value
1354
- else
1355
- atom
1356
- end
1357
- end
1954
+ # RFC8474:
1955
+ # objectid = 1*255(ALPHA / DIGIT / "_" / "-")
1956
+ # ; characters in object identifiers are case
1957
+ # ; significant
1958
+ alias objectid atom
1358
1959
 
1359
- def number
1360
- token = lookahead
1361
- if token.symbol == T_NIL
1362
- shift_token
1363
- return nil
1364
- end
1365
- token = match(T_NUMBER)
1366
- return token.value.to_i
1367
- end
1960
+ def parens__objectid; lpar; _ = objectid; rpar; _ end
1961
+ def nparens__objectid; NIL? ? nil : parens__objectid end
1368
1962
 
1369
1963
  # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1370
1964
  # uid-set = (uniqueid / uid-range) *("," uid-set)
@@ -1393,64 +1987,15 @@ module Net
1393
1987
 
1394
1988
  SPACES_REGEXP = /\G */n
1395
1989
 
1396
- # This advances @pos directly so it's safe before changing @lex_state.
1397
- def accept_space
1398
- if @token
1399
- shift_token if @token.symbol == T_SPACE
1400
- elsif @str[@pos] == " "
1401
- @pos += 1
1402
- end
1403
- end
1404
-
1405
1990
  # The RFC is very strict about this and usually we should be too.
1406
1991
  # But skipping spaces is usually a safe workaround for buggy servers.
1407
1992
  #
1408
1993
  # This advances @pos directly so it's safe before changing @lex_state.
1409
1994
  def accept_spaces
1410
- shift_token if @token&.symbol == T_SPACE
1411
- if @str.index(SPACES_REGEXP, @pos)
1995
+ return false unless SP?
1996
+ @str.index(SPACES_REGEXP, @pos) and
1412
1997
  @pos = $~.end(0)
1413
- end
1414
- end
1415
-
1416
- def match(*args, lex_state: @lex_state)
1417
- if @token && lex_state != @lex_state
1418
- parse_error("invalid lex_state change to %s with unconsumed token",
1419
- lex_state)
1420
- end
1421
- begin
1422
- @lex_state, original_lex_state = lex_state, @lex_state
1423
- token = lookahead
1424
- unless args.include?(token.symbol)
1425
- parse_error('unexpected token %s (expected %s)',
1426
- token.symbol.id2name,
1427
- args.collect {|i| i.id2name}.join(" or "))
1428
- end
1429
- shift_token
1430
- return token
1431
- ensure
1432
- @lex_state = original_lex_state
1433
- end
1434
- end
1435
-
1436
- # like match, but does not raise error on failure.
1437
- #
1438
- # returns and shifts token on successful match
1439
- # returns nil and leaves @token unshifted on no match
1440
- def accept(*args)
1441
- token = lookahead
1442
- if args.include?(token.symbol)
1443
- shift_token
1444
- token
1445
- end
1446
- end
1447
-
1448
- def lookahead
1449
- @token ||= next_token
1450
- end
1451
-
1452
- def shift_token
1453
- @token = nil
1998
+ true
1454
1999
  end
1455
2000
 
1456
2001
  def next_token
@@ -1461,38 +2006,46 @@ module Net
1461
2006
  if $1
1462
2007
  return Token.new(T_SPACE, $+)
1463
2008
  elsif $2
1464
- return Token.new(T_NIL, $+)
1465
- elsif $3
1466
- return Token.new(T_NUMBER, $+)
2009
+ len = $+.to_i
2010
+ val = @str[@pos, len]
2011
+ @pos += len
2012
+ return Token.new(T_LITERAL8, val)
2013
+ elsif $3 && $7
2014
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
2015
+ return Token.new(T_ATOM, $3)
1467
2016
  elsif $4
1468
- return Token.new(T_ATOM, $+)
2017
+ return Token.new(T_NIL, $+)
1469
2018
  elsif $5
1470
- return Token.new(T_QUOTED,
1471
- $+.gsub(/\\(["\\])/n, "\\1"))
2019
+ return Token.new(T_NUMBER, $+)
1472
2020
  elsif $6
2021
+ return Token.new(T_PLUS, $+)
2022
+ elsif $8
2023
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
2024
+ return Token.new(T_ATOM, $+)
2025
+ elsif $9
2026
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
2027
+ elsif $10
1473
2028
  return Token.new(T_LPAR, $+)
1474
- elsif $7
2029
+ elsif $11
1475
2030
  return Token.new(T_RPAR, $+)
1476
- elsif $8
2031
+ elsif $12
1477
2032
  return Token.new(T_BSLASH, $+)
1478
- elsif $9
2033
+ elsif $13
1479
2034
  return Token.new(T_STAR, $+)
1480
- elsif $10
2035
+ elsif $14
1481
2036
  return Token.new(T_LBRA, $+)
1482
- elsif $11
2037
+ elsif $15
1483
2038
  return Token.new(T_RBRA, $+)
1484
- elsif $12
2039
+ elsif $16
1485
2040
  len = $+.to_i
1486
2041
  val = @str[@pos, len]
1487
2042
  @pos += len
1488
2043
  return Token.new(T_LITERAL, val)
1489
- elsif $13
1490
- return Token.new(T_PLUS, $+)
1491
- elsif $14
2044
+ elsif $17
1492
2045
  return Token.new(T_PERCENT, $+)
1493
- elsif $15
2046
+ elsif $18
1494
2047
  return Token.new(T_CRLF, $+)
1495
- elsif $16
2048
+ elsif $19
1496
2049
  return Token.new(T_EOF, $+)
1497
2050
  else
1498
2051
  parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
@@ -1511,8 +2064,7 @@ module Net
1511
2064
  elsif $3
1512
2065
  return Token.new(T_NUMBER, $+)
1513
2066
  elsif $4
1514
- return Token.new(T_QUOTED,
1515
- $+.gsub(/\\(["\\])/n, "\\1"))
2067
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1516
2068
  elsif $5
1517
2069
  len = $+.to_i
1518
2070
  val = @str[@pos, len]
@@ -1529,63 +2081,11 @@ module Net
1529
2081
  @str.index(/\S*/n, @pos)
1530
2082
  parse_error("unknown token - %s", $&.dump)
1531
2083
  end
1532
- when EXPR_TEXT
1533
- if @str.index(TEXT_REGEXP, @pos)
1534
- @pos = $~.end(0)
1535
- if $1
1536
- return Token.new(T_TEXT, $+)
1537
- else
1538
- parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid")
1539
- end
1540
- else
1541
- @str.index(/\S*/n, @pos)
1542
- parse_error("unknown token - %s", $&.dump)
1543
- end
1544
- when EXPR_RTEXT
1545
- if @str.index(RTEXT_REGEXP, @pos)
1546
- @pos = $~.end(0)
1547
- if $1
1548
- return Token.new(T_LBRA, $+)
1549
- elsif $2
1550
- return Token.new(T_TEXT, $+)
1551
- else
1552
- parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid")
1553
- end
1554
- else
1555
- @str.index(/\S*/n, @pos)
1556
- parse_error("unknown token - %s", $&.dump)
1557
- end
1558
- when EXPR_CTEXT
1559
- if @str.index(CTEXT_REGEXP, @pos)
1560
- @pos = $~.end(0)
1561
- if $1
1562
- return Token.new(T_TEXT, $+)
1563
- else
1564
- parse_error("[Net::IMAP BUG] CTEXT_REGEXP is invalid")
1565
- end
1566
- else
1567
- @str.index(/\S*/n, @pos) #/
1568
- parse_error("unknown token - %s", $&.dump)
1569
- end
1570
2084
  else
1571
2085
  parse_error("invalid @lex_state - %s", @lex_state.inspect)
1572
2086
  end
1573
2087
  end
1574
2088
 
1575
- def parse_error(fmt, *args)
1576
- if IMAP.debug
1577
- $stderr.printf("@str: %s\n", @str.dump)
1578
- $stderr.printf("@pos: %d\n", @pos)
1579
- $stderr.printf("@lex_state: %s\n", @lex_state)
1580
- if @token
1581
- $stderr.printf("@token.symbol: %s\n", @token.symbol)
1582
- $stderr.printf("@token.value: %s\n", @token.value.inspect)
1583
- end
1584
- end
1585
- raise ResponseParseError, format(fmt, *args)
1586
- end
1587
2089
  end
1588
-
1589
2090
  end
1590
-
1591
2091
  end