net-imap 0.3.7 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of net-imap might be problematic. Click here for more details.

Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/pages.yml +46 -0
  3. data/.github/workflows/test.yml +5 -12
  4. data/.gitignore +2 -0
  5. data/Gemfile +3 -0
  6. data/README.md +15 -4
  7. data/Rakefile +0 -7
  8. data/docs/styles.css +0 -12
  9. data/lib/net/imap/authenticators.rb +26 -57
  10. data/lib/net/imap/command_data.rb +13 -6
  11. data/lib/net/imap/data_encoding.rb +14 -2
  12. data/lib/net/imap/deprecated_client_options.rb +139 -0
  13. data/lib/net/imap/errors.rb +20 -0
  14. data/lib/net/imap/fetch_data.rb +518 -0
  15. data/lib/net/imap/response_data.rb +178 -255
  16. data/lib/net/imap/response_parser/parser_utils.rb +240 -0
  17. data/lib/net/imap/response_parser.rb +1722 -1193
  18. data/lib/net/imap/sasl/anonymous_authenticator.rb +69 -0
  19. data/lib/net/imap/sasl/authentication_exchange.rb +107 -0
  20. data/lib/net/imap/sasl/authenticators.rb +118 -0
  21. data/lib/net/imap/sasl/client_adapter.rb +72 -0
  22. data/lib/net/imap/{authenticators/cram_md5.rb → sasl/cram_md5_authenticator.rb} +21 -11
  23. data/lib/net/imap/sasl/digest_md5_authenticator.rb +180 -0
  24. data/lib/net/imap/sasl/external_authenticator.rb +83 -0
  25. data/lib/net/imap/sasl/gs2_header.rb +80 -0
  26. data/lib/net/imap/{authenticators/login.rb → sasl/login_authenticator.rb} +25 -16
  27. data/lib/net/imap/sasl/oauthbearer_authenticator.rb +199 -0
  28. data/lib/net/imap/sasl/plain_authenticator.rb +101 -0
  29. data/lib/net/imap/sasl/protocol_adapters.rb +45 -0
  30. data/lib/net/imap/sasl/scram_algorithm.rb +58 -0
  31. data/lib/net/imap/sasl/scram_authenticator.rb +287 -0
  32. data/lib/net/imap/sasl/stringprep.rb +6 -66
  33. data/lib/net/imap/sasl/xoauth2_authenticator.rb +106 -0
  34. data/lib/net/imap/sasl.rb +144 -43
  35. data/lib/net/imap/sasl_adapter.rb +21 -0
  36. data/lib/net/imap/search_result.rb +150 -0
  37. data/lib/net/imap/sequence_set.rb +1414 -0
  38. data/lib/net/imap/stringprep/nameprep.rb +70 -0
  39. data/lib/net/imap/stringprep/saslprep.rb +69 -0
  40. data/lib/net/imap/stringprep/saslprep_tables.rb +96 -0
  41. data/lib/net/imap/stringprep/tables.rb +146 -0
  42. data/lib/net/imap/stringprep/trace.rb +85 -0
  43. data/lib/net/imap/stringprep.rb +159 -0
  44. data/lib/net/imap.rb +1213 -636
  45. data/net-imap.gemspec +5 -3
  46. data/rakelib/benchmarks.rake +91 -0
  47. data/rakelib/saslprep.rake +4 -4
  48. data/rakelib/string_prep_tables_generator.rb +82 -60
  49. metadata +34 -14
  50. data/benchmarks/stringprep.yml +0 -65
  51. data/benchmarks/table-regexps.yml +0 -39
  52. data/lib/net/imap/authenticators/digest_md5.rb +0 -115
  53. data/lib/net/imap/authenticators/plain.rb +0 -41
  54. data/lib/net/imap/authenticators/xoauth2.rb +0 -20
  55. data/lib/net/imap/sasl/saslprep.rb +0 -55
  56. data/lib/net/imap/sasl/saslprep_tables.rb +0 -98
  57. data/lib/net/imap/sasl/stringprep_tables.rb +0 -153
@@ -1,12 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "response_parser/parser_utils"
4
5
 
5
6
  module Net
6
7
  class IMAP < Protocol
7
8
 
8
9
  # Parses an \IMAP server response.
9
10
  class ResponseParser
11
+ include ParserUtils
12
+ extend ParserUtils::Generator
13
+
10
14
  # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
11
15
  def initialize
12
16
  @str = nil
@@ -33,745 +37,1340 @@ module Net
33
37
 
34
38
  # :stopdoc:
35
39
 
36
- EXPR_BEG = :EXPR_BEG
37
- EXPR_DATA = :EXPR_DATA
38
- EXPR_TEXT = :EXPR_TEXT
39
- EXPR_RTEXT = :EXPR_RTEXT
40
- EXPR_CTEXT = :EXPR_CTEXT
41
-
42
- T_SPACE = :SPACE
43
- T_NIL = :NIL
44
- T_NUMBER = :NUMBER
45
- T_ATOM = :ATOM
46
- T_QUOTED = :QUOTED
47
- T_LPAR = :LPAR
48
- T_RPAR = :RPAR
49
- T_BSLASH = :BSLASH
50
- T_STAR = :STAR
51
- T_LBRA = :LBRA
52
- T_RBRA = :RBRA
53
- T_LITERAL = :LITERAL
54
- T_PLUS = :PLUS
55
- T_PERCENT = :PERCENT
56
- T_CRLF = :CRLF
57
- T_EOF = :EOF
58
- T_TEXT = :TEXT
59
-
40
+ EXPR_BEG = :EXPR_BEG # the default, used in most places
41
+ EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
42
+
43
+ T_SPACE = :SPACE # atom special
44
+ T_ATOM = :ATOM # atom (subset of astring chars)
45
+ T_NIL = :NIL # subset of atom and label
46
+ T_NUMBER = :NUMBER # subset of atom
47
+ T_LBRA = :LBRA # subset of atom
48
+ T_PLUS = :PLUS # subset of atom; tag special
49
+ T_RBRA = :RBRA # atom special; resp_special; valid astring char
50
+ T_QUOTED = :QUOTED # starts/ends with atom special
51
+ T_BSLASH = :BSLASH # atom special; quoted special
52
+ T_LPAR = :LPAR # atom special; paren list delimiter
53
+ T_RPAR = :RPAR # atom special; paren list delimiter
54
+ T_STAR = :STAR # atom special; list wildcard
55
+ T_PERCENT = :PERCENT # atom special; list wildcard
56
+ T_LITERAL = :LITERAL # starts with atom special
57
+ T_LITERAL8 = :LITERAL8 # starts with atom char "~"
58
+ T_CRLF = :CRLF # atom special; text special; quoted special
59
+ T_TEXT = :TEXT # any char except CRLF
60
+ T_EOF = :EOF # end of response string
61
+
62
+ module ResponseConditions
63
+ OK = "OK"
64
+ NO = "NO"
65
+ BAD = "BAD"
66
+ BYE = "BYE"
67
+ PREAUTH = "PREAUTH"
68
+
69
+ RESP_COND_STATES = [OK, NO, BAD ].freeze
70
+ RESP_DATA_CONDS = [OK, NO, BAD, BYE, ].freeze
71
+ AUTH_CONDS = [OK, PREAUTH].freeze
72
+ GREETING_CONDS = [OK, BYE, PREAUTH].freeze
73
+ RESP_CONDS = [OK, NO, BAD, BYE, PREAUTH].freeze
74
+ end
75
+ include ResponseConditions
76
+
77
+ module Patterns
78
+
79
+ module CharClassSubtraction
80
+ refine Regexp do
81
+ def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end
82
+ end
83
+ end
84
+ using CharClassSubtraction
85
+
86
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
87
+ # >>>
88
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
89
+ # CHAR = %x01-7F
90
+ # CRLF = CR LF
91
+ # ; Internet standard newline
92
+ # CTL = %x00-1F / %x7F
93
+ # ; controls
94
+ # DIGIT = %x30-39
95
+ # ; 0-9
96
+ # DQUOTE = %x22
97
+ # ; " (Double Quote)
98
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
99
+ # OCTET = %x00-FF
100
+ # SP = %x20
101
+ module RFC5234
102
+ ALPHA = /[A-Za-z]/n
103
+ CHAR = /[\x01-\x7f]/n
104
+ CRLF = /\r\n/n
105
+ CTL = /[\x00-\x1F\x7F]/n
106
+ DIGIT = /\d/n
107
+ DQUOTE = /"/n
108
+ HEXDIG = /\h/
109
+ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
110
+ SP = / /n
111
+ end
112
+
113
+ # UTF-8, a transformation format of ISO 10646
114
+ # >>>
115
+ # UTF8-1 = %x00-7F
116
+ # UTF8-tail = %x80-BF
117
+ # UTF8-2 = %xC2-DF UTF8-tail
118
+ # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
119
+ # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
120
+ # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
121
+ # %xF4 %x80-8F 2( UTF8-tail )
122
+ # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
123
+ # UTF8-octets = *( UTF8-char )
124
+ #
125
+ # n.b. String * Integer is used for repetition, rather than /x{3}/,
126
+ # because ruby 3.2's linear-time cache-based optimization doesn't work
127
+ # with "bounded or fixed times repetition nesting in another repetition
128
+ # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
129
+ # believe it is hard to support this case correctly."
130
+ # See https://bugs.ruby-lang.org/issues/19104
131
+ module RFC3629
132
+ UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
133
+ UTF8_TAIL = /[\x80-\xBF]/n
134
+ UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
135
+ UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
136
+ /\xED[\x80-\x9F]#{UTF8_TAIL}/n,
137
+ /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
138
+ /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
139
+ UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
140
+ /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
141
+ /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
142
+ UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
143
+ UTF8_OCTETS = /#{UTF8_CHAR}*/n
144
+ end
145
+
146
+ include RFC5234
147
+ include RFC3629
148
+
149
+ # CHAR8 = %x01-ff
150
+ # ; any OCTET except NUL, %x00
151
+ CHAR8 = /[\x01-\xff]/n
152
+
153
+ # list-wildcards = "%" / "*"
154
+ LIST_WILDCARDS = /[%*]/n
155
+ # quoted-specials = DQUOTE / "\"
156
+ QUOTED_SPECIALS = /["\\]/n
157
+ # resp-specials = "]"
158
+ RESP_SPECIALS = /[\]]/n
159
+
160
+ # atomish = 1*<any ATOM-CHAR except "[">
161
+ # ; We use "atomish" for msg-att and section, in order
162
+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
163
+ #
164
+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
165
+ # quoted-specials / resp-specials
166
+ # ATOM-CHAR = <any CHAR except atom-specials>
167
+ # atom = 1*ATOM-CHAR
168
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
169
+ # tag = 1*<any ASTRING-CHAR except "+">
170
+
171
+ ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
172
+ ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
173
+
174
+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
175
+ ATOM_CHAR = CHAR - ATOM_SPECIALS
176
+
177
+ ATOM = /#{ATOM_CHAR}+/n
178
+ ASTRING_CHARS = /#{ASTRING_CHAR}+/n
179
+ ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
180
+ TAG = /#{ASTRING_CHAR - /[+]/ }+/
181
+
182
+ # TEXT-CHAR = <any CHAR except CR and LF>
183
+ TEXT_CHAR = CHAR - /[\r\n]/
184
+
185
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
186
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
187
+ CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
188
+
189
+ # flag = "\Answered" / "\Flagged" / "\Deleted" /
190
+ # "\Seen" / "\Draft" / flag-keyword / flag-extension
191
+ # ; Does not include "\Recent"
192
+ # flag-extension = "\" atom
193
+ # ; Future expansion. Client implementations
194
+ # ; MUST accept flag-extension flags. Server
195
+ # ; implementations MUST NOT generate
196
+ # ; flag-extension flags except as defined by
197
+ # ; a future Standard or Standards Track
198
+ # ; revisions of this specification.
199
+ # flag-keyword = "$MDNSent" / "$Forwarded" / "$Junk" /
200
+ # "$NotJunk" / "$Phishing" / atom
201
+ #
202
+ # flag-perm = flag / "\*"
203
+ #
204
+ # Not checking for max one mbx-list-sflag in the parser.
205
+ # >>>
206
+ # mbx-list-oflag = "\Noinferiors" / child-mbox-flag /
207
+ # "\Subscribed" / "\Remote" / flag-extension
208
+ # ; Other flags; multiple from this list are
209
+ # ; possible per LIST response, but each flag
210
+ # ; can only appear once per LIST response
211
+ # mbx-list-sflag = "\NonExistent" / "\Noselect" / "\Marked" /
212
+ # "\Unmarked"
213
+ # ; Selectability flags; only one per LIST response
214
+ # child-mbox-flag = "\HasChildren" / "\HasNoChildren"
215
+ # ; attributes for the CHILDREN return option, at most
216
+ # ; one possible per LIST response
217
+ FLAG = /\\?#{ATOM}/n
218
+ FLAG_EXTENSION = /\\#{ATOM}/n
219
+ FLAG_KEYWORD = ATOM
220
+ FLAG_PERM = Regexp.union(FLAG, "\\*")
221
+ MBX_FLAG = FLAG_EXTENSION
222
+
223
+ # flag-list = "(" [flag *(SP flag)] ")"
224
+ # resp-text-code =/ "PERMANENTFLAGS" SP
225
+ # "(" [flag-perm *(SP flag-perm)] ")"
226
+ # mbx-list-flags = *(mbx-list-oflag SP) mbx-list-sflag
227
+ # *(SP mbx-list-oflag) /
228
+ # mbx-list-oflag *(SP mbx-list-oflag)
229
+ # (Not checking for max one mbx-list-sflag in the parser.)
230
+ FLAG_LIST = /\G\((#{FLAG }(?:#{SP}#{FLAG })*|)\)/ni
231
+ FLAG_PERM_LIST = /\G\((#{FLAG_PERM}(?:#{SP}#{FLAG_PERM})*|)\)/ni
232
+ MBX_LIST_FLAGS = /\G (#{MBX_FLAG }(?:#{SP}#{MBX_FLAG })*) /nix
233
+
234
+ # Gmail allows SP and "]" in flags.......
235
+ QUIRKY_FLAG = Regexp.union(/\\?#{ASTRING_CHARS}/n, "\\*")
236
+ QUIRKY_FLAGS_LIST = /\G\(( [^)]* )\)/nx
237
+
238
+ # RFC3501:
239
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
240
+ # "\" quoted-specials
241
+ # RFC9051:
242
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
243
+ # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
244
+ # RFC3501 & RFC9051:
245
+ # quoted = DQUOTE *QUOTED-CHAR DQUOTE
246
+ QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
247
+ QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
248
+ QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
249
+ QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
250
+ UTF8_2, UTF8_3, UTF8_4)
251
+ QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
252
+ QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
253
+
254
+ # RFC3501:
255
+ # text = 1*TEXT-CHAR
256
+ # RFC9051:
257
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
258
+ # ; Non-ASCII text can only be returned
259
+ # ; after ENABLE IMAP4rev2 command
260
+ TEXT_rev1 = /#{TEXT_CHAR}+/
261
+ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
262
+
263
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
264
+ TAGGED_LABEL_FCHAR = /[a-zA-Z\-_.]/n
265
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
266
+ TAGGED_LABEL_CHAR = /[a-zA-Z\-_.0-9:]*/n
267
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
268
+ # ; Is a valid RFC 3501 "atom".
269
+ TAGGED_EXT_LABEL = /#{TAGGED_LABEL_FCHAR}#{TAGGED_LABEL_CHAR}*/n
270
+
271
+ # nz-number = digit-nz *DIGIT
272
+ # ; Non-zero unsigned 32-bit integer
273
+ # ; (0 < n < 4,294,967,296)
274
+ NZ_NUMBER = /[1-9]\d*/n
275
+
276
+ # seq-number = nz-number / "*"
277
+ # ; message sequence number (COPY, FETCH, STORE
278
+ # ; commands) or unique identifier (UID COPY,
279
+ # ; UID FETCH, UID STORE commands).
280
+ # ; * represents the largest number in use. In
281
+ # ; the case of message sequence numbers, it is
282
+ # ; the number of messages in a non-empty mailbox.
283
+ # ; In the case of unique identifiers, it is the
284
+ # ; unique identifier of the last message in the
285
+ # ; mailbox or, if the mailbox is empty, the
286
+ # ; mailbox's current UIDNEXT value.
287
+ # ; The server should respond with a tagged BAD
288
+ # ; response to a command that uses a message
289
+ # ; sequence number greater than the number of
290
+ # ; messages in the selected mailbox. This
291
+ # ; includes "*" if the selected mailbox is empty.
292
+ SEQ_NUMBER = /#{NZ_NUMBER}|\*/n
293
+
294
+ # seq-range = seq-number ":" seq-number
295
+ # ; two seq-number values and all values between
296
+ # ; these two regardless of order.
297
+ # ; Example: 2:4 and 4:2 are equivalent and
298
+ # ; indicate values 2, 3, and 4.
299
+ # ; Example: a unique identifier sequence range of
300
+ # ; 3291:* includes the UID of the last message in
301
+ # ; the mailbox, even if that value is less than
302
+ # ; 3291.
303
+ SEQ_RANGE = /#{SEQ_NUMBER}:#{SEQ_NUMBER}/n
304
+
305
+ # sequence-set = (seq-number / seq-range) ["," sequence-set]
306
+ # ; set of seq-number values, regardless of order.
307
+ # ; Servers MAY coalesce overlaps and/or execute
308
+ # ; the sequence in any order.
309
+ # ; Example: a message sequence number set of
310
+ # ; 2,4:7,9,12:* for a mailbox with 15 messages is
311
+ # ; equivalent to 2,4,5,6,7,9,12,13,14,15
312
+ # ; Example: a message sequence number set of
313
+ # ; *:4,5:7 for a mailbox with 10 messages is
314
+ # ; equivalent to 10,9,8,7,6,5,4,5,6,7 and MAY
315
+ # ; be reordered and overlap coalesced to be
316
+ # ; 4,5,6,7,8,9,10.
317
+ SEQUENCE_SET_ITEM = /#{SEQ_NUMBER}|#{SEQ_RANGE}/n
318
+ SEQUENCE_SET = /#{SEQUENCE_SET_ITEM}(?:,#{SEQUENCE_SET_ITEM})*/n
319
+ SEQUENCE_SET_STR = /\A#{SEQUENCE_SET}\z/n
320
+
321
+ # RFC3501:
322
+ # literal = "{" number "}" CRLF *CHAR8
323
+ # ; Number represents the number of CHAR8s
324
+ # RFC9051:
325
+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
326
+ # ; <number64> represents the number of CHAR8s.
327
+ # ; A non-synchronizing literal is distinguished
328
+ # ; from a synchronizing literal by the presence of
329
+ # ; "+" before the closing "}".
330
+ # ; Non-synchronizing literals are not allowed when
331
+ # ; sent from server to the client.
332
+ LITERAL = /\{(\d+)\}\r\n/n
333
+
334
+ # RFC3516 (BINARY):
335
+ # literal8 = "~{" number "}" CRLF *OCTET
336
+ # ; <number> represents the number of OCTETs
337
+ # ; in the response string.
338
+ # RFC9051:
339
+ # literal8 = "~{" number64 "}" CRLF *OCTET
340
+ # ; <number64> represents the number of OCTETs
341
+ # ; in the response string.
342
+ LITERAL8 = /~\{(\d+)\}\r\n/n
343
+
344
+ module_function
345
+
346
+ def unescape_quoted!(quoted)
347
+ quoted
348
+ &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
349
+ &.force_encoding("UTF-8")
350
+ end
351
+
352
+ def unescape_quoted(quoted)
353
+ quoted
354
+ &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
355
+ &.force_encoding("UTF-8")
356
+ end
357
+
358
+ end
359
+
360
+ # the default, used in most places
60
361
  BEG_REGEXP = /\G(?:\
61
- (?# 1: SPACE )( +)|\
62
- (?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
63
- (?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
64
- (?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\
65
- (?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
66
- (?# 6: LPAR )(\()|\
67
- (?# 7: RPAR )(\))|\
68
- (?# 8: BSLASH )(\\)|\
69
- (?# 9: STAR )(\*)|\
70
- (?# 10: LBRA )(\[)|\
71
- (?# 11: RBRA )(\])|\
72
- (?# 12: LITERAL )\{(\d+)\}\r\n|\
73
- (?# 13: PLUS )(\+)|\
74
- (?# 14: PERCENT )(%)|\
75
- (?# 15: CRLF )(\r\n)|\
76
- (?# 16: EOF )(\z))/ni
77
-
362
+ (?# 1: SPACE )( )|\
363
+ (?# 2: LITERAL8)#{Patterns::LITERAL8}|\
364
+ (?# 3: ATOM prefixed with a compatible subtype)\
365
+ ((?:\
366
+ (?# 4: NIL )(NIL)|\
367
+ (?# 5: NUMBER )(\d+)|\
368
+ (?# 6: PLUS )(\+))\
369
+ (?# 7: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
370
+ (?# This enables greedy alternation without lookahead, in linear time.)\
371
+ )|\
372
+ (?# Also need to check for ATOM without a subtype prefix.)\
373
+ (?# 8: ATOM )(#{Patterns::ATOMISH})|\
374
+ (?# 9: QUOTED )#{Patterns::QUOTED_rev2}|\
375
+ (?# 10: LPAR )(\()|\
376
+ (?# 11: RPAR )(\))|\
377
+ (?# 12: BSLASH )(\\)|\
378
+ (?# 13: STAR )(\*)|\
379
+ (?# 14: LBRA )(\[)|\
380
+ (?# 15: RBRA )(\])|\
381
+ (?# 16: LITERAL )#{Patterns::LITERAL}|\
382
+ (?# 17: PERCENT )(%)|\
383
+ (?# 18: CRLF )(\r\n)|\
384
+ (?# 19: EOF )(\z))/ni
385
+
386
+ # envelope, body(structure), namespaces
78
387
  DATA_REGEXP = /\G(?:\
79
388
  (?# 1: SPACE )( )|\
80
389
  (?# 2: NIL )(NIL)|\
81
390
  (?# 3: NUMBER )(\d+)|\
82
- (?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
83
- (?# 5: LITERAL )\{(\d+)\}\r\n|\
391
+ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
392
+ (?# 5: LITERAL )#{Patterns::LITERAL}|\
84
393
  (?# 6: LPAR )(\()|\
85
394
  (?# 7: RPAR )(\)))/ni
86
395
 
87
- TEXT_REGEXP = /\G(?:\
88
- (?# 1: TEXT )([^\x00\r\n]*))/ni
89
-
90
- RTEXT_REGEXP = /\G(?:\
91
- (?# 1: LBRA )(\[)|\
92
- (?# 2: TEXT )([^\x00\r\n]*))/ni
396
+ # text, after 'resp-text-code "]"'
397
+ TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
93
398
 
94
- CTEXT_REGEXP = /\G(?:\
95
- (?# 1: TEXT )([^\x00\r\n\]]*))/ni
399
+ # resp-text-code, after 'atom SP'
400
+ CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
96
401
 
97
402
  Token = Struct.new(:symbol, :value)
98
403
 
99
- def response
100
- token = lookahead
101
- case token.symbol
102
- when T_PLUS
103
- result = continue_req
104
- when T_STAR
105
- result = response_untagged
106
- else
107
- result = response_tagged
108
- end
109
- while lookahead.symbol == T_SPACE
110
- # Ignore trailing space for Microsoft Exchange Server
111
- shift_token
112
- end
113
- match(T_CRLF)
114
- match(T_EOF)
115
- return result
116
- end
404
+ def_char_matchers :SP, " ", :T_SPACE
405
+ def_char_matchers :PLUS, "+", :T_PLUS
406
+ def_char_matchers :STAR, "*", :T_STAR
117
407
 
118
- def continue_req
119
- match(T_PLUS)
120
- token = lookahead
121
- if token.symbol == T_SPACE
122
- shift_token
123
- return ContinuationRequest.new(resp_text, @str)
408
+ def_char_matchers :lpar, "(", :T_LPAR
409
+ def_char_matchers :rpar, ")", :T_RPAR
410
+
411
+ def_char_matchers :lbra, "[", :T_LBRA
412
+ def_char_matchers :rbra, "]", :T_RBRA
413
+
414
+ # valid number ranges are not enforced by parser
415
+ # number = 1*DIGIT
416
+ # ; Unsigned 32-bit integer
417
+ # ; (0 <= n < 4,294,967,296)
418
+ def_token_matchers :number, T_NUMBER, coerce: Integer
419
+
420
+ def_token_matchers :quoted, T_QUOTED
421
+
422
+ # string = quoted / literal
423
+ def_token_matchers :string, T_QUOTED, T_LITERAL
424
+
425
+ # used by nstring8 = nstring / literal8
426
+ def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8
427
+
428
+ # use where string represents "LABEL" values
429
+ def_token_matchers :case_insensitive__string,
430
+ T_QUOTED, T_LITERAL,
431
+ send: :upcase
432
+
433
+ # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
434
+ # NIL? returns nil when it does *not* match
435
+ def_token_matchers :NIL, T_NIL
436
+
437
+ # In addition to explicit uses of +tagged-ext-label+, use this to match
438
+ # keywords when the grammar has not provided any extension syntax.
439
+ #
440
+ # Do *not* use this for labels where the grammar specifies extensions
441
+ # can be +atom+, even if all currently defined labels would match. For
442
+ # example response codes in +resp-text-code+.
443
+ #
444
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
445
+ # ; Is a valid RFC 3501 "atom".
446
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
447
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
448
+ #
449
+ # TODO: add to lexer and only match tagged-ext-label
450
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
451
+
452
+ def_token_matchers :CRLF, T_CRLF
453
+ def_token_matchers :EOF, T_EOF
454
+
455
+ # atom = 1*ATOM-CHAR
456
+ # ATOM-CHAR = <any CHAR except atom-specials>
457
+ ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
458
+
459
+ SEQUENCE_SET_TOKENS = [T_ATOM, T_NUMBER, T_STAR]
460
+
461
+ # sequence-set = (seq-number / seq-range) ["," sequence-set]
462
+ # sequence-set =/ seq-last-command
463
+ # ; Allow for "result of the last command"
464
+ # ; indicator.
465
+ # seq-last-command = "$"
466
+ #
467
+ # *note*: doesn't match seq-last-command
468
+ def sequence_set
469
+ str = combine_adjacent(*SEQUENCE_SET_TOKENS)
470
+ if Patterns::SEQUENCE_SET_STR.match?(str)
471
+ SequenceSet[str]
124
472
  else
125
- return ContinuationRequest.new(ResponseText.new(nil, ""), @str)
473
+ parse_error("unexpected atom %p, expected sequence-set", str)
126
474
  end
127
475
  end
128
476
 
129
- def response_untagged
130
- match(T_STAR)
131
- match(T_SPACE)
132
- token = lookahead
133
- if token.symbol == T_NUMBER
134
- return numeric_response
135
- elsif token.symbol == T_ATOM
136
- case token.value
137
- when /\A(?:OK|NO|BAD|BYE|PREAUTH)\z/ni
138
- return response_cond
139
- when /\A(?:FLAGS)\z/ni
140
- return flags_response
141
- when /\A(?:ID)\z/ni
142
- return id_response
143
- when /\A(?:LIST|LSUB|XLIST)\z/ni
144
- return list_response
145
- when /\A(?:NAMESPACE)\z/ni
146
- return namespace_response
147
- when /\A(?:QUOTA)\z/ni
148
- return getquota_response
149
- when /\A(?:QUOTAROOT)\z/ni
150
- return getquotaroot_response
151
- when /\A(?:ACL)\z/ni
152
- return getacl_response
153
- when /\A(?:SEARCH|SORT)\z/ni
154
- return search_response
155
- when /\A(?:THREAD)\z/ni
156
- return thread_response
157
- when /\A(?:STATUS)\z/ni
158
- return status_response
159
- when /\A(?:CAPABILITY)\z/ni
160
- return capability_response
161
- when /\A(?:NOOP)\z/ni
162
- return ignored_response
163
- else
164
- return text_response
165
- end
166
- else
167
- parse_error("unexpected token %s", token.symbol)
168
- end
477
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
478
+ # resp-specials = "]"
479
+ ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
480
+
481
+ ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
482
+
483
+ # tag = 1*<any ASTRING-CHAR except "+">
484
+ TAG_TOKENS = (ASTRING_CHARS_TOKENS - [T_PLUS]).freeze
485
+
486
+ # TODO: handle atom, astring_chars, and tag entirely inside the lexer
487
+ def atom; combine_adjacent(*ATOM_TOKENS) end
488
+ def astring_chars; combine_adjacent(*ASTRING_CHARS_TOKENS) end
489
+ def tag; combine_adjacent(*TAG_TOKENS) end
490
+
491
+ # the #accept version of #atom
492
+ def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
493
+
494
+ # Returns <tt>atom.upcase</tt>
495
+ def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
496
+
497
+ # Returns <tt>atom?&.upcase</tt>
498
+ def case_insensitive__atom?
499
+ -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
169
500
  end
170
501
 
171
- def response_tagged
172
- tag = astring_chars
173
- match(T_SPACE)
174
- token = match(T_ATOM)
175
- name = token.value.upcase
176
- match(T_SPACE)
177
- return TaggedResponse.new(tag, name, resp_text, @str)
502
+ # astring = 1*ASTRING-CHAR / string
503
+ def astring
504
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
178
505
  end
179
506
 
180
- def response_cond
181
- token = match(T_ATOM)
182
- name = token.value.upcase
183
- match(T_SPACE)
184
- return UntaggedResponse.new(name, resp_text, @str)
507
+ def astring?
508
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
185
509
  end
186
510
 
187
- def numeric_response
188
- n = number
189
- match(T_SPACE)
190
- token = match(T_ATOM)
191
- name = token.value.upcase
192
- case name
193
- when "EXISTS", "RECENT", "EXPUNGE"
194
- return UntaggedResponse.new(name, n, @str)
195
- when "FETCH"
196
- shift_token
197
- match(T_SPACE)
198
- data = FetchData.new(n, msg_att(n))
199
- return UntaggedResponse.new(name, data, @str)
200
- end
511
+ # Use #label or #label_in to assert specific known labels
512
+ # (+tagged-ext-label+ only, not +atom+).
513
+ def label(word)
514
+ (val = tagged_ext_label) == word and return val
515
+ parse_error("unexpected atom %p, expected %p instead", val, word)
201
516
  end
202
517
 
203
- def msg_att(n)
204
- match(T_LPAR)
205
- attr = {}
206
- while true
207
- token = lookahead
208
- case token.symbol
209
- when T_RPAR
210
- shift_token
211
- break
212
- when T_SPACE
213
- shift_token
214
- next
215
- end
216
- case token.value
217
- when /\A(?:ENVELOPE)\z/ni
218
- name, val = envelope_data
219
- when /\A(?:FLAGS)\z/ni
220
- name, val = flags_data
221
- when /\A(?:INTERNALDATE)\z/ni
222
- name, val = internaldate_data
223
- when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
224
- name, val = rfc822_text
225
- when /\A(?:RFC822\.SIZE)\z/ni
226
- name, val = rfc822_size
227
- when /\A(?:BODY(?:STRUCTURE)?)\z/ni
228
- name, val = body_data
229
- when /\A(?:UID)\z/ni
230
- name, val = uid_data
231
- when /\A(?:MODSEQ)\z/ni
232
- name, val = modseq_data
233
- else
234
- parse_error("unknown attribute `%s' for {%d}", token.value, n)
235
- end
236
- attr[name] = val
237
- end
238
- return attr
518
+ # Use #label or #label_in to assert specific known labels
519
+ # (+tagged-ext-label+ only, not +atom+).
520
+ def label_in(*labels)
521
+ lbl = tagged_ext_label and labels.include?(lbl) and return lbl
522
+ parse_error("unexpected atom %p, expected one of %s instead",
523
+ lbl, labels.join(" or "))
239
524
  end
240
525
 
241
- def envelope_data
242
- token = match(T_ATOM)
243
- name = token.value.upcase
244
- match(T_SPACE)
245
- return name, envelope
526
+ # expects "OK" or "PREAUTH" and raises InvalidResponseError on failure
527
+ def resp_cond_auth__name
528
+ lbl = tagged_ext_label and AUTH_CONDS.include? lbl and return lbl
529
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
530
+ lbl, AUTH_CONDS.join(" or ")
531
+ ]
246
532
  end
247
533
 
248
- def envelope
249
- @lex_state = EXPR_DATA
250
- token = lookahead
251
- if token.symbol == T_NIL
252
- shift_token
253
- result = nil
254
- else
255
- match(T_LPAR)
256
- date = nstring
257
- match(T_SPACE)
258
- subject = nstring
259
- match(T_SPACE)
260
- from = address_list
261
- match(T_SPACE)
262
- sender = address_list
263
- match(T_SPACE)
264
- reply_to = address_list
265
- match(T_SPACE)
266
- to = address_list
267
- match(T_SPACE)
268
- cc = address_list
269
- match(T_SPACE)
270
- bcc = address_list
271
- match(T_SPACE)
272
- in_reply_to = nstring
273
- match(T_SPACE)
274
- message_id = nstring
275
- match(T_RPAR)
276
- result = Envelope.new(date, subject, from, sender, reply_to,
277
- to, cc, bcc, in_reply_to, message_id)
278
- end
279
- @lex_state = EXPR_BEG
280
- return result
534
+ # expects "OK" or "NO" or "BAD" and raises InvalidResponseError on failure
535
+ def resp_cond_state__name
536
+ lbl = tagged_ext_label and RESP_COND_STATES.include? lbl and return lbl
537
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
538
+ lbl, RESP_COND_STATES.join(" or ")
539
+ ]
281
540
  end
282
541
 
283
- def flags_data
284
- token = match(T_ATOM)
285
- name = token.value.upcase
286
- match(T_SPACE)
287
- return name, flag_list
542
+ # nstring = string / nil
543
+ def nstring
544
+ NIL? ? nil : string
288
545
  end
289
546
 
290
- def internaldate_data
291
- token = match(T_ATOM)
292
- name = token.value.upcase
293
- match(T_SPACE)
294
- token = match(T_QUOTED)
295
- return name, token.value
547
+ def nstring8
548
+ NIL? ? nil : string8
296
549
  end
297
550
 
298
- def rfc822_text
299
- token = match(T_ATOM)
300
- name = token.value.upcase
301
- token = lookahead
302
- if token.symbol == T_LBRA
303
- shift_token
304
- match(T_RBRA)
305
- end
306
- match(T_SPACE)
307
- return name, nstring
551
+ def nquoted
552
+ NIL? ? nil : quoted
308
553
  end
309
554
 
310
- def rfc822_size
311
- token = match(T_ATOM)
312
- name = token.value.upcase
313
- match(T_SPACE)
314
- return name, number
555
+ # use where nstring represents "LABEL" values
556
+ def case_insensitive__nstring
557
+ NIL? ? nil : case_insensitive__string
315
558
  end
316
559
 
317
- def body_data
318
- token = match(T_ATOM)
319
- name = token.value.upcase
320
- token = lookahead
321
- if token.symbol == T_SPACE
322
- shift_token
323
- return name, body
324
- end
325
- name.concat(section)
326
- token = lookahead
327
- if token.symbol == T_ATOM
328
- name.concat(token.value)
329
- shift_token
560
+ # tagged-ext-comp = astring /
561
+ # tagged-ext-comp *(SP tagged-ext-comp) /
562
+ # "(" tagged-ext-comp ")"
563
+ # ; Extensions that follow this general
564
+ # ; syntax should use nstring instead of
565
+ # ; astring when appropriate in the context
566
+ # ; of the extension.
567
+ # ; Note that a message set or a "number"
568
+ # ; can always be represented as an "atom".
569
+ # ; A URL should be represented as
570
+ # ; a "quoted" string.
571
+ def tagged_ext_comp
572
+ vals = []
573
+ while true
574
+ vals << case lookahead!(*ASTRING_TOKENS, T_LPAR).symbol
575
+ when T_LPAR then lpar; ary = tagged_ext_comp; rpar; ary
576
+ when T_NUMBER then number
577
+ else astring
578
+ end
579
+ SP? or break
330
580
  end
331
- match(T_SPACE)
332
- data = nstring
333
- return name, data
581
+ vals
334
582
  end
335
583
 
336
- def body
337
- @lex_state = EXPR_DATA
338
- token = lookahead
339
- if token.symbol == T_NIL
340
- shift_token
341
- result = nil
584
+ # tagged-ext-simple is a subset of atom
585
+ # TODO: recognize sequence-set in the lexer
586
+ #
587
+ # tagged-ext-simple = sequence-set / number / number64
588
+ def tagged_ext_simple
589
+ number? || sequence_set
590
+ end
591
+
592
+ # tagged-ext-val = tagged-ext-simple /
593
+ # "(" [tagged-ext-comp] ")"
594
+ def tagged_ext_val
595
+ if lpar?
596
+ _ = peek_rpar? ? [] : tagged_ext_comp
597
+ rpar
598
+ _
342
599
  else
343
- match(T_LPAR)
344
- token = lookahead
345
- if token.symbol == T_LPAR
346
- result = body_type_mpart
347
- else
348
- result = body_type_1part
600
+ tagged_ext_simple
601
+ end
602
+ end
603
+
604
+ # mailbox = "INBOX" / astring
605
+ # ; INBOX is case-insensitive. All case variants of
606
+ # ; INBOX (e.g., "iNbOx") MUST be interpreted as INBOX
607
+ # ; not as an astring. An astring which consists of
608
+ # ; the case-insensitive sequence "I" "N" "B" "O" "X"
609
+ # ; is considered to be INBOX and not an astring.
610
+ # ; Refer to section 5.1 for further
611
+ # ; semantic details of mailbox names.
612
+ alias mailbox astring
613
+
614
+ # valid number ranges are not enforced by parser
615
+ # number64 = 1*DIGIT
616
+ # ; Unsigned 63-bit integer
617
+ # ; (0 <= n <= 9,223,372,036,854,775,807)
618
+ alias number64 number
619
+ alias number64? number?
620
+
621
+ # valid number ranges are not enforced by parser
622
+ # nz-number = digit-nz *DIGIT
623
+ # ; Non-zero unsigned 32-bit integer
624
+ # ; (0 < n < 4,294,967,296)
625
+ alias nz_number number
626
+ alias nz_number? number?
627
+
628
+ # valid number ranges are not enforced by parser
629
+ # nz-number64 = digit-nz *DIGIT
630
+ # ; Unsigned 63-bit integer
631
+ # ; (0 < n <= 9,223,372,036,854,775,807)
632
+ alias nz_number64 nz_number
633
+
634
+ # valid number ranges are not enforced by parser
635
+ # uniqueid = nz-number
636
+ # ; Strictly ascending
637
+ alias uniqueid nz_number
638
+
639
+ # valid number ranges are not enforced by parser
640
+ #
641
+ # A Gmail ID (X-GM-MSGID, X-GM-THRID): a 64-bit unsigned integer, the decimal
642
+ # equivalent of the ID hex string used in the web interface and the Gmail API.
643
+ alias x_gm_id number
644
+
645
+ # [RFC3501 & RFC9051:]
646
+ # response = *(continue-req / response-data) response-done
647
+ #
648
+ # For simplicity, response isn't interpreted as the combination of the
649
+ # three response types, but instead represents any individual server
650
+ # response. Our simplified interpretation is defined as:
651
+ # response = continue-req | response-data | response-tagged
652
+ #
653
+ # n.b: our "response-data" definition parses "greeting" too.
654
+ def response
655
+ resp = case lookahead!(T_PLUS, T_STAR, *TAG_TOKENS).symbol
656
+ when T_PLUS then continue_req
657
+ when T_STAR then response_data
658
+ else response_tagged
659
+ end
660
+ accept_spaces # QUIRKY: Ignore trailing space (MS Exchange Server?)
661
+ CRLF!
662
+ EOF!
663
+ resp
664
+ end
665
+
666
+ # RFC3501 & RFC9051:
667
+ # continue-req = "+" SP (resp-text / base64) CRLF
668
+ #
669
+ # n.b: base64 is valid resp-text. And in the spirit of RFC9051 Appx E 23
670
+ # (and to workaround existing servers), we use the following grammar:
671
+ #
672
+ # continue-req = "+" [SP resp-text] CRLF
673
+ def continue_req
674
+ PLUS!
675
+ ContinuationRequest.new(SP? ? resp_text : ResponseText::EMPTY, @str)
676
+ end
677
+
678
+ RE_RESPONSE_TYPE = /\G(?:\d+ )?(?<type>#{Patterns::TAGGED_EXT_LABEL})/n
679
+
680
+ # [RFC3501:]
681
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
682
+ # mailbox-data / message-data / capability-data) CRLF
683
+ # [RFC4466:]
684
+ # response-data = "*" SP response-payload CRLF
685
+ # response-payload = resp-cond-state / resp-cond-bye /
686
+ # mailbox-data / message-data / capability-data
687
+ # RFC5161 (ENABLE capability):
688
+ # response-data =/ "*" SP enable-data CRLF
689
+ # RFC5255 (LANGUAGE capability)
690
+ # response-payload =/ language-data
691
+ # RFC5255 (I18NLEVEL=1 and I18NLEVEL=2 capabilities)
692
+ # response-payload =/ comparator-data
693
+ # [RFC9051:]
694
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
695
+ # mailbox-data / message-data / capability-data /
696
+ # enable-data) CRLF
697
+ #
698
+ # [merging in greeting and response-fatal:]
699
+ # greeting = "*" SP (resp-cond-auth / resp-cond-bye) CRLF
700
+ # response-fatal = "*" SP resp-cond-bye CRLF
701
+ # response-data =/ "*" SP (resp-cond-auth / resp-cond-bye) CRLF
702
+ # [removing duplicates, this is simply]
703
+ # response-payload =/ resp-cond-auth
704
+ #
705
+ # TODO: remove resp-cond-auth and handle greeting separately
706
+ def response_data
707
+ STAR!; SP!
708
+ m = peek_re(RE_RESPONSE_TYPE) or parse_error("unparsable response")
709
+ case m["type"].upcase
710
+ when "OK" then resp_cond_state__untagged # RFC3501, RFC9051
711
+ when "FETCH" then message_data__fetch # RFC3501, RFC9051
712
+ when "EXPUNGE" then message_data__expunge # RFC3501, RFC9051
713
+ when "EXISTS" then mailbox_data__exists # RFC3501, RFC9051
714
+ when "ESEARCH" then esearch_response # RFC4731, RFC9051, etc
715
+ when "VANISHED" then expunged_resp # RFC7162
716
+ when "UIDFETCH" then uidfetch_resp # (draft) UIDONLY
717
+ when "SEARCH" then mailbox_data__search # RFC3501 (obsolete)
718
+ when "CAPABILITY" then capability_data__untagged # RFC3501, RFC9051
719
+ when "FLAGS" then mailbox_data__flags # RFC3501, RFC9051
720
+ when "LIST" then mailbox_data__list # RFC3501, RFC9051
721
+ when "STATUS" then mailbox_data__status # RFC3501, RFC9051
722
+ when "NAMESPACE" then namespace_response # RFC2342, RFC9051
723
+ when "ENABLED" then enable_data # RFC5161, RFC9051
724
+ when "BAD" then resp_cond_state__untagged # RFC3501, RFC9051
725
+ when "NO" then resp_cond_state__untagged # RFC3501, RFC9051
726
+ when "PREAUTH" then resp_cond_auth # RFC3501, RFC9051
727
+ when "BYE" then resp_cond_bye # RFC3501, RFC9051
728
+ when "RECENT" then mailbox_data__recent # RFC3501 (obsolete)
729
+ when "SORT" then sort_data # RFC5256, RFC7162
730
+ when "THREAD" then thread_data # RFC5256
731
+ when "QUOTA" then quota_response # RFC2087, RFC9208
732
+ when "QUOTAROOT" then quotaroot_response # RFC2087, RFC9208
733
+ when "ID" then id_response # RFC2971
734
+ when "ACL" then acl_data # RFC4314
735
+ when "LISTRIGHTS" then listrights_data # RFC4314
736
+ when "MYRIGHTS" then myrights_data # RFC4314
737
+ when "METADATA" then metadata_resp # RFC5464
738
+ when "LANGUAGE" then language_data # RFC5255
739
+ when "COMPARATOR" then comparator_data # RFC5255
740
+ when "CONVERTED" then message_data__converted # RFC5259
741
+ when "LSUB" then mailbox_data__lsub # RFC3501 (obsolete)
742
+ when "XLIST" then mailbox_data__xlist # deprecated
743
+ when "NOOP" then response_data__noop
744
+ else response_data__unhandled
745
+ end
746
+ end
747
+
748
+ def response_data__unhandled(klass = UntaggedResponse)
749
+ num = number?; SP?
750
+ type = tagged_ext_label; SP?
751
+ text = remaining_unparsed
752
+ data =
753
+ if num && text then UnparsedNumericResponseData.new(num, text)
754
+ elsif text then UnparsedData.new(text)
755
+ else num
349
756
  end
350
- match(T_RPAR)
351
- end
352
- @lex_state = EXPR_BEG
353
- return result
757
+ klass.new(type, data, @str)
354
758
  end
355
759
 
356
- def body_type_1part
357
- token = lookahead
358
- case token.value
359
- when /\A(?:TEXT)\z/ni
360
- return body_type_text
361
- when /\A(?:MESSAGE)\z/ni
362
- return body_type_msg
363
- when /\A(?:ATTACHMENT)\z/ni
364
- return body_type_attachment
365
- when /\A(?:MIXED)\z/ni
366
- return body_type_mixed
367
- else
368
- return body_type_basic
369
- end
760
+ # reads all the way up until CRLF
761
+ def remaining_unparsed
762
+ str = @str[@pos...-2] and @pos += str.bytesize
763
+ str&.empty? ? nil : str
370
764
  end
371
765
 
372
- def body_type_basic
373
- mtype, msubtype = media_type
374
- token = lookahead
375
- if token.symbol == T_RPAR
376
- return BodyTypeBasic.new(mtype, msubtype)
377
- end
378
- match(T_SPACE)
379
- param, content_id, desc, enc, size = body_fields
380
- md5, disposition, language, extension = body_ext_1part
381
- return BodyTypeBasic.new(mtype, msubtype,
382
- param, content_id,
383
- desc, enc, size,
384
- md5, disposition, language, extension)
766
+ def response_data__ignored; response_data__unhandled(IgnoredResponse) end
767
+ alias response_data__noop response_data__ignored
768
+
769
+ alias esearch_response response_data__unhandled
770
+ alias expunged_resp response_data__unhandled
771
+ alias uidfetch_resp response_data__unhandled
772
+ alias listrights_data response_data__unhandled
773
+ alias myrights_data response_data__unhandled
774
+ alias metadata_resp response_data__unhandled
775
+ alias language_data response_data__unhandled
776
+ alias comparator_data response_data__unhandled
777
+ alias message_data__converted response_data__unhandled
778
+
779
+ # RFC3501 & RFC9051:
780
+ # response-tagged = tag SP resp-cond-state CRLF
781
+ def response_tagged
782
+ TaggedResponse.new(tag, *(SP!; resp_cond_state), @str)
385
783
  end
386
784
 
387
- def body_type_text
388
- mtype, msubtype = media_type
389
- match(T_SPACE)
390
- param, content_id, desc, enc, size = body_fields
391
- match(T_SPACE)
392
- lines = number
393
- md5, disposition, language, extension = body_ext_1part
394
- return BodyTypeText.new(mtype, msubtype,
395
- param, content_id,
396
- desc, enc, size,
397
- lines,
398
- md5, disposition, language, extension)
785
+ # RFC3501 & RFC9051:
786
+ # resp-cond-state = ("OK" / "NO" / "BAD") SP resp-text
787
+ #
788
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
789
+ # servers), we don't require a final SP and instead parse this as:
790
+ #
791
+ # resp-cond-state = ("OK" / "NO" / "BAD") [SP resp-text]
792
+ def resp_cond_state
793
+ [resp_cond_state__name, SP? ? resp_text : ResponseText::EMPTY]
399
794
  end
400
795
 
401
- def body_type_msg
402
- mtype, msubtype = media_type
403
- match(T_SPACE)
404
- param, content_id, desc, enc, size = body_fields
796
+ def resp_cond_state__untagged
797
+ UntaggedResponse.new(*resp_cond_state, @str)
798
+ end
405
799
 
406
- token = lookahead
407
- if token.symbol == T_RPAR
408
- # If this is not message/rfc822, we shouldn't apply the RFC822
409
- # spec to it. We should handle anything other than
410
- # message/rfc822 using multipart extension data [rfc3501] (i.e.
411
- # the data itself won't be returned, we would have to retrieve it
412
- # with BODYSTRUCTURE instead of with BODY
413
-
414
- # Also, sometimes a message/rfc822 is included as a large
415
- # attachment instead of having all of the other details
416
- # (e.g. attaching a .eml file to an email)
417
- if msubtype == "RFC822"
418
- return BodyTypeMessage.new(mtype, msubtype, param, content_id,
419
- desc, enc, size, nil, nil, nil, nil,
420
- nil, nil, nil)
421
- else
422
- return BodyTypeExtension.new(mtype, msubtype,
423
- param, content_id,
424
- desc, enc, size)
425
- end
426
- end
800
+ # resp-cond-auth = ("OK" / "PREAUTH") SP resp-text
801
+ #
802
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
803
+ # servers), we don't require a final SP and instead parse this as:
804
+ #
805
+ # resp-cond-auth = ("OK" / "PREAUTH") [SP resp-text]
806
+ def resp_cond_auth
807
+ UntaggedResponse.new(resp_cond_auth__name,
808
+ SP? ? resp_text : ResponseText::EMPTY,
809
+ @str)
810
+ end
427
811
 
428
- match(T_SPACE)
429
- env = envelope
430
- match(T_SPACE)
431
- b = body
432
- match(T_SPACE)
433
- lines = number
434
- md5, disposition, language, extension = body_ext_1part
435
- return BodyTypeMessage.new(mtype, msubtype,
436
- param, content_id,
437
- desc, enc, size,
438
- env, b, lines,
439
- md5, disposition, language, extension)
812
+ # resp-cond-bye = "BYE" SP resp-text
813
+ #
814
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
815
+ # servers), we don't require a final SP and instead parse this as:
816
+ #
817
+ # resp-cond-bye = "BYE" [SP resp-text]
818
+ def resp_cond_bye
819
+ UntaggedResponse.new(label(BYE),
820
+ SP? ? resp_text : ResponseText::EMPTY,
821
+ @str)
440
822
  end
441
823
 
442
- def body_type_attachment
443
- mtype = case_insensitive_string
444
- match(T_SPACE)
445
- param = body_fld_param
446
- return BodyTypeAttachment.new(mtype, nil, param)
824
+ # message-data = nz-number SP ("EXPUNGE" / ("FETCH" SP msg-att))
825
+ def message_data__fetch
826
+ seq = nz_number; SP!
827
+ name = label "FETCH"; SP!
828
+ data = FetchData.new(seq, msg_att(seq))
829
+ UntaggedResponse.new(name, data, @str)
447
830
  end
448
831
 
449
- def body_type_mixed
450
- mtype = "MULTIPART"
451
- msubtype = case_insensitive_string
452
- param, disposition, language, extension = body_ext_mpart
453
- return BodyTypeBasic.new(mtype, msubtype, param, nil, nil, nil, nil, nil, disposition, language, extension)
832
+ def response_data__simple_numeric
833
+ data = nz_number; SP!
834
+ name = tagged_ext_label
835
+ UntaggedResponse.new(name, data, @str)
454
836
  end
455
837
 
456
- def body_type_mpart
457
- parts = []
838
+ alias message_data__expunge response_data__simple_numeric
839
+ alias mailbox_data__exists response_data__simple_numeric
840
+ alias mailbox_data__recent response_data__simple_numeric
841
+
842
+ # RFC3501 & RFC9051:
843
+ # msg-att = "(" (msg-att-dynamic / msg-att-static)
844
+ # *(SP (msg-att-dynamic / msg-att-static)) ")"
845
+ #
846
+ # msg-att-dynamic = "FLAGS" SP "(" [flag-fetch *(SP flag-fetch)] ")"
847
+ # RFC5257 (ANNOTATE extension):
848
+ # msg-att-dynamic =/ "ANNOTATION" SP
849
+ # ( "(" entry-att *(SP entry-att) ")" /
850
+ # "(" entry *(SP entry) ")" )
851
+ # RFC7162 (CONDSTORE extension):
852
+ # msg-att-dynamic =/ fetch-mod-resp
853
+ # fetch-mod-resp = "MODSEQ" SP "(" permsg-modsequence ")"
854
+ # RFC8970 (PREVIEW extension):
855
+ # msg-att-dynamic =/ "PREVIEW" SP nstring
856
+ #
857
+ # RFC3501:
858
+ # msg-att-static = "ENVELOPE" SP envelope /
859
+ # "INTERNALDATE" SP date-time /
860
+ # "RFC822" [".HEADER" / ".TEXT"] SP nstring /
861
+ # "RFC822.SIZE" SP number /
862
+ # "BODY" ["STRUCTURE"] SP body /
863
+ # "BODY" section ["<" number ">"] SP nstring /
864
+ # "UID" SP uniqueid
865
+ # RFC3516 (BINARY extension):
866
+ # msg-att-static =/ "BINARY" section-binary SP (nstring / literal8)
867
+ # / "BINARY.SIZE" section-binary SP number
868
+ # RFC8514 (SAVEDATE extension):
869
+ # msg-att-static =/ "SAVEDATE" SP (date-time / nil)
870
+ # RFC8474 (OBJECTID extension):
871
+ # msg-att-static =/ fetch-emailid-resp / fetch-threadid-resp
872
+ # fetch-emailid-resp = "EMAILID" SP "(" objectid ")"
873
+ # fetch-threadid-resp = "THREADID" SP ( "(" objectid ")" / nil )
874
+ # RFC9051:
875
+ # msg-att-static = "ENVELOPE" SP envelope /
876
+ # "INTERNALDATE" SP date-time /
877
+ # "RFC822.SIZE" SP number64 /
878
+ # "BODY" ["STRUCTURE"] SP body /
879
+ # "BODY" section ["<" number ">"] SP nstring /
880
+ # "BINARY" section-binary SP (nstring / literal8) /
881
+ # "BINARY.SIZE" section-binary SP number /
882
+ # "UID" SP uniqueid
883
+ #
884
+ # Re https://www.rfc-editor.org/errata/eid7246, I'm adding "offset" to the
885
+ # official "BINARY" ABNF, like so:
886
+ #
887
+ # msg-att-static =/ "BINARY" section-binary ["<" number ">"] SP
888
+ # (nstring / literal8)
889
+ def msg_att(n)
890
+ lpar
891
+ attr = {}
458
892
  while true
459
- token = lookahead
460
- if token.symbol == T_SPACE
461
- shift_token
462
- break
463
- end
464
- parts.push(body)
893
+ name = msg_att__label; SP!
894
+ val =
895
+ case name
896
+ when "UID" then uniqueid
897
+ when "FLAGS" then flag_list
898
+ when "BODY" then body
899
+ when /\ABODY\[/ni then nstring
900
+ when "BODYSTRUCTURE" then body
901
+ when "ENVELOPE" then envelope
902
+ when "INTERNALDATE" then date_time
903
+ when "RFC822.SIZE" then number64
904
+ when /\ABINARY\[/ni then nstring8 # BINARY, IMAP4rev2
905
+ when /\ABINARY\.SIZE\[/ni then number # BINARY, IMAP4rev2
906
+ when "RFC822" then nstring # not in rev2
907
+ when "RFC822.HEADER" then nstring # not in rev2
908
+ when "RFC822.TEXT" then nstring # not in rev2
909
+ when "MODSEQ" then parens__modseq # CONDSTORE
910
+ when "EMAILID" then parens__objectid # OBJECTID
911
+ when "THREADID" then nparens__objectid # OBJECTID
912
+ when "X-GM-MSGID" then x_gm_id # GMail
913
+ when "X-GM-THRID" then x_gm_id # GMail
914
+ when "X-GM-LABELS" then x_gm_labels # GMail
915
+ else parse_error("unknown attribute `%s' for {%d}", name, n)
916
+ end
917
+ attr[name] = val
918
+ break unless SP?
919
+ break if lookahead_rpar?
465
920
  end
466
- mtype = "MULTIPART"
467
- msubtype = case_insensitive_string
468
- param, disposition, language, extension = body_ext_mpart
469
- return BodyTypeMultipart.new(mtype, msubtype, parts,
470
- param, disposition, language,
471
- extension)
921
+ rpar
922
+ attr
472
923
  end
473
924
 
474
- def media_type
475
- mtype = case_insensitive_string
476
- token = lookahead
477
- if token.symbol != T_SPACE
478
- return mtype, nil
925
+ # appends "[section]" and "<partial>" to the base label
926
+ def msg_att__label
927
+ case (name = tagged_ext_label)
928
+ when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
929
+ # ignoring "[]" fixes https://bugs.ruby-lang.org/issues/5620
930
+ lbra? and rbra
931
+ when "BODY"
932
+ peek_lbra? and name << section and
933
+ peek_str?("<") and name << gt__number__lt # partial
934
+ when "BINARY", "BINARY.SIZE"
935
+ name << section_binary
936
+ # see https://www.rfc-editor.org/errata/eid7246 and the note above
937
+ peek_str?("<") and name << gt__number__lt # partial
479
938
  end
480
- match(T_SPACE)
481
- msubtype = case_insensitive_string
482
- return mtype, msubtype
939
+ name
483
940
  end
484
941
 
485
- def body_fields
486
- param = body_fld_param
487
- match(T_SPACE)
488
- content_id = nstring
489
- match(T_SPACE)
490
- desc = nstring
491
- match(T_SPACE)
492
- enc = case_insensitive_string
493
- match(T_SPACE)
494
- size = number
495
- return param, content_id, desc, enc, size
496
- end
942
+ # this represents the partial origin offset ("<" number ">") for BODY or BINARY
943
+ alias gt__number__lt atom
497
944
 
498
- def body_fld_param
499
- token = lookahead
500
- if token.symbol == T_NIL
501
- shift_token
502
- return nil
503
- end
504
- match(T_LPAR)
505
- param = {}
506
- while true
507
- token = lookahead
508
- case token.symbol
509
- when T_RPAR
510
- shift_token
511
- break
512
- when T_SPACE
513
- shift_token
514
- end
515
- name = case_insensitive_string
516
- match(T_SPACE)
517
- val = string
518
- param[name] = val
519
- end
520
- return param
945
+ # RFC3501 & RFC9051:
946
+ # envelope = "(" env-date SP env-subject SP env-from SP
947
+ # env-sender SP env-reply-to SP env-to SP env-cc SP
948
+ # env-bcc SP env-in-reply-to SP env-message-id ")"
949
+ def envelope
950
+ @lex_state = EXPR_DATA
951
+ lpar; date = env_date
952
+ SP!; subject = env_subject
953
+ SP!; from = env_from
954
+ SP!; sender = env_sender
955
+ SP!; reply_to = env_reply_to
956
+ SP!; to = env_to
957
+ SP!; cc = env_cc
958
+ SP!; bcc = env_bcc
959
+ SP!; in_reply_to = env_in_reply_to
960
+ SP!; message_id = env_message_id
961
+ rpar
962
+ Envelope.new(date, subject, from, sender, reply_to,
963
+ to, cc, bcc, in_reply_to, message_id)
964
+ ensure
965
+ @lex_state = EXPR_BEG
521
966
  end
522
967
 
523
- def body_ext_1part
524
- token = lookahead
525
- if token.symbol == T_SPACE
526
- shift_token
527
- else
528
- return nil
529
- end
530
- md5 = nstring
968
+ # env-date = nstring
969
+ # env-subject = nstring
970
+ # env-in-reply-to = nstring
971
+ # env-message-id = nstring
972
+ alias env_date nstring
973
+ alias env_subject nstring
974
+ alias env_in_reply_to nstring
975
+ alias env_message_id nstring
976
+
977
+ # env-from = "(" 1*address ")" / nil
978
+ # env-sender = "(" 1*address ")" / nil
979
+ # env-reply-to = "(" 1*address ")" / nil
980
+ # env-to = "(" 1*address ")" / nil
981
+ # env-cc = "(" 1*address ")" / nil
982
+ # env-bcc = "(" 1*address ")" / nil
983
+ def nlist__address
984
+ return if NIL?
985
+ lpar; list = [address]; list << address until (quirky_SP?; rpar?)
986
+ list
987
+ end
988
+
989
+ alias env_from nlist__address
990
+ alias env_sender nlist__address
991
+ alias env_reply_to nlist__address
992
+ alias env_to nlist__address
993
+ alias env_cc nlist__address
994
+ alias env_bcc nlist__address
995
+
996
+ # Used when servers erroneously send an extra SP.
997
+ #
998
+ # As of 2023-11-28, Outlook.com (still) sends SP
999
+ # between +address+ in <tt>env-*</tt> lists.
1000
+ alias quirky_SP? SP?
531
1001
 
532
- token = lookahead
533
- if token.symbol == T_SPACE
534
- shift_token
535
- else
536
- return md5
537
- end
538
- disposition = body_fld_dsp
1002
+ # date-time = DQUOTE date-day-fixed "-" date-month "-" date-year
1003
+ # SP time SP zone DQUOTE
1004
+ alias date_time quoted
1005
+ alias ndatetime nquoted
539
1006
 
540
- token = lookahead
541
- if token.symbol == T_SPACE
542
- shift_token
543
- else
544
- return md5, disposition
545
- end
546
- language = body_fld_lang
1007
+ # RFC-3501 & RFC-9051:
1008
+ # body = "(" (body-type-1part / body-type-mpart) ")"
1009
+ def body
1010
+ @lex_state = EXPR_DATA
1011
+ lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar
1012
+ result
1013
+ ensure
1014
+ @lex_state = EXPR_BEG
1015
+ end
1016
+ alias lookahead_body? lookahead_lpar?
547
1017
 
548
- token = lookahead
549
- if token.symbol == T_SPACE
550
- shift_token
551
- else
552
- return md5, disposition, language
553
- end
1018
+ # RFC-3501 & RFC9051:
1019
+ # body-type-1part = (body-type-basic / body-type-msg / body-type-text)
1020
+ # [SP body-ext-1part]
1021
+ def body_type_1part
1022
+ # This regexp peek is a performance optimization.
1023
+ # The lookahead fallback would work fine too.
1024
+ m = peek_re(/\G(?:
1025
+ (?<TEXT> "TEXT" \s "[^"]+" )
1026
+ |(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
1027
+ |(?<BASIC> "[^"]+" \s "[^"]+" )
1028
+ |(?<MIXED> "MIXED" )
1029
+ )/nix)
1030
+ choice = m&.named_captures&.compact&.keys&.first
1031
+ # In practice, the following line should never be used. But the ABNF
1032
+ # *does* allow literals, and this will handle them.
1033
+ choice ||= lookahead_case_insensitive__string!
1034
+ case choice
1035
+ when "BASIC" then body_type_basic # => BodyTypeBasic
1036
+ when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
1037
+ when "TEXT" then body_type_text # => BodyTypeText
1038
+ when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
1039
+ else body_type_basic # might be a bug; server's or ours?
1040
+ end
1041
+ end
1042
+
1043
+ # RFC-3501 & RFC9051:
1044
+ # body-type-basic = media-basic SP body-fields
1045
+ def body_type_basic
1046
+ type = media_basic # n.b. "basic" type isn't enforced here
1047
+ if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid
1048
+ SP!; flds = body_fields
1049
+ SP? and exts = body_ext_1part
1050
+ BodyTypeBasic.new(*type, *flds, *exts)
1051
+ end
554
1052
 
555
- extension = body_extensions
556
- return md5, disposition, language, extension
1053
+ # RFC-3501 & RFC-9051:
1054
+ # body-type-text = media-text SP body-fields SP body-fld-lines
1055
+ def body_type_text
1056
+ type = media_text
1057
+ SP!; flds = body_fields
1058
+ SP!; lines = body_fld_lines
1059
+ SP? and exts = body_ext_1part
1060
+ BodyTypeText.new(*type, *flds, lines, *exts)
557
1061
  end
558
1062
 
559
- def body_ext_mpart
560
- token = lookahead
561
- if token.symbol == T_SPACE
562
- shift_token
563
- else
564
- return nil
565
- end
566
- param = body_fld_param
1063
+ # RFC-3501 & RFC-9051:
1064
+ # body-type-msg = media-message SP body-fields SP envelope
1065
+ # SP body SP body-fld-lines
1066
+ def body_type_msg
1067
+ # n.b. "message/rfc822" type isn't enforced here
1068
+ type = media_message
1069
+ SP!; flds = body_fields
1070
+
1071
+ # Sometimes servers send body-type-basic when body-type-msg is expected.
1072
+ # E.g: when a message/rfc822 part has "Content-Disposition: attachment".
1073
+ #
1074
+ # * SP "(" --> SP envelope --> continue as body-type-msg
1075
+ # * ")" --> no body-ext-1part --> completed body-type-basic
1076
+ # * SP nstring --> SP body-fld-md5
1077
+ # --> SP body-ext-1part --> continue as body-type-basic
1078
+ #
1079
+ # It's probably better to return BodyTypeBasic---even for
1080
+ # "message/rfc822"---than BodyTypeMessage with invalid fields.
1081
+ unless peek_str?(" (")
1082
+ SP? and exts = body_ext_1part
1083
+ return BodyTypeBasic.new(*type, *flds, *exts)
1084
+ end
1085
+
1086
+ SP!; env = envelope
1087
+ SP!; bdy = body
1088
+ SP!; lines = body_fld_lines
1089
+ SP? and exts = body_ext_1part
1090
+ BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts)
1091
+ end
1092
+
1093
+ # This is a malformed body-type-mpart with no subparts.
1094
+ def body_type_mixed
1095
+ # warn "malformed body-type-mpart: multipart/mixed with no parts."
1096
+ type = media_subtype # => "MIXED"
1097
+ SP? and exts = body_ext_mpart
1098
+ BodyTypeMultipart.new("MULTIPART", type, nil, *exts)
1099
+ end
567
1100
 
568
- token = lookahead
569
- if token.symbol == T_SPACE
570
- shift_token
571
- else
572
- return param
573
- end
574
- disposition = body_fld_dsp
1101
+ # RFC-3501 & RFC-9051:
1102
+ # body-type-mpart = 1*body SP media-subtype
1103
+ # [SP body-ext-mpart]
1104
+ def body_type_mpart
1105
+ parts = [body]; parts << body until SP?; msubtype = media_subtype
1106
+ SP? and exts = body_ext_mpart
1107
+ BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts)
1108
+ end
575
1109
 
576
- token = lookahead
577
- if token.symbol == T_SPACE
578
- shift_token
579
- else
580
- return param, disposition
581
- end
582
- language = body_fld_lang
1110
+ # n.b. this handles both type and subtype
1111
+ #
1112
+ # RFC-3501 vs RFC-9051:
1113
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
1114
+ # "MESSAGE" /
1115
+ # "VIDEO") DQUOTE) / string) SP media-subtype
1116
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
1117
+ # "FONT" / "MESSAGE" / "MODEL" /
1118
+ # "VIDEO") DQUOTE) / string) SP media-subtype
1119
+ #
1120
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
1121
+ # DQUOTE "RFC822" DQUOTE
1122
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
1123
+ # DQUOTE ("RFC822" / "GLOBAL") DQUOTE
1124
+ #
1125
+ # RFC-3501 & RFC-9051:
1126
+ # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
1127
+ # media-subtype = string
1128
+ def media_type
1129
+ mtype = case_insensitive__string
1130
+ SP? or return mtype, nil # ??? quirky!
1131
+ msubtype = media_subtype
1132
+ return mtype, msubtype
1133
+ end
583
1134
 
584
- token = lookahead
585
- if token.symbol == T_SPACE
586
- shift_token
587
- else
588
- return param, disposition, language
589
- end
1135
+ # TODO: check types
1136
+ alias media_basic media_type # */* --- catchall
1137
+ alias media_message media_type # message/rfc822, message/global
1138
+ alias media_text media_type # text/*
590
1139
 
591
- extension = body_extensions
592
- return param, disposition, language, extension
1140
+ alias media_subtype case_insensitive__string
1141
+
1142
+ # RFC-3501 & RFC-9051:
1143
+ # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
1144
+ # body-fld-enc SP body-fld-octets
1145
+ def body_fields
1146
+ fields = []
1147
+ fields << body_fld_param; SP!
1148
+ fields << body_fld_id; SP!
1149
+ fields << body_fld_desc; SP!
1150
+ fields << body_fld_enc; SP!
1151
+ fields << body_fld_octets
1152
+ fields
593
1153
  end
594
1154
 
1155
+ # RFC3501, RFC9051:
1156
+ # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
1157
+ def body_fld_param
1158
+ return if NIL?
1159
+ param = {}
1160
+ lpar
1161
+ name = case_insensitive__string; SP!; param[name] = string
1162
+ while SP?
1163
+ name = case_insensitive__string; SP!; param[name] = string
1164
+ end
1165
+ rpar
1166
+ param
1167
+ end
1168
+
1169
+ # RFC2060
1170
+ # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
1171
+ # [SPACE body_fld_lang
1172
+ # [SPACE 1#body_extension]]]
1173
+ # ;; MUST NOT be returned on non-extensible
1174
+ # ;; "BODY" fetch
1175
+ # RFC3501 & RFC9051
1176
+ # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
1177
+ # [SP body-fld-loc *(SP body-extension)]]]
1178
+ # ; MUST NOT be returned on non-extensible
1179
+ # ; "BODY" fetch
1180
+ def body_ext_1part
1181
+ fields = []; fields << body_fld_md5
1182
+ SP? or return fields; fields << body_fld_dsp
1183
+ SP? or return fields; fields << body_fld_lang
1184
+ SP? or return fields; fields << body_fld_loc
1185
+ SP? or return fields; fields << body_extensions
1186
+ fields
1187
+ end
1188
+
1189
+ # RFC-2060:
1190
+ # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
1191
+ # [SP 1#body_extension]]
1192
+ # ;; MUST NOT be returned on non-extensible
1193
+ # ;; "BODY" fetch
1194
+ # RFC-3501 & RFC-9051:
1195
+ # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
1196
+ # [SP body-fld-loc *(SP body-extension)]]]
1197
+ # ; MUST NOT be returned on non-extensible
1198
+ # ; "BODY" fetch
1199
+ def body_ext_mpart
1200
+ fields = []; fields << body_fld_param
1201
+ SP? or return fields; fields << body_fld_dsp
1202
+ SP? or return fields; fields << body_fld_lang
1203
+ SP? or return fields; fields << body_fld_loc
1204
+ SP? or return fields; fields << body_extensions
1205
+ fields
1206
+ end
1207
+
1208
+ alias body_fld_desc nstring
1209
+ alias body_fld_id nstring
1210
+ alias body_fld_loc nstring
1211
+ alias body_fld_lines number64 # number in 3501, number64 in 9051
1212
+ alias body_fld_md5 nstring
1213
+ alias body_fld_octets number
1214
+
1215
+ # RFC-3501 & RFC-9051:
1216
+ # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
1217
+ # "QUOTED-PRINTABLE") DQUOTE) / string
1218
+ alias body_fld_enc case_insensitive__string
1219
+
1220
+ # body-fld-dsp = "(" string SP body-fld-param ")" / nil
595
1221
  def body_fld_dsp
596
- token = lookahead
597
- if token.symbol == T_NIL
598
- shift_token
599
- return nil
600
- end
601
- match(T_LPAR)
602
- dsp_type = case_insensitive_string
603
- match(T_SPACE)
604
- param = body_fld_param
605
- match(T_RPAR)
606
- return ContentDisposition.new(dsp_type, param)
1222
+ return if NIL?
1223
+ lpar; dsp_type = case_insensitive__string
1224
+ SP!; param = body_fld_param
1225
+ rpar
1226
+ ContentDisposition.new(dsp_type, param)
607
1227
  end
608
1228
 
1229
+ # body-fld-lang = nstring / "(" string *(SP string) ")"
609
1230
  def body_fld_lang
610
- token = lookahead
611
- if token.symbol == T_LPAR
612
- shift_token
613
- result = []
614
- while true
615
- token = lookahead
616
- case token.symbol
617
- when T_RPAR
618
- shift_token
619
- return result
620
- when T_SPACE
621
- shift_token
622
- end
623
- result.push(case_insensitive_string)
624
- end
1231
+ if lpar?
1232
+ result = [case_insensitive__string]
1233
+ result << case_insensitive__string while SP?
1234
+ rpar
1235
+ result
625
1236
  else
626
- lang = nstring
627
- if lang
628
- return lang.upcase
629
- else
630
- return lang
631
- end
1237
+ case_insensitive__nstring
632
1238
  end
633
1239
  end
634
1240
 
1241
+ # body-extension *(SP body-extension)
635
1242
  def body_extensions
636
1243
  result = []
637
- while true
638
- token = lookahead
639
- case token.symbol
640
- when T_RPAR
641
- return result
642
- when T_SPACE
643
- shift_token
644
- end
645
- result.push(body_extension)
646
- end
1244
+ result << body_extension; while SP? do result << body_extension end
1245
+ result
647
1246
  end
648
1247
 
1248
+ # body-extension = nstring / number / number64 /
1249
+ # "(" body-extension *(SP body-extension) ")"
1250
+ # ; Future expansion. Client implementations
1251
+ # ; MUST accept body-extension fields. Server
1252
+ # ; implementations MUST NOT generate
1253
+ # ; body-extension fields except as defined by
1254
+ # ; future Standard or Standards Track
1255
+ # ; revisions of this specification.
649
1256
  def body_extension
650
- token = lookahead
651
- case token.symbol
652
- when T_LPAR
653
- shift_token
654
- result = body_extensions
655
- match(T_RPAR)
656
- return result
657
- when T_NUMBER
658
- return number
659
- else
660
- return nstring
1257
+ if (uint = number64?) then uint
1258
+ elsif lpar? then exts = body_extensions; rpar; exts
1259
+ else nstring
661
1260
  end
662
1261
  end
663
1262
 
1263
+ # section = "[" [section-spec] "]"
664
1264
  def section
665
- str = String.new
666
- token = match(T_LBRA)
667
- str.concat(token.value)
668
- token = match(T_ATOM, T_NUMBER, T_RBRA)
669
- if token.symbol == T_RBRA
670
- str.concat(token.value)
671
- return str
672
- end
673
- str.concat(token.value)
674
- token = lookahead
675
- if token.symbol == T_SPACE
676
- shift_token
677
- str.concat(token.value)
678
- token = match(T_LPAR)
679
- str.concat(token.value)
680
- while true
681
- token = lookahead
682
- case token.symbol
683
- when T_RPAR
684
- str.concat(token.value)
685
- shift_token
686
- break
687
- when T_SPACE
688
- shift_token
689
- str.concat(token.value)
690
- end
691
- str.concat(format_string(astring))
692
- end
693
- end
694
- token = match(T_RBRA)
695
- str.concat(token.value)
696
- return str
697
- end
698
-
699
- def format_string(str)
700
- case str
701
- when ""
702
- return '""'
703
- when /[\x80-\xff\r\n]/n
704
- # literal
705
- return "{" + str.bytesize.to_s + "}" + CRLF + str
706
- when /[(){ \x00-\x1f\x7f%*"\\]/n
707
- # quoted string
708
- return '"' + str.gsub(/["\\]/n, "\\\\\\&") + '"'
709
- else
710
- # atom
711
- return str
712
- end
713
- end
714
-
715
- def uid_data
716
- token = match(T_ATOM)
717
- name = token.value.upcase
718
- match(T_SPACE)
719
- return name, number
720
- end
721
-
722
- def modseq_data
723
- token = match(T_ATOM)
724
- name = token.value.upcase
725
- match(T_SPACE)
726
- match(T_LPAR)
727
- modseq = number
728
- match(T_RPAR)
729
- return name, modseq
730
- end
731
-
732
- def ignored_response
733
- while lookahead.symbol != T_CRLF
734
- shift_token
735
- end
736
- return IgnoredResponse.new(@str)
737
- end
738
-
739
- def text_response
740
- token = match(T_ATOM)
741
- name = token.value.upcase
742
- match(T_SPACE)
743
- return UntaggedResponse.new(name, text)
744
- end
745
-
746
- def flags_response
747
- token = match(T_ATOM)
748
- name = token.value.upcase
749
- match(T_SPACE)
750
- return UntaggedResponse.new(name, flag_list, @str)
751
- end
752
-
753
- def list_response
754
- token = match(T_ATOM)
755
- name = token.value.upcase
756
- match(T_SPACE)
757
- return UntaggedResponse.new(name, mailbox_list, @str)
758
- end
759
-
1265
+ str = +lbra
1266
+ str << section_spec unless peek_rbra?
1267
+ str << rbra
1268
+ end
1269
+
1270
+ # section-binary = "[" [section-part] "]"
1271
+ def section_binary
1272
+ str = +lbra
1273
+ str << section_part unless peek_rbra?
1274
+ str << rbra
1275
+ end
1276
+
1277
+ # section-spec = section-msgtext / (section-part ["." section-text])
1278
+ # section-msgtext = "HEADER" /
1279
+ # "HEADER.FIELDS" [".NOT"] SP header-list /
1280
+ # "TEXT"
1281
+ # ; top-level or MESSAGE/RFC822 or
1282
+ # ; MESSAGE/GLOBAL part
1283
+ # section-part = nz-number *("." nz-number)
1284
+ # ; body part reference.
1285
+ # ; Allows for accessing nested body parts.
1286
+ # section-text = section-msgtext / "MIME"
1287
+ # ; text other than actual body part (headers,
1288
+ # ; etc.)
1289
+ #
1290
+ # n.b: we could "cheat" here and just grab all text inside the brackets,
1291
+ # but literals would need special treatment.
1292
+ def section_spec
1293
+ str = "".b
1294
+ str << atom # grabs everything up to "SP header-list" or "]"
1295
+ str << " " << header_list if SP?
1296
+ str
1297
+ end
1298
+
1299
+ # header-list = "(" header-fld-name *(SP header-fld-name) ")"
1300
+ def header_list
1301
+ str = +""
1302
+ str << lpar << header_fld_name
1303
+ str << " " << header_fld_name while SP?
1304
+ str << rpar
1305
+ end
1306
+
1307
+ # section-part = nz-number *("." nz-number)
1308
+ # ; body part reference.
1309
+ # ; Allows for accessing nested body parts.
1310
+ alias section_part atom
1311
+
1312
+ # RFC3501 & RFC9051:
1313
+ # header-fld-name = astring
1314
+ #
1315
+ # NOTE: Previously, Net::IMAP recreated the raw original source string.
1316
+ # Now, it grabs the raw encoded value using @str and @pos. A future
1317
+ # version may simply return the decoded astring value. Although that is
1318
+ # technically incompatible, it should almost never make a difference: all
1319
+ # standard header field names are valid atoms:
1320
+ #
1321
+ # https://www.iana.org/assignments/message-headers/message-headers.xhtml
1322
+ #
1323
+ # Although RFC3501 allows any astring, RFC5322-valid header names are one
1324
+ # or more of the printable US-ASCII characters, except SP and colon. So
1325
+ # empty string isn't valid, and literals aren't needed and should not be
1326
+ # used. This is explicitly unchanged by [I18N-HDRS] (RFC6532).
1327
+ #
1328
+ # RFC5233:
1329
+ # optional-field = field-name ":" unstructured CRLF
1330
+ # field-name = 1*ftext
1331
+ # ftext = %d33-57 / ; Printable US-ASCII
1332
+ # %d59-126 ; characters not including
1333
+ # ; ":".
1334
+ def header_fld_name
1335
+ assert_no_lookahead
1336
+ start = @pos
1337
+ astring
1338
+ @str[start...@pos - 1]
1339
+ end
1340
+
1341
+ # mailbox-data = "FLAGS" SP flag-list / "LIST" SP mailbox-list /
1342
+ # "LSUB" SP mailbox-list / "SEARCH" *(SP nz-number) /
1343
+ # "STATUS" SP mailbox SP "(" [status-att-list] ")" /
1344
+ # number SP "EXISTS" / number SP "RECENT"
1345
+
1346
+ def mailbox_data__flags
1347
+ name = label("FLAGS")
1348
+ SP!
1349
+ UntaggedResponse.new(name, flag_list, @str)
1350
+ end
1351
+
1352
+ def mailbox_data__list
1353
+ name = label_in("LIST", "LSUB", "XLIST")
1354
+ SP!
1355
+ UntaggedResponse.new(name, mailbox_list, @str)
1356
+ end
1357
+ alias mailbox_data__lsub mailbox_data__list
1358
+ alias mailbox_data__xlist mailbox_data__list
1359
+
1360
+ # mailbox-list = "(" [mbx-list-flags] ")" SP
1361
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil) SP mailbox
1362
+ # [SP mbox-list-extended]
1363
+ # ; This is the list information pointed to by the ABNF
1364
+ # ; item "mailbox-data", which is defined above
760
1365
  def mailbox_list
761
- attr = flag_list
762
- match(T_SPACE)
763
- token = match(T_QUOTED, T_NIL)
764
- if token.symbol == T_NIL
765
- delim = nil
766
- else
767
- delim = token.value
768
- end
769
- match(T_SPACE)
770
- name = astring
771
- return MailboxList.new(attr, delim, name)
1366
+ lpar; attr = peek_rpar? ? [] : mbx_list_flags; rpar
1367
+ SP!; delim = nquoted
1368
+ SP!; name = mailbox
1369
+ # TODO: mbox-list-extended
1370
+ MailboxList.new(attr, delim, name)
772
1371
  end
773
1372
 
774
- def getquota_response
1373
+ def quota_response
775
1374
  # If quota never established, get back
776
1375
  # `NO Quota root does not exist'.
777
1376
  # If quota removed, get `()' after the
@@ -804,192 +1403,240 @@ module Net
804
1403
  end
805
1404
  end
806
1405
 
807
- def getquotaroot_response
808
- # Similar to getquota, but only admin can use getquota.
809
- token = match(T_ATOM)
810
- name = token.value.upcase
811
- match(T_SPACE)
812
- mailbox = astring
813
- quotaroots = []
814
- while true
815
- token = lookahead
816
- break unless token.symbol == T_SPACE
817
- shift_token
818
- quotaroots.push(astring)
819
- end
820
- data = MailboxQuotaRoot.new(mailbox, quotaroots)
821
- return UntaggedResponse.new(name, data, @str)
822
- end
823
-
824
- def getacl_response
825
- token = match(T_ATOM)
826
- name = token.value.upcase
827
- match(T_SPACE)
828
- mailbox = astring
829
- data = []
830
- token = lookahead
831
- if token.symbol == T_SPACE
832
- shift_token
833
- while true
834
- token = lookahead
835
- case token.symbol
836
- when T_CRLF
837
- break
838
- when T_SPACE
839
- shift_token
840
- end
841
- user = astring
842
- match(T_SPACE)
843
- rights = astring
844
- data.push(MailboxACLItem.new(user, rights, mailbox))
845
- end
846
- end
847
- return UntaggedResponse.new(name, data, @str)
848
- end
849
-
850
- def search_response
851
- token = match(T_ATOM)
852
- name = token.value.upcase
853
- token = lookahead
854
- if token.symbol == T_SPACE
855
- shift_token
856
- data = []
857
- while true
858
- token = lookahead
859
- case token.symbol
860
- when T_CRLF
861
- break
862
- when T_SPACE
863
- shift_token
864
- when T_NUMBER
865
- data.push(number)
866
- when T_LPAR
867
- # TODO: include the MODSEQ value in a response
868
- shift_token
869
- match(T_ATOM)
870
- match(T_SPACE)
871
- match(T_NUMBER)
872
- match(T_RPAR)
873
- end
874
- end
875
- else
876
- data = []
877
- end
878
- return UntaggedResponse.new(name, data, @str)
879
- end
880
-
881
- def thread_response
882
- token = match(T_ATOM)
883
- name = token.value.upcase
884
- token = lookahead
885
-
886
- if token.symbol == T_SPACE
887
- threads = []
888
-
889
- while true
890
- shift_token
891
- token = lookahead
892
-
893
- case token.symbol
894
- when T_LPAR
895
- threads << thread_branch(token)
896
- when T_CRLF
897
- break
898
- end
899
- end
900
- else
901
- # no member
902
- threads = []
903
- end
904
-
905
- return UntaggedResponse.new(name, threads, @str)
906
- end
907
-
908
- def thread_branch(token)
909
- rootmember = nil
910
- lastmember = nil
911
-
912
- while true
913
- shift_token # ignore first T_LPAR
914
- token = lookahead
915
-
916
- case token.symbol
917
- when T_NUMBER
918
- # new member
919
- newmember = ThreadMember.new(number, [])
920
- if rootmember.nil?
921
- rootmember = newmember
922
- else
923
- lastmember.children << newmember
924
- end
925
- lastmember = newmember
926
- when T_SPACE
927
- # do nothing
928
- when T_LPAR
929
- if rootmember.nil?
930
- # dummy member
931
- lastmember = rootmember = ThreadMember.new(nil, [])
932
- end
933
-
934
- lastmember.children << thread_branch(token)
935
- when T_RPAR
936
- break
937
- end
938
- end
939
-
940
- return rootmember
941
- end
942
-
943
- def status_response
1406
+ def quotaroot_response
1407
+ # Similar to getquota, but only admin can use getquota.
944
1408
  token = match(T_ATOM)
945
1409
  name = token.value.upcase
946
1410
  match(T_SPACE)
947
1411
  mailbox = astring
948
- match(T_SPACE)
949
- match(T_LPAR)
950
- attr = {}
1412
+ quotaroots = []
951
1413
  while true
952
1414
  token = lookahead
953
- case token.symbol
954
- when T_RPAR
955
- shift_token
956
- break
957
- when T_SPACE
958
- shift_token
959
- end
960
- token = match(T_ATOM)
961
- key = token.value.upcase
962
- match(T_SPACE)
963
- val = number
964
- attr[key] = val
1415
+ break unless token.symbol == T_SPACE
1416
+ shift_token
1417
+ quotaroots.push(astring)
965
1418
  end
966
- data = StatusData.new(mailbox, attr)
1419
+ data = MailboxQuotaRoot.new(mailbox, quotaroots)
967
1420
  return UntaggedResponse.new(name, data, @str)
968
1421
  end
969
1422
 
970
- def capability_response
1423
+ # acl-data = "ACL" SP mailbox *(SP identifier SP rights)
1424
+ def acl_data
971
1425
  token = match(T_ATOM)
972
1426
  name = token.value.upcase
973
1427
  match(T_SPACE)
974
- UntaggedResponse.new(name, capability_data, @str)
1428
+ mailbox = astring
1429
+ data = []
1430
+ token = lookahead
1431
+ if token.symbol == T_SPACE
1432
+ shift_token
1433
+ while true
1434
+ token = lookahead
1435
+ case token.symbol
1436
+ when T_CRLF
1437
+ break
1438
+ when T_SPACE
1439
+ shift_token
1440
+ end
1441
+ user = astring
1442
+ match(T_SPACE)
1443
+ rights = astring
1444
+ data.push(MailboxACLItem.new(user, rights, mailbox))
1445
+ end
1446
+ end
1447
+ return UntaggedResponse.new(name, data, @str)
975
1448
  end
976
1449
 
977
- def capability_data
1450
+ # RFC3501:
1451
+ # mailbox-data = "SEARCH" *(SP nz-number) / ...
1452
+ # RFC5256: SORT
1453
+ # sort-data = "SORT" *(SP nz-number)
1454
+ # RFC7162: CONDSTORE, QRESYNC
1455
+ # mailbox-data =/ "SEARCH" [1*(SP nz-number) SP
1456
+ # search-sort-mod-seq]
1457
+ # sort-data = "SORT" [1*(SP nz-number) SP
1458
+ # search-sort-mod-seq]
1459
+ # ; Updates the SORT response from RFC 5256.
1460
+ # search-sort-mod-seq = "(" "MODSEQ" SP mod-sequence-value ")"
1461
+ # RFC9051:
1462
+ # mailbox-data = obsolete-search-response / ...
1463
+ # obsolete-search-response = "SEARCH" *(SP nz-number)
1464
+ def mailbox_data__search
1465
+ name = label_in("SEARCH", "SORT")
978
1466
  data = []
979
- while true
980
- token = lookahead
981
- case token.symbol
982
- when T_CRLF, T_RBRA
983
- break
984
- when T_SPACE
985
- shift_token
986
- next
1467
+ while _ = SP? && nz_number? do data << _ end
1468
+ if lpar?
1469
+ label("MODSEQ"); SP!
1470
+ modseq = mod_sequence_value
1471
+ rpar
1472
+ end
1473
+ data = SearchResult.new(data, modseq: modseq)
1474
+ UntaggedResponse.new(name, data, @str)
1475
+ end
1476
+ alias sort_data mailbox_data__search
1477
+
1478
+ # RFC5256: THREAD
1479
+ # thread-data = "THREAD" [SP 1*thread-list]
1480
+ def thread_data
1481
+ name = label("THREAD")
1482
+ threads = []
1483
+ if SP?
1484
+ threads << thread_list while lookahead_thread_list?
1485
+ end
1486
+ UntaggedResponse.new(name, threads, @str)
1487
+ end
1488
+
1489
+ alias lookahead_thread_list? lookahead_lpar?
1490
+ alias lookahead_thread_nested? lookahead_thread_list?
1491
+
1492
+ # RFC5256: THREAD
1493
+ # thread-list = "(" (thread-members / thread-nested) ")"
1494
+ def thread_list
1495
+ lpar
1496
+ thread = if lookahead_thread_nested?
1497
+ ThreadMember.new(nil, thread_nested)
1498
+ else
1499
+ thread_members
1500
+ end
1501
+ rpar
1502
+ thread
1503
+ end
1504
+
1505
+ # RFC5256: THREAD
1506
+ # thread-members = nz-number *(SP nz-number) [SP thread-nested]
1507
+ def thread_members
1508
+ members = []
1509
+ members << nz_number # thread root
1510
+ while SP?
1511
+ case lookahead!(T_NUMBER, T_LPAR).symbol
1512
+ when T_NUMBER then members << nz_number
1513
+ else nested = thread_nested; break
987
1514
  end
988
- data.push(atom.upcase)
989
1515
  end
990
- data
1516
+ members.reverse.inject(nested || []) {|subthreads, number|
1517
+ [ThreadMember.new(number, subthreads)]
1518
+ }.first
1519
+ end
1520
+
1521
+ # RFC5256: THREAD
1522
+ # thread-nested = 2*thread-list
1523
+ def thread_nested
1524
+ nested = [thread_list, thread_list]
1525
+ while lookahead_thread_list? do nested << thread_list end
1526
+ nested
1527
+ end
1528
+
1529
+ # mailbox-data =/ "STATUS" SP mailbox SP "(" [status-att-list] ")"
1530
+ def mailbox_data__status
1531
+ resp_name = label("STATUS"); SP!
1532
+ mbox_name = mailbox; SP!
1533
+ lpar; attr = status_att_list; rpar
1534
+ UntaggedResponse.new(resp_name, StatusData.new(mbox_name, attr), @str)
1535
+ end
1536
+
1537
+ # RFC3501
1538
+ # status-att-list = status-att SP number *(SP status-att SP number)
1539
+ # RFC4466, RFC9051, and RFC3501 Errata
1540
+ # status-att-list = status-att-val *(SP status-att-val)
1541
+ def status_att_list
1542
+ attrs = [status_att_val]
1543
+ while SP? do attrs << status_att_val end
1544
+ attrs.to_h
1545
+ end
1546
+
1547
+ # RFC3501 Errata:
1548
+ # status-att-val = ("MESSAGES" SP number) / ("RECENT" SP number) /
1549
+ # ("UIDNEXT" SP nz-number) / ("UIDVALIDITY" SP nz-number) /
1550
+ # ("UNSEEN" SP number)
1551
+ # RFC4466:
1552
+ # status-att-val = ("MESSAGES" SP number) /
1553
+ # ("RECENT" SP number) /
1554
+ # ("UIDNEXT" SP nz-number) /
1555
+ # ("UIDVALIDITY" SP nz-number) /
1556
+ # ("UNSEEN" SP number)
1557
+ # ;; Extensions to the STATUS responses
1558
+ # ;; should extend this production.
1559
+ # ;; Extensions should use the generic
1560
+ # ;; syntax defined by tagged-ext.
1561
+ # RFC9051:
1562
+ # status-att-val = ("MESSAGES" SP number) /
1563
+ # ("UIDNEXT" SP nz-number) /
1564
+ # ("UIDVALIDITY" SP nz-number) /
1565
+ # ("UNSEEN" SP number) /
1566
+ # ("DELETED" SP number) /
1567
+ # ("SIZE" SP number64)
1568
+ # ; Extensions to the STATUS responses
1569
+ # ; should extend this production.
1570
+ # ; Extensions should use the generic
1571
+ # ; syntax defined by tagged-ext.
1572
+ # RFC7162:
1573
+ # status-att-val =/ "HIGHESTMODSEQ" SP mod-sequence-valzer
1574
+ # ;; Extends non-terminal defined in [RFC4466].
1575
+ # ;; Value 0 denotes that the mailbox doesn't
1576
+ # ;; support persistent mod-sequences
1577
+ # ;; as described in Section 3.1.2.2.
1578
+ # RFC7889:
1579
+ # status-att-val =/ "APPENDLIMIT" SP (number / nil)
1580
+ # ;; status-att-val is defined in RFC 4466
1581
+ # RFC8438:
1582
+ # status-att-val =/ "SIZE" SP number64
1583
+ # RFC8474:
1584
+ # status-att-val =/ "MAILBOXID" SP "(" objectid ")"
1585
+ # ; follows tagged-ext production from [RFC4466]
1586
+ def status_att_val
1587
+ key = tagged_ext_label
1588
+ SP!
1589
+ val =
1590
+ case key
1591
+ when "MESSAGES" then number # RFC3501, RFC9051
1592
+ when "UNSEEN" then number # RFC3501, RFC9051
1593
+ when "DELETED" then number # RFC9051
1594
+ when "UIDNEXT" then nz_number # RFC3501, RFC9051
1595
+ when "UIDVALIDITY" then nz_number # RFC3501, RFC9051
1596
+ when "RECENT" then number # RFC3501 (obsolete)
1597
+ when "SIZE" then number64 # RFC8438, RFC9051
1598
+ when "HIGHESTMODSEQ" then mod_sequence_valzer # RFC7162
1599
+ when "MAILBOXID" then parens__objectid # RFC8474
1600
+ else
1601
+ number? || ExtensionData.new(tagged_ext_val)
1602
+ end
1603
+ [key, val]
1604
+ end
1605
+
1606
+ # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
1607
+ # The grammar rule is used by both response-data and resp-text-code.
1608
+ # But this method only returns UntaggedResponse (response-data).
1609
+ #
1610
+ # RFC3501:
1611
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1612
+ # *(SP capability)
1613
+ # RFC9051:
1614
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1615
+ # *(SP capability)
1616
+ def capability_data__untagged
1617
+ UntaggedResponse.new label("CAPABILITY"), capability__list, @str
1618
+ end
1619
+
1620
+ # enable-data = "ENABLED" *(SP capability)
1621
+ def enable_data
1622
+ UntaggedResponse.new label("ENABLED"), capability__list, @str
1623
+ end
1624
+
1625
+ # As a workaround for buggy servers, allow a trailing SP:
1626
+ # *(SP capability) [SP]
1627
+ def capability__list
1628
+ list = []; while SP? && (capa = capability?) do list << capa end; list
991
1629
  end
992
1630
 
1631
+ alias resp_code__capability capability__list
1632
+
1633
+ # capability = ("AUTH=" auth-type) / atom
1634
+ # ; New capabilities MUST begin with "X" or be
1635
+ # ; registered with IANA as standard or
1636
+ # ; standards-track
1637
+ alias capability case_insensitive__atom
1638
+ alias capability? case_insensitive__atom?
1639
+
993
1640
  def id_response
994
1641
  token = match(T_ATOM)
995
1642
  name = token.value.upcase
@@ -1019,147 +1666,185 @@ module Net
1019
1666
  end
1020
1667
  end
1021
1668
 
1669
+ # namespace-response = "NAMESPACE" SP namespace
1670
+ # SP namespace SP namespace
1671
+ # ; The first Namespace is the Personal Namespace(s).
1672
+ # ; The second Namespace is the Other Users'
1673
+ # ; Namespace(s).
1674
+ # ; The third Namespace is the Shared Namespace(s).
1022
1675
  def namespace_response
1676
+ name = label("NAMESPACE")
1023
1677
  @lex_state = EXPR_DATA
1024
- token = lookahead
1025
- token = match(T_ATOM)
1026
- name = token.value.upcase
1027
- match(T_SPACE)
1028
- personal = namespaces
1029
- match(T_SPACE)
1030
- other = namespaces
1031
- match(T_SPACE)
1032
- shared = namespaces
1678
+ data = Namespaces.new((SP!; namespace),
1679
+ (SP!; namespace),
1680
+ (SP!; namespace))
1681
+ UntaggedResponse.new(name, data, @str)
1682
+ ensure
1033
1683
  @lex_state = EXPR_BEG
1034
- data = Namespaces.new(personal, other, shared)
1035
- return UntaggedResponse.new(name, data, @str)
1036
- end
1037
-
1038
- def namespaces
1039
- token = lookahead
1040
- # empty () is not allowed, so nil is functionally identical to empty.
1041
- data = []
1042
- if token.symbol == T_NIL
1043
- shift_token
1044
- else
1045
- match(T_LPAR)
1046
- loop do
1047
- data << namespace
1048
- break unless lookahead.symbol == T_SPACE
1049
- shift_token
1050
- end
1051
- match(T_RPAR)
1052
- end
1053
- data
1054
1684
  end
1055
1685
 
1686
+ # namespace = nil / "(" 1*namespace-descr ")"
1056
1687
  def namespace
1057
- match(T_LPAR)
1058
- prefix = match(T_QUOTED, T_LITERAL).value
1059
- match(T_SPACE)
1060
- delimiter = string
1688
+ NIL? and return []
1689
+ lpar
1690
+ list = [namespace_descr]
1691
+ list << namespace_descr until rpar?
1692
+ list
1693
+ end
1694
+
1695
+ # namespace-descr = "(" string SP
1696
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil)
1697
+ # [namespace-response-extensions] ")"
1698
+ def namespace_descr
1699
+ lpar
1700
+ prefix = string; SP!
1701
+ delimiter = nquoted # n.b: should only accept single char
1061
1702
  extensions = namespace_response_extensions
1062
- match(T_RPAR)
1703
+ rpar
1063
1704
  Namespace.new(prefix, delimiter, extensions)
1064
1705
  end
1065
1706
 
1707
+ # namespace-response-extensions = *namespace-response-extension
1708
+ # namespace-response-extension = SP string SP
1709
+ # "(" string *(SP string) ")"
1066
1710
  def namespace_response_extensions
1067
1711
  data = {}
1068
- token = lookahead
1069
- if token.symbol == T_SPACE
1070
- shift_token
1071
- name = match(T_QUOTED, T_LITERAL).value
1712
+ while SP?
1713
+ name = string; SP!
1714
+ lpar
1072
1715
  data[name] ||= []
1073
- match(T_SPACE)
1074
- match(T_LPAR)
1075
- loop do
1076
- data[name].push match(T_QUOTED, T_LITERAL).value
1077
- break unless lookahead.symbol == T_SPACE
1078
- shift_token
1079
- end
1080
- match(T_RPAR)
1716
+ data[name] << string
1717
+ data[name] << string while SP?
1718
+ rpar
1081
1719
  end
1082
1720
  data
1083
1721
  end
1084
1722
 
1085
- # text = 1*TEXT-CHAR
1086
- # TEXT-CHAR = <any CHAR except CR and LF>
1723
+ # TEXT-CHAR = <any CHAR except CR and LF>
1724
+ # RFC3501:
1725
+ # text = 1*TEXT-CHAR
1726
+ # RFC9051:
1727
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
1728
+ # ; Non-ASCII text can only be returned
1729
+ # ; after ENABLE IMAP4rev2 command
1087
1730
  def text
1088
- match(T_TEXT, lex_state: EXPR_TEXT).value
1731
+ match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8")
1732
+ end
1733
+
1734
+ # an "accept" version of #text
1735
+ def text?
1736
+ accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8")
1089
1737
  end
1090
1738
 
1091
- # resp-text = ["[" resp-text-code "]" SP] text
1739
+ # RFC3501:
1740
+ # resp-text = ["[" resp-text-code "]" SP] text
1741
+ # RFC9051:
1742
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1743
+ #
1744
+ # We leniently re-interpret this as
1745
+ # resp-text = ("[" resp-text-code "]" [SP [text]]) / [text]
1092
1746
  def resp_text
1093
- token = match(T_LBRA, T_TEXT, lex_state: EXPR_RTEXT)
1094
- case token.symbol
1095
- when T_LBRA
1096
- code = resp_text_code
1097
- match(T_RBRA)
1098
- accept_space # violating RFC
1099
- ResponseText.new(code, text)
1100
- when T_TEXT
1101
- ResponseText.new(nil, token.value)
1747
+ if lbra?
1748
+ code = resp_text_code; rbra
1749
+ ResponseText.new(code, SP? && text? || "")
1750
+ else
1751
+ ResponseText.new(nil, text? || "")
1102
1752
  end
1103
1753
  end
1104
1754
 
1105
- # See https://www.rfc-editor.org/errata/rfc3501
1755
+ # RFC3501 (See https://www.rfc-editor.org/errata/rfc3501):
1756
+ # resp-text-code = "ALERT" /
1757
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1758
+ # capability-data / "PARSE" /
1759
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1760
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1761
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1762
+ # "UNSEEN" SP nz-number /
1763
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1764
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1765
+ # *(SP capability)
1106
1766
  #
1107
- # resp-text-code = "ALERT" /
1108
- # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1109
- # capability-data / "PARSE" /
1110
- # "PERMANENTFLAGS" SP "("
1111
- # [flag-perm *(SP flag-perm)] ")" /
1112
- # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1113
- # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1114
- # "UNSEEN" SP nz-number /
1115
- # atom [SP 1*<any TEXT-CHAR except "]">]
1767
+ # RFC5530:
1768
+ # resp-text-code =/ "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1769
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1770
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1771
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1772
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1773
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1774
+ # "NONEXISTENT"
1775
+ # RFC9051:
1776
+ # resp-text-code = "ALERT" /
1777
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1778
+ # capability-data / "PARSE" /
1779
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1780
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1781
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1782
+ # resp-code-apnd / resp-code-copy / "UIDNOTSTICKY" /
1783
+ # "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1784
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1785
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1786
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1787
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1788
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1789
+ # "NONEXISTENT" / "NOTSAVED" / "HASCHILDREN" /
1790
+ # "CLOSED" /
1791
+ # "UNKNOWN-CTE" /
1792
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1793
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1794
+ # *(SP capability)
1116
1795
  #
1117
- # +UIDPLUS+ ABNF:: https://www.rfc-editor.org/rfc/rfc4315.html#section-4
1118
- # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1796
+ # RFC4315 (UIDPLUS), RFC9051 (IMAP4rev2):
1797
+ # resp-code-apnd = "APPENDUID" SP nz-number SP append-uid
1798
+ # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1799
+ # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1800
+ #
1801
+ # RFC7162 (CONDSTORE):
1802
+ # resp-text-code =/ "HIGHESTMODSEQ" SP mod-sequence-value /
1803
+ # "NOMODSEQ" /
1804
+ # "MODIFIED" SP sequence-set
1805
+ # RFC7162 (QRESYNC):
1806
+ # resp-text-code =/ "CLOSED"
1807
+ #
1808
+ # RFC8474: OBJECTID
1809
+ # resp-text-code =/ "MAILBOXID" SP "(" objectid ")"
1119
1810
  def resp_text_code
1120
- token = match(T_ATOM)
1121
- name = token.value.upcase
1122
- case name
1123
- when /\A(?:ALERT|PARSE|READ-ONLY|READ-WRITE|TRYCREATE|NOMODSEQ)\z/n
1124
- result = ResponseCode.new(name, nil)
1125
- when /\A(?:BADCHARSET)\z/n
1126
- result = ResponseCode.new(name, charset_list)
1127
- when /\A(?:CAPABILITY)\z/ni
1128
- result = ResponseCode.new(name, capability_data)
1129
- when /\A(?:PERMANENTFLAGS)\z/n
1130
- match(T_SPACE)
1131
- result = ResponseCode.new(name, flag_list)
1132
- when /\A(?:UIDVALIDITY|UIDNEXT|UNSEEN)\z/n
1133
- match(T_SPACE)
1134
- result = ResponseCode.new(name, number)
1135
- when /\A(?:APPENDUID)\z/n
1136
- result = ResponseCode.new(name, resp_code_apnd__data)
1137
- when /\A(?:COPYUID)\z/n
1138
- result = ResponseCode.new(name, resp_code_copy__data)
1139
- else
1140
- token = lookahead
1141
- if token.symbol == T_SPACE
1142
- shift_token
1143
- token = match(T_TEXT, lex_state: EXPR_CTEXT)
1144
- result = ResponseCode.new(name, token.value)
1811
+ name = resp_text_code__name
1812
+ data =
1813
+ case name
1814
+ when "CAPABILITY" then resp_code__capability
1815
+ when "PERMANENTFLAGS" then SP? ? flag_perm__list : []
1816
+ when "UIDNEXT" then SP!; nz_number
1817
+ when "UIDVALIDITY" then SP!; nz_number
1818
+ when "UNSEEN" then SP!; nz_number # rev1 only
1819
+ when "APPENDUID" then SP!; resp_code_apnd__data # rev2, UIDPLUS
1820
+ when "COPYUID" then SP!; resp_code_copy__data # rev2, UIDPLUS
1821
+ when "BADCHARSET" then SP? ? charset__list : []
1822
+ when "ALERT", "PARSE", "READ-ONLY", "READ-WRITE", "TRYCREATE",
1823
+ "UNAVAILABLE", "AUTHENTICATIONFAILED", "AUTHORIZATIONFAILED",
1824
+ "EXPIRED", "PRIVACYREQUIRED", "CONTACTADMIN", "NOPERM", "INUSE",
1825
+ "EXPUNGEISSUED", "CORRUPTION", "SERVERBUG", "CLIENTBUG", "CANNOT",
1826
+ "LIMIT", "OVERQUOTA", "ALREADYEXISTS", "NONEXISTENT", "CLOSED",
1827
+ "NOTSAVED", "UIDNOTSTICKY", "UNKNOWN-CTE", "HASCHILDREN"
1828
+ when "NOMODSEQ" then nil # CONDSTORE
1829
+ when "HIGHESTMODSEQ" then SP!; mod_sequence_value # CONDSTORE
1830
+ when "MODIFIED" then SP!; sequence_set # CONDSTORE
1831
+ when "MAILBOXID" then SP!; parens__objectid # RFC8474: OBJECTID
1145
1832
  else
1146
- result = ResponseCode.new(name, nil)
1833
+ SP? and text_chars_except_rbra
1147
1834
  end
1148
- end
1149
- return result
1835
+ ResponseCode.new(name, data)
1150
1836
  end
1151
1837
 
1152
- def charset_list
1153
- result = []
1154
- if accept(T_SPACE)
1155
- match(T_LPAR)
1156
- result << charset
1157
- while accept(T_SPACE)
1158
- result << charset
1159
- end
1160
- match(T_RPAR)
1161
- end
1162
- result
1838
+ alias resp_text_code__name case_insensitive__atom
1839
+
1840
+ # 1*<any TEXT-CHAR except "]">
1841
+ def text_chars_except_rbra
1842
+ match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')[0]
1843
+ end
1844
+
1845
+ # "(" charset *(SP charset) ")"
1846
+ def charset__list
1847
+ lpar; list = [charset]; while SP? do list << charset end; rpar; list
1163
1848
  end
1164
1849
 
1165
1850
  # already matched: "APPENDUID"
@@ -1175,8 +1860,8 @@ module Net
1175
1860
  # match uid_set even if that returns a single-member array.
1176
1861
  #
1177
1862
  def resp_code_apnd__data
1178
- match(T_SPACE); validity = number
1179
- match(T_SPACE); dst_uids = uid_set # uniqueid ⊂ uid-set
1863
+ validity = number; SP!
1864
+ dst_uids = uid_set # uniqueid ⊂ uid-set
1180
1865
  UIDPlusData.new(validity, nil, dst_uids)
1181
1866
  end
1182
1867
 
@@ -1184,187 +1869,125 @@ module Net
1184
1869
  #
1185
1870
  # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1186
1871
  def resp_code_copy__data
1187
- match(T_SPACE); validity = number
1188
- match(T_SPACE); src_uids = uid_set
1189
- match(T_SPACE); dst_uids = uid_set
1872
+ validity = number; SP!
1873
+ src_uids = uid_set; SP!
1874
+ dst_uids = uid_set
1190
1875
  UIDPlusData.new(validity, src_uids, dst_uids)
1191
1876
  end
1192
1877
 
1193
- def address_list
1194
- token = lookahead
1195
- if token.symbol == T_NIL
1196
- shift_token
1197
- return nil
1198
- else
1199
- result = []
1200
- match(T_LPAR)
1201
- while true
1202
- token = lookahead
1203
- case token.symbol
1204
- when T_RPAR
1205
- shift_token
1206
- break
1207
- when T_SPACE
1208
- shift_token
1209
- end
1210
- result.push(address)
1211
- end
1212
- return result
1213
- end
1214
- end
1215
-
1216
- ADDRESS_REGEXP = /\G\
1217
- (?# 1: NAME )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1218
- (?# 2: ROUTE )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1219
- (?# 3: MAILBOX )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1220
- (?# 4: HOST )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)")\
1221
- \)/ni
1222
-
1878
+ ADDRESS_REGEXP = /\G
1879
+ \( (?: NIL | #{Patterns::QUOTED_rev2} ) # 1: NAME
1880
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 2: ROUTE
1881
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 3: MAILBOX
1882
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 4: HOST
1883
+ \)
1884
+ /nix
1885
+
1886
+ # address = "(" addr-name SP addr-adl SP addr-mailbox SP
1887
+ # addr-host ")"
1888
+ # addr-adl = nstring
1889
+ # addr-host = nstring
1890
+ # addr-mailbox = nstring
1891
+ # addr-name = nstring
1223
1892
  def address
1224
- match(T_LPAR)
1225
- if @str.index(ADDRESS_REGEXP, @pos)
1226
- # address does not include literal.
1227
- @pos = $~.end(0)
1228
- name = $1
1229
- route = $2
1230
- mailbox = $3
1231
- host = $4
1232
- for s in [name, route, mailbox, host]
1233
- if s
1234
- s.gsub!(/\\(["\\])/n, "\\1")
1235
- end
1236
- end
1237
- else
1238
- name = nstring
1239
- match(T_SPACE)
1240
- route = nstring
1241
- match(T_SPACE)
1242
- mailbox = nstring
1243
- match(T_SPACE)
1244
- host = nstring
1245
- match(T_RPAR)
1246
- end
1247
- return Address.new(name, route, mailbox, host)
1248
- end
1249
-
1250
- FLAG_REGEXP = /\
1251
- (?# FLAG )\\([^\x80-\xff(){ \x00-\x1f\x7f%"\\]+)|\
1252
- (?# ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\]+)/n
1253
-
1893
+ if (match = accept_re(ADDRESS_REGEXP))
1894
+ # note that "NIL" isn't captured by the regexp
1895
+ name, route, mailbox, host = match.captures
1896
+ .map { Patterns.unescape_quoted _1 }
1897
+ else # address may include literals
1898
+ lpar; name = addr_name
1899
+ SP!; route = addr_adl
1900
+ SP!; mailbox = addr_mailbox
1901
+ SP!; host = addr_host
1902
+ rpar
1903
+ end
1904
+ Address.new(name, route, mailbox, host)
1905
+ end
1906
+
1907
+ alias addr_adl nstring
1908
+ alias addr_host nstring
1909
+ alias addr_mailbox nstring
1910
+ alias addr_name nstring
1911
+
1912
+ # flag-list = "(" [flag *(SP flag)] ")"
1254
1913
  def flag_list
1255
- if @str.index(/\(([^)]*)\)/ni, @pos)
1256
- @pos = $~.end(0)
1257
- return $1.scan(FLAG_REGEXP).collect { |flag, atom|
1258
- if atom
1259
- atom
1260
- else
1261
- flag.capitalize.intern
1262
- end
1263
- }
1914
+ if (match = accept_re(Patterns::FLAG_LIST))
1915
+ match[1].split(nil)
1916
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1264
1917
  else
1265
- parse_error("invalid flag list")
1918
+ quirky__flag_list "flags-list"
1266
1919
  end
1267
1920
  end
1268
1921
 
1269
- def nstring
1270
- token = lookahead
1271
- if token.symbol == T_NIL
1272
- shift_token
1273
- return nil
1922
+ # "(" [flag-perm *(SP flag-perm)] ")"
1923
+ def flag_perm__list
1924
+ if (match = accept_re(Patterns::FLAG_PERM_LIST))
1925
+ match[1].split(nil)
1926
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1274
1927
  else
1275
- return string
1928
+ quirky__flag_list "PERMANENTFLAGS flag-perm list"
1276
1929
  end
1277
1930
  end
1278
1931
 
1279
- def astring
1280
- token = lookahead
1281
- if string_token?(token)
1282
- return string
1283
- else
1284
- return astring_chars
1285
- end
1932
+ # This allows illegal "]" in flag names (Gmail),
1933
+ # or "\*" in a FLAGS response (greenmail).
1934
+ def quirky__flag_list(name)
1935
+ match_re(Patterns::QUIRKY_FLAGS_LIST, "quirks mode #{name}")[1]
1936
+ .scan(Patterns::QUIRKY_FLAG)
1937
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1286
1938
  end
1287
1939
 
1288
- def string
1289
- token = lookahead
1290
- if token.symbol == T_NIL
1291
- shift_token
1292
- return nil
1293
- end
1294
- token = match(T_QUOTED, T_LITERAL)
1295
- return token.value
1940
+ # See Patterns::MBX_LIST_FLAGS
1941
+ def mbx_list_flags
1942
+ match_re(Patterns::MBX_LIST_FLAGS, "mbx-list-flags")[1]
1943
+ .split(nil)
1944
+ .map! { _1.delete_prefix!("\\"); _1.capitalize.to_sym }
1296
1945
  end
1297
1946
 
1298
- STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1299
-
1300
- def string_token?(token)
1301
- return STRING_TOKENS.include?(token.symbol)
1302
- end
1947
+ # See https://developers.google.com/gmail/imap/imap-extensions
1948
+ def x_gm_label; accept(T_BSLASH) ? atom.capitalize.to_sym : astring end
1303
1949
 
1304
- def case_insensitive_string
1305
- token = lookahead
1306
- if token.symbol == T_NIL
1307
- shift_token
1308
- return nil
1309
- end
1310
- token = match(T_QUOTED, T_LITERAL)
1311
- return token.value.upcase
1950
+ # See https://developers.google.com/gmail/imap/imap-extensions
1951
+ def x_gm_labels
1952
+ lpar; return [] if rpar?
1953
+ labels = []
1954
+ labels << x_gm_label
1955
+ labels << x_gm_label while SP?
1956
+ rpar
1957
+ labels
1312
1958
  end
1313
1959
 
1314
- # atom = 1*ATOM-CHAR
1315
- # ATOM-CHAR = <any CHAR except atom-specials>
1316
- ATOM_TOKENS = [
1317
- T_ATOM,
1318
- T_NUMBER,
1319
- T_NIL,
1320
- T_LBRA,
1321
- T_PLUS
1322
- ]
1960
+ # See https://www.rfc-editor.org/errata/rfc3501
1961
+ #
1962
+ # charset = atom / quoted
1963
+ def charset; quoted? || atom end
1323
1964
 
1324
- def atom
1325
- -combine_adjacent(*ATOM_TOKENS)
1326
- end
1965
+ # RFC7162:
1966
+ # mod-sequence-value = 1*DIGIT
1967
+ # ;; Positive unsigned 63-bit integer
1968
+ # ;; (mod-sequence)
1969
+ # ;; (1 <= n <= 9,223,372,036,854,775,807).
1970
+ alias mod_sequence_value nz_number64
1327
1971
 
1328
- # ASTRING-CHAR = ATOM-CHAR / resp-specials
1329
- # resp-specials = "]"
1330
- ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
1972
+ # RFC7162:
1973
+ # permsg-modsequence = mod-sequence-value
1974
+ # ;; Per-message mod-sequence.
1975
+ alias permsg_modsequence mod_sequence_value
1331
1976
 
1332
- def astring_chars
1333
- combine_adjacent(*ASTRING_CHARS_TOKENS)
1334
- end
1977
+ # RFC7162:
1978
+ # mod-sequence-valzer = "0" / mod-sequence-value
1979
+ alias mod_sequence_valzer number64
1335
1980
 
1336
- def combine_adjacent(*tokens)
1337
- result = "".b
1338
- while token = accept(*tokens)
1339
- result << token.value
1340
- end
1341
- if result.empty?
1342
- parse_error('unexpected token %s (expected %s)',
1343
- lookahead.symbol, args.join(" or "))
1344
- end
1345
- result
1346
- end
1981
+ def parens__modseq; lpar; _ = permsg_modsequence; rpar; _ end
1347
1982
 
1348
- # See https://www.rfc-editor.org/errata/rfc3501
1349
- #
1350
- # charset = atom / quoted
1351
- def charset
1352
- if token = accept(T_QUOTED)
1353
- token.value
1354
- else
1355
- atom
1356
- end
1357
- end
1983
+ # RFC8474:
1984
+ # objectid = 1*255(ALPHA / DIGIT / "_" / "-")
1985
+ # ; characters in object identifiers are case
1986
+ # ; significant
1987
+ alias objectid atom
1358
1988
 
1359
- def number
1360
- token = lookahead
1361
- if token.symbol == T_NIL
1362
- shift_token
1363
- return nil
1364
- end
1365
- token = match(T_NUMBER)
1366
- return token.value.to_i
1367
- end
1989
+ def parens__objectid; lpar; _ = objectid; rpar; _ end
1990
+ def nparens__objectid; NIL? ? nil : parens__objectid end
1368
1991
 
1369
1992
  # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1370
1993
  # uid-set = (uniqueid / uid-range) *("," uid-set)
@@ -1393,64 +2016,15 @@ module Net
1393
2016
 
1394
2017
  SPACES_REGEXP = /\G */n
1395
2018
 
1396
- # This advances @pos directly so it's safe before changing @lex_state.
1397
- def accept_space
1398
- if @token
1399
- shift_token if @token.symbol == T_SPACE
1400
- elsif @str[@pos] == " "
1401
- @pos += 1
1402
- end
1403
- end
1404
-
1405
2019
  # The RFC is very strict about this and usually we should be too.
1406
2020
  # But skipping spaces is usually a safe workaround for buggy servers.
1407
2021
  #
1408
2022
  # This advances @pos directly so it's safe before changing @lex_state.
1409
2023
  def accept_spaces
1410
- shift_token if @token&.symbol == T_SPACE
1411
- if @str.index(SPACES_REGEXP, @pos)
2024
+ return false unless SP?
2025
+ @str.index(SPACES_REGEXP, @pos) and
1412
2026
  @pos = $~.end(0)
1413
- end
1414
- end
1415
-
1416
- def match(*args, lex_state: @lex_state)
1417
- if @token && lex_state != @lex_state
1418
- parse_error("invalid lex_state change to %s with unconsumed token",
1419
- lex_state)
1420
- end
1421
- begin
1422
- @lex_state, original_lex_state = lex_state, @lex_state
1423
- token = lookahead
1424
- unless args.include?(token.symbol)
1425
- parse_error('unexpected token %s (expected %s)',
1426
- token.symbol.id2name,
1427
- args.collect {|i| i.id2name}.join(" or "))
1428
- end
1429
- shift_token
1430
- return token
1431
- ensure
1432
- @lex_state = original_lex_state
1433
- end
1434
- end
1435
-
1436
- # like match, but does not raise error on failure.
1437
- #
1438
- # returns and shifts token on successful match
1439
- # returns nil and leaves @token unshifted on no match
1440
- def accept(*args)
1441
- token = lookahead
1442
- if args.include?(token.symbol)
1443
- shift_token
1444
- token
1445
- end
1446
- end
1447
-
1448
- def lookahead
1449
- @token ||= next_token
1450
- end
1451
-
1452
- def shift_token
1453
- @token = nil
2027
+ true
1454
2028
  end
1455
2029
 
1456
2030
  def next_token
@@ -1461,38 +2035,46 @@ module Net
1461
2035
  if $1
1462
2036
  return Token.new(T_SPACE, $+)
1463
2037
  elsif $2
1464
- return Token.new(T_NIL, $+)
1465
- elsif $3
1466
- return Token.new(T_NUMBER, $+)
2038
+ len = $+.to_i
2039
+ val = @str[@pos, len]
2040
+ @pos += len
2041
+ return Token.new(T_LITERAL8, val)
2042
+ elsif $3 && $7
2043
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
2044
+ return Token.new(T_ATOM, $3)
1467
2045
  elsif $4
1468
- return Token.new(T_ATOM, $+)
2046
+ return Token.new(T_NIL, $+)
1469
2047
  elsif $5
1470
- return Token.new(T_QUOTED,
1471
- $+.gsub(/\\(["\\])/n, "\\1"))
2048
+ return Token.new(T_NUMBER, $+)
1472
2049
  elsif $6
2050
+ return Token.new(T_PLUS, $+)
2051
+ elsif $8
2052
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
2053
+ return Token.new(T_ATOM, $+)
2054
+ elsif $9
2055
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
2056
+ elsif $10
1473
2057
  return Token.new(T_LPAR, $+)
1474
- elsif $7
2058
+ elsif $11
1475
2059
  return Token.new(T_RPAR, $+)
1476
- elsif $8
2060
+ elsif $12
1477
2061
  return Token.new(T_BSLASH, $+)
1478
- elsif $9
2062
+ elsif $13
1479
2063
  return Token.new(T_STAR, $+)
1480
- elsif $10
2064
+ elsif $14
1481
2065
  return Token.new(T_LBRA, $+)
1482
- elsif $11
2066
+ elsif $15
1483
2067
  return Token.new(T_RBRA, $+)
1484
- elsif $12
2068
+ elsif $16
1485
2069
  len = $+.to_i
1486
2070
  val = @str[@pos, len]
1487
2071
  @pos += len
1488
2072
  return Token.new(T_LITERAL, val)
1489
- elsif $13
1490
- return Token.new(T_PLUS, $+)
1491
- elsif $14
2073
+ elsif $17
1492
2074
  return Token.new(T_PERCENT, $+)
1493
- elsif $15
2075
+ elsif $18
1494
2076
  return Token.new(T_CRLF, $+)
1495
- elsif $16
2077
+ elsif $19
1496
2078
  return Token.new(T_EOF, $+)
1497
2079
  else
1498
2080
  parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
@@ -1511,8 +2093,7 @@ module Net
1511
2093
  elsif $3
1512
2094
  return Token.new(T_NUMBER, $+)
1513
2095
  elsif $4
1514
- return Token.new(T_QUOTED,
1515
- $+.gsub(/\\(["\\])/n, "\\1"))
2096
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1516
2097
  elsif $5
1517
2098
  len = $+.to_i
1518
2099
  val = @str[@pos, len]
@@ -1529,63 +2110,11 @@ module Net
1529
2110
  @str.index(/\S*/n, @pos)
1530
2111
  parse_error("unknown token - %s", $&.dump)
1531
2112
  end
1532
- when EXPR_TEXT
1533
- if @str.index(TEXT_REGEXP, @pos)
1534
- @pos = $~.end(0)
1535
- if $1
1536
- return Token.new(T_TEXT, $+)
1537
- else
1538
- parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid")
1539
- end
1540
- else
1541
- @str.index(/\S*/n, @pos)
1542
- parse_error("unknown token - %s", $&.dump)
1543
- end
1544
- when EXPR_RTEXT
1545
- if @str.index(RTEXT_REGEXP, @pos)
1546
- @pos = $~.end(0)
1547
- if $1
1548
- return Token.new(T_LBRA, $+)
1549
- elsif $2
1550
- return Token.new(T_TEXT, $+)
1551
- else
1552
- parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid")
1553
- end
1554
- else
1555
- @str.index(/\S*/n, @pos)
1556
- parse_error("unknown token - %s", $&.dump)
1557
- end
1558
- when EXPR_CTEXT
1559
- if @str.index(CTEXT_REGEXP, @pos)
1560
- @pos = $~.end(0)
1561
- if $1
1562
- return Token.new(T_TEXT, $+)
1563
- else
1564
- parse_error("[Net::IMAP BUG] CTEXT_REGEXP is invalid")
1565
- end
1566
- else
1567
- @str.index(/\S*/n, @pos) #/
1568
- parse_error("unknown token - %s", $&.dump)
1569
- end
1570
2113
  else
1571
2114
  parse_error("invalid @lex_state - %s", @lex_state.inspect)
1572
2115
  end
1573
2116
  end
1574
2117
 
1575
- def parse_error(fmt, *args)
1576
- if IMAP.debug
1577
- $stderr.printf("@str: %s\n", @str.dump)
1578
- $stderr.printf("@pos: %d\n", @pos)
1579
- $stderr.printf("@lex_state: %s\n", @lex_state)
1580
- if @token
1581
- $stderr.printf("@token.symbol: %s\n", @token.symbol)
1582
- $stderr.printf("@token.value: %s\n", @token.value.inspect)
1583
- end
1584
- end
1585
- raise ResponseParseError, format(fmt, *args)
1586
- end
1587
2118
  end
1588
-
1589
2119
  end
1590
-
1591
2120
  end