net-imap 0.3.4 → 0.5.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/BSDL +22 -0
  3. data/COPYING +56 -0
  4. data/Gemfile +14 -0
  5. data/LICENSE.txt +3 -22
  6. data/README.md +25 -8
  7. data/Rakefile +0 -7
  8. data/docs/styles.css +72 -23
  9. data/lib/net/imap/authenticators.rb +26 -57
  10. data/lib/net/imap/command_data.rb +74 -54
  11. data/lib/net/imap/config/attr_accessors.rb +75 -0
  12. data/lib/net/imap/config/attr_inheritance.rb +90 -0
  13. data/lib/net/imap/config/attr_type_coercion.rb +61 -0
  14. data/lib/net/imap/config.rb +470 -0
  15. data/lib/net/imap/data_encoding.rb +21 -9
  16. data/lib/net/imap/data_lite.rb +226 -0
  17. data/lib/net/imap/deprecated_client_options.rb +142 -0
  18. data/lib/net/imap/errors.rb +27 -1
  19. data/lib/net/imap/esearch_result.rb +180 -0
  20. data/lib/net/imap/fetch_data.rb +597 -0
  21. data/lib/net/imap/flags.rb +1 -1
  22. data/lib/net/imap/response_data.rb +250 -440
  23. data/lib/net/imap/response_parser/parser_utils.rb +245 -0
  24. data/lib/net/imap/response_parser.rb +1867 -1184
  25. data/lib/net/imap/sasl/anonymous_authenticator.rb +69 -0
  26. data/lib/net/imap/sasl/authentication_exchange.rb +139 -0
  27. data/lib/net/imap/sasl/authenticators.rb +122 -0
  28. data/lib/net/imap/sasl/client_adapter.rb +123 -0
  29. data/lib/net/imap/{authenticators/cram_md5.rb → sasl/cram_md5_authenticator.rb} +24 -14
  30. data/lib/net/imap/sasl/digest_md5_authenticator.rb +342 -0
  31. data/lib/net/imap/sasl/external_authenticator.rb +83 -0
  32. data/lib/net/imap/sasl/gs2_header.rb +80 -0
  33. data/lib/net/imap/{authenticators/login.rb → sasl/login_authenticator.rb} +28 -18
  34. data/lib/net/imap/sasl/oauthbearer_authenticator.rb +199 -0
  35. data/lib/net/imap/sasl/plain_authenticator.rb +101 -0
  36. data/lib/net/imap/sasl/protocol_adapters.rb +101 -0
  37. data/lib/net/imap/sasl/scram_algorithm.rb +58 -0
  38. data/lib/net/imap/sasl/scram_authenticator.rb +287 -0
  39. data/lib/net/imap/sasl/stringprep.rb +6 -66
  40. data/lib/net/imap/sasl/xoauth2_authenticator.rb +106 -0
  41. data/lib/net/imap/sasl.rb +148 -44
  42. data/lib/net/imap/sasl_adapter.rb +20 -0
  43. data/lib/net/imap/search_result.rb +146 -0
  44. data/lib/net/imap/sequence_set.rb +1565 -0
  45. data/lib/net/imap/stringprep/nameprep.rb +70 -0
  46. data/lib/net/imap/stringprep/saslprep.rb +69 -0
  47. data/lib/net/imap/stringprep/saslprep_tables.rb +96 -0
  48. data/lib/net/imap/stringprep/tables.rb +146 -0
  49. data/lib/net/imap/stringprep/trace.rb +85 -0
  50. data/lib/net/imap/stringprep.rb +159 -0
  51. data/lib/net/imap/uidplus_data.rb +244 -0
  52. data/lib/net/imap/vanished_data.rb +56 -0
  53. data/lib/net/imap.rb +2090 -823
  54. data/net-imap.gemspec +7 -8
  55. data/rakelib/benchmarks.rake +91 -0
  56. data/rakelib/rfcs.rake +2 -0
  57. data/rakelib/saslprep.rake +4 -4
  58. data/rakelib/string_prep_tables_generator.rb +84 -60
  59. data/sample/net-imap.rb +167 -0
  60. metadata +45 -49
  61. data/.github/dependabot.yml +0 -6
  62. data/.github/workflows/test.yml +0 -31
  63. data/.gitignore +0 -10
  64. data/benchmarks/stringprep.yml +0 -65
  65. data/benchmarks/table-regexps.yml +0 -39
  66. data/lib/net/imap/authenticators/digest_md5.rb +0 -115
  67. data/lib/net/imap/authenticators/plain.rb +0 -41
  68. data/lib/net/imap/authenticators/xoauth2.rb +0 -20
  69. data/lib/net/imap/sasl/saslprep.rb +0 -55
  70. data/lib/net/imap/sasl/saslprep_tables.rb +0 -98
  71. data/lib/net/imap/sasl/stringprep_tables.rb +0 -153
@@ -1,18 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "response_parser/parser_utils"
4
5
 
5
6
  module Net
6
7
  class IMAP < Protocol
7
8
 
8
9
  # Parses an \IMAP server response.
9
10
  class ResponseParser
10
- # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
11
- def initialize
11
+ include ParserUtils
12
+ extend ParserUtils::Generator
13
+
14
+ attr_reader :config
15
+
16
+ # Creates a new ResponseParser.
17
+ #
18
+ # When +config+ is frozen or global, the parser #config inherits from it.
19
+ # Otherwise, +config+ will be used directly.
20
+ def initialize(config: Config.global)
12
21
  @str = nil
13
22
  @pos = nil
14
23
  @lex_state = nil
15
24
  @token = nil
25
+ @config = Config[config]
26
+ @config = @config.new if @config == Config.global || @config.frozen?
16
27
  end
17
28
 
18
29
  # :call-seq:
@@ -33,745 +44,1367 @@ module Net
33
44
 
34
45
  # :stopdoc:
35
46
 
36
- EXPR_BEG = :EXPR_BEG
37
- EXPR_DATA = :EXPR_DATA
38
- EXPR_TEXT = :EXPR_TEXT
39
- EXPR_RTEXT = :EXPR_RTEXT
40
- EXPR_CTEXT = :EXPR_CTEXT
41
-
42
- T_SPACE = :SPACE
43
- T_NIL = :NIL
44
- T_NUMBER = :NUMBER
45
- T_ATOM = :ATOM
46
- T_QUOTED = :QUOTED
47
- T_LPAR = :LPAR
48
- T_RPAR = :RPAR
49
- T_BSLASH = :BSLASH
50
- T_STAR = :STAR
51
- T_LBRA = :LBRA
52
- T_RBRA = :RBRA
53
- T_LITERAL = :LITERAL
54
- T_PLUS = :PLUS
55
- T_PERCENT = :PERCENT
56
- T_CRLF = :CRLF
57
- T_EOF = :EOF
58
- T_TEXT = :TEXT
59
-
47
+ EXPR_BEG = :EXPR_BEG # the default, used in most places
48
+ EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
49
+
50
+ T_SPACE = :SPACE # atom special
51
+ T_ATOM = :ATOM # atom (subset of astring chars)
52
+ T_NIL = :NIL # subset of atom and label
53
+ T_NUMBER = :NUMBER # subset of atom
54
+ T_LBRA = :LBRA # subset of atom
55
+ T_PLUS = :PLUS # subset of atom; tag special
56
+ T_RBRA = :RBRA # atom special; resp_special; valid astring char
57
+ T_QUOTED = :QUOTED # starts/ends with atom special
58
+ T_BSLASH = :BSLASH # atom special; quoted special
59
+ T_LPAR = :LPAR # atom special; paren list delimiter
60
+ T_RPAR = :RPAR # atom special; paren list delimiter
61
+ T_STAR = :STAR # atom special; list wildcard
62
+ T_PERCENT = :PERCENT # atom special; list wildcard
63
+ T_LITERAL = :LITERAL # starts with atom special
64
+ T_LITERAL8 = :LITERAL8 # starts with atom char "~"
65
+ T_CRLF = :CRLF # atom special; text special; quoted special
66
+ T_TEXT = :TEXT # any char except CRLF
67
+ T_EOF = :EOF # end of response string
68
+
69
+ module ResponseConditions
70
+ OK = "OK"
71
+ NO = "NO"
72
+ BAD = "BAD"
73
+ BYE = "BYE"
74
+ PREAUTH = "PREAUTH"
75
+
76
+ RESP_COND_STATES = [OK, NO, BAD ].freeze
77
+ RESP_DATA_CONDS = [OK, NO, BAD, BYE, ].freeze
78
+ AUTH_CONDS = [OK, PREAUTH].freeze
79
+ GREETING_CONDS = [OK, BYE, PREAUTH].freeze
80
+ RESP_CONDS = [OK, NO, BAD, BYE, PREAUTH].freeze
81
+ end
82
+ include ResponseConditions
83
+
84
+ module Patterns
85
+
86
+ module CharClassSubtraction
87
+ refine Regexp do
88
+ def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end
89
+ end
90
+ end
91
+ using CharClassSubtraction
92
+
93
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
94
+ # >>>
95
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
96
+ # CHAR = %x01-7F
97
+ # CRLF = CR LF
98
+ # ; Internet standard newline
99
+ # CTL = %x00-1F / %x7F
100
+ # ; controls
101
+ # DIGIT = %x30-39
102
+ # ; 0-9
103
+ # DQUOTE = %x22
104
+ # ; " (Double Quote)
105
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
106
+ # OCTET = %x00-FF
107
+ # SP = %x20
108
+ module RFC5234
109
+ ALPHA = /[A-Za-z]/n
110
+ CHAR = /[\x01-\x7f]/n
111
+ CRLF = /\r\n/n
112
+ CTL = /[\x00-\x1F\x7F]/n
113
+ DIGIT = /\d/n
114
+ DQUOTE = /"/n
115
+ HEXDIG = /\h/
116
+ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
117
+ SP = / /n
118
+ end
119
+
120
+ # From RFC3629, "UTF-8, a transformation format of ISO 10646"
121
+ # >>>
122
+ # UTF8-1 = %x00-7F
123
+ # UTF8-tail = %x80-BF
124
+ # UTF8-2 = %xC2-DF UTF8-tail
125
+ # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
126
+ # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
127
+ # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
128
+ # %xF4 %x80-8F 2( UTF8-tail )
129
+ # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
130
+ # UTF8-octets = *( UTF8-char )
131
+ #
132
+ # n.b. String * Integer is used for repetition, rather than /x{3}/,
133
+ # because ruby 3.2's linear-time cache-based optimization doesn't work
134
+ # with "bounded or fixed times repetition nesting in another repetition
135
+ # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
136
+ # believe it is hard to support this case correctly."
137
+ # See https://bugs.ruby-lang.org/issues/19104
138
+ module RFC3629
139
+ UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
140
+ UTF8_TAIL = /[\x80-\xBF]/n
141
+ UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
142
+ UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
143
+ /\xED[\x80-\x9F]#{UTF8_TAIL}/n,
144
+ /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
145
+ /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
146
+ UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
147
+ /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
148
+ /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
149
+ UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
150
+ UTF8_OCTETS = /#{UTF8_CHAR}*/n
151
+ end
152
+
153
+ include RFC5234
154
+ include RFC3629
155
+
156
+ # CHAR8 = %x01-ff
157
+ # ; any OCTET except NUL, %x00
158
+ CHAR8 = /[\x01-\xff]/n
159
+
160
+ # list-wildcards = "%" / "*"
161
+ LIST_WILDCARDS = /[%*]/n
162
+ # quoted-specials = DQUOTE / "\"
163
+ QUOTED_SPECIALS = /["\\]/n
164
+ # resp-specials = "]"
165
+ RESP_SPECIALS = /[\]]/n
166
+
167
+ # atomish = 1*<any ATOM-CHAR except "[">
168
+ # ; We use "atomish" for msg-att and section, in order
169
+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
170
+ #
171
+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
172
+ # quoted-specials / resp-specials
173
+ # ATOM-CHAR = <any CHAR except atom-specials>
174
+ # atom = 1*ATOM-CHAR
175
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
176
+ # tag = 1*<any ASTRING-CHAR except "+">
177
+
178
+ ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
179
+ ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
180
+
181
+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
182
+ ATOM_CHAR = CHAR - ATOM_SPECIALS
183
+
184
+ ATOM = /#{ATOM_CHAR}+/n
185
+ ASTRING_CHARS = /#{ASTRING_CHAR}+/n
186
+ ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
187
+ TAG = /#{ASTRING_CHAR - /[+]/ }+/
188
+
189
+ # TEXT-CHAR = <any CHAR except CR and LF>
190
+ TEXT_CHAR = CHAR - /[\r\n]/
191
+
192
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
193
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
194
+ CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
195
+
196
+ # flag = "\Answered" / "\Flagged" / "\Deleted" /
197
+ # "\Seen" / "\Draft" / flag-keyword / flag-extension
198
+ # ; Does not include "\Recent"
199
+ # flag-extension = "\" atom
200
+ # ; Future expansion. Client implementations
201
+ # ; MUST accept flag-extension flags. Server
202
+ # ; implementations MUST NOT generate
203
+ # ; flag-extension flags except as defined by
204
+ # ; a future Standard or Standards Track
205
+ # ; revisions of this specification.
206
+ # flag-keyword = "$MDNSent" / "$Forwarded" / "$Junk" /
207
+ # "$NotJunk" / "$Phishing" / atom
208
+ #
209
+ # flag-perm = flag / "\*"
210
+ #
211
+ # Not checking for max one mbx-list-sflag in the parser.
212
+ # >>>
213
+ # mbx-list-oflag = "\Noinferiors" / child-mbox-flag /
214
+ # "\Subscribed" / "\Remote" / flag-extension
215
+ # ; Other flags; multiple from this list are
216
+ # ; possible per LIST response, but each flag
217
+ # ; can only appear once per LIST response
218
+ # mbx-list-sflag = "\NonExistent" / "\Noselect" / "\Marked" /
219
+ # "\Unmarked"
220
+ # ; Selectability flags; only one per LIST response
221
+ # child-mbox-flag = "\HasChildren" / "\HasNoChildren"
222
+ # ; attributes for the CHILDREN return option, at most
223
+ # ; one possible per LIST response
224
+ FLAG = /\\?#{ATOM}/n
225
+ FLAG_EXTENSION = /\\#{ATOM}/n
226
+ FLAG_KEYWORD = ATOM
227
+ FLAG_PERM = Regexp.union(FLAG, "\\*")
228
+ MBX_FLAG = FLAG_EXTENSION
229
+
230
+ # flag-list = "(" [flag *(SP flag)] ")"
231
+ # resp-text-code =/ "PERMANENTFLAGS" SP
232
+ # "(" [flag-perm *(SP flag-perm)] ")"
233
+ # mbx-list-flags = *(mbx-list-oflag SP) mbx-list-sflag
234
+ # *(SP mbx-list-oflag) /
235
+ # mbx-list-oflag *(SP mbx-list-oflag)
236
+ # (Not checking for max one mbx-list-sflag in the parser.)
237
+ FLAG_LIST = /\G\((#{FLAG }(?:#{SP}#{FLAG })*|)\)/ni
238
+ FLAG_PERM_LIST = /\G\((#{FLAG_PERM}(?:#{SP}#{FLAG_PERM})*|)\)/ni
239
+ MBX_LIST_FLAGS = /\G (#{MBX_FLAG }(?:#{SP}#{MBX_FLAG })*) /nix
240
+
241
+ # Gmail allows SP and "]" in flags.......
242
+ QUIRKY_FLAG = Regexp.union(/\\?#{ASTRING_CHARS}/n, "\\*")
243
+ QUIRKY_FLAGS_LIST = /\G\(( [^)]* )\)/nx
244
+
245
+ # RFC3501:
246
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
247
+ # "\" quoted-specials
248
+ # RFC9051:
249
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
250
+ # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
251
+ # RFC3501 & RFC9051:
252
+ # quoted = DQUOTE *QUOTED-CHAR DQUOTE
253
+ QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
254
+ QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
255
+ QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
256
+ QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
257
+ UTF8_2, UTF8_3, UTF8_4)
258
+ QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
259
+ QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
260
+
261
+ # RFC3501:
262
+ # text = 1*TEXT-CHAR
263
+ # RFC9051:
264
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
265
+ # ; Non-ASCII text can only be returned
266
+ # ; after ENABLE IMAP4rev2 command
267
+ TEXT_rev1 = /#{TEXT_CHAR}+/
268
+ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
269
+
270
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
271
+ TAGGED_LABEL_FCHAR = /[a-zA-Z\-_.]/n
272
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
273
+ TAGGED_LABEL_CHAR = /[a-zA-Z\-_.0-9:]*/n
274
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
275
+ # ; Is a valid RFC 3501 "atom".
276
+ TAGGED_EXT_LABEL = /#{TAGGED_LABEL_FCHAR}#{TAGGED_LABEL_CHAR}*/n
277
+
278
+ # nz-number = digit-nz *DIGIT
279
+ # ; Non-zero unsigned 32-bit integer
280
+ # ; (0 < n < 4,294,967,296)
281
+ NZ_NUMBER = /[1-9]\d*/n
282
+
283
+ # seq-number = nz-number / "*"
284
+ # ; message sequence number (COPY, FETCH, STORE
285
+ # ; commands) or unique identifier (UID COPY,
286
+ # ; UID FETCH, UID STORE commands).
287
+ # ; * represents the largest number in use. In
288
+ # ; the case of message sequence numbers, it is
289
+ # ; the number of messages in a non-empty mailbox.
290
+ # ; In the case of unique identifiers, it is the
291
+ # ; unique identifier of the last message in the
292
+ # ; mailbox or, if the mailbox is empty, the
293
+ # ; mailbox's current UIDNEXT value.
294
+ # ; The server should respond with a tagged BAD
295
+ # ; response to a command that uses a message
296
+ # ; sequence number greater than the number of
297
+ # ; messages in the selected mailbox. This
298
+ # ; includes "*" if the selected mailbox is empty.
299
+ SEQ_NUMBER = /#{NZ_NUMBER}|\*/n
300
+
301
+ # seq-range = seq-number ":" seq-number
302
+ # ; two seq-number values and all values between
303
+ # ; these two regardless of order.
304
+ # ; Example: 2:4 and 4:2 are equivalent and
305
+ # ; indicate values 2, 3, and 4.
306
+ # ; Example: a unique identifier sequence range of
307
+ # ; 3291:* includes the UID of the last message in
308
+ # ; the mailbox, even if that value is less than
309
+ # ; 3291.
310
+ SEQ_RANGE = /#{SEQ_NUMBER}:#{SEQ_NUMBER}/n
311
+
312
+ # sequence-set = (seq-number / seq-range) ["," sequence-set]
313
+ # ; set of seq-number values, regardless of order.
314
+ # ; Servers MAY coalesce overlaps and/or execute
315
+ # ; the sequence in any order.
316
+ # ; Example: a message sequence number set of
317
+ # ; 2,4:7,9,12:* for a mailbox with 15 messages is
318
+ # ; equivalent to 2,4,5,6,7,9,12,13,14,15
319
+ # ; Example: a message sequence number set of
320
+ # ; *:4,5:7 for a mailbox with 10 messages is
321
+ # ; equivalent to 10,9,8,7,6,5,4,5,6,7 and MAY
322
+ # ; be reordered and overlap coalesced to be
323
+ # ; 4,5,6,7,8,9,10.
324
+ SEQUENCE_SET_ITEM = /#{SEQ_NUMBER}|#{SEQ_RANGE}/n
325
+ SEQUENCE_SET = /#{SEQUENCE_SET_ITEM}(?:,#{SEQUENCE_SET_ITEM})*/n
326
+ SEQUENCE_SET_STR = /\A#{SEQUENCE_SET}\z/n
327
+
328
+ # partial-range-first = nz-number ":" nz-number
329
+ # ;; Request to search from oldest (lowest UIDs) to
330
+ # ;; more recent messages.
331
+ # ;; A range 500:400 is the same as 400:500.
332
+ # ;; This is similar to <seq-range> from [RFC3501]
333
+ # ;; but cannot contain "*".
334
+ PARTIAL_RANGE_FIRST = /\A(#{NZ_NUMBER}):(#{NZ_NUMBER})\z/n
335
+
336
+ # partial-range-last = MINUS nz-number ":" MINUS nz-number
337
+ # ;; Request to search from newest (highest UIDs) to
338
+ # ;; oldest messages.
339
+ # ;; A range -500:-400 is the same as -400:-500.
340
+ PARTIAL_RANGE_LAST = /\A(-#{NZ_NUMBER}):(-#{NZ_NUMBER})\z/n
341
+
342
+ # partial-range = partial-range-first / partial-range-last
343
+ PARTIAL_RANGE = Regexp.union(PARTIAL_RANGE_FIRST,
344
+ PARTIAL_RANGE_LAST)
345
+
346
+ # RFC3501:
347
+ # literal = "{" number "}" CRLF *CHAR8
348
+ # ; Number represents the number of CHAR8s
349
+ # RFC9051:
350
+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
351
+ # ; <number64> represents the number of CHAR8s.
352
+ # ; A non-synchronizing literal is distinguished
353
+ # ; from a synchronizing literal by the presence of
354
+ # ; "+" before the closing "}".
355
+ # ; Non-synchronizing literals are not allowed when
356
+ # ; sent from server to the client.
357
+ LITERAL = /\{(\d+)\}\r\n/n
358
+
359
+ # RFC3516 (BINARY):
360
+ # literal8 = "~{" number "}" CRLF *OCTET
361
+ # ; <number> represents the number of OCTETs
362
+ # ; in the response string.
363
+ # RFC9051:
364
+ # literal8 = "~{" number64 "}" CRLF *OCTET
365
+ # ; <number64> represents the number of OCTETs
366
+ # ; in the response string.
367
+ LITERAL8 = /~\{(\d+)\}\r\n/n
368
+
369
+ module_function
370
+
371
+ def unescape_quoted!(quoted)
372
+ quoted
373
+ &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
374
+ &.force_encoding("UTF-8")
375
+ end
376
+
377
+ def unescape_quoted(quoted)
378
+ quoted
379
+ &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
380
+ &.force_encoding("UTF-8")
381
+ end
382
+
383
+ end
384
+
385
+ # the default, used in most places
60
386
  BEG_REGEXP = /\G(?:\
61
- (?# 1: SPACE )( +)|\
62
- (?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
63
- (?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
64
- (?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\
65
- (?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
66
- (?# 6: LPAR )(\()|\
67
- (?# 7: RPAR )(\))|\
68
- (?# 8: BSLASH )(\\)|\
69
- (?# 9: STAR )(\*)|\
70
- (?# 10: LBRA )(\[)|\
71
- (?# 11: RBRA )(\])|\
72
- (?# 12: LITERAL )\{(\d+)\}\r\n|\
73
- (?# 13: PLUS )(\+)|\
74
- (?# 14: PERCENT )(%)|\
75
- (?# 15: CRLF )(\r\n)|\
76
- (?# 16: EOF )(\z))/ni
77
-
387
+ (?# 1: SPACE )( )|\
388
+ (?# 2: LITERAL8)#{Patterns::LITERAL8}|\
389
+ (?# 3: ATOM prefixed with a compatible subtype)\
390
+ ((?:\
391
+ (?# 4: NIL )(NIL)|\
392
+ (?# 5: NUMBER )(\d+)|\
393
+ (?# 6: PLUS )(\+))\
394
+ (?# 7: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
395
+ (?# This enables greedy alternation without lookahead, in linear time.)\
396
+ )|\
397
+ (?# Also need to check for ATOM without a subtype prefix.)\
398
+ (?# 8: ATOM )(#{Patterns::ATOMISH})|\
399
+ (?# 9: QUOTED )#{Patterns::QUOTED_rev2}|\
400
+ (?# 10: LPAR )(\()|\
401
+ (?# 11: RPAR )(\))|\
402
+ (?# 12: BSLASH )(\\)|\
403
+ (?# 13: STAR )(\*)|\
404
+ (?# 14: LBRA )(\[)|\
405
+ (?# 15: RBRA )(\])|\
406
+ (?# 16: LITERAL )#{Patterns::LITERAL}|\
407
+ (?# 17: PERCENT )(%)|\
408
+ (?# 18: CRLF )(\r\n)|\
409
+ (?# 19: EOF )(\z))/ni
410
+
411
+ # envelope, body(structure), namespaces
78
412
  DATA_REGEXP = /\G(?:\
79
413
  (?# 1: SPACE )( )|\
80
414
  (?# 2: NIL )(NIL)|\
81
415
  (?# 3: NUMBER )(\d+)|\
82
- (?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
83
- (?# 5: LITERAL )\{(\d+)\}\r\n|\
416
+ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
417
+ (?# 5: LITERAL )#{Patterns::LITERAL}|\
84
418
  (?# 6: LPAR )(\()|\
85
419
  (?# 7: RPAR )(\)))/ni
86
420
 
87
- TEXT_REGEXP = /\G(?:\
88
- (?# 1: TEXT )([^\x00\r\n]*))/ni
421
+ # text, after 'resp-text-code "]"'
422
+ TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
89
423
 
90
- RTEXT_REGEXP = /\G(?:\
91
- (?# 1: LBRA )(\[)|\
92
- (?# 2: TEXT )([^\x00\r\n]*))/ni
93
-
94
- CTEXT_REGEXP = /\G(?:\
95
- (?# 1: TEXT )([^\x00\r\n\]]*))/ni
424
+ # resp-text-code, after 'atom SP'
425
+ CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
96
426
 
97
427
  Token = Struct.new(:symbol, :value)
98
428
 
99
- def response
100
- token = lookahead
101
- case token.symbol
102
- when T_PLUS
103
- result = continue_req
104
- when T_STAR
105
- result = response_untagged
106
- else
107
- result = response_tagged
108
- end
109
- while lookahead.symbol == T_SPACE
110
- # Ignore trailing space for Microsoft Exchange Server
111
- shift_token
112
- end
113
- match(T_CRLF)
114
- match(T_EOF)
115
- return result
116
- end
429
+ def_char_matchers :SP, " ", :T_SPACE
430
+ def_char_matchers :PLUS, "+", :T_PLUS
431
+ def_char_matchers :STAR, "*", :T_STAR
117
432
 
118
- def continue_req
119
- match(T_PLUS)
120
- token = lookahead
121
- if token.symbol == T_SPACE
122
- shift_token
123
- return ContinuationRequest.new(resp_text, @str)
124
- else
125
- return ContinuationRequest.new(ResponseText.new(nil, ""), @str)
126
- end
127
- end
433
+ def_char_matchers :lpar, "(", :T_LPAR
434
+ def_char_matchers :rpar, ")", :T_RPAR
128
435
 
129
- def response_untagged
130
- match(T_STAR)
131
- match(T_SPACE)
132
- token = lookahead
133
- if token.symbol == T_NUMBER
134
- return numeric_response
135
- elsif token.symbol == T_ATOM
136
- case token.value
137
- when /\A(?:OK|NO|BAD|BYE|PREAUTH)\z/ni
138
- return response_cond
139
- when /\A(?:FLAGS)\z/ni
140
- return flags_response
141
- when /\A(?:ID)\z/ni
142
- return id_response
143
- when /\A(?:LIST|LSUB|XLIST)\z/ni
144
- return list_response
145
- when /\A(?:NAMESPACE)\z/ni
146
- return namespace_response
147
- when /\A(?:QUOTA)\z/ni
148
- return getquota_response
149
- when /\A(?:QUOTAROOT)\z/ni
150
- return getquotaroot_response
151
- when /\A(?:ACL)\z/ni
152
- return getacl_response
153
- when /\A(?:SEARCH|SORT)\z/ni
154
- return search_response
155
- when /\A(?:THREAD)\z/ni
156
- return thread_response
157
- when /\A(?:STATUS)\z/ni
158
- return status_response
159
- when /\A(?:CAPABILITY)\z/ni
160
- return capability_response
161
- when /\A(?:NOOP)\z/ni
162
- return ignored_response
163
- else
164
- return text_response
165
- end
436
+ def_char_matchers :lbra, "[", :T_LBRA
437
+ def_char_matchers :rbra, "]", :T_RBRA
438
+
439
+ # valid number ranges are not enforced by parser
440
+ # number = 1*DIGIT
441
+ # ; Unsigned 32-bit integer
442
+ # ; (0 <= n < 4,294,967,296)
443
+ def_token_matchers :number, T_NUMBER, coerce: Integer
444
+
445
+ def_token_matchers :quoted, T_QUOTED
446
+
447
+ # string = quoted / literal
448
+ def_token_matchers :string, T_QUOTED, T_LITERAL
449
+
450
+ # used by nstring8 = nstring / literal8
451
+ def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8
452
+
453
+ # use where string represents "LABEL" values
454
+ def_token_matchers :case_insensitive__string,
455
+ T_QUOTED, T_LITERAL,
456
+ send: :upcase
457
+
458
+ # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
459
+ # NIL? returns nil when it does *not* match
460
+ def_token_matchers :NIL, T_NIL
461
+
462
+ # In addition to explicit uses of +tagged-ext-label+, use this to match
463
+ # keywords when the grammar has not provided any extension syntax.
464
+ #
465
+ # Do *not* use this for labels where the grammar specifies extensions
466
+ # can be +atom+, even if all currently defined labels would match. For
467
+ # example response codes in +resp-text-code+.
468
+ #
469
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
470
+ # ; Is a valid RFC 3501 "atom".
471
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
472
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
473
+ #
474
+ # TODO: add to lexer and only match tagged-ext-label
475
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
476
+
477
+ def_token_matchers :CRLF, T_CRLF
478
+ def_token_matchers :EOF, T_EOF
479
+
480
+ # atom = 1*ATOM-CHAR
481
+ # ATOM-CHAR = <any CHAR except atom-specials>
482
+ ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
483
+
484
+ SEQUENCE_SET_TOKENS = [T_ATOM, T_NUMBER, T_STAR]
485
+
486
+ # sequence-set = (seq-number / seq-range) ["," sequence-set]
487
+ # sequence-set =/ seq-last-command
488
+ # ; Allow for "result of the last command"
489
+ # ; indicator.
490
+ # seq-last-command = "$"
491
+ #
492
+ # *note*: doesn't match seq-last-command
493
+ def sequence_set
494
+ str = combine_adjacent(*SEQUENCE_SET_TOKENS)
495
+ if Patterns::SEQUENCE_SET_STR.match?(str)
496
+ SequenceSet[str]
166
497
  else
167
- parse_error("unexpected token %s", token.symbol)
498
+ parse_error("unexpected atom %p, expected sequence-set", str)
168
499
  end
169
500
  end
170
501
 
171
- def response_tagged
172
- tag = astring_chars
173
- match(T_SPACE)
174
- token = match(T_ATOM)
175
- name = token.value.upcase
176
- match(T_SPACE)
177
- return TaggedResponse.new(tag, name, resp_text, @str)
502
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
503
+ # resp-specials = "]"
504
+ ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
505
+
506
+ ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
507
+
508
+ # tag = 1*<any ASTRING-CHAR except "+">
509
+ TAG_TOKENS = (ASTRING_CHARS_TOKENS - [T_PLUS]).freeze
510
+
511
+ # TODO: handle atom, astring_chars, and tag entirely inside the lexer
512
+ def atom; combine_adjacent(*ATOM_TOKENS) end
513
+ def astring_chars; combine_adjacent(*ASTRING_CHARS_TOKENS) end
514
+ def tag; combine_adjacent(*TAG_TOKENS) end
515
+
516
+ # the #accept version of #atom
517
+ def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
518
+
519
+ # Returns <tt>atom.upcase</tt>
520
+ def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
521
+
522
+ # Returns <tt>atom?&.upcase</tt>
523
+ def case_insensitive__atom?
524
+ -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
178
525
  end
179
526
 
180
- def response_cond
181
- token = match(T_ATOM)
182
- name = token.value.upcase
183
- match(T_SPACE)
184
- return UntaggedResponse.new(name, resp_text, @str)
527
+ # astring = 1*ASTRING-CHAR / string
528
+ def astring
529
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
185
530
  end
186
531
 
187
- def numeric_response
188
- n = number
189
- match(T_SPACE)
190
- token = match(T_ATOM)
191
- name = token.value.upcase
192
- case name
193
- when "EXISTS", "RECENT", "EXPUNGE"
194
- return UntaggedResponse.new(name, n, @str)
195
- when "FETCH"
196
- shift_token
197
- match(T_SPACE)
198
- data = FetchData.new(n, msg_att(n))
199
- return UntaggedResponse.new(name, data, @str)
200
- end
532
+ def astring?
533
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
201
534
  end
202
535
 
203
- def msg_att(n)
204
- match(T_LPAR)
205
- attr = {}
206
- while true
207
- token = lookahead
208
- case token.symbol
209
- when T_RPAR
210
- shift_token
211
- break
212
- when T_SPACE
213
- shift_token
214
- next
215
- end
216
- case token.value
217
- when /\A(?:ENVELOPE)\z/ni
218
- name, val = envelope_data
219
- when /\A(?:FLAGS)\z/ni
220
- name, val = flags_data
221
- when /\A(?:INTERNALDATE)\z/ni
222
- name, val = internaldate_data
223
- when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
224
- name, val = rfc822_text
225
- when /\A(?:RFC822\.SIZE)\z/ni
226
- name, val = rfc822_size
227
- when /\A(?:BODY(?:STRUCTURE)?)\z/ni
228
- name, val = body_data
229
- when /\A(?:UID)\z/ni
230
- name, val = uid_data
231
- when /\A(?:MODSEQ)\z/ni
232
- name, val = modseq_data
233
- else
234
- parse_error("unknown attribute `%s' for {%d}", token.value, n)
235
- end
236
- attr[name] = val
237
- end
238
- return attr
536
+ # Use #label or #label_in to assert specific known labels
537
+ # (+tagged-ext-label+ only, not +atom+).
538
+ def label(word)
539
+ (val = tagged_ext_label) == word and return val
540
+ parse_error("unexpected atom %p, expected %p instead", val, word)
239
541
  end
240
542
 
241
- def envelope_data
242
- token = match(T_ATOM)
243
- name = token.value.upcase
244
- match(T_SPACE)
245
- return name, envelope
543
+ # Use #label or #label_in to assert specific known labels
544
+ # (+tagged-ext-label+ only, not +atom+).
545
+ def label_in(*labels)
546
+ lbl = tagged_ext_label and labels.include?(lbl) and return lbl
547
+ parse_error("unexpected atom %p, expected one of %s instead",
548
+ lbl, labels.join(" or "))
246
549
  end
247
550
 
248
- def envelope
249
- @lex_state = EXPR_DATA
250
- token = lookahead
251
- if token.symbol == T_NIL
252
- shift_token
253
- result = nil
254
- else
255
- match(T_LPAR)
256
- date = nstring
257
- match(T_SPACE)
258
- subject = nstring
259
- match(T_SPACE)
260
- from = address_list
261
- match(T_SPACE)
262
- sender = address_list
263
- match(T_SPACE)
264
- reply_to = address_list
265
- match(T_SPACE)
266
- to = address_list
267
- match(T_SPACE)
268
- cc = address_list
269
- match(T_SPACE)
270
- bcc = address_list
271
- match(T_SPACE)
272
- in_reply_to = nstring
273
- match(T_SPACE)
274
- message_id = nstring
275
- match(T_RPAR)
276
- result = Envelope.new(date, subject, from, sender, reply_to,
277
- to, cc, bcc, in_reply_to, message_id)
278
- end
279
- @lex_state = EXPR_BEG
280
- return result
551
+ # expects "OK" or "PREAUTH" and raises InvalidResponseError on failure
552
+ def resp_cond_auth__name
553
+ lbl = tagged_ext_label and AUTH_CONDS.include? lbl and return lbl
554
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
555
+ lbl, AUTH_CONDS.join(" or ")
556
+ ]
281
557
  end
282
558
 
283
- def flags_data
284
- token = match(T_ATOM)
285
- name = token.value.upcase
286
- match(T_SPACE)
287
- return name, flag_list
559
+ # expects "OK" or "NO" or "BAD" and raises InvalidResponseError on failure
560
+ def resp_cond_state__name
561
+ lbl = tagged_ext_label and RESP_COND_STATES.include? lbl and return lbl
562
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
563
+ lbl, RESP_COND_STATES.join(" or ")
564
+ ]
288
565
  end
289
566
 
290
- def internaldate_data
291
- token = match(T_ATOM)
292
- name = token.value.upcase
293
- match(T_SPACE)
294
- token = match(T_QUOTED)
295
- return name, token.value
567
+ # nstring = string / nil
568
+ def nstring
569
+ NIL? ? nil : string
296
570
  end
297
571
 
298
- def rfc822_text
299
- token = match(T_ATOM)
300
- name = token.value.upcase
301
- token = lookahead
302
- if token.symbol == T_LBRA
303
- shift_token
304
- match(T_RBRA)
305
- end
306
- match(T_SPACE)
307
- return name, nstring
572
+ def nstring8
573
+ NIL? ? nil : string8
308
574
  end
309
575
 
310
- def rfc822_size
311
- token = match(T_ATOM)
312
- name = token.value.upcase
313
- match(T_SPACE)
314
- return name, number
576
+ def nquoted
577
+ NIL? ? nil : quoted
315
578
  end
316
579
 
317
- def body_data
318
- token = match(T_ATOM)
319
- name = token.value.upcase
320
- token = lookahead
321
- if token.symbol == T_SPACE
322
- shift_token
323
- return name, body
324
- end
325
- name.concat(section)
326
- token = lookahead
327
- if token.symbol == T_ATOM
328
- name.concat(token.value)
329
- shift_token
330
- end
331
- match(T_SPACE)
332
- data = nstring
333
- return name, data
580
+ # use where nstring represents "LABEL" values
581
+ def case_insensitive__nstring
582
+ NIL? ? nil : case_insensitive__string
334
583
  end
335
584
 
336
- def body
337
- @lex_state = EXPR_DATA
338
- token = lookahead
339
- if token.symbol == T_NIL
340
- shift_token
341
- result = nil
342
- else
343
- match(T_LPAR)
344
- token = lookahead
345
- if token.symbol == T_LPAR
346
- result = body_type_mpart
347
- else
348
- result = body_type_1part
349
- end
350
- match(T_RPAR)
585
+ # tagged-ext-comp = astring /
586
+ # tagged-ext-comp *(SP tagged-ext-comp) /
587
+ # "(" tagged-ext-comp ")"
588
+ # ; Extensions that follow this general
589
+ # ; syntax should use nstring instead of
590
+ # ; astring when appropriate in the context
591
+ # ; of the extension.
592
+ # ; Note that a message set or a "number"
593
+ # ; can always be represented as an "atom".
594
+ # ; A URL should be represented as
595
+ # ; a "quoted" string.
596
+ def tagged_ext_comp
597
+ vals = []
598
+ while true
599
+ vals << case lookahead!(*ASTRING_TOKENS, T_LPAR).symbol
600
+ when T_LPAR then lpar; ary = tagged_ext_comp; rpar; ary
601
+ when T_NUMBER then number
602
+ else astring
603
+ end
604
+ SP? or break
351
605
  end
352
- @lex_state = EXPR_BEG
353
- return result
606
+ vals
354
607
  end
355
608
 
356
- def body_type_1part
357
- token = lookahead
358
- case token.value
359
- when /\A(?:TEXT)\z/ni
360
- return body_type_text
361
- when /\A(?:MESSAGE)\z/ni
362
- return body_type_msg
363
- when /\A(?:ATTACHMENT)\z/ni
364
- return body_type_attachment
365
- when /\A(?:MIXED)\z/ni
366
- return body_type_mixed
609
+ # tagged-ext-simple is a subset of atom
610
+ # TODO: recognize sequence-set in the lexer
611
+ #
612
+ # tagged-ext-simple = sequence-set / number / number64
613
+ def tagged_ext_simple
614
+ number? || sequence_set
615
+ end
616
+
617
+ # tagged-ext-val = tagged-ext-simple /
618
+ # "(" [tagged-ext-comp] ")"
619
+ def tagged_ext_val
620
+ if lpar?
621
+ _ = peek_rpar? ? [] : tagged_ext_comp
622
+ rpar
623
+ _
367
624
  else
368
- return body_type_basic
369
- end
370
- end
625
+ tagged_ext_simple
626
+ end
627
+ end
628
+
629
+ # mailbox = "INBOX" / astring
630
+ # ; INBOX is case-insensitive. All case variants of
631
+ # ; INBOX (e.g., "iNbOx") MUST be interpreted as INBOX
632
+ # ; not as an astring. An astring which consists of
633
+ # ; the case-insensitive sequence "I" "N" "B" "O" "X"
634
+ # ; is considered to be INBOX and not an astring.
635
+ # ; Refer to section 5.1 for further
636
+ # ; semantic details of mailbox names.
637
+ alias mailbox astring
638
+
639
+ # valid number ranges are not enforced by parser
640
+ # number64 = 1*DIGIT
641
+ # ; Unsigned 63-bit integer
642
+ # ; (0 <= n <= 9,223,372,036,854,775,807)
643
+ alias number64 number
644
+ alias number64? number?
645
+
646
+ # valid number ranges are not enforced by parser
647
+ # nz-number = digit-nz *DIGIT
648
+ # ; Non-zero unsigned 32-bit integer
649
+ # ; (0 < n < 4,294,967,296)
650
+ alias nz_number number
651
+ alias nz_number? number?
652
+
653
+ # valid number ranges are not enforced by parser
654
+ # nz-number64 = digit-nz *DIGIT
655
+ # ; Unsigned 63-bit integer
656
+ # ; (0 < n <= 9,223,372,036,854,775,807)
657
+ alias nz_number64 nz_number
658
+
659
+ # valid number ranges are not enforced by parser
660
+ # uniqueid = nz-number
661
+ # ; Strictly ascending
662
+ alias uniqueid nz_number
371
663
 
372
- def body_type_basic
373
- mtype, msubtype = media_type
374
- token = lookahead
375
- if token.symbol == T_RPAR
376
- return BodyTypeBasic.new(mtype, msubtype)
377
- end
378
- match(T_SPACE)
379
- param, content_id, desc, enc, size = body_fields
380
- md5, disposition, language, extension = body_ext_1part
381
- return BodyTypeBasic.new(mtype, msubtype,
382
- param, content_id,
383
- desc, enc, size,
384
- md5, disposition, language, extension)
664
+ # valid number ranges are not enforced by parser
665
+ #
666
+ # a 64-bit unsigned integer and is the decimal equivalent for the ID hex
667
+ # string used in the web interface and the Gmail API.
668
+ alias x_gm_id number
669
+
670
+ # [RFC3501 & RFC9051:]
671
+ # response = *(continue-req / response-data) response-done
672
+ #
673
+ # For simplicity, response isn't interpreted as the combination of the
674
+ # three response types, but instead represents any individual server
675
+ # response. Our simplified interpretation is defined as:
676
+ # response = continue-req | response_data | response-tagged
677
+ #
678
+ # n.b: our "response-tagged" definition parses "greeting" too.
679
+ def response
680
+ resp = case lookahead!(T_PLUS, T_STAR, *TAG_TOKENS).symbol
681
+ when T_PLUS then continue_req
682
+ when T_STAR then response_data
683
+ else response_tagged
684
+ end
685
+ accept_spaces # QUIRKY: Ignore trailing space (MS Exchange Server?)
686
+ CRLF!
687
+ EOF!
688
+ resp
689
+ end
690
+
691
+ # RFC3501 & RFC9051:
692
+ # continue-req = "+" SP (resp-text / base64) CRLF
693
+ #
694
+ # n.b: base64 is valid resp-text. And in the spirit of RFC9051 Appx E 23
695
+ # (and to workaround existing servers), we use the following grammar:
696
+ #
697
+ # continue-req = "+" (SP (resp-text)) CRLF
698
+ def continue_req
699
+ PLUS!
700
+ ContinuationRequest.new(SP? ? resp_text : ResponseText::EMPTY, @str)
701
+ end
702
+
703
+ RE_RESPONSE_TYPE = /\G(?:\d+ )?(?<type>#{Patterns::TAGGED_EXT_LABEL})/n
704
+
705
+ # [RFC3501:]
706
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
707
+ # mailbox-data / message-data / capability-data) CRLF
708
+ # [RFC4466:]
709
+ # response-data = "*" SP response-payload CRLF
710
+ # response-payload = resp-cond-state / resp-cond-bye /
711
+ # mailbox-data / message-data / capability-data
712
+ # RFC5161 (ENABLE capability):
713
+ # response-data =/ "*" SP enable-data CRLF
714
+ # RFC5255 (LANGUAGE capability)
715
+ # response-payload =/ language-data
716
+ # RFC5255 (I18NLEVEL=1 and I18NLEVEL=2 capabilities)
717
+ # response-payload =/ comparator-data
718
+ # [RFC9051:]
719
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
720
+ # mailbox-data / message-data / capability-data /
721
+ # enable-data) CRLF
722
+ #
723
+ # [merging in greeting and response-fatal:]
724
+ # greeting = "*" SP (resp-cond-auth / resp-cond-bye) CRLF
725
+ # response-fatal = "*" SP resp-cond-bye CRLF
726
+ # response-data =/ "*" SP (resp-cond-auth / resp-cond-bye) CRLF
727
+ # [removing duplicates, this is simply]
728
+ # response-payload =/ resp-cond-auth
729
+ #
730
+ # TODO: remove resp-cond-auth and handle greeting separately
731
+ def response_data
732
+ STAR!; SP!
733
+ m = peek_re(RE_RESPONSE_TYPE) or parse_error("unparsable response")
734
+ case m["type"].upcase
735
+ when "OK" then resp_cond_state__untagged # RFC3501, RFC9051
736
+ when "FETCH" then message_data__fetch # RFC3501, RFC9051
737
+ when "EXPUNGE" then message_data__expunge # RFC3501, RFC9051
738
+ when "EXISTS" then mailbox_data__exists # RFC3501, RFC9051
739
+ when "ESEARCH" then esearch_response # RFC4731, RFC9051, etc
740
+ when "VANISHED" then expunged_resp # RFC7162
741
+ when "UIDFETCH" then uidfetch_resp # RFC9586
742
+ when "SEARCH" then mailbox_data__search # RFC3501 (obsolete)
743
+ when "CAPABILITY" then capability_data__untagged # RFC3501, RFC9051
744
+ when "FLAGS" then mailbox_data__flags # RFC3501, RFC9051
745
+ when "LIST" then mailbox_data__list # RFC3501, RFC9051
746
+ when "STATUS" then mailbox_data__status # RFC3501, RFC9051
747
+ when "NAMESPACE" then namespace_response # RFC2342, RFC9051
748
+ when "ENABLED" then enable_data # RFC5161, RFC9051
749
+ when "BAD" then resp_cond_state__untagged # RFC3501, RFC9051
750
+ when "NO" then resp_cond_state__untagged # RFC3501, RFC9051
751
+ when "PREAUTH" then resp_cond_auth # RFC3501, RFC9051
752
+ when "BYE" then resp_cond_bye # RFC3501, RFC9051
753
+ when "RECENT" then mailbox_data__recent # RFC3501 (obsolete)
754
+ when "SORT" then sort_data # RFC5256, RFC7162
755
+ when "THREAD" then thread_data # RFC5256
756
+ when "QUOTA" then quota_response # RFC2087, RFC9208
757
+ when "QUOTAROOT" then quotaroot_response # RFC2087, RFC9208
758
+ when "ID" then id_response # RFC2971
759
+ when "ACL" then acl_data # RFC4314
760
+ when "LISTRIGHTS" then listrights_data # RFC4314
761
+ when "MYRIGHTS" then myrights_data # RFC4314
762
+ when "METADATA" then metadata_resp # RFC5464
763
+ when "LANGUAGE" then language_data # RFC5255
764
+ when "COMPARATOR" then comparator_data # RFC5255
765
+ when "CONVERTED" then message_data__converted # RFC5259
766
+ when "LSUB" then mailbox_data__lsub # RFC3501 (obsolete)
767
+ when "XLIST" then mailbox_data__xlist # deprecated
768
+ when "NOOP" then response_data__noop
769
+ else response_data__unhandled
770
+ end
771
+ end
772
+
773
+ def response_data__unhandled(klass = UntaggedResponse)
774
+ num = number?; SP?
775
+ type = tagged_ext_label; SP?
776
+ text = remaining_unparsed
777
+ data =
778
+ if num && text then UnparsedNumericResponseData.new(num, text)
779
+ elsif text then UnparsedData.new(text)
780
+ else num
781
+ end
782
+ klass.new(type, data, @str)
385
783
  end
386
784
 
387
- def body_type_text
388
- mtype, msubtype = media_type
389
- match(T_SPACE)
390
- param, content_id, desc, enc, size = body_fields
391
- match(T_SPACE)
392
- lines = number
393
- md5, disposition, language, extension = body_ext_1part
394
- return BodyTypeText.new(mtype, msubtype,
395
- param, content_id,
396
- desc, enc, size,
397
- lines,
398
- md5, disposition, language, extension)
785
+ # reads all the way up until CRLF
786
+ def remaining_unparsed
787
+ str = @str[@pos...-2] and @pos += str.bytesize
788
+ str&.empty? ? nil : str
399
789
  end
400
790
 
401
- def body_type_msg
402
- mtype, msubtype = media_type
403
- match(T_SPACE)
404
- param, content_id, desc, enc, size = body_fields
791
+ def response_data__ignored; response_data__unhandled(IgnoredResponse) end
792
+ alias response_data__noop response_data__ignored
405
793
 
406
- token = lookahead
407
- if token.symbol == T_RPAR
408
- # If this is not message/rfc822, we shouldn't apply the RFC822
409
- # spec to it. We should handle anything other than
410
- # message/rfc822 using multipart extension data [rfc3501] (i.e.
411
- # the data itself won't be returned, we would have to retrieve it
412
- # with BODYSTRUCTURE instead of with BODY
413
-
414
- # Also, sometimes a message/rfc822 is included as a large
415
- # attachment instead of having all of the other details
416
- # (e.g. attaching a .eml file to an email)
417
- if msubtype == "RFC822"
418
- return BodyTypeMessage.new(mtype, msubtype, param, content_id,
419
- desc, enc, size, nil, nil, nil, nil,
420
- nil, nil, nil)
421
- else
422
- return BodyTypeExtension.new(mtype, msubtype,
423
- param, content_id,
424
- desc, enc, size)
425
- end
426
- end
794
+ alias listrights_data response_data__unhandled
795
+ alias myrights_data response_data__unhandled
796
+ alias metadata_resp response_data__unhandled
797
+ alias language_data response_data__unhandled
798
+ alias comparator_data response_data__unhandled
799
+ alias message_data__converted response_data__unhandled
427
800
 
428
- match(T_SPACE)
429
- env = envelope
430
- match(T_SPACE)
431
- b = body
432
- match(T_SPACE)
433
- lines = number
434
- md5, disposition, language, extension = body_ext_1part
435
- return BodyTypeMessage.new(mtype, msubtype,
436
- param, content_id,
437
- desc, enc, size,
438
- env, b, lines,
439
- md5, disposition, language, extension)
801
+ # RFC3501 & RFC9051:
802
+ # response-tagged = tag SP resp-cond-state CRLF
803
+ def response_tagged
804
+ TaggedResponse.new(tag, *(SP!; resp_cond_state), @str)
440
805
  end
441
806
 
442
- def body_type_attachment
443
- mtype = case_insensitive_string
444
- match(T_SPACE)
445
- param = body_fld_param
446
- return BodyTypeAttachment.new(mtype, nil, param)
807
+ # RFC3501 & RFC9051:
808
+ # resp-cond-state = ("OK" / "NO" / "BAD") SP resp-text
809
+ #
810
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
811
+ # servers), we don't require a final SP and instead parse this as:
812
+ #
813
+ # resp-cond-state = ("OK" / "NO" / "BAD") [SP resp-text]
814
+ def resp_cond_state
815
+ [resp_cond_state__name, SP? ? resp_text : ResponseText::EMPTY]
447
816
  end
448
817
 
449
- def body_type_mixed
450
- mtype = "MULTIPART"
451
- msubtype = case_insensitive_string
452
- param, disposition, language, extension = body_ext_mpart
453
- return BodyTypeBasic.new(mtype, msubtype, param, nil, nil, nil, nil, nil, disposition, language, extension)
818
+ def resp_cond_state__untagged
819
+ UntaggedResponse.new(*resp_cond_state, @str)
454
820
  end
455
821
 
456
- def body_type_mpart
457
- parts = []
458
- while true
459
- token = lookahead
460
- if token.symbol == T_SPACE
461
- shift_token
462
- break
463
- end
464
- parts.push(body)
465
- end
466
- mtype = "MULTIPART"
467
- msubtype = case_insensitive_string
468
- param, disposition, language, extension = body_ext_mpart
469
- return BodyTypeMultipart.new(mtype, msubtype, parts,
470
- param, disposition, language,
471
- extension)
822
+ # resp-cond-auth = ("OK" / "PREAUTH") SP resp-text
823
+ #
824
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
825
+ # servers), we don't require a final SP and instead parse this as:
826
+ #
827
+ # resp-cond-auth = ("OK" / "PREAUTH") [SP resp-text]
828
+ def resp_cond_auth
829
+ UntaggedResponse.new(resp_cond_auth__name,
830
+ SP? ? resp_text : ResponseText::EMPTY,
831
+ @str)
472
832
  end
473
833
 
474
- def media_type
475
- mtype = case_insensitive_string
476
- token = lookahead
477
- if token.symbol != T_SPACE
478
- return mtype, nil
479
- end
480
- match(T_SPACE)
481
- msubtype = case_insensitive_string
482
- return mtype, msubtype
834
+ # resp-cond-bye = "BYE" SP resp-text
835
+ #
836
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
837
+ # servers), we don't require a final SP and instead parse this as:
838
+ #
839
+ # resp-cond-bye = "BYE" [SP resp-text]
840
+ def resp_cond_bye
841
+ UntaggedResponse.new(label(BYE),
842
+ SP? ? resp_text : ResponseText::EMPTY,
843
+ @str)
483
844
  end
484
845
 
485
- def body_fields
486
- param = body_fld_param
487
- match(T_SPACE)
488
- content_id = nstring
489
- match(T_SPACE)
490
- desc = nstring
491
- match(T_SPACE)
492
- enc = case_insensitive_string
493
- match(T_SPACE)
494
- size = number
495
- return param, content_id, desc, enc, size
846
+ # message-data = nz-number SP ("EXPUNGE" / ("FETCH" SP msg-att))
847
+ def message_data__fetch
848
+ seq = nz_number; SP!
849
+ name = label "FETCH"; SP!
850
+ data = FetchData.new(seq, msg_att(seq))
851
+ UntaggedResponse.new(name, data, @str)
496
852
  end
497
853
 
498
- def body_fld_param
499
- token = lookahead
500
- if token.symbol == T_NIL
501
- shift_token
502
- return nil
503
- end
504
- match(T_LPAR)
505
- param = {}
506
- while true
507
- token = lookahead
508
- case token.symbol
509
- when T_RPAR
510
- shift_token
511
- break
512
- when T_SPACE
513
- shift_token
514
- end
515
- name = case_insensitive_string
516
- match(T_SPACE)
517
- val = string
518
- param[name] = val
519
- end
520
- return param
854
+ # uidfetch-resp = uniqueid SP "UIDFETCH" SP msg-att
855
+ def uidfetch_resp
856
+ uid = uniqueid; SP!
857
+ name = label "UIDFETCH"; SP!
858
+ data = UIDFetchData.new(uid, msg_att(uid))
859
+ UntaggedResponse.new(name, data, @str)
521
860
  end
522
861
 
523
- def body_ext_1part
524
- token = lookahead
525
- if token.symbol == T_SPACE
526
- shift_token
527
- else
528
- return nil
529
- end
530
- md5 = nstring
531
-
532
- token = lookahead
533
- if token.symbol == T_SPACE
534
- shift_token
535
- else
536
- return md5
537
- end
538
- disposition = body_fld_dsp
539
-
540
- token = lookahead
541
- if token.symbol == T_SPACE
542
- shift_token
543
- else
544
- return md5, disposition
545
- end
546
- language = body_fld_lang
862
+ def response_data__simple_numeric
863
+ data = nz_number; SP!
864
+ name = tagged_ext_label
865
+ UntaggedResponse.new(name, data, @str)
866
+ end
547
867
 
548
- token = lookahead
549
- if token.symbol == T_SPACE
550
- shift_token
551
- else
552
- return md5, disposition, language
553
- end
868
+ alias message_data__expunge response_data__simple_numeric
869
+ alias mailbox_data__exists response_data__simple_numeric
870
+ alias mailbox_data__recent response_data__simple_numeric
554
871
 
555
- extension = body_extensions
556
- return md5, disposition, language, extension
872
+ # The name for this is confusing, because it *replaces* EXPUNGE
873
+ # >>>
874
+ # expunged-resp = "VANISHED" [SP "(EARLIER)"] SP known-uids
875
+ def expunged_resp
876
+ name = label "VANISHED"; SP!
877
+ earlier = if lpar? then label("EARLIER"); rpar; SP!; true else false end
878
+ uids = known_uids
879
+ data = VanishedData[uids, earlier]
880
+ UntaggedResponse.new name, data, @str
557
881
  end
558
882
 
559
- def body_ext_mpart
560
- token = lookahead
561
- if token.symbol == T_SPACE
562
- shift_token
563
- else
564
- return nil
565
- end
566
- param = body_fld_param
883
+ # TODO: replace with uid_set
884
+ alias known_uids sequence_set
567
885
 
568
- token = lookahead
569
- if token.symbol == T_SPACE
570
- shift_token
571
- else
572
- return param
886
+ # RFC3501 & RFC9051:
887
+ # msg-att = "(" (msg-att-dynamic / msg-att-static)
888
+ # *(SP (msg-att-dynamic / msg-att-static)) ")"
889
+ #
890
+ # msg-att-dynamic = "FLAGS" SP "(" [flag-fetch *(SP flag-fetch)] ")"
891
+ # RFC5257 (ANNOTATE extension):
892
+ # msg-att-dynamic =/ "ANNOTATION" SP
893
+ # ( "(" entry-att *(SP entry-att) ")" /
894
+ # "(" entry *(SP entry) ")" )
895
+ # RFC7162 (CONDSTORE extension):
896
+ # msg-att-dynamic =/ fetch-mod-resp
897
+ # fetch-mod-resp = "MODSEQ" SP "(" permsg-modsequence ")"
898
+ # RFC8970 (PREVIEW extension):
899
+ # msg-att-dynamic =/ "PREVIEW" SP nstring
900
+ #
901
+ # RFC3501:
902
+ # msg-att-static = "ENVELOPE" SP envelope /
903
+ # "INTERNALDATE" SP date-time /
904
+ # "RFC822" [".HEADER" / ".TEXT"] SP nstring /
905
+ # "RFC822.SIZE" SP number /
906
+ # "BODY" ["STRUCTURE"] SP body /
907
+ # "BODY" section ["<" number ">"] SP nstring /
908
+ # "UID" SP uniqueid
909
+ # RFC3516 (BINARY extension):
910
+ # msg-att-static =/ "BINARY" section-binary SP (nstring / literal8)
911
+ # / "BINARY.SIZE" section-binary SP number
912
+ # RFC8514 (SAVEDATE extension):
913
+ # msg-att-static =/ "SAVEDATE" SP (date-time / nil)
914
+ # RFC8474 (OBJECTID extension):
915
+ # msg-att-static =/ fetch-emailid-resp / fetch-threadid-resp
916
+ # fetch-emailid-resp = "EMAILID" SP "(" objectid ")"
917
+ # fetch-threadid-resp = "THREADID" SP ( "(" objectid ")" / nil )
918
+ # RFC9051:
919
+ # msg-att-static = "ENVELOPE" SP envelope /
920
+ # "INTERNALDATE" SP date-time /
921
+ # "RFC822.SIZE" SP number64 /
922
+ # "BODY" ["STRUCTURE"] SP body /
923
+ # "BODY" section ["<" number ">"] SP nstring /
924
+ # "BINARY" section-binary SP (nstring / literal8) /
925
+ # "BINARY.SIZE" section-binary SP number /
926
+ # "UID" SP uniqueid
927
+ #
928
+ # Re https://www.rfc-editor.org/errata/eid7246, I'm adding "offset" to the
929
+ # official "BINARY" ABNF, like so:
930
+ #
931
+ # msg-att-static =/ "BINARY" section-binary ["<" number ">"] SP
932
+ # (nstring / literal8)
933
+ def msg_att(n)
934
+ lpar
935
+ attr = {}
936
+ while true
937
+ name = msg_att__label; SP!
938
+ val =
939
+ case name
940
+ when "UID" then uniqueid
941
+ when "FLAGS" then flag_list
942
+ when "BODY" then body
943
+ when /\ABODY\[/ni then nstring
944
+ when "BODYSTRUCTURE" then body
945
+ when "ENVELOPE" then envelope
946
+ when "INTERNALDATE" then date_time
947
+ when "RFC822.SIZE" then number64
948
+ when /\ABINARY\[/ni then nstring8 # BINARY, IMAP4rev2
949
+ when /\ABINARY\.SIZE\[/ni then number # BINARY, IMAP4rev2
950
+ when "RFC822" then nstring # not in rev2
951
+ when "RFC822.HEADER" then nstring # not in rev2
952
+ when "RFC822.TEXT" then nstring # not in rev2
953
+ when "MODSEQ" then parens__modseq # CONDSTORE
954
+ when "EMAILID" then parens__objectid # OBJECTID
955
+ when "THREADID" then nparens__objectid # OBJECTID
956
+ when "X-GM-MSGID" then x_gm_id # GMail
957
+ when "X-GM-THRID" then x_gm_id # GMail
958
+ when "X-GM-LABELS" then x_gm_labels # GMail
959
+ else parse_error("unknown attribute `%s' for {%d}", name, n)
960
+ end
961
+ attr[name] = val
962
+ break unless SP?
963
+ break if lookahead_rpar?
573
964
  end
574
- disposition = body_fld_dsp
965
+ rpar
966
+ attr
967
+ end
575
968
 
576
- token = lookahead
577
- if token.symbol == T_SPACE
578
- shift_token
579
- else
580
- return param, disposition
969
+ # appends "[section]" and "<partial>" to the base label
970
+ def msg_att__label
971
+ case (name = tagged_ext_label)
972
+ when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
973
+ # ignoring "[]" fixes https://bugs.ruby-lang.org/issues/5620
974
+ lbra? and rbra
975
+ when "BODY"
976
+ peek_lbra? and name << section and
977
+ peek_str?("<") and name << gt__number__lt # partial
978
+ when "BINARY", "BINARY.SIZE"
979
+ name << section_binary
980
+ # see https://www.rfc-editor.org/errata/eid7246 and the note above
981
+ peek_str?("<") and name << gt__number__lt # partial
581
982
  end
582
- language = body_fld_lang
983
+ name
984
+ end
583
985
 
584
- token = lookahead
585
- if token.symbol == T_SPACE
586
- shift_token
587
- else
588
- return param, disposition, language
589
- end
986
+ # this represents the partial size for BODY or BINARY
987
+ alias gt__number__lt atom
590
988
 
591
- extension = body_extensions
592
- return param, disposition, language, extension
989
+ # RFC3501 & RFC9051:
990
+ # envelope = "(" env-date SP env-subject SP env-from SP
991
+ # env-sender SP env-reply-to SP env-to SP env-cc SP
992
+ # env-bcc SP env-in-reply-to SP env-message-id ")"
993
+ def envelope
994
+ @lex_state = EXPR_DATA
995
+ lpar; date = env_date
996
+ SP!; subject = env_subject
997
+ SP!; from = env_from
998
+ SP!; sender = env_sender
999
+ SP!; reply_to = env_reply_to
1000
+ SP!; to = env_to
1001
+ SP!; cc = env_cc
1002
+ SP!; bcc = env_bcc
1003
+ SP!; in_reply_to = env_in_reply_to
1004
+ SP!; message_id = env_message_id
1005
+ rpar
1006
+ Envelope.new(date, subject, from, sender, reply_to,
1007
+ to, cc, bcc, in_reply_to, message_id)
1008
+ ensure
1009
+ @lex_state = EXPR_BEG
593
1010
  end
594
1011
 
595
- def body_fld_dsp
596
- token = lookahead
597
- if token.symbol == T_NIL
598
- shift_token
599
- return nil
600
- end
601
- match(T_LPAR)
602
- dsp_type = case_insensitive_string
603
- match(T_SPACE)
604
- param = body_fld_param
605
- match(T_RPAR)
606
- return ContentDisposition.new(dsp_type, param)
1012
+ # env-date = nstring
1013
+ # env-subject = nstring
1014
+ # env-in-reply-to = nstring
1015
+ # env-message-id = nstring
1016
+ alias env_date nstring
1017
+ alias env_subject nstring
1018
+ alias env_in_reply_to nstring
1019
+ alias env_message_id nstring
1020
+
1021
+ # env-from = "(" 1*address ")" / nil
1022
+ # env-sender = "(" 1*address ")" / nil
1023
+ # env-reply-to = "(" 1*address ")" / nil
1024
+ # env-to = "(" 1*address ")" / nil
1025
+ # env-cc = "(" 1*address ")" / nil
1026
+ # env-bcc = "(" 1*address ")" / nil
1027
+ def nlist__address
1028
+ return if NIL?
1029
+ lpar; list = [address]; list << address until (quirky_SP?; rpar?)
1030
+ list
1031
+ end
1032
+
1033
+ alias env_from nlist__address
1034
+ alias env_sender nlist__address
1035
+ alias env_reply_to nlist__address
1036
+ alias env_to nlist__address
1037
+ alias env_cc nlist__address
1038
+ alias env_bcc nlist__address
1039
+
1040
+ # Used when servers erroneously send an extra SP.
1041
+ #
1042
+ # As of 2023-11-28, Outlook.com (still) sends SP
1043
+ # between +address+ in <tt>env-*</tt> lists.
1044
+ alias quirky_SP? SP?
1045
+
1046
+ # date-time = DQUOTE date-day-fixed "-" date-month "-" date-year
1047
+ # SP time SP zone DQUOTE
1048
+ alias date_time quoted
1049
+ alias ndatetime nquoted
1050
+
1051
+ # RFC-3501 & RFC-9051:
1052
+ # body = "(" (body-type-1part / body-type-mpart) ")"
1053
+ def body
1054
+ @lex_state = EXPR_DATA
1055
+ lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar
1056
+ result
1057
+ ensure
1058
+ @lex_state = EXPR_BEG
607
1059
  end
1060
+ alias lookahead_body? lookahead_lpar?
608
1061
 
609
- def body_fld_lang
610
- token = lookahead
611
- if token.symbol == T_LPAR
612
- shift_token
613
- result = []
614
- while true
615
- token = lookahead
616
- case token.symbol
617
- when T_RPAR
618
- shift_token
619
- return result
620
- when T_SPACE
621
- shift_token
622
- end
623
- result.push(case_insensitive_string)
624
- end
625
- else
626
- lang = nstring
627
- if lang
628
- return lang.upcase
629
- else
630
- return lang
631
- end
632
- end
1062
+ # RFC-3501 & RFC9051:
1063
+ # body-type-1part = (body-type-basic / body-type-msg / body-type-text)
1064
+ # [SP body-ext-1part]
1065
+ def body_type_1part
1066
+ # This regexp peek is a performance optimization.
1067
+ # The lookahead fallback would work fine too.
1068
+ m = peek_re(/\G(?:
1069
+ (?<TEXT> "TEXT" \s "[^"]+" )
1070
+ |(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
1071
+ |(?<BASIC> "[^"]+" \s "[^"]+" )
1072
+ |(?<MIXED> "MIXED" )
1073
+ )/nix)
1074
+ choice = m&.named_captures&.compact&.keys&.first
1075
+ # In practice, the following line should never be used. But the ABNF
1076
+ # *does* allow literals, and this will handle them.
1077
+ choice ||= lookahead_case_insensitive__string!
1078
+ case choice
1079
+ when "BASIC" then body_type_basic # => BodyTypeBasic
1080
+ when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
1081
+ when "TEXT" then body_type_text # => BodyTypeText
1082
+ when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
1083
+ else body_type_basic # might be a bug; server's or ours?
1084
+ end
1085
+ end
1086
+
1087
+ # RFC-3501 & RFC9051:
1088
+ # body-type-basic = media-basic SP body-fields
1089
+ def body_type_basic
1090
+ type = media_basic # n.b. "basic" type isn't enforced here
1091
+ if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid
1092
+ SP!; flds = body_fields
1093
+ SP? and exts = body_ext_1part
1094
+ BodyTypeBasic.new(*type, *flds, *exts)
633
1095
  end
634
1096
 
635
- def body_extensions
636
- result = []
637
- while true
638
- token = lookahead
639
- case token.symbol
640
- when T_RPAR
641
- return result
642
- when T_SPACE
643
- shift_token
644
- end
645
- result.push(body_extension)
646
- end
1097
+ # RFC-3501 & RFC-9051:
1098
+ # body-type-text = media-text SP body-fields SP body-fld-lines
1099
+ def body_type_text
1100
+ type = media_text
1101
+ SP!; flds = body_fields
1102
+ SP!; lines = body_fld_lines
1103
+ SP? and exts = body_ext_1part
1104
+ BodyTypeText.new(*type, *flds, lines, *exts)
647
1105
  end
648
1106
 
649
- def body_extension
650
- token = lookahead
651
- case token.symbol
652
- when T_LPAR
653
- shift_token
654
- result = body_extensions
655
- match(T_RPAR)
656
- return result
657
- when T_NUMBER
658
- return number
659
- else
660
- return nstring
661
- end
1107
+ # RFC-3501 & RFC-9051:
1108
+ # body-type-msg = media-message SP body-fields SP envelope
1109
+ # SP body SP body-fld-lines
1110
+ def body_type_msg
1111
+ # n.b. "message/rfc822" type isn't enforced here
1112
+ type = media_message
1113
+ SP!; flds = body_fields
1114
+
1115
+ # Sometimes servers send body-type-basic when body-type-msg should be.
1116
+ # E.g: when a message/rfc822 part has "Content-Disposition: attachment".
1117
+ #
1118
+ # * SP "(" --> SP envelope --> continue as body-type-msg
1119
+ # * ")" --> no body-ext-1part --> completed body-type-basic
1120
+ # * SP nstring --> SP body-fld-md5
1121
+ # --> SP body-ext-1part --> continue as body-type-basic
1122
+ #
1123
+ # It's probably better to return BodyTypeBasic---even for
1124
+ # "message/rfc822"---than BodyTypeMessage with invalid fields.
1125
+ unless peek_str?(" (")
1126
+ SP? and exts = body_ext_1part
1127
+ return BodyTypeBasic.new(*type, *flds, *exts)
1128
+ end
1129
+
1130
+ SP!; env = envelope
1131
+ SP!; bdy = body
1132
+ SP!; lines = body_fld_lines
1133
+ SP? and exts = body_ext_1part
1134
+ BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts)
1135
+ end
1136
+
1137
+ # This is a malformed body-type-mpart with no subparts.
1138
+ def body_type_mixed
1139
+ # warn "malformed body-type-mpart: multipart/mixed with no parts."
1140
+ type = media_subtype # => "MIXED"
1141
+ SP? and exts = body_ext_mpart
1142
+ BodyTypeMultipart.new("MULTIPART", type, nil, *exts)
662
1143
  end
663
1144
 
664
- def section
665
- str = String.new
666
- token = match(T_LBRA)
667
- str.concat(token.value)
668
- token = match(T_ATOM, T_NUMBER, T_RBRA)
669
- if token.symbol == T_RBRA
670
- str.concat(token.value)
671
- return str
672
- end
673
- str.concat(token.value)
674
- token = lookahead
675
- if token.symbol == T_SPACE
676
- shift_token
677
- str.concat(token.value)
678
- token = match(T_LPAR)
679
- str.concat(token.value)
680
- while true
681
- token = lookahead
682
- case token.symbol
683
- when T_RPAR
684
- str.concat(token.value)
685
- shift_token
686
- break
687
- when T_SPACE
688
- shift_token
689
- str.concat(token.value)
690
- end
691
- str.concat(format_string(astring))
692
- end
693
- end
694
- token = match(T_RBRA)
695
- str.concat(token.value)
696
- return str
697
- end
698
-
699
- def format_string(str)
700
- case str
701
- when ""
702
- return '""'
703
- when /[\x80-\xff\r\n]/n
704
- # literal
705
- return "{" + str.bytesize.to_s + "}" + CRLF + str
706
- when /[(){ \x00-\x1f\x7f%*"\\]/n
707
- # quoted string
708
- return '"' + str.gsub(/["\\]/n, "\\\\\\&") + '"'
709
- else
710
- # atom
711
- return str
712
- end
1145
+ # RFC-3501 & RFC-9051:
1146
+ # body-type-mpart = 1*body SP media-subtype
1147
+ # [SP body-ext-mpart]
1148
+ def body_type_mpart
1149
+ parts = [body]; parts << body until SP?; msubtype = media_subtype
1150
+ SP? and exts = body_ext_mpart
1151
+ BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts)
713
1152
  end
714
1153
 
715
- def uid_data
716
- token = match(T_ATOM)
717
- name = token.value.upcase
718
- match(T_SPACE)
719
- return name, number
1154
+ # n.b. this handles both type and subtype
1155
+ #
1156
+ # RFC-3501 vs RFC-9051:
1157
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
1158
+ # "MESSAGE" /
1159
+ # "VIDEO") DQUOTE) / string) SP media-subtype
1160
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
1161
+ # "FONT" / "MESSAGE" / "MODEL" /
1162
+ # "VIDEO") DQUOTE) / string) SP media-subtype
1163
+ #
1164
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
1165
+ # DQUOTE "RFC822" DQUOTE
1166
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
1167
+ # DQUOTE ("RFC822" / "GLOBAL") DQUOTE
1168
+ #
1169
+ # RFC-3501 & RFC-9051:
1170
+ # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
1171
+ # media-subtype = string
1172
+ def media_type
1173
+ mtype = case_insensitive__string
1174
+ SP? or return mtype, nil # ??? quirky!
1175
+ msubtype = media_subtype
1176
+ return mtype, msubtype
720
1177
  end
721
1178
 
722
- def modseq_data
723
- token = match(T_ATOM)
724
- name = token.value.upcase
725
- match(T_SPACE)
726
- match(T_LPAR)
727
- modseq = number
728
- match(T_RPAR)
729
- return name, modseq
1179
+ # TODO: check types
1180
+ alias media_basic media_type # */* --- catchall
1181
+ alias media_message media_type # message/rfc822, message/global
1182
+ alias media_text media_type # text/*
1183
+
1184
+ alias media_subtype case_insensitive__string
1185
+
1186
+ # RFC-3501 & RFC-9051:
1187
+ # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
1188
+ # body-fld-enc SP body-fld-octets
1189
+ def body_fields
1190
+ fields = []
1191
+ fields << body_fld_param; SP!
1192
+ fields << body_fld_id; SP!
1193
+ fields << body_fld_desc; SP!
1194
+ fields << body_fld_enc; SP!
1195
+ fields << body_fld_octets
1196
+ fields
730
1197
  end
731
1198
 
732
- def ignored_response
733
- while lookahead.symbol != T_CRLF
734
- shift_token
735
- end
736
- return IgnoredResponse.new(@str)
1199
+ # RFC3501, RFC9051:
1200
+ # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
1201
+ def body_fld_param
1202
+ quirky_SP? # See comments on test_bodystructure_extra_space
1203
+ return if NIL?
1204
+ param = {}
1205
+ lpar
1206
+ name = case_insensitive__string; SP!; param[name] = string
1207
+ while SP?
1208
+ name = case_insensitive__string; SP!; param[name] = string
1209
+ end
1210
+ rpar
1211
+ param
1212
+ end
1213
+
1214
+ # RFC2060
1215
+ # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
1216
+ # [SPACE body_fld_lang
1217
+ # [SPACE 1#body_extension]]]
1218
+ # ;; MUST NOT be returned on non-extensible
1219
+ # ;; "BODY" fetch
1220
+ # RFC3501 & RFC9051
1221
+ # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
1222
+ # [SP body-fld-loc *(SP body-extension)]]]
1223
+ # ; MUST NOT be returned on non-extensible
1224
+ # ; "BODY" fetch
1225
+ def body_ext_1part
1226
+ fields = []; fields << body_fld_md5
1227
+ SP? or return fields; fields << body_fld_dsp
1228
+ SP? or return fields; fields << body_fld_lang
1229
+ SP? or return fields; fields << body_fld_loc
1230
+ SP? or return fields; fields << body_extensions
1231
+ fields
1232
+ end
1233
+
1234
+ # RFC-2060:
1235
+ # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
1236
+ # [SP 1#body_extension]]
1237
+ # ;; MUST NOT be returned on non-extensible
1238
+ # ;; "BODY" fetch
1239
+ # RFC-3501 & RFC-9051:
1240
+ # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
1241
+ # [SP body-fld-loc *(SP body-extension)]]]
1242
+ # ; MUST NOT be returned on non-extensible
1243
+ # ; "BODY" fetch
1244
+ def body_ext_mpart
1245
+ fields = []; fields << body_fld_param
1246
+ SP? or return fields; fields << body_fld_dsp
1247
+ SP? or return fields; fields << body_fld_lang
1248
+ SP? or return fields; fields << body_fld_loc
1249
+ SP? or return fields; fields << body_extensions
1250
+ fields
1251
+ end
1252
+
1253
+ alias body_fld_desc nstring
1254
+ alias body_fld_id nstring
1255
+ alias body_fld_loc nstring
1256
+ alias body_fld_lines number64 # number in 3501, number64 in 9051
1257
+ alias body_fld_md5 nstring
1258
+ alias body_fld_octets number
1259
+
1260
+ # RFC-3501 & RFC-9051:
1261
+ # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
1262
+ # "QUOTED-PRINTABLE") DQUOTE) / string
1263
+ alias body_fld_enc case_insensitive__string
1264
+
1265
+ # body-fld-dsp = "(" string SP body-fld-param ")" / nil
1266
+ def body_fld_dsp
1267
+ return if NIL?
1268
+ lpar; dsp_type = case_insensitive__string
1269
+ SP!; param = body_fld_param
1270
+ rpar
1271
+ ContentDisposition.new(dsp_type, param)
737
1272
  end
738
1273
 
739
- def text_response
740
- token = match(T_ATOM)
741
- name = token.value.upcase
742
- match(T_SPACE)
743
- return UntaggedResponse.new(name, text)
1274
+ # body-fld-lang = nstring / "(" string *(SP string) ")"
1275
+ def body_fld_lang
1276
+ if lpar?
1277
+ result = [case_insensitive__string]
1278
+ result << case_insensitive__string while SP?
1279
+ rpar
1280
+ result
1281
+ else
1282
+ case_insensitive__nstring
1283
+ end
744
1284
  end
745
1285
 
746
- def flags_response
747
- token = match(T_ATOM)
748
- name = token.value.upcase
749
- match(T_SPACE)
750
- return UntaggedResponse.new(name, flag_list, @str)
1286
+ # body-extension *(SP body-extension)
1287
+ def body_extensions
1288
+ result = []
1289
+ result << body_extension; while SP? do result << body_extension end
1290
+ result
751
1291
  end
752
1292
 
753
- def list_response
754
- token = match(T_ATOM)
755
- name = token.value.upcase
756
- match(T_SPACE)
757
- return UntaggedResponse.new(name, mailbox_list, @str)
1293
+ # body-extension = nstring / number / number64 /
1294
+ # "(" body-extension *(SP body-extension) ")"
1295
+ # ; Future expansion. Client implementations
1296
+ # ; MUST accept body-extension fields. Server
1297
+ # ; implementations MUST NOT generate
1298
+ # ; body-extension fields except as defined by
1299
+ # ; future Standard or Standards Track
1300
+ # ; revisions of this specification.
1301
+ def body_extension
1302
+ if (uint = number64?) then uint
1303
+ elsif lpar? then exts = body_extensions; rpar; exts
1304
+ else nstring
1305
+ end
758
1306
  end
759
1307
 
1308
+ # section = "[" [section-spec] "]"
1309
+ def section
1310
+ str = +lbra
1311
+ str << section_spec unless peek_rbra?
1312
+ str << rbra
1313
+ end
1314
+
1315
+ # section-binary = "[" [section-part] "]"
1316
+ def section_binary
1317
+ str = +lbra
1318
+ str << section_part unless peek_rbra?
1319
+ str << rbra
1320
+ end
1321
+
1322
+ # section-spec = section-msgtext / (section-part ["." section-text])
1323
+ # section-msgtext = "HEADER" /
1324
+ # "HEADER.FIELDS" [".NOT"] SP header-list /
1325
+ # "TEXT"
1326
+ # ; top-level or MESSAGE/RFC822 or
1327
+ # ; MESSAGE/GLOBAL part
1328
+ # section-part = nz-number *("." nz-number)
1329
+ # ; body part reference.
1330
+ # ; Allows for accessing nested body parts.
1331
+ # section-text = section-msgtext / "MIME"
1332
+ # ; text other than actual body part (headers,
1333
+ # ; etc.)
1334
+ #
1335
+ # n.b: we could "cheat" here and just grab all text inside the brackets,
1336
+ # but literals would need special treatment.
1337
+ def section_spec
1338
+ str = "".b
1339
+ str << atom # grabs everything up to "SP header-list" or "]"
1340
+ str << " " << header_list if SP?
1341
+ str
1342
+ end
1343
+
1344
+ # header-list = "(" header-fld-name *(SP header-fld-name) ")"
1345
+ def header_list
1346
+ str = +""
1347
+ str << lpar << header_fld_name
1348
+ str << " " << header_fld_name while SP?
1349
+ str << rpar
1350
+ end
1351
+
1352
+ # section-part = nz-number *("." nz-number)
1353
+ # ; body part reference.
1354
+ # ; Allows for accessing nested body parts.
1355
+ alias section_part atom
1356
+
1357
+ # RFC3501 & RFC9051:
1358
+ # header-fld-name = astring
1359
+ #
1360
+ # NOTE: Previously, Net::IMAP recreated the raw original source string.
1361
+ # Now, it returns the decoded astring value. Although this is technically
1362
+ # incompatible, it should almost never make a difference: all standard
1363
+ # header field names are valid atoms:
1364
+ #
1365
+ # https://www.iana.org/assignments/message-headers/message-headers.xhtml
1366
+ #
1367
+ # See also RFC5233:
1368
+ # optional-field = field-name ":" unstructured CRLF
1369
+ # field-name = 1*ftext
1370
+ # ftext = %d33-57 / ; Printable US-ASCII
1371
+ # %d59-126 ; characters not including
1372
+ # ; ":".
1373
+ alias header_fld_name astring
1374
+
1375
+ # mailbox-data = "FLAGS" SP flag-list / "LIST" SP mailbox-list /
1376
+ # "LSUB" SP mailbox-list / "SEARCH" *(SP nz-number) /
1377
+ # "STATUS" SP mailbox SP "(" [status-att-list] ")" /
1378
+ # number SP "EXISTS" / number SP "RECENT"
1379
+
1380
+ def mailbox_data__flags
1381
+ name = label("FLAGS")
1382
+ SP!
1383
+ UntaggedResponse.new(name, flag_list, @str)
1384
+ end
1385
+
1386
+ def mailbox_data__list
1387
+ name = label_in("LIST", "LSUB", "XLIST")
1388
+ SP!
1389
+ UntaggedResponse.new(name, mailbox_list, @str)
1390
+ end
1391
+ alias mailbox_data__lsub mailbox_data__list
1392
+ alias mailbox_data__xlist mailbox_data__list
1393
+
1394
+ # mailbox-list = "(" [mbx-list-flags] ")" SP
1395
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil) SP mailbox
1396
+ # [SP mbox-list-extended]
1397
+ # ; This is the list information pointed to by the ABNF
1398
+ # ; item "mailbox-data", which is defined above
760
1399
  def mailbox_list
761
- attr = flag_list
762
- match(T_SPACE)
763
- token = match(T_QUOTED, T_NIL)
764
- if token.symbol == T_NIL
765
- delim = nil
766
- else
767
- delim = token.value
768
- end
769
- match(T_SPACE)
770
- name = astring
771
- return MailboxList.new(attr, delim, name)
1400
+ lpar; attr = peek_rpar? ? [] : mbx_list_flags; rpar
1401
+ SP!; delim = nquoted
1402
+ SP!; name = mailbox
1403
+ # TODO: mbox-list-extended
1404
+ MailboxList.new(attr, delim, name)
772
1405
  end
773
1406
 
774
- def getquota_response
1407
+ def quota_response
775
1408
  # If quota never established, get back
776
1409
  # `NO Quota root does not exist'.
777
1410
  # If quota removed, get `()' after the
@@ -804,7 +1437,7 @@ module Net
804
1437
  end
805
1438
  end
806
1439
 
807
- def getquotaroot_response
1440
+ def quotaroot_response
808
1441
  # Similar to getquota, but only admin can use getquota.
809
1442
  token = match(T_ATOM)
810
1443
  name = token.value.upcase
@@ -821,7 +1454,8 @@ module Net
821
1454
  return UntaggedResponse.new(name, data, @str)
822
1455
  end
823
1456
 
824
- def getacl_response
1457
+ # acl-data = "ACL" SP mailbox *(SP identifier SP rights)
1458
+ def acl_data
825
1459
  token = match(T_ATOM)
826
1460
  name = token.value.upcase
827
1461
  match(T_SPACE)
@@ -839,157 +1473,309 @@ module Net
839
1473
  shift_token
840
1474
  end
841
1475
  user = astring
842
- match(T_SPACE)
843
- rights = astring
844
- data.push(MailboxACLItem.new(user, rights, mailbox))
845
- end
846
- end
847
- return UntaggedResponse.new(name, data, @str)
848
- end
849
-
850
- def search_response
851
- token = match(T_ATOM)
852
- name = token.value.upcase
853
- token = lookahead
854
- if token.symbol == T_SPACE
855
- shift_token
856
- data = []
857
- while true
858
- token = lookahead
859
- case token.symbol
860
- when T_CRLF
861
- break
862
- when T_SPACE
863
- shift_token
864
- when T_NUMBER
865
- data.push(number)
866
- when T_LPAR
867
- # TODO: include the MODSEQ value in a response
868
- shift_token
869
- match(T_ATOM)
870
- match(T_SPACE)
871
- match(T_NUMBER)
872
- match(T_RPAR)
873
- end
874
- end
875
- else
876
- data = []
877
- end
878
- return UntaggedResponse.new(name, data, @str)
879
- end
880
-
881
- def thread_response
882
- token = match(T_ATOM)
883
- name = token.value.upcase
884
- token = lookahead
885
-
886
- if token.symbol == T_SPACE
887
- threads = []
888
-
889
- while true
890
- shift_token
891
- token = lookahead
892
-
893
- case token.symbol
894
- when T_LPAR
895
- threads << thread_branch(token)
896
- when T_CRLF
897
- break
898
- end
899
- end
900
- else
901
- # no member
902
- threads = []
903
- end
904
-
905
- return UntaggedResponse.new(name, threads, @str)
906
- end
907
-
908
- def thread_branch(token)
909
- rootmember = nil
910
- lastmember = nil
911
-
912
- while true
913
- shift_token # ignore first T_LPAR
914
- token = lookahead
915
-
916
- case token.symbol
917
- when T_NUMBER
918
- # new member
919
- newmember = ThreadMember.new(number, [])
920
- if rootmember.nil?
921
- rootmember = newmember
922
- else
923
- lastmember.children << newmember
924
- end
925
- lastmember = newmember
926
- when T_SPACE
927
- # do nothing
928
- when T_LPAR
929
- if rootmember.nil?
930
- # dummy member
931
- lastmember = rootmember = ThreadMember.new(nil, [])
932
- end
933
-
934
- lastmember.children << thread_branch(token)
935
- when T_RPAR
936
- break
1476
+ match(T_SPACE)
1477
+ rights = astring
1478
+ data.push(MailboxACLItem.new(user, rights, mailbox))
937
1479
  end
938
1480
  end
939
-
940
- return rootmember
1481
+ return UntaggedResponse.new(name, data, @str)
941
1482
  end
942
1483
 
943
- def status_response
944
- token = match(T_ATOM)
945
- name = token.value.upcase
946
- match(T_SPACE)
947
- mailbox = astring
948
- match(T_SPACE)
949
- match(T_LPAR)
950
- attr = {}
951
- while true
952
- token = lookahead
953
- case token.symbol
954
- when T_RPAR
955
- shift_token
956
- break
957
- when T_SPACE
958
- shift_token
1484
+ # RFC3501:
1485
+ # mailbox-data = "SEARCH" *(SP nz-number) / ...
1486
+ # RFC5256: SORT
1487
+ # sort-data = "SORT" *(SP nz-number)
1488
+ # RFC7162: CONDSTORE, QRESYNC
1489
+ # mailbox-data =/ "SEARCH" [1*(SP nz-number) SP
1490
+ # search-sort-mod-seq]
1491
+ # sort-data = "SORT" [1*(SP nz-number) SP
1492
+ # search-sort-mod-seq]
1493
+ # ; Updates the SORT response from RFC 5256.
1494
+ # search-sort-mod-seq = "(" "MODSEQ" SP mod-sequence-value ")"
1495
+ # RFC9051:
1496
+ # mailbox-data = obsolete-search-response / ...
1497
+ # obsolete-search-response = "SEARCH" *(SP nz-number)
1498
+ def mailbox_data__search
1499
+ name = label_in("SEARCH", "SORT")
1500
+ data = []
1501
+ while _ = SP? && nz_number? do data << _ end
1502
+ if lpar?
1503
+ label("MODSEQ"); SP!
1504
+ modseq = mod_sequence_value
1505
+ rpar
1506
+ end
1507
+ data = SearchResult.new(data, modseq: modseq)
1508
+ UntaggedResponse.new(name, data, @str)
1509
+ end
1510
+ alias sort_data mailbox_data__search
1511
+
1512
+ # esearch-response = "ESEARCH" [search-correlator] [SP "UID"]
1513
+ # *(SP search-return-data)
1514
+ # ;; Note that SEARCH and ESEARCH responses
1515
+ # ;; SHOULD be mutually exclusive,
1516
+ # ;; i.e., only one of the response types
1517
+ # ;; should be
1518
+ # ;; returned as a result of a command.
1519
+ # esearch-response = "ESEARCH" [search-correlator] [SP "UID"]
1520
+ # *(SP search-return-data)
1521
+ # ; ESEARCH response replaces SEARCH response
1522
+ # ; from IMAP4rev1.
1523
+ # search-correlator = SP "(" "TAG" SP tag-string ")"
1524
+ def esearch_response
1525
+ name = label("ESEARCH")
1526
+ tag = search_correlator if peek_str?(" (")
1527
+ uid = peek_re?(/\G UID\b/i) && (SP!; label("UID"); true)
1528
+ data = []
1529
+ data << search_return_data while SP?
1530
+ esearch = ESearchResult.new(tag, uid, data)
1531
+ UntaggedResponse.new(name, esearch, @str)
1532
+ end
1533
+
1534
+ # From RFC4731 (ESEARCH):
1535
+ # search-return-data = "MIN" SP nz-number /
1536
+ # "MAX" SP nz-number /
1537
+ # "ALL" SP sequence-set /
1538
+ # "COUNT" SP number /
1539
+ # search-ret-data-ext
1540
+ # ; All return data items conform to
1541
+ # ; search-ret-data-ext syntax.
1542
+ # search-ret-data-ext = search-modifier-name SP search-return-value
1543
+ # search-modifier-name = tagged-ext-label
1544
+ # search-return-value = tagged-ext-val
1545
+ #
1546
+ # From RFC4731 (ESEARCH):
1547
+ # search-return-data =/ "MODSEQ" SP mod-sequence-value
1548
+ #
1549
+ # From RFC9394 (PARTIAL):
1550
+ # search-return-data =/ ret-data-partial
1551
+ #
1552
+ def search_return_data
1553
+ label = search_modifier_name; SP!
1554
+ value =
1555
+ case label
1556
+ when "MIN" then nz_number
1557
+ when "MAX" then nz_number
1558
+ when "ALL" then sequence_set
1559
+ when "COUNT" then number
1560
+ when "MODSEQ" then mod_sequence_value # RFC7162: CONDSTORE
1561
+ when "PARTIAL" then ret_data_partial__value # RFC9394: PARTIAL
1562
+ else search_return_value
1563
+ end
1564
+ [label, value]
1565
+ end
1566
+
1567
+ # From RFC5267 (CONTEXT=SEARCH, CONTEXT=SORT) and RFC9394 (PARTIAL):
1568
+ # ret-data-partial = "PARTIAL"
1569
+ # SP "(" partial-range SP partial-results ")"
1570
+ def ret_data_partial__value
1571
+ lpar
1572
+ range = partial_range; SP!
1573
+ results = partial_results
1574
+ rpar
1575
+ ESearchResult::PartialResult.new(range, results)
1576
+ end
1577
+
1578
+ # partial-range = partial-range-first / partial-range-last
1579
+ # tagged-ext-simple =/ partial-range-last
1580
+ def partial_range
1581
+ case (str = atom)
1582
+ when Patterns::PARTIAL_RANGE_FIRST, Patterns::PARTIAL_RANGE_LAST
1583
+ min, max = [Integer($1), Integer($2)].minmax
1584
+ min..max
1585
+ else
1586
+ parse_error("unexpected atom %p, expected partial-range", str)
1587
+ end
1588
+ end
1589
+
1590
+ # partial-results = sequence-set / "NIL"
1591
+ # ;; <sequence-set> from [RFC3501].
1592
+ # ;; NIL indicates that no results correspond to
1593
+ # ;; the requested range.
1594
+ def partial_results; NIL? ? nil : sequence_set end
1595
+
1596
+ # search-modifier-name = tagged-ext-label
1597
+ alias search_modifier_name tagged_ext_label
1598
+
1599
+ # search-return-value = tagged-ext-val
1600
+ # ; Data for the returned search option.
1601
+ # ; A single "nz-number"/"number"/"number64" value
1602
+ # ; can be returned as an atom (i.e., without
1603
+ # ; quoting). A sequence-set can be returned
1604
+ # ; as an atom as well.
1605
+ def search_return_value; ExtensionData.new(tagged_ext_val) end
1606
+
1607
+ # search-correlator = SP "(" "TAG" SP tag-string ")"
1608
+ def search_correlator
1609
+ SP!; lpar; label("TAG"); SP!; tag = tag_string; rpar
1610
+ tag
1611
+ end
1612
+
1613
+ # tag-string = astring
1614
+ # ; <tag> represented as <astring>
1615
+ alias tag_string astring
1616
+
1617
+ # RFC5256: THREAD
1618
+ # thread-data = "THREAD" [SP 1*thread-list]
1619
+ def thread_data
1620
+ name = label("THREAD")
1621
+ threads = []
1622
+ if SP?
1623
+ threads << thread_list while lookahead_thread_list?
1624
+ end
1625
+ UntaggedResponse.new(name, threads, @str)
1626
+ end
1627
+
1628
+ alias lookahead_thread_list? lookahead_lpar?
1629
+ alias lookahead_thread_nested? lookahead_thread_list?
1630
+
1631
+ # RFC5256: THREAD
1632
+ # thread-list = "(" (thread-members / thread-nested) ")"
1633
+ def thread_list
1634
+ lpar
1635
+ thread = if lookahead_thread_nested?
1636
+ ThreadMember.new(nil, thread_nested)
1637
+ else
1638
+ thread_members
1639
+ end
1640
+ rpar
1641
+ thread
1642
+ end
1643
+
1644
+ # RFC5256: THREAD
1645
+ # thread-members = nz-number *(SP nz-number) [SP thread-nested]
1646
+ def thread_members
1647
+ members = []
1648
+ members << nz_number # thread root
1649
+ while SP?
1650
+ case lookahead!(T_NUMBER, T_LPAR).symbol
1651
+ when T_NUMBER then members << nz_number
1652
+ else nested = thread_nested; break
959
1653
  end
960
- token = match(T_ATOM)
961
- key = token.value.upcase
962
- match(T_SPACE)
963
- val = number
964
- attr[key] = val
965
1654
  end
966
- data = StatusData.new(mailbox, attr)
967
- return UntaggedResponse.new(name, data, @str)
1655
+ members.reverse.inject(nested || []) {|subthreads, number|
1656
+ [ThreadMember.new(number, subthreads)]
1657
+ }.first
1658
+ end
1659
+
1660
+ # RFC5256: THREAD
1661
+ # thread-nested = 2*thread-list
1662
+ def thread_nested
1663
+ nested = [thread_list, thread_list]
1664
+ while lookahead_thread_list? do nested << thread_list end
1665
+ nested
1666
+ end
1667
+
1668
+ # mailbox-data =/ "STATUS" SP mailbox SP "(" [status-att-list] ")"
1669
+ def mailbox_data__status
1670
+ resp_name = label("STATUS"); SP!
1671
+ mbox_name = mailbox; SP!
1672
+ lpar; attr = status_att_list; rpar
1673
+ UntaggedResponse.new(resp_name, StatusData.new(mbox_name, attr), @str)
1674
+ end
1675
+
1676
+ # RFC3501
1677
+ # status-att-list = status-att SP number *(SP status-att SP number)
1678
+ # RFC4466, RFC9051, and RFC3501 Errata
1679
+ # status-att-list = status-att-val *(SP status-att-val)
1680
+ def status_att_list
1681
+ attrs = [status_att_val]
1682
+ while SP? do attrs << status_att_val end
1683
+ attrs.to_h
1684
+ end
1685
+
1686
+ # RFC3501 Errata:
1687
+ # status-att-val = ("MESSAGES" SP number) / ("RECENT" SP number) /
1688
+ # ("UIDNEXT" SP nz-number) / ("UIDVALIDITY" SP nz-number) /
1689
+ # ("UNSEEN" SP number)
1690
+ # RFC4466:
1691
+ # status-att-val = ("MESSAGES" SP number) /
1692
+ # ("RECENT" SP number) /
1693
+ # ("UIDNEXT" SP nz-number) /
1694
+ # ("UIDVALIDITY" SP nz-number) /
1695
+ # ("UNSEEN" SP number)
1696
+ # ;; Extensions to the STATUS responses
1697
+ # ;; should extend this production.
1698
+ # ;; Extensions should use the generic
1699
+ # ;; syntax defined by tagged-ext.
1700
+ # RFC9051:
1701
+ # status-att-val = ("MESSAGES" SP number) /
1702
+ # ("UIDNEXT" SP nz-number) /
1703
+ # ("UIDVALIDITY" SP nz-number) /
1704
+ # ("UNSEEN" SP number) /
1705
+ # ("DELETED" SP number) /
1706
+ # ("SIZE" SP number64)
1707
+ # ; Extensions to the STATUS responses
1708
+ # ; should extend this production.
1709
+ # ; Extensions should use the generic
1710
+ # ; syntax defined by tagged-ext.
1711
+ # RFC7162:
1712
+ # status-att-val =/ "HIGHESTMODSEQ" SP mod-sequence-valzer
1713
+ # ;; Extends non-terminal defined in [RFC4466].
1714
+ # ;; Value 0 denotes that the mailbox doesn't
1715
+ # ;; support persistent mod-sequences
1716
+ # ;; as described in Section 3.1.2.2.
1717
+ # RFC7889:
1718
+ # status-att-val =/ "APPENDLIMIT" SP (number / nil)
1719
+ # ;; status-att-val is defined in RFC 4466
1720
+ # RFC8438:
1721
+ # status-att-val =/ "SIZE" SP number64
1722
+ # RFC8474:
1723
+ # status-att-val =/ "MAILBOXID" SP "(" objectid ")"
1724
+ # ; follows tagged-ext production from [RFC4466]
1725
+ def status_att_val
1726
+ key = tagged_ext_label
1727
+ SP!
1728
+ val =
1729
+ case key
1730
+ when "MESSAGES" then number # RFC3501, RFC9051
1731
+ when "UNSEEN" then number # RFC3501, RFC9051
1732
+ when "DELETED" then number # RFC9051
1733
+ when "UIDNEXT" then nz_number # RFC3501, RFC9051
1734
+ when "UIDVALIDITY" then nz_number # RFC3501, RFC9051
1735
+ when "RECENT" then number # RFC3501 (obsolete)
1736
+ when "SIZE" then number64 # RFC8438, RFC9051
1737
+ when "HIGHESTMODSEQ" then mod_sequence_valzer # RFC7162
1738
+ when "MAILBOXID" then parens__objectid # RFC8474
1739
+ else
1740
+ number? || ExtensionData.new(tagged_ext_val)
1741
+ end
1742
+ [key, val]
968
1743
  end
969
1744
 
970
- def capability_response
971
- token = match(T_ATOM)
972
- name = token.value.upcase
973
- match(T_SPACE)
974
- UntaggedResponse.new(name, capability_data, @str)
1745
+ # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
1746
+ # The grammar rule is used by both response-data and resp-text-code.
1747
+ # But this method only returns UntaggedResponse (response-data).
1748
+ #
1749
+ # RFC3501:
1750
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1751
+ # *(SP capability)
1752
+ # RFC9051:
1753
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1754
+ # *(SP capability)
1755
+ def capability_data__untagged
1756
+ UntaggedResponse.new label("CAPABILITY"), capability__list, @str
975
1757
  end
976
1758
 
977
- def capability_data
978
- data = []
979
- while true
980
- token = lookahead
981
- case token.symbol
982
- when T_CRLF, T_RBRA
983
- break
984
- when T_SPACE
985
- shift_token
986
- next
987
- end
988
- data.push(atom.upcase)
989
- end
990
- data
1759
+ # enable-data = "ENABLED" *(SP capability)
1760
+ def enable_data
1761
+ UntaggedResponse.new label("ENABLED"), capability__list, @str
1762
+ end
1763
+
1764
+ # As a workaround for buggy servers, allow a trailing SP:
1765
+ # *(SP capability) [SP]
1766
+ def capability__list
1767
+ list = []; while SP? && (capa = capability?) do list << capa end; list
991
1768
  end
992
1769
 
1770
+ alias resp_code__capability capability__list
1771
+
1772
+ # capability = ("AUTH=" auth-type) / atom
1773
+ # ; New capabilities MUST begin with "X" or be
1774
+ # ; registered with IANA as standard or
1775
+ # ; standards-track
1776
+ alias capability case_insensitive__atom
1777
+ alias capability? case_insensitive__atom?
1778
+
993
1779
  def id_response
994
1780
  token = match(T_ATOM)
995
1781
  name = token.value.upcase
@@ -1019,147 +1805,189 @@ module Net
1019
1805
  end
1020
1806
  end
1021
1807
 
1808
+ # namespace-response = "NAMESPACE" SP namespace
1809
+ # SP namespace SP namespace
1810
+ # ; The first Namespace is the Personal Namespace(s).
1811
+ # ; The second Namespace is the Other Users'
1812
+ # ; Namespace(s).
1813
+ # ; The third Namespace is the Shared Namespace(s).
1022
1814
  def namespace_response
1815
+ name = label("NAMESPACE")
1023
1816
  @lex_state = EXPR_DATA
1024
- token = lookahead
1025
- token = match(T_ATOM)
1026
- name = token.value.upcase
1027
- match(T_SPACE)
1028
- personal = namespaces
1029
- match(T_SPACE)
1030
- other = namespaces
1031
- match(T_SPACE)
1032
- shared = namespaces
1817
+ data = Namespaces.new((SP!; namespace),
1818
+ (SP!; namespace),
1819
+ (SP!; namespace))
1820
+ UntaggedResponse.new(name, data, @str)
1821
+ ensure
1033
1822
  @lex_state = EXPR_BEG
1034
- data = Namespaces.new(personal, other, shared)
1035
- return UntaggedResponse.new(name, data, @str)
1036
- end
1037
-
1038
- def namespaces
1039
- token = lookahead
1040
- # empty () is not allowed, so nil is functionally identical to empty.
1041
- data = []
1042
- if token.symbol == T_NIL
1043
- shift_token
1044
- else
1045
- match(T_LPAR)
1046
- loop do
1047
- data << namespace
1048
- break unless lookahead.symbol == T_SPACE
1049
- shift_token
1050
- end
1051
- match(T_RPAR)
1052
- end
1053
- data
1054
1823
  end
1055
1824
 
1825
+ # namespace = nil / "(" 1*namespace-descr ")"
1056
1826
  def namespace
1057
- match(T_LPAR)
1058
- prefix = match(T_QUOTED, T_LITERAL).value
1059
- match(T_SPACE)
1060
- delimiter = string
1827
+ NIL? and return []
1828
+ lpar
1829
+ list = [namespace_descr]
1830
+ list << namespace_descr until rpar?
1831
+ list
1832
+ end
1833
+
1834
+ # namespace-descr = "(" string SP
1835
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil)
1836
+ # [namespace-response-extensions] ")"
1837
+ def namespace_descr
1838
+ lpar
1839
+ prefix = string; SP!
1840
+ delimiter = nquoted # n.b: should only accept single char
1061
1841
  extensions = namespace_response_extensions
1062
- match(T_RPAR)
1842
+ rpar
1063
1843
  Namespace.new(prefix, delimiter, extensions)
1064
1844
  end
1065
1845
 
1846
+ # namespace-response-extensions = *namespace-response-extension
1847
+ # namespace-response-extension = SP string SP
1848
+ # "(" string *(SP string) ")"
1066
1849
  def namespace_response_extensions
1067
1850
  data = {}
1068
- token = lookahead
1069
- if token.symbol == T_SPACE
1070
- shift_token
1071
- name = match(T_QUOTED, T_LITERAL).value
1851
+ while SP?
1852
+ name = string; SP!
1853
+ lpar
1072
1854
  data[name] ||= []
1073
- match(T_SPACE)
1074
- match(T_LPAR)
1075
- loop do
1076
- data[name].push match(T_QUOTED, T_LITERAL).value
1077
- break unless lookahead.symbol == T_SPACE
1078
- shift_token
1079
- end
1080
- match(T_RPAR)
1855
+ data[name] << string
1856
+ data[name] << string while SP?
1857
+ rpar
1081
1858
  end
1082
1859
  data
1083
1860
  end
1084
1861
 
1085
- # text = 1*TEXT-CHAR
1086
- # TEXT-CHAR = <any CHAR except CR and LF>
1862
+ # TEXT-CHAR = <any CHAR except CR and LF>
1863
+ # RFC3501:
1864
+ # text = 1*TEXT-CHAR
1865
+ # RFC9051:
1866
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
1867
+ # ; Non-ASCII text can only be returned
1868
+ # ; after ENABLE IMAP4rev2 command
1087
1869
  def text
1088
- match(T_TEXT, lex_state: EXPR_TEXT).value
1870
+ match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8")
1871
+ end
1872
+
1873
+ # an "accept" version of #text
1874
+ def text?
1875
+ accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8")
1089
1876
  end
1090
1877
 
1091
- # resp-text = ["[" resp-text-code "]" SP] text
1878
+ # RFC3501:
1879
+ # resp-text = ["[" resp-text-code "]" SP] text
1880
+ # RFC9051:
1881
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1882
+ #
1883
+ # We leniently re-interpret this as
1884
+ # resp-text = ("[" resp-text-code "]" [SP [text]]) / [text]
1092
1885
  def resp_text
1093
- token = match(T_LBRA, T_TEXT, lex_state: EXPR_RTEXT)
1094
- case token.symbol
1095
- when T_LBRA
1096
- code = resp_text_code
1097
- match(T_RBRA)
1098
- accept_space # violating RFC
1099
- ResponseText.new(code, text)
1100
- when T_TEXT
1101
- ResponseText.new(nil, token.value)
1886
+ if lbra?
1887
+ code = resp_text_code; rbra
1888
+ ResponseText.new(code, SP? && text? || "")
1889
+ else
1890
+ ResponseText.new(nil, text? || "")
1102
1891
  end
1103
1892
  end
1104
1893
 
1105
- # See https://www.rfc-editor.org/errata/rfc3501
1894
+ # RFC3501 (See https://www.rfc-editor.org/errata/rfc3501):
1895
+ # resp-text-code = "ALERT" /
1896
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1897
+ # capability-data / "PARSE" /
1898
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1899
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1900
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1901
+ # "UNSEEN" SP nz-number /
1902
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1903
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1904
+ # *(SP capability)
1106
1905
  #
1107
- # resp-text-code = "ALERT" /
1108
- # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1109
- # capability-data / "PARSE" /
1110
- # "PERMANENTFLAGS" SP "("
1111
- # [flag-perm *(SP flag-perm)] ")" /
1112
- # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1113
- # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1114
- # "UNSEEN" SP nz-number /
1115
- # atom [SP 1*<any TEXT-CHAR except "]">]
1906
+ # RFC5530:
1907
+ # resp-text-code =/ "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1908
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1909
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1910
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1911
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1912
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1913
+ # "NONEXISTENT"
1914
+ # RFC9051:
1915
+ # resp-text-code = "ALERT" /
1916
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1917
+ # capability-data / "PARSE" /
1918
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1919
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1920
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1921
+ # resp-code-apnd / resp-code-copy / "UIDNOTSTICKY" /
1922
+ # "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1923
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1924
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1925
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1926
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1927
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1928
+ # "NONEXISTENT" / "NOTSAVED" / "HASCHILDREN" /
1929
+ # "CLOSED" /
1930
+ # "UNKNOWN-CTE" /
1931
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1932
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1933
+ # *(SP capability)
1116
1934
  #
1117
- # +UIDPLUS+ ABNF:: https://www.rfc-editor.org/rfc/rfc4315.html#section-4
1118
- # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1935
+ # RFC4315 (UIDPLUS), RFC9051 (IMAP4rev2):
1936
+ # resp-code-apnd = "APPENDUID" SP nz-number SP append-uid
1937
+ # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1938
+ # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1939
+ #
1940
+ # RFC7162 (CONDSTORE):
1941
+ # resp-text-code =/ "HIGHESTMODSEQ" SP mod-sequence-value /
1942
+ # "NOMODSEQ" /
1943
+ # "MODIFIED" SP sequence-set
1944
+ # RFC7162 (QRESYNC):
1945
+ # resp-text-code =/ "CLOSED"
1946
+ #
1947
+ # RFC8474: OBJECTID
1948
+ # resp-text-code =/ "MAILBOXID" SP "(" objectid ")"
1949
+ #
1950
+ # RFC9586: UIDONLY
1951
+ # resp-text-code =/ "UIDREQUIRED"
1119
1952
  def resp_text_code
1120
- token = match(T_ATOM)
1121
- name = token.value.upcase
1122
- case name
1123
- when /\A(?:ALERT|PARSE|READ-ONLY|READ-WRITE|TRYCREATE|NOMODSEQ)\z/n
1124
- result = ResponseCode.new(name, nil)
1125
- when /\A(?:BADCHARSET)\z/n
1126
- result = ResponseCode.new(name, charset_list)
1127
- when /\A(?:CAPABILITY)\z/ni
1128
- result = ResponseCode.new(name, capability_data)
1129
- when /\A(?:PERMANENTFLAGS)\z/n
1130
- match(T_SPACE)
1131
- result = ResponseCode.new(name, flag_list)
1132
- when /\A(?:UIDVALIDITY|UIDNEXT|UNSEEN)\z/n
1133
- match(T_SPACE)
1134
- result = ResponseCode.new(name, number)
1135
- when /\A(?:APPENDUID)\z/n
1136
- result = ResponseCode.new(name, resp_code_apnd__data)
1137
- when /\A(?:COPYUID)\z/n
1138
- result = ResponseCode.new(name, resp_code_copy__data)
1139
- else
1140
- token = lookahead
1141
- if token.symbol == T_SPACE
1142
- shift_token
1143
- token = match(T_TEXT, lex_state: EXPR_CTEXT)
1144
- result = ResponseCode.new(name, token.value)
1953
+ name = resp_text_code__name
1954
+ data =
1955
+ case name
1956
+ when "CAPABILITY" then resp_code__capability
1957
+ when "PERMANENTFLAGS" then SP? ? flag_perm__list : []
1958
+ when "UIDNEXT" then SP!; nz_number
1959
+ when "UIDVALIDITY" then SP!; nz_number
1960
+ when "UNSEEN" then SP!; nz_number # rev1 only
1961
+ when "APPENDUID" then SP!; resp_code_apnd__data # rev2, UIDPLUS
1962
+ when "COPYUID" then SP!; resp_code_copy__data # rev2, UIDPLUS
1963
+ when "BADCHARSET" then SP? ? charset__list : []
1964
+ when "ALERT", "PARSE", "READ-ONLY", "READ-WRITE", "TRYCREATE",
1965
+ "UNAVAILABLE", "AUTHENTICATIONFAILED", "AUTHORIZATIONFAILED",
1966
+ "EXPIRED", "PRIVACYREQUIRED", "CONTACTADMIN", "NOPERM", "INUSE",
1967
+ "EXPUNGEISSUED", "CORRUPTION", "SERVERBUG", "CLIENTBUG", "CANNOT",
1968
+ "LIMIT", "OVERQUOTA", "ALREADYEXISTS", "NONEXISTENT", "CLOSED",
1969
+ "NOTSAVED", "UIDNOTSTICKY", "UNKNOWN-CTE", "HASCHILDREN"
1970
+ when "NOMODSEQ" then nil # CONDSTORE
1971
+ when "HIGHESTMODSEQ" then SP!; mod_sequence_value # CONDSTORE
1972
+ when "MODIFIED" then SP!; sequence_set # CONDSTORE
1973
+ when "MAILBOXID" then SP!; parens__objectid # RFC8474: OBJECTID
1974
+ when "UIDREQUIRED" then # RFC9586: UIDONLY
1145
1975
  else
1146
- result = ResponseCode.new(name, nil)
1976
+ SP? and text_chars_except_rbra
1147
1977
  end
1148
- end
1149
- return result
1978
+ ResponseCode.new(name, data)
1150
1979
  end
1151
1980
 
1152
- def charset_list
1153
- result = []
1154
- if accept(T_SPACE)
1155
- match(T_LPAR)
1156
- result << charset
1157
- while accept(T_SPACE)
1158
- result << charset
1159
- end
1160
- match(T_RPAR)
1161
- end
1162
- result
1981
+ alias resp_text_code__name case_insensitive__atom
1982
+
1983
+ # 1*<any TEXT-CHAR except "]">
1984
+ def text_chars_except_rbra
1985
+ match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')[0]
1986
+ end
1987
+
1988
+ # "(" charset *(SP charset) ")"
1989
+ def charset__list
1990
+ lpar; list = [charset]; while SP? do list << charset end; rpar; list
1163
1991
  end
1164
1992
 
1165
1993
  # already matched: "APPENDUID"
@@ -1173,198 +2001,153 @@ module Net
1173
2001
  #
1174
2002
  # n.b, uniqueid ⊂ uid-set. To avoid inconsistent return types, we always
1175
2003
  # match uid_set even if that returns a single-member array.
1176
- #
1177
2004
  def resp_code_apnd__data
1178
- match(T_SPACE); validity = number
1179
- match(T_SPACE); dst_uids = uid_set # uniqueid ⊂ uid-set
1180
- UIDPlusData.new(validity, nil, dst_uids)
2005
+ validity = number; SP!
2006
+ dst_uids = uid_set # uniqueid ⊂ uid-set
2007
+ AppendUID(validity, dst_uids)
1181
2008
  end
1182
2009
 
1183
2010
  # already matched: "COPYUID"
1184
2011
  #
1185
2012
  # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1186
2013
  def resp_code_copy__data
1187
- match(T_SPACE); validity = number
1188
- match(T_SPACE); src_uids = uid_set
1189
- match(T_SPACE); dst_uids = uid_set
1190
- UIDPlusData.new(validity, src_uids, dst_uids)
1191
- end
1192
-
1193
- def address_list
1194
- token = lookahead
1195
- if token.symbol == T_NIL
1196
- shift_token
1197
- return nil
1198
- else
1199
- result = []
1200
- match(T_LPAR)
1201
- while true
1202
- token = lookahead
1203
- case token.symbol
1204
- when T_RPAR
1205
- shift_token
1206
- break
1207
- when T_SPACE
1208
- shift_token
1209
- end
1210
- result.push(address)
1211
- end
1212
- return result
1213
- end
1214
- end
1215
-
1216
- ADDRESS_REGEXP = /\G\
1217
- (?# 1: NAME )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1218
- (?# 2: ROUTE )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1219
- (?# 3: MAILBOX )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1220
- (?# 4: HOST )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)")\
1221
- \)/ni
1222
-
2014
+ validity = number; SP!
2015
+ src_uids = uid_set; SP!
2016
+ dst_uids = uid_set
2017
+ CopyUID(validity, src_uids, dst_uids)
2018
+ end
2019
+
2020
+ def AppendUID(...) DeprecatedUIDPlus(...) || AppendUIDData.new(...) end
2021
+ def CopyUID(...) DeprecatedUIDPlus(...) || CopyUIDData.new(...) end
2022
+
2023
+ # TODO: remove this code in the v0.6.0 release
2024
+ def DeprecatedUIDPlus(validity, src_uids = nil, dst_uids)
2025
+ return unless config.parser_use_deprecated_uidplus_data
2026
+ compact_uid_sets = [src_uids, dst_uids].compact
2027
+ count = compact_uid_sets.map { _1.count_with_duplicates }.max
2028
+ max = config.parser_max_deprecated_uidplus_data_size
2029
+ if count <= max
2030
+ src_uids &&= src_uids.each_ordered_number.to_a
2031
+ dst_uids = dst_uids.each_ordered_number.to_a
2032
+ UIDPlusData.new(validity, src_uids, dst_uids)
2033
+ elsif config.parser_use_deprecated_uidplus_data != :up_to_max_size
2034
+ parse_error("uid-set is too large: %d > %d", count, max)
2035
+ end
2036
+ end
2037
+
2038
+ ADDRESS_REGEXP = /\G
2039
+ \( (?: NIL | #{Patterns::QUOTED_rev2} ) # 1: NAME
2040
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 2: ROUTE
2041
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 3: MAILBOX
2042
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 4: HOST
2043
+ \)
2044
+ /nix
2045
+
2046
+ # address = "(" addr-name SP addr-adl SP addr-mailbox SP
2047
+ # addr-host ")"
2048
+ # addr-adl = nstring
2049
+ # addr-host = nstring
2050
+ # addr-mailbox = nstring
2051
+ # addr-name = nstring
1223
2052
  def address
1224
- match(T_LPAR)
1225
- if @str.index(ADDRESS_REGEXP, @pos)
1226
- # address does not include literal.
1227
- @pos = $~.end(0)
1228
- name = $1
1229
- route = $2
1230
- mailbox = $3
1231
- host = $4
1232
- for s in [name, route, mailbox, host]
1233
- if s
1234
- s.gsub!(/\\(["\\])/n, "\\1")
1235
- end
1236
- end
1237
- else
1238
- name = nstring
1239
- match(T_SPACE)
1240
- route = nstring
1241
- match(T_SPACE)
1242
- mailbox = nstring
1243
- match(T_SPACE)
1244
- host = nstring
1245
- match(T_RPAR)
1246
- end
1247
- return Address.new(name, route, mailbox, host)
1248
- end
1249
-
1250
- FLAG_REGEXP = /\
1251
- (?# FLAG )\\([^\x80-\xff(){ \x00-\x1f\x7f%"\\]+)|\
1252
- (?# ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\]+)/n
1253
-
2053
+ if (match = accept_re(ADDRESS_REGEXP))
2054
+ # note that "NIL" isn't captured by the regexp
2055
+ name, route, mailbox, host = match.captures
2056
+ .map { Patterns.unescape_quoted _1 }
2057
+ else # address may include literals
2058
+ lpar; name = addr_name
2059
+ SP!; route = addr_adl
2060
+ SP!; mailbox = addr_mailbox
2061
+ SP!; host = addr_host
2062
+ rpar
2063
+ end
2064
+ Address.new(name, route, mailbox, host)
2065
+ end
2066
+
2067
+ alias addr_adl nstring
2068
+ alias addr_host nstring
2069
+ alias addr_mailbox nstring
2070
+ alias addr_name nstring
2071
+
2072
+ # flag-list = "(" [flag *(SP flag)] ")"
1254
2073
  def flag_list
1255
- if @str.index(/\(([^)]*)\)/ni, @pos)
1256
- @pos = $~.end(0)
1257
- return $1.scan(FLAG_REGEXP).collect { |flag, atom|
1258
- if atom
1259
- atom
1260
- else
1261
- flag.capitalize.intern
1262
- end
1263
- }
2074
+ if (match = accept_re(Patterns::FLAG_LIST))
2075
+ match[1].split(nil)
2076
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1264
2077
  else
1265
- parse_error("invalid flag list")
2078
+ quirky__flag_list "flags-list"
1266
2079
  end
1267
2080
  end
1268
2081
 
1269
- def nstring
1270
- token = lookahead
1271
- if token.symbol == T_NIL
1272
- shift_token
1273
- return nil
2082
+ # "(" [flag-perm *(SP flag-perm)] ")"
2083
+ def flag_perm__list
2084
+ if (match = accept_re(Patterns::FLAG_PERM_LIST))
2085
+ match[1].split(nil)
2086
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1274
2087
  else
1275
- return string
2088
+ quirky__flag_list "PERMANENTFLAGS flag-perm list"
1276
2089
  end
1277
2090
  end
1278
2091
 
1279
- def astring
1280
- token = lookahead
1281
- if string_token?(token)
1282
- return string
1283
- else
1284
- return astring_chars
1285
- end
2092
+ # This allows illegal "]" in flag names (Gmail),
2093
+ # or "\*" in a FLAGS response (greenmail).
2094
+ def quirky__flag_list(name)
2095
+ match_re(Patterns::QUIRKY_FLAGS_LIST, "quirks mode #{name}")[1]
2096
+ .scan(Patterns::QUIRKY_FLAG)
2097
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1286
2098
  end
1287
2099
 
1288
- def string
1289
- token = lookahead
1290
- if token.symbol == T_NIL
1291
- shift_token
1292
- return nil
1293
- end
1294
- token = match(T_QUOTED, T_LITERAL)
1295
- return token.value
2100
+ # See Patterns::MBX_LIST_FLAGS
2101
+ def mbx_list_flags
2102
+ match_re(Patterns::MBX_LIST_FLAGS, "mbx-list-flags")[1]
2103
+ .split(nil)
2104
+ .map! { _1.delete_prefix!("\\"); _1.capitalize.to_sym }
1296
2105
  end
1297
2106
 
1298
- STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1299
-
1300
- def string_token?(token)
1301
- return STRING_TOKENS.include?(token.symbol)
1302
- end
2107
+ # See https://developers.google.com/gmail/imap/imap-extensions
2108
+ def x_gm_label; accept(T_BSLASH) ? atom.capitalize.to_sym : astring end
1303
2109
 
1304
- def case_insensitive_string
1305
- token = lookahead
1306
- if token.symbol == T_NIL
1307
- shift_token
1308
- return nil
1309
- end
1310
- token = match(T_QUOTED, T_LITERAL)
1311
- return token.value.upcase
2110
+ # See https://developers.google.com/gmail/imap/imap-extensions
2111
+ def x_gm_labels
2112
+ lpar; return [] if rpar?
2113
+ labels = []
2114
+ labels << x_gm_label
2115
+ labels << x_gm_label while SP?
2116
+ rpar
2117
+ labels
1312
2118
  end
1313
2119
 
1314
- # atom = 1*ATOM-CHAR
1315
- # ATOM-CHAR = <any CHAR except atom-specials>
1316
- ATOM_TOKENS = [
1317
- T_ATOM,
1318
- T_NUMBER,
1319
- T_NIL,
1320
- T_LBRA,
1321
- T_PLUS
1322
- ]
2120
+ # See https://www.rfc-editor.org/errata/rfc3501
2121
+ #
2122
+ # charset = atom / quoted
2123
+ def charset; quoted? || atom end
1323
2124
 
1324
- def atom
1325
- -combine_adjacent(*ATOM_TOKENS)
1326
- end
2125
+ # RFC7162:
2126
+ # mod-sequence-value = 1*DIGIT
2127
+ # ;; Positive unsigned 63-bit integer
2128
+ # ;; (mod-sequence)
2129
+ # ;; (1 <= n <= 9,223,372,036,854,775,807).
2130
+ alias mod_sequence_value nz_number64
1327
2131
 
1328
- # ASTRING-CHAR = ATOM-CHAR / resp-specials
1329
- # resp-specials = "]"
1330
- ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
2132
+ # RFC7162:
2133
+ # permsg-modsequence = mod-sequence-value
2134
+ # ;; Per-message mod-sequence.
2135
+ alias permsg_modsequence mod_sequence_value
1331
2136
 
1332
- def astring_chars
1333
- combine_adjacent(*ASTRING_CHARS_TOKENS)
1334
- end
2137
+ # RFC7162:
2138
+ # mod-sequence-valzer = "0" / mod-sequence-value
2139
+ alias mod_sequence_valzer number64
1335
2140
 
1336
- def combine_adjacent(*tokens)
1337
- result = "".b
1338
- while token = accept(*tokens)
1339
- result << token.value
1340
- end
1341
- if result.empty?
1342
- parse_error('unexpected token %s (expected %s)',
1343
- lookahead.symbol, args.join(" or "))
1344
- end
1345
- result
1346
- end
2141
+ def parens__modseq; lpar; _ = permsg_modsequence; rpar; _ end
1347
2142
 
1348
- # See https://www.rfc-editor.org/errata/rfc3501
1349
- #
1350
- # charset = atom / quoted
1351
- def charset
1352
- if token = accept(T_QUOTED)
1353
- token.value
1354
- else
1355
- atom
1356
- end
1357
- end
2143
+ # RFC8474:
2144
+ # objectid = 1*255(ALPHA / DIGIT / "_" / "-")
2145
+ # ; characters in object identifiers are case
2146
+ # ; significant
2147
+ alias objectid atom
1358
2148
 
1359
- def number
1360
- token = lookahead
1361
- if token.symbol == T_NIL
1362
- shift_token
1363
- return nil
1364
- end
1365
- token = match(T_NUMBER)
1366
- return token.value.to_i
1367
- end
2149
+ def parens__objectid; lpar; _ = objectid; rpar; _ end
2150
+ def nparens__objectid; NIL? ? nil : parens__objectid end
1368
2151
 
1369
2152
  # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1370
2153
  # uid-set = (uniqueid / uid-range) *("," uid-set)
@@ -1375,15 +2158,9 @@ module Net
1375
2158
  # uniqueid = nz-number
1376
2159
  # ; Strictly ascending
1377
2160
  def uid_set
1378
- token = match(T_NUMBER, T_ATOM)
1379
- case token.symbol
1380
- when T_NUMBER then [Integer(token.value)]
1381
- when T_ATOM
1382
- token.value.split(",").flat_map {|range|
1383
- range = range.split(":").map {|uniqueid| Integer(uniqueid) }
1384
- range.size == 1 ? range : Range.new(range.min, range.max).to_a
1385
- }
1386
- end
2161
+ set = sequence_set
2162
+ parse_error("uid-set cannot contain '*'") if set.include_star?
2163
+ set
1387
2164
  end
1388
2165
 
1389
2166
  def nil_atom
@@ -1393,64 +2170,15 @@ module Net
1393
2170
 
1394
2171
  SPACES_REGEXP = /\G */n
1395
2172
 
1396
- # This advances @pos directly so it's safe before changing @lex_state.
1397
- def accept_space
1398
- if @token
1399
- shift_token if @token.symbol == T_SPACE
1400
- elsif @str[@pos] == " "
1401
- @pos += 1
1402
- end
1403
- end
1404
-
1405
2173
  # The RFC is very strict about this and usually we should be too.
1406
2174
  # But skipping spaces is usually a safe workaround for buggy servers.
1407
2175
  #
1408
2176
  # This advances @pos directly so it's safe before changing @lex_state.
1409
2177
  def accept_spaces
1410
- shift_token if @token&.symbol == T_SPACE
1411
- if @str.index(SPACES_REGEXP, @pos)
2178
+ return false unless SP?
2179
+ @str.index(SPACES_REGEXP, @pos) and
1412
2180
  @pos = $~.end(0)
1413
- end
1414
- end
1415
-
1416
- def match(*args, lex_state: @lex_state)
1417
- if @token && lex_state != @lex_state
1418
- parse_error("invalid lex_state change to %s with unconsumed token",
1419
- lex_state)
1420
- end
1421
- begin
1422
- @lex_state, original_lex_state = lex_state, @lex_state
1423
- token = lookahead
1424
- unless args.include?(token.symbol)
1425
- parse_error('unexpected token %s (expected %s)',
1426
- token.symbol.id2name,
1427
- args.collect {|i| i.id2name}.join(" or "))
1428
- end
1429
- shift_token
1430
- return token
1431
- ensure
1432
- @lex_state = original_lex_state
1433
- end
1434
- end
1435
-
1436
- # like match, but does not raise error on failure.
1437
- #
1438
- # returns and shifts token on successful match
1439
- # returns nil and leaves @token unshifted on no match
1440
- def accept(*args)
1441
- token = lookahead
1442
- if args.include?(token.symbol)
1443
- shift_token
1444
- token
1445
- end
1446
- end
1447
-
1448
- def lookahead
1449
- @token ||= next_token
1450
- end
1451
-
1452
- def shift_token
1453
- @token = nil
2181
+ true
1454
2182
  end
1455
2183
 
1456
2184
  def next_token
@@ -1461,38 +2189,46 @@ module Net
1461
2189
  if $1
1462
2190
  return Token.new(T_SPACE, $+)
1463
2191
  elsif $2
1464
- return Token.new(T_NIL, $+)
1465
- elsif $3
1466
- return Token.new(T_NUMBER, $+)
2192
+ len = $+.to_i
2193
+ val = @str[@pos, len]
2194
+ @pos += len
2195
+ return Token.new(T_LITERAL8, val)
2196
+ elsif $3 && $7
2197
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
2198
+ return Token.new(T_ATOM, $3)
1467
2199
  elsif $4
1468
- return Token.new(T_ATOM, $+)
2200
+ return Token.new(T_NIL, $+)
1469
2201
  elsif $5
1470
- return Token.new(T_QUOTED,
1471
- $+.gsub(/\\(["\\])/n, "\\1"))
2202
+ return Token.new(T_NUMBER, $+)
1472
2203
  elsif $6
2204
+ return Token.new(T_PLUS, $+)
2205
+ elsif $8
2206
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
2207
+ return Token.new(T_ATOM, $+)
2208
+ elsif $9
2209
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
2210
+ elsif $10
1473
2211
  return Token.new(T_LPAR, $+)
1474
- elsif $7
2212
+ elsif $11
1475
2213
  return Token.new(T_RPAR, $+)
1476
- elsif $8
2214
+ elsif $12
1477
2215
  return Token.new(T_BSLASH, $+)
1478
- elsif $9
2216
+ elsif $13
1479
2217
  return Token.new(T_STAR, $+)
1480
- elsif $10
2218
+ elsif $14
1481
2219
  return Token.new(T_LBRA, $+)
1482
- elsif $11
2220
+ elsif $15
1483
2221
  return Token.new(T_RBRA, $+)
1484
- elsif $12
2222
+ elsif $16
1485
2223
  len = $+.to_i
1486
2224
  val = @str[@pos, len]
1487
2225
  @pos += len
1488
2226
  return Token.new(T_LITERAL, val)
1489
- elsif $13
1490
- return Token.new(T_PLUS, $+)
1491
- elsif $14
2227
+ elsif $17
1492
2228
  return Token.new(T_PERCENT, $+)
1493
- elsif $15
2229
+ elsif $18
1494
2230
  return Token.new(T_CRLF, $+)
1495
- elsif $16
2231
+ elsif $19
1496
2232
  return Token.new(T_EOF, $+)
1497
2233
  else
1498
2234
  parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
@@ -1511,8 +2247,7 @@ module Net
1511
2247
  elsif $3
1512
2248
  return Token.new(T_NUMBER, $+)
1513
2249
  elsif $4
1514
- return Token.new(T_QUOTED,
1515
- $+.gsub(/\\(["\\])/n, "\\1"))
2250
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1516
2251
  elsif $5
1517
2252
  len = $+.to_i
1518
2253
  val = @str[@pos, len]
@@ -1529,63 +2264,11 @@ module Net
1529
2264
  @str.index(/\S*/n, @pos)
1530
2265
  parse_error("unknown token - %s", $&.dump)
1531
2266
  end
1532
- when EXPR_TEXT
1533
- if @str.index(TEXT_REGEXP, @pos)
1534
- @pos = $~.end(0)
1535
- if $1
1536
- return Token.new(T_TEXT, $+)
1537
- else
1538
- parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid")
1539
- end
1540
- else
1541
- @str.index(/\S*/n, @pos)
1542
- parse_error("unknown token - %s", $&.dump)
1543
- end
1544
- when EXPR_RTEXT
1545
- if @str.index(RTEXT_REGEXP, @pos)
1546
- @pos = $~.end(0)
1547
- if $1
1548
- return Token.new(T_LBRA, $+)
1549
- elsif $2
1550
- return Token.new(T_TEXT, $+)
1551
- else
1552
- parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid")
1553
- end
1554
- else
1555
- @str.index(/\S*/n, @pos)
1556
- parse_error("unknown token - %s", $&.dump)
1557
- end
1558
- when EXPR_CTEXT
1559
- if @str.index(CTEXT_REGEXP, @pos)
1560
- @pos = $~.end(0)
1561
- if $1
1562
- return Token.new(T_TEXT, $+)
1563
- else
1564
- parse_error("[Net::IMAP BUG] CTEXT_REGEXP is invalid")
1565
- end
1566
- else
1567
- @str.index(/\S*/n, @pos) #/
1568
- parse_error("unknown token - %s", $&.dump)
1569
- end
1570
2267
  else
1571
2268
  parse_error("invalid @lex_state - %s", @lex_state.inspect)
1572
2269
  end
1573
2270
  end
1574
2271
 
1575
- def parse_error(fmt, *args)
1576
- if IMAP.debug
1577
- $stderr.printf("@str: %s\n", @str.dump)
1578
- $stderr.printf("@pos: %d\n", @pos)
1579
- $stderr.printf("@lex_state: %s\n", @lex_state)
1580
- if @token
1581
- $stderr.printf("@token.symbol: %s\n", @token.symbol)
1582
- $stderr.printf("@token.value: %s\n", @token.value.inspect)
1583
- end
1584
- end
1585
- raise ResponseParseError, format(fmt, *args)
1586
- end
1587
2272
  end
1588
-
1589
2273
  end
1590
-
1591
2274
  end