net-imap 0.3.7 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of net-imap might be problematic. Click here for more details.

Files changed (71)
  1. checksums.yaml +4 -4
  2. data/BSDL +22 -0
  3. data/COPYING +56 -0
  4. data/Gemfile +14 -0
  5. data/LICENSE.txt +3 -22
  6. data/README.md +25 -8
  7. data/Rakefile +0 -7
  8. data/docs/styles.css +72 -23
  9. data/lib/net/imap/authenticators.rb +26 -57
  10. data/lib/net/imap/command_data.rb +74 -54
  11. data/lib/net/imap/config/attr_accessors.rb +75 -0
  12. data/lib/net/imap/config/attr_inheritance.rb +90 -0
  13. data/lib/net/imap/config/attr_type_coercion.rb +61 -0
  14. data/lib/net/imap/config.rb +470 -0
  15. data/lib/net/imap/data_encoding.rb +18 -6
  16. data/lib/net/imap/data_lite.rb +226 -0
  17. data/lib/net/imap/deprecated_client_options.rb +142 -0
  18. data/lib/net/imap/errors.rb +27 -1
  19. data/lib/net/imap/esearch_result.rb +180 -0
  20. data/lib/net/imap/fetch_data.rb +597 -0
  21. data/lib/net/imap/flags.rb +1 -1
  22. data/lib/net/imap/response_data.rb +250 -440
  23. data/lib/net/imap/response_parser/parser_utils.rb +245 -0
  24. data/lib/net/imap/response_parser.rb +1867 -1184
  25. data/lib/net/imap/sasl/anonymous_authenticator.rb +69 -0
  26. data/lib/net/imap/sasl/authentication_exchange.rb +139 -0
  27. data/lib/net/imap/sasl/authenticators.rb +122 -0
  28. data/lib/net/imap/sasl/client_adapter.rb +123 -0
  29. data/lib/net/imap/{authenticators/cram_md5.rb → sasl/cram_md5_authenticator.rb} +24 -14
  30. data/lib/net/imap/sasl/digest_md5_authenticator.rb +342 -0
  31. data/lib/net/imap/sasl/external_authenticator.rb +83 -0
  32. data/lib/net/imap/sasl/gs2_header.rb +80 -0
  33. data/lib/net/imap/{authenticators/login.rb → sasl/login_authenticator.rb} +28 -18
  34. data/lib/net/imap/sasl/oauthbearer_authenticator.rb +199 -0
  35. data/lib/net/imap/sasl/plain_authenticator.rb +101 -0
  36. data/lib/net/imap/sasl/protocol_adapters.rb +101 -0
  37. data/lib/net/imap/sasl/scram_algorithm.rb +58 -0
  38. data/lib/net/imap/sasl/scram_authenticator.rb +287 -0
  39. data/lib/net/imap/sasl/stringprep.rb +6 -66
  40. data/lib/net/imap/sasl/xoauth2_authenticator.rb +106 -0
  41. data/lib/net/imap/sasl.rb +148 -44
  42. data/lib/net/imap/sasl_adapter.rb +20 -0
  43. data/lib/net/imap/search_result.rb +146 -0
  44. data/lib/net/imap/sequence_set.rb +1565 -0
  45. data/lib/net/imap/stringprep/nameprep.rb +70 -0
  46. data/lib/net/imap/stringprep/saslprep.rb +69 -0
  47. data/lib/net/imap/stringprep/saslprep_tables.rb +96 -0
  48. data/lib/net/imap/stringprep/tables.rb +146 -0
  49. data/lib/net/imap/stringprep/trace.rb +85 -0
  50. data/lib/net/imap/stringprep.rb +159 -0
  51. data/lib/net/imap/uidplus_data.rb +244 -0
  52. data/lib/net/imap/vanished_data.rb +56 -0
  53. data/lib/net/imap.rb +2090 -823
  54. data/net-imap.gemspec +7 -8
  55. data/rakelib/benchmarks.rake +91 -0
  56. data/rakelib/rfcs.rake +2 -0
  57. data/rakelib/saslprep.rake +4 -4
  58. data/rakelib/string_prep_tables_generator.rb +84 -60
  59. data/sample/net-imap.rb +167 -0
  60. metadata +45 -49
  61. data/.github/dependabot.yml +0 -6
  62. data/.github/workflows/test.yml +0 -38
  63. data/.gitignore +0 -10
  64. data/benchmarks/stringprep.yml +0 -65
  65. data/benchmarks/table-regexps.yml +0 -39
  66. data/lib/net/imap/authenticators/digest_md5.rb +0 -115
  67. data/lib/net/imap/authenticators/plain.rb +0 -41
  68. data/lib/net/imap/authenticators/xoauth2.rb +0 -20
  69. data/lib/net/imap/sasl/saslprep.rb +0 -55
  70. data/lib/net/imap/sasl/saslprep_tables.rb +0 -98
  71. data/lib/net/imap/sasl/stringprep_tables.rb +0 -153
@@ -1,18 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "response_parser/parser_utils"
4
5
 
5
6
  module Net
6
7
  class IMAP < Protocol
7
8
 
8
9
  # Parses an \IMAP server response.
9
10
  class ResponseParser
10
- # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
11
- def initialize
11
+ include ParserUtils
12
+ extend ParserUtils::Generator
13
+
14
+ attr_reader :config
15
+
16
+ # Creates a new ResponseParser.
17
+ #
18
+ # When +config+ is frozen or global, the parser #config inherits from it.
19
+ # Otherwise, +config+ will be used directly.
20
+ def initialize(config: Config.global)
12
21
  @str = nil
13
22
  @pos = nil
14
23
  @lex_state = nil
15
24
  @token = nil
25
+ @config = Config[config]
26
+ @config = @config.new if @config == Config.global || @config.frozen?
16
27
  end
17
28
 
18
29
  # :call-seq:
@@ -33,745 +44,1367 @@ module Net
33
44
 
34
45
  # :stopdoc:
35
46
 
36
- EXPR_BEG = :EXPR_BEG
37
- EXPR_DATA = :EXPR_DATA
38
- EXPR_TEXT = :EXPR_TEXT
39
- EXPR_RTEXT = :EXPR_RTEXT
40
- EXPR_CTEXT = :EXPR_CTEXT
41
-
42
- T_SPACE = :SPACE
43
- T_NIL = :NIL
44
- T_NUMBER = :NUMBER
45
- T_ATOM = :ATOM
46
- T_QUOTED = :QUOTED
47
- T_LPAR = :LPAR
48
- T_RPAR = :RPAR
49
- T_BSLASH = :BSLASH
50
- T_STAR = :STAR
51
- T_LBRA = :LBRA
52
- T_RBRA = :RBRA
53
- T_LITERAL = :LITERAL
54
- T_PLUS = :PLUS
55
- T_PERCENT = :PERCENT
56
- T_CRLF = :CRLF
57
- T_EOF = :EOF
58
- T_TEXT = :TEXT
59
-
47
+ EXPR_BEG = :EXPR_BEG # the default, used in most places
48
+ EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
49
+
50
+ T_SPACE = :SPACE # atom special
51
+ T_ATOM = :ATOM # atom (subset of astring chars)
52
+ T_NIL = :NIL # subset of atom and label
53
+ T_NUMBER = :NUMBER # subset of atom
54
+ T_LBRA = :LBRA # subset of atom
55
+ T_PLUS = :PLUS # subset of atom; tag special
56
+ T_RBRA = :RBRA # atom special; resp_special; valid astring char
57
+ T_QUOTED = :QUOTED # starts/end with atom special
58
+ T_BSLASH = :BSLASH # atom special; quoted special
59
+ T_LPAR = :LPAR # atom special; paren list delimiter
60
+ T_RPAR = :RPAR # atom special; paren list delimiter
61
+ T_STAR = :STAR # atom special; list wildcard
62
+ T_PERCENT = :PERCENT # atom special; list wildcard
63
+ T_LITERAL = :LITERAL # starts with atom special
64
+ T_LITERAL8 = :LITERAL8 # starts with atom char "~"
65
+ T_CRLF = :CRLF # atom special; text special; quoted special
66
+ T_TEXT = :TEXT # any char except CRLF
67
+ T_EOF = :EOF # end of response string
68
+
69
+ module ResponseConditions
70
+ OK = "OK"
71
+ NO = "NO"
72
+ BAD = "BAD"
73
+ BYE = "BYE"
74
+ PREAUTH = "PREAUTH"
75
+
76
+ RESP_COND_STATES = [OK, NO, BAD ].freeze
77
+ RESP_DATA_CONDS = [OK, NO, BAD, BYE, ].freeze
78
+ AUTH_CONDS = [OK, PREAUTH].freeze
79
+ GREETING_CONDS = [OK, BYE, PREAUTH].freeze
80
+ RESP_CONDS = [OK, NO, BAD, BYE, PREAUTH].freeze
81
+ end
82
+ include ResponseConditions
83
+
84
+ module Patterns
85
+
86
+ module CharClassSubtraction
87
+ refine Regexp do
88
+ def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end
89
+ end
90
+ end
91
+ using CharClassSubtraction
92
+
93
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
94
+ # >>>
95
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
96
+ # CHAR = %x01-7F
97
+ # CRLF = CR LF
98
+ # ; Internet standard newline
99
+ # CTL = %x00-1F / %x7F
100
+ # ; controls
101
+ # DIGIT = %x30-39
102
+ # ; 0-9
103
+ # DQUOTE = %x22
104
+ # ; " (Double Quote)
105
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
106
+ # OCTET = %x00-FF
107
+ # SP = %x20
108
+ module RFC5234
109
+ ALPHA = /[A-Za-z]/n
110
+ CHAR = /[\x01-\x7f]/n
111
+ CRLF = /\r\n/n
112
+ CTL = /[\x00-\x1F\x7F]/n
113
+ DIGIT = /\d/n
114
+ DQUOTE = /"/n
115
+ HEXDIG = /\h/
116
+ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
117
+ SP = / /n
118
+ end
119
+
120
+ # UTF-8, a transformation format of ISO 10646
121
+ # >>>
122
+ # UTF8-1 = %x00-7F
123
+ # UTF8-tail = %x80-BF
124
+ # UTF8-2 = %xC2-DF UTF8-tail
125
+ # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
126
+ # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
127
+ # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
128
+ # %xF4 %x80-8F 2( UTF8-tail )
129
+ # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
130
+ # UTF8-octets = *( UTF8-char )
131
+ #
132
+ # n.b. String * Integer is used for repetition, rather than /x{3}/,
133
+ # because ruby 3.2's linear-time cache-based optimization doesn't work
134
+ # with "bounded or fixed times repetition nesting in another repetition
135
+ # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
136
+ # believe it is hard to support this case correctly."
137
+ # See https://bugs.ruby-lang.org/issues/19104
138
+ module RFC3629
139
+ UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
140
+ UTF8_TAIL = /[\x80-\xBF]/n
141
+ UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
142
+ UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
143
+ /\xED[\x80-\x9F]#{UTF8_TAIL}/n,
144
+ /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
145
+ /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
146
+ UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
147
+ /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
148
+ /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
149
+ UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
150
+ UTF8_OCTETS = /#{UTF8_CHAR}*/n
151
+ end
152
+
153
+ include RFC5234
154
+ include RFC3629
155
+
156
+ # CHAR8 = %x01-ff
157
+ # ; any OCTET except NUL, %x00
158
+ CHAR8 = /[\x01-\xff]/n
159
+
160
+ # list-wildcards = "%" / "*"
161
+ LIST_WILDCARDS = /[%*]/n
162
+ # quoted-specials = DQUOTE / "\"
163
+ QUOTED_SPECIALS = /["\\]/n
164
+ # resp-specials = "]"
165
+ RESP_SPECIALS = /[\]]/n
166
+
167
+ # atomish = 1*<any ATOM-CHAR except "[">
168
+ # ; We use "atomish" for msg-att and section, in order
169
+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
170
+ #
171
+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
172
+ # quoted-specials / resp-specials
173
+ # ATOM-CHAR = <any CHAR except atom-specials>
174
+ # atom = 1*ATOM-CHAR
175
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
176
+ # tag = 1*<any ASTRING-CHAR except "+">
177
+
178
+ ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
179
+ ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
180
+
181
+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
182
+ ATOM_CHAR = CHAR - ATOM_SPECIALS
183
+
184
+ ATOM = /#{ATOM_CHAR}+/n
185
+ ASTRING_CHARS = /#{ASTRING_CHAR}+/n
186
+ ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
187
+ TAG = /#{ASTRING_CHAR - /[+]/ }+/
188
+
189
+ # TEXT-CHAR = <any CHAR except CR and LF>
190
+ TEXT_CHAR = CHAR - /[\r\n]/
191
+
192
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
193
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
194
+ CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
195
+
196
+ # flag = "\Answered" / "\Flagged" / "\Deleted" /
197
+ # "\Seen" / "\Draft" / flag-keyword / flag-extension
198
+ # ; Does not include "\Recent"
199
+ # flag-extension = "\" atom
200
+ # ; Future expansion. Client implementations
201
+ # ; MUST accept flag-extension flags. Server
202
+ # ; implementations MUST NOT generate
203
+ # ; flag-extension flags except as defined by
204
+ # ; a future Standard or Standards Track
205
+ # ; revisions of this specification.
206
+ # flag-keyword = "$MDNSent" / "$Forwarded" / "$Junk" /
207
+ # "$NotJunk" / "$Phishing" / atom
208
+ #
209
+ # flag-perm = flag / "\*"
210
+ #
211
+ # Not checking for max one mbx-list-sflag in the parser.
212
+ # >>>
213
+ # mbx-list-oflag = "\Noinferiors" / child-mbox-flag /
214
+ # "\Subscribed" / "\Remote" / flag-extension
215
+ # ; Other flags; multiple from this list are
216
+ # ; possible per LIST response, but each flag
217
+ # ; can only appear once per LIST response
218
+ # mbx-list-sflag = "\NonExistent" / "\Noselect" / "\Marked" /
219
+ # "\Unmarked"
220
+ # ; Selectability flags; only one per LIST response
221
+ # child-mbox-flag = "\HasChildren" / "\HasNoChildren"
222
+ # ; attributes for the CHILDREN return option, at most
223
+ # ; one possible per LIST response
224
+ FLAG = /\\?#{ATOM}/n
225
+ FLAG_EXTENSION = /\\#{ATOM}/n
226
+ FLAG_KEYWORD = ATOM
227
+ FLAG_PERM = Regexp.union(FLAG, "\\*")
228
+ MBX_FLAG = FLAG_EXTENSION
229
+
230
+ # flag-list = "(" [flag *(SP flag)] ")"
231
+ # resp-text-code =/ "PERMANENTFLAGS" SP
232
+ # "(" [flag-perm *(SP flag-perm)] ")"
233
+ # mbx-list-flags = *(mbx-list-oflag SP) mbx-list-sflag
234
+ # *(SP mbx-list-oflag) /
235
+ # mbx-list-oflag *(SP mbx-list-oflag)
236
+ # (Not checking for max one mbx-list-sflag in the parser.)
237
+ FLAG_LIST = /\G\((#{FLAG }(?:#{SP}#{FLAG })*|)\)/ni
238
+ FLAG_PERM_LIST = /\G\((#{FLAG_PERM}(?:#{SP}#{FLAG_PERM})*|)\)/ni
239
+ MBX_LIST_FLAGS = /\G (#{MBX_FLAG }(?:#{SP}#{MBX_FLAG })*) /nix
240
+
241
+ # Gmail allows SP and "]" in flags.......
242
+ QUIRKY_FLAG = Regexp.union(/\\?#{ASTRING_CHARS}/n, "\\*")
243
+ QUIRKY_FLAGS_LIST = /\G\(( [^)]* )\)/nx
244
+
245
+ # RFC3501:
246
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
247
+ # "\" quoted-specials
248
+ # RFC9051:
249
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
250
+ # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
251
+ # RFC3501 & RFC9051:
252
+ # quoted = DQUOTE *QUOTED-CHAR DQUOTE
253
+ QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
254
+ QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
255
+ QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
256
+ QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
257
+ UTF8_2, UTF8_3, UTF8_4)
258
+ QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
259
+ QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
260
+
261
+ # RFC3501:
262
+ # text = 1*TEXT-CHAR
263
+ # RFC9051:
264
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
265
+ # ; Non-ASCII text can only be returned
266
+ # ; after ENABLE IMAP4rev2 command
267
+ TEXT_rev1 = /#{TEXT_CHAR}+/
268
+ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
269
+
270
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
271
+ TAGGED_LABEL_FCHAR = /[a-zA-Z\-_.]/n
272
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
273
+ TAGGED_LABEL_CHAR = /[a-zA-Z\-_.0-9:]*/n
274
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
275
+ # ; Is a valid RFC 3501 "atom".
276
+ TAGGED_EXT_LABEL = /#{TAGGED_LABEL_FCHAR}#{TAGGED_LABEL_CHAR}*/n
277
+
278
+ # nz-number = digit-nz *DIGIT
279
+ # ; Non-zero unsigned 32-bit integer
280
+ # ; (0 < n < 4,294,967,296)
281
+ NZ_NUMBER = /[1-9]\d*/n
282
+
283
+ # seq-number = nz-number / "*"
284
+ # ; message sequence number (COPY, FETCH, STORE
285
+ # ; commands) or unique identifier (UID COPY,
286
+ # ; UID FETCH, UID STORE commands).
287
+ # ; * represents the largest number in use. In
288
+ # ; the case of message sequence numbers, it is
289
+ # ; the number of messages in a non-empty mailbox.
290
+ # ; In the case of unique identifiers, it is the
291
+ # ; unique identifier of the last message in the
292
+ # ; mailbox or, if the mailbox is empty, the
293
+ # ; mailbox's current UIDNEXT value.
294
+ # ; The server should respond with a tagged BAD
295
+ # ; response to a command that uses a message
296
+ # ; sequence number greater than the number of
297
+ # ; messages in the selected mailbox. This
298
+ # ; includes "*" if the selected mailbox is empty.
299
+ SEQ_NUMBER = /#{NZ_NUMBER}|\*/n
300
+
301
+ # seq-range = seq-number ":" seq-number
302
+ # ; two seq-number values and all values between
303
+ # ; these two regardless of order.
304
+ # ; Example: 2:4 and 4:2 are equivalent and
305
+ # ; indicate values 2, 3, and 4.
306
+ # ; Example: a unique identifier sequence range of
307
+ # ; 3291:* includes the UID of the last message in
308
+ # ; the mailbox, even if that value is less than
309
+ # ; 3291.
310
+ SEQ_RANGE = /#{SEQ_NUMBER}:#{SEQ_NUMBER}/n
311
+
312
+ # sequence-set = (seq-number / seq-range) ["," sequence-set]
313
+ # ; set of seq-number values, regardless of order.
314
+ # ; Servers MAY coalesce overlaps and/or execute
315
+ # ; the sequence in any order.
316
+ # ; Example: a message sequence number set of
317
+ # ; 2,4:7,9,12:* for a mailbox with 15 messages is
318
+ # ; equivalent to 2,4,5,6,7,9,12,13,14,15
319
+ # ; Example: a message sequence number set of
320
+ # ; *:4,5:7 for a mailbox with 10 messages is
321
+ # ; equivalent to 10,9,8,7,6,5,4,5,6,7 and MAY
322
+ # ; be reordered and overlap coalesced to be
323
+ # ; 4,5,6,7,8,9,10.
324
+ SEQUENCE_SET_ITEM = /#{SEQ_NUMBER}|#{SEQ_RANGE}/n
325
+ SEQUENCE_SET = /#{SEQUENCE_SET_ITEM}(?:,#{SEQUENCE_SET_ITEM})*/n
326
+ SEQUENCE_SET_STR = /\A#{SEQUENCE_SET}\z/n
327
+
328
+ # partial-range-first = nz-number ":" nz-number
329
+ # ;; Request to search from oldest (lowest UIDs) to
330
+ # ;; more recent messages.
331
+ # ;; A range 500:400 is the same as 400:500.
332
+ # ;; This is similar to <seq-range> from [RFC3501]
333
+ # ;; but cannot contain "*".
334
+ PARTIAL_RANGE_FIRST = /\A(#{NZ_NUMBER}):(#{NZ_NUMBER})\z/n
335
+
336
+ # partial-range-last = MINUS nz-number ":" MINUS nz-number
337
+ # ;; Request to search from newest (highest UIDs) to
338
+ # ;; oldest messages.
339
+ # ;; A range -500:-400 is the same as -400:-500.
340
+ PARTIAL_RANGE_LAST = /\A(-#{NZ_NUMBER}):(-#{NZ_NUMBER})\z/n
341
+
342
+ # partial-range = partial-range-first / partial-range-last
343
+ PARTIAL_RANGE = Regexp.union(PARTIAL_RANGE_FIRST,
344
+ PARTIAL_RANGE_LAST)
345
+
346
+ # RFC3501:
347
+ # literal = "{" number "}" CRLF *CHAR8
348
+ # ; Number represents the number of CHAR8s
349
+ # RFC9051:
350
+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
351
+ # ; <number64> represents the number of CHAR8s.
352
+ # ; A non-synchronizing literal is distinguished
353
+ # ; from a synchronizing literal by the presence of
354
+ # ; "+" before the closing "}".
355
+ # ; Non-synchronizing literals are not allowed when
356
+ # ; sent from server to the client.
357
+ LITERAL = /\{(\d+)\}\r\n/n
358
+
359
+ # RFC3516 (BINARY):
360
+ # literal8 = "~{" number "}" CRLF *OCTET
361
+ # ; <number> represents the number of OCTETs
362
+ # ; in the response string.
363
+ # RFC9051:
364
+ # literal8 = "~{" number64 "}" CRLF *OCTET
365
+ # ; <number64> represents the number of OCTETs
366
+ # ; in the response string.
367
+ LITERAL8 = /~\{(\d+)\}\r\n/n
368
+
369
+ module_function
370
+
371
+ def unescape_quoted!(quoted)
372
+ quoted
373
+ &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
374
+ &.force_encoding("UTF-8")
375
+ end
376
+
377
+ def unescape_quoted(quoted)
378
+ quoted
379
+ &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
380
+ &.force_encoding("UTF-8")
381
+ end
382
+
383
+ end
384
+
385
+ # the default, used in most places
60
386
  BEG_REGEXP = /\G(?:\
61
- (?# 1: SPACE )( +)|\
62
- (?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
63
- (?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
64
- (?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\
65
- (?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
66
- (?# 6: LPAR )(\()|\
67
- (?# 7: RPAR )(\))|\
68
- (?# 8: BSLASH )(\\)|\
69
- (?# 9: STAR )(\*)|\
70
- (?# 10: LBRA )(\[)|\
71
- (?# 11: RBRA )(\])|\
72
- (?# 12: LITERAL )\{(\d+)\}\r\n|\
73
- (?# 13: PLUS )(\+)|\
74
- (?# 14: PERCENT )(%)|\
75
- (?# 15: CRLF )(\r\n)|\
76
- (?# 16: EOF )(\z))/ni
77
-
387
+ (?# 1: SPACE )( )|\
388
+ (?# 2: LITERAL8)#{Patterns::LITERAL8}|\
389
+ (?# 3: ATOM prefixed with a compatible subtype)\
390
+ ((?:\
391
+ (?# 4: NIL )(NIL)|\
392
+ (?# 5: NUMBER )(\d+)|\
393
+ (?# 6: PLUS )(\+))\
394
+ (?# 7: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
395
+ (?# This enables greedy alternation without lookahead, in linear time.)\
396
+ )|\
397
+ (?# Also need to check for ATOM without a subtype prefix.)\
398
+ (?# 8: ATOM )(#{Patterns::ATOMISH})|\
399
+ (?# 9: QUOTED )#{Patterns::QUOTED_rev2}|\
400
+ (?# 10: LPAR )(\()|\
401
+ (?# 11: RPAR )(\))|\
402
+ (?# 12: BSLASH )(\\)|\
403
+ (?# 13: STAR )(\*)|\
404
+ (?# 14: LBRA )(\[)|\
405
+ (?# 15: RBRA )(\])|\
406
+ (?# 16: LITERAL )#{Patterns::LITERAL}|\
407
+ (?# 17: PERCENT )(%)|\
408
+ (?# 18: CRLF )(\r\n)|\
409
+ (?# 19: EOF )(\z))/ni
410
+
411
+ # envelope, body(structure), namespaces
78
412
  DATA_REGEXP = /\G(?:\
79
413
  (?# 1: SPACE )( )|\
80
414
  (?# 2: NIL )(NIL)|\
81
415
  (?# 3: NUMBER )(\d+)|\
82
- (?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
83
- (?# 5: LITERAL )\{(\d+)\}\r\n|\
416
+ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
417
+ (?# 5: LITERAL )#{Patterns::LITERAL}|\
84
418
  (?# 6: LPAR )(\()|\
85
419
  (?# 7: RPAR )(\)))/ni
86
420
 
87
- TEXT_REGEXP = /\G(?:\
88
- (?# 1: TEXT )([^\x00\r\n]*))/ni
421
+ # text, after 'resp-text-code "]"'
422
+ TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
89
423
 
90
- RTEXT_REGEXP = /\G(?:\
91
- (?# 1: LBRA )(\[)|\
92
- (?# 2: TEXT )([^\x00\r\n]*))/ni
93
-
94
- CTEXT_REGEXP = /\G(?:\
95
- (?# 1: TEXT )([^\x00\r\n\]]*))/ni
424
+ # resp-text-code, after 'atom SP'
425
+ CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
96
426
 
97
427
  Token = Struct.new(:symbol, :value)
98
428
 
99
- def response
100
- token = lookahead
101
- case token.symbol
102
- when T_PLUS
103
- result = continue_req
104
- when T_STAR
105
- result = response_untagged
106
- else
107
- result = response_tagged
108
- end
109
- while lookahead.symbol == T_SPACE
110
- # Ignore trailing space for Microsoft Exchange Server
111
- shift_token
112
- end
113
- match(T_CRLF)
114
- match(T_EOF)
115
- return result
116
- end
429
+ def_char_matchers :SP, " ", :T_SPACE
430
+ def_char_matchers :PLUS, "+", :T_PLUS
431
+ def_char_matchers :STAR, "*", :T_STAR
117
432
 
118
- def continue_req
119
- match(T_PLUS)
120
- token = lookahead
121
- if token.symbol == T_SPACE
122
- shift_token
123
- return ContinuationRequest.new(resp_text, @str)
124
- else
125
- return ContinuationRequest.new(ResponseText.new(nil, ""), @str)
126
- end
127
- end
433
+ def_char_matchers :lpar, "(", :T_LPAR
434
+ def_char_matchers :rpar, ")", :T_RPAR
128
435
 
129
- def response_untagged
130
- match(T_STAR)
131
- match(T_SPACE)
132
- token = lookahead
133
- if token.symbol == T_NUMBER
134
- return numeric_response
135
- elsif token.symbol == T_ATOM
136
- case token.value
137
- when /\A(?:OK|NO|BAD|BYE|PREAUTH)\z/ni
138
- return response_cond
139
- when /\A(?:FLAGS)\z/ni
140
- return flags_response
141
- when /\A(?:ID)\z/ni
142
- return id_response
143
- when /\A(?:LIST|LSUB|XLIST)\z/ni
144
- return list_response
145
- when /\A(?:NAMESPACE)\z/ni
146
- return namespace_response
147
- when /\A(?:QUOTA)\z/ni
148
- return getquota_response
149
- when /\A(?:QUOTAROOT)\z/ni
150
- return getquotaroot_response
151
- when /\A(?:ACL)\z/ni
152
- return getacl_response
153
- when /\A(?:SEARCH|SORT)\z/ni
154
- return search_response
155
- when /\A(?:THREAD)\z/ni
156
- return thread_response
157
- when /\A(?:STATUS)\z/ni
158
- return status_response
159
- when /\A(?:CAPABILITY)\z/ni
160
- return capability_response
161
- when /\A(?:NOOP)\z/ni
162
- return ignored_response
163
- else
164
- return text_response
165
- end
436
+ def_char_matchers :lbra, "[", :T_LBRA
437
+ def_char_matchers :rbra, "]", :T_RBRA
438
+
439
+ # valid number ranges are not enforced by parser
440
+ # number = 1*DIGIT
441
+ # ; Unsigned 32-bit integer
442
+ # ; (0 <= n < 4,294,967,296)
443
+ def_token_matchers :number, T_NUMBER, coerce: Integer
444
+
445
+ def_token_matchers :quoted, T_QUOTED
446
+
447
+ # string = quoted / literal
448
+ def_token_matchers :string, T_QUOTED, T_LITERAL
449
+
450
+ # used by nstring8 = nstring / literal8
451
+ def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8
452
+
453
+ # use where string represents "LABEL" values
454
+ def_token_matchers :case_insensitive__string,
455
+ T_QUOTED, T_LITERAL,
456
+ send: :upcase
457
+
458
+ # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
459
+ # NIL? returns nil when it does *not* match
460
+ def_token_matchers :NIL, T_NIL
461
+
462
+ # In addition to explicit uses of +tagged-ext-label+, use this to match
463
+ # keywords when the grammar has not provided any extension syntax.
464
+ #
465
+ # Do *not* use this for labels where the grammar specifies extensions
466
+ # can be +atom+, even if all currently defined labels would match. For
467
+ # example response codes in +resp-text-code+.
468
+ #
469
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
470
+ # ; Is a valid RFC 3501 "atom".
471
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
472
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
473
+ #
474
+ # TODO: add to lexer and only match tagged-ext-label
475
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
476
+
477
+ def_token_matchers :CRLF, T_CRLF
478
+ def_token_matchers :EOF, T_EOF
479
+
480
+ # atom = 1*ATOM-CHAR
481
+ # ATOM-CHAR = <any CHAR except atom-specials>
482
+ ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
483
+
484
+ SEQUENCE_SET_TOKENS = [T_ATOM, T_NUMBER, T_STAR]
485
+
486
+ # sequence-set = (seq-number / seq-range) ["," sequence-set]
487
+ # sequence-set =/ seq-last-command
488
+ # ; Allow for "result of the last command"
489
+ # ; indicator.
490
+ # seq-last-command = "$"
491
+ #
492
+ # *note*: doesn't match seq-last-command
493
+ def sequence_set
494
+ str = combine_adjacent(*SEQUENCE_SET_TOKENS)
495
+ if Patterns::SEQUENCE_SET_STR.match?(str)
496
+ SequenceSet[str]
166
497
  else
167
- parse_error("unexpected token %s", token.symbol)
498
+ parse_error("unexpected atom %p, expected sequence-set", str)
168
499
  end
169
500
  end
170
501
 
171
- def response_tagged
172
- tag = astring_chars
173
- match(T_SPACE)
174
- token = match(T_ATOM)
175
- name = token.value.upcase
176
- match(T_SPACE)
177
- return TaggedResponse.new(tag, name, resp_text, @str)
502
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
503
+ # resp-specials = "]"
504
+ ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
505
+
506
+ ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
507
+
508
+ # tag = 1*<any ASTRING-CHAR except "+">
509
+ TAG_TOKENS = (ASTRING_CHARS_TOKENS - [T_PLUS]).freeze
510
+
511
+ # TODO: handle atom, astring_chars, and tag entirely inside the lexer
512
+ def atom; combine_adjacent(*ATOM_TOKENS) end
513
+ def astring_chars; combine_adjacent(*ASTRING_CHARS_TOKENS) end
514
+ def tag; combine_adjacent(*TAG_TOKENS) end
515
+
516
+ # the #accept version of #atom
517
+ def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
518
+
519
+ # Returns <tt>atom.upcase</tt>
520
+ def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
521
+
522
+ # Returns <tt>atom?&.upcase</tt>
523
+ def case_insensitive__atom?
524
+ -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
178
525
  end
179
526
 
180
- def response_cond
181
- token = match(T_ATOM)
182
- name = token.value.upcase
183
- match(T_SPACE)
184
- return UntaggedResponse.new(name, resp_text, @str)
527
+ # astring = 1*ASTRING-CHAR / string
528
+ def astring
529
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
185
530
  end
186
531
 
187
- def numeric_response
188
- n = number
189
- match(T_SPACE)
190
- token = match(T_ATOM)
191
- name = token.value.upcase
192
- case name
193
- when "EXISTS", "RECENT", "EXPUNGE"
194
- return UntaggedResponse.new(name, n, @str)
195
- when "FETCH"
196
- shift_token
197
- match(T_SPACE)
198
- data = FetchData.new(n, msg_att(n))
199
- return UntaggedResponse.new(name, data, @str)
200
- end
532
+ def astring?
533
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
201
534
  end
202
535
 
203
- def msg_att(n)
204
- match(T_LPAR)
205
- attr = {}
206
- while true
207
- token = lookahead
208
- case token.symbol
209
- when T_RPAR
210
- shift_token
211
- break
212
- when T_SPACE
213
- shift_token
214
- next
215
- end
216
- case token.value
217
- when /\A(?:ENVELOPE)\z/ni
218
- name, val = envelope_data
219
- when /\A(?:FLAGS)\z/ni
220
- name, val = flags_data
221
- when /\A(?:INTERNALDATE)\z/ni
222
- name, val = internaldate_data
223
- when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
224
- name, val = rfc822_text
225
- when /\A(?:RFC822\.SIZE)\z/ni
226
- name, val = rfc822_size
227
- when /\A(?:BODY(?:STRUCTURE)?)\z/ni
228
- name, val = body_data
229
- when /\A(?:UID)\z/ni
230
- name, val = uid_data
231
- when /\A(?:MODSEQ)\z/ni
232
- name, val = modseq_data
233
- else
234
- parse_error("unknown attribute `%s' for {%d}", token.value, n)
235
- end
236
- attr[name] = val
237
- end
238
- return attr
536
+ # Use #label or #label_in to assert specific known labels
537
+ # (+tagged-ext-label+ only, not +atom+).
538
+ def label(word)
539
+ (val = tagged_ext_label) == word and return val
540
+ parse_error("unexpected atom %p, expected %p instead", val, word)
239
541
  end
240
542
 
241
- def envelope_data
242
- token = match(T_ATOM)
243
- name = token.value.upcase
244
- match(T_SPACE)
245
- return name, envelope
543
+ # Use #label or #label_in to assert specific known labels
544
+ # (+tagged-ext-label+ only, not +atom+).
545
+ def label_in(*labels)
546
+ lbl = tagged_ext_label and labels.include?(lbl) and return lbl
547
+ parse_error("unexpected atom %p, expected one of %s instead",
548
+ lbl, labels.join(" or "))
246
549
  end
247
550
 
248
- def envelope
249
- @lex_state = EXPR_DATA
250
- token = lookahead
251
- if token.symbol == T_NIL
252
- shift_token
253
- result = nil
254
- else
255
- match(T_LPAR)
256
- date = nstring
257
- match(T_SPACE)
258
- subject = nstring
259
- match(T_SPACE)
260
- from = address_list
261
- match(T_SPACE)
262
- sender = address_list
263
- match(T_SPACE)
264
- reply_to = address_list
265
- match(T_SPACE)
266
- to = address_list
267
- match(T_SPACE)
268
- cc = address_list
269
- match(T_SPACE)
270
- bcc = address_list
271
- match(T_SPACE)
272
- in_reply_to = nstring
273
- match(T_SPACE)
274
- message_id = nstring
275
- match(T_RPAR)
276
- result = Envelope.new(date, subject, from, sender, reply_to,
277
- to, cc, bcc, in_reply_to, message_id)
278
- end
279
- @lex_state = EXPR_BEG
280
- return result
551
+ # expects "OK" or "PREAUTH" and raises InvalidResponseError on failure
552
+ def resp_cond_auth__name
553
+ lbl = tagged_ext_label and AUTH_CONDS.include? lbl and return lbl
554
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
555
+ lbl, AUTH_CONDS.join(" or ")
556
+ ]
281
557
  end
282
558
 
283
- def flags_data
284
- token = match(T_ATOM)
285
- name = token.value.upcase
286
- match(T_SPACE)
287
- return name, flag_list
559
+ # expects "OK" or "NO" or "BAD" and raises InvalidResponseError on failure
560
+ def resp_cond_state__name
561
+ lbl = tagged_ext_label and RESP_COND_STATES.include? lbl and return lbl
562
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
563
+ lbl, RESP_COND_STATES.join(" or ")
564
+ ]
288
565
  end
289
566
 
290
- def internaldate_data
291
- token = match(T_ATOM)
292
- name = token.value.upcase
293
- match(T_SPACE)
294
- token = match(T_QUOTED)
295
- return name, token.value
567
+ # nstring = string / nil
568
+ def nstring
569
+ NIL? ? nil : string
296
570
  end
297
571
 
298
- def rfc822_text
299
- token = match(T_ATOM)
300
- name = token.value.upcase
301
- token = lookahead
302
- if token.symbol == T_LBRA
303
- shift_token
304
- match(T_RBRA)
305
- end
306
- match(T_SPACE)
307
- return name, nstring
572
+ def nstring8
573
+ NIL? ? nil : string8
308
574
  end
309
575
 
310
- def rfc822_size
311
- token = match(T_ATOM)
312
- name = token.value.upcase
313
- match(T_SPACE)
314
- return name, number
576
+ def nquoted
577
+ NIL? ? nil : quoted
315
578
  end
316
579
 
317
- def body_data
318
- token = match(T_ATOM)
319
- name = token.value.upcase
320
- token = lookahead
321
- if token.symbol == T_SPACE
322
- shift_token
323
- return name, body
324
- end
325
- name.concat(section)
326
- token = lookahead
327
- if token.symbol == T_ATOM
328
- name.concat(token.value)
329
- shift_token
330
- end
331
- match(T_SPACE)
332
- data = nstring
333
- return name, data
580
+ # use where nstring represents "LABEL" values
581
+ def case_insensitive__nstring
582
+ NIL? ? nil : case_insensitive__string
334
583
  end
335
584
 
336
- def body
337
- @lex_state = EXPR_DATA
338
- token = lookahead
339
- if token.symbol == T_NIL
340
- shift_token
341
- result = nil
342
- else
343
- match(T_LPAR)
344
- token = lookahead
345
- if token.symbol == T_LPAR
346
- result = body_type_mpart
347
- else
348
- result = body_type_1part
349
- end
350
- match(T_RPAR)
585
+ # tagged-ext-comp = astring /
586
+ # tagged-ext-comp *(SP tagged-ext-comp) /
587
+ # "(" tagged-ext-comp ")"
588
+ # ; Extensions that follow this general
589
+ # ; syntax should use nstring instead of
590
+ # ; astring when appropriate in the context
591
+ # ; of the extension.
592
+ # ; Note that a message set or a "number"
593
+ # ; can always be represented as an "atom".
594
+ # ; A URL should be represented as
595
+ # ; a "quoted" string.
596
+ def tagged_ext_comp
597
+ vals = []
598
+ while true
599
+ vals << case lookahead!(*ASTRING_TOKENS, T_LPAR).symbol
600
+ when T_LPAR then lpar; ary = tagged_ext_comp; rpar; ary
601
+ when T_NUMBER then number
602
+ else astring
603
+ end
604
+ SP? or break
351
605
  end
352
- @lex_state = EXPR_BEG
353
- return result
606
+ vals
354
607
  end
355
608
 
356
- def body_type_1part
357
- token = lookahead
358
- case token.value
359
- when /\A(?:TEXT)\z/ni
360
- return body_type_text
361
- when /\A(?:MESSAGE)\z/ni
362
- return body_type_msg
363
- when /\A(?:ATTACHMENT)\z/ni
364
- return body_type_attachment
365
- when /\A(?:MIXED)\z/ni
366
- return body_type_mixed
609
+ # tagged-ext-simple is a subset of atom
610
+ # TODO: recognize sequence-set in the lexer
611
+ #
612
+ # tagged-ext-simple = sequence-set / number / number64
613
+ def tagged_ext_simple
614
+ number? || sequence_set
615
+ end
616
+
617
+ # tagged-ext-val = tagged-ext-simple /
618
+ # "(" [tagged-ext-comp] ")"
619
+ def tagged_ext_val
620
+ if lpar?
621
+ _ = peek_rpar? ? [] : tagged_ext_comp
622
+ rpar
623
+ _
367
624
  else
368
- return body_type_basic
369
- end
370
- end
625
+ tagged_ext_simple
626
+ end
627
+ end
628
+
629
+ # mailbox = "INBOX" / astring
630
+ # ; INBOX is case-insensitive. All case variants of
631
+ # ; INBOX (e.g., "iNbOx") MUST be interpreted as INBOX
632
+ # ; not as an astring. An astring which consists of
633
+ # ; the case-insensitive sequence "I" "N" "B" "O" "X"
634
+ # ; is considered to be INBOX and not an astring.
635
+ # ; Refer to section 5.1 for further
636
+ # ; semantic details of mailbox names.
637
+ alias mailbox astring
638
+
639
+ # valid number ranges are not enforced by parser
640
+ # number64 = 1*DIGIT
641
+ # ; Unsigned 63-bit integer
642
+ # ; (0 <= n <= 9,223,372,036,854,775,807)
643
+ alias number64 number
644
+ alias number64? number?
645
+
646
+ # valid number ranges are not enforced by parser
647
+ # nz-number = digit-nz *DIGIT
648
+ # ; Non-zero unsigned 32-bit integer
649
+ # ; (0 < n < 4,294,967,296)
650
+ alias nz_number number
651
+ alias nz_number? number?
652
+
653
+ # valid number ranges are not enforced by parser
654
+ # nz-number64 = digit-nz *DIGIT
655
+ # ; Unsigned 63-bit integer
656
+ # ; (0 < n <= 9,223,372,036,854,775,807)
657
+ alias nz_number64 nz_number
658
+
659
+ # valid number ranges are not enforced by parser
660
+ # uniqueid = nz-number
661
+ # ; Strictly ascending
662
+ alias uniqueid nz_number
371
663
 
372
- def body_type_basic
373
- mtype, msubtype = media_type
374
- token = lookahead
375
- if token.symbol == T_RPAR
376
- return BodyTypeBasic.new(mtype, msubtype)
377
- end
378
- match(T_SPACE)
379
- param, content_id, desc, enc, size = body_fields
380
- md5, disposition, language, extension = body_ext_1part
381
- return BodyTypeBasic.new(mtype, msubtype,
382
- param, content_id,
383
- desc, enc, size,
384
- md5, disposition, language, extension)
664
+ # valid number ranges are not enforced by parser
665
+ #
666
+ # a 64-bit unsigned integer and is the decimal equivalent for the ID hex
667
+ # string used in the web interface and the Gmail API.
668
+ alias x_gm_id number
669
+
670
+ # [RFC3501 & RFC9051:]
671
+ # response = *(continue-req / response-data) response-done
672
+ #
673
+ # For simplicity, response isn't interpreted as the combination of the
674
+ # three response types, but instead represents any individual server
675
+ # response. Our simplified interpretation is defined as:
676
+ # response = continue-req | response_data | response-tagged
677
+ #
678
+ # n.b: our "response-tagged" definition parses "greeting" too.
679
+ def response
680
+ resp = case lookahead!(T_PLUS, T_STAR, *TAG_TOKENS).symbol
681
+ when T_PLUS then continue_req
682
+ when T_STAR then response_data
683
+ else response_tagged
684
+ end
685
+ accept_spaces # QUIRKY: Ignore trailing space (MS Exchange Server?)
686
+ CRLF!
687
+ EOF!
688
+ resp
689
+ end
690
+
691
+ # RFC3501 & RFC9051:
692
+ # continue-req = "+" SP (resp-text / base64) CRLF
693
+ #
694
+ # n.b: base64 is valid resp-text. And in the spirit of RFC9051 Appx E 23
695
+ # (and to workaround existing servers), we use the following grammar:
696
+ #
697
+ # continue-req = "+" (SP (resp-text)) CRLF
698
+ def continue_req
699
+ PLUS!
700
+ ContinuationRequest.new(SP? ? resp_text : ResponseText::EMPTY, @str)
701
+ end
702
+
703
+ RE_RESPONSE_TYPE = /\G(?:\d+ )?(?<type>#{Patterns::TAGGED_EXT_LABEL})/n
704
+
705
+ # [RFC3501:]
706
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
707
+ # mailbox-data / message-data / capability-data) CRLF
708
+ # [RFC4466:]
709
+ # response-data = "*" SP response-payload CRLF
710
+ # response-payload = resp-cond-state / resp-cond-bye /
711
+ # mailbox-data / message-data / capability-data
712
+ # RFC5161 (ENABLE capability):
713
+ # response-data =/ "*" SP enable-data CRLF
714
+ # RFC5255 (LANGUAGE capability)
715
+ # response-payload =/ language-data
716
+ # RFC5255 (I18NLEVEL=1 and I18NLEVEL=2 capabilities)
717
+ # response-payload =/ comparator-data
718
+ # [RFC9051:]
719
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
720
+ # mailbox-data / message-data / capability-data /
721
+ # enable-data) CRLF
722
+ #
723
+ # [merging in greeting and response-fatal:]
724
+ # greeting = "*" SP (resp-cond-auth / resp-cond-bye) CRLF
725
+ # response-fatal = "*" SP resp-cond-bye CRLF
726
+ # response-data =/ "*" SP (resp-cond-auth / resp-cond-bye) CRLF
727
+ # [removing duplicates, this is simply]
728
+ # response-payload =/ resp-cond-auth
729
+ #
730
+ # TODO: remove resp-cond-auth and handle greeting separately
731
+ def response_data
732
+ STAR!; SP!
733
+ m = peek_re(RE_RESPONSE_TYPE) or parse_error("unparsable response")
734
+ case m["type"].upcase
735
+ when "OK" then resp_cond_state__untagged # RFC3501, RFC9051
736
+ when "FETCH" then message_data__fetch # RFC3501, RFC9051
737
+ when "EXPUNGE" then message_data__expunge # RFC3501, RFC9051
738
+ when "EXISTS" then mailbox_data__exists # RFC3501, RFC9051
739
+ when "ESEARCH" then esearch_response # RFC4731, RFC9051, etc
740
+ when "VANISHED" then expunged_resp # RFC7162
741
+ when "UIDFETCH" then uidfetch_resp # RFC9586
742
+ when "SEARCH" then mailbox_data__search # RFC3501 (obsolete)
743
+ when "CAPABILITY" then capability_data__untagged # RFC3501, RFC9051
744
+ when "FLAGS" then mailbox_data__flags # RFC3501, RFC9051
745
+ when "LIST" then mailbox_data__list # RFC3501, RFC9051
746
+ when "STATUS" then mailbox_data__status # RFC3501, RFC9051
747
+ when "NAMESPACE" then namespace_response # RFC2342, RFC9051
748
+ when "ENABLED" then enable_data # RFC5161, RFC9051
749
+ when "BAD" then resp_cond_state__untagged # RFC3501, RFC9051
750
+ when "NO" then resp_cond_state__untagged # RFC3501, RFC9051
751
+ when "PREAUTH" then resp_cond_auth # RFC3501, RFC9051
752
+ when "BYE" then resp_cond_bye # RFC3501, RFC9051
753
+ when "RECENT" then mailbox_data__recent # RFC3501 (obsolete)
754
+ when "SORT" then sort_data # RFC5256, RFC7162
755
+ when "THREAD" then thread_data # RFC5256
756
+ when "QUOTA" then quota_response # RFC2087, RFC9208
757
+ when "QUOTAROOT" then quotaroot_response # RFC2087, RFC9208
758
+ when "ID" then id_response # RFC2971
759
+ when "ACL" then acl_data # RFC4314
760
+ when "LISTRIGHTS" then listrights_data # RFC4314
761
+ when "MYRIGHTS" then myrights_data # RFC4314
762
+ when "METADATA" then metadata_resp # RFC5464
763
+ when "LANGUAGE" then language_data # RFC5255
764
+ when "COMPARATOR" then comparator_data # RFC5255
765
+ when "CONVERTED" then message_data__converted # RFC5259
766
+ when "LSUB" then mailbox_data__lsub # RFC3501 (obsolete)
767
+ when "XLIST" then mailbox_data__xlist # deprecated
768
+ when "NOOP" then response_data__noop
769
+ else response_data__unhandled
770
+ end
771
+ end
772
+
773
+ def response_data__unhandled(klass = UntaggedResponse)
774
+ num = number?; SP?
775
+ type = tagged_ext_label; SP?
776
+ text = remaining_unparsed
777
+ data =
778
+ if num && text then UnparsedNumericResponseData.new(num, text)
779
+ elsif text then UnparsedData.new(text)
780
+ else num
781
+ end
782
+ klass.new(type, data, @str)
385
783
  end
386
784
 
387
- def body_type_text
388
- mtype, msubtype = media_type
389
- match(T_SPACE)
390
- param, content_id, desc, enc, size = body_fields
391
- match(T_SPACE)
392
- lines = number
393
- md5, disposition, language, extension = body_ext_1part
394
- return BodyTypeText.new(mtype, msubtype,
395
- param, content_id,
396
- desc, enc, size,
397
- lines,
398
- md5, disposition, language, extension)
785
+ # reads all the way up until CRLF
786
+ def remaining_unparsed
787
+ str = @str[@pos...-2] and @pos += str.bytesize
788
+ str&.empty? ? nil : str
399
789
  end
400
790
 
401
- def body_type_msg
402
- mtype, msubtype = media_type
403
- match(T_SPACE)
404
- param, content_id, desc, enc, size = body_fields
791
+ def response_data__ignored; response_data__unhandled(IgnoredResponse) end
792
+ alias response_data__noop response_data__ignored
405
793
 
406
- token = lookahead
407
- if token.symbol == T_RPAR
408
- # If this is not message/rfc822, we shouldn't apply the RFC822
409
- # spec to it. We should handle anything other than
410
- # message/rfc822 using multipart extension data [rfc3501] (i.e.
411
- # the data itself won't be returned, we would have to retrieve it
412
- # with BODYSTRUCTURE instead of with BODY
413
-
414
- # Also, sometimes a message/rfc822 is included as a large
415
- # attachment instead of having all of the other details
416
- # (e.g. attaching a .eml file to an email)
417
- if msubtype == "RFC822"
418
- return BodyTypeMessage.new(mtype, msubtype, param, content_id,
419
- desc, enc, size, nil, nil, nil, nil,
420
- nil, nil, nil)
421
- else
422
- return BodyTypeExtension.new(mtype, msubtype,
423
- param, content_id,
424
- desc, enc, size)
425
- end
426
- end
794
+ alias listrights_data response_data__unhandled
795
+ alias myrights_data response_data__unhandled
796
+ alias metadata_resp response_data__unhandled
797
+ alias language_data response_data__unhandled
798
+ alias comparator_data response_data__unhandled
799
+ alias message_data__converted response_data__unhandled
427
800
 
428
- match(T_SPACE)
429
- env = envelope
430
- match(T_SPACE)
431
- b = body
432
- match(T_SPACE)
433
- lines = number
434
- md5, disposition, language, extension = body_ext_1part
435
- return BodyTypeMessage.new(mtype, msubtype,
436
- param, content_id,
437
- desc, enc, size,
438
- env, b, lines,
439
- md5, disposition, language, extension)
801
+ # RFC3501 & RFC9051:
802
+ # response-tagged = tag SP resp-cond-state CRLF
803
+ def response_tagged
804
+ TaggedResponse.new(tag, *(SP!; resp_cond_state), @str)
440
805
  end
441
806
 
442
- def body_type_attachment
443
- mtype = case_insensitive_string
444
- match(T_SPACE)
445
- param = body_fld_param
446
- return BodyTypeAttachment.new(mtype, nil, param)
807
+ # RFC3501 & RFC9051:
808
+ # resp-cond-state = ("OK" / "NO" / "BAD") SP resp-text
809
+ #
810
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
811
+ # servers), we don't require a final SP and instead parse this as:
812
+ #
813
+ # resp-cond-state = ("OK" / "NO" / "BAD") [SP resp-text]
814
+ def resp_cond_state
815
+ [resp_cond_state__name, SP? ? resp_text : ResponseText::EMPTY]
447
816
  end
448
817
 
449
- def body_type_mixed
450
- mtype = "MULTIPART"
451
- msubtype = case_insensitive_string
452
- param, disposition, language, extension = body_ext_mpart
453
- return BodyTypeBasic.new(mtype, msubtype, param, nil, nil, nil, nil, nil, disposition, language, extension)
818
+ def resp_cond_state__untagged
819
+ UntaggedResponse.new(*resp_cond_state, @str)
454
820
  end
455
821
 
456
- def body_type_mpart
457
- parts = []
458
- while true
459
- token = lookahead
460
- if token.symbol == T_SPACE
461
- shift_token
462
- break
463
- end
464
- parts.push(body)
465
- end
466
- mtype = "MULTIPART"
467
- msubtype = case_insensitive_string
468
- param, disposition, language, extension = body_ext_mpart
469
- return BodyTypeMultipart.new(mtype, msubtype, parts,
470
- param, disposition, language,
471
- extension)
822
+ # resp-cond-auth = ("OK" / "PREAUTH") SP resp-text
823
+ #
824
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
825
+ # servers), we don't require a final SP and instead parse this as:
826
+ #
827
+ # resp-cond-auth = ("OK" / "PREAUTH") [SP resp-text]
828
+ def resp_cond_auth
829
+ UntaggedResponse.new(resp_cond_auth__name,
830
+ SP? ? resp_text : ResponseText::EMPTY,
831
+ @str)
472
832
  end
473
833
 
474
- def media_type
475
- mtype = case_insensitive_string
476
- token = lookahead
477
- if token.symbol != T_SPACE
478
- return mtype, nil
479
- end
480
- match(T_SPACE)
481
- msubtype = case_insensitive_string
482
- return mtype, msubtype
834
+ # resp-cond-bye = "BYE" SP resp-text
835
+ #
836
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
837
+ # servers), we don't require a final SP and instead parse this as:
838
+ #
839
+ # resp-cond-bye = "BYE" [SP resp-text]
840
+ def resp_cond_bye
841
+ UntaggedResponse.new(label(BYE),
842
+ SP? ? resp_text : ResponseText::EMPTY,
843
+ @str)
483
844
  end
484
845
 
485
- def body_fields
486
- param = body_fld_param
487
- match(T_SPACE)
488
- content_id = nstring
489
- match(T_SPACE)
490
- desc = nstring
491
- match(T_SPACE)
492
- enc = case_insensitive_string
493
- match(T_SPACE)
494
- size = number
495
- return param, content_id, desc, enc, size
846
+ # message-data = nz-number SP ("EXPUNGE" / ("FETCH" SP msg-att))
847
+ def message_data__fetch
848
+ seq = nz_number; SP!
849
+ name = label "FETCH"; SP!
850
+ data = FetchData.new(seq, msg_att(seq))
851
+ UntaggedResponse.new(name, data, @str)
496
852
  end
497
853
 
498
- def body_fld_param
499
- token = lookahead
500
- if token.symbol == T_NIL
501
- shift_token
502
- return nil
503
- end
504
- match(T_LPAR)
505
- param = {}
506
- while true
507
- token = lookahead
508
- case token.symbol
509
- when T_RPAR
510
- shift_token
511
- break
512
- when T_SPACE
513
- shift_token
514
- end
515
- name = case_insensitive_string
516
- match(T_SPACE)
517
- val = string
518
- param[name] = val
519
- end
520
- return param
854
+ # uidfetch-resp = uniqueid SP "UIDFETCH" SP msg-att
855
+ def uidfetch_resp
856
+ uid = uniqueid; SP!
857
+ name = label "UIDFETCH"; SP!
858
+ data = UIDFetchData.new(uid, msg_att(uid))
859
+ UntaggedResponse.new(name, data, @str)
521
860
  end
522
861
 
523
- def body_ext_1part
524
- token = lookahead
525
- if token.symbol == T_SPACE
526
- shift_token
527
- else
528
- return nil
529
- end
530
- md5 = nstring
531
-
532
- token = lookahead
533
- if token.symbol == T_SPACE
534
- shift_token
535
- else
536
- return md5
537
- end
538
- disposition = body_fld_dsp
539
-
540
- token = lookahead
541
- if token.symbol == T_SPACE
542
- shift_token
543
- else
544
- return md5, disposition
545
- end
546
- language = body_fld_lang
862
+ def response_data__simple_numeric
863
+ data = nz_number; SP!
864
+ name = tagged_ext_label
865
+ UntaggedResponse.new(name, data, @str)
866
+ end
547
867
 
548
- token = lookahead
549
- if token.symbol == T_SPACE
550
- shift_token
551
- else
552
- return md5, disposition, language
553
- end
868
+ alias message_data__expunge response_data__simple_numeric
869
+ alias mailbox_data__exists response_data__simple_numeric
870
+ alias mailbox_data__recent response_data__simple_numeric
554
871
 
555
- extension = body_extensions
556
- return md5, disposition, language, extension
872
+ # The name for this is confusing, because it *replaces* EXPUNGE
873
+ # >>>
874
+ # expunged-resp = "VANISHED" [SP "(EARLIER)"] SP known-uids
875
+ def expunged_resp
876
+ name = label "VANISHED"; SP!
877
+ earlier = if lpar? then label("EARLIER"); rpar; SP!; true else false end
878
+ uids = known_uids
879
+ data = VanishedData[uids, earlier]
880
+ UntaggedResponse.new name, data, @str
557
881
  end
558
882
 
559
- def body_ext_mpart
560
- token = lookahead
561
- if token.symbol == T_SPACE
562
- shift_token
563
- else
564
- return nil
565
- end
566
- param = body_fld_param
883
+ # TODO: replace with uid_set
884
+ alias known_uids sequence_set
567
885
 
568
- token = lookahead
569
- if token.symbol == T_SPACE
570
- shift_token
571
- else
572
- return param
886
+ # RFC3501 & RFC9051:
887
+ # msg-att = "(" (msg-att-dynamic / msg-att-static)
888
+ # *(SP (msg-att-dynamic / msg-att-static)) ")"
889
+ #
890
+ # msg-att-dynamic = "FLAGS" SP "(" [flag-fetch *(SP flag-fetch)] ")"
891
+ # RFC5257 (ANNOTATE extension):
892
+ # msg-att-dynamic =/ "ANNOTATION" SP
893
+ # ( "(" entry-att *(SP entry-att) ")" /
894
+ # "(" entry *(SP entry) ")" )
895
+ # RFC7162 (CONDSTORE extension):
896
+ # msg-att-dynamic =/ fetch-mod-resp
897
+ # fetch-mod-resp = "MODSEQ" SP "(" permsg-modsequence ")"
898
+ # RFC8970 (PREVIEW extension):
899
+ # msg-att-dynamic =/ "PREVIEW" SP nstring
900
+ #
901
+ # RFC3501:
902
+ # msg-att-static = "ENVELOPE" SP envelope /
903
+ # "INTERNALDATE" SP date-time /
904
+ # "RFC822" [".HEADER" / ".TEXT"] SP nstring /
905
+ # "RFC822.SIZE" SP number /
906
+ # "BODY" ["STRUCTURE"] SP body /
907
+ # "BODY" section ["<" number ">"] SP nstring /
908
+ # "UID" SP uniqueid
909
+ # RFC3516 (BINARY extension):
910
+ # msg-att-static =/ "BINARY" section-binary SP (nstring / literal8)
911
+ # / "BINARY.SIZE" section-binary SP number
912
+ # RFC8514 (SAVEDATE extension):
913
+ # msg-att-static =/ "SAVEDATE" SP (date-time / nil)
914
+ # RFC8474 (OBJECTID extension):
915
+ # msg-att-static =/ fetch-emailid-resp / fetch-threadid-resp
916
+ # fetch-emailid-resp = "EMAILID" SP "(" objectid ")"
917
+ # fetch-threadid-resp = "THREADID" SP ( "(" objectid ")" / nil )
918
+ # RFC9051:
919
+ # msg-att-static = "ENVELOPE" SP envelope /
920
+ # "INTERNALDATE" SP date-time /
921
+ # "RFC822.SIZE" SP number64 /
922
+ # "BODY" ["STRUCTURE"] SP body /
923
+ # "BODY" section ["<" number ">"] SP nstring /
924
+ # "BINARY" section-binary SP (nstring / literal8) /
925
+ # "BINARY.SIZE" section-binary SP number /
926
+ # "UID" SP uniqueid
927
+ #
928
+ # Re https://www.rfc-editor.org/errata/eid7246, I'm adding "offset" to the
929
+ # official "BINARY" ABNF, like so:
930
+ #
931
+ # msg-att-static =/ "BINARY" section-binary ["<" number ">"] SP
932
+ # (nstring / literal8)
933
+ def msg_att(n)
934
+ lpar
935
+ attr = {}
936
+ while true
937
+ name = msg_att__label; SP!
938
+ val =
939
+ case name
940
+ when "UID" then uniqueid
941
+ when "FLAGS" then flag_list
942
+ when "BODY" then body
943
+ when /\ABODY\[/ni then nstring
944
+ when "BODYSTRUCTURE" then body
945
+ when "ENVELOPE" then envelope
946
+ when "INTERNALDATE" then date_time
947
+ when "RFC822.SIZE" then number64
948
+ when /\ABINARY\[/ni then nstring8 # BINARY, IMAP4rev2
949
+ when /\ABINARY\.SIZE\[/ni then number # BINARY, IMAP4rev2
950
+ when "RFC822" then nstring # not in rev2
951
+ when "RFC822.HEADER" then nstring # not in rev2
952
+ when "RFC822.TEXT" then nstring # not in rev2
953
+ when "MODSEQ" then parens__modseq # CONDSTORE
954
+ when "EMAILID" then parens__objectid # OBJECTID
955
+ when "THREADID" then nparens__objectid # OBJECTID
956
+ when "X-GM-MSGID" then x_gm_id # GMail
957
+ when "X-GM-THRID" then x_gm_id # GMail
958
+ when "X-GM-LABELS" then x_gm_labels # GMail
959
+ else parse_error("unknown attribute `%s' for {%d}", name, n)
960
+ end
961
+ attr[name] = val
962
+ break unless SP?
963
+ break if lookahead_rpar?
573
964
  end
574
- disposition = body_fld_dsp
965
+ rpar
966
+ attr
967
+ end
575
968
 
576
- token = lookahead
577
- if token.symbol == T_SPACE
578
- shift_token
579
- else
580
- return param, disposition
969
+ # appends "[section]" and "<partial>" to the base label
970
+ def msg_att__label
971
+ case (name = tagged_ext_label)
972
+ when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
973
+ # ignoring "[]" fixes https://bugs.ruby-lang.org/issues/5620
974
+ lbra? and rbra
975
+ when "BODY"
976
+ peek_lbra? and name << section and
977
+ peek_str?("<") and name << gt__number__lt # partial
978
+ when "BINARY", "BINARY.SIZE"
979
+ name << section_binary
980
+ # see https://www.rfc-editor.org/errata/eid7246 and the note above
981
+ peek_str?("<") and name << gt__number__lt # partial
581
982
  end
582
- language = body_fld_lang
983
+ name
984
+ end
583
985
 
584
- token = lookahead
585
- if token.symbol == T_SPACE
586
- shift_token
587
- else
588
- return param, disposition, language
589
- end
986
+ # this represents the partial size for BODY or BINARY
987
+ alias gt__number__lt atom
590
988
 
591
- extension = body_extensions
592
- return param, disposition, language, extension
989
+ # RFC3501 & RFC9051:
990
+ # envelope = "(" env-date SP env-subject SP env-from SP
991
+ # env-sender SP env-reply-to SP env-to SP env-cc SP
992
+ # env-bcc SP env-in-reply-to SP env-message-id ")"
993
+ def envelope
994
+ @lex_state = EXPR_DATA
995
+ lpar; date = env_date
996
+ SP!; subject = env_subject
997
+ SP!; from = env_from
998
+ SP!; sender = env_sender
999
+ SP!; reply_to = env_reply_to
1000
+ SP!; to = env_to
1001
+ SP!; cc = env_cc
1002
+ SP!; bcc = env_bcc
1003
+ SP!; in_reply_to = env_in_reply_to
1004
+ SP!; message_id = env_message_id
1005
+ rpar
1006
+ Envelope.new(date, subject, from, sender, reply_to,
1007
+ to, cc, bcc, in_reply_to, message_id)
1008
+ ensure
1009
+ @lex_state = EXPR_BEG
593
1010
  end
594
1011
 
595
- def body_fld_dsp
596
- token = lookahead
597
- if token.symbol == T_NIL
598
- shift_token
599
- return nil
600
- end
601
- match(T_LPAR)
602
- dsp_type = case_insensitive_string
603
- match(T_SPACE)
604
- param = body_fld_param
605
- match(T_RPAR)
606
- return ContentDisposition.new(dsp_type, param)
1012
+ # env-date = nstring
1013
+ # env-subject = nstring
1014
+ # env-in-reply-to = nstring
1015
+ # env-message-id = nstring
1016
+ alias env_date nstring
1017
+ alias env_subject nstring
1018
+ alias env_in_reply_to nstring
1019
+ alias env_message_id nstring
1020
+
1021
+ # env-from = "(" 1*address ")" / nil
1022
+ # env-sender = "(" 1*address ")" / nil
1023
+ # env-reply-to = "(" 1*address ")" / nil
1024
+ # env-to = "(" 1*address ")" / nil
1025
+ # env-cc = "(" 1*address ")" / nil
1026
+ # env-bcc = "(" 1*address ")" / nil
1027
+ def nlist__address
1028
+ return if NIL?
1029
+ lpar; list = [address]; list << address until (quirky_SP?; rpar?)
1030
+ list
1031
+ end
1032
+
1033
+ alias env_from nlist__address
1034
+ alias env_sender nlist__address
1035
+ alias env_reply_to nlist__address
1036
+ alias env_to nlist__address
1037
+ alias env_cc nlist__address
1038
+ alias env_bcc nlist__address
1039
+
1040
+ # Used when servers erroneously send an extra SP.
1041
+ #
1042
+ # As of 2023-11-28, Outlook.com (still) sends SP
1043
+ # between +address+ in <tt>env-*</tt> lists.
1044
+ alias quirky_SP? SP?
1045
+
1046
+ # date-time = DQUOTE date-day-fixed "-" date-month "-" date-year
1047
+ # SP time SP zone DQUOTE
1048
+ alias date_time quoted
1049
+ alias ndatetime nquoted
1050
+
1051
+ # RFC-3501 & RFC-9051:
1052
+ # body = "(" (body-type-1part / body-type-mpart) ")"
1053
+ def body
1054
+ @lex_state = EXPR_DATA
1055
+ lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar
1056
+ result
1057
+ ensure
1058
+ @lex_state = EXPR_BEG
607
1059
  end
1060
+ alias lookahead_body? lookahead_lpar?
608
1061
 
609
- def body_fld_lang
610
- token = lookahead
611
- if token.symbol == T_LPAR
612
- shift_token
613
- result = []
614
- while true
615
- token = lookahead
616
- case token.symbol
617
- when T_RPAR
618
- shift_token
619
- return result
620
- when T_SPACE
621
- shift_token
622
- end
623
- result.push(case_insensitive_string)
624
- end
625
- else
626
- lang = nstring
627
- if lang
628
- return lang.upcase
629
- else
630
- return lang
631
- end
632
- end
1062
+ # RFC-3501 & RFC9051:
1063
+ # body-type-1part = (body-type-basic / body-type-msg / body-type-text)
1064
+ # [SP body-ext-1part]
1065
+ def body_type_1part
1066
+ # This regexp peek is a performance optimization.
1067
+ # The lookahead fallback would work fine too.
1068
+ m = peek_re(/\G(?:
1069
+ (?<TEXT> "TEXT" \s "[^"]+" )
1070
+ |(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
1071
+ |(?<BASIC> "[^"]+" \s "[^"]+" )
1072
+ |(?<MIXED> "MIXED" )
1073
+ )/nix)
1074
+ choice = m&.named_captures&.compact&.keys&.first
1075
+ # In practice, the following line should never be used. But the ABNF
1076
+ # *does* allow literals, and this will handle them.
1077
+ choice ||= lookahead_case_insensitive__string!
1078
+ case choice
1079
+ when "BASIC" then body_type_basic # => BodyTypeBasic
1080
+ when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
1081
+ when "TEXT" then body_type_text # => BodyTypeText
1082
+ when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
1083
+ else body_type_basic # might be a bug; server's or ours?
1084
+ end
1085
+ end
1086
+
1087
+ # RFC-3501 & RFC9051:
1088
+ # body-type-basic = media-basic SP body-fields
1089
+ def body_type_basic
1090
+ type = media_basic # n.b. "basic" type isn't enforced here
1091
+ if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid
1092
+ SP!; flds = body_fields
1093
+ SP? and exts = body_ext_1part
1094
+ BodyTypeBasic.new(*type, *flds, *exts)
633
1095
  end
634
1096
 
635
- def body_extensions
636
- result = []
637
- while true
638
- token = lookahead
639
- case token.symbol
640
- when T_RPAR
641
- return result
642
- when T_SPACE
643
- shift_token
644
- end
645
- result.push(body_extension)
646
- end
1097
+ # RFC-3501 & RFC-9051:
1098
+ # body-type-text = media-text SP body-fields SP body-fld-lines
1099
+ def body_type_text
1100
+ type = media_text
1101
+ SP!; flds = body_fields
1102
+ SP!; lines = body_fld_lines
1103
+ SP? and exts = body_ext_1part
1104
+ BodyTypeText.new(*type, *flds, lines, *exts)
647
1105
  end
648
1106
 
649
- def body_extension
650
- token = lookahead
651
- case token.symbol
652
- when T_LPAR
653
- shift_token
654
- result = body_extensions
655
- match(T_RPAR)
656
- return result
657
- when T_NUMBER
658
- return number
659
- else
660
- return nstring
661
- end
1107
+ # RFC-3501 & RFC-9051:
1108
+ # body-type-msg = media-message SP body-fields SP envelope
1109
+ # SP body SP body-fld-lines
1110
+ def body_type_msg
1111
+ # n.b. "message/rfc822" type isn't enforced here
1112
+ type = media_message
1113
+ SP!; flds = body_fields
1114
+
1115
+ # Sometimes servers send body-type-basic when body-type-msg should be.
1116
+ # E.g: when a message/rfc822 part has "Content-Disposition: attachment".
1117
+ #
1118
+ # * SP "(" --> SP envelope --> continue as body-type-msg
1119
+ # * ")" --> no body-ext-1part --> completed body-type-basic
1120
+ # * SP nstring --> SP body-fld-md5
1121
+ # --> SP body-ext-1part --> continue as body-type-basic
1122
+ #
1123
+ # It's probably better to return BodyTypeBasic---even for
1124
+ # "message/rfc822"---than BodyTypeMessage with invalid fields.
1125
+ unless peek_str?(" (")
1126
+ SP? and exts = body_ext_1part
1127
+ return BodyTypeBasic.new(*type, *flds, *exts)
1128
+ end
1129
+
1130
+ SP!; env = envelope
1131
+ SP!; bdy = body
1132
+ SP!; lines = body_fld_lines
1133
+ SP? and exts = body_ext_1part
1134
+ BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts)
1135
+ end
1136
+
1137
+ # This is a malformed body-type-mpart with no subparts.
1138
+ def body_type_mixed
1139
+ # warn "malformed body-type-mpart: multipart/mixed with no parts."
1140
+ type = media_subtype # => "MIXED"
1141
+ SP? and exts = body_ext_mpart
1142
+ BodyTypeMultipart.new("MULTIPART", type, nil, *exts)
662
1143
  end
663
1144
 
664
- def section
665
- str = String.new
666
- token = match(T_LBRA)
667
- str.concat(token.value)
668
- token = match(T_ATOM, T_NUMBER, T_RBRA)
669
- if token.symbol == T_RBRA
670
- str.concat(token.value)
671
- return str
672
- end
673
- str.concat(token.value)
674
- token = lookahead
675
- if token.symbol == T_SPACE
676
- shift_token
677
- str.concat(token.value)
678
- token = match(T_LPAR)
679
- str.concat(token.value)
680
- while true
681
- token = lookahead
682
- case token.symbol
683
- when T_RPAR
684
- str.concat(token.value)
685
- shift_token
686
- break
687
- when T_SPACE
688
- shift_token
689
- str.concat(token.value)
690
- end
691
- str.concat(format_string(astring))
692
- end
693
- end
694
- token = match(T_RBRA)
695
- str.concat(token.value)
696
- return str
697
- end
698
-
699
- def format_string(str)
700
- case str
701
- when ""
702
- return '""'
703
- when /[\x80-\xff\r\n]/n
704
- # literal
705
- return "{" + str.bytesize.to_s + "}" + CRLF + str
706
- when /[(){ \x00-\x1f\x7f%*"\\]/n
707
- # quoted string
708
- return '"' + str.gsub(/["\\]/n, "\\\\\\&") + '"'
709
- else
710
- # atom
711
- return str
712
- end
1145
+ # RFC-3501 & RFC-9051:
1146
+ # body-type-mpart = 1*body SP media-subtype
1147
+ # [SP body-ext-mpart]
1148
+ def body_type_mpart
1149
+ parts = [body]; parts << body until SP?; msubtype = media_subtype
1150
+ SP? and exts = body_ext_mpart
1151
+ BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts)
713
1152
  end
714
1153
 
715
- def uid_data
716
- token = match(T_ATOM)
717
- name = token.value.upcase
718
- match(T_SPACE)
719
- return name, number
1154
+ # n.b. this handles both type and subtype
1155
+ #
1156
+ # RFC-3501 vs RFC-9051:
1157
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
1158
+ # "MESSAGE" /
1159
+ # "VIDEO") DQUOTE) / string) SP media-subtype
1160
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
1161
+ # "FONT" / "MESSAGE" / "MODEL" /
1162
+ # "VIDEO") DQUOTE) / string) SP media-subtype
1163
+ #
1164
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
1165
+ # DQUOTE "RFC822" DQUOTE
1166
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
1167
+ # DQUOTE ("RFC822" / "GLOBAL") DQUOTE
1168
+ #
1169
+ # RFC-3501 & RFC-9051:
1170
+ # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
1171
+ # media-subtype = string
1172
+ def media_type
1173
+ mtype = case_insensitive__string
1174
+ SP? or return mtype, nil # ??? quirky!
1175
+ msubtype = media_subtype
1176
+ return mtype, msubtype
720
1177
  end
721
1178
 
722
- def modseq_data
723
- token = match(T_ATOM)
724
- name = token.value.upcase
725
- match(T_SPACE)
726
- match(T_LPAR)
727
- modseq = number
728
- match(T_RPAR)
729
- return name, modseq
1179
+ # TODO: check types
1180
+ alias media_basic media_type # */* --- catchall
1181
+ alias media_message media_type # message/rfc822, message/global
1182
+ alias media_text media_type # text/*
1183
+
1184
+ alias media_subtype case_insensitive__string
1185
+
1186
+ # RFC-3501 & RFC-9051:
1187
+ # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
1188
+ # body-fld-enc SP body-fld-octets
1189
+ def body_fields
1190
+ fields = []
1191
+ fields << body_fld_param; SP!
1192
+ fields << body_fld_id; SP!
1193
+ fields << body_fld_desc; SP!
1194
+ fields << body_fld_enc; SP!
1195
+ fields << body_fld_octets
1196
+ fields
730
1197
  end
731
1198
 
732
- def ignored_response
733
- while lookahead.symbol != T_CRLF
734
- shift_token
735
- end
736
- return IgnoredResponse.new(@str)
1199
+ # RFC3501, RFC9051:
1200
+ # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
1201
+ def body_fld_param
1202
+ quirky_SP? # See comments on test_bodystructure_extra_space
1203
+ return if NIL?
1204
+ param = {}
1205
+ lpar
1206
+ name = case_insensitive__string; SP!; param[name] = string
1207
+ while SP?
1208
+ name = case_insensitive__string; SP!; param[name] = string
1209
+ end
1210
+ rpar
1211
+ param
1212
+ end
1213
+
1214
+ # RFC2060
1215
+ # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
1216
+ # [SPACE body_fld_lang
1217
+ # [SPACE 1#body_extension]]]
1218
+ # ;; MUST NOT be returned on non-extensible
1219
+ # ;; "BODY" fetch
1220
+ # RFC3501 & RFC9051
1221
+ # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
1222
+ # [SP body-fld-loc *(SP body-extension)]]]
1223
+ # ; MUST NOT be returned on non-extensible
1224
+ # ; "BODY" fetch
1225
+ def body_ext_1part
1226
+ fields = []; fields << body_fld_md5
1227
+ SP? or return fields; fields << body_fld_dsp
1228
+ SP? or return fields; fields << body_fld_lang
1229
+ SP? or return fields; fields << body_fld_loc
1230
+ SP? or return fields; fields << body_extensions
1231
+ fields
1232
+ end
1233
+
1234
+ # RFC-2060:
1235
+ # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
1236
+ # [SP 1#body_extension]]
1237
+ # ;; MUST NOT be returned on non-extensible
1238
+ # ;; "BODY" fetch
1239
+ # RFC-3501 & RFC-9051:
1240
+ # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
1241
+ # [SP body-fld-loc *(SP body-extension)]]]
1242
+ # ; MUST NOT be returned on non-extensible
1243
+ # ; "BODY" fetch
1244
+ def body_ext_mpart
1245
+ fields = []; fields << body_fld_param
1246
+ SP? or return fields; fields << body_fld_dsp
1247
+ SP? or return fields; fields << body_fld_lang
1248
+ SP? or return fields; fields << body_fld_loc
1249
+ SP? or return fields; fields << body_extensions
1250
+ fields
1251
+ end
1252
+
1253
+ alias body_fld_desc nstring
1254
+ alias body_fld_id nstring
1255
+ alias body_fld_loc nstring
1256
+ alias body_fld_lines number64 # number in 3501, number64 in 9051
1257
+ alias body_fld_md5 nstring
1258
+ alias body_fld_octets number
1259
+
1260
+ # RFC-3501 & RFC-9051:
1261
+ # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
1262
+ # "QUOTED-PRINTABLE") DQUOTE) / string
1263
+ alias body_fld_enc case_insensitive__string
1264
+
1265
+ # body-fld-dsp = "(" string SP body-fld-param ")" / nil
1266
+ def body_fld_dsp
1267
+ return if NIL?
1268
+ lpar; dsp_type = case_insensitive__string
1269
+ SP!; param = body_fld_param
1270
+ rpar
1271
+ ContentDisposition.new(dsp_type, param)
737
1272
  end
738
1273
 
739
- def text_response
740
- token = match(T_ATOM)
741
- name = token.value.upcase
742
- match(T_SPACE)
743
- return UntaggedResponse.new(name, text)
1274
+ # body-fld-lang = nstring / "(" string *(SP string) ")"
1275
+ def body_fld_lang
1276
+ if lpar?
1277
+ result = [case_insensitive__string]
1278
+ result << case_insensitive__string while SP?
1279
+ rpar
1280
+ result
1281
+ else
1282
+ case_insensitive__nstring
1283
+ end
744
1284
  end
745
1285
 
746
- def flags_response
747
- token = match(T_ATOM)
748
- name = token.value.upcase
749
- match(T_SPACE)
750
- return UntaggedResponse.new(name, flag_list, @str)
1286
+ # body-extension *(SP body-extension)
1287
+ def body_extensions
1288
+ result = []
1289
+ result << body_extension; while SP? do result << body_extension end
1290
+ result
751
1291
  end
752
1292
 
753
- def list_response
754
- token = match(T_ATOM)
755
- name = token.value.upcase
756
- match(T_SPACE)
757
- return UntaggedResponse.new(name, mailbox_list, @str)
1293
+ # body-extension = nstring / number / number64 /
1294
+ # "(" body-extension *(SP body-extension) ")"
1295
+ # ; Future expansion. Client implementations
1296
+ # ; MUST accept body-extension fields. Server
1297
+ # ; implementations MUST NOT generate
1298
+ # ; body-extension fields except as defined by
1299
+ # ; future Standard or Standards Track
1300
+ # ; revisions of this specification.
1301
+ def body_extension
1302
+ if (uint = number64?) then uint
1303
+ elsif lpar? then exts = body_extensions; rpar; exts
1304
+ else nstring
1305
+ end
758
1306
  end
759
1307
 
1308
+ # section = "[" [section-spec] "]"
1309
+ def section
1310
+ str = +lbra
1311
+ str << section_spec unless peek_rbra?
1312
+ str << rbra
1313
+ end
1314
+
1315
+ # section-binary = "[" [section-part] "]"
1316
+ def section_binary
1317
+ str = +lbra
1318
+ str << section_part unless peek_rbra?
1319
+ str << rbra
1320
+ end
1321
+
1322
+ # section-spec = section-msgtext / (section-part ["." section-text])
1323
+ # section-msgtext = "HEADER" /
1324
+ # "HEADER.FIELDS" [".NOT"] SP header-list /
1325
+ # "TEXT"
1326
+ # ; top-level or MESSAGE/RFC822 or
1327
+ # ; MESSAGE/GLOBAL part
1328
+ # section-part = nz-number *("." nz-number)
1329
+ # ; body part reference.
1330
+ # ; Allows for accessing nested body parts.
1331
+ # section-text = section-msgtext / "MIME"
1332
+ # ; text other than actual body part (headers,
1333
+ # ; etc.)
1334
+ #
1335
+ # n.b: we could "cheat" here and just grab all text inside the brackets,
1336
+ # but literals would need special treatment.
1337
+ def section_spec
1338
+ str = "".b
1339
+ str << atom # grabs everything up to "SP header-list" or "]"
1340
+ str << " " << header_list if SP?
1341
+ str
1342
+ end
1343
+
1344
+ # header-list = "(" header-fld-name *(SP header-fld-name) ")"
1345
+ def header_list
1346
+ str = +""
1347
+ str << lpar << header_fld_name
1348
+ str << " " << header_fld_name while SP?
1349
+ str << rpar
1350
+ end
1351
+
1352
+ # section-part = nz-number *("." nz-number)
1353
+ # ; body part reference.
1354
+ # ; Allows for accessing nested body parts.
1355
+ alias section_part atom
1356
+
1357
+ # RFC3501 & RFC9051:
1358
+ # header-fld-name = astring
1359
+ #
1360
+ # NOTE: Previously, Net::IMAP recreated the raw original source string.
1361
+ # Now, it returns the decoded astring value. Although this is technically
1362
+ # incompatible, it should almost never make a difference: all standard
1363
+ # header field names are valid atoms:
1364
+ #
1365
+ # https://www.iana.org/assignments/message-headers/message-headers.xhtml
1366
+ #
1367
+ # See also RFC5322:
1368
+ # optional-field = field-name ":" unstructured CRLF
1369
+ # field-name = 1*ftext
1370
+ # ftext = %d33-57 / ; Printable US-ASCII
1371
+ # %d59-126 ; characters not including
1372
+ # ; ":".
1373
+ alias header_fld_name astring
1374
+
1375
+ # mailbox-data = "FLAGS" SP flag-list / "LIST" SP mailbox-list /
1376
+ # "LSUB" SP mailbox-list / "SEARCH" *(SP nz-number) /
1377
+ # "STATUS" SP mailbox SP "(" [status-att-list] ")" /
1378
+ # number SP "EXISTS" / number SP "RECENT"
1379
+
1380
+ def mailbox_data__flags
1381
+ name = label("FLAGS")
1382
+ SP!
1383
+ UntaggedResponse.new(name, flag_list, @str)
1384
+ end
1385
+
1386
+ def mailbox_data__list
1387
+ name = label_in("LIST", "LSUB", "XLIST")
1388
+ SP!
1389
+ UntaggedResponse.new(name, mailbox_list, @str)
1390
+ end
1391
+ alias mailbox_data__lsub mailbox_data__list
1392
+ alias mailbox_data__xlist mailbox_data__list
1393
+
1394
+ # mailbox-list = "(" [mbx-list-flags] ")" SP
1395
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil) SP mailbox
1396
+ # [SP mbox-list-extended]
1397
+ # ; This is the list information pointed to by the ABNF
1398
+ # ; item "mailbox-data", which is defined above
760
1399
  def mailbox_list
761
- attr = flag_list
762
- match(T_SPACE)
763
- token = match(T_QUOTED, T_NIL)
764
- if token.symbol == T_NIL
765
- delim = nil
766
- else
767
- delim = token.value
768
- end
769
- match(T_SPACE)
770
- name = astring
771
- return MailboxList.new(attr, delim, name)
1400
+ lpar; attr = peek_rpar? ? [] : mbx_list_flags; rpar
1401
+ SP!; delim = nquoted
1402
+ SP!; name = mailbox
1403
+ # TODO: mbox-list-extended
1404
+ MailboxList.new(attr, delim, name)
772
1405
  end
773
1406
 
774
- def getquota_response
1407
+ def quota_response
775
1408
  # If quota never established, get back
776
1409
  # `NO Quota root does not exist'.
777
1410
  # If quota removed, get `()' after the
@@ -804,7 +1437,7 @@ module Net
804
1437
  end
805
1438
  end
806
1439
 
807
- def getquotaroot_response
1440
+ def quotaroot_response
808
1441
  # Similar to getquota, but only admin can use getquota.
809
1442
  token = match(T_ATOM)
810
1443
  name = token.value.upcase
@@ -821,7 +1454,8 @@ module Net
821
1454
  return UntaggedResponse.new(name, data, @str)
822
1455
  end
823
1456
 
824
- def getacl_response
1457
+ # acl-data = "ACL" SP mailbox *(SP identifier SP rights)
1458
+ def acl_data
825
1459
  token = match(T_ATOM)
826
1460
  name = token.value.upcase
827
1461
  match(T_SPACE)
@@ -839,157 +1473,309 @@ module Net
839
1473
  shift_token
840
1474
  end
841
1475
  user = astring
842
- match(T_SPACE)
843
- rights = astring
844
- data.push(MailboxACLItem.new(user, rights, mailbox))
845
- end
846
- end
847
- return UntaggedResponse.new(name, data, @str)
848
- end
849
-
850
- def search_response
851
- token = match(T_ATOM)
852
- name = token.value.upcase
853
- token = lookahead
854
- if token.symbol == T_SPACE
855
- shift_token
856
- data = []
857
- while true
858
- token = lookahead
859
- case token.symbol
860
- when T_CRLF
861
- break
862
- when T_SPACE
863
- shift_token
864
- when T_NUMBER
865
- data.push(number)
866
- when T_LPAR
867
- # TODO: include the MODSEQ value in a response
868
- shift_token
869
- match(T_ATOM)
870
- match(T_SPACE)
871
- match(T_NUMBER)
872
- match(T_RPAR)
873
- end
874
- end
875
- else
876
- data = []
877
- end
878
- return UntaggedResponse.new(name, data, @str)
879
- end
880
-
881
- def thread_response
882
- token = match(T_ATOM)
883
- name = token.value.upcase
884
- token = lookahead
885
-
886
- if token.symbol == T_SPACE
887
- threads = []
888
-
889
- while true
890
- shift_token
891
- token = lookahead
892
-
893
- case token.symbol
894
- when T_LPAR
895
- threads << thread_branch(token)
896
- when T_CRLF
897
- break
898
- end
899
- end
900
- else
901
- # no member
902
- threads = []
903
- end
904
-
905
- return UntaggedResponse.new(name, threads, @str)
906
- end
907
-
908
- def thread_branch(token)
909
- rootmember = nil
910
- lastmember = nil
911
-
912
- while true
913
- shift_token # ignore first T_LPAR
914
- token = lookahead
915
-
916
- case token.symbol
917
- when T_NUMBER
918
- # new member
919
- newmember = ThreadMember.new(number, [])
920
- if rootmember.nil?
921
- rootmember = newmember
922
- else
923
- lastmember.children << newmember
924
- end
925
- lastmember = newmember
926
- when T_SPACE
927
- # do nothing
928
- when T_LPAR
929
- if rootmember.nil?
930
- # dummy member
931
- lastmember = rootmember = ThreadMember.new(nil, [])
932
- end
933
-
934
- lastmember.children << thread_branch(token)
935
- when T_RPAR
936
- break
1476
+ match(T_SPACE)
1477
+ rights = astring
1478
+ data.push(MailboxACLItem.new(user, rights, mailbox))
937
1479
  end
938
1480
  end
939
-
940
- return rootmember
1481
+ return UntaggedResponse.new(name, data, @str)
941
1482
  end
942
1483
 
943
- def status_response
944
- token = match(T_ATOM)
945
- name = token.value.upcase
946
- match(T_SPACE)
947
- mailbox = astring
948
- match(T_SPACE)
949
- match(T_LPAR)
950
- attr = {}
951
- while true
952
- token = lookahead
953
- case token.symbol
954
- when T_RPAR
955
- shift_token
956
- break
957
- when T_SPACE
958
- shift_token
1484
+ # RFC3501:
1485
+ # mailbox-data = "SEARCH" *(SP nz-number) / ...
1486
+ # RFC5256: SORT
1487
+ # sort-data = "SORT" *(SP nz-number)
1488
+ # RFC7162: CONDSTORE, QRESYNC
1489
+ # mailbox-data =/ "SEARCH" [1*(SP nz-number) SP
1490
+ # search-sort-mod-seq]
1491
+ # sort-data = "SORT" [1*(SP nz-number) SP
1492
+ # search-sort-mod-seq]
1493
+ # ; Updates the SORT response from RFC 5256.
1494
+ # search-sort-mod-seq = "(" "MODSEQ" SP mod-sequence-value ")"
1495
+ # RFC9051:
1496
+ # mailbox-data = obsolete-search-response / ...
1497
+ # obsolete-search-response = "SEARCH" *(SP nz-number)
1498
+ def mailbox_data__search
1499
+ name = label_in("SEARCH", "SORT")
1500
+ data = []
1501
+ while _ = SP? && nz_number? do data << _ end
1502
+ if lpar?
1503
+ label("MODSEQ"); SP!
1504
+ modseq = mod_sequence_value
1505
+ rpar
1506
+ end
1507
+ data = SearchResult.new(data, modseq: modseq)
1508
+ UntaggedResponse.new(name, data, @str)
1509
+ end
1510
+ alias sort_data mailbox_data__search
1511
+
1512
+ # esearch-response = "ESEARCH" [search-correlator] [SP "UID"]
1513
+ # *(SP search-return-data)
1514
+ # ;; Note that SEARCH and ESEARCH responses
1515
+ # ;; SHOULD be mutually exclusive,
1516
+ # ;; i.e., only one of the response types
1517
+ # ;; should be
1518
+ # ;; returned as a result of a command.
1519
+ # esearch-response = "ESEARCH" [search-correlator] [SP "UID"]
1520
+ # *(SP search-return-data)
1521
+ # ; ESEARCH response replaces SEARCH response
1522
+ # ; from IMAP4rev1.
1523
+ # search-correlator = SP "(" "TAG" SP tag-string ")"
1524
+ def esearch_response
1525
+ name = label("ESEARCH")
1526
+ tag = search_correlator if peek_str?(" (")
1527
+ uid = peek_re?(/\G UID\b/i) && (SP!; label("UID"); true)
1528
+ data = []
1529
+ data << search_return_data while SP?
1530
+ esearch = ESearchResult.new(tag, uid, data)
1531
+ UntaggedResponse.new(name, esearch, @str)
1532
+ end
1533
+
1534
+ # From RFC4731 (ESEARCH):
1535
+ # search-return-data = "MIN" SP nz-number /
1536
+ # "MAX" SP nz-number /
1537
+ # "ALL" SP sequence-set /
1538
+ # "COUNT" SP number /
1539
+ # search-ret-data-ext
1540
+ # ; All return data items conform to
1541
+ # ; search-ret-data-ext syntax.
1542
+ # search-ret-data-ext = search-modifier-name SP search-return-value
1543
+ # search-modifier-name = tagged-ext-label
1544
+ # search-return-value = tagged-ext-val
1545
+ #
1546
+ # From RFC4731 (ESEARCH):
1547
+ # search-return-data =/ "MODSEQ" SP mod-sequence-value
1548
+ #
1549
+ # From RFC9394 (PARTIAL):
1550
+ # search-return-data =/ ret-data-partial
1551
+ #
1552
+ def search_return_data
1553
+ label = search_modifier_name; SP!
1554
+ value =
1555
+ case label
1556
+ when "MIN" then nz_number
1557
+ when "MAX" then nz_number
1558
+ when "ALL" then sequence_set
1559
+ when "COUNT" then number
1560
+ when "MODSEQ" then mod_sequence_value # RFC7162: CONDSTORE
1561
+ when "PARTIAL" then ret_data_partial__value # RFC9394: PARTIAL
1562
+ else search_return_value
1563
+ end
1564
+ [label, value]
1565
+ end
1566
+
1567
+ # From RFC5267 (CONTEXT=SEARCH, CONTEXT=SORT) and RFC9394 (PARTIAL):
1568
+ # ret-data-partial = "PARTIAL"
1569
+ # SP "(" partial-range SP partial-results ")"
1570
+ def ret_data_partial__value
1571
+ lpar
1572
+ range = partial_range; SP!
1573
+ results = partial_results
1574
+ rpar
1575
+ ESearchResult::PartialResult.new(range, results)
1576
+ end
1577
+
1578
+ # partial-range = partial-range-first / partial-range-last
1579
+ # tagged-ext-simple =/ partial-range-last
1580
+ def partial_range
1581
+ case (str = atom)
1582
+ when Patterns::PARTIAL_RANGE_FIRST, Patterns::PARTIAL_RANGE_LAST
1583
+ min, max = [Integer($1), Integer($2)].minmax
1584
+ min..max
1585
+ else
1586
+ parse_error("unexpected atom %p, expected partial-range", str)
1587
+ end
1588
+ end
1589
+
1590
+ # partial-results = sequence-set / "NIL"
1591
+ # ;; <sequence-set> from [RFC3501].
1592
+ # ;; NIL indicates that no results correspond to
1593
+ # ;; the requested range.
1594
+ def partial_results; NIL? ? nil : sequence_set end
1595
+
1596
+ # search-modifier-name = tagged-ext-label
1597
+ alias search_modifier_name tagged_ext_label
1598
+
1599
+ # search-return-value = tagged-ext-val
1600
+ # ; Data for the returned search option.
1601
+ # ; A single "nz-number"/"number"/"number64" value
1602
+ # ; can be returned as an atom (i.e., without
1603
+ # ; quoting). A sequence-set can be returned
1604
+ # ; as an atom as well.
1605
+ def search_return_value; ExtensionData.new(tagged_ext_val) end
1606
+
1607
+ # search-correlator = SP "(" "TAG" SP tag-string ")"
1608
+ def search_correlator
1609
+ SP!; lpar; label("TAG"); SP!; tag = tag_string; rpar
1610
+ tag
1611
+ end
1612
+
1613
+ # tag-string = astring
1614
+ # ; <tag> represented as <astring>
1615
+ alias tag_string astring
1616
+
1617
+ # RFC5256: THREAD
1618
+ # thread-data = "THREAD" [SP 1*thread-list]
1619
+ def thread_data
1620
+ name = label("THREAD")
1621
+ threads = []
1622
+ if SP?
1623
+ threads << thread_list while lookahead_thread_list?
1624
+ end
1625
+ UntaggedResponse.new(name, threads, @str)
1626
+ end
1627
+
1628
+ alias lookahead_thread_list? lookahead_lpar?
1629
+ alias lookahead_thread_nested? lookahead_thread_list?
1630
+
1631
+ # RFC5256: THREAD
1632
+ # thread-list = "(" (thread-members / thread-nested) ")"
1633
+ def thread_list
1634
+ lpar
1635
+ thread = if lookahead_thread_nested?
1636
+ ThreadMember.new(nil, thread_nested)
1637
+ else
1638
+ thread_members
1639
+ end
1640
+ rpar
1641
+ thread
1642
+ end
1643
+
1644
+ # RFC5256: THREAD
1645
+ # thread-members = nz-number *(SP nz-number) [SP thread-nested]
1646
+ def thread_members
1647
+ members = []
1648
+ members << nz_number # thread root
1649
+ while SP?
1650
+ case lookahead!(T_NUMBER, T_LPAR).symbol
1651
+ when T_NUMBER then members << nz_number
1652
+ else nested = thread_nested; break
959
1653
  end
960
- token = match(T_ATOM)
961
- key = token.value.upcase
962
- match(T_SPACE)
963
- val = number
964
- attr[key] = val
965
1654
  end
966
- data = StatusData.new(mailbox, attr)
967
- return UntaggedResponse.new(name, data, @str)
1655
+ members.reverse.inject(nested || []) {|subthreads, number|
1656
+ [ThreadMember.new(number, subthreads)]
1657
+ }.first
1658
+ end
1659
+
1660
+ # RFC5256: THREAD
1661
+ # thread-nested = 2*thread-list
1662
+ def thread_nested
1663
+ nested = [thread_list, thread_list]
1664
+ while lookahead_thread_list? do nested << thread_list end
1665
+ nested
1666
+ end
1667
+
1668
+ # mailbox-data =/ "STATUS" SP mailbox SP "(" [status-att-list] ")"
1669
+ def mailbox_data__status
1670
+ resp_name = label("STATUS"); SP!
1671
+ mbox_name = mailbox; SP!
1672
+ lpar; attr = status_att_list; rpar
1673
+ UntaggedResponse.new(resp_name, StatusData.new(mbox_name, attr), @str)
1674
+ end
1675
+
1676
+ # RFC3501
1677
+ # status-att-list = status-att SP number *(SP status-att SP number)
1678
+ # RFC4466, RFC9051, and RFC3501 Errata
1679
+ # status-att-list = status-att-val *(SP status-att-val)
1680
+ def status_att_list
1681
+ attrs = [status_att_val]
1682
+ while SP? do attrs << status_att_val end
1683
+ attrs.to_h
1684
+ end
1685
+
1686
+ # RFC3501 Errata:
1687
+ # status-att-val = ("MESSAGES" SP number) / ("RECENT" SP number) /
1688
+ # ("UIDNEXT" SP nz-number) / ("UIDVALIDITY" SP nz-number) /
1689
+ # ("UNSEEN" SP number)
1690
+ # RFC4466:
1691
+ # status-att-val = ("MESSAGES" SP number) /
1692
+ # ("RECENT" SP number) /
1693
+ # ("UIDNEXT" SP nz-number) /
1694
+ # ("UIDVALIDITY" SP nz-number) /
1695
+ # ("UNSEEN" SP number)
1696
+ # ;; Extensions to the STATUS responses
1697
+ # ;; should extend this production.
1698
+ # ;; Extensions should use the generic
1699
+ # ;; syntax defined by tagged-ext.
1700
+ # RFC9051:
1701
+ # status-att-val = ("MESSAGES" SP number) /
1702
+ # ("UIDNEXT" SP nz-number) /
1703
+ # ("UIDVALIDITY" SP nz-number) /
1704
+ # ("UNSEEN" SP number) /
1705
+ # ("DELETED" SP number) /
1706
+ # ("SIZE" SP number64)
1707
+ # ; Extensions to the STATUS responses
1708
+ # ; should extend this production.
1709
+ # ; Extensions should use the generic
1710
+ # ; syntax defined by tagged-ext.
1711
+ # RFC7162:
1712
+ # status-att-val =/ "HIGHESTMODSEQ" SP mod-sequence-valzer
1713
+ # ;; Extends non-terminal defined in [RFC4466].
1714
+ # ;; Value 0 denotes that the mailbox doesn't
1715
+ # ;; support persistent mod-sequences
1716
+ # ;; as described in Section 3.1.2.2.
1717
+ # RFC7889:
1718
+ # status-att-val =/ "APPENDLIMIT" SP (number / nil)
1719
+ # ;; status-att-val is defined in RFC 4466
1720
+ # RFC8438:
1721
+ # status-att-val =/ "SIZE" SP number64
1722
+ # RFC8474:
1723
+ # status-att-val =/ "MAILBOXID" SP "(" objectid ")"
1724
+ # ; follows tagged-ext production from [RFC4466]
1725
+ def status_att_val
1726
+ key = tagged_ext_label
1727
+ SP!
1728
+ val =
1729
+ case key
1730
+ when "MESSAGES" then number # RFC3501, RFC9051
1731
+ when "UNSEEN" then number # RFC3501, RFC9051
1732
+ when "DELETED" then number # RFC9051
1733
+ when "UIDNEXT" then nz_number # RFC3501, RFC9051
1734
+ when "UIDVALIDITY" then nz_number # RFC3501, RFC9051
1735
+ when "RECENT" then number # RFC3501 (obsolete)
1736
+ when "SIZE" then number64 # RFC8438, RFC9051
1737
+ when "HIGHESTMODSEQ" then mod_sequence_valzer # RFC7162
1738
+ when "MAILBOXID" then parens__objectid # RFC8474
1739
+ else
1740
+ number? || ExtensionData.new(tagged_ext_val)
1741
+ end
1742
+ [key, val]
968
1743
  end
969
1744
 
970
- def capability_response
971
- token = match(T_ATOM)
972
- name = token.value.upcase
973
- match(T_SPACE)
974
- UntaggedResponse.new(name, capability_data, @str)
1745
+ # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
1746
+ # The grammar rule is used by both response-data and resp-text-code.
1747
+ # But this method only returns UntaggedResponse (response-data).
1748
+ #
1749
+ # RFC3501:
1750
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1751
+ # *(SP capability)
1752
+ # RFC9051:
1753
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1754
+ # *(SP capability)
1755
+ def capability_data__untagged
1756
+ UntaggedResponse.new label("CAPABILITY"), capability__list, @str
975
1757
  end
976
1758
 
977
- def capability_data
978
- data = []
979
- while true
980
- token = lookahead
981
- case token.symbol
982
- when T_CRLF, T_RBRA
983
- break
984
- when T_SPACE
985
- shift_token
986
- next
987
- end
988
- data.push(atom.upcase)
989
- end
990
- data
1759
+ # enable-data = "ENABLED" *(SP capability)
1760
+ def enable_data
1761
+ UntaggedResponse.new label("ENABLED"), capability__list, @str
1762
+ end
1763
+
1764
+ # As a workaround for buggy servers, allow a trailing SP:
1765
+ # *(SP capability) [SP]
1766
+ def capability__list
1767
+ list = []; while SP? && (capa = capability?) do list << capa end; list
991
1768
  end
992
1769
 
1770
+ alias resp_code__capability capability__list
1771
+
1772
+ # capability = ("AUTH=" auth-type) / atom
1773
+ # ; New capabilities MUST begin with "X" or be
1774
+ # ; registered with IANA as standard or
1775
+ # ; standards-track
1776
+ alias capability case_insensitive__atom
1777
+ alias capability? case_insensitive__atom?
1778
+
993
1779
  def id_response
994
1780
  token = match(T_ATOM)
995
1781
  name = token.value.upcase
@@ -1019,147 +1805,189 @@ module Net
1019
1805
  end
1020
1806
  end
1021
1807
 
1808
+ # namespace-response = "NAMESPACE" SP namespace
1809
+ # SP namespace SP namespace
1810
+ # ; The first Namespace is the Personal Namespace(s).
1811
+ # ; The second Namespace is the Other Users'
1812
+ # ; Namespace(s).
1813
+ # ; The third Namespace is the Shared Namespace(s).
1022
1814
  def namespace_response
1815
+ name = label("NAMESPACE")
1023
1816
  @lex_state = EXPR_DATA
1024
- token = lookahead
1025
- token = match(T_ATOM)
1026
- name = token.value.upcase
1027
- match(T_SPACE)
1028
- personal = namespaces
1029
- match(T_SPACE)
1030
- other = namespaces
1031
- match(T_SPACE)
1032
- shared = namespaces
1817
+ data = Namespaces.new((SP!; namespace),
1818
+ (SP!; namespace),
1819
+ (SP!; namespace))
1820
+ UntaggedResponse.new(name, data, @str)
1821
+ ensure
1033
1822
  @lex_state = EXPR_BEG
1034
- data = Namespaces.new(personal, other, shared)
1035
- return UntaggedResponse.new(name, data, @str)
1036
- end
1037
-
1038
- def namespaces
1039
- token = lookahead
1040
- # empty () is not allowed, so nil is functionally identical to empty.
1041
- data = []
1042
- if token.symbol == T_NIL
1043
- shift_token
1044
- else
1045
- match(T_LPAR)
1046
- loop do
1047
- data << namespace
1048
- break unless lookahead.symbol == T_SPACE
1049
- shift_token
1050
- end
1051
- match(T_RPAR)
1052
- end
1053
- data
1054
1823
  end
1055
1824
 
1825
+ # namespace = nil / "(" 1*namespace-descr ")"
1056
1826
  def namespace
1057
- match(T_LPAR)
1058
- prefix = match(T_QUOTED, T_LITERAL).value
1059
- match(T_SPACE)
1060
- delimiter = string
1827
+ NIL? and return []
1828
+ lpar
1829
+ list = [namespace_descr]
1830
+ list << namespace_descr until rpar?
1831
+ list
1832
+ end
1833
+
1834
+ # namespace-descr = "(" string SP
1835
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil)
1836
+ # [namespace-response-extensions] ")"
1837
+ def namespace_descr
1838
+ lpar
1839
+ prefix = string; SP!
1840
+ delimiter = nquoted # n.b: should only accept single char
1061
1841
  extensions = namespace_response_extensions
1062
- match(T_RPAR)
1842
+ rpar
1063
1843
  Namespace.new(prefix, delimiter, extensions)
1064
1844
  end
1065
1845
 
1846
+ # namespace-response-extensions = *namespace-response-extension
1847
+ # namespace-response-extension = SP string SP
1848
+ # "(" string *(SP string) ")"
1066
1849
  def namespace_response_extensions
1067
1850
  data = {}
1068
- token = lookahead
1069
- if token.symbol == T_SPACE
1070
- shift_token
1071
- name = match(T_QUOTED, T_LITERAL).value
1851
+ while SP?
1852
+ name = string; SP!
1853
+ lpar
1072
1854
  data[name] ||= []
1073
- match(T_SPACE)
1074
- match(T_LPAR)
1075
- loop do
1076
- data[name].push match(T_QUOTED, T_LITERAL).value
1077
- break unless lookahead.symbol == T_SPACE
1078
- shift_token
1079
- end
1080
- match(T_RPAR)
1855
+ data[name] << string
1856
+ data[name] << string while SP?
1857
+ rpar
1081
1858
  end
1082
1859
  data
1083
1860
  end
1084
1861
 
1085
- # text = 1*TEXT-CHAR
1086
- # TEXT-CHAR = <any CHAR except CR and LF>
1862
+ # TEXT-CHAR = <any CHAR except CR and LF>
1863
+ # RFC3501:
1864
+ # text = 1*TEXT-CHAR
1865
+ # RFC9051:
1866
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
1867
+ # ; Non-ASCII text can only be returned
1868
+ # ; after ENABLE IMAP4rev2 command
1087
1869
  def text
1088
- match(T_TEXT, lex_state: EXPR_TEXT).value
1870
+ match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8")
1871
+ end
1872
+
1873
+ # an "accept" version of #text
1874
+ def text?
1875
+ accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8")
1089
1876
  end
1090
1877
 
1091
- # resp-text = ["[" resp-text-code "]" SP] text
1878
+ # RFC3501:
1879
+ # resp-text = ["[" resp-text-code "]" SP] text
1880
+ # RFC9051:
1881
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1882
+ #
1883
+ # We leniently re-interpret this as
1884
+ # resp-text = ["[" resp-text-code "]" [SP [text]] / [text]
1092
1885
  def resp_text
1093
- token = match(T_LBRA, T_TEXT, lex_state: EXPR_RTEXT)
1094
- case token.symbol
1095
- when T_LBRA
1096
- code = resp_text_code
1097
- match(T_RBRA)
1098
- accept_space # violating RFC
1099
- ResponseText.new(code, text)
1100
- when T_TEXT
1101
- ResponseText.new(nil, token.value)
1886
+ if lbra?
1887
+ code = resp_text_code; rbra
1888
+ ResponseText.new(code, SP? && text? || "")
1889
+ else
1890
+ ResponseText.new(nil, text? || "")
1102
1891
  end
1103
1892
  end
1104
1893
 
1105
- # See https://www.rfc-editor.org/errata/rfc3501
1894
+ # RFC3501 (See https://www.rfc-editor.org/errata/rfc3501):
1895
+ # resp-text-code = "ALERT" /
1896
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1897
+ # capability-data / "PARSE" /
1898
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1899
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1900
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1901
+ # "UNSEEN" SP nz-number /
1902
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1903
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1904
+ # *(SP capability)
1106
1905
  #
1107
- # resp-text-code = "ALERT" /
1108
- # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1109
- # capability-data / "PARSE" /
1110
- # "PERMANENTFLAGS" SP "("
1111
- # [flag-perm *(SP flag-perm)] ")" /
1112
- # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1113
- # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1114
- # "UNSEEN" SP nz-number /
1115
- # atom [SP 1*<any TEXT-CHAR except "]">]
1906
+ # RFC5530:
1907
+ # resp-text-code =/ "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1908
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1909
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1910
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1911
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1912
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1913
+ # "NONEXISTENT"
1914
+ # RFC9051:
1915
+ # resp-text-code = "ALERT" /
1916
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1917
+ # capability-data / "PARSE" /
1918
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1919
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1920
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1921
+ # resp-code-apnd / resp-code-copy / "UIDNOTSTICKY" /
1922
+ # "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1923
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1924
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1925
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1926
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1927
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1928
+ # "NONEXISTENT" / "NOTSAVED" / "HASCHILDREN" /
1929
+ # "CLOSED" /
1930
+ # "UNKNOWN-CTE" /
1931
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1932
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1933
+ # *(SP capability)
1116
1934
  #
1117
- # +UIDPLUS+ ABNF:: https://www.rfc-editor.org/rfc/rfc4315.html#section-4
1118
- # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1935
+ # RFC4315 (UIDPLUS), RFC9051 (IMAP4rev2):
1936
+ # resp-code-apnd = "APPENDUID" SP nz-number SP append-uid
1937
+ # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1938
+ # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1939
+ #
1940
+ # RFC7162 (CONDSTORE):
1941
+ # resp-text-code =/ "HIGHESTMODSEQ" SP mod-sequence-value /
1942
+ # "NOMODSEQ" /
1943
+ # "MODIFIED" SP sequence-set
1944
+ # RFC7162 (QRESYNC):
1945
+ # resp-text-code =/ "CLOSED"
1946
+ #
1947
+ # RFC8474: OBJECTID
1948
+ # resp-text-code =/ "MAILBOXID" SP "(" objectid ")"
1949
+ #
1950
+ # RFC9586: UIDONLY
1951
+ # resp-text-code =/ "UIDREQUIRED"
1119
1952
  def resp_text_code
1120
- token = match(T_ATOM)
1121
- name = token.value.upcase
1122
- case name
1123
- when /\A(?:ALERT|PARSE|READ-ONLY|READ-WRITE|TRYCREATE|NOMODSEQ)\z/n
1124
- result = ResponseCode.new(name, nil)
1125
- when /\A(?:BADCHARSET)\z/n
1126
- result = ResponseCode.new(name, charset_list)
1127
- when /\A(?:CAPABILITY)\z/ni
1128
- result = ResponseCode.new(name, capability_data)
1129
- when /\A(?:PERMANENTFLAGS)\z/n
1130
- match(T_SPACE)
1131
- result = ResponseCode.new(name, flag_list)
1132
- when /\A(?:UIDVALIDITY|UIDNEXT|UNSEEN)\z/n
1133
- match(T_SPACE)
1134
- result = ResponseCode.new(name, number)
1135
- when /\A(?:APPENDUID)\z/n
1136
- result = ResponseCode.new(name, resp_code_apnd__data)
1137
- when /\A(?:COPYUID)\z/n
1138
- result = ResponseCode.new(name, resp_code_copy__data)
1139
- else
1140
- token = lookahead
1141
- if token.symbol == T_SPACE
1142
- shift_token
1143
- token = match(T_TEXT, lex_state: EXPR_CTEXT)
1144
- result = ResponseCode.new(name, token.value)
1953
+ name = resp_text_code__name
1954
+ data =
1955
+ case name
1956
+ when "CAPABILITY" then resp_code__capability
1957
+ when "PERMANENTFLAGS" then SP? ? flag_perm__list : []
1958
+ when "UIDNEXT" then SP!; nz_number
1959
+ when "UIDVALIDITY" then SP!; nz_number
1960
+ when "UNSEEN" then SP!; nz_number # rev1 only
1961
+ when "APPENDUID" then SP!; resp_code_apnd__data # rev2, UIDPLUS
1962
+ when "COPYUID" then SP!; resp_code_copy__data # rev2, UIDPLUS
1963
+ when "BADCHARSET" then SP? ? charset__list : []
1964
+ when "ALERT", "PARSE", "READ-ONLY", "READ-WRITE", "TRYCREATE",
1965
+ "UNAVAILABLE", "AUTHENTICATIONFAILED", "AUTHORIZATIONFAILED",
1966
+ "EXPIRED", "PRIVACYREQUIRED", "CONTACTADMIN", "NOPERM", "INUSE",
1967
+ "EXPUNGEISSUED", "CORRUPTION", "SERVERBUG", "CLIENTBUG", "CANNOT",
1968
+ "LIMIT", "OVERQUOTA", "ALREADYEXISTS", "NONEXISTENT", "CLOSED",
1969
+ "NOTSAVED", "UIDNOTSTICKY", "UNKNOWN-CTE", "HASCHILDREN"
1970
+ when "NOMODSEQ" then nil # CONDSTORE
1971
+ when "HIGHESTMODSEQ" then SP!; mod_sequence_value # CONDSTORE
1972
+ when "MODIFIED" then SP!; sequence_set # CONDSTORE
1973
+ when "MAILBOXID" then SP!; parens__objectid # RFC8474: OBJECTID
1974
+ when "UIDREQUIRED" then # RFC9586: UIDONLY
1145
1975
  else
1146
- result = ResponseCode.new(name, nil)
1976
+ SP? and text_chars_except_rbra
1147
1977
  end
1148
- end
1149
- return result
1978
+ ResponseCode.new(name, data)
1150
1979
  end
1151
1980
 
1152
- def charset_list
1153
- result = []
1154
- if accept(T_SPACE)
1155
- match(T_LPAR)
1156
- result << charset
1157
- while accept(T_SPACE)
1158
- result << charset
1159
- end
1160
- match(T_RPAR)
1161
- end
1162
- result
1981
+ alias resp_text_code__name case_insensitive__atom
1982
+
1983
+ # 1*<any TEXT-CHAR except "]">
1984
+ def text_chars_except_rbra
1985
+ match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')[0]
1986
+ end
1987
+
1988
+ # "(" charset *(SP charset) ")"
1989
+ def charset__list
1990
+ lpar; list = [charset]; while SP? do list << charset end; rpar; list
1163
1991
  end
1164
1992
 
1165
1993
  # already matched: "APPENDUID"
@@ -1173,198 +2001,153 @@ module Net
1173
2001
  #
1174
2002
  # n.b, uniqueid ⊂ uid-set. To avoid inconsistent return types, we always
1175
2003
  # match uid_set even if that returns a single-member array.
1176
- #
1177
2004
  def resp_code_apnd__data
1178
- match(T_SPACE); validity = number
1179
- match(T_SPACE); dst_uids = uid_set # uniqueid ⊂ uid-set
1180
- UIDPlusData.new(validity, nil, dst_uids)
2005
+ validity = number; SP!
2006
+ dst_uids = uid_set # uniqueid ⊂ uid-set
2007
+ AppendUID(validity, dst_uids)
1181
2008
  end
1182
2009
 
1183
2010
  # already matched: "COPYUID"
1184
2011
  #
1185
2012
  # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1186
2013
  def resp_code_copy__data
1187
- match(T_SPACE); validity = number
1188
- match(T_SPACE); src_uids = uid_set
1189
- match(T_SPACE); dst_uids = uid_set
1190
- UIDPlusData.new(validity, src_uids, dst_uids)
1191
- end
1192
-
1193
- def address_list
1194
- token = lookahead
1195
- if token.symbol == T_NIL
1196
- shift_token
1197
- return nil
1198
- else
1199
- result = []
1200
- match(T_LPAR)
1201
- while true
1202
- token = lookahead
1203
- case token.symbol
1204
- when T_RPAR
1205
- shift_token
1206
- break
1207
- when T_SPACE
1208
- shift_token
1209
- end
1210
- result.push(address)
1211
- end
1212
- return result
1213
- end
1214
- end
1215
-
1216
- ADDRESS_REGEXP = /\G\
1217
- (?# 1: NAME )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1218
- (?# 2: ROUTE )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1219
- (?# 3: MAILBOX )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1220
- (?# 4: HOST )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)")\
1221
- \)/ni
1222
-
2014
+ validity = number; SP!
2015
+ src_uids = uid_set; SP!
2016
+ dst_uids = uid_set
2017
+ CopyUID(validity, src_uids, dst_uids)
2018
+ end
2019
+
2020
+ def AppendUID(...) DeprecatedUIDPlus(...) || AppendUIDData.new(...) end
2021
+ def CopyUID(...) DeprecatedUIDPlus(...) || CopyUIDData.new(...) end
2022
+
2023
+ # TODO: remove this code in the v0.6.0 release
2024
+ def DeprecatedUIDPlus(validity, src_uids = nil, dst_uids)
2025
+ return unless config.parser_use_deprecated_uidplus_data
2026
+ compact_uid_sets = [src_uids, dst_uids].compact
2027
+ count = compact_uid_sets.map { _1.count_with_duplicates }.max
2028
+ max = config.parser_max_deprecated_uidplus_data_size
2029
+ if count <= max
2030
+ src_uids &&= src_uids.each_ordered_number.to_a
2031
+ dst_uids = dst_uids.each_ordered_number.to_a
2032
+ UIDPlusData.new(validity, src_uids, dst_uids)
2033
+ elsif config.parser_use_deprecated_uidplus_data != :up_to_max_size
2034
+ parse_error("uid-set is too large: %d > %d", count, max)
2035
+ end
2036
+ end
2037
+
2038
+ ADDRESS_REGEXP = /\G
2039
+ \( (?: NIL | #{Patterns::QUOTED_rev2} ) # 1: NAME
2040
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 2: ROUTE
2041
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 3: MAILBOX
2042
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 4: HOST
2043
+ \)
2044
+ /nix
2045
+
2046
+ # address = "(" addr-name SP addr-adl SP addr-mailbox SP
2047
+ # addr-host ")"
2048
+ # addr-adl = nstring
2049
+ # addr-host = nstring
2050
+ # addr-mailbox = nstring
2051
+ # addr-name = nstring
1223
2052
  def address
1224
- match(T_LPAR)
1225
- if @str.index(ADDRESS_REGEXP, @pos)
1226
- # address does not include literal.
1227
- @pos = $~.end(0)
1228
- name = $1
1229
- route = $2
1230
- mailbox = $3
1231
- host = $4
1232
- for s in [name, route, mailbox, host]
1233
- if s
1234
- s.gsub!(/\\(["\\])/n, "\\1")
1235
- end
1236
- end
1237
- else
1238
- name = nstring
1239
- match(T_SPACE)
1240
- route = nstring
1241
- match(T_SPACE)
1242
- mailbox = nstring
1243
- match(T_SPACE)
1244
- host = nstring
1245
- match(T_RPAR)
1246
- end
1247
- return Address.new(name, route, mailbox, host)
1248
- end
1249
-
1250
- FLAG_REGEXP = /\
1251
- (?# FLAG )\\([^\x80-\xff(){ \x00-\x1f\x7f%"\\]+)|\
1252
- (?# ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\]+)/n
1253
-
2053
+ if (match = accept_re(ADDRESS_REGEXP))
2054
+ # note that "NIL" isn't captured by the regexp
2055
+ name, route, mailbox, host = match.captures
2056
+ .map { Patterns.unescape_quoted _1 }
2057
+ else # address may include literals
2058
+ lpar; name = addr_name
2059
+ SP!; route = addr_adl
2060
+ SP!; mailbox = addr_mailbox
2061
+ SP!; host = addr_host
2062
+ rpar
2063
+ end
2064
+ Address.new(name, route, mailbox, host)
2065
+ end
2066
+
2067
+ alias addr_adl nstring
2068
+ alias addr_host nstring
2069
+ alias addr_mailbox nstring
2070
+ alias addr_name nstring
2071
+
2072
+ # flag-list = "(" [flag *(SP flag)] ")"
1254
2073
  def flag_list
1255
- if @str.index(/\(([^)]*)\)/ni, @pos)
1256
- @pos = $~.end(0)
1257
- return $1.scan(FLAG_REGEXP).collect { |flag, atom|
1258
- if atom
1259
- atom
1260
- else
1261
- flag.capitalize.intern
1262
- end
1263
- }
2074
+ if (match = accept_re(Patterns::FLAG_LIST))
2075
+ match[1].split(nil)
2076
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1264
2077
  else
1265
- parse_error("invalid flag list")
2078
+ quirky__flag_list "flags-list"
1266
2079
  end
1267
2080
  end
1268
2081
 
1269
- def nstring
1270
- token = lookahead
1271
- if token.symbol == T_NIL
1272
- shift_token
1273
- return nil
2082
+ # "(" [flag-perm *(SP flag-perm)] ")"
2083
+ def flag_perm__list
2084
+ if (match = accept_re(Patterns::FLAG_PERM_LIST))
2085
+ match[1].split(nil)
2086
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1274
2087
  else
1275
- return string
2088
+ quirky__flag_list "PERMANENTFLAGS flag-perm list"
1276
2089
  end
1277
2090
  end
1278
2091
 
1279
- def astring
1280
- token = lookahead
1281
- if string_token?(token)
1282
- return string
1283
- else
1284
- return astring_chars
1285
- end
2092
+ # This allows illegal "]" in flag names (Gmail),
2093
+ # or "\*" in a FLAGS response (greenmail).
2094
+ def quirky__flag_list(name)
2095
+ match_re(Patterns::QUIRKY_FLAGS_LIST, "quirks mode #{name}")[1]
2096
+ .scan(Patterns::QUIRKY_FLAG)
2097
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1286
2098
  end
1287
2099
 
1288
- def string
1289
- token = lookahead
1290
- if token.symbol == T_NIL
1291
- shift_token
1292
- return nil
1293
- end
1294
- token = match(T_QUOTED, T_LITERAL)
1295
- return token.value
2100
+ # See Patterns::MBX_LIST_FLAGS
2101
+ def mbx_list_flags
2102
+ match_re(Patterns::MBX_LIST_FLAGS, "mbx-list-flags")[1]
2103
+ .split(nil)
2104
+ .map! { _1.delete_prefix!("\\"); _1.capitalize.to_sym }
1296
2105
  end
1297
2106
 
1298
- STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1299
-
1300
- def string_token?(token)
1301
- return STRING_TOKENS.include?(token.symbol)
1302
- end
2107
+ # See https://developers.google.com/gmail/imap/imap-extensions
2108
+ def x_gm_label; accept(T_BSLASH) ? atom.capitalize.to_sym : astring end
1303
2109
 
1304
- def case_insensitive_string
1305
- token = lookahead
1306
- if token.symbol == T_NIL
1307
- shift_token
1308
- return nil
1309
- end
1310
- token = match(T_QUOTED, T_LITERAL)
1311
- return token.value.upcase
2110
+ # See https://developers.google.com/gmail/imap/imap-extensions
2111
+ def x_gm_labels
2112
+ lpar; return [] if rpar?
2113
+ labels = []
2114
+ labels << x_gm_label
2115
+ labels << x_gm_label while SP?
2116
+ rpar
2117
+ labels
1312
2118
  end
1313
2119
 
1314
- # atom = 1*ATOM-CHAR
1315
- # ATOM-CHAR = <any CHAR except atom-specials>
1316
- ATOM_TOKENS = [
1317
- T_ATOM,
1318
- T_NUMBER,
1319
- T_NIL,
1320
- T_LBRA,
1321
- T_PLUS
1322
- ]
2120
+ # See https://www.rfc-editor.org/errata/rfc3501
2121
+ #
2122
+ # charset = atom / quoted
2123
+ def charset; quoted? || atom end
1323
2124
 
1324
- def atom
1325
- -combine_adjacent(*ATOM_TOKENS)
1326
- end
2125
+ # RFC7162:
2126
+ # mod-sequence-value = 1*DIGIT
2127
+ # ;; Positive unsigned 63-bit integer
2128
+ # ;; (mod-sequence)
2129
+ # ;; (1 <= n <= 9,223,372,036,854,775,807).
2130
+ alias mod_sequence_value nz_number64
1327
2131
 
1328
- # ASTRING-CHAR = ATOM-CHAR / resp-specials
1329
- # resp-specials = "]"
1330
- ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
2132
+ # RFC7162:
2133
+ # permsg-modsequence = mod-sequence-value
2134
+ # ;; Per-message mod-sequence.
2135
+ alias permsg_modsequence mod_sequence_value
1331
2136
 
1332
- def astring_chars
1333
- combine_adjacent(*ASTRING_CHARS_TOKENS)
1334
- end
2137
+ # RFC7162:
2138
+ # mod-sequence-valzer = "0" / mod-sequence-value
2139
+ alias mod_sequence_valzer number64
1335
2140
 
1336
- def combine_adjacent(*tokens)
1337
- result = "".b
1338
- while token = accept(*tokens)
1339
- result << token.value
1340
- end
1341
- if result.empty?
1342
- parse_error('unexpected token %s (expected %s)',
1343
- lookahead.symbol, args.join(" or "))
1344
- end
1345
- result
1346
- end
2141
+ def parens__modseq; lpar; _ = permsg_modsequence; rpar; _ end
1347
2142
 
1348
- # See https://www.rfc-editor.org/errata/rfc3501
1349
- #
1350
- # charset = atom / quoted
1351
- def charset
1352
- if token = accept(T_QUOTED)
1353
- token.value
1354
- else
1355
- atom
1356
- end
1357
- end
2143
+ # RFC8474:
2144
+ # objectid = 1*255(ALPHA / DIGIT / "_" / "-")
2145
+ # ; characters in object identifiers are case
2146
+ # ; significant
2147
+ alias objectid atom
1358
2148
 
1359
- def number
1360
- token = lookahead
1361
- if token.symbol == T_NIL
1362
- shift_token
1363
- return nil
1364
- end
1365
- token = match(T_NUMBER)
1366
- return token.value.to_i
1367
- end
2149
+ def parens__objectid; lpar; _ = objectid; rpar; _ end
2150
+ def nparens__objectid; NIL? ? nil : parens__objectid end
1368
2151
 
1369
2152
  # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1370
2153
  # uid-set = (uniqueid / uid-range) *("," uid-set)
@@ -1375,15 +2158,9 @@ module Net
1375
2158
  # uniqueid = nz-number
1376
2159
  # ; Strictly ascending
1377
2160
  def uid_set
1378
- token = match(T_NUMBER, T_ATOM)
1379
- case token.symbol
1380
- when T_NUMBER then [Integer(token.value)]
1381
- when T_ATOM
1382
- token.value.split(",").flat_map {|range|
1383
- range = range.split(":").map {|uniqueid| Integer(uniqueid) }
1384
- range.size == 1 ? range : Range.new(range.min, range.max).to_a
1385
- }
1386
- end
2161
+ set = sequence_set
2162
+ parse_error("uid-set cannot contain '*'") if set.include_star?
2163
+ set
1387
2164
  end
1388
2165
 
1389
2166
  def nil_atom
@@ -1393,64 +2170,15 @@ module Net
1393
2170
 
1394
2171
  SPACES_REGEXP = /\G */n
1395
2172
 
1396
- # This advances @pos directly so it's safe before changing @lex_state.
1397
- def accept_space
1398
- if @token
1399
- shift_token if @token.symbol == T_SPACE
1400
- elsif @str[@pos] == " "
1401
- @pos += 1
1402
- end
1403
- end
1404
-
1405
2173
  # The RFC is very strict about this and usually we should be too.
1406
2174
  # But skipping spaces is usually a safe workaround for buggy servers.
1407
2175
  #
1408
2176
  # This advances @pos directly so it's safe before changing @lex_state.
1409
2177
  def accept_spaces
1410
- shift_token if @token&.symbol == T_SPACE
1411
- if @str.index(SPACES_REGEXP, @pos)
2178
+ return false unless SP?
2179
+ @str.index(SPACES_REGEXP, @pos) and
1412
2180
  @pos = $~.end(0)
1413
- end
1414
- end
1415
-
1416
- def match(*args, lex_state: @lex_state)
1417
- if @token && lex_state != @lex_state
1418
- parse_error("invalid lex_state change to %s with unconsumed token",
1419
- lex_state)
1420
- end
1421
- begin
1422
- @lex_state, original_lex_state = lex_state, @lex_state
1423
- token = lookahead
1424
- unless args.include?(token.symbol)
1425
- parse_error('unexpected token %s (expected %s)',
1426
- token.symbol.id2name,
1427
- args.collect {|i| i.id2name}.join(" or "))
1428
- end
1429
- shift_token
1430
- return token
1431
- ensure
1432
- @lex_state = original_lex_state
1433
- end
1434
- end
1435
-
1436
- # like match, but does not raise error on failure.
1437
- #
1438
- # returns and shifts token on successful match
1439
- # returns nil and leaves @token unshifted on no match
1440
- def accept(*args)
1441
- token = lookahead
1442
- if args.include?(token.symbol)
1443
- shift_token
1444
- token
1445
- end
1446
- end
1447
-
1448
- def lookahead
1449
- @token ||= next_token
1450
- end
1451
-
1452
- def shift_token
1453
- @token = nil
2181
+ true
1454
2182
  end
1455
2183
 
1456
2184
  def next_token
@@ -1461,38 +2189,46 @@ module Net
1461
2189
  if $1
1462
2190
  return Token.new(T_SPACE, $+)
1463
2191
  elsif $2
1464
- return Token.new(T_NIL, $+)
1465
- elsif $3
1466
- return Token.new(T_NUMBER, $+)
2192
+ len = $+.to_i
2193
+ val = @str[@pos, len]
2194
+ @pos += len
2195
+ return Token.new(T_LITERAL8, val)
2196
+ elsif $3 && $7
2197
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
2198
+ return Token.new(T_ATOM, $3)
1467
2199
  elsif $4
1468
- return Token.new(T_ATOM, $+)
2200
+ return Token.new(T_NIL, $+)
1469
2201
  elsif $5
1470
- return Token.new(T_QUOTED,
1471
- $+.gsub(/\\(["\\])/n, "\\1"))
2202
+ return Token.new(T_NUMBER, $+)
1472
2203
  elsif $6
2204
+ return Token.new(T_PLUS, $+)
2205
+ elsif $8
2206
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
2207
+ return Token.new(T_ATOM, $+)
2208
+ elsif $9
2209
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
2210
+ elsif $10
1473
2211
  return Token.new(T_LPAR, $+)
1474
- elsif $7
2212
+ elsif $11
1475
2213
  return Token.new(T_RPAR, $+)
1476
- elsif $8
2214
+ elsif $12
1477
2215
  return Token.new(T_BSLASH, $+)
1478
- elsif $9
2216
+ elsif $13
1479
2217
  return Token.new(T_STAR, $+)
1480
- elsif $10
2218
+ elsif $14
1481
2219
  return Token.new(T_LBRA, $+)
1482
- elsif $11
2220
+ elsif $15
1483
2221
  return Token.new(T_RBRA, $+)
1484
- elsif $12
2222
+ elsif $16
1485
2223
  len = $+.to_i
1486
2224
  val = @str[@pos, len]
1487
2225
  @pos += len
1488
2226
  return Token.new(T_LITERAL, val)
1489
- elsif $13
1490
- return Token.new(T_PLUS, $+)
1491
- elsif $14
2227
+ elsif $17
1492
2228
  return Token.new(T_PERCENT, $+)
1493
- elsif $15
2229
+ elsif $18
1494
2230
  return Token.new(T_CRLF, $+)
1495
- elsif $16
2231
+ elsif $19
1496
2232
  return Token.new(T_EOF, $+)
1497
2233
  else
1498
2234
  parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
@@ -1511,8 +2247,7 @@ module Net
1511
2247
  elsif $3
1512
2248
  return Token.new(T_NUMBER, $+)
1513
2249
  elsif $4
1514
- return Token.new(T_QUOTED,
1515
- $+.gsub(/\\(["\\])/n, "\\1"))
2250
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1516
2251
  elsif $5
1517
2252
  len = $+.to_i
1518
2253
  val = @str[@pos, len]
@@ -1529,63 +2264,11 @@ module Net
1529
2264
  @str.index(/\S*/n, @pos)
1530
2265
  parse_error("unknown token - %s", $&.dump)
1531
2266
  end
1532
- when EXPR_TEXT
1533
- if @str.index(TEXT_REGEXP, @pos)
1534
- @pos = $~.end(0)
1535
- if $1
1536
- return Token.new(T_TEXT, $+)
1537
- else
1538
- parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid")
1539
- end
1540
- else
1541
- @str.index(/\S*/n, @pos)
1542
- parse_error("unknown token - %s", $&.dump)
1543
- end
1544
- when EXPR_RTEXT
1545
- if @str.index(RTEXT_REGEXP, @pos)
1546
- @pos = $~.end(0)
1547
- if $1
1548
- return Token.new(T_LBRA, $+)
1549
- elsif $2
1550
- return Token.new(T_TEXT, $+)
1551
- else
1552
- parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid")
1553
- end
1554
- else
1555
- @str.index(/\S*/n, @pos)
1556
- parse_error("unknown token - %s", $&.dump)
1557
- end
1558
- when EXPR_CTEXT
1559
- if @str.index(CTEXT_REGEXP, @pos)
1560
- @pos = $~.end(0)
1561
- if $1
1562
- return Token.new(T_TEXT, $+)
1563
- else
1564
- parse_error("[Net::IMAP BUG] CTEXT_REGEXP is invalid")
1565
- end
1566
- else
1567
- @str.index(/\S*/n, @pos) #/
1568
- parse_error("unknown token - %s", $&.dump)
1569
- end
1570
2267
  else
1571
2268
  parse_error("invalid @lex_state - %s", @lex_state.inspect)
1572
2269
  end
1573
2270
  end
1574
2271
 
1575
- def parse_error(fmt, *args)
1576
- if IMAP.debug
1577
- $stderr.printf("@str: %s\n", @str.dump)
1578
- $stderr.printf("@pos: %d\n", @pos)
1579
- $stderr.printf("@lex_state: %s\n", @lex_state)
1580
- if @token
1581
- $stderr.printf("@token.symbol: %s\n", @token.symbol)
1582
- $stderr.printf("@token.value: %s\n", @token.value.inspect)
1583
- end
1584
- end
1585
- raise ResponseParseError, format(fmt, *args)
1586
- end
1587
2272
  end
1588
-
1589
2273
  end
1590
-
1591
2274
  end