net-imap 0.3.7 → 0.4.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/pages.yml +46 -0
  3. data/.github/workflows/test.yml +5 -12
  4. data/.gitignore +2 -0
  5. data/Gemfile +3 -0
  6. data/README.md +15 -4
  7. data/Rakefile +0 -7
  8. data/docs/styles.css +0 -12
  9. data/lib/net/imap/authenticators.rb +26 -57
  10. data/lib/net/imap/command_data.rb +13 -6
  11. data/lib/net/imap/data_encoding.rb +14 -2
  12. data/lib/net/imap/deprecated_client_options.rb +139 -0
  13. data/lib/net/imap/errors.rb +20 -0
  14. data/lib/net/imap/fetch_data.rb +518 -0
  15. data/lib/net/imap/response_data.rb +178 -255
  16. data/lib/net/imap/response_parser/parser_utils.rb +240 -0
  17. data/lib/net/imap/response_parser.rb +1722 -1193
  18. data/lib/net/imap/sasl/anonymous_authenticator.rb +69 -0
  19. data/lib/net/imap/sasl/authentication_exchange.rb +107 -0
  20. data/lib/net/imap/sasl/authenticators.rb +118 -0
  21. data/lib/net/imap/sasl/client_adapter.rb +72 -0
  22. data/lib/net/imap/{authenticators/cram_md5.rb → sasl/cram_md5_authenticator.rb} +21 -11
  23. data/lib/net/imap/sasl/digest_md5_authenticator.rb +180 -0
  24. data/lib/net/imap/sasl/external_authenticator.rb +83 -0
  25. data/lib/net/imap/sasl/gs2_header.rb +80 -0
  26. data/lib/net/imap/{authenticators/login.rb → sasl/login_authenticator.rb} +25 -16
  27. data/lib/net/imap/sasl/oauthbearer_authenticator.rb +199 -0
  28. data/lib/net/imap/sasl/plain_authenticator.rb +101 -0
  29. data/lib/net/imap/sasl/protocol_adapters.rb +45 -0
  30. data/lib/net/imap/sasl/scram_algorithm.rb +58 -0
  31. data/lib/net/imap/sasl/scram_authenticator.rb +287 -0
  32. data/lib/net/imap/sasl/stringprep.rb +6 -66
  33. data/lib/net/imap/sasl/xoauth2_authenticator.rb +106 -0
  34. data/lib/net/imap/sasl.rb +144 -43
  35. data/lib/net/imap/sasl_adapter.rb +21 -0
  36. data/lib/net/imap/search_result.rb +150 -0
  37. data/lib/net/imap/sequence_set.rb +1414 -0
  38. data/lib/net/imap/stringprep/nameprep.rb +70 -0
  39. data/lib/net/imap/stringprep/saslprep.rb +69 -0
  40. data/lib/net/imap/stringprep/saslprep_tables.rb +96 -0
  41. data/lib/net/imap/stringprep/tables.rb +146 -0
  42. data/lib/net/imap/stringprep/trace.rb +85 -0
  43. data/lib/net/imap/stringprep.rb +159 -0
  44. data/lib/net/imap.rb +1213 -636
  45. data/net-imap.gemspec +5 -3
  46. data/rakelib/benchmarks.rake +91 -0
  47. data/rakelib/saslprep.rake +4 -4
  48. data/rakelib/string_prep_tables_generator.rb +82 -60
  49. metadata +34 -14
  50. data/benchmarks/stringprep.yml +0 -65
  51. data/benchmarks/table-regexps.yml +0 -39
  52. data/lib/net/imap/authenticators/digest_md5.rb +0 -115
  53. data/lib/net/imap/authenticators/plain.rb +0 -41
  54. data/lib/net/imap/authenticators/xoauth2.rb +0 -20
  55. data/lib/net/imap/sasl/saslprep.rb +0 -55
  56. data/lib/net/imap/sasl/saslprep_tables.rb +0 -98
  57. data/lib/net/imap/sasl/stringprep_tables.rb +0 -153
@@ -1,12 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "response_parser/parser_utils"
4
5
 
5
6
  module Net
6
7
  class IMAP < Protocol
7
8
 
8
9
  # Parses an \IMAP server response.
9
10
  class ResponseParser
11
+ include ParserUtils
12
+ extend ParserUtils::Generator
13
+
10
14
  # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
11
15
  def initialize
12
16
  @str = nil
@@ -33,745 +37,1340 @@ module Net
33
37
 
34
38
  # :stopdoc:
35
39
 
36
- EXPR_BEG = :EXPR_BEG
37
- EXPR_DATA = :EXPR_DATA
38
- EXPR_TEXT = :EXPR_TEXT
39
- EXPR_RTEXT = :EXPR_RTEXT
40
- EXPR_CTEXT = :EXPR_CTEXT
41
-
42
- T_SPACE = :SPACE
43
- T_NIL = :NIL
44
- T_NUMBER = :NUMBER
45
- T_ATOM = :ATOM
46
- T_QUOTED = :QUOTED
47
- T_LPAR = :LPAR
48
- T_RPAR = :RPAR
49
- T_BSLASH = :BSLASH
50
- T_STAR = :STAR
51
- T_LBRA = :LBRA
52
- T_RBRA = :RBRA
53
- T_LITERAL = :LITERAL
54
- T_PLUS = :PLUS
55
- T_PERCENT = :PERCENT
56
- T_CRLF = :CRLF
57
- T_EOF = :EOF
58
- T_TEXT = :TEXT
59
-
40
+ EXPR_BEG = :EXPR_BEG # the default, used in most places
41
+ EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
42
+
43
+ T_SPACE = :SPACE # atom special
44
+ T_ATOM = :ATOM # atom (subset of astring chars)
45
+ T_NIL = :NIL # subset of atom and label
46
+ T_NUMBER = :NUMBER # subset of atom
47
+ T_LBRA = :LBRA # subset of atom
48
+ T_PLUS = :PLUS # subset of atom; tag special
49
+ T_RBRA = :RBRA # atom special; resp_special; valid astring char
50
+ T_QUOTED = :QUOTED # starts/ends with atom special
51
+ T_BSLASH = :BSLASH # atom special; quoted special
52
+ T_LPAR = :LPAR # atom special; paren list delimiter
53
+ T_RPAR = :RPAR # atom special; paren list delimiter
54
+ T_STAR = :STAR # atom special; list wildcard
55
+ T_PERCENT = :PERCENT # atom special; list wildcard
56
+ T_LITERAL = :LITERAL # starts with atom special
57
+ T_LITERAL8 = :LITERAL8 # starts with atom char "~"
58
+ T_CRLF = :CRLF # atom special; text special; quoted special
59
+ T_TEXT = :TEXT # any char except CRLF
60
+ T_EOF = :EOF # end of response string
61
+
62
+ module ResponseConditions
63
+ OK = "OK"
64
+ NO = "NO"
65
+ BAD = "BAD"
66
+ BYE = "BYE"
67
+ PREAUTH = "PREAUTH"
68
+
69
+ RESP_COND_STATES = [OK, NO, BAD ].freeze
70
+ RESP_DATA_CONDS = [OK, NO, BAD, BYE, ].freeze
71
+ AUTH_CONDS = [OK, PREAUTH].freeze
72
+ GREETING_CONDS = [OK, BYE, PREAUTH].freeze
73
+ RESP_CONDS = [OK, NO, BAD, BYE, PREAUTH].freeze
74
+ end
75
+ include ResponseConditions
76
+
77
+ module Patterns
78
+
79
+ module CharClassSubtraction
80
+ refine Regexp do
81
+ def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end
82
+ end
83
+ end
84
+ using CharClassSubtraction
85
+
86
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
87
+ # >>>
88
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
89
+ # CHAR = %x01-7F
90
+ # CRLF = CR LF
91
+ # ; Internet standard newline
92
+ # CTL = %x00-1F / %x7F
93
+ # ; controls
94
+ # DIGIT = %x30-39
95
+ # ; 0-9
96
+ # DQUOTE = %x22
97
+ # ; " (Double Quote)
98
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
99
+ # OCTET = %x00-FF
100
+ # SP = %x20
101
+ module RFC5234
102
+ ALPHA = /[A-Za-z]/n
103
+ CHAR = /[\x01-\x7f]/n
104
+ CRLF = /\r\n/n
105
+ CTL = /[\x00-\x1F\x7F]/n
106
+ DIGIT = /\d/n
107
+ DQUOTE = /"/n
108
+ HEXDIG = /\h/
109
+ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
110
+ SP = / /n
111
+ end
112
+
113
+ # UTF-8, a transformation format of ISO 10646
114
+ # >>>
115
+ # UTF8-1 = %x00-7F
116
+ # UTF8-tail = %x80-BF
117
+ # UTF8-2 = %xC2-DF UTF8-tail
118
+ # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
119
+ # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
120
+ # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
121
+ # %xF4 %x80-8F 2( UTF8-tail )
122
+ # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
123
+ # UTF8-octets = *( UTF8-char )
124
+ #
125
+ # n.b. String * Integer is used for repetition, rather than /x{3}/,
126
+ # because ruby 3.2's linear-time cache-based optimization doesn't work
127
+ # with "bounded or fixed times repetition nesting in another repetition
128
+ # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
129
+ # believe it is hard to support this case correctly."
130
+ # See https://bugs.ruby-lang.org/issues/19104
131
+ module RFC3629
132
+ UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
133
+ UTF8_TAIL = /[\x80-\xBF]/n
134
+ UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
135
+ UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
136
+ /\xED[\x80-\x9F]#{UTF8_TAIL}/n,
137
+ /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
138
+ /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
139
+ UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
140
+ /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
141
+ /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
142
+ UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
143
+ UTF8_OCTETS = /#{UTF8_CHAR}*/n
144
+ end
145
+
146
+ include RFC5234
147
+ include RFC3629
148
+
149
+ # CHAR8 = %x01-ff
150
+ # ; any OCTET except NUL, %x00
151
+ CHAR8 = /[\x01-\xff]/n
152
+
153
+ # list-wildcards = "%" / "*"
154
+ LIST_WILDCARDS = /[%*]/n
155
+ # quoted-specials = DQUOTE / "\"
156
+ QUOTED_SPECIALS = /["\\]/n
157
+ # resp-specials = "]"
158
+ RESP_SPECIALS = /[\]]/n
159
+
160
+ # atomish = 1*<any ATOM-CHAR except "[">
161
+ # ; We use "atomish" for msg-att and section, in order
162
+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
163
+ #
164
+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
165
+ # quoted-specials / resp-specials
166
+ # ATOM-CHAR = <any CHAR except atom-specials>
167
+ # atom = 1*ATOM-CHAR
168
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
169
+ # tag = 1*<any ASTRING-CHAR except "+">
170
+
171
+ ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
172
+ ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
173
+
174
+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
175
+ ATOM_CHAR = CHAR - ATOM_SPECIALS
176
+
177
+ ATOM = /#{ATOM_CHAR}+/n
178
+ ASTRING_CHARS = /#{ASTRING_CHAR}+/n
179
+ ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
180
+ TAG = /#{ASTRING_CHAR - /[+]/ }+/
181
+
182
+ # TEXT-CHAR = <any CHAR except CR and LF>
183
+ TEXT_CHAR = CHAR - /[\r\n]/
184
+
185
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
186
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
187
+ CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
188
+
189
+ # flag = "\Answered" / "\Flagged" / "\Deleted" /
190
+ # "\Seen" / "\Draft" / flag-keyword / flag-extension
191
+ # ; Does not include "\Recent"
192
+ # flag-extension = "\" atom
193
+ # ; Future expansion. Client implementations
194
+ # ; MUST accept flag-extension flags. Server
195
+ # ; implementations MUST NOT generate
196
+ # ; flag-extension flags except as defined by
197
+ # ; a future Standard or Standards Track
198
+ # ; revisions of this specification.
199
+ # flag-keyword = "$MDNSent" / "$Forwarded" / "$Junk" /
200
+ # "$NotJunk" / "$Phishing" / atom
201
+ #
202
+ # flag-perm = flag / "\*"
203
+ #
204
+ # Not checking for max one mbx-list-sflag in the parser.
205
+ # >>>
206
+ # mbx-list-oflag = "\Noinferiors" / child-mbox-flag /
207
+ # "\Subscribed" / "\Remote" / flag-extension
208
+ # ; Other flags; multiple from this list are
209
+ # ; possible per LIST response, but each flag
210
+ # ; can only appear once per LIST response
211
+ # mbx-list-sflag = "\NonExistent" / "\Noselect" / "\Marked" /
212
+ # "\Unmarked"
213
+ # ; Selectability flags; only one per LIST response
214
+ # child-mbox-flag = "\HasChildren" / "\HasNoChildren"
215
+ # ; attributes for the CHILDREN return option, at most
216
+ # ; one possible per LIST response
217
+ FLAG = /\\?#{ATOM}/n
218
+ FLAG_EXTENSION = /\\#{ATOM}/n
219
+ FLAG_KEYWORD = ATOM
220
+ FLAG_PERM = Regexp.union(FLAG, "\\*")
221
+ MBX_FLAG = FLAG_EXTENSION
222
+
223
+ # flag-list = "(" [flag *(SP flag)] ")"
224
+ # resp-text-code =/ "PERMANENTFLAGS" SP
225
+ # "(" [flag-perm *(SP flag-perm)] ")"
226
+ # mbx-list-flags = *(mbx-list-oflag SP) mbx-list-sflag
227
+ # *(SP mbx-list-oflag) /
228
+ # mbx-list-oflag *(SP mbx-list-oflag)
229
+ # (Not checking for max one mbx-list-sflag in the parser.)
230
+ FLAG_LIST = /\G\((#{FLAG }(?:#{SP}#{FLAG })*|)\)/ni
231
+ FLAG_PERM_LIST = /\G\((#{FLAG_PERM}(?:#{SP}#{FLAG_PERM})*|)\)/ni
232
+ MBX_LIST_FLAGS = /\G (#{MBX_FLAG }(?:#{SP}#{MBX_FLAG })*) /nix
233
+
234
+ # Gmail allows SP and "]" in flags.......
235
+ QUIRKY_FLAG = Regexp.union(/\\?#{ASTRING_CHARS}/n, "\\*")
236
+ QUIRKY_FLAGS_LIST = /\G\(( [^)]* )\)/nx
237
+
238
+ # RFC3501:
239
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
240
+ # "\" quoted-specials
241
+ # RFC9051:
242
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
243
+ # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
244
+ # RFC3501 & RFC9051:
245
+ # quoted = DQUOTE *QUOTED-CHAR DQUOTE
246
+ QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
247
+ QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
248
+ QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
249
+ QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
250
+ UTF8_2, UTF8_3, UTF8_4)
251
+ QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
252
+ QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
253
+
254
+ # RFC3501:
255
+ # text = 1*TEXT-CHAR
256
+ # RFC9051:
257
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
258
+ # ; Non-ASCII text can only be returned
259
+ # ; after ENABLE IMAP4rev2 command
260
+ TEXT_rev1 = /#{TEXT_CHAR}+/
261
+ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
262
+
263
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
264
+ TAGGED_LABEL_FCHAR = /[a-zA-Z\-_.]/n
265
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
266
+ TAGGED_LABEL_CHAR = /[a-zA-Z\-_.0-9:]*/n
267
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
268
+ # ; Is a valid RFC 3501 "atom".
269
+ TAGGED_EXT_LABEL = /#{TAGGED_LABEL_FCHAR}#{TAGGED_LABEL_CHAR}*/n
270
+
271
+ # nz-number = digit-nz *DIGIT
272
+ # ; Non-zero unsigned 32-bit integer
273
+ # ; (0 < n < 4,294,967,296)
274
+ NZ_NUMBER = /[1-9]\d*/n
275
+
276
+ # seq-number = nz-number / "*"
277
+ # ; message sequence number (COPY, FETCH, STORE
278
+ # ; commands) or unique identifier (UID COPY,
279
+ # ; UID FETCH, UID STORE commands).
280
+ # ; * represents the largest number in use. In
281
+ # ; the case of message sequence numbers, it is
282
+ # ; the number of messages in a non-empty mailbox.
283
+ # ; In the case of unique identifiers, it is the
284
+ # ; unique identifier of the last message in the
285
+ # ; mailbox or, if the mailbox is empty, the
286
+ # ; mailbox's current UIDNEXT value.
287
+ # ; The server should respond with a tagged BAD
288
+ # ; response to a command that uses a message
289
+ # ; sequence number greater than the number of
290
+ # ; messages in the selected mailbox. This
291
+ # ; includes "*" if the selected mailbox is empty.
292
+ SEQ_NUMBER = /#{NZ_NUMBER}|\*/n
293
+
294
+ # seq-range = seq-number ":" seq-number
295
+ # ; two seq-number values and all values between
296
+ # ; these two regardless of order.
297
+ # ; Example: 2:4 and 4:2 are equivalent and
298
+ # ; indicate values 2, 3, and 4.
299
+ # ; Example: a unique identifier sequence range of
300
+ # ; 3291:* includes the UID of the last message in
301
+ # ; the mailbox, even if that value is less than
302
+ # ; 3291.
303
+ SEQ_RANGE = /#{SEQ_NUMBER}:#{SEQ_NUMBER}/n
304
+
305
+ # sequence-set = (seq-number / seq-range) ["," sequence-set]
306
+ # ; set of seq-number values, regardless of order.
307
+ # ; Servers MAY coalesce overlaps and/or execute
308
+ # ; the sequence in any order.
309
+ # ; Example: a message sequence number set of
310
+ # ; 2,4:7,9,12:* for a mailbox with 15 messages is
311
+ # ; equivalent to 2,4,5,6,7,9,12,13,14,15
312
+ # ; Example: a message sequence number set of
313
+ # ; *:4,5:7 for a mailbox with 10 messages is
314
+ # ; equivalent to 10,9,8,7,6,5,4,5,6,7 and MAY
315
+ # ; be reordered and overlap coalesced to be
316
+ # ; 4,5,6,7,8,9,10.
317
+ SEQUENCE_SET_ITEM = /#{SEQ_NUMBER}|#{SEQ_RANGE}/n
318
+ SEQUENCE_SET = /#{SEQUENCE_SET_ITEM}(?:,#{SEQUENCE_SET_ITEM})*/n
319
+ SEQUENCE_SET_STR = /\A#{SEQUENCE_SET}\z/n
320
+
321
+ # RFC3501:
322
+ # literal = "{" number "}" CRLF *CHAR8
323
+ # ; Number represents the number of CHAR8s
324
+ # RFC9051:
325
+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
326
+ # ; <number64> represents the number of CHAR8s.
327
+ # ; A non-synchronizing literal is distinguished
328
+ # ; from a synchronizing literal by the presence of
329
+ # ; "+" before the closing "}".
330
+ # ; Non-synchronizing literals are not allowed when
331
+ # ; sent from server to the client.
332
+ LITERAL = /\{(\d+)\}\r\n/n
333
+
334
+ # RFC3516 (BINARY):
335
+ # literal8 = "~{" number "}" CRLF *OCTET
336
+ # ; <number> represents the number of OCTETs
337
+ # ; in the response string.
338
+ # RFC9051:
339
+ # literal8 = "~{" number64 "}" CRLF *OCTET
340
+ # ; <number64> represents the number of OCTETs
341
+ # ; in the response string.
342
+ LITERAL8 = /~\{(\d+)\}\r\n/n
343
+
344
+ module_function
345
+
346
+ def unescape_quoted!(quoted)
347
+ quoted
348
+ &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
349
+ &.force_encoding("UTF-8")
350
+ end
351
+
352
+ def unescape_quoted(quoted)
353
+ quoted
354
+ &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
355
+ &.force_encoding("UTF-8")
356
+ end
357
+
358
+ end
359
+
360
+ # the default, used in most places
60
361
  BEG_REGEXP = /\G(?:\
61
- (?# 1: SPACE )( +)|\
62
- (?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
63
- (?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
64
- (?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\
65
- (?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
66
- (?# 6: LPAR )(\()|\
67
- (?# 7: RPAR )(\))|\
68
- (?# 8: BSLASH )(\\)|\
69
- (?# 9: STAR )(\*)|\
70
- (?# 10: LBRA )(\[)|\
71
- (?# 11: RBRA )(\])|\
72
- (?# 12: LITERAL )\{(\d+)\}\r\n|\
73
- (?# 13: PLUS )(\+)|\
74
- (?# 14: PERCENT )(%)|\
75
- (?# 15: CRLF )(\r\n)|\
76
- (?# 16: EOF )(\z))/ni
77
-
362
+ (?# 1: SPACE )( )|\
363
+ (?# 2: LITERAL8)#{Patterns::LITERAL8}|\
364
+ (?# 3: ATOM prefixed with a compatible subtype)\
365
+ ((?:\
366
+ (?# 4: NIL )(NIL)|\
367
+ (?# 5: NUMBER )(\d+)|\
368
+ (?# 6: PLUS )(\+))\
369
+ (?# 7: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
370
+ (?# This enables greedy alternation without lookahead, in linear time.)\
371
+ )|\
372
+ (?# Also need to check for ATOM without a subtype prefix.)\
373
+ (?# 8: ATOM )(#{Patterns::ATOMISH})|\
374
+ (?# 9: QUOTED )#{Patterns::QUOTED_rev2}|\
375
+ (?# 10: LPAR )(\()|\
376
+ (?# 11: RPAR )(\))|\
377
+ (?# 12: BSLASH )(\\)|\
378
+ (?# 13: STAR )(\*)|\
379
+ (?# 14: LBRA )(\[)|\
380
+ (?# 15: RBRA )(\])|\
381
+ (?# 16: LITERAL )#{Patterns::LITERAL}|\
382
+ (?# 17: PERCENT )(%)|\
383
+ (?# 18: CRLF )(\r\n)|\
384
+ (?# 19: EOF )(\z))/ni
385
+
386
+ # envelope, body(structure), namespaces
78
387
  DATA_REGEXP = /\G(?:\
79
388
  (?# 1: SPACE )( )|\
80
389
  (?# 2: NIL )(NIL)|\
81
390
  (?# 3: NUMBER )(\d+)|\
82
- (?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
83
- (?# 5: LITERAL )\{(\d+)\}\r\n|\
391
+ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
392
+ (?# 5: LITERAL )#{Patterns::LITERAL}|\
84
393
  (?# 6: LPAR )(\()|\
85
394
  (?# 7: RPAR )(\)))/ni
86
395
 
87
- TEXT_REGEXP = /\G(?:\
88
- (?# 1: TEXT )([^\x00\r\n]*))/ni
89
-
90
- RTEXT_REGEXP = /\G(?:\
91
- (?# 1: LBRA )(\[)|\
92
- (?# 2: TEXT )([^\x00\r\n]*))/ni
396
+ # text, after 'resp-text-code "]"'
397
+ TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
93
398
 
94
- CTEXT_REGEXP = /\G(?:\
95
- (?# 1: TEXT )([^\x00\r\n\]]*))/ni
399
+ # resp-text-code, after 'atom SP'
400
+ CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
96
401
 
97
402
  Token = Struct.new(:symbol, :value)
98
403
 
99
- def response
100
- token = lookahead
101
- case token.symbol
102
- when T_PLUS
103
- result = continue_req
104
- when T_STAR
105
- result = response_untagged
106
- else
107
- result = response_tagged
108
- end
109
- while lookahead.symbol == T_SPACE
110
- # Ignore trailing space for Microsoft Exchange Server
111
- shift_token
112
- end
113
- match(T_CRLF)
114
- match(T_EOF)
115
- return result
116
- end
404
+ def_char_matchers :SP, " ", :T_SPACE
405
+ def_char_matchers :PLUS, "+", :T_PLUS
406
+ def_char_matchers :STAR, "*", :T_STAR
117
407
 
118
- def continue_req
119
- match(T_PLUS)
120
- token = lookahead
121
- if token.symbol == T_SPACE
122
- shift_token
123
- return ContinuationRequest.new(resp_text, @str)
408
+ def_char_matchers :lpar, "(", :T_LPAR
409
+ def_char_matchers :rpar, ")", :T_RPAR
410
+
411
+ def_char_matchers :lbra, "[", :T_LBRA
412
+ def_char_matchers :rbra, "]", :T_RBRA
413
+
414
+ # valid number ranges are not enforced by parser
415
+ # number = 1*DIGIT
416
+ # ; Unsigned 32-bit integer
417
+ # ; (0 <= n < 4,294,967,296)
418
+ def_token_matchers :number, T_NUMBER, coerce: Integer
419
+
420
+ def_token_matchers :quoted, T_QUOTED
421
+
422
+ # string = quoted / literal
423
+ def_token_matchers :string, T_QUOTED, T_LITERAL
424
+
425
+ # used by nstring8 = nstring / literal8
426
+ def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8
427
+
428
+ # use where string represents "LABEL" values
429
+ def_token_matchers :case_insensitive__string,
430
+ T_QUOTED, T_LITERAL,
431
+ send: :upcase
432
+
433
+ # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
434
+ # NIL? returns nil when it does *not* match
435
+ def_token_matchers :NIL, T_NIL
436
+
437
+ # In addition to explicit uses of +tagged-ext-label+, use this to match
438
+ # keywords when the grammar has not provided any extension syntax.
439
+ #
440
+ # Do *not* use this for labels where the grammar specifies extensions
441
+ # can be +atom+, even if all currently defined labels would match. For
442
+ # example response codes in +resp-text-code+.
443
+ #
444
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
445
+ # ; Is a valid RFC 3501 "atom".
446
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
447
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
448
+ #
449
+ # TODO: add to lexer and only match tagged-ext-label
450
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
451
+
452
+ def_token_matchers :CRLF, T_CRLF
453
+ def_token_matchers :EOF, T_EOF
454
+
455
+ # atom = 1*ATOM-CHAR
456
+ # ATOM-CHAR = <any CHAR except atom-specials>
457
+ ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
458
+
459
+ SEQUENCE_SET_TOKENS = [T_ATOM, T_NUMBER, T_STAR]
460
+
461
+ # sequence-set = (seq-number / seq-range) ["," sequence-set]
462
+ # sequence-set =/ seq-last-command
463
+ # ; Allow for "result of the last command"
464
+ # ; indicator.
465
+ # seq-last-command = "$"
466
+ #
467
+ # *note*: doesn't match seq-last-command
468
+ def sequence_set
469
+ str = combine_adjacent(*SEQUENCE_SET_TOKENS)
470
+ if Patterns::SEQUENCE_SET_STR.match?(str)
471
+ SequenceSet[str]
124
472
  else
125
- return ContinuationRequest.new(ResponseText.new(nil, ""), @str)
473
+ parse_error("unexpected atom %p, expected sequence-set", str)
126
474
  end
127
475
  end
128
476
 
129
- def response_untagged
130
- match(T_STAR)
131
- match(T_SPACE)
132
- token = lookahead
133
- if token.symbol == T_NUMBER
134
- return numeric_response
135
- elsif token.symbol == T_ATOM
136
- case token.value
137
- when /\A(?:OK|NO|BAD|BYE|PREAUTH)\z/ni
138
- return response_cond
139
- when /\A(?:FLAGS)\z/ni
140
- return flags_response
141
- when /\A(?:ID)\z/ni
142
- return id_response
143
- when /\A(?:LIST|LSUB|XLIST)\z/ni
144
- return list_response
145
- when /\A(?:NAMESPACE)\z/ni
146
- return namespace_response
147
- when /\A(?:QUOTA)\z/ni
148
- return getquota_response
149
- when /\A(?:QUOTAROOT)\z/ni
150
- return getquotaroot_response
151
- when /\A(?:ACL)\z/ni
152
- return getacl_response
153
- when /\A(?:SEARCH|SORT)\z/ni
154
- return search_response
155
- when /\A(?:THREAD)\z/ni
156
- return thread_response
157
- when /\A(?:STATUS)\z/ni
158
- return status_response
159
- when /\A(?:CAPABILITY)\z/ni
160
- return capability_response
161
- when /\A(?:NOOP)\z/ni
162
- return ignored_response
163
- else
164
- return text_response
165
- end
166
- else
167
- parse_error("unexpected token %s", token.symbol)
168
- end
477
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
478
+ # resp-specials = "]"
479
+ ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
480
+
481
+ ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
482
+
483
+ # tag = 1*<any ASTRING-CHAR except "+">
484
+ TAG_TOKENS = (ASTRING_CHARS_TOKENS - [T_PLUS]).freeze
485
+
486
+ # TODO: handle atom, astring_chars, and tag entirely inside the lexer
487
+ def atom; combine_adjacent(*ATOM_TOKENS) end
488
+ def astring_chars; combine_adjacent(*ASTRING_CHARS_TOKENS) end
489
+ def tag; combine_adjacent(*TAG_TOKENS) end
490
+
491
+ # the #accept version of #atom
492
+ def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
493
+
494
+ # Returns <tt>atom.upcase</tt>
495
+ def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
496
+
497
+ # Returns <tt>atom?&.upcase</tt>
498
+ def case_insensitive__atom?
499
+ -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
169
500
  end
170
501
 
171
- def response_tagged
172
- tag = astring_chars
173
- match(T_SPACE)
174
- token = match(T_ATOM)
175
- name = token.value.upcase
176
- match(T_SPACE)
177
- return TaggedResponse.new(tag, name, resp_text, @str)
502
+ # astring = 1*ASTRING-CHAR / string
503
+ def astring
504
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
178
505
  end
179
506
 
180
- def response_cond
181
- token = match(T_ATOM)
182
- name = token.value.upcase
183
- match(T_SPACE)
184
- return UntaggedResponse.new(name, resp_text, @str)
507
+ def astring?
508
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
185
509
  end
186
510
 
187
- def numeric_response
188
- n = number
189
- match(T_SPACE)
190
- token = match(T_ATOM)
191
- name = token.value.upcase
192
- case name
193
- when "EXISTS", "RECENT", "EXPUNGE"
194
- return UntaggedResponse.new(name, n, @str)
195
- when "FETCH"
196
- shift_token
197
- match(T_SPACE)
198
- data = FetchData.new(n, msg_att(n))
199
- return UntaggedResponse.new(name, data, @str)
200
- end
511
+ # Use #label or #label_in to assert specific known labels
512
+ # (+tagged-ext-label+ only, not +atom+).
513
+ def label(word)
514
+ (val = tagged_ext_label) == word and return val
515
+ parse_error("unexpected atom %p, expected %p instead", val, word)
201
516
  end
202
517
 
203
- def msg_att(n)
204
- match(T_LPAR)
205
- attr = {}
206
- while true
207
- token = lookahead
208
- case token.symbol
209
- when T_RPAR
210
- shift_token
211
- break
212
- when T_SPACE
213
- shift_token
214
- next
215
- end
216
- case token.value
217
- when /\A(?:ENVELOPE)\z/ni
218
- name, val = envelope_data
219
- when /\A(?:FLAGS)\z/ni
220
- name, val = flags_data
221
- when /\A(?:INTERNALDATE)\z/ni
222
- name, val = internaldate_data
223
- when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
224
- name, val = rfc822_text
225
- when /\A(?:RFC822\.SIZE)\z/ni
226
- name, val = rfc822_size
227
- when /\A(?:BODY(?:STRUCTURE)?)\z/ni
228
- name, val = body_data
229
- when /\A(?:UID)\z/ni
230
- name, val = uid_data
231
- when /\A(?:MODSEQ)\z/ni
232
- name, val = modseq_data
233
- else
234
- parse_error("unknown attribute `%s' for {%d}", token.value, n)
235
- end
236
- attr[name] = val
237
- end
238
- return attr
518
+ # Use #label or #label_in to assert specific known labels
519
+ # (+tagged-ext-label+ only, not +atom+).
520
+ def label_in(*labels)
521
+ lbl = tagged_ext_label and labels.include?(lbl) and return lbl
522
+ parse_error("unexpected atom %p, expected one of %s instead",
523
+ lbl, labels.join(" or "))
239
524
  end
240
525
 
241
- def envelope_data
242
- token = match(T_ATOM)
243
- name = token.value.upcase
244
- match(T_SPACE)
245
- return name, envelope
526
+ # expects "OK" or "PREAUTH" and raises InvalidResponseError on failure
527
+ def resp_cond_auth__name
528
+ lbl = tagged_ext_label and AUTH_CONDS.include? lbl and return lbl
529
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
530
+ lbl, AUTH_CONDS.join(" or ")
531
+ ]
246
532
  end
247
533
 
248
- def envelope
249
- @lex_state = EXPR_DATA
250
- token = lookahead
251
- if token.symbol == T_NIL
252
- shift_token
253
- result = nil
254
- else
255
- match(T_LPAR)
256
- date = nstring
257
- match(T_SPACE)
258
- subject = nstring
259
- match(T_SPACE)
260
- from = address_list
261
- match(T_SPACE)
262
- sender = address_list
263
- match(T_SPACE)
264
- reply_to = address_list
265
- match(T_SPACE)
266
- to = address_list
267
- match(T_SPACE)
268
- cc = address_list
269
- match(T_SPACE)
270
- bcc = address_list
271
- match(T_SPACE)
272
- in_reply_to = nstring
273
- match(T_SPACE)
274
- message_id = nstring
275
- match(T_RPAR)
276
- result = Envelope.new(date, subject, from, sender, reply_to,
277
- to, cc, bcc, in_reply_to, message_id)
278
- end
279
- @lex_state = EXPR_BEG
280
- return result
534
+ # expects "OK" or "NO" or "BAD" and raises InvalidResponseError on failure
535
+ def resp_cond_state__name
536
+ lbl = tagged_ext_label and RESP_COND_STATES.include? lbl and return lbl
537
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
538
+ lbl, RESP_COND_STATES.join(" or ")
539
+ ]
281
540
  end
282
541
 
283
- def flags_data
284
- token = match(T_ATOM)
285
- name = token.value.upcase
286
- match(T_SPACE)
287
- return name, flag_list
542
+ # nstring = string / nil
543
+ def nstring
544
+ NIL? ? nil : string
288
545
  end
289
546
 
290
- def internaldate_data
291
- token = match(T_ATOM)
292
- name = token.value.upcase
293
- match(T_SPACE)
294
- token = match(T_QUOTED)
295
- return name, token.value
547
+ def nstring8
548
+ NIL? ? nil : string8
296
549
  end
297
550
 
298
- def rfc822_text
299
- token = match(T_ATOM)
300
- name = token.value.upcase
301
- token = lookahead
302
- if token.symbol == T_LBRA
303
- shift_token
304
- match(T_RBRA)
305
- end
306
- match(T_SPACE)
307
- return name, nstring
551
+ def nquoted
552
+ NIL? ? nil : quoted
308
553
  end
309
554
 
310
- def rfc822_size
311
- token = match(T_ATOM)
312
- name = token.value.upcase
313
- match(T_SPACE)
314
- return name, number
555
+ # use where nstring represents "LABEL" values
556
+ def case_insensitive__nstring
557
+ NIL? ? nil : case_insensitive__string
315
558
  end
316
559
 
317
- def body_data
318
- token = match(T_ATOM)
319
- name = token.value.upcase
320
- token = lookahead
321
- if token.symbol == T_SPACE
322
- shift_token
323
- return name, body
324
- end
325
- name.concat(section)
326
- token = lookahead
327
- if token.symbol == T_ATOM
328
- name.concat(token.value)
329
- shift_token
560
+ # tagged-ext-comp = astring /
561
+ # tagged-ext-comp *(SP tagged-ext-comp) /
562
+ # "(" tagged-ext-comp ")"
563
+ # ; Extensions that follow this general
564
+ # ; syntax should use nstring instead of
565
+ # ; astring when appropriate in the context
566
+ # ; of the extension.
567
+ # ; Note that a message set or a "number"
568
+ # ; can always be represented as an "atom".
569
+ # ; A URL should be represented as
570
+ # ; a "quoted" string.
571
+ def tagged_ext_comp
572
+ vals = []
573
+ while true
574
+ vals << case lookahead!(*ASTRING_TOKENS, T_LPAR).symbol
575
+ when T_LPAR then lpar; ary = tagged_ext_comp; rpar; ary
576
+ when T_NUMBER then number
577
+ else astring
578
+ end
579
+ SP? or break
330
580
  end
331
- match(T_SPACE)
332
- data = nstring
333
- return name, data
581
+ vals
334
582
  end
335
583
 
336
- def body
337
- @lex_state = EXPR_DATA
338
- token = lookahead
339
- if token.symbol == T_NIL
340
- shift_token
341
- result = nil
584
+ # tagged-ext-simple is a subset of atom
585
+ # TODO: recognize sequence-set in the lexer
586
+ #
587
+ # tagged-ext-simple = sequence-set / number / number64
588
+ def tagged_ext_simple
589
+ number? || sequence_set
590
+ end
591
+
592
+ # tagged-ext-val = tagged-ext-simple /
593
+ # "(" [tagged-ext-comp] ")"
594
+ def tagged_ext_val
595
+ if lpar?
596
+ _ = peek_rpar? ? [] : tagged_ext_comp
597
+ rpar
598
+ _
342
599
  else
343
- match(T_LPAR)
344
- token = lookahead
345
- if token.symbol == T_LPAR
346
- result = body_type_mpart
347
- else
348
- result = body_type_1part
600
+ tagged_ext_simple
601
+ end
602
+ end
603
+
604
+ # mailbox = "INBOX" / astring
605
+ # ; INBOX is case-insensitive. All case variants of
606
+ # ; INBOX (e.g., "iNbOx") MUST be interpreted as INBOX
607
+ # ; not as an astring. An astring which consists of
608
+ # ; the case-insensitive sequence "I" "N" "B" "O" "X"
609
+ # ; is considered to be INBOX and not an astring.
610
+ # ; Refer to section 5.1 for further
611
+ # ; semantic details of mailbox names.
612
+ alias mailbox astring
613
+
614
+ # valid number ranges are not enforced by parser
615
+ # number64 = 1*DIGIT
616
+ # ; Unsigned 63-bit integer
617
+ # ; (0 <= n <= 9,223,372,036,854,775,807)
618
+ alias number64 number
619
+ alias number64? number?
620
+
621
+ # valid number ranges are not enforced by parser
622
+ # nz-number = digit-nz *DIGIT
623
+ # ; Non-zero unsigned 32-bit integer
624
+ # ; (0 < n < 4,294,967,296)
625
+ alias nz_number number
626
+ alias nz_number? number?
627
+
628
+ # valid number ranges are not enforced by parser
629
+ # nz-number64 = digit-nz *DIGIT
630
+ # ; Unsigned 63-bit integer
631
+ # ; (0 < n <= 9,223,372,036,854,775,807)
632
+ alias nz_number64 nz_number
633
+
634
+ # valid number ranges are not enforced by parser
635
+ # uniqueid = nz-number
636
+ # ; Strictly ascending
637
+ alias uniqueid nz_number
638
+
639
+ # valid number ranges are not enforced by parser
640
+ #
641
+ # An X-GM-MSGID or X-GM-THRID value is a 64-bit unsigned integer: the decimal
642
+ # equivalent of the ID hex string used in the web interface and the Gmail API.
643
+ alias x_gm_id number
644
+
645
+ # [RFC3501 & RFC9051:]
646
+ # response = *(continue-req / response-data) response-done
647
+ #
648
+ # For simplicity, response isn't interpreted as the combination of the
649
+ # three response types, but instead represents any individual server
650
+ # response. Our simplified interpretation is defined as:
651
+ # response = continue-req | response-data | response-tagged
652
+ #
653
+ # n.b: our "response-data" definition parses "greeting" too.
654
+ def response
655
+ resp = case lookahead!(T_PLUS, T_STAR, *TAG_TOKENS).symbol
656
+ when T_PLUS then continue_req
657
+ when T_STAR then response_data
658
+ else response_tagged
659
+ end
660
+ accept_spaces # QUIRKY: Ignore trailing space (MS Exchange Server?)
661
+ CRLF!
662
+ EOF!
663
+ resp
664
+ end
665
+
666
+ # RFC3501 & RFC9051:
667
+ # continue-req = "+" SP (resp-text / base64) CRLF
668
+ #
669
+ # n.b: base64 is valid resp-text. And in the spirit of RFC9051 Appx E 23
670
+ # (and to workaround existing servers), we use the following grammar:
671
+ #
672
+ # continue-req = "+" [SP resp-text] CRLF
673
+ def continue_req
674
+ PLUS!
675
+ ContinuationRequest.new(SP? ? resp_text : ResponseText::EMPTY, @str)
676
+ end
677
+
678
+ RE_RESPONSE_TYPE = /\G(?:\d+ )?(?<type>#{Patterns::TAGGED_EXT_LABEL})/n
679
+
680
+ # [RFC3501:]
681
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
682
+ # mailbox-data / message-data / capability-data) CRLF
683
+ # [RFC4466:]
684
+ # response-data = "*" SP response-payload CRLF
685
+ # response-payload = resp-cond-state / resp-cond-bye /
686
+ # mailbox-data / message-data / capability-data
687
+ # RFC5161 (ENABLE capability):
688
+ # response-data =/ "*" SP enable-data CRLF
689
+ # RFC5255 (LANGUAGE capability)
690
+ # response-payload =/ language-data
691
+ # RFC5255 (I18NLEVEL=1 and I18NLEVEL=2 capabilities)
692
+ # response-payload =/ comparator-data
693
+ # [RFC9051:]
694
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
695
+ # mailbox-data / message-data / capability-data /
696
+ # enable-data) CRLF
697
+ #
698
+ # [merging in greeting and response-fatal:]
699
+ # greeting = "*" SP (resp-cond-auth / resp-cond-bye) CRLF
700
+ # response-fatal = "*" SP resp-cond-bye CRLF
701
+ # response-data =/ "*" SP (resp-cond-auth / resp-cond-bye) CRLF
702
+ # [removing duplicates, this is simply]
703
+ # response-payload =/ resp-cond-auth
704
+ #
705
+ # TODO: remove resp-cond-auth and handle greeting separately
706
+ def response_data
707
+ STAR!; SP!
708
+ m = peek_re(RE_RESPONSE_TYPE) or parse_error("unparsable response")
709
+ case m["type"].upcase
710
+ when "OK" then resp_cond_state__untagged # RFC3501, RFC9051
711
+ when "FETCH" then message_data__fetch # RFC3501, RFC9051
712
+ when "EXPUNGE" then message_data__expunge # RFC3501, RFC9051
713
+ when "EXISTS" then mailbox_data__exists # RFC3501, RFC9051
714
+ when "ESEARCH" then esearch_response # RFC4731, RFC9051, etc
715
+ when "VANISHED" then expunged_resp # RFC7162
716
+ when "UIDFETCH" then uidfetch_resp # (draft) UIDONLY
717
+ when "SEARCH" then mailbox_data__search # RFC3501 (obsolete)
718
+ when "CAPABILITY" then capability_data__untagged # RFC3501, RFC9051
719
+ when "FLAGS" then mailbox_data__flags # RFC3501, RFC9051
720
+ when "LIST" then mailbox_data__list # RFC3501, RFC9051
721
+ when "STATUS" then mailbox_data__status # RFC3501, RFC9051
722
+ when "NAMESPACE" then namespace_response # RFC2342, RFC9051
723
+ when "ENABLED" then enable_data # RFC5161, RFC9051
724
+ when "BAD" then resp_cond_state__untagged # RFC3501, RFC9051
725
+ when "NO" then resp_cond_state__untagged # RFC3501, RFC9051
726
+ when "PREAUTH" then resp_cond_auth # RFC3501, RFC9051
727
+ when "BYE" then resp_cond_bye # RFC3501, RFC9051
728
+ when "RECENT" then mailbox_data__recent # RFC3501 (obsolete)
729
+ when "SORT" then sort_data # RFC5256, RFC7162
730
+ when "THREAD" then thread_data # RFC5256
731
+ when "QUOTA" then quota_response # RFC2087, RFC9208
732
+ when "QUOTAROOT" then quotaroot_response # RFC2087, RFC9208
733
+ when "ID" then id_response # RFC2971
734
+ when "ACL" then acl_data # RFC4314
735
+ when "LISTRIGHTS" then listrights_data # RFC4314
736
+ when "MYRIGHTS" then myrights_data # RFC4314
737
+ when "METADATA" then metadata_resp # RFC5464
738
+ when "LANGUAGE" then language_data # RFC5255
739
+ when "COMPARATOR" then comparator_data # RFC5255
740
+ when "CONVERTED" then message_data__converted # RFC5259
741
+ when "LSUB" then mailbox_data__lsub # RFC3501 (obsolete)
742
+ when "XLIST" then mailbox_data__xlist # deprecated
743
+ when "NOOP" then response_data__noop
744
+ else response_data__unhandled
745
+ end
746
+ end
747
+
748
+ def response_data__unhandled(klass = UntaggedResponse)
749
+ num = number?; SP?
750
+ type = tagged_ext_label; SP?
751
+ text = remaining_unparsed
752
+ data =
753
+ if num && text then UnparsedNumericResponseData.new(num, text)
754
+ elsif text then UnparsedData.new(text)
755
+ else num
349
756
  end
350
- match(T_RPAR)
351
- end
352
- @lex_state = EXPR_BEG
353
- return result
757
+ klass.new(type, data, @str)
354
758
  end
355
759
 
356
- def body_type_1part
357
- token = lookahead
358
- case token.value
359
- when /\A(?:TEXT)\z/ni
360
- return body_type_text
361
- when /\A(?:MESSAGE)\z/ni
362
- return body_type_msg
363
- when /\A(?:ATTACHMENT)\z/ni
364
- return body_type_attachment
365
- when /\A(?:MIXED)\z/ni
366
- return body_type_mixed
367
- else
368
- return body_type_basic
369
- end
760
+ # reads all the way up until CRLF
761
+ def remaining_unparsed
762
+ str = @str[@pos...-2] and @pos += str.bytesize
763
+ str&.empty? ? nil : str
370
764
  end
371
765
 
372
- def body_type_basic
373
- mtype, msubtype = media_type
374
- token = lookahead
375
- if token.symbol == T_RPAR
376
- return BodyTypeBasic.new(mtype, msubtype)
377
- end
378
- match(T_SPACE)
379
- param, content_id, desc, enc, size = body_fields
380
- md5, disposition, language, extension = body_ext_1part
381
- return BodyTypeBasic.new(mtype, msubtype,
382
- param, content_id,
383
- desc, enc, size,
384
- md5, disposition, language, extension)
766
+ def response_data__ignored; response_data__unhandled(IgnoredResponse) end
767
+ alias response_data__noop response_data__ignored
768
+
769
+ alias esearch_response response_data__unhandled
770
+ alias expunged_resp response_data__unhandled
771
+ alias uidfetch_resp response_data__unhandled
772
+ alias listrights_data response_data__unhandled
773
+ alias myrights_data response_data__unhandled
774
+ alias metadata_resp response_data__unhandled
775
+ alias language_data response_data__unhandled
776
+ alias comparator_data response_data__unhandled
777
+ alias message_data__converted response_data__unhandled
778
+
779
+ # RFC3501 & RFC9051:
780
+ # response-tagged = tag SP resp-cond-state CRLF
781
+ def response_tagged
782
+ TaggedResponse.new(tag, *(SP!; resp_cond_state), @str)
385
783
  end
386
784
 
387
- def body_type_text
388
- mtype, msubtype = media_type
389
- match(T_SPACE)
390
- param, content_id, desc, enc, size = body_fields
391
- match(T_SPACE)
392
- lines = number
393
- md5, disposition, language, extension = body_ext_1part
394
- return BodyTypeText.new(mtype, msubtype,
395
- param, content_id,
396
- desc, enc, size,
397
- lines,
398
- md5, disposition, language, extension)
785
+ # RFC3501 & RFC9051:
786
+ # resp-cond-state = ("OK" / "NO" / "BAD") SP resp-text
787
+ #
788
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
789
+ # servers), we don't require a final SP and instead parse this as:
790
+ #
791
+ # resp-cond-state = ("OK" / "NO" / "BAD") [SP resp-text]
792
+ def resp_cond_state
793
+ [resp_cond_state__name, SP? ? resp_text : ResponseText::EMPTY]
399
794
  end
400
795
 
401
- def body_type_msg
402
- mtype, msubtype = media_type
403
- match(T_SPACE)
404
- param, content_id, desc, enc, size = body_fields
796
+ def resp_cond_state__untagged
797
+ UntaggedResponse.new(*resp_cond_state, @str)
798
+ end
405
799
 
406
- token = lookahead
407
- if token.symbol == T_RPAR
408
- # If this is not message/rfc822, we shouldn't apply the RFC822
409
- # spec to it. We should handle anything other than
410
- # message/rfc822 using multipart extension data [rfc3501] (i.e.
411
- # the data itself won't be returned, we would have to retrieve it
412
- # with BODYSTRUCTURE instead of with BODY
413
-
414
- # Also, sometimes a message/rfc822 is included as a large
415
- # attachment instead of having all of the other details
416
- # (e.g. attaching a .eml file to an email)
417
- if msubtype == "RFC822"
418
- return BodyTypeMessage.new(mtype, msubtype, param, content_id,
419
- desc, enc, size, nil, nil, nil, nil,
420
- nil, nil, nil)
421
- else
422
- return BodyTypeExtension.new(mtype, msubtype,
423
- param, content_id,
424
- desc, enc, size)
425
- end
426
- end
800
+ # resp-cond-auth = ("OK" / "PREAUTH") SP resp-text
801
+ #
802
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
803
+ # servers), we don't require a final SP and instead parse this as:
804
+ #
805
+ # resp-cond-auth = ("OK" / "PREAUTH") [SP resp-text]
806
+ def resp_cond_auth
807
+ UntaggedResponse.new(resp_cond_auth__name,
808
+ SP? ? resp_text : ResponseText::EMPTY,
809
+ @str)
810
+ end
427
811
 
428
- match(T_SPACE)
429
- env = envelope
430
- match(T_SPACE)
431
- b = body
432
- match(T_SPACE)
433
- lines = number
434
- md5, disposition, language, extension = body_ext_1part
435
- return BodyTypeMessage.new(mtype, msubtype,
436
- param, content_id,
437
- desc, enc, size,
438
- env, b, lines,
439
- md5, disposition, language, extension)
812
+ # resp-cond-bye = "BYE" SP resp-text
813
+ #
814
+ # NOTE: In the spirit of RFC9051 Appx E 23 (and to workaround existing
815
+ # servers), we don't require a final SP and instead parse this as:
816
+ #
817
+ # resp-cond-bye = "BYE" [SP resp-text]
818
+ def resp_cond_bye
819
+ UntaggedResponse.new(label(BYE),
820
+ SP? ? resp_text : ResponseText::EMPTY,
821
+ @str)
440
822
  end
441
823
 
442
- def body_type_attachment
443
- mtype = case_insensitive_string
444
- match(T_SPACE)
445
- param = body_fld_param
446
- return BodyTypeAttachment.new(mtype, nil, param)
824
+ # message-data = nz-number SP ("EXPUNGE" / ("FETCH" SP msg-att))
825
+ def message_data__fetch
826
+ seq = nz_number; SP!
827
+ name = label "FETCH"; SP!
828
+ data = FetchData.new(seq, msg_att(seq))
829
+ UntaggedResponse.new(name, data, @str)
447
830
  end
448
831
 
449
- def body_type_mixed
450
- mtype = "MULTIPART"
451
- msubtype = case_insensitive_string
452
- param, disposition, language, extension = body_ext_mpart
453
- return BodyTypeBasic.new(mtype, msubtype, param, nil, nil, nil, nil, nil, disposition, language, extension)
832
+ def response_data__simple_numeric
833
+ data = nz_number; SP!
834
+ name = tagged_ext_label
835
+ UntaggedResponse.new(name, data, @str)
454
836
  end
455
837
 
456
- def body_type_mpart
457
- parts = []
838
+ alias message_data__expunge response_data__simple_numeric
839
+ alias mailbox_data__exists response_data__simple_numeric
840
+ alias mailbox_data__recent response_data__simple_numeric
841
+
842
+ # RFC3501 & RFC9051:
843
+ # msg-att = "(" (msg-att-dynamic / msg-att-static)
844
+ # *(SP (msg-att-dynamic / msg-att-static)) ")"
845
+ #
846
+ # msg-att-dynamic = "FLAGS" SP "(" [flag-fetch *(SP flag-fetch)] ")"
847
+ # RFC5257 (ANNOTATE extension):
848
+ # msg-att-dynamic =/ "ANNOTATION" SP
849
+ # ( "(" entry-att *(SP entry-att) ")" /
850
+ # "(" entry *(SP entry) ")" )
851
+ # RFC7162 (CONDSTORE extension):
852
+ # msg-att-dynamic =/ fetch-mod-resp
853
+ # fetch-mod-resp = "MODSEQ" SP "(" permsg-modsequence ")"
854
+ # RFC8970 (PREVIEW extension):
855
+ # msg-att-dynamic =/ "PREVIEW" SP nstring
856
+ #
857
+ # RFC3501:
858
+ # msg-att-static = "ENVELOPE" SP envelope /
859
+ # "INTERNALDATE" SP date-time /
860
+ # "RFC822" [".HEADER" / ".TEXT"] SP nstring /
861
+ # "RFC822.SIZE" SP number /
862
+ # "BODY" ["STRUCTURE"] SP body /
863
+ # "BODY" section ["<" number ">"] SP nstring /
864
+ # "UID" SP uniqueid
865
+ # RFC3516 (BINARY extension):
866
+ # msg-att-static =/ "BINARY" section-binary SP (nstring / literal8)
867
+ # / "BINARY.SIZE" section-binary SP number
868
+ # RFC8514 (SAVEDATE extension):
869
+ # msg-att-static =/ "SAVEDATE" SP (date-time / nil)
870
+ # RFC8474 (OBJECTID extension):
871
+ # msg-att-static =/ fetch-emailid-resp / fetch-threadid-resp
872
+ # fetch-emailid-resp = "EMAILID" SP "(" objectid ")"
873
+ # fetch-threadid-resp = "THREADID" SP ( "(" objectid ")" / nil )
874
+ # RFC9051:
875
+ # msg-att-static = "ENVELOPE" SP envelope /
876
+ # "INTERNALDATE" SP date-time /
877
+ # "RFC822.SIZE" SP number64 /
878
+ # "BODY" ["STRUCTURE"] SP body /
879
+ # "BODY" section ["<" number ">"] SP nstring /
880
+ # "BINARY" section-binary SP (nstring / literal8) /
881
+ # "BINARY.SIZE" section-binary SP number /
882
+ # "UID" SP uniqueid
883
+ #
884
+ # Re https://www.rfc-editor.org/errata/eid7246, I'm adding "offset" to the
885
+ # official "BINARY" ABNF, like so:
886
+ #
887
+ # msg-att-static =/ "BINARY" section-binary ["<" number ">"] SP
888
+ # (nstring / literal8)
889
+ def msg_att(n)
890
+ lpar
891
+ attr = {}
458
892
  while true
459
- token = lookahead
460
- if token.symbol == T_SPACE
461
- shift_token
462
- break
463
- end
464
- parts.push(body)
893
+ name = msg_att__label; SP!
894
+ val =
895
+ case name
896
+ when "UID" then uniqueid
897
+ when "FLAGS" then flag_list
898
+ when "BODY" then body
899
+ when /\ABODY\[/ni then nstring
900
+ when "BODYSTRUCTURE" then body
901
+ when "ENVELOPE" then envelope
902
+ when "INTERNALDATE" then date_time
903
+ when "RFC822.SIZE" then number64
904
+ when /\ABINARY\[/ni then nstring8 # BINARY, IMAP4rev2
905
+ when /\ABINARY\.SIZE\[/ni then number # BINARY, IMAP4rev2
906
+ when "RFC822" then nstring # not in rev2
907
+ when "RFC822.HEADER" then nstring # not in rev2
908
+ when "RFC822.TEXT" then nstring # not in rev2
909
+ when "MODSEQ" then parens__modseq # CONDSTORE
910
+ when "EMAILID" then parens__objectid # OBJECTID
911
+ when "THREADID" then nparens__objectid # OBJECTID
912
+ when "X-GM-MSGID" then x_gm_id # GMail
913
+ when "X-GM-THRID" then x_gm_id # GMail
914
+ when "X-GM-LABELS" then x_gm_labels # GMail
915
+ else parse_error("unknown attribute `%s' for {%d}", name, n)
916
+ end
917
+ attr[name] = val
918
+ break unless SP?
919
+ break if lookahead_rpar?
465
920
  end
466
- mtype = "MULTIPART"
467
- msubtype = case_insensitive_string
468
- param, disposition, language, extension = body_ext_mpart
469
- return BodyTypeMultipart.new(mtype, msubtype, parts,
470
- param, disposition, language,
471
- extension)
921
+ rpar
922
+ attr
472
923
  end
473
924
 
474
- def media_type
475
- mtype = case_insensitive_string
476
- token = lookahead
477
- if token.symbol != T_SPACE
478
- return mtype, nil
925
+ # appends "[section]" and "<partial>" to the base label
926
+ def msg_att__label
927
+ case (name = tagged_ext_label)
928
+ when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
929
+ # ignoring "[]" fixes https://bugs.ruby-lang.org/issues/5620
930
+ lbra? and rbra
931
+ when "BODY"
932
+ peek_lbra? and name << section and
933
+ peek_str?("<") and name << gt__number__lt # partial
934
+ when "BINARY", "BINARY.SIZE"
935
+ name << section_binary
936
+ # see https://www.rfc-editor.org/errata/eid7246 and the note above
937
+ peek_str?("<") and name << gt__number__lt # partial
479
938
  end
480
- match(T_SPACE)
481
- msubtype = case_insensitive_string
482
- return mtype, msubtype
939
+ name
483
940
  end
484
941
 
485
- def body_fields
486
- param = body_fld_param
487
- match(T_SPACE)
488
- content_id = nstring
489
- match(T_SPACE)
490
- desc = nstring
491
- match(T_SPACE)
492
- enc = case_insensitive_string
493
- match(T_SPACE)
494
- size = number
495
- return param, content_id, desc, enc, size
496
- end
942
+ # this represents the partial's starting offset ("<" number ">") for BODY or BINARY
943
+ alias gt__number__lt atom
497
944
 
498
- def body_fld_param
499
- token = lookahead
500
- if token.symbol == T_NIL
501
- shift_token
502
- return nil
503
- end
504
- match(T_LPAR)
505
- param = {}
506
- while true
507
- token = lookahead
508
- case token.symbol
509
- when T_RPAR
510
- shift_token
511
- break
512
- when T_SPACE
513
- shift_token
514
- end
515
- name = case_insensitive_string
516
- match(T_SPACE)
517
- val = string
518
- param[name] = val
519
- end
520
- return param
945
+ # RFC3501 & RFC9051:
946
+ # envelope = "(" env-date SP env-subject SP env-from SP
947
+ # env-sender SP env-reply-to SP env-to SP env-cc SP
948
+ # env-bcc SP env-in-reply-to SP env-message-id ")"
949
+ def envelope
950
+ @lex_state = EXPR_DATA
951
+ lpar; date = env_date
952
+ SP!; subject = env_subject
953
+ SP!; from = env_from
954
+ SP!; sender = env_sender
955
+ SP!; reply_to = env_reply_to
956
+ SP!; to = env_to
957
+ SP!; cc = env_cc
958
+ SP!; bcc = env_bcc
959
+ SP!; in_reply_to = env_in_reply_to
960
+ SP!; message_id = env_message_id
961
+ rpar
962
+ Envelope.new(date, subject, from, sender, reply_to,
963
+ to, cc, bcc, in_reply_to, message_id)
964
+ ensure
965
+ @lex_state = EXPR_BEG
521
966
  end
522
967
 
523
- def body_ext_1part
524
- token = lookahead
525
- if token.symbol == T_SPACE
526
- shift_token
527
- else
528
- return nil
529
- end
530
- md5 = nstring
968
+ # env-date = nstring
969
+ # env-subject = nstring
970
+ # env-in-reply-to = nstring
971
+ # env-message-id = nstring
972
+ alias env_date nstring
973
+ alias env_subject nstring
974
+ alias env_in_reply_to nstring
975
+ alias env_message_id nstring
976
+
977
+ # env-from = "(" 1*address ")" / nil
978
+ # env-sender = "(" 1*address ")" / nil
979
+ # env-reply-to = "(" 1*address ")" / nil
980
+ # env-to = "(" 1*address ")" / nil
981
+ # env-cc = "(" 1*address ")" / nil
982
+ # env-bcc = "(" 1*address ")" / nil
983
+ def nlist__address
984
+ return if NIL?
985
+ lpar; list = [address]; list << address until (quirky_SP?; rpar?)
986
+ list
987
+ end
988
+
989
+ alias env_from nlist__address
990
+ alias env_sender nlist__address
991
+ alias env_reply_to nlist__address
992
+ alias env_to nlist__address
993
+ alias env_cc nlist__address
994
+ alias env_bcc nlist__address
995
+
996
+ # Used when servers erroneously send an extra SP.
997
+ #
998
+ # As of 2023-11-28, Outlook.com (still) sends SP
999
+ # between +address+ in <tt>env-*</tt> lists.
1000
+ alias quirky_SP? SP?
531
1001
 
532
- token = lookahead
533
- if token.symbol == T_SPACE
534
- shift_token
535
- else
536
- return md5
537
- end
538
- disposition = body_fld_dsp
1002
+ # date-time = DQUOTE date-day-fixed "-" date-month "-" date-year
1003
+ # SP time SP zone DQUOTE
1004
+ alias date_time quoted
1005
+ alias ndatetime nquoted
539
1006
 
540
- token = lookahead
541
- if token.symbol == T_SPACE
542
- shift_token
543
- else
544
- return md5, disposition
545
- end
546
- language = body_fld_lang
1007
+ # RFC-3501 & RFC-9051:
1008
+ # body = "(" (body-type-1part / body-type-mpart) ")"
1009
+ def body
1010
+ @lex_state = EXPR_DATA
1011
+ lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar
1012
+ result
1013
+ ensure
1014
+ @lex_state = EXPR_BEG
1015
+ end
1016
+ alias lookahead_body? lookahead_lpar?
547
1017
 
548
- token = lookahead
549
- if token.symbol == T_SPACE
550
- shift_token
551
- else
552
- return md5, disposition, language
553
- end
1018
+ # RFC-3501 & RFC9051:
1019
+ # body-type-1part = (body-type-basic / body-type-msg / body-type-text)
1020
+ # [SP body-ext-1part]
1021
+ def body_type_1part
1022
+ # This regexp peek is a performance optimization.
1023
+ # The lookahead fallback would work fine too.
1024
+ m = peek_re(/\G(?:
1025
+ (?<TEXT> "TEXT" \s "[^"]+" )
1026
+ |(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
1027
+ |(?<BASIC> "[^"]+" \s "[^"]+" )
1028
+ |(?<MIXED> "MIXED" )
1029
+ )/nix)
1030
+ choice = m&.named_captures&.compact&.keys&.first
1031
+ # In practice, the following line should never be used. But the ABNF
1032
+ # *does* allow literals, and this will handle them.
1033
+ choice ||= lookahead_case_insensitive__string!
1034
+ case choice
1035
+ when "BASIC" then body_type_basic # => BodyTypeBasic
1036
+ when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
1037
+ when "TEXT" then body_type_text # => BodyTypeText
1038
+ when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
1039
+ else body_type_basic # might be a bug; server's or ours?
1040
+ end
1041
+ end
1042
+
1043
+ # RFC-3501 & RFC9051:
1044
+ # body-type-basic = media-basic SP body-fields
1045
+ def body_type_basic
1046
+ type = media_basic # n.b. "basic" type isn't enforced here
1047
+ if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid
1048
+ SP!; flds = body_fields
1049
+ SP? and exts = body_ext_1part
1050
+ BodyTypeBasic.new(*type, *flds, *exts)
1051
+ end
554
1052
 
555
- extension = body_extensions
556
- return md5, disposition, language, extension
1053
+ # RFC-3501 & RFC-9051:
1054
+ # body-type-text = media-text SP body-fields SP body-fld-lines
1055
+ def body_type_text
1056
+ type = media_text
1057
+ SP!; flds = body_fields
1058
+ SP!; lines = body_fld_lines
1059
+ SP? and exts = body_ext_1part
1060
+ BodyTypeText.new(*type, *flds, lines, *exts)
557
1061
  end
558
1062
 
559
- def body_ext_mpart
560
- token = lookahead
561
- if token.symbol == T_SPACE
562
- shift_token
563
- else
564
- return nil
565
- end
566
- param = body_fld_param
1063
+ # RFC-3501 & RFC-9051:
1064
+ # body-type-msg = media-message SP body-fields SP envelope
1065
+ # SP body SP body-fld-lines
1066
+ def body_type_msg
1067
+ # n.b. "message/rfc822" type isn't enforced here
1068
+ type = media_message
1069
+ SP!; flds = body_fields
1070
+
1071
+ # Sometimes servers send body-type-basic where body-type-msg is expected.
1072
+ # E.g: when a message/rfc822 part has "Content-Disposition: attachment".
1073
+ #
1074
+ # * SP "(" --> SP envelope --> continue as body-type-msg
1075
+ # * ")" --> no body-ext-1part --> completed body-type-basic
1076
+ # * SP nstring --> SP body-fld-md5
1077
+ # --> SP body-ext-1part --> continue as body-type-basic
1078
+ #
1079
+ # It's probably better to return BodyTypeBasic---even for
1080
+ # "message/rfc822"---than BodyTypeMessage with invalid fields.
1081
+ unless peek_str?(" (")
1082
+ SP? and exts = body_ext_1part
1083
+ return BodyTypeBasic.new(*type, *flds, *exts)
1084
+ end
1085
+
1086
+ SP!; env = envelope
1087
+ SP!; bdy = body
1088
+ SP!; lines = body_fld_lines
1089
+ SP? and exts = body_ext_1part
1090
+ BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts)
1091
+ end
1092
+
1093
+ # This is a malformed body-type-mpart with no subparts.
1094
+ def body_type_mixed
1095
+ # warn "malformed body-type-mpart: multipart/mixed with no parts."
1096
+ type = media_subtype # => "MIXED"
1097
+ SP? and exts = body_ext_mpart
1098
+ BodyTypeMultipart.new("MULTIPART", type, nil, *exts)
1099
+ end
567
1100
 
568
- token = lookahead
569
- if token.symbol == T_SPACE
570
- shift_token
571
- else
572
- return param
573
- end
574
- disposition = body_fld_dsp
1101
+ # RFC-3501 & RFC-9051:
1102
+ # body-type-mpart = 1*body SP media-subtype
1103
+ # [SP body-ext-mpart]
1104
+ def body_type_mpart
1105
+ parts = [body]; parts << body until SP?; msubtype = media_subtype
1106
+ SP? and exts = body_ext_mpart
1107
+ BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts)
1108
+ end
575
1109
 
576
- token = lookahead
577
- if token.symbol == T_SPACE
578
- shift_token
579
- else
580
- return param, disposition
581
- end
582
- language = body_fld_lang
1110
+ # n.b. this handles both type and subtype
1111
+ #
1112
+ # RFC-3501 vs RFC-9051:
1113
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
1114
+ # "MESSAGE" /
1115
+ # "VIDEO") DQUOTE) / string) SP media-subtype
1116
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
1117
+ # "FONT" / "MESSAGE" / "MODEL" /
1118
+ # "VIDEO") DQUOTE) / string) SP media-subtype
1119
+ #
1120
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
1121
+ # DQUOTE "RFC822" DQUOTE
1122
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
1123
+ # DQUOTE ("RFC822" / "GLOBAL") DQUOTE
1124
+ #
1125
+ # RFC-3501 & RFC-9051:
1126
+ # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
1127
+ # media-subtype = string
1128
+ def media_type
1129
+ mtype = case_insensitive__string
1130
+ SP? or return mtype, nil # ??? quirky!
1131
+ msubtype = media_subtype
1132
+ return mtype, msubtype
1133
+ end
583
1134
 
584
- token = lookahead
585
- if token.symbol == T_SPACE
586
- shift_token
587
- else
588
- return param, disposition, language
589
- end
1135
+ # TODO: check types
1136
+ alias media_basic media_type # */* --- catchall
1137
+ alias media_message media_type # message/rfc822, message/global
1138
+ alias media_text media_type # text/*
590
1139
 
591
- extension = body_extensions
592
- return param, disposition, language, extension
1140
+ alias media_subtype case_insensitive__string
1141
+
1142
+ # RFC-3501 & RFC-9051:
1143
+ # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
1144
+ # body-fld-enc SP body-fld-octets
1145
+ def body_fields
1146
+ fields = []
1147
+ fields << body_fld_param; SP!
1148
+ fields << body_fld_id; SP!
1149
+ fields << body_fld_desc; SP!
1150
+ fields << body_fld_enc; SP!
1151
+ fields << body_fld_octets
1152
+ fields
593
1153
  end
594
1154
 
1155
+ # RFC3501, RFC9051:
1156
+ # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
1157
+ def body_fld_param
1158
+ return if NIL?
1159
+ param = {}
1160
+ lpar
1161
+ name = case_insensitive__string; SP!; param[name] = string
1162
+ while SP?
1163
+ name = case_insensitive__string; SP!; param[name] = string
1164
+ end
1165
+ rpar
1166
+ param
1167
+ end
1168
+
1169
+ # RFC2060
1170
+ # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
1171
+ # [SPACE body_fld_lang
1172
+ # [SPACE 1#body_extension]]]
1173
+ # ;; MUST NOT be returned on non-extensible
1174
+ # ;; "BODY" fetch
1175
+ # RFC3501 & RFC9051
1176
+ # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
1177
+ # [SP body-fld-loc *(SP body-extension)]]]
1178
+ # ; MUST NOT be returned on non-extensible
1179
+ # ; "BODY" fetch
1180
+ def body_ext_1part
1181
+ fields = []; fields << body_fld_md5
1182
+ SP? or return fields; fields << body_fld_dsp
1183
+ SP? or return fields; fields << body_fld_lang
1184
+ SP? or return fields; fields << body_fld_loc
1185
+ SP? or return fields; fields << body_extensions
1186
+ fields
1187
+ end
1188
+
1189
+ # RFC-2060:
1190
+ # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
1191
+ # [SP 1#body_extension]]
1192
+ # ;; MUST NOT be returned on non-extensible
1193
+ # ;; "BODY" fetch
1194
+ # RFC-3501 & RFC-9051:
1195
+ # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
1196
+ # [SP body-fld-loc *(SP body-extension)]]]
1197
+ # ; MUST NOT be returned on non-extensible
1198
+ # ; "BODY" fetch
1199
+ def body_ext_mpart
1200
+ fields = []; fields << body_fld_param
1201
+ SP? or return fields; fields << body_fld_dsp
1202
+ SP? or return fields; fields << body_fld_lang
1203
+ SP? or return fields; fields << body_fld_loc
1204
+ SP? or return fields; fields << body_extensions
1205
+ fields
1206
+ end
1207
+
1208
+ alias body_fld_desc nstring
1209
+ alias body_fld_id nstring
1210
+ alias body_fld_loc nstring
1211
+ alias body_fld_lines number64 # number in 3501, number64 in 9051
1212
+ alias body_fld_md5 nstring
1213
+ alias body_fld_octets number
1214
+
1215
+ # RFC-3501 & RFC-9051:
1216
+ # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
1217
+ # "QUOTED-PRINTABLE") DQUOTE) / string
1218
+ alias body_fld_enc case_insensitive__string
1219
+
1220
+ # body-fld-dsp = "(" string SP body-fld-param ")" / nil
595
1221
  def body_fld_dsp
596
- token = lookahead
597
- if token.symbol == T_NIL
598
- shift_token
599
- return nil
600
- end
601
- match(T_LPAR)
602
- dsp_type = case_insensitive_string
603
- match(T_SPACE)
604
- param = body_fld_param
605
- match(T_RPAR)
606
- return ContentDisposition.new(dsp_type, param)
1222
+ return if NIL?
1223
+ lpar; dsp_type = case_insensitive__string
1224
+ SP!; param = body_fld_param
1225
+ rpar
1226
+ ContentDisposition.new(dsp_type, param)
607
1227
  end
608
1228
 
1229
+ # body-fld-lang = nstring / "(" string *(SP string) ")"
609
1230
  def body_fld_lang
610
- token = lookahead
611
- if token.symbol == T_LPAR
612
- shift_token
613
- result = []
614
- while true
615
- token = lookahead
616
- case token.symbol
617
- when T_RPAR
618
- shift_token
619
- return result
620
- when T_SPACE
621
- shift_token
622
- end
623
- result.push(case_insensitive_string)
624
- end
1231
+ if lpar?
1232
+ result = [case_insensitive__string]
1233
+ result << case_insensitive__string while SP?
1234
+ rpar
1235
+ result
625
1236
  else
626
- lang = nstring
627
- if lang
628
- return lang.upcase
629
- else
630
- return lang
631
- end
1237
+ case_insensitive__nstring
632
1238
  end
633
1239
  end
634
1240
 
1241
+ # body-extension *(SP body-extension)
635
1242
  def body_extensions
636
1243
  result = []
637
- while true
638
- token = lookahead
639
- case token.symbol
640
- when T_RPAR
641
- return result
642
- when T_SPACE
643
- shift_token
644
- end
645
- result.push(body_extension)
646
- end
1244
+ result << body_extension; while SP? do result << body_extension end
1245
+ result
647
1246
  end
648
1247
 
1248
+ # body-extension = nstring / number / number64 /
1249
+ # "(" body-extension *(SP body-extension) ")"
1250
+ # ; Future expansion. Client implementations
1251
+ # ; MUST accept body-extension fields. Server
1252
+ # ; implementations MUST NOT generate
1253
+ # ; body-extension fields except as defined by
1254
+ # ; future Standard or Standards Track
1255
+ # ; revisions of this specification.
649
1256
  def body_extension
650
- token = lookahead
651
- case token.symbol
652
- when T_LPAR
653
- shift_token
654
- result = body_extensions
655
- match(T_RPAR)
656
- return result
657
- when T_NUMBER
658
- return number
659
- else
660
- return nstring
1257
+ if (uint = number64?) then uint
1258
+ elsif lpar? then exts = body_extensions; rpar; exts
1259
+ else nstring
661
1260
  end
662
1261
  end
663
1262
 
1263
+ # section = "[" [section-spec] "]"
664
1264
  def section
665
- str = String.new
666
- token = match(T_LBRA)
667
- str.concat(token.value)
668
- token = match(T_ATOM, T_NUMBER, T_RBRA)
669
- if token.symbol == T_RBRA
670
- str.concat(token.value)
671
- return str
672
- end
673
- str.concat(token.value)
674
- token = lookahead
675
- if token.symbol == T_SPACE
676
- shift_token
677
- str.concat(token.value)
678
- token = match(T_LPAR)
679
- str.concat(token.value)
680
- while true
681
- token = lookahead
682
- case token.symbol
683
- when T_RPAR
684
- str.concat(token.value)
685
- shift_token
686
- break
687
- when T_SPACE
688
- shift_token
689
- str.concat(token.value)
690
- end
691
- str.concat(format_string(astring))
692
- end
693
- end
694
- token = match(T_RBRA)
695
- str.concat(token.value)
696
- return str
697
- end
698
-
699
- def format_string(str)
700
- case str
701
- when ""
702
- return '""'
703
- when /[\x80-\xff\r\n]/n
704
- # literal
705
- return "{" + str.bytesize.to_s + "}" + CRLF + str
706
- when /[(){ \x00-\x1f\x7f%*"\\]/n
707
- # quoted string
708
- return '"' + str.gsub(/["\\]/n, "\\\\\\&") + '"'
709
- else
710
- # atom
711
- return str
712
- end
713
- end
714
-
715
- def uid_data
716
- token = match(T_ATOM)
717
- name = token.value.upcase
718
- match(T_SPACE)
719
- return name, number
720
- end
721
-
722
- def modseq_data
723
- token = match(T_ATOM)
724
- name = token.value.upcase
725
- match(T_SPACE)
726
- match(T_LPAR)
727
- modseq = number
728
- match(T_RPAR)
729
- return name, modseq
730
- end
731
-
732
- def ignored_response
733
- while lookahead.symbol != T_CRLF
734
- shift_token
735
- end
736
- return IgnoredResponse.new(@str)
737
- end
738
-
739
- def text_response
740
- token = match(T_ATOM)
741
- name = token.value.upcase
742
- match(T_SPACE)
743
- return UntaggedResponse.new(name, text)
744
- end
745
-
746
- def flags_response
747
- token = match(T_ATOM)
748
- name = token.value.upcase
749
- match(T_SPACE)
750
- return UntaggedResponse.new(name, flag_list, @str)
751
- end
752
-
753
- def list_response
754
- token = match(T_ATOM)
755
- name = token.value.upcase
756
- match(T_SPACE)
757
- return UntaggedResponse.new(name, mailbox_list, @str)
758
- end
759
-
1265
+ str = +lbra
1266
+ str << section_spec unless peek_rbra?
1267
+ str << rbra
1268
+ end
1269
+
1270
+ # section-binary = "[" [section-part] "]"
1271
+ def section_binary
1272
+ str = +lbra
1273
+ str << section_part unless peek_rbra?
1274
+ str << rbra
1275
+ end
1276
+
1277
+ # section-spec = section-msgtext / (section-part ["." section-text])
1278
+ # section-msgtext = "HEADER" /
1279
+ # "HEADER.FIELDS" [".NOT"] SP header-list /
1280
+ # "TEXT"
1281
+ # ; top-level or MESSAGE/RFC822 or
1282
+ # ; MESSAGE/GLOBAL part
1283
+ # section-part = nz-number *("." nz-number)
1284
+ # ; body part reference.
1285
+ # ; Allows for accessing nested body parts.
1286
+ # section-text = section-msgtext / "MIME"
1287
+ # ; text other than actual body part (headers,
1288
+ # ; etc.)
1289
+ #
1290
+ # n.b: we could "cheat" here and just grab all text inside the brackets,
1291
+ # but literals would need special treatment.
1292
+ def section_spec
1293
+ str = "".b
1294
+ str << atom # grabs everything up to "SP header-list" or "]"
1295
+ str << " " << header_list if SP?
1296
+ str
1297
+ end
1298
+
1299
+ # header-list = "(" header-fld-name *(SP header-fld-name) ")"
1300
+ def header_list
1301
+ str = +""
1302
+ str << lpar << header_fld_name
1303
+ str << " " << header_fld_name while SP?
1304
+ str << rpar
1305
+ end
1306
+
1307
+ # section-part = nz-number *("." nz-number)
1308
+ # ; body part reference.
1309
+ # ; Allows for accessing nested body parts.
1310
+ alias section_part atom
1311
+
1312
+ # RFC3501 & RFC9051:
1313
+ # header-fld-name = astring
1314
+ #
1315
+ # NOTE: Previously, Net::IMAP recreated the raw original source string.
1316
+ # Now, it grabs the raw encoded value using @str and @pos. A future
1317
+ # version may simply return the decoded astring value. Although that is
1318
+ # technically incompatible, it should almost never make a difference: all
1319
+ # standard header field names are valid atoms:
1320
+ #
1321
+ # https://www.iana.org/assignments/message-headers/message-headers.xhtml
1322
+ #
1323
+ # Although RFC3501 allows any astring, RFC5322-valid header names are one
1324
+ # or more of the printable US-ASCII characters, except SP and colon. So
1325
+ # empty string isn't valid, and literals aren't needed and should not be
1326
+ # used. This is explicitly unchanged by [I18N-HDRS] (RFC6532).
1327
+ #
1328
+ # RFC5322:
1329
+ # optional-field = field-name ":" unstructured CRLF
1330
+ # field-name = 1*ftext
1331
+ # ftext = %d33-57 / ; Printable US-ASCII
1332
+ # %d59-126 ; characters not including
1333
+ # ; ":".
1334
+ def header_fld_name
1335
+ assert_no_lookahead
1336
+ start = @pos
1337
+ astring
1338
+ @str[start...@pos - 1]
1339
+ end
1340
+
1341
+ # mailbox-data = "FLAGS" SP flag-list / "LIST" SP mailbox-list /
1342
+ # "LSUB" SP mailbox-list / "SEARCH" *(SP nz-number) /
1343
+ # "STATUS" SP mailbox SP "(" [status-att-list] ")" /
1344
+ # number SP "EXISTS" / number SP "RECENT"
1345
+
1346
+ def mailbox_data__flags
1347
+ name = label("FLAGS")
1348
+ SP!
1349
+ UntaggedResponse.new(name, flag_list, @str)
1350
+ end
1351
+
1352
+ def mailbox_data__list
1353
+ name = label_in("LIST", "LSUB", "XLIST")
1354
+ SP!
1355
+ UntaggedResponse.new(name, mailbox_list, @str)
1356
+ end
1357
+ alias mailbox_data__lsub mailbox_data__list
1358
+ alias mailbox_data__xlist mailbox_data__list
1359
+
1360
+ # mailbox-list = "(" [mbx-list-flags] ")" SP
1361
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil) SP mailbox
1362
+ # [SP mbox-list-extended]
1363
+ # ; This is the list information pointed to by the ABNF
1364
+ # ; item "mailbox-data", which is defined above
760
1365
  def mailbox_list
761
- attr = flag_list
762
- match(T_SPACE)
763
- token = match(T_QUOTED, T_NIL)
764
- if token.symbol == T_NIL
765
- delim = nil
766
- else
767
- delim = token.value
768
- end
769
- match(T_SPACE)
770
- name = astring
771
- return MailboxList.new(attr, delim, name)
1366
+ lpar; attr = peek_rpar? ? [] : mbx_list_flags; rpar
1367
+ SP!; delim = nquoted
1368
+ SP!; name = mailbox
1369
+ # TODO: mbox-list-extended
1370
+ MailboxList.new(attr, delim, name)
772
1371
  end
773
1372
 
774
- def getquota_response
1373
+ def quota_response
775
1374
  # If quota never established, get back
776
1375
  # `NO Quota root does not exist'.
777
1376
  # If quota removed, get `()' after the
@@ -804,192 +1403,240 @@ module Net
804
1403
  end
805
1404
  end
806
1405
 
807
- def getquotaroot_response
808
- # Similar to getquota, but only admin can use getquota.
809
- token = match(T_ATOM)
810
- name = token.value.upcase
811
- match(T_SPACE)
812
- mailbox = astring
813
- quotaroots = []
814
- while true
815
- token = lookahead
816
- break unless token.symbol == T_SPACE
817
- shift_token
818
- quotaroots.push(astring)
819
- end
820
- data = MailboxQuotaRoot.new(mailbox, quotaroots)
821
- return UntaggedResponse.new(name, data, @str)
822
- end
823
-
824
- def getacl_response
825
- token = match(T_ATOM)
826
- name = token.value.upcase
827
- match(T_SPACE)
828
- mailbox = astring
829
- data = []
830
- token = lookahead
831
- if token.symbol == T_SPACE
832
- shift_token
833
- while true
834
- token = lookahead
835
- case token.symbol
836
- when T_CRLF
837
- break
838
- when T_SPACE
839
- shift_token
840
- end
841
- user = astring
842
- match(T_SPACE)
843
- rights = astring
844
- data.push(MailboxACLItem.new(user, rights, mailbox))
845
- end
846
- end
847
- return UntaggedResponse.new(name, data, @str)
848
- end
849
-
850
- def search_response
851
- token = match(T_ATOM)
852
- name = token.value.upcase
853
- token = lookahead
854
- if token.symbol == T_SPACE
855
- shift_token
856
- data = []
857
- while true
858
- token = lookahead
859
- case token.symbol
860
- when T_CRLF
861
- break
862
- when T_SPACE
863
- shift_token
864
- when T_NUMBER
865
- data.push(number)
866
- when T_LPAR
867
- # TODO: include the MODSEQ value in a response
868
- shift_token
869
- match(T_ATOM)
870
- match(T_SPACE)
871
- match(T_NUMBER)
872
- match(T_RPAR)
873
- end
874
- end
875
- else
876
- data = []
877
- end
878
- return UntaggedResponse.new(name, data, @str)
879
- end
880
-
881
- def thread_response
882
- token = match(T_ATOM)
883
- name = token.value.upcase
884
- token = lookahead
885
-
886
- if token.symbol == T_SPACE
887
- threads = []
888
-
889
- while true
890
- shift_token
891
- token = lookahead
892
-
893
- case token.symbol
894
- when T_LPAR
895
- threads << thread_branch(token)
896
- when T_CRLF
897
- break
898
- end
899
- end
900
- else
901
- # no member
902
- threads = []
903
- end
904
-
905
- return UntaggedResponse.new(name, threads, @str)
906
- end
907
-
908
- def thread_branch(token)
909
- rootmember = nil
910
- lastmember = nil
911
-
912
- while true
913
- shift_token # ignore first T_LPAR
914
- token = lookahead
915
-
916
- case token.symbol
917
- when T_NUMBER
918
- # new member
919
- newmember = ThreadMember.new(number, [])
920
- if rootmember.nil?
921
- rootmember = newmember
922
- else
923
- lastmember.children << newmember
924
- end
925
- lastmember = newmember
926
- when T_SPACE
927
- # do nothing
928
- when T_LPAR
929
- if rootmember.nil?
930
- # dummy member
931
- lastmember = rootmember = ThreadMember.new(nil, [])
932
- end
933
-
934
- lastmember.children << thread_branch(token)
935
- when T_RPAR
936
- break
937
- end
938
- end
939
-
940
- return rootmember
941
- end
942
-
943
- def status_response
1406
+ def quotaroot_response
1407
+ # Similar to getquota, but only admin can use getquota.
944
1408
  token = match(T_ATOM)
945
1409
  name = token.value.upcase
946
1410
  match(T_SPACE)
947
1411
  mailbox = astring
948
- match(T_SPACE)
949
- match(T_LPAR)
950
- attr = {}
1412
+ quotaroots = []
951
1413
  while true
952
1414
  token = lookahead
953
- case token.symbol
954
- when T_RPAR
955
- shift_token
956
- break
957
- when T_SPACE
958
- shift_token
959
- end
960
- token = match(T_ATOM)
961
- key = token.value.upcase
962
- match(T_SPACE)
963
- val = number
964
- attr[key] = val
1415
+ break unless token.symbol == T_SPACE
1416
+ shift_token
1417
+ quotaroots.push(astring)
965
1418
  end
966
- data = StatusData.new(mailbox, attr)
1419
+ data = MailboxQuotaRoot.new(mailbox, quotaroots)
967
1420
  return UntaggedResponse.new(name, data, @str)
968
1421
  end
969
1422
 
970
- def capability_response
1423
+ # acl-data = "ACL" SP mailbox *(SP identifier SP rights)
1424
+ def acl_data
971
1425
  token = match(T_ATOM)
972
1426
  name = token.value.upcase
973
1427
  match(T_SPACE)
974
- UntaggedResponse.new(name, capability_data, @str)
1428
+ mailbox = astring
1429
+ data = []
1430
+ token = lookahead
1431
+ if token.symbol == T_SPACE
1432
+ shift_token
1433
+ while true
1434
+ token = lookahead
1435
+ case token.symbol
1436
+ when T_CRLF
1437
+ break
1438
+ when T_SPACE
1439
+ shift_token
1440
+ end
1441
+ user = astring
1442
+ match(T_SPACE)
1443
+ rights = astring
1444
+ data.push(MailboxACLItem.new(user, rights, mailbox))
1445
+ end
1446
+ end
1447
+ return UntaggedResponse.new(name, data, @str)
975
1448
  end
976
1449
 
977
- def capability_data
1450
+ # RFC3501:
1451
+ # mailbox-data = "SEARCH" *(SP nz-number) / ...
1452
+ # RFC5256: SORT
1453
+ # sort-data = "SORT" *(SP nz-number)
1454
+ # RFC7162: CONDSTORE, QRESYNC
1455
+ # mailbox-data =/ "SEARCH" [1*(SP nz-number) SP
1456
+ # search-sort-mod-seq]
1457
+ # sort-data = "SORT" [1*(SP nz-number) SP
1458
+ # search-sort-mod-seq]
1459
+ # ; Updates the SORT response from RFC 5256.
1460
+ # search-sort-mod-seq = "(" "MODSEQ" SP mod-sequence-value ")"
1461
+ # RFC9051:
1462
+ # mailbox-data = obsolete-search-response / ...
1463
+ # obsolete-search-response = "SEARCH" *(SP nz-number)
1464
+ def mailbox_data__search
1465
+ name = label_in("SEARCH", "SORT")
978
1466
  data = []
979
- while true
980
- token = lookahead
981
- case token.symbol
982
- when T_CRLF, T_RBRA
983
- break
984
- when T_SPACE
985
- shift_token
986
- next
1467
+ while _ = SP? && nz_number? do data << _ end
1468
+ if lpar?
1469
+ label("MODSEQ"); SP!
1470
+ modseq = mod_sequence_value
1471
+ rpar
1472
+ end
1473
+ data = SearchResult.new(data, modseq: modseq)
1474
+ UntaggedResponse.new(name, data, @str)
1475
+ end
1476
+ alias sort_data mailbox_data__search
1477
+
1478
+ # RFC5256: THREAD
1479
+ # thread-data = "THREAD" [SP 1*thread-list]
1480
+ def thread_data
1481
+ name = label("THREAD")
1482
+ threads = []
1483
+ if SP?
1484
+ threads << thread_list while lookahead_thread_list?
1485
+ end
1486
+ UntaggedResponse.new(name, threads, @str)
1487
+ end
1488
+
1489
+ alias lookahead_thread_list? lookahead_lpar?
1490
+ alias lookahead_thread_nested? lookahead_thread_list?
1491
+
1492
+ # RFC5256: THREAD
1493
+ # thread-list = "(" (thread-members / thread-nested) ")"
1494
+ def thread_list
1495
+ lpar
1496
+ thread = if lookahead_thread_nested?
1497
+ ThreadMember.new(nil, thread_nested)
1498
+ else
1499
+ thread_members
1500
+ end
1501
+ rpar
1502
+ thread
1503
+ end
1504
+
1505
+ # RFC5256: THREAD
1506
+ # thread-members = nz-number *(SP nz-number) [SP thread-nested]
1507
+ def thread_members
1508
+ members = []
1509
+ members << nz_number # thread root
1510
+ while SP?
1511
+ case lookahead!(T_NUMBER, T_LPAR).symbol
1512
+ when T_NUMBER then members << nz_number
1513
+ else nested = thread_nested; break
987
1514
  end
988
- data.push(atom.upcase)
989
1515
  end
990
- data
1516
+ members.reverse.inject(nested || []) {|subthreads, number|
1517
+ [ThreadMember.new(number, subthreads)]
1518
+ }.first
1519
+ end
1520
+
1521
+ # RFC5256: THREAD
1522
+ # thread-nested = 2*thread-list
1523
+ def thread_nested
1524
+ nested = [thread_list, thread_list]
1525
+ while lookahead_thread_list? do nested << thread_list end
1526
+ nested
1527
+ end
1528
+
1529
+ # mailbox-data =/ "STATUS" SP mailbox SP "(" [status-att-list] ")"
1530
+ def mailbox_data__status
1531
+ resp_name = label("STATUS"); SP!
1532
+ mbox_name = mailbox; SP!
1533
+ lpar; attr = status_att_list; rpar
1534
+ UntaggedResponse.new(resp_name, StatusData.new(mbox_name, attr), @str)
1535
+ end
1536
+
1537
+ # RFC3501
1538
+ # status-att-list = status-att SP number *(SP status-att SP number)
1539
+ # RFC4466, RFC9051, and RFC3501 Errata
1540
+ # status-att-list = status-att-val *(SP status-att-val)
1541
+ def status_att_list
1542
+ attrs = [status_att_val]
1543
+ while SP? do attrs << status_att_val end
1544
+ attrs.to_h
1545
+ end
1546
+
1547
+ # RFC3501 Errata:
1548
+ # status-att-val = ("MESSAGES" SP number) / ("RECENT" SP number) /
1549
+ # ("UIDNEXT" SP nz-number) / ("UIDVALIDITY" SP nz-number) /
1550
+ # ("UNSEEN" SP number)
1551
+ # RFC4466:
1552
+ # status-att-val = ("MESSAGES" SP number) /
1553
+ # ("RECENT" SP number) /
1554
+ # ("UIDNEXT" SP nz-number) /
1555
+ # ("UIDVALIDITY" SP nz-number) /
1556
+ # ("UNSEEN" SP number)
1557
+ # ;; Extensions to the STATUS responses
1558
+ # ;; should extend this production.
1559
+ # ;; Extensions should use the generic
1560
+ # ;; syntax defined by tagged-ext.
1561
+ # RFC9051:
1562
+ # status-att-val = ("MESSAGES" SP number) /
1563
+ # ("UIDNEXT" SP nz-number) /
1564
+ # ("UIDVALIDITY" SP nz-number) /
1565
+ # ("UNSEEN" SP number) /
1566
+ # ("DELETED" SP number) /
1567
+ # ("SIZE" SP number64)
1568
+ # ; Extensions to the STATUS responses
1569
+ # ; should extend this production.
1570
+ # ; Extensions should use the generic
1571
+ # ; syntax defined by tagged-ext.
1572
+ # RFC7162:
1573
+ # status-att-val =/ "HIGHESTMODSEQ" SP mod-sequence-valzer
1574
+ # ;; Extends non-terminal defined in [RFC4466].
1575
+ # ;; Value 0 denotes that the mailbox doesn't
1576
+ # ;; support persistent mod-sequences
1577
+ # ;; as described in Section 3.1.2.2.
1578
+ # RFC7889:
1579
+ # status-att-val =/ "APPENDLIMIT" SP (number / nil)
1580
+ # ;; status-att-val is defined in RFC 4466
1581
+ # RFC8438:
1582
+ # status-att-val =/ "SIZE" SP number64
1583
+ # RFC8474:
1584
+ # status-att-val =/ "MAILBOXID" SP "(" objectid ")"
1585
+ # ; follows tagged-ext production from [RFC4466]
1586
+ def status_att_val
1587
+ key = tagged_ext_label
1588
+ SP!
1589
+ val =
1590
+ case key
1591
+ when "MESSAGES" then number # RFC3501, RFC9051
1592
+ when "UNSEEN" then number # RFC3501, RFC9051
1593
+ when "DELETED" then number # RFC9051
1594
+ when "UIDNEXT" then nz_number # RFC3501, RFC9051
1595
+ when "UIDVALIDITY" then nz_number # RFC3501, RFC9051
1596
+ when "RECENT" then number # RFC3501 (obsolete)
1597
+ when "SIZE" then number64 # RFC8438, RFC9051
1598
+ when "HIGHESTMODSEQ" then mod_sequence_valzer # RFC7162
1599
+ when "MAILBOXID" then parens__objectid # RFC8474
1600
+ else
1601
+ number? || ExtensionData.new(tagged_ext_val)
1602
+ end
1603
+ [key, val]
1604
+ end
1605
+
1606
+ # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
1607
+ # The grammar rule is used by both response-data and resp-text-code.
1608
+ # But this method only returns UntaggedResponse (response-data).
1609
+ #
1610
+ # RFC3501:
1611
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1612
+ # *(SP capability)
1613
+ # RFC9051:
1614
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1615
+ # *(SP capability)
1616
+ def capability_data__untagged
1617
+ UntaggedResponse.new label("CAPABILITY"), capability__list, @str
1618
+ end
1619
+
1620
+ # enable-data = "ENABLED" *(SP capability)
1621
+ def enable_data
1622
+ UntaggedResponse.new label("ENABLED"), capability__list, @str
1623
+ end
1624
+
1625
+ # As a workaround for buggy servers, allow a trailing SP:
1626
+ # *(SP capability) [SP]
1627
+ def capability__list
1628
+ list = []; while SP? && (capa = capability?) do list << capa end; list
991
1629
  end
992
1630
 
1631
+ alias resp_code__capability capability__list
1632
+
1633
+ # capability = ("AUTH=" auth-type) / atom
1634
+ # ; New capabilities MUST begin with "X" or be
1635
+ # ; registered with IANA as standard or
1636
+ # ; standards-track
1637
+ alias capability case_insensitive__atom
1638
+ alias capability? case_insensitive__atom?
1639
+
993
1640
  def id_response
994
1641
  token = match(T_ATOM)
995
1642
  name = token.value.upcase
@@ -1019,147 +1666,185 @@ module Net
1019
1666
  end
1020
1667
  end
1021
1668
 
1669
+ # namespace-response = "NAMESPACE" SP namespace
1670
+ # SP namespace SP namespace
1671
+ # ; The first Namespace is the Personal Namespace(s).
1672
+ # ; The second Namespace is the Other Users'
1673
+ # ; Namespace(s).
1674
+ # ; The third Namespace is the Shared Namespace(s).
1022
1675
  def namespace_response
1676
+ name = label("NAMESPACE")
1023
1677
  @lex_state = EXPR_DATA
1024
- token = lookahead
1025
- token = match(T_ATOM)
1026
- name = token.value.upcase
1027
- match(T_SPACE)
1028
- personal = namespaces
1029
- match(T_SPACE)
1030
- other = namespaces
1031
- match(T_SPACE)
1032
- shared = namespaces
1678
+ data = Namespaces.new((SP!; namespace),
1679
+ (SP!; namespace),
1680
+ (SP!; namespace))
1681
+ UntaggedResponse.new(name, data, @str)
1682
+ ensure
1033
1683
  @lex_state = EXPR_BEG
1034
- data = Namespaces.new(personal, other, shared)
1035
- return UntaggedResponse.new(name, data, @str)
1036
- end
1037
-
1038
- def namespaces
1039
- token = lookahead
1040
- # empty () is not allowed, so nil is functionally identical to empty.
1041
- data = []
1042
- if token.symbol == T_NIL
1043
- shift_token
1044
- else
1045
- match(T_LPAR)
1046
- loop do
1047
- data << namespace
1048
- break unless lookahead.symbol == T_SPACE
1049
- shift_token
1050
- end
1051
- match(T_RPAR)
1052
- end
1053
- data
1054
1684
  end
1055
1685
 
1686
+ # namespace = nil / "(" 1*namespace-descr ")"
1056
1687
  def namespace
1057
- match(T_LPAR)
1058
- prefix = match(T_QUOTED, T_LITERAL).value
1059
- match(T_SPACE)
1060
- delimiter = string
1688
+ NIL? and return []
1689
+ lpar
1690
+ list = [namespace_descr]
1691
+ list << namespace_descr until rpar?
1692
+ list
1693
+ end
1694
+
1695
+ # namespace-descr = "(" string SP
1696
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil)
1697
+ # [namespace-response-extensions] ")"
1698
+ def namespace_descr
1699
+ lpar
1700
+ prefix = string; SP!
1701
+ delimiter = nquoted # n.b: should only accept single char
1061
1702
  extensions = namespace_response_extensions
1062
- match(T_RPAR)
1703
+ rpar
1063
1704
  Namespace.new(prefix, delimiter, extensions)
1064
1705
  end
1065
1706
 
1707
+ # namespace-response-extensions = *namespace-response-extension
1708
+ # namespace-response-extension = SP string SP
1709
+ # "(" string *(SP string) ")"
1066
1710
  def namespace_response_extensions
1067
1711
  data = {}
1068
- token = lookahead
1069
- if token.symbol == T_SPACE
1070
- shift_token
1071
- name = match(T_QUOTED, T_LITERAL).value
1712
+ while SP?
1713
+ name = string; SP!
1714
+ lpar
1072
1715
  data[name] ||= []
1073
- match(T_SPACE)
1074
- match(T_LPAR)
1075
- loop do
1076
- data[name].push match(T_QUOTED, T_LITERAL).value
1077
- break unless lookahead.symbol == T_SPACE
1078
- shift_token
1079
- end
1080
- match(T_RPAR)
1716
+ data[name] << string
1717
+ data[name] << string while SP?
1718
+ rpar
1081
1719
  end
1082
1720
  data
1083
1721
  end
1084
1722
 
1085
- # text = 1*TEXT-CHAR
1086
- # TEXT-CHAR = <any CHAR except CR and LF>
1723
+ # TEXT-CHAR = <any CHAR except CR and LF>
1724
+ # RFC3501:
1725
+ # text = 1*TEXT-CHAR
1726
+ # RFC9051:
1727
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
1728
+ # ; Non-ASCII text can only be returned
1729
+ # ; after ENABLE IMAP4rev2 command
1087
1730
  def text
1088
- match(T_TEXT, lex_state: EXPR_TEXT).value
1731
+ match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8")
1732
+ end
1733
+
1734
+ # an "accept" version of #text
1735
+ def text?
1736
+ accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8")
1089
1737
  end
1090
1738
 
1091
- # resp-text = ["[" resp-text-code "]" SP] text
1739
+ # RFC3501:
1740
+ # resp-text = ["[" resp-text-code "]" SP] text
1741
+ # RFC9051:
1742
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1743
+ #
1744
+ # We leniently re-interpret this as
1745
+ # resp-text = ["[" resp-text-code "]" [SP [text]] / [text]
1092
1746
  def resp_text
1093
- token = match(T_LBRA, T_TEXT, lex_state: EXPR_RTEXT)
1094
- case token.symbol
1095
- when T_LBRA
1096
- code = resp_text_code
1097
- match(T_RBRA)
1098
- accept_space # violating RFC
1099
- ResponseText.new(code, text)
1100
- when T_TEXT
1101
- ResponseText.new(nil, token.value)
1747
+ if lbra?
1748
+ code = resp_text_code; rbra
1749
+ ResponseText.new(code, SP? && text? || "")
1750
+ else
1751
+ ResponseText.new(nil, text? || "")
1102
1752
  end
1103
1753
  end
1104
1754
 
1105
- # See https://www.rfc-editor.org/errata/rfc3501
1755
+ # RFC3501 (See https://www.rfc-editor.org/errata/rfc3501):
1756
+ # resp-text-code = "ALERT" /
1757
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1758
+ # capability-data / "PARSE" /
1759
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1760
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1761
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1762
+ # "UNSEEN" SP nz-number /
1763
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1764
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1765
+ # *(SP capability)
1106
1766
  #
1107
- # resp-text-code = "ALERT" /
1108
- # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1109
- # capability-data / "PARSE" /
1110
- # "PERMANENTFLAGS" SP "("
1111
- # [flag-perm *(SP flag-perm)] ")" /
1112
- # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1113
- # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1114
- # "UNSEEN" SP nz-number /
1115
- # atom [SP 1*<any TEXT-CHAR except "]">]
1767
+ # RFC5530:
1768
+ # resp-text-code =/ "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1769
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1770
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1771
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1772
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1773
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1774
+ # "NONEXISTENT"
1775
+ # RFC9051:
1776
+ # resp-text-code = "ALERT" /
1777
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1778
+ # capability-data / "PARSE" /
1779
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1780
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1781
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1782
+ # resp-code-apnd / resp-code-copy / "UIDNOTSTICKY" /
1783
+ # "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1784
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1785
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1786
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1787
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1788
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1789
+ # "NONEXISTENT" / "NOTSAVED" / "HASCHILDREN" /
1790
+ # "CLOSED" /
1791
+ # "UNKNOWN-CTE" /
1792
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1793
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1794
+ # *(SP capability)
1116
1795
  #
1117
- # +UIDPLUS+ ABNF:: https://www.rfc-editor.org/rfc/rfc4315.html#section-4
1118
- # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1796
+ # RFC4315 (UIDPLUS), RFC9051 (IMAP4rev2):
1797
+ # resp-code-apnd = "APPENDUID" SP nz-number SP append-uid
1798
+ # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1799
+ # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1800
+ #
1801
+ # RFC7162 (CONDSTORE):
1802
+ # resp-text-code =/ "HIGHESTMODSEQ" SP mod-sequence-value /
1803
+ # "NOMODSEQ" /
1804
+ # "MODIFIED" SP sequence-set
1805
+ # RFC7162 (QRESYNC):
1806
+ # resp-text-code =/ "CLOSED"
1807
+ #
1808
+ # RFC8474: OBJECTID
1809
+ # resp-text-code =/ "MAILBOXID" SP "(" objectid ")"
1119
1810
  def resp_text_code
1120
- token = match(T_ATOM)
1121
- name = token.value.upcase
1122
- case name
1123
- when /\A(?:ALERT|PARSE|READ-ONLY|READ-WRITE|TRYCREATE|NOMODSEQ)\z/n
1124
- result = ResponseCode.new(name, nil)
1125
- when /\A(?:BADCHARSET)\z/n
1126
- result = ResponseCode.new(name, charset_list)
1127
- when /\A(?:CAPABILITY)\z/ni
1128
- result = ResponseCode.new(name, capability_data)
1129
- when /\A(?:PERMANENTFLAGS)\z/n
1130
- match(T_SPACE)
1131
- result = ResponseCode.new(name, flag_list)
1132
- when /\A(?:UIDVALIDITY|UIDNEXT|UNSEEN)\z/n
1133
- match(T_SPACE)
1134
- result = ResponseCode.new(name, number)
1135
- when /\A(?:APPENDUID)\z/n
1136
- result = ResponseCode.new(name, resp_code_apnd__data)
1137
- when /\A(?:COPYUID)\z/n
1138
- result = ResponseCode.new(name, resp_code_copy__data)
1139
- else
1140
- token = lookahead
1141
- if token.symbol == T_SPACE
1142
- shift_token
1143
- token = match(T_TEXT, lex_state: EXPR_CTEXT)
1144
- result = ResponseCode.new(name, token.value)
1811
+ name = resp_text_code__name
1812
+ data =
1813
+ case name
1814
+ when "CAPABILITY" then resp_code__capability
1815
+ when "PERMANENTFLAGS" then SP? ? flag_perm__list : []
1816
+ when "UIDNEXT" then SP!; nz_number
1817
+ when "UIDVALIDITY" then SP!; nz_number
1818
+ when "UNSEEN" then SP!; nz_number # rev1 only
1819
+ when "APPENDUID" then SP!; resp_code_apnd__data # rev2, UIDPLUS
1820
+ when "COPYUID" then SP!; resp_code_copy__data # rev2, UIDPLUS
1821
+ when "BADCHARSET" then SP? ? charset__list : []
1822
+ when "ALERT", "PARSE", "READ-ONLY", "READ-WRITE", "TRYCREATE",
1823
+ "UNAVAILABLE", "AUTHENTICATIONFAILED", "AUTHORIZATIONFAILED",
1824
+ "EXPIRED", "PRIVACYREQUIRED", "CONTACTADMIN", "NOPERM", "INUSE",
1825
+ "EXPUNGEISSUED", "CORRUPTION", "SERVERBUG", "CLIENTBUG", "CANNOT",
1826
+ "LIMIT", "OVERQUOTA", "ALREADYEXISTS", "NONEXISTENT", "CLOSED",
1827
+ "NOTSAVED", "UIDNOTSTICKY", "UNKNOWN-CTE", "HASCHILDREN"
1828
+ when "NOMODSEQ" then nil # CONDSTORE
1829
+ when "HIGHESTMODSEQ" then SP!; mod_sequence_value # CONDSTORE
1830
+ when "MODIFIED" then SP!; sequence_set # CONDSTORE
1831
+ when "MAILBOXID" then SP!; parens__objectid # RFC8474: OBJECTID
1145
1832
  else
1146
- result = ResponseCode.new(name, nil)
1833
+ SP? and text_chars_except_rbra
1147
1834
  end
1148
- end
1149
- return result
1835
+ ResponseCode.new(name, data)
1150
1836
  end
1151
1837
 
1152
- def charset_list
1153
- result = []
1154
- if accept(T_SPACE)
1155
- match(T_LPAR)
1156
- result << charset
1157
- while accept(T_SPACE)
1158
- result << charset
1159
- end
1160
- match(T_RPAR)
1161
- end
1162
- result
1838
+ alias resp_text_code__name case_insensitive__atom
1839
+
1840
+ # 1*<any TEXT-CHAR except "]">
1841
+ def text_chars_except_rbra
1842
+ match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')[0]
1843
+ end
1844
+
1845
+ # "(" charset *(SP charset) ")"
1846
+ def charset__list
1847
+ lpar; list = [charset]; while SP? do list << charset end; rpar; list
1163
1848
  end
1164
1849
 
1165
1850
  # already matched: "APPENDUID"
@@ -1175,8 +1860,8 @@ module Net
1175
1860
  # match uid_set even if that returns a single-member array.
1176
1861
  #
1177
1862
  def resp_code_apnd__data
1178
- match(T_SPACE); validity = number
1179
- match(T_SPACE); dst_uids = uid_set # uniqueid ⊂ uid-set
1863
+ validity = number; SP!
1864
+ dst_uids = uid_set # uniqueid ⊂ uid-set
1180
1865
  UIDPlusData.new(validity, nil, dst_uids)
1181
1866
  end
1182
1867
 
@@ -1184,187 +1869,125 @@ module Net
1184
1869
  #
1185
1870
  # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1186
1871
  def resp_code_copy__data
1187
- match(T_SPACE); validity = number
1188
- match(T_SPACE); src_uids = uid_set
1189
- match(T_SPACE); dst_uids = uid_set
1872
+ validity = number; SP!
1873
+ src_uids = uid_set; SP!
1874
+ dst_uids = uid_set
1190
1875
  UIDPlusData.new(validity, src_uids, dst_uids)
1191
1876
  end
1192
1877
 
1193
- def address_list
1194
- token = lookahead
1195
- if token.symbol == T_NIL
1196
- shift_token
1197
- return nil
1198
- else
1199
- result = []
1200
- match(T_LPAR)
1201
- while true
1202
- token = lookahead
1203
- case token.symbol
1204
- when T_RPAR
1205
- shift_token
1206
- break
1207
- when T_SPACE
1208
- shift_token
1209
- end
1210
- result.push(address)
1211
- end
1212
- return result
1213
- end
1214
- end
1215
-
1216
- ADDRESS_REGEXP = /\G\
1217
- (?# 1: NAME )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1218
- (?# 2: ROUTE )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1219
- (?# 3: MAILBOX )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
1220
- (?# 4: HOST )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)")\
1221
- \)/ni
1222
-
1878
+ ADDRESS_REGEXP = /\G
1879
+ \( (?: NIL | #{Patterns::QUOTED_rev2} ) # 1: NAME
1880
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 2: ROUTE
1881
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 3: MAILBOX
1882
+ \s (?: NIL | #{Patterns::QUOTED_rev2} ) # 4: HOST
1883
+ \)
1884
+ /nix
1885
+
1886
+ # address = "(" addr-name SP addr-adl SP addr-mailbox SP
1887
+ # addr-host ")"
1888
+ # addr-adl = nstring
1889
+ # addr-host = nstring
1890
+ # addr-mailbox = nstring
1891
+ # addr-name = nstring
1223
1892
  def address
1224
- match(T_LPAR)
1225
- if @str.index(ADDRESS_REGEXP, @pos)
1226
- # address does not include literal.
1227
- @pos = $~.end(0)
1228
- name = $1
1229
- route = $2
1230
- mailbox = $3
1231
- host = $4
1232
- for s in [name, route, mailbox, host]
1233
- if s
1234
- s.gsub!(/\\(["\\])/n, "\\1")
1235
- end
1236
- end
1237
- else
1238
- name = nstring
1239
- match(T_SPACE)
1240
- route = nstring
1241
- match(T_SPACE)
1242
- mailbox = nstring
1243
- match(T_SPACE)
1244
- host = nstring
1245
- match(T_RPAR)
1246
- end
1247
- return Address.new(name, route, mailbox, host)
1248
- end
1249
-
1250
- FLAG_REGEXP = /\
1251
- (?# FLAG )\\([^\x80-\xff(){ \x00-\x1f\x7f%"\\]+)|\
1252
- (?# ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\]+)/n
1253
-
1893
+ if (match = accept_re(ADDRESS_REGEXP))
1894
+ # note that "NIL" isn't captured by the regexp
1895
+ name, route, mailbox, host = match.captures
1896
+ .map { Patterns.unescape_quoted _1 }
1897
+ else # address may include literals
1898
+ lpar; name = addr_name
1899
+ SP!; route = addr_adl
1900
+ SP!; mailbox = addr_mailbox
1901
+ SP!; host = addr_host
1902
+ rpar
1903
+ end
1904
+ Address.new(name, route, mailbox, host)
1905
+ end
1906
+
1907
+ alias addr_adl nstring
1908
+ alias addr_host nstring
1909
+ alias addr_mailbox nstring
1910
+ alias addr_name nstring
1911
+
1912
+ # flag-list = "(" [flag *(SP flag)] ")"
1254
1913
  def flag_list
1255
- if @str.index(/\(([^)]*)\)/ni, @pos)
1256
- @pos = $~.end(0)
1257
- return $1.scan(FLAG_REGEXP).collect { |flag, atom|
1258
- if atom
1259
- atom
1260
- else
1261
- flag.capitalize.intern
1262
- end
1263
- }
1914
+ if (match = accept_re(Patterns::FLAG_LIST))
1915
+ match[1].split(nil)
1916
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1264
1917
  else
1265
- parse_error("invalid flag list")
1918
+ quirky__flag_list "flags-list"
1266
1919
  end
1267
1920
  end
1268
1921
 
1269
- def nstring
1270
- token = lookahead
1271
- if token.symbol == T_NIL
1272
- shift_token
1273
- return nil
1922
+ # "(" [flag-perm *(SP flag-perm)] ")"
1923
+ def flag_perm__list
1924
+ if (match = accept_re(Patterns::FLAG_PERM_LIST))
1925
+ match[1].split(nil)
1926
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1274
1927
  else
1275
- return string
1928
+ quirky__flag_list "PERMANENTFLAGS flag-perm list"
1276
1929
  end
1277
1930
  end
1278
1931
 
1279
- def astring
1280
- token = lookahead
1281
- if string_token?(token)
1282
- return string
1283
- else
1284
- return astring_chars
1285
- end
1932
+ # This allows illegal "]" in flag names (Gmail),
1933
+ # or "\*" in a FLAGS response (greenmail).
1934
+ def quirky__flag_list(name)
1935
+ match_re(Patterns::QUIRKY_FLAGS_LIST, "quirks mode #{name}")[1]
1936
+ .scan(Patterns::QUIRKY_FLAG)
1937
+ .map! { _1.delete_prefix!("\\") ? _1.capitalize.to_sym : _1 }
1286
1938
  end
1287
1939
 
1288
- def string
1289
- token = lookahead
1290
- if token.symbol == T_NIL
1291
- shift_token
1292
- return nil
1293
- end
1294
- token = match(T_QUOTED, T_LITERAL)
1295
- return token.value
1940
+ # See Patterns::MBX_LIST_FLAGS
1941
+ def mbx_list_flags
1942
+ match_re(Patterns::MBX_LIST_FLAGS, "mbx-list-flags")[1]
1943
+ .split(nil)
1944
+ .map! { _1.delete_prefix!("\\"); _1.capitalize.to_sym }
1296
1945
  end
1297
1946
 
1298
- STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1299
-
1300
- def string_token?(token)
1301
- return STRING_TOKENS.include?(token.symbol)
1302
- end
1947
+ # See https://developers.google.com/gmail/imap/imap-extensions
1948
+ def x_gm_label; accept(T_BSLASH) ? atom.capitalize.to_sym : astring end
1303
1949
 
1304
- def case_insensitive_string
1305
- token = lookahead
1306
- if token.symbol == T_NIL
1307
- shift_token
1308
- return nil
1309
- end
1310
- token = match(T_QUOTED, T_LITERAL)
1311
- return token.value.upcase
1950
+ # See https://developers.google.com/gmail/imap/imap-extensions
1951
+ def x_gm_labels
1952
+ lpar; return [] if rpar?
1953
+ labels = []
1954
+ labels << x_gm_label
1955
+ labels << x_gm_label while SP?
1956
+ rpar
1957
+ labels
1312
1958
  end
1313
1959
 
1314
- # atom = 1*ATOM-CHAR
1315
- # ATOM-CHAR = <any CHAR except atom-specials>
1316
- ATOM_TOKENS = [
1317
- T_ATOM,
1318
- T_NUMBER,
1319
- T_NIL,
1320
- T_LBRA,
1321
- T_PLUS
1322
- ]
1960
+ # See https://www.rfc-editor.org/errata/rfc3501
1961
+ #
1962
+ # charset = atom / quoted
1963
+ def charset; quoted? || atom end
1323
1964
 
1324
- def atom
1325
- -combine_adjacent(*ATOM_TOKENS)
1326
- end
1965
+ # RFC7162:
1966
+ # mod-sequence-value = 1*DIGIT
1967
+ # ;; Positive unsigned 63-bit integer
1968
+ # ;; (mod-sequence)
1969
+ # ;; (1 <= n <= 9,223,372,036,854,775,807).
1970
+ alias mod_sequence_value nz_number64
1327
1971
 
1328
- # ASTRING-CHAR = ATOM-CHAR / resp-specials
1329
- # resp-specials = "]"
1330
- ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
1972
+ # RFC7162:
1973
+ # permsg-modsequence = mod-sequence-value
1974
+ # ;; Per-message mod-sequence.
1975
+ alias permsg_modsequence mod_sequence_value
1331
1976
 
1332
- def astring_chars
1333
- combine_adjacent(*ASTRING_CHARS_TOKENS)
1334
- end
1977
+ # RFC7162:
1978
+ # mod-sequence-valzer = "0" / mod-sequence-value
1979
+ alias mod_sequence_valzer number64
1335
1980
 
1336
- def combine_adjacent(*tokens)
1337
- result = "".b
1338
- while token = accept(*tokens)
1339
- result << token.value
1340
- end
1341
- if result.empty?
1342
- parse_error('unexpected token %s (expected %s)',
1343
- lookahead.symbol, args.join(" or "))
1344
- end
1345
- result
1346
- end
1981
+ def parens__modseq; lpar; _ = permsg_modsequence; rpar; _ end
1347
1982
 
1348
- # See https://www.rfc-editor.org/errata/rfc3501
1349
- #
1350
- # charset = atom / quoted
1351
- def charset
1352
- if token = accept(T_QUOTED)
1353
- token.value
1354
- else
1355
- atom
1356
- end
1357
- end
1983
+ # RFC8474:
1984
+ # objectid = 1*255(ALPHA / DIGIT / "_" / "-")
1985
+ # ; characters in object identifiers are case
1986
+ # ; significant
1987
+ alias objectid atom
1358
1988
 
1359
- def number
1360
- token = lookahead
1361
- if token.symbol == T_NIL
1362
- shift_token
1363
- return nil
1364
- end
1365
- token = match(T_NUMBER)
1366
- return token.value.to_i
1367
- end
1989
+ def parens__objectid; lpar; _ = objectid; rpar; _ end
1990
+ def nparens__objectid; NIL? ? nil : parens__objectid end
1368
1991
 
1369
1992
  # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1370
1993
  # uid-set = (uniqueid / uid-range) *("," uid-set)
@@ -1393,64 +2016,15 @@ module Net
1393
2016
 
1394
2017
  SPACES_REGEXP = /\G */n
1395
2018
 
1396
- # This advances @pos directly so it's safe before changing @lex_state.
1397
- def accept_space
1398
- if @token
1399
- shift_token if @token.symbol == T_SPACE
1400
- elsif @str[@pos] == " "
1401
- @pos += 1
1402
- end
1403
- end
1404
-
1405
2019
  # The RFC is very strict about this and usually we should be too.
1406
2020
  # But skipping spaces is usually a safe workaround for buggy servers.
1407
2021
  #
1408
2022
  # This advances @pos directly so it's safe before changing @lex_state.
1409
2023
  def accept_spaces
1410
- shift_token if @token&.symbol == T_SPACE
1411
- if @str.index(SPACES_REGEXP, @pos)
2024
+ return false unless SP?
2025
+ @str.index(SPACES_REGEXP, @pos) and
1412
2026
  @pos = $~.end(0)
1413
- end
1414
- end
1415
-
1416
- def match(*args, lex_state: @lex_state)
1417
- if @token && lex_state != @lex_state
1418
- parse_error("invalid lex_state change to %s with unconsumed token",
1419
- lex_state)
1420
- end
1421
- begin
1422
- @lex_state, original_lex_state = lex_state, @lex_state
1423
- token = lookahead
1424
- unless args.include?(token.symbol)
1425
- parse_error('unexpected token %s (expected %s)',
1426
- token.symbol.id2name,
1427
- args.collect {|i| i.id2name}.join(" or "))
1428
- end
1429
- shift_token
1430
- return token
1431
- ensure
1432
- @lex_state = original_lex_state
1433
- end
1434
- end
1435
-
1436
- # like match, but does not raise error on failure.
1437
- #
1438
- # returns and shifts token on successful match
1439
- # returns nil and leaves @token unshifted on no match
1440
- def accept(*args)
1441
- token = lookahead
1442
- if args.include?(token.symbol)
1443
- shift_token
1444
- token
1445
- end
1446
- end
1447
-
1448
- def lookahead
1449
- @token ||= next_token
1450
- end
1451
-
1452
- def shift_token
1453
- @token = nil
2027
+ true
1454
2028
  end
1455
2029
 
1456
2030
  def next_token
@@ -1461,38 +2035,46 @@ module Net
1461
2035
  if $1
1462
2036
  return Token.new(T_SPACE, $+)
1463
2037
  elsif $2
1464
- return Token.new(T_NIL, $+)
1465
- elsif $3
1466
- return Token.new(T_NUMBER, $+)
2038
+ len = $+.to_i
2039
+ val = @str[@pos, len]
2040
+ @pos += len
2041
+ return Token.new(T_LITERAL8, val)
2042
+ elsif $3 && $7
2043
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
2044
+ return Token.new(T_ATOM, $3)
1467
2045
  elsif $4
1468
- return Token.new(T_ATOM, $+)
2046
+ return Token.new(T_NIL, $+)
1469
2047
  elsif $5
1470
- return Token.new(T_QUOTED,
1471
- $+.gsub(/\\(["\\])/n, "\\1"))
2048
+ return Token.new(T_NUMBER, $+)
1472
2049
  elsif $6
2050
+ return Token.new(T_PLUS, $+)
2051
+ elsif $8
2052
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
2053
+ return Token.new(T_ATOM, $+)
2054
+ elsif $9
2055
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
2056
+ elsif $10
1473
2057
  return Token.new(T_LPAR, $+)
1474
- elsif $7
2058
+ elsif $11
1475
2059
  return Token.new(T_RPAR, $+)
1476
- elsif $8
2060
+ elsif $12
1477
2061
  return Token.new(T_BSLASH, $+)
1478
- elsif $9
2062
+ elsif $13
1479
2063
  return Token.new(T_STAR, $+)
1480
- elsif $10
2064
+ elsif $14
1481
2065
  return Token.new(T_LBRA, $+)
1482
- elsif $11
2066
+ elsif $15
1483
2067
  return Token.new(T_RBRA, $+)
1484
- elsif $12
2068
+ elsif $16
1485
2069
  len = $+.to_i
1486
2070
  val = @str[@pos, len]
1487
2071
  @pos += len
1488
2072
  return Token.new(T_LITERAL, val)
1489
- elsif $13
1490
- return Token.new(T_PLUS, $+)
1491
- elsif $14
2073
+ elsif $17
1492
2074
  return Token.new(T_PERCENT, $+)
1493
- elsif $15
2075
+ elsif $18
1494
2076
  return Token.new(T_CRLF, $+)
1495
- elsif $16
2077
+ elsif $19
1496
2078
  return Token.new(T_EOF, $+)
1497
2079
  else
1498
2080
  parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
@@ -1511,8 +2093,7 @@ module Net
1511
2093
  elsif $3
1512
2094
  return Token.new(T_NUMBER, $+)
1513
2095
  elsif $4
1514
- return Token.new(T_QUOTED,
1515
- $+.gsub(/\\(["\\])/n, "\\1"))
2096
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1516
2097
  elsif $5
1517
2098
  len = $+.to_i
1518
2099
  val = @str[@pos, len]
@@ -1529,63 +2110,11 @@ module Net
1529
2110
  @str.index(/\S*/n, @pos)
1530
2111
  parse_error("unknown token - %s", $&.dump)
1531
2112
  end
1532
- when EXPR_TEXT
1533
- if @str.index(TEXT_REGEXP, @pos)
1534
- @pos = $~.end(0)
1535
- if $1
1536
- return Token.new(T_TEXT, $+)
1537
- else
1538
- parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid")
1539
- end
1540
- else
1541
- @str.index(/\S*/n, @pos)
1542
- parse_error("unknown token - %s", $&.dump)
1543
- end
1544
- when EXPR_RTEXT
1545
- if @str.index(RTEXT_REGEXP, @pos)
1546
- @pos = $~.end(0)
1547
- if $1
1548
- return Token.new(T_LBRA, $+)
1549
- elsif $2
1550
- return Token.new(T_TEXT, $+)
1551
- else
1552
- parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid")
1553
- end
1554
- else
1555
- @str.index(/\S*/n, @pos)
1556
- parse_error("unknown token - %s", $&.dump)
1557
- end
1558
- when EXPR_CTEXT
1559
- if @str.index(CTEXT_REGEXP, @pos)
1560
- @pos = $~.end(0)
1561
- if $1
1562
- return Token.new(T_TEXT, $+)
1563
- else
1564
- parse_error("[Net::IMAP BUG] CTEXT_REGEXP is invalid")
1565
- end
1566
- else
1567
- @str.index(/\S*/n, @pos) #/
1568
- parse_error("unknown token - %s", $&.dump)
1569
- end
1570
2113
  else
1571
2114
  parse_error("invalid @lex_state - %s", @lex_state.inspect)
1572
2115
  end
1573
2116
  end
1574
2117
 
1575
- def parse_error(fmt, *args)
1576
- if IMAP.debug
1577
- $stderr.printf("@str: %s\n", @str.dump)
1578
- $stderr.printf("@pos: %d\n", @pos)
1579
- $stderr.printf("@lex_state: %s\n", @lex_state)
1580
- if @token
1581
- $stderr.printf("@token.symbol: %s\n", @token.symbol)
1582
- $stderr.printf("@token.value: %s\n", @token.value.inspect)
1583
- end
1584
- end
1585
- raise ResponseParseError, format(fmt, *args)
1586
- end
1587
2118
  end
1588
-
1589
2119
  end
1590
-
1591
2120
  end