net-imap 0.3.6 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/pages.yml +46 -0
  3. data/.github/workflows/test.yml +5 -12
  4. data/.gitignore +1 -0
  5. data/Gemfile +3 -0
  6. data/README.md +15 -4
  7. data/Rakefile +0 -7
  8. data/lib/net/imap/authenticators.rb +26 -57
  9. data/lib/net/imap/command_data.rb +13 -6
  10. data/lib/net/imap/deprecated_client_options.rb +139 -0
  11. data/lib/net/imap/errors.rb +20 -0
  12. data/lib/net/imap/response_data.rb +92 -47
  13. data/lib/net/imap/response_parser/parser_utils.rb +240 -0
  14. data/lib/net/imap/response_parser.rb +1265 -986
  15. data/lib/net/imap/sasl/anonymous_authenticator.rb +69 -0
  16. data/lib/net/imap/sasl/authentication_exchange.rb +107 -0
  17. data/lib/net/imap/sasl/authenticators.rb +118 -0
  18. data/lib/net/imap/sasl/client_adapter.rb +72 -0
  19. data/lib/net/imap/{authenticators/cram_md5.rb → sasl/cram_md5_authenticator.rb} +21 -11
  20. data/lib/net/imap/sasl/digest_md5_authenticator.rb +180 -0
  21. data/lib/net/imap/sasl/external_authenticator.rb +83 -0
  22. data/lib/net/imap/sasl/gs2_header.rb +80 -0
  23. data/lib/net/imap/{authenticators/login.rb → sasl/login_authenticator.rb} +25 -16
  24. data/lib/net/imap/sasl/oauthbearer_authenticator.rb +199 -0
  25. data/lib/net/imap/sasl/plain_authenticator.rb +101 -0
  26. data/lib/net/imap/sasl/protocol_adapters.rb +45 -0
  27. data/lib/net/imap/sasl/scram_algorithm.rb +58 -0
  28. data/lib/net/imap/sasl/scram_authenticator.rb +287 -0
  29. data/lib/net/imap/sasl/stringprep.rb +6 -66
  30. data/lib/net/imap/sasl/xoauth2_authenticator.rb +106 -0
  31. data/lib/net/imap/sasl.rb +144 -43
  32. data/lib/net/imap/sasl_adapter.rb +21 -0
  33. data/lib/net/imap/stringprep/nameprep.rb +70 -0
  34. data/lib/net/imap/stringprep/saslprep.rb +69 -0
  35. data/lib/net/imap/stringprep/saslprep_tables.rb +96 -0
  36. data/lib/net/imap/stringprep/tables.rb +146 -0
  37. data/lib/net/imap/stringprep/trace.rb +85 -0
  38. data/lib/net/imap/stringprep.rb +159 -0
  39. data/lib/net/imap.rb +993 -609
  40. data/net-imap.gemspec +4 -3
  41. data/rakelib/benchmarks.rake +98 -0
  42. data/rakelib/saslprep.rake +4 -4
  43. data/rakelib/string_prep_tables_generator.rb +82 -60
  44. metadata +29 -13
  45. data/benchmarks/stringprep.yml +0 -65
  46. data/benchmarks/table-regexps.yml +0 -39
  47. data/lib/net/imap/authenticators/digest_md5.rb +0 -115
  48. data/lib/net/imap/authenticators/plain.rb +0 -41
  49. data/lib/net/imap/authenticators/xoauth2.rb +0 -20
  50. data/lib/net/imap/sasl/saslprep.rb +0 -55
  51. data/lib/net/imap/sasl/saslprep_tables.rb +0 -98
  52. data/lib/net/imap/sasl/stringprep_tables.rb +0 -153
@@ -1,12 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "response_parser/parser_utils"
4
5
 
5
6
  module Net
6
7
  class IMAP < Protocol
7
8
 
8
9
  # Parses an \IMAP server response.
9
10
  class ResponseParser
11
+ include ParserUtils
12
+ extend ParserUtils::Generator
13
+
10
14
  # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
11
15
  def initialize
12
16
  @str = nil
@@ -33,216 +37,734 @@ module Net
33
37
 
34
38
  # :stopdoc:
35
39
 
36
- EXPR_BEG = :EXPR_BEG
37
- EXPR_DATA = :EXPR_DATA
38
- EXPR_TEXT = :EXPR_TEXT
39
- EXPR_RTEXT = :EXPR_RTEXT
40
- EXPR_CTEXT = :EXPR_CTEXT
41
-
42
- T_SPACE = :SPACE
43
- T_NIL = :NIL
44
- T_NUMBER = :NUMBER
45
- T_ATOM = :ATOM
46
- T_QUOTED = :QUOTED
47
- T_LPAR = :LPAR
48
- T_RPAR = :RPAR
49
- T_BSLASH = :BSLASH
50
- T_STAR = :STAR
51
- T_LBRA = :LBRA
52
- T_RBRA = :RBRA
53
- T_LITERAL = :LITERAL
54
- T_PLUS = :PLUS
55
- T_PERCENT = :PERCENT
56
- T_CRLF = :CRLF
57
- T_EOF = :EOF
58
- T_TEXT = :TEXT
59
-
40
+ EXPR_BEG = :EXPR_BEG # the default, used in most places
41
+ EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
42
+
43
+ T_SPACE = :SPACE # atom special
44
+ T_ATOM = :ATOM # atom (subset of astring chars)
45
+ T_NIL = :NIL # subset of atom and label
46
+ T_NUMBER = :NUMBER # subset of atom
47
+ T_LBRA = :LBRA # subset of atom
48
+ T_PLUS = :PLUS # subset of atom; tag special
49
+ T_RBRA = :RBRA # atom special; resp_special; valid astring char
50
+ T_QUOTED = :QUOTED # starts/ends with atom special
51
+ T_BSLASH = :BSLASH # atom special; quoted special
52
+ T_LPAR = :LPAR # atom special; paren list delimiter
53
+ T_RPAR = :RPAR # atom special; paren list delimiter
54
+ T_STAR = :STAR # atom special; list wildcard
55
+ T_PERCENT = :PERCENT # atom special; list wildcard
56
+ T_LITERAL = :LITERAL # starts with atom special
57
+ T_CRLF = :CRLF # atom special; text special; quoted special
58
+ T_TEXT = :TEXT # any char except CRLF
59
+ T_EOF = :EOF # end of response string
60
+
61
+ module ResponseConditions
62
+ OK = "OK"
63
+ NO = "NO"
64
+ BAD = "BAD"
65
+ BYE = "BYE"
66
+ PREAUTH = "PREAUTH"
67
+
68
+ RESP_COND_STATES = [OK, NO, BAD ].freeze
69
+ RESP_DATA_CONDS = [OK, NO, BAD, BYE, ].freeze
70
+ AUTH_CONDS = [OK, PREAUTH].freeze
71
+ GREETING_CONDS = [OK, BYE, PREAUTH].freeze
72
+ RESP_CONDS = [OK, NO, BAD, BYE, PREAUTH].freeze
73
+ end
74
+ include ResponseConditions
75
+
76
+ module Patterns
77
+
78
+ module CharClassSubtraction
79
+ refine Regexp do
80
+ def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end
81
+ end
82
+ end
83
+ using CharClassSubtraction
84
+
85
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
86
+ # >>>
87
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
88
+ # CHAR = %x01-7F
89
+ # CRLF = CR LF
90
+ # ; Internet standard newline
91
+ # CTL = %x00-1F / %x7F
92
+ # ; controls
93
+ # DIGIT = %x30-39
94
+ # ; 0-9
95
+ # DQUOTE = %x22
96
+ # ; " (Double Quote)
97
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
98
+ # OCTET = %x00-FF
99
+ # SP = %x20
100
+ module RFC5234
101
+ ALPHA = /[A-Za-z]/n
102
+ CHAR = /[\x01-\x7f]/n
103
+ CRLF = /\r\n/n
104
+ CTL = /[\x00-\x1F\x7F]/n
105
+ DIGIT = /\d/n
106
+ DQUOTE = /"/n
107
+ HEXDIG = /\h/
108
+ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
109
+ SP = / /n
110
+ end
111
+
112
+ # UTF-8, a transformation format of ISO 10646
113
+ # >>>
114
+ # UTF8-1 = %x00-7F
115
+ # UTF8-tail = %x80-BF
116
+ # UTF8-2 = %xC2-DF UTF8-tail
117
+ # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
118
+ # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
119
+ # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
120
+ # %xF4 %x80-8F 2( UTF8-tail )
121
+ # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
122
+ # UTF8-octets = *( UTF8-char )
123
+ #
124
+ # n.b. String * Integer is used for repetition, rather than /x{3}/,
125
+ # because ruby 3.2's linear-time cache-based optimization doesn't work
126
+ # with "bounded or fixed times repetition nesting in another repetition
127
+ # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
128
+ # believe it is hard to support this case correctly."
129
+ # See https://bugs.ruby-lang.org/issues/19104
130
+ module RFC3629
131
+ UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
132
+ UTF8_TAIL = /[\x80-\xBF]/n
133
+ UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
134
+ UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
135
+ /\xED[\x80-\x9F]#{UTF8_TAIL}/n,
136
+ /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
137
+ /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
138
+ UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
139
+ /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
140
+ /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
141
+ UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
142
+ UTF8_OCTETS = /#{UTF8_CHAR}*/n
143
+ end
144
+
145
+ include RFC5234
146
+ include RFC3629
147
+
148
+ # CHAR8 = %x01-ff
149
+ # ; any OCTET except NUL, %x00
150
+ CHAR8 = /[\x01-\xff]/n
151
+
152
+ # list-wildcards = "%" / "*"
153
+ LIST_WILDCARDS = /[%*]/n
154
+ # quoted-specials = DQUOTE / "\"
155
+ QUOTED_SPECIALS = /["\\]/n
156
+ # resp-specials = "]"
157
+ RESP_SPECIALS = /[\]]/n
158
+
159
+ # atomish = 1*<any ATOM-CHAR except "[">
160
+ # ; We use "atomish" for msg-att and section, in order
161
+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
162
+ #
163
+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
164
+ # quoted-specials / resp-specials
165
+ # ATOM-CHAR = <any CHAR except atom-specials>
166
+ # atom = 1*ATOM-CHAR
167
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
168
+ # tag = 1*<any ASTRING-CHAR except "+">
169
+
170
+ ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
171
+ ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
172
+
173
+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
174
+ ATOM_CHAR = CHAR - ATOM_SPECIALS
175
+
176
+ ATOM = /#{ATOM_CHAR}+/n
177
+ ASTRING_CHARS = /#{ASTRING_CHAR}+/n
178
+ ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
179
+ TAG = /#{ASTRING_CHAR - /[+]/ }+/
180
+
181
+ # TEXT-CHAR = <any CHAR except CR and LF>
182
+ TEXT_CHAR = CHAR - /[\r\n]/
183
+
184
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
185
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
186
+ CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
187
+
188
+ # flag = "\Answered" / "\Flagged" / "\Deleted" /
189
+ # "\Seen" / "\Draft" / flag-keyword / flag-extension
190
+ # ; Does not include "\Recent"
191
+ # flag-extension = "\" atom
192
+ # ; Future expansion. Client implementations
193
+ # ; MUST accept flag-extension flags. Server
194
+ # ; implementations MUST NOT generate
195
+ # ; flag-extension flags except as defined by
196
+ # ; a future Standard or Standards Track
197
+ # ; revisions of this specification.
198
+ # flag-keyword = "$MDNSent" / "$Forwarded" / "$Junk" /
199
+ # "$NotJunk" / "$Phishing" / atom
200
+ # flag-perm = flag / "\*"
201
+ #
202
+ # Not checking for max one mbx-list-sflag in the parser.
203
+ # >>>
204
+ # mbx-list-oflag = "\Noinferiors" / child-mbox-flag /
205
+ # "\Subscribed" / "\Remote" / flag-extension
206
+ # ; Other flags; multiple from this list are
207
+ # ; possible per LIST response, but each flag
208
+ # ; can only appear once per LIST response
209
+ # mbx-list-sflag = "\NonExistent" / "\Noselect" / "\Marked" /
210
+ # "\Unmarked"
211
+ # ; Selectability flags; only one per LIST response
212
+ # child-mbox-flag = "\HasChildren" / "\HasNoChildren"
213
+ # ; attributes for the CHILDREN return option, at most
214
+ # ; one possible per LIST response
215
+ FLAG = /\\?#{ATOM}/n
216
+ FLAG_EXTENSION = /\\#{ATOM}/n
217
+ FLAG_KEYWORD = ATOM
218
+ FLAG_PERM = Regexp.union(FLAG, "\\*")
219
+ MBX_FLAG = FLAG_EXTENSION
220
+
221
+ # flag-list = "(" [flag *(SP flag)] ")"
222
+ #
223
+ # part of resp-text-code:
224
+ # >>>
225
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")"
226
+ #
227
+ # parens from mailbox-list are included in the regexp:
228
+ # >>>
229
+ # mbx-list-flags = *(mbx-list-oflag SP) mbx-list-sflag
230
+ # *(SP mbx-list-oflag) /
231
+ # mbx-list-oflag *(SP mbx-list-oflag)
232
+ FLAG_LIST = /\G\((#{FLAG }(?:#{SP}#{FLAG })*|)\)/ni
233
+ FLAG_PERM_LIST = /\G\((#{FLAG_PERM}(?:#{SP}#{FLAG_PERM})*|)\)/ni
234
+ MBX_LIST_FLAGS = /\G\((#{MBX_FLAG }(?:#{SP}#{MBX_FLAG })*|)\)/ni
235
+
236
+ # RFC3501:
237
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
238
+ # "\" quoted-specials
239
+ # RFC9051:
240
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
241
+ # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
242
+ # RFC3501 & RFC9051:
243
+ # quoted = DQUOTE *QUOTED-CHAR DQUOTE
244
+ QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
245
+ QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
246
+ QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
247
+ QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
248
+ UTF8_2, UTF8_3, UTF8_4)
249
+ QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
250
+ QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
251
+
252
+ # RFC3501:
253
+ # text = 1*TEXT-CHAR
254
+ # RFC9051:
255
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
256
+ # ; Non-ASCII text can only be returned
257
+ # ; after ENABLE IMAP4rev2 command
258
+ TEXT_rev1 = /#{TEXT_CHAR}+/
259
+ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
260
+
261
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
262
+ TAGGED_LABEL_FCHAR = /[a-zA-Z\-_.]/n
263
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
264
+ TAGGED_LABEL_CHAR = /[a-zA-Z\-_.0-9:]*/n
265
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
266
+ # ; Is a valid RFC 3501 "atom".
267
+ TAGGED_EXT_LABEL = /#{TAGGED_LABEL_FCHAR}#{TAGGED_LABEL_CHAR}*/n
268
+
269
+ # RFC3501:
270
+ # literal = "{" number "}" CRLF *CHAR8
271
+ # ; Number represents the number of CHAR8s
272
+ # RFC9051:
273
+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
274
+ # ; <number64> represents the number of CHAR8s.
275
+ # ; A non-synchronizing literal is distinguished
276
+ # ; from a synchronizing literal by the presence of
277
+ # ; "+" before the closing "}".
278
+ # ; Non-synchronizing literals are not allowed when
279
+ # ; sent from server to the client.
280
+ LITERAL = /\{(\d+)\}\r\n/n
281
+
282
+ module_function
283
+
284
+ def unescape_quoted!(quoted)
285
+ quoted
286
+ &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
287
+ &.force_encoding("UTF-8")
288
+ end
289
+
290
+ def unescape_quoted(quoted)
291
+ quoted
292
+ &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
293
+ &.force_encoding("UTF-8")
294
+ end
295
+
296
+ end
297
+
298
+ # the default, used in most places
60
299
  BEG_REGEXP = /\G(?:\
61
- (?# 1: SPACE )( +)|\
62
- (?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
63
- (?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
64
- (?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\
65
- (?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
66
- (?# 6: LPAR )(\()|\
67
- (?# 7: RPAR )(\))|\
68
- (?# 8: BSLASH )(\\)|\
69
- (?# 9: STAR )(\*)|\
70
- (?# 10: LBRA )(\[)|\
71
- (?# 11: RBRA )(\])|\
72
- (?# 12: LITERAL )\{(\d+)\}\r\n|\
73
- (?# 13: PLUS )(\+)|\
74
- (?# 14: PERCENT )(%)|\
75
- (?# 15: CRLF )(\r\n)|\
76
- (?# 16: EOF )(\z))/ni
77
-
300
+ (?# 1: SPACE )( )|\
301
+ (?# 2: ATOM prefixed with a compatible subtype)\
302
+ ((?:\
303
+ (?# 3: NIL )(NIL)|\
304
+ (?# 4: NUMBER )(\d+)|\
305
+ (?# 5: PLUS )(\+))\
306
+ (?# 6: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
307
+ (?# This enables greedy alternation without lookahead, in linear time.)\
308
+ )|\
309
+ (?# Also need to check for ATOM without a subtype prefix.)\
310
+ (?# 7: ATOM )(#{Patterns::ATOMISH})|\
311
+ (?# 8: QUOTED )#{Patterns::QUOTED_rev2}|\
312
+ (?# 9: LPAR )(\()|\
313
+ (?# 10: RPAR )(\))|\
314
+ (?# 11: BSLASH )(\\)|\
315
+ (?# 12: STAR )(\*)|\
316
+ (?# 13: LBRA )(\[)|\
317
+ (?# 14: RBRA )(\])|\
318
+ (?# 15: LITERAL )#{Patterns::LITERAL}|\
319
+ (?# 16: PERCENT )(%)|\
320
+ (?# 17: CRLF )(\r\n)|\
321
+ (?# 18: EOF )(\z))/ni
322
+
323
+ # envelope, body(structure), namespaces
78
324
  DATA_REGEXP = /\G(?:\
79
325
  (?# 1: SPACE )( )|\
80
326
  (?# 2: NIL )(NIL)|\
81
327
  (?# 3: NUMBER )(\d+)|\
82
- (?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
83
- (?# 5: LITERAL )\{(\d+)\}\r\n|\
328
+ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
329
+ (?# 5: LITERAL )#{Patterns::LITERAL}|\
84
330
  (?# 6: LPAR )(\()|\
85
331
  (?# 7: RPAR )(\)))/ni
86
332
 
87
- TEXT_REGEXP = /\G(?:\
88
- (?# 1: TEXT )([^\x00\r\n]*))/ni
89
-
90
- RTEXT_REGEXP = /\G(?:\
91
- (?# 1: LBRA )(\[)|\
92
- (?# 2: TEXT )([^\x00\r\n]*))/ni
333
+ # text, after 'resp-text-code "]"'
334
+ TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
93
335
 
94
- CTEXT_REGEXP = /\G(?:\
95
- (?# 1: TEXT )([^\x00\r\n\]]*))/ni
336
+ # resp-text-code, after 'atom SP'
337
+ CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
96
338
 
97
339
  Token = Struct.new(:symbol, :value)
98
340
 
99
- def response
100
- token = lookahead
101
- case token.symbol
102
- when T_PLUS
103
- result = continue_req
104
- when T_STAR
105
- result = response_untagged
106
- else
107
- result = response_tagged
108
- end
109
- while lookahead.symbol == T_SPACE
110
- # Ignore trailing space for Microsoft Exchange Server
111
- shift_token
112
- end
113
- match(T_CRLF)
114
- match(T_EOF)
115
- return result
341
+ def_char_matchers :SP, " ", :T_SPACE
342
+ def_char_matchers :PLUS, "+", :T_PLUS
343
+ def_char_matchers :STAR, "*", :T_STAR
344
+
345
+ def_char_matchers :lpar, "(", :T_LPAR
346
+ def_char_matchers :rpar, ")", :T_RPAR
347
+
348
+ def_char_matchers :lbra, "[", :T_LBRA
349
+ def_char_matchers :rbra, "]", :T_RBRA
350
+
351
+ # valid number ranges are not enforced by parser
352
+ # number = 1*DIGIT
353
+ # ; Unsigned 32-bit integer
354
+ # ; (0 <= n < 4,294,967,296)
355
+ def_token_matchers :number, T_NUMBER, coerce: Integer
356
+
357
+ def_token_matchers :quoted, T_QUOTED
358
+
359
+ # string = quoted / literal
360
+ def_token_matchers :string, T_QUOTED, T_LITERAL
361
+
362
+ # use where string represents "LABEL" values
363
+ def_token_matchers :case_insensitive__string,
364
+ T_QUOTED, T_LITERAL,
365
+ send: :upcase
366
+
367
+ # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
368
+ # NIL? returns nil when it does *not* match
369
+ def_token_matchers :NIL, T_NIL
370
+
371
+ # In addition to explicit uses of +tagged-ext-label+, use this to match
372
+ # keywords when the grammar has not provided any extension syntax.
373
+ #
374
+ # Do *not* use this for labels where the grammar specifies extensions
375
+ # can be +atom+, even if all currently defined labels would match. For
376
+ # example response codes in +resp-text-code+.
377
+ #
378
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
379
+ # ; Is a valid RFC 3501 "atom".
380
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
381
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
382
+ #
383
+ # TODO: add to lexer and only match tagged-ext-label
384
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
385
+
386
+ def_token_matchers :CRLF, T_CRLF
387
+ def_token_matchers :EOF, T_EOF
388
+
389
+ # atom = 1*ATOM-CHAR
390
+ # ATOM-CHAR = <any CHAR except atom-specials>
391
+ ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
392
+
393
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
394
+ # resp-specials = "]"
395
+ ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
396
+
397
+ ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
398
+
399
+ # tag = 1*<any ASTRING-CHAR except "+">
400
+ TAG_TOKENS = (ASTRING_CHARS_TOKENS - [T_PLUS]).freeze
401
+
402
+ # TODO: handle atom, astring_chars, and tag entirely inside the lexer
403
+ def atom; combine_adjacent(*ATOM_TOKENS) end
404
+ def astring_chars; combine_adjacent(*ASTRING_CHARS_TOKENS) end
405
+ def tag; combine_adjacent(*TAG_TOKENS) end
406
+
407
+ # the #accept version of #atom
408
+ def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
409
+
410
+ # Returns <tt>atom.upcase</tt>
411
+ def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
412
+
413
+ # Returns <tt>atom?&.upcase</tt>
414
+ def case_insensitive__atom?
415
+ -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
116
416
  end
117
417
 
118
- def continue_req
119
- match(T_PLUS)
120
- token = lookahead
121
- if token.symbol == T_SPACE
122
- shift_token
123
- return ContinuationRequest.new(resp_text, @str)
124
- else
125
- return ContinuationRequest.new(ResponseText.new(nil, ""), @str)
126
- end
418
+ # astring = 1*ASTRING-CHAR / string
419
+ def astring
420
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
127
421
  end
128
422
 
129
- def response_untagged
130
- match(T_STAR)
131
- match(T_SPACE)
132
- token = lookahead
133
- if token.symbol == T_NUMBER
134
- return numeric_response
135
- elsif token.symbol == T_ATOM
136
- case token.value
137
- when /\A(?:OK|NO|BAD|BYE|PREAUTH)\z/ni
138
- return response_cond
139
- when /\A(?:FLAGS)\z/ni
140
- return flags_response
141
- when /\A(?:ID)\z/ni
142
- return id_response
143
- when /\A(?:LIST|LSUB|XLIST)\z/ni
144
- return list_response
145
- when /\A(?:NAMESPACE)\z/ni
146
- return namespace_response
147
- when /\A(?:QUOTA)\z/ni
148
- return getquota_response
149
- when /\A(?:QUOTAROOT)\z/ni
150
- return getquotaroot_response
151
- when /\A(?:ACL)\z/ni
152
- return getacl_response
153
- when /\A(?:SEARCH|SORT)\z/ni
154
- return search_response
155
- when /\A(?:THREAD)\z/ni
156
- return thread_response
157
- when /\A(?:STATUS)\z/ni
158
- return status_response
159
- when /\A(?:CAPABILITY)\z/ni
160
- return capability_response
161
- when /\A(?:NOOP)\z/ni
162
- return ignored_response
163
- else
164
- return text_response
423
+ def astring?
424
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
425
+ end
426
+
427
+ # Use #label or #label_in to assert specific known labels
428
+ # (+tagged-ext-label+ only, not +atom+).
429
+ def label(word)
430
+ (val = tagged_ext_label) == word and return val
431
+ parse_error("unexpected atom %p, expected %p instead", val, word)
432
+ end
433
+
434
+ # Use #label or #label_in to assert specific known labels
435
+ # (+tagged-ext-label+ only, not +atom+).
436
+ def label_in(*labels)
437
+ lbl = tagged_ext_label and labels.include?(lbl) and return lbl
438
+ parse_error("unexpected atom %p, expected one of %s instead",
439
+ lbl, labels.join(" or "))
440
+ end
441
+
442
+ # expects "OK" or "PREAUTH" and raises InvalidResponseError on failure
443
+ def resp_cond_auth__name
444
+ lbl = tagged_ext_label and AUTH_CONDS.include? lbl and return lbl
445
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
446
+ lbl, AUTH_CONDS.join(" or ")
447
+ ]
448
+ end
449
+
450
+ # expects "OK" or "NO" or "BAD" and raises InvalidResponseError on failure
451
+ def resp_cond_state__name
452
+ lbl = tagged_ext_label and RESP_COND_STATES.include? lbl and return lbl
453
+ raise InvalidResponseError, "bad response type %p, expected %s" % [
454
+ lbl, RESP_COND_STATES.join(" or ")
455
+ ]
456
+ end
457
+
458
+ # nstring = string / nil
459
+ def nstring
460
+ NIL? ? nil : string
461
+ end
462
+
463
+ def nquoted
464
+ NIL? ? nil : quoted
465
+ end
466
+
467
+ # use where nstring represents "LABEL" values
468
+ def case_insensitive__nstring
469
+ NIL? ? nil : case_insensitive__string
470
+ end
471
+
472
+ # valid number ranges are not enforced by parser
473
+ # number64 = 1*DIGIT
474
+ # ; Unsigned 63-bit integer
475
+ # ; (0 <= n <= 9,223,372,036,854,775,807)
476
+ alias number64 number
477
+ alias number64? number?
478
+
479
+ # valid number ranges are not enforced by parser
480
+ # nz-number = digit-nz *DIGIT
481
+ # ; Non-zero unsigned 32-bit integer
482
+ # ; (0 < n < 4,294,967,296)
483
+ alias nz_number number
484
+ alias nz_number? number?
485
+
486
+ # valid number ranges are not enforced by parser
487
+ # nz-number64 = digit-nz *DIGIT
488
+ # ; Unsigned 63-bit integer
489
+ # ; (0 < n <= 9,223,372,036,854,775,807)
490
+ alias nz_number64 nz_number
491
+
492
+ # valid number ranges are not enforced by parser
493
+ # uniqueid = nz-number
494
+ # ; Strictly ascending
495
+ alias uniqueid nz_number
496
+
497
+ # [RFC3501 & RFC9051:]
498
+ # response = *(continue-req / response-data) response-done
499
+ #
500
+ # For simplicity, response isn't interpreted as the combination of the
501
+ # three response types, but instead represents any individual server
502
+ # response. Our simplified interpretation is defined as:
503
+ # response = continue-req | response_data | response-tagged
504
+ #
505
+ # n.b: our "response-tagged" definition parses "greeting" too.
506
+ def response
507
+ resp = case lookahead!(T_PLUS, T_STAR, *TAG_TOKENS).symbol
508
+ when T_PLUS then continue_req
509
+ when T_STAR then response_data
510
+ else response_tagged
511
+ end
512
+ accept_spaces # QUIRKY: Ignore trailing space (MS Exchange Server?)
513
+ CRLF!
514
+ EOF!
515
+ resp
516
+ end
517
+
518
+ # RFC3501 & RFC9051:
519
+ # continue-req = "+" SP (resp-text / base64) CRLF
520
+ #
521
+ # n.b: base64 is valid resp-text. And in the spirit of RFC9051 Appx E 23
522
+ # (and to workaround existing servers), we use the following grammar:
523
+ #
524
+ # continue-req = "+" [SP resp-text] CRLF
525
+ def continue_req
526
+ PLUS!
527
+ ContinuationRequest.new(SP? ? resp_text : ResponseText::EMPTY, @str)
528
+ end
529
+
530
+ RE_RESPONSE_TYPE = /\G(?:\d+ )?(?<type>#{Patterns::TAGGED_EXT_LABEL})/n
531
+
532
+ # [RFC3501:]
533
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
534
+ # mailbox-data / message-data / capability-data) CRLF
535
+ # [RFC4466:]
536
+ # response-data = "*" SP response-payload CRLF
537
+ # response-payload = resp-cond-state / resp-cond-bye /
538
+ # mailbox-data / message-data / capability-data
539
+ # RFC5161 (ENABLE capability):
540
+ # response-data =/ "*" SP enable-data CRLF
541
+ # RFC5255 (LANGUAGE capability)
542
+ # response-payload =/ language-data
543
+ # RFC5255 (I18NLEVEL=1 and I18NLEVEL=2 capabilities)
544
+ # response-payload =/ comparator-data
545
+ # [RFC9051:]
546
+ # response-data = "*" SP (resp-cond-state / resp-cond-bye /
547
+ # mailbox-data / message-data / capability-data /
548
+ # enable-data) CRLF
549
+ #
550
+ # [merging in greeting and response-fatal:]
551
+ # greeting = "*" SP (resp-cond-auth / resp-cond-bye) CRLF
552
+ # response-fatal = "*" SP resp-cond-bye CRLF
553
+ # response-data =/ "*" SP (resp-cond-auth / resp-cond-bye) CRLF
554
+ # [removing duplicates, this is simply]
555
+ # response-payload =/ resp-cond-auth
556
+ #
557
+ # TODO: remove resp-cond-auth and handle greeting separately
558
+ def response_data
559
+ STAR!; SP!
560
+ m = peek_re(RE_RESPONSE_TYPE) or parse_error("unparsable response")
561
+ case m["type"].upcase
562
+ when "OK" then resp_cond_state__untagged # RFC3501, RFC9051
563
+ when "FETCH" then message_data__fetch # RFC3501, RFC9051
564
+ when "EXPUNGE" then message_data__expunge # RFC3501, RFC9051
565
+ when "EXISTS" then mailbox_data__exists # RFC3501, RFC9051
566
+ when "ESEARCH" then esearch_response # RFC4731, RFC9051, etc
567
+ when "VANISHED" then expunged_resp # RFC7162
568
+ when "UIDFETCH" then uidfetch_resp # (draft) UIDONLY
569
+ when "SEARCH" then mailbox_data__search # RFC3501 (obsolete)
570
+ when "CAPABILITY" then capability_data__untagged # RFC3501, RFC9051
571
+ when "FLAGS" then mailbox_data__flags # RFC3501, RFC9051
572
+ when "LIST" then mailbox_data__list # RFC3501, RFC9051
573
+ when "STATUS" then mailbox_data__status # RFC3501, RFC9051
574
+ when "NAMESPACE" then namespace_response # RFC2342, RFC9051
575
+ when "ENABLED" then enable_data # RFC5161, RFC9051
576
+ when "BAD" then resp_cond_state__untagged # RFC3501, RFC9051
577
+ when "NO" then resp_cond_state__untagged # RFC3501, RFC9051
578
+ when "PREAUTH" then resp_cond_auth # RFC3501, RFC9051
579
+ when "BYE" then resp_cond_bye # RFC3501, RFC9051
580
+ when "RECENT" then mailbox_data__recent # RFC3501 (obsolete)
581
+ when "SORT" then sort_data # RFC5256, RFC7162
582
+ when "THREAD" then thread_data # RFC5256
583
+ when "QUOTA" then quota_response # RFC2087, RFC9208
584
+ when "QUOTAROOT" then quotaroot_response # RFC2087, RFC9208
585
+ when "ID" then id_response # RFC2971
586
+ when "ACL" then acl_data # RFC4314
587
+ when "LISTRIGHTS" then listrights_data # RFC4314
588
+ when "MYRIGHTS" then myrights_data # RFC4314
589
+ when "METADATA" then metadata_resp # RFC5464
590
+ when "LANGUAGE" then language_data # RFC5255
591
+ when "COMPARATOR" then comparator_data # RFC5255
592
+ when "CONVERTED" then message_data__converted # RFC5259
593
+ when "LSUB" then mailbox_data__lsub # RFC3501 (obsolete)
594
+ when "XLIST" then mailbox_data__xlist # deprecated
595
+ when "NOOP" then response_data__noop
596
+ else response_data__unhandled
597
+ end
598
+ end
599
+
600
+ def response_data__unhandled(klass = UntaggedResponse)
601
+ num = number?; SP?
602
+ type = tagged_ext_label; SP?
603
+ text = remaining_unparsed
604
+ data =
605
+ if num && text then UnparsedNumericResponseData.new(num, text)
606
+ elsif text then UnparsedData.new(text)
607
+ else num
165
608
  end
166
- else
167
- parse_error("unexpected token %s", token.symbol)
168
- end
609
+ klass.new(type, data, @str)
610
+ end
611
+
612
+ # reads all the way up until CRLF
613
+ def remaining_unparsed
614
+ str = @str[@pos...-2] and @pos += str.bytesize
615
+ str&.empty? ? nil : str
169
616
  end
170
617
 
618
+ def response_data__ignored; response_data__unhandled(IgnoredResponse) end
619
+ alias response_data__noop response_data__ignored
620
+
621
+ alias esearch_response response_data__unhandled
622
+ alias expunged_resp response_data__unhandled
623
+ alias uidfetch_resp response_data__unhandled
624
+ alias listrights_data response_data__unhandled
625
+ alias myrights_data response_data__unhandled
626
+ alias metadata_resp response_data__unhandled
627
+ alias language_data response_data__unhandled
628
+ alias comparator_data response_data__unhandled
629
+ alias message_data__converted response_data__unhandled
630
+
631
+ # RFC3501 & RFC9051:
632
+ # response-tagged = tag SP resp-cond-state CRLF
633
+ #
634
+ # resp-cond-state = ("OK" / "NO" / "BAD") SP resp-text
635
+ # ; Status condition
636
+ #
637
+ # tag = 1*<any ASTRING-CHAR except "+">
171
638
  def response_tagged
172
- tag = astring_chars
173
- match(T_SPACE)
174
- token = match(T_ATOM)
175
- name = token.value.upcase
176
- match(T_SPACE)
177
- return TaggedResponse.new(tag, name, resp_text, @str)
639
+ tag = tag(); SP!
640
+ name = resp_cond_state__name; SP!
641
+ TaggedResponse.new(tag, name, resp_text, @str)
178
642
  end
179
643
 
180
- def response_cond
181
- token = match(T_ATOM)
182
- name = token.value.upcase
183
- match(T_SPACE)
184
- return UntaggedResponse.new(name, resp_text, @str)
644
+ # RFC3501 & RFC9051:
645
+ # resp-cond-state = ("OK" / "NO" / "BAD") SP resp-text
646
+ def resp_cond_state__untagged
647
+ name = resp_cond_state__name; SP!
648
+ UntaggedResponse.new(name, resp_text, @str)
185
649
  end
186
650
 
187
- def numeric_response
188
- n = number
189
- match(T_SPACE)
190
- token = match(T_ATOM)
191
- name = token.value.upcase
192
- case name
193
- when "EXISTS", "RECENT", "EXPUNGE"
194
- return UntaggedResponse.new(name, n, @str)
195
- when "FETCH"
196
- shift_token
197
- match(T_SPACE)
198
- data = FetchData.new(n, msg_att(n))
199
- return UntaggedResponse.new(name, data, @str)
200
- end
651
+ # resp-cond-auth = ("OK" / "PREAUTH") SP resp-text
652
+ def resp_cond_auth
653
+ name = resp_cond_auth__name; SP!
654
+ UntaggedResponse.new(name, resp_text, @str)
655
+ end
656
+
657
+ # resp-cond-bye = "BYE" SP resp-text
658
+ def resp_cond_bye
659
+ name = label(BYE); SP!
660
+ UntaggedResponse.new(name, resp_text, @str)
201
661
  end
202
662
 
663
+ # message-data = nz-number SP ("EXPUNGE" / ("FETCH" SP msg-att))
664
+ def message_data__fetch
665
+ seq = nz_number; SP!
666
+ name = label "FETCH"; SP!
667
+ data = FetchData.new(seq, msg_att(seq))
668
+ UntaggedResponse.new(name, data, @str)
669
+ end
670
+
671
+ def response_data__simple_numeric
672
+ data = nz_number; SP!
673
+ name = tagged_ext_label
674
+ UntaggedResponse.new(name, data, @str)
675
+ end
676
+
677
+ alias message_data__expunge response_data__simple_numeric
678
+ alias mailbox_data__exists response_data__simple_numeric
679
+ alias mailbox_data__recent response_data__simple_numeric
680
+
681
+ # RFC3501 & RFC9051:
682
+ # msg-att = "(" (msg-att-dynamic / msg-att-static)
683
+ # *(SP (msg-att-dynamic / msg-att-static)) ")"
684
+ #
685
+ # msg-att-dynamic = "FLAGS" SP "(" [flag-fetch *(SP flag-fetch)] ")"
686
+ # RFC5257 (ANNOTATE extension):
687
+ # msg-att-dynamic =/ "ANNOTATION" SP
688
+ # ( "(" entry-att *(SP entry-att) ")" /
689
+ # "(" entry *(SP entry) ")" )
690
+ # RFC7162 (CONDSTORE extension):
691
+ # msg-att-dynamic =/ fetch-mod-resp
692
+ # fetch-mod-resp = "MODSEQ" SP "(" permsg-modsequence ")"
693
+ # RFC8970 (PREVIEW extension):
694
+ # msg-att-dynamic =/ "PREVIEW" SP nstring
695
+ #
696
+ # RFC3501:
697
+ # msg-att-static = "ENVELOPE" SP envelope /
698
+ # "INTERNALDATE" SP date-time /
699
+ # "RFC822" [".HEADER" / ".TEXT"] SP nstring /
700
+ # "RFC822.SIZE" SP number /
701
+ # "BODY" ["STRUCTURE"] SP body /
702
+ # "BODY" section ["<" number ">"] SP nstring /
703
+ # "UID" SP uniqueid
704
+ # RFC3516 (BINARY extension):
705
+ # msg-att-static =/ "BINARY" section-binary SP (nstring / literal8)
706
+ # / "BINARY.SIZE" section-binary SP number
707
+ # RFC8514 (SAVEDATE extension):
708
+ # msg-att-static =/ "SAVEDATE" SP (date-time / nil)
709
+ # RFC8474 (OBJECTID extension):
710
+ # msg-att-static =/ fetch-emailid-resp / fetch-threadid-resp
711
+ # fetch-emailid-resp = "EMAILID" SP "(" objectid ")"
712
+ # fetch-threadid-resp = "THREADID" SP ( "(" objectid ")" / nil )
713
+ # RFC9051:
714
+ # msg-att-static = "ENVELOPE" SP envelope /
715
+ # "INTERNALDATE" SP date-time /
716
+ # "RFC822.SIZE" SP number64 /
717
+ # "BODY" ["STRUCTURE"] SP body /
718
+ # "BODY" section ["<" number ">"] SP nstring /
719
+ # "BINARY" section-binary SP (nstring / literal8) /
720
+ # "BINARY.SIZE" section-binary SP number /
721
+ # "UID" SP uniqueid
722
+ #
723
+ # Re https://www.rfc-editor.org/errata/eid7246, I'm adding "offset" to the
724
+ # official "BINARY" ABNF, like so:
725
+ #
726
+ # msg-att-static =/ "BINARY" section-binary ["<" number ">"] SP
727
+ # (nstring / literal8)
203
728
  def msg_att(n)
204
- match(T_LPAR)
729
+ lpar
205
730
  attr = {}
206
731
  while true
207
- token = lookahead
208
- case token.symbol
209
- when T_RPAR
210
- shift_token
211
- break
212
- when T_SPACE
213
- shift_token
214
- next
215
- end
216
- case token.value
217
- when /\A(?:ENVELOPE)\z/ni
218
- name, val = envelope_data
219
- when /\A(?:FLAGS)\z/ni
220
- name, val = flags_data
221
- when /\A(?:INTERNALDATE)\z/ni
222
- name, val = internaldate_data
223
- when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
224
- name, val = rfc822_text
225
- when /\A(?:RFC822\.SIZE)\z/ni
226
- name, val = rfc822_size
227
- when /\A(?:BODY(?:STRUCTURE)?)\z/ni
228
- name, val = body_data
229
- when /\A(?:UID)\z/ni
230
- name, val = uid_data
231
- when /\A(?:MODSEQ)\z/ni
232
- name, val = modseq_data
233
- else
234
- parse_error("unknown attribute `%s' for {%d}", token.value, n)
235
- end
732
+ name = msg_att__label; SP!
733
+ val =
734
+ case name
735
+ when "UID" then uniqueid
736
+ when "FLAGS" then flag_list
737
+ when "BODY" then body
738
+ when /\ABODY\[/ni then nstring
739
+ when "BODYSTRUCTURE" then body
740
+ when "ENVELOPE" then envelope
741
+ when "INTERNALDATE" then date_time
742
+ when "RFC822.SIZE" then number64
743
+ when "RFC822" then nstring # not in rev2
744
+ when "RFC822.HEADER" then nstring # not in rev2
745
+ when "RFC822.TEXT" then nstring # not in rev2
746
+ when "MODSEQ" then parens__modseq # CONDSTORE
747
+ else parse_error("unknown attribute `%s' for {%d}", name, n)
748
+ end
236
749
  attr[name] = val
750
+ break unless SP?
751
+ break if lookahead_rpar?
237
752
  end
238
- return attr
753
+ rpar
754
+ attr
239
755
  end
240
756
 
241
- def envelope_data
242
- token = match(T_ATOM)
243
- name = token.value.upcase
244
- match(T_SPACE)
245
- return name, envelope
757
+ # appends "[section]" and "<partial>" to the base label
758
+ def msg_att__label
759
+ case (name = tagged_ext_label)
760
+ when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni
761
+ # ignoring "[]" fixes https://bugs.ruby-lang.org/issues/5620
762
+ lbra? and rbra
763
+ when "BODY"
764
+ peek_lbra? and name << section and
765
+ peek_str?("<") and name << atom # partial
766
+ end
767
+ name
246
768
  end
247
769
 
248
770
  def envelope
@@ -280,482 +802,351 @@ module Net
280
802
  return result
281
803
  end
282
804
 
283
- def flags_data
284
- token = match(T_ATOM)
285
- name = token.value.upcase
286
- match(T_SPACE)
287
- return name, flag_list
288
- end
289
-
290
- def internaldate_data
291
- token = match(T_ATOM)
292
- name = token.value.upcase
293
- match(T_SPACE)
294
- token = match(T_QUOTED)
295
- return name, token.value
296
- end
297
-
298
- def rfc822_text
299
- token = match(T_ATOM)
300
- name = token.value.upcase
301
- token = lookahead
302
- if token.symbol == T_LBRA
303
- shift_token
304
- match(T_RBRA)
305
- end
306
- match(T_SPACE)
307
- return name, nstring
308
- end
309
-
310
- def rfc822_size
311
- token = match(T_ATOM)
312
- name = token.value.upcase
313
- match(T_SPACE)
314
- return name, number
315
- end
316
-
317
- def body_data
318
- token = match(T_ATOM)
319
- name = token.value.upcase
320
- token = lookahead
321
- if token.symbol == T_SPACE
322
- shift_token
323
- return name, body
324
- end
325
- name.concat(section)
326
- token = lookahead
327
- if token.symbol == T_ATOM
328
- name.concat(token.value)
329
- shift_token
330
- end
331
- match(T_SPACE)
332
- data = nstring
333
- return name, data
334
- end
805
+ # date-time = DQUOTE date-day-fixed "-" date-month "-" date-year
806
+ # SP time SP zone DQUOTE
807
+ alias date_time quoted
808
+ alias ndatetime nquoted
335
809
 
810
+ # RFC-3501 & RFC-9051:
811
+ # body = "(" (body-type-1part / body-type-mpart) ")"
336
812
  def body
337
813
  @lex_state = EXPR_DATA
338
- token = lookahead
339
- if token.symbol == T_NIL
340
- shift_token
341
- result = nil
342
- else
343
- match(T_LPAR)
344
- token = lookahead
345
- if token.symbol == T_LPAR
346
- result = body_type_mpart
347
- else
348
- result = body_type_1part
349
- end
350
- match(T_RPAR)
351
- end
814
+ lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar
815
+ result
816
+ ensure
352
817
  @lex_state = EXPR_BEG
353
- return result
354
818
  end
819
+ alias lookahead_body? lookahead_lpar?
355
820
 
821
+ # RFC-3501 & RFC9051:
822
+ # body-type-1part = (body-type-basic / body-type-msg / body-type-text)
823
+ # [SP body-ext-1part]
356
824
  def body_type_1part
357
- token = lookahead
358
- case token.value
359
- when /\A(?:TEXT)\z/ni
360
- return body_type_text
361
- when /\A(?:MESSAGE)\z/ni
362
- return body_type_msg
363
- when /\A(?:ATTACHMENT)\z/ni
364
- return body_type_attachment
365
- when /\A(?:MIXED)\z/ni
366
- return body_type_mixed
367
- else
368
- return body_type_basic
369
- end
370
- end
371
-
825
+ # This regexp peek is a performance optimization.
826
+ # The lookahead fallback would work fine too.
827
+ m = peek_re(/\G(?:
828
+ (?<TEXT> "TEXT" \s "[^"]+" )
829
+ |(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
830
+ |(?<BASIC> "[^"]+" \s "[^"]+" )
831
+ |(?<MIXED> "MIXED" )
832
+ )/nix)
833
+ choice = m&.named_captures&.compact&.keys&.first
834
+ # In practice, the following line should never be used. But the ABNF
835
+ # *does* allow literals, and this will handle them.
836
+ choice ||= lookahead_case_insensitive__string!
837
+ case choice
838
+ when "BASIC" then body_type_basic # => BodyTypeBasic
839
+ when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
840
+ when "TEXT" then body_type_text # => BodyTypeText
841
+ when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
842
+ else body_type_basic # might be a bug; server's or ours?
843
+ end
844
+ end
845
+
846
+ # RFC-3501 & RFC9051:
847
+ # body-type-basic = media-basic SP body-fields
372
848
  def body_type_basic
373
- mtype, msubtype = media_type
374
- token = lookahead
375
- if token.symbol == T_RPAR
376
- return BodyTypeBasic.new(mtype, msubtype)
377
- end
378
- match(T_SPACE)
379
- param, content_id, desc, enc, size = body_fields
380
- md5, disposition, language, extension = body_ext_1part
381
- return BodyTypeBasic.new(mtype, msubtype,
382
- param, content_id,
383
- desc, enc, size,
384
- md5, disposition, language, extension)
849
+ type = media_basic # n.b. "basic" type isn't enforced here
850
+ if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid
851
+ SP!; flds = body_fields
852
+ SP? and exts = body_ext_1part
853
+ BodyTypeBasic.new(*type, *flds, *exts)
385
854
  end
386
855
 
856
+ # RFC-3501 & RFC-9051:
857
+ # body-type-text = media-text SP body-fields SP body-fld-lines
387
858
  def body_type_text
388
- mtype, msubtype = media_type
389
- match(T_SPACE)
390
- param, content_id, desc, enc, size = body_fields
391
- match(T_SPACE)
392
- lines = number
393
- md5, disposition, language, extension = body_ext_1part
394
- return BodyTypeText.new(mtype, msubtype,
395
- param, content_id,
396
- desc, enc, size,
397
- lines,
398
- md5, disposition, language, extension)
859
+ type = media_text
860
+ SP!; flds = body_fields
861
+ SP!; lines = body_fld_lines
862
+ SP? and exts = body_ext_1part
863
+ BodyTypeText.new(*type, *flds, lines, *exts)
399
864
  end
400
865
 
866
+ # RFC-3501 & RFC-9051:
867
+ # body-type-msg = media-message SP body-fields SP envelope
868
+ # SP body SP body-fld-lines
401
869
  def body_type_msg
402
- mtype, msubtype = media_type
403
- match(T_SPACE)
404
- param, content_id, desc, enc, size = body_fields
405
-
406
- token = lookahead
407
- if token.symbol == T_RPAR
408
- # If this is not message/rfc822, we shouldn't apply the RFC822
409
- # spec to it. We should handle anything other than
410
- # message/rfc822 using multipart extension data [rfc3501] (i.e.
411
- # the data itself won't be returned, we would have to retrieve it
412
- # with BODYSTRUCTURE instead of with BODY
413
-
414
- # Also, sometimes a message/rfc822 is included as a large
415
- # attachment instead of having all of the other details
416
- # (e.g. attaching a .eml file to an email)
417
- if msubtype == "RFC822"
418
- return BodyTypeMessage.new(mtype, msubtype, param, content_id,
419
- desc, enc, size, nil, nil, nil, nil,
420
- nil, nil, nil)
421
- else
422
- return BodyTypeExtension.new(mtype, msubtype,
423
- param, content_id,
424
- desc, enc, size)
425
- end
426
- end
427
-
428
- match(T_SPACE)
429
- env = envelope
430
- match(T_SPACE)
431
- b = body
432
- match(T_SPACE)
433
- lines = number
434
- md5, disposition, language, extension = body_ext_1part
435
- return BodyTypeMessage.new(mtype, msubtype,
436
- param, content_id,
437
- desc, enc, size,
438
- env, b, lines,
439
- md5, disposition, language, extension)
440
- end
441
-
442
- def body_type_attachment
443
- mtype = case_insensitive_string
444
- match(T_SPACE)
445
- param = body_fld_param
446
- return BodyTypeAttachment.new(mtype, nil, param)
447
- end
448
-
870
+ # n.b. "message/rfc822" type isn't enforced here
871
+ type = media_message
872
+ SP!; flds = body_fields
873
+
874
+ # Sometimes servers send body-type-basic when body-type-msg should be.
875
+ # E.g: when a message/rfc822 part has "Content-Disposition: attachment".
876
+ #
877
+ # * SP "(" --> SP envelope --> continue as body-type-msg
878
+ # * ")" --> no body-ext-1part --> completed body-type-basic
879
+ # * SP nstring --> SP body-fld-md5
880
+ # --> SP body-ext-1part --> continue as body-type-basic
881
+ #
882
+ # It's probably better to return BodyTypeBasic---even for
883
+ # "message/rfc822"---than BodyTypeMessage with invalid fields.
884
+ unless peek_str?(" (")
885
+ SP? and exts = body_ext_1part
886
+ return BodyTypeBasic.new(*type, *flds, *exts)
887
+ end
888
+
889
+ SP!; env = envelope
890
+ SP!; bdy = body
891
+ SP!; lines = body_fld_lines
892
+ SP? and exts = body_ext_1part
893
+ BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts)
894
+ end
895
+
896
+ # This is a malformed body-type-mpart with no subparts.
449
897
  def body_type_mixed
450
- mtype = "MULTIPART"
451
- msubtype = case_insensitive_string
452
- param, disposition, language, extension = body_ext_mpart
453
- return BodyTypeBasic.new(mtype, msubtype, param, nil, nil, nil, nil, nil, disposition, language, extension)
898
+ # warn "malformed body-type-mpart: multipart/mixed with no parts."
899
+ type = media_subtype # => "MIXED"
900
+ SP? and exts = body_ext_mpart
901
+ BodyTypeMultipart.new("MULTIPART", type, nil, *exts)
454
902
  end
455
903
 
904
+ # RFC-3501 & RFC-9051:
905
+ # body-type-mpart = 1*body SP media-subtype
906
+ # [SP body-ext-mpart]
456
907
  def body_type_mpart
457
- parts = []
458
- while true
459
- token = lookahead
460
- if token.symbol == T_SPACE
461
- shift_token
462
- break
463
- end
464
- parts.push(body)
465
- end
466
- mtype = "MULTIPART"
467
- msubtype = case_insensitive_string
468
- param, disposition, language, extension = body_ext_mpart
469
- return BodyTypeMultipart.new(mtype, msubtype, parts,
470
- param, disposition, language,
471
- extension)
908
+ parts = [body]; parts << body until SP?; msubtype = media_subtype
909
+ SP? and exts = body_ext_mpart
910
+ BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts)
472
911
  end
473
912
 
913
+ # n.b. this handles both type and subtype
914
+ #
915
+ # RFC-3501 vs RFC-9051:
916
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
917
+ # "MESSAGE" /
918
+ # "VIDEO") DQUOTE) / string) SP media-subtype
919
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
920
+ # "FONT" / "MESSAGE" / "MODEL" /
921
+ # "VIDEO") DQUOTE) / string) SP media-subtype
922
+ #
923
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
924
+ # DQUOTE "RFC822" DQUOTE
925
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
926
+ # DQUOTE ("RFC822" / "GLOBAL") DQUOTE
927
+ #
928
+ # RFC-3501 & RFC-9051:
929
+ # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
930
+ # media-subtype = string
474
931
  def media_type
475
- mtype = case_insensitive_string
476
- token = lookahead
477
- if token.symbol != T_SPACE
478
- return mtype, nil
479
- end
480
- match(T_SPACE)
481
- msubtype = case_insensitive_string
932
+ mtype = case_insensitive__string
933
+ SP? or return mtype, nil # ??? quirky!
934
+ msubtype = media_subtype
482
935
  return mtype, msubtype
483
936
  end
484
937
 
938
+ # TODO: check types
939
+ alias media_basic media_type # */* --- catchall
940
+ alias media_message media_type # message/rfc822, message/global
941
+ alias media_text media_type # text/*
942
+
943
+ alias media_subtype case_insensitive__string
944
+
945
+ # RFC-3501 & RFC-9051:
946
+ # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
947
+ # body-fld-enc SP body-fld-octets
485
948
  def body_fields
486
- param = body_fld_param
487
- match(T_SPACE)
488
- content_id = nstring
489
- match(T_SPACE)
490
- desc = nstring
491
- match(T_SPACE)
492
- enc = case_insensitive_string
493
- match(T_SPACE)
494
- size = number
495
- return param, content_id, desc, enc, size
949
+ fields = []
950
+ fields << body_fld_param; SP!
951
+ fields << body_fld_id; SP!
952
+ fields << body_fld_desc; SP!
953
+ fields << body_fld_enc; SP!
954
+ fields << body_fld_octets
955
+ fields
496
956
  end
497
957
 
958
+ # RFC3501, RFC9051:
959
+ # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
498
960
  def body_fld_param
499
- token = lookahead
500
- if token.symbol == T_NIL
501
- shift_token
502
- return nil
503
- end
504
- match(T_LPAR)
961
+ return if NIL?
505
962
  param = {}
506
- while true
507
- token = lookahead
508
- case token.symbol
509
- when T_RPAR
510
- shift_token
511
- break
512
- when T_SPACE
513
- shift_token
514
- end
515
- name = case_insensitive_string
516
- match(T_SPACE)
517
- val = string
518
- param[name] = val
519
- end
520
- return param
521
- end
522
-
963
+ lpar
964
+ name = case_insensitive__string; SP!; param[name] = string
965
+ while SP?
966
+ name = case_insensitive__string; SP!; param[name] = string
967
+ end
968
+ rpar
969
+ param
970
+ end
971
+
972
+ # RFC2060
973
+ # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
974
+ # [SPACE body_fld_lang
975
+ # [SPACE 1#body_extension]]]
976
+ # ;; MUST NOT be returned on non-extensible
977
+ # ;; "BODY" fetch
978
+ # RFC3501 & RFC9051
979
+ # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
980
+ # [SP body-fld-loc *(SP body-extension)]]]
981
+ # ; MUST NOT be returned on non-extensible
982
+ # ; "BODY" fetch
523
983
  def body_ext_1part
524
- token = lookahead
525
- if token.symbol == T_SPACE
526
- shift_token
527
- else
528
- return nil
529
- end
530
- md5 = nstring
531
-
532
- token = lookahead
533
- if token.symbol == T_SPACE
534
- shift_token
535
- else
536
- return md5
537
- end
538
- disposition = body_fld_dsp
539
-
540
- token = lookahead
541
- if token.symbol == T_SPACE
542
- shift_token
543
- else
544
- return md5, disposition
545
- end
546
- language = body_fld_lang
547
-
548
- token = lookahead
549
- if token.symbol == T_SPACE
550
- shift_token
551
- else
552
- return md5, disposition, language
553
- end
554
-
555
- extension = body_extensions
556
- return md5, disposition, language, extension
557
- end
558
-
559
- def body_ext_mpart
560
- token = lookahead
561
- if token.symbol == T_SPACE
562
- shift_token
563
- else
564
- return nil
565
- end
566
- param = body_fld_param
567
-
568
- token = lookahead
569
- if token.symbol == T_SPACE
570
- shift_token
571
- else
572
- return param
573
- end
574
- disposition = body_fld_dsp
575
-
576
- token = lookahead
577
- if token.symbol == T_SPACE
578
- shift_token
579
- else
580
- return param, disposition
581
- end
582
- language = body_fld_lang
583
-
584
- token = lookahead
585
- if token.symbol == T_SPACE
586
- shift_token
587
- else
588
- return param, disposition, language
589
- end
590
-
591
- extension = body_extensions
592
- return param, disposition, language, extension
593
- end
594
-
595
- def body_fld_dsp
596
- token = lookahead
597
- if token.symbol == T_NIL
598
- shift_token
599
- return nil
600
- end
601
- match(T_LPAR)
602
- dsp_type = case_insensitive_string
603
- match(T_SPACE)
604
- param = body_fld_param
605
- match(T_RPAR)
606
- return ContentDisposition.new(dsp_type, param)
607
- end
608
-
609
- def body_fld_lang
610
- token = lookahead
611
- if token.symbol == T_LPAR
612
- shift_token
613
- result = []
614
- while true
615
- token = lookahead
616
- case token.symbol
617
- when T_RPAR
618
- shift_token
619
- return result
620
- when T_SPACE
621
- shift_token
622
- end
623
- result.push(case_insensitive_string)
624
- end
625
- else
626
- lang = nstring
627
- if lang
628
- return lang.upcase
629
- else
630
- return lang
631
- end
632
- end
633
- end
634
-
635
- def body_extensions
636
- result = []
637
- while true
638
- token = lookahead
639
- case token.symbol
640
- when T_RPAR
641
- return result
642
- when T_SPACE
643
- shift_token
644
- end
645
- result.push(body_extension)
646
- end
647
- end
648
-
649
- def body_extension
650
- token = lookahead
651
- case token.symbol
652
- when T_LPAR
653
- shift_token
654
- result = body_extensions
655
- match(T_RPAR)
656
- return result
657
- when T_NUMBER
658
- return number
659
- else
660
- return nstring
661
- end
662
- end
663
-
664
- def section
665
- str = String.new
666
- token = match(T_LBRA)
667
- str.concat(token.value)
668
- token = match(T_ATOM, T_NUMBER, T_RBRA)
669
- if token.symbol == T_RBRA
670
- str.concat(token.value)
671
- return str
672
- end
673
- str.concat(token.value)
674
- token = lookahead
675
- if token.symbol == T_SPACE
676
- shift_token
677
- str.concat(token.value)
678
- token = match(T_LPAR)
679
- str.concat(token.value)
680
- while true
681
- token = lookahead
682
- case token.symbol
683
- when T_RPAR
684
- str.concat(token.value)
685
- shift_token
686
- break
687
- when T_SPACE
688
- shift_token
689
- str.concat(token.value)
690
- end
691
- str.concat(format_string(astring))
692
- end
693
- end
694
- token = match(T_RBRA)
695
- str.concat(token.value)
696
- return str
697
- end
698
-
699
- def format_string(str)
700
- case str
701
- when ""
702
- return '""'
703
- when /[\x80-\xff\r\n]/n
704
- # literal
705
- return "{" + str.bytesize.to_s + "}" + CRLF + str
706
- when /[(){ \x00-\x1f\x7f%*"\\]/n
707
- # quoted string
708
- return '"' + str.gsub(/["\\]/n, "\\\\\\&") + '"'
709
- else
710
- # atom
711
- return str
712
- end
984
+ fields = []; fields << body_fld_md5
985
+ SP? or return fields; fields << body_fld_dsp
986
+ SP? or return fields; fields << body_fld_lang
987
+ SP? or return fields; fields << body_fld_loc
988
+ SP? or return fields; fields << body_extensions
989
+ fields
990
+ end
991
+
992
+ # RFC-2060:
993
+ # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
994
+ # [SP 1#body_extension]]
995
+ # ;; MUST NOT be returned on non-extensible
996
+ # ;; "BODY" fetch
997
+ # RFC-3501 & RFC-9051:
998
+ # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
999
+ # [SP body-fld-loc *(SP body-extension)]]]
1000
+ # ; MUST NOT be returned on non-extensible
1001
+ # ; "BODY" fetch
1002
+ def body_ext_mpart
1003
+ fields = []; fields << body_fld_param
1004
+ SP? or return fields; fields << body_fld_dsp
1005
+ SP? or return fields; fields << body_fld_lang
1006
+ SP? or return fields; fields << body_fld_loc
1007
+ SP? or return fields; fields << body_extensions
1008
+ fields
1009
+ end
1010
+
1011
+ alias body_fld_desc nstring
1012
+ alias body_fld_id nstring
1013
+ alias body_fld_loc nstring
1014
+ alias body_fld_lines number64 # number in 3501, number64 in 9051
1015
+ alias body_fld_md5 nstring
1016
+ alias body_fld_octets number
1017
+
1018
+ # RFC-3501 & RFC-9051:
1019
+ # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
1020
+ # "QUOTED-PRINTABLE") DQUOTE) / string
1021
+ alias body_fld_enc case_insensitive__string
1022
+
1023
+ # body-fld-dsp = "(" string SP body-fld-param ")" / nil
1024
+ def body_fld_dsp
1025
+ return if NIL?
1026
+ lpar; dsp_type = case_insensitive__string
1027
+ SP!; param = body_fld_param
1028
+ rpar
1029
+ ContentDisposition.new(dsp_type, param)
713
1030
  end
714
1031
 
715
- def uid_data
716
- token = match(T_ATOM)
717
- name = token.value.upcase
718
- match(T_SPACE)
719
- return name, number
1032
+ # body-fld-lang = nstring / "(" string *(SP string) ")"
1033
+ def body_fld_lang
1034
+ if lpar?
1035
+ result = [case_insensitive__string]
1036
+ result << case_insensitive__string while SP?
1037
+ rpar
1038
+ result
1039
+ else
1040
+ case_insensitive__nstring
1041
+ end
720
1042
  end
721
1043
 
722
- def modseq_data
723
- token = match(T_ATOM)
724
- name = token.value.upcase
725
- match(T_SPACE)
726
- match(T_LPAR)
727
- modseq = number
728
- match(T_RPAR)
729
- return name, modseq
1044
+ # body-extension *(SP body-extension)
1045
+ def body_extensions
1046
+ result = []
1047
+ result << body_extension; while SP? do result << body_extension end
1048
+ result
730
1049
  end
731
1050
 
732
- def ignored_response
733
- while lookahead.symbol != T_CRLF
734
- shift_token
1051
+ # body-extension = nstring / number / number64 /
1052
+ # "(" body-extension *(SP body-extension) ")"
1053
+ # ; Future expansion. Client implementations
1054
+ # ; MUST accept body-extension fields. Server
1055
+ # ; implementations MUST NOT generate
1056
+ # ; body-extension fields except as defined by
1057
+ # ; future Standard or Standards Track
1058
+ # ; revisions of this specification.
1059
+ def body_extension
1060
+ if (uint = number64?) then uint
1061
+ elsif lpar? then exts = body_extensions; rpar; exts
1062
+ else nstring
735
1063
  end
736
- return IgnoredResponse.new(@str)
737
1064
  end
738
1065
 
739
- def text_response
740
- token = match(T_ATOM)
741
- name = token.value.upcase
742
- match(T_SPACE)
743
- return UntaggedResponse.new(name, text)
1066
+ # section = "[" [section-spec] "]"
1067
+ def section
1068
+ str = +lbra
1069
+ str << section_spec unless peek_rbra?
1070
+ str << rbra
1071
+ end
1072
+
1073
+ # section-spec = section-msgtext / (section-part ["." section-text])
1074
+ # section-msgtext = "HEADER" /
1075
+ # "HEADER.FIELDS" [".NOT"] SP header-list /
1076
+ # "TEXT"
1077
+ # ; top-level or MESSAGE/RFC822 or
1078
+ # ; MESSAGE/GLOBAL part
1079
+ # section-part = nz-number *("." nz-number)
1080
+ # ; body part reference.
1081
+ # ; Allows for accessing nested body parts.
1082
+ # section-text = section-msgtext / "MIME"
1083
+ # ; text other than actual body part (headers,
1084
+ # ; etc.)
1085
+ #
1086
+ # n.b: we could "cheat" here and just grab all text inside the brackets,
1087
+ # but literals would need special treatment.
1088
+ def section_spec
1089
+ str = "".b
1090
+ str << atom # grabs everything up to "SP header-list" or "]"
1091
+ str << " " << header_list if SP?
1092
+ str
744
1093
  end
745
1094
 
746
- def flags_response
747
- token = match(T_ATOM)
748
- name = token.value.upcase
749
- match(T_SPACE)
750
- return UntaggedResponse.new(name, flag_list, @str)
1095
+ # header-list = "(" header-fld-name *(SP header-fld-name) ")"
1096
+ def header_list
1097
+ str = +""
1098
+ str << lpar << header_fld_name
1099
+ str << " " << header_fld_name while SP?
1100
+ str << rpar
751
1101
  end
752
1102
 
753
- def list_response
754
- token = match(T_ATOM)
755
- name = token.value.upcase
756
- match(T_SPACE)
757
- return UntaggedResponse.new(name, mailbox_list, @str)
758
- end
1103
+ # RFC3501 & RFC9051:
1104
+ # header-fld-name = astring
1105
+ #
1106
+ # NOTE: Previously, Net::IMAP recreated the raw original source string.
1107
+ # Now, it grabs the raw encoded value using @str and @pos. A future
1108
+ # version may simply return the decoded astring value. Although that is
1109
+ # technically incompatible, it should almost never make a difference: all
1110
+ # standard header field names are valid atoms:
1111
+ #
1112
+ # https://www.iana.org/assignments/message-headers/message-headers.xhtml
1113
+ #
1114
+ # Although RFC3501 allows any astring, RFC5322-valid header names are one
1115
+ # or more of the printable US-ASCII characters, except SP and colon. So
1116
+ # empty string isn't valid, and literals aren't needed and should not be
1117
+ # used. This is explicitly unchanged by [I18N-HDRS] (RFC6532).
1118
+ #
1119
+ # RFC5322:
1120
+ # optional-field = field-name ":" unstructured CRLF
1121
+ # field-name = 1*ftext
1122
+ # ftext = %d33-57 / ; Printable US-ASCII
1123
+ # %d59-126 ; characters not including
1124
+ # ; ":".
1125
+ def header_fld_name
1126
+ assert_no_lookahead
1127
+ start = @pos
1128
+ astring
1129
+ @str[start...@pos - 1]
1130
+ end
1131
+
1132
+ # mailbox-data = "FLAGS" SP flag-list / "LIST" SP mailbox-list /
1133
+ # "LSUB" SP mailbox-list / "SEARCH" *(SP nz-number) /
1134
+ # "STATUS" SP mailbox SP "(" [status-att-list] ")" /
1135
+ # number SP "EXISTS" / number SP "RECENT"
1136
+
1137
+ def mailbox_data__flags
1138
+ name = label("FLAGS")
1139
+ SP!
1140
+ UntaggedResponse.new(name, flag_list, @str)
1141
+ end
1142
+
1143
+ def mailbox_data__list
1144
+ name = label_in("LIST", "LSUB", "XLIST")
1145
+ SP!
1146
+ UntaggedResponse.new(name, mailbox_list, @str)
1147
+ end
1148
+ alias mailbox_data__lsub mailbox_data__list
1149
+ alias mailbox_data__xlist mailbox_data__list
759
1150
 
760
1151
  def mailbox_list
761
1152
  attr = flag_list
@@ -821,7 +1212,8 @@ module Net
821
1212
  return UntaggedResponse.new(name, data, @str)
822
1213
  end
823
1214
 
824
- def getacl_response
1215
+ # acl-data = "ACL" SP mailbox *(SP identifier SP rights)
1216
+ def acl_data
825
1217
  token = match(T_ATOM)
826
1218
  name = token.value.upcase
827
1219
  match(T_SPACE)
@@ -847,7 +1239,21 @@ module Net
847
1239
  return UntaggedResponse.new(name, data, @str)
848
1240
  end
849
1241
 
850
- def search_response
1242
+ # RFC3501:
1243
+ # mailbox-data = "SEARCH" *(SP nz-number) / ...
1244
+ # RFC5256: SORT
1245
+ # sort-data = "SORT" *(SP nz-number)
1246
+ # RFC7162: CONDSTORE, QRESYNC
1247
+ # mailbox-data =/ "SEARCH" [1*(SP nz-number) SP
1248
+ # search-sort-mod-seq]
1249
+ # sort-data = "SORT" [1*(SP nz-number) SP
1250
+ # search-sort-mod-seq]
1251
+ # ; Updates the SORT response from RFC 5256.
1252
+ # search-sort-mod-seq = "(" "MODSEQ" SP mod-sequence-value ")"
1253
+ # RFC9051:
1254
+ # mailbox-data = obsolete-search-response / ...
1255
+ # obsolete-search-response = "SEARCH" *(SP nz-number)
1256
+ def mailbox_data__search
851
1257
  token = match(T_ATOM)
852
1258
  name = token.value.upcase
853
1259
  token = lookahead
@@ -877,8 +1283,9 @@ module Net
877
1283
  end
878
1284
  return UntaggedResponse.new(name, data, @str)
879
1285
  end
1286
+ alias sort_data mailbox_data__search
880
1287
 
881
- def thread_response
1288
+ def thread_data
882
1289
  token = match(T_ATOM)
883
1290
  name = token.value.upcase
884
1291
  token = lookahead
@@ -940,7 +1347,7 @@ module Net
940
1347
  return rootmember
941
1348
  end
942
1349
 
943
- def status_response
1350
+ def mailbox_data__status
944
1351
  token = match(T_ATOM)
945
1352
  name = token.value.upcase
946
1353
  match(T_SPACE)
@@ -967,29 +1374,40 @@ module Net
967
1374
  return UntaggedResponse.new(name, data, @str)
968
1375
  end
969
1376
 
970
- def capability_response
971
- token = match(T_ATOM)
972
- name = token.value.upcase
973
- match(T_SPACE)
974
- UntaggedResponse.new(name, capability_data, @str)
1377
+ # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
1378
+ # The grammar rule is used by both response-data and resp-text-code.
1379
+ # But this method only returns UntaggedResponse (response-data).
1380
+ #
1381
+ # RFC3501:
1382
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1383
+ # *(SP capability)
1384
+ # RFC9051:
1385
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1386
+ # *(SP capability)
1387
+ def capability_data__untagged
1388
+ UntaggedResponse.new label("CAPABILITY"), capability__list, @str
975
1389
  end
976
1390
 
977
- def capability_data
978
- data = []
979
- while true
980
- token = lookahead
981
- case token.symbol
982
- when T_CRLF, T_RBRA
983
- break
984
- when T_SPACE
985
- shift_token
986
- next
987
- end
988
- data.push(atom.upcase)
989
- end
990
- data
1391
+ # enable-data = "ENABLED" *(SP capability)
1392
+ def enable_data
1393
+ UntaggedResponse.new label("ENABLED"), capability__list, @str
1394
+ end
1395
+
1396
+ # As a workaround for buggy servers, allow a trailing SP:
1397
+ # *(SP capability) [SP]
1398
+ def capability__list
1399
+ list = []; while SP? && (capa = capability?) do list << capa end; list
991
1400
  end
992
1401
 
1402
+ alias resp_code__capability capability__list
1403
+
1404
+ # capability = ("AUTH=" auth-type) / atom
1405
+ # ; New capabilities MUST begin with "X" or be
1406
+ # ; registered with IANA as standard or
1407
+ # ; standards-track
1408
+ alias capability case_insensitive__atom
1409
+ alias capability? case_insensitive__atom?
1410
+
993
1411
  def id_response
994
1412
  token = match(T_ATOM)
995
1413
  name = token.value.upcase
@@ -1019,147 +1437,177 @@ module Net
1019
1437
  end
1020
1438
  end
1021
1439
 
1440
+ # namespace-response = "NAMESPACE" SP namespace
1441
+ # SP namespace SP namespace
1442
+ # ; The first Namespace is the Personal Namespace(s).
1443
+ # ; The second Namespace is the Other Users'
1444
+ # ; Namespace(s).
1445
+ # ; The third Namespace is the Shared Namespace(s).
1022
1446
  def namespace_response
1447
+ name = label("NAMESPACE")
1023
1448
  @lex_state = EXPR_DATA
1024
- token = lookahead
1025
- token = match(T_ATOM)
1026
- name = token.value.upcase
1027
- match(T_SPACE)
1028
- personal = namespaces
1029
- match(T_SPACE)
1030
- other = namespaces
1031
- match(T_SPACE)
1032
- shared = namespaces
1449
+ data = Namespaces.new((SP!; namespace),
1450
+ (SP!; namespace),
1451
+ (SP!; namespace))
1452
+ UntaggedResponse.new(name, data, @str)
1453
+ ensure
1033
1454
  @lex_state = EXPR_BEG
1034
- data = Namespaces.new(personal, other, shared)
1035
- return UntaggedResponse.new(name, data, @str)
1036
- end
1037
-
1038
- def namespaces
1039
- token = lookahead
1040
- # empty () is not allowed, so nil is functionally identical to empty.
1041
- data = []
1042
- if token.symbol == T_NIL
1043
- shift_token
1044
- else
1045
- match(T_LPAR)
1046
- loop do
1047
- data << namespace
1048
- break unless lookahead.symbol == T_SPACE
1049
- shift_token
1050
- end
1051
- match(T_RPAR)
1052
- end
1053
- data
1054
1455
  end
1055
1456
 
1457
+ # namespace = nil / "(" 1*namespace-descr ")"
1056
1458
  def namespace
1057
- match(T_LPAR)
1058
- prefix = match(T_QUOTED, T_LITERAL).value
1059
- match(T_SPACE)
1060
- delimiter = string
1459
+ NIL? and return []
1460
+ lpar
1461
+ list = [namespace_descr]
1462
+ list << namespace_descr until rpar?
1463
+ list
1464
+ end
1465
+
1466
+ # namespace-descr = "(" string SP
1467
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil)
1468
+ # [namespace-response-extensions] ")"
1469
+ def namespace_descr
1470
+ lpar
1471
+ prefix = string; SP!
1472
+ delimiter = nquoted # n.b: should only accept single char
1061
1473
  extensions = namespace_response_extensions
1062
- match(T_RPAR)
1474
+ rpar
1063
1475
  Namespace.new(prefix, delimiter, extensions)
1064
1476
  end
1065
1477
 
1478
+ # namespace-response-extensions = *namespace-response-extension
1479
+ # namespace-response-extension = SP string SP
1480
+ # "(" string *(SP string) ")"
1066
1481
  def namespace_response_extensions
1067
1482
  data = {}
1068
- token = lookahead
1069
- if token.symbol == T_SPACE
1070
- shift_token
1071
- name = match(T_QUOTED, T_LITERAL).value
1483
+ while SP?
1484
+ name = string; SP!
1485
+ lpar
1072
1486
  data[name] ||= []
1073
- match(T_SPACE)
1074
- match(T_LPAR)
1075
- loop do
1076
- data[name].push match(T_QUOTED, T_LITERAL).value
1077
- break unless lookahead.symbol == T_SPACE
1078
- shift_token
1079
- end
1080
- match(T_RPAR)
1487
+ data[name] << string
1488
+ data[name] << string while SP?
1489
+ rpar
1081
1490
  end
1082
1491
  data
1083
1492
  end
1084
1493
 
1085
- # text = 1*TEXT-CHAR
1086
- # TEXT-CHAR = <any CHAR except CR and LF>
1494
+ # TEXT-CHAR = <any CHAR except CR and LF>
1495
+ # RFC3501:
1496
+ # text = 1*TEXT-CHAR
1497
+ # RFC9051:
1498
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
1499
+ # ; Non-ASCII text can only be returned
1500
+ # ; after ENABLE IMAP4rev2 command
1087
1501
  def text
1088
- match(T_TEXT, lex_state: EXPR_TEXT).value
1502
+ match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8")
1503
+ end
1504
+
1505
+ # an "accept" version of #text
1506
+ def text?
1507
+ accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8")
1089
1508
  end
1090
1509
 
1091
- # resp-text = ["[" resp-text-code "]" SP] text
1510
+ # RFC3501:
1511
+ # resp-text = ["[" resp-text-code "]" SP] text
1512
+ # RFC9051:
1513
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1514
+ #
1515
+ # We leniently re-interpret this as
1516
+ # resp-text = ["[" resp-text-code "]" [SP [text]]] / [text]
1092
1517
  def resp_text
1093
- token = match(T_LBRA, T_TEXT, lex_state: EXPR_RTEXT)
1094
- case token.symbol
1095
- when T_LBRA
1096
- code = resp_text_code
1097
- match(T_RBRA)
1098
- accept_space # violating RFC
1099
- ResponseText.new(code, text)
1100
- when T_TEXT
1101
- ResponseText.new(nil, token.value)
1518
+ if lbra?
1519
+ code = resp_text_code; rbra
1520
+ ResponseText.new(code, SP? && text? || "")
1521
+ else
1522
+ ResponseText.new(nil, text? || "")
1102
1523
  end
1103
1524
  end
1104
1525
 
1105
- # See https://www.rfc-editor.org/errata/rfc3501
1526
+ # RFC3501 (See https://www.rfc-editor.org/errata/rfc3501):
1527
+ # resp-text-code = "ALERT" /
1528
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1529
+ # capability-data / "PARSE" /
1530
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1531
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1532
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1533
+ # "UNSEEN" SP nz-number /
1534
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1535
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1536
+ # *(SP capability)
1106
1537
  #
1107
- # resp-text-code = "ALERT" /
1108
- # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1109
- # capability-data / "PARSE" /
1110
- # "PERMANENTFLAGS" SP "("
1111
- # [flag-perm *(SP flag-perm)] ")" /
1112
- # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1113
- # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1114
- # "UNSEEN" SP nz-number /
1115
- # atom [SP 1*<any TEXT-CHAR except "]">]
1538
+ # RFC5530:
1539
+ # resp-text-code =/ "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1540
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1541
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1542
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1543
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1544
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1545
+ # "NONEXISTENT"
1546
+ # RFC9051:
1547
+ # resp-text-code = "ALERT" /
1548
+ # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] /
1549
+ # capability-data / "PARSE" /
1550
+ # "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")" /
1551
+ # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" /
1552
+ # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number /
1553
+ # resp-code-apnd / resp-code-copy / "UIDNOTSTICKY" /
1554
+ # "UNAVAILABLE" / "AUTHENTICATIONFAILED" /
1555
+ # "AUTHORIZATIONFAILED" / "EXPIRED" /
1556
+ # "PRIVACYREQUIRED" / "CONTACTADMIN" / "NOPERM" /
1557
+ # "INUSE" / "EXPUNGEISSUED" / "CORRUPTION" /
1558
+ # "SERVERBUG" / "CLIENTBUG" / "CANNOT" /
1559
+ # "LIMIT" / "OVERQUOTA" / "ALREADYEXISTS" /
1560
+ # "NONEXISTENT" / "NOTSAVED" / "HASCHILDREN" /
1561
+ # "CLOSED" /
1562
+ # "UNKNOWN-CTE" /
1563
+ # atom [SP 1*<any TEXT-CHAR except "]">]
1564
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1565
+ # *(SP capability)
1116
1566
  #
1117
- # +UIDPLUS+ ABNF:: https://www.rfc-editor.org/rfc/rfc4315.html#section-4
1118
- # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1567
+ # RFC4315 (UIDPLUS), RFC9051 (IMAP4rev2):
1568
+ # resp-code-apnd = "APPENDUID" SP nz-number SP append-uid
1569
+ # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1570
+ # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY"
1571
+ #
1572
+ # RFC7162 (CONDSTORE):
1573
+ # resp-text-code =/ "HIGHESTMODSEQ" SP mod-sequence-value /
1574
+ # "NOMODSEQ" /
1575
+ # "MODIFIED" SP sequence-set
1119
1576
  def resp_text_code
1120
- token = match(T_ATOM)
1121
- name = token.value.upcase
1122
- case name
1123
- when /\A(?:ALERT|PARSE|READ-ONLY|READ-WRITE|TRYCREATE|NOMODSEQ)\z/n
1124
- result = ResponseCode.new(name, nil)
1125
- when /\A(?:BADCHARSET)\z/n
1126
- result = ResponseCode.new(name, charset_list)
1127
- when /\A(?:CAPABILITY)\z/ni
1128
- result = ResponseCode.new(name, capability_data)
1129
- when /\A(?:PERMANENTFLAGS)\z/n
1130
- match(T_SPACE)
1131
- result = ResponseCode.new(name, flag_list)
1132
- when /\A(?:UIDVALIDITY|UIDNEXT|UNSEEN)\z/n
1133
- match(T_SPACE)
1134
- result = ResponseCode.new(name, number)
1135
- when /\A(?:APPENDUID)\z/n
1136
- result = ResponseCode.new(name, resp_code_apnd__data)
1137
- when /\A(?:COPYUID)\z/n
1138
- result = ResponseCode.new(name, resp_code_copy__data)
1139
- else
1140
- token = lookahead
1141
- if token.symbol == T_SPACE
1142
- shift_token
1143
- token = match(T_TEXT, lex_state: EXPR_CTEXT)
1144
- result = ResponseCode.new(name, token.value)
1577
+ name = resp_text_code__name
1578
+ data =
1579
+ case name
1580
+ when "CAPABILITY" then resp_code__capability
1581
+ when "PERMANENTFLAGS" then SP? ? flag_perm__list : []
1582
+ when "UIDNEXT" then SP!; nz_number
1583
+ when "UIDVALIDITY" then SP!; nz_number
1584
+ when "UNSEEN" then SP!; nz_number # rev1 only
1585
+ when "APPENDUID" then SP!; resp_code_apnd__data # rev2, UIDPLUS
1586
+ when "COPYUID" then SP!; resp_code_copy__data # rev2, UIDPLUS
1587
+ when "BADCHARSET" then SP? ? charset__list : []
1588
+ when "ALERT", "PARSE", "READ-ONLY", "READ-WRITE", "TRYCREATE",
1589
+ "UNAVAILABLE", "AUTHENTICATIONFAILED", "AUTHORIZATIONFAILED",
1590
+ "EXPIRED", "PRIVACYREQUIRED", "CONTACTADMIN", "NOPERM", "INUSE",
1591
+ "EXPUNGEISSUED", "CORRUPTION", "SERVERBUG", "CLIENTBUG", "CANNOT",
1592
+ "LIMIT", "OVERQUOTA", "ALREADYEXISTS", "NONEXISTENT", "CLOSED",
1593
+ "NOTSAVED", "UIDNOTSTICKY", "UNKNOWN-CTE", "HASCHILDREN"
1594
+ when "NOMODSEQ" # CONDSTORE
1145
1595
  else
1146
- result = ResponseCode.new(name, nil)
1596
+ SP? and text_chars_except_rbra
1147
1597
  end
1148
- end
1149
- return result
1598
+ ResponseCode.new(name, data)
1150
1599
  end
1151
1600
 
1152
- def charset_list
1153
- result = []
1154
- if accept(T_SPACE)
1155
- match(T_LPAR)
1156
- result << charset
1157
- while accept(T_SPACE)
1158
- result << charset
1159
- end
1160
- match(T_RPAR)
1161
- end
1162
- result
1601
+ alias resp_text_code__name case_insensitive__atom
1602
+
1603
+ # 1*<any TEXT-CHAR except "]">
1604
+ def text_chars_except_rbra
1605
+ match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')[0]
1606
+ end
1607
+
1608
+ # "(" charset *(SP charset) ")"
1609
+ def charset__list
1610
+ lpar; list = [charset]; while SP? do list << charset end; rpar; list
1163
1611
  end
1164
1612
 
1165
1613
  # already matched: "APPENDUID"
@@ -1175,8 +1623,8 @@ module Net
1175
1623
  # match uid_set even if that returns a single-member array.
1176
1624
  #
1177
1625
  def resp_code_apnd__data
1178
- match(T_SPACE); validity = number
1179
- match(T_SPACE); dst_uids = uid_set # uniqueid ⊂ uid-set
1626
+ validity = number; SP!
1627
+ dst_uids = uid_set # uniqueid ⊂ uid-set
1180
1628
  UIDPlusData.new(validity, nil, dst_uids)
1181
1629
  end
1182
1630
 
@@ -1184,9 +1632,9 @@ module Net
1184
1632
  #
1185
1633
  # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set
1186
1634
  def resp_code_copy__data
1187
- match(T_SPACE); validity = number
1188
- match(T_SPACE); src_uids = uid_set
1189
- match(T_SPACE); dst_uids = uid_set
1635
+ validity = number; SP!
1636
+ src_uids = uid_set; SP!
1637
+ dst_uids = uid_set
1190
1638
  UIDPlusData.new(validity, src_uids, dst_uids)
1191
1639
  end
1192
1640
 
@@ -1230,9 +1678,7 @@ module Net
1230
1678
  mailbox = $3
1231
1679
  host = $4
1232
1680
  for s in [name, route, mailbox, host]
1233
- if s
1234
- s.gsub!(/\\(["\\])/n, "\\1")
1235
- end
1681
+ Patterns.unescape_quoted! s
1236
1682
  end
1237
1683
  else
1238
1684
  name = nstring
@@ -1247,124 +1693,56 @@ module Net
1247
1693
  return Address.new(name, route, mailbox, host)
1248
1694
  end
1249
1695
 
1250
- FLAG_REGEXP = /\
1251
- (?# FLAG )\\([^\x80-\xff(){ \x00-\x1f\x7f%"\\]+)|\
1252
- (?# ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\]+)/n
1253
-
1696
+ # flag-list = "(" [flag *(SP flag)] ")"
1254
1697
  def flag_list
1255
- if @str.index(/\(([^)]*)\)/ni, @pos)
1256
- @pos = $~.end(0)
1257
- return $1.scan(FLAG_REGEXP).collect { |flag, atom|
1258
- if atom
1259
- atom
1260
- else
1261
- flag.capitalize.intern
1262
- end
1263
- }
1264
- else
1265
- parse_error("invalid flag list")
1266
- end
1267
- end
1268
-
1269
- def nstring
1270
- token = lookahead
1271
- if token.symbol == T_NIL
1272
- shift_token
1273
- return nil
1274
- else
1275
- return string
1276
- end
1277
- end
1278
-
1279
- def astring
1280
- token = lookahead
1281
- if string_token?(token)
1282
- return string
1283
- else
1284
- return astring_chars
1285
- end
1286
- end
1287
-
1288
- def string
1289
- token = lookahead
1290
- if token.symbol == T_NIL
1291
- shift_token
1292
- return nil
1293
- end
1294
- token = match(T_QUOTED, T_LITERAL)
1295
- return token.value
1296
- end
1297
-
1298
- STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1299
-
1300
- def string_token?(token)
1301
- return STRING_TOKENS.include?(token.symbol)
1302
- end
1303
-
1304
- def case_insensitive_string
1305
- token = lookahead
1306
- if token.symbol == T_NIL
1307
- shift_token
1308
- return nil
1309
- end
1310
- token = match(T_QUOTED, T_LITERAL)
1311
- return token.value.upcase
1312
- end
1313
-
1314
- # atom = 1*ATOM-CHAR
1315
- # ATOM-CHAR = <any CHAR except atom-specials>
1316
- ATOM_TOKENS = [
1317
- T_ATOM,
1318
- T_NUMBER,
1319
- T_NIL,
1320
- T_LBRA,
1321
- T_PLUS
1322
- ]
1323
-
1324
- def atom
1325
- -combine_adjacent(*ATOM_TOKENS)
1326
- end
1327
-
1328
- # ASTRING-CHAR = ATOM-CHAR / resp-specials
1329
- # resp-specials = "]"
1330
- ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
1331
-
1332
- def astring_chars
1333
- combine_adjacent(*ASTRING_CHARS_TOKENS)
1334
- end
1335
-
1336
- def combine_adjacent(*tokens)
1337
- result = "".b
1338
- while token = accept(*tokens)
1339
- result << token.value
1340
- end
1341
- if result.empty?
1342
- parse_error('unexpected token %s (expected %s)',
1343
- lookahead.symbol, args.join(" or "))
1344
- end
1345
- result
1698
+ match_re(Patterns::FLAG_LIST, "flag-list")[1]
1699
+ .split(nil)
1700
+ .map! { _1.start_with?("\\") ? _1[1..].capitalize.to_sym : _1 }
1701
+ end
1702
+
1703
+ # "(" [flag-perm *(SP flag-perm)] ")"
1704
+ def flag_perm__list
1705
+ match_re(Patterns::FLAG_PERM_LIST, "PERMANENTFLAGS flag-perm list")[1]
1706
+ .split(nil)
1707
+ .map! { _1.start_with?("\\") ? _1[1..].capitalize.to_sym : _1 }
1708
+ end
1709
+
1710
+ # Not checking for max one mbx-list-sflag in the parser.
1711
+ # >>>
1712
+ # mbx-list-flags = *(mbx-list-oflag SP) mbx-list-sflag
1713
+ # *(SP mbx-list-oflag) /
1714
+ # mbx-list-oflag *(SP mbx-list-oflag)
1715
+ # mbx-list-oflag = "\Noinferiors" / child-mbox-flag /
1716
+ # "\Subscribed" / "\Remote" / flag-extension
1717
+ # ; Other flags; multiple from this list are
1718
+ # ; possible per LIST response, but each flag
1719
+ # ; can only appear once per LIST response
1720
+ # mbx-list-sflag = "\NonExistent" / "\Noselect" / "\Marked" /
1721
+ # "\Unmarked"
1722
+ # ; Selectability flags; only one per LIST response
1723
+ def parens__mbx_list_flags
1724
+ match_re(Patterns::MBX_LIST_FLAGS, "mbx-list-flags")[1]
1725
+ .split(nil).map! { _1.capitalize.to_sym }
1346
1726
  end
1347
1727
 
1348
1728
  # See https://www.rfc-editor.org/errata/rfc3501
1349
1729
  #
1350
1730
  # charset = atom / quoted
1351
- def charset
1352
- if token = accept(T_QUOTED)
1353
- token.value
1354
- else
1355
- atom
1356
- end
1357
- end
1731
+ def charset; quoted? || atom end
1358
1732
 
1359
- def number
1360
- token = lookahead
1361
- if token.symbol == T_NIL
1362
- shift_token
1363
- return nil
1364
- end
1365
- token = match(T_NUMBER)
1366
- return token.value.to_i
1367
- end
1733
+ # RFC7162:
1734
+ # mod-sequence-value = 1*DIGIT
1735
+ # ;; Positive unsigned 63-bit integer
1736
+ # ;; (mod-sequence)
1737
+ # ;; (1 <= n <= 9,223,372,036,854,775,807).
1738
+ alias mod_sequence_value nz_number64
1739
+
1740
+ # RFC7162:
1741
+ # permsg-modsequence = mod-sequence-value
1742
+ # ;; Per-message mod-sequence.
1743
+ alias permsg_modsequence mod_sequence_value
1744
+
1745
+ def parens__modseq; lpar; _ = permsg_modsequence; rpar; _ end
1368
1746
 
1369
1747
  # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1370
1748
  # uid-set = (uniqueid / uid-range) *("," uid-set)
@@ -1393,64 +1771,15 @@ module Net
1393
1771
 
1394
1772
  SPACES_REGEXP = /\G */n
1395
1773
 
1396
- # This advances @pos directly so it's safe before changing @lex_state.
1397
- def accept_space
1398
- if @token
1399
- shift_token if @token.symbol == T_SPACE
1400
- elsif @str[@pos] == " "
1401
- @pos += 1
1402
- end
1403
- end
1404
-
1405
1774
  # The RFC is very strict about this and usually we should be too.
1406
1775
  # But skipping spaces is usually a safe workaround for buggy servers.
1407
1776
  #
1408
1777
  # This advances @pos directly so it's safe before changing @lex_state.
1409
1778
  def accept_spaces
1410
- shift_token if @token&.symbol == T_SPACE
1411
- if @str.index(SPACES_REGEXP, @pos)
1779
+ return false unless SP?
1780
+ @str.index(SPACES_REGEXP, @pos) and
1412
1781
  @pos = $~.end(0)
1413
- end
1414
- end
1415
-
1416
- def match(*args, lex_state: @lex_state)
1417
- if @token && lex_state != @lex_state
1418
- parse_error("invalid lex_state change to %s with unconsumed token",
1419
- lex_state)
1420
- end
1421
- begin
1422
- @lex_state, original_lex_state = lex_state, @lex_state
1423
- token = lookahead
1424
- unless args.include?(token.symbol)
1425
- parse_error('unexpected token %s (expected %s)',
1426
- token.symbol.id2name,
1427
- args.collect {|i| i.id2name}.join(" or "))
1428
- end
1429
- shift_token
1430
- return token
1431
- ensure
1432
- @lex_state = original_lex_state
1433
- end
1434
- end
1435
-
1436
- # like match, but does not raise error on failure.
1437
- #
1438
- # returns and shifts token on successful match
1439
- # returns nil and leaves @token unshifted on no match
1440
- def accept(*args)
1441
- token = lookahead
1442
- if args.include?(token.symbol)
1443
- shift_token
1444
- token
1445
- end
1446
- end
1447
-
1448
- def lookahead
1449
- @token ||= next_token
1450
- end
1451
-
1452
- def shift_token
1453
- @token = nil
1782
+ true
1454
1783
  end
1455
1784
 
1456
1785
  def next_token
@@ -1460,39 +1789,42 @@ module Net
1460
1789
  @pos = $~.end(0)
1461
1790
  if $1
1462
1791
  return Token.new(T_SPACE, $+)
1463
- elsif $2
1464
- return Token.new(T_NIL, $+)
1792
+ elsif $2 && $6
1793
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
1794
+ return Token.new(T_ATOM, $2)
1465
1795
  elsif $3
1466
- return Token.new(T_NUMBER, $+)
1796
+ return Token.new(T_NIL, $+)
1467
1797
  elsif $4
1468
- return Token.new(T_ATOM, $+)
1798
+ return Token.new(T_NUMBER, $+)
1469
1799
  elsif $5
1470
- return Token.new(T_QUOTED,
1471
- $+.gsub(/\\(["\\])/n, "\\1"))
1472
- elsif $6
1473
- return Token.new(T_LPAR, $+)
1800
+ return Token.new(T_PLUS, $+)
1474
1801
  elsif $7
1475
- return Token.new(T_RPAR, $+)
1802
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
1803
+ return Token.new(T_ATOM, $+)
1476
1804
  elsif $8
1477
- return Token.new(T_BSLASH, $+)
1805
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1478
1806
  elsif $9
1479
- return Token.new(T_STAR, $+)
1807
+ return Token.new(T_LPAR, $+)
1480
1808
  elsif $10
1481
- return Token.new(T_LBRA, $+)
1809
+ return Token.new(T_RPAR, $+)
1482
1810
  elsif $11
1483
- return Token.new(T_RBRA, $+)
1811
+ return Token.new(T_BSLASH, $+)
1484
1812
  elsif $12
1813
+ return Token.new(T_STAR, $+)
1814
+ elsif $13
1815
+ return Token.new(T_LBRA, $+)
1816
+ elsif $14
1817
+ return Token.new(T_RBRA, $+)
1818
+ elsif $15
1485
1819
  len = $+.to_i
1486
1820
  val = @str[@pos, len]
1487
1821
  @pos += len
1488
1822
  return Token.new(T_LITERAL, val)
1489
- elsif $13
1490
- return Token.new(T_PLUS, $+)
1491
- elsif $14
1823
+ elsif $16
1492
1824
  return Token.new(T_PERCENT, $+)
1493
- elsif $15
1825
+ elsif $17
1494
1826
  return Token.new(T_CRLF, $+)
1495
- elsif $16
1827
+ elsif $18
1496
1828
  return Token.new(T_EOF, $+)
1497
1829
  else
1498
1830
  parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
@@ -1511,8 +1843,7 @@ module Net
1511
1843
  elsif $3
1512
1844
  return Token.new(T_NUMBER, $+)
1513
1845
  elsif $4
1514
- return Token.new(T_QUOTED,
1515
- $+.gsub(/\\(["\\])/n, "\\1"))
1846
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1516
1847
  elsif $5
1517
1848
  len = $+.to_i
1518
1849
  val = @str[@pos, len]
@@ -1529,63 +1860,11 @@ module Net
1529
1860
  @str.index(/\S*/n, @pos)
1530
1861
  parse_error("unknown token - %s", $&.dump)
1531
1862
  end
1532
- when EXPR_TEXT
1533
- if @str.index(TEXT_REGEXP, @pos)
1534
- @pos = $~.end(0)
1535
- if $1
1536
- return Token.new(T_TEXT, $+)
1537
- else
1538
- parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid")
1539
- end
1540
- else
1541
- @str.index(/\S*/n, @pos)
1542
- parse_error("unknown token - %s", $&.dump)
1543
- end
1544
- when EXPR_RTEXT
1545
- if @str.index(RTEXT_REGEXP, @pos)
1546
- @pos = $~.end(0)
1547
- if $1
1548
- return Token.new(T_LBRA, $+)
1549
- elsif $2
1550
- return Token.new(T_TEXT, $+)
1551
- else
1552
- parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid")
1553
- end
1554
- else
1555
- @str.index(/\S*/n, @pos)
1556
- parse_error("unknown token - %s", $&.dump)
1557
- end
1558
- when EXPR_CTEXT
1559
- if @str.index(CTEXT_REGEXP, @pos)
1560
- @pos = $~.end(0)
1561
- if $1
1562
- return Token.new(T_TEXT, $+)
1563
- else
1564
- parse_error("[Net::IMAP BUG] CTEXT_REGEXP is invalid")
1565
- end
1566
- else
1567
- @str.index(/\S*/n, @pos) #/
1568
- parse_error("unknown token - %s", $&.dump)
1569
- end
1570
1863
  else
1571
1864
  parse_error("invalid @lex_state - %s", @lex_state.inspect)
1572
1865
  end
1573
1866
  end
1574
1867
 
1575
- def parse_error(fmt, *args)
1576
- if IMAP.debug
1577
- $stderr.printf("@str: %s\n", @str.dump)
1578
- $stderr.printf("@pos: %d\n", @pos)
1579
- $stderr.printf("@lex_state: %s\n", @lex_state)
1580
- if @token
1581
- $stderr.printf("@token.symbol: %s\n", @token.symbol)
1582
- $stderr.printf("@token.value: %s\n", @token.value.inspect)
1583
- end
1584
- end
1585
- raise ResponseParseError, format(fmt, *args)
1586
- end
1587
1868
  end
1588
-
1589
1869
  end
1590
-
1591
1870
  end