net-imap 0.3.9 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of net-imap might be problematic. Click here for more details.

Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/pages.yml +46 -0
  3. data/.github/workflows/test.yml +5 -12
  4. data/Gemfile +1 -0
  5. data/README.md +15 -4
  6. data/Rakefile +0 -7
  7. data/benchmarks/generate_parser_benchmarks +52 -0
  8. data/benchmarks/parser.yml +578 -0
  9. data/benchmarks/stringprep.yml +1 -1
  10. data/lib/net/imap/authenticators.rb +26 -57
  11. data/lib/net/imap/command_data.rb +13 -6
  12. data/lib/net/imap/deprecated_client_options.rb +139 -0
  13. data/lib/net/imap/errors.rb +0 -34
  14. data/lib/net/imap/response_data.rb +46 -41
  15. data/lib/net/imap/response_parser/parser_utils.rb +230 -0
  16. data/lib/net/imap/response_parser.rb +667 -649
  17. data/lib/net/imap/sasl/anonymous_authenticator.rb +68 -0
  18. data/lib/net/imap/sasl/authenticators.rb +112 -0
  19. data/lib/net/imap/{authenticators/cram_md5.rb → sasl/cram_md5_authenticator.rb} +15 -9
  20. data/lib/net/imap/{authenticators/digest_md5.rb → sasl/digest_md5_authenticator.rb} +74 -21
  21. data/lib/net/imap/sasl/external_authenticator.rb +62 -0
  22. data/lib/net/imap/sasl/gs2_header.rb +80 -0
  23. data/lib/net/imap/{authenticators/login.rb → sasl/login_authenticator.rb} +19 -14
  24. data/lib/net/imap/sasl/oauthbearer_authenticator.rb +164 -0
  25. data/lib/net/imap/sasl/plain_authenticator.rb +93 -0
  26. data/lib/net/imap/sasl/scram_algorithm.rb +58 -0
  27. data/lib/net/imap/sasl/scram_authenticator.rb +278 -0
  28. data/lib/net/imap/sasl/stringprep.rb +6 -66
  29. data/lib/net/imap/sasl/xoauth2_authenticator.rb +88 -0
  30. data/lib/net/imap/sasl.rb +139 -44
  31. data/lib/net/imap/stringprep/nameprep.rb +70 -0
  32. data/lib/net/imap/stringprep/saslprep.rb +69 -0
  33. data/lib/net/imap/stringprep/saslprep_tables.rb +96 -0
  34. data/lib/net/imap/stringprep/tables.rb +146 -0
  35. data/lib/net/imap/stringprep/trace.rb +85 -0
  36. data/lib/net/imap/stringprep.rb +159 -0
  37. data/lib/net/imap.rb +987 -690
  38. data/net-imap.gemspec +1 -1
  39. data/rakelib/saslprep.rake +4 -4
  40. data/rakelib/string_prep_tables_generator.rb +82 -60
  41. metadata +30 -13
  42. data/lib/net/imap/authenticators/plain.rb +0 -41
  43. data/lib/net/imap/authenticators/xoauth2.rb +0 -20
  44. data/lib/net/imap/response_reader.rb +0 -75
  45. data/lib/net/imap/sasl/saslprep.rb +0 -55
  46. data/lib/net/imap/sasl/saslprep_tables.rb +0 -98
  47. data/lib/net/imap/sasl/stringprep_tables.rb +0 -153
@@ -1,13 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "response_parser/parser_utils"
4
5
 
5
6
  module Net
6
7
  class IMAP < Protocol
7
8
 
8
9
  # Parses an \IMAP server response.
9
10
  class ResponseParser
10
- MAX_UID_SET_SIZE = 10_000
11
+ include ParserUtils
12
+ extend ParserUtils::Generator
11
13
 
12
14
  # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
13
15
  def initialize
@@ -35,69 +37,347 @@ module Net
35
37
 
36
38
  # :stopdoc:
37
39
 
38
- EXPR_BEG = :EXPR_BEG
39
- EXPR_DATA = :EXPR_DATA
40
- EXPR_TEXT = :EXPR_TEXT
41
- EXPR_RTEXT = :EXPR_RTEXT
42
- EXPR_CTEXT = :EXPR_CTEXT
43
-
44
- T_SPACE = :SPACE
45
- T_NIL = :NIL
46
- T_NUMBER = :NUMBER
47
- T_ATOM = :ATOM
48
- T_QUOTED = :QUOTED
49
- T_LPAR = :LPAR
50
- T_RPAR = :RPAR
51
- T_BSLASH = :BSLASH
52
- T_STAR = :STAR
53
- T_LBRA = :LBRA
54
- T_RBRA = :RBRA
55
- T_LITERAL = :LITERAL
56
- T_PLUS = :PLUS
57
- T_PERCENT = :PERCENT
58
- T_CRLF = :CRLF
59
- T_EOF = :EOF
60
- T_TEXT = :TEXT
61
-
40
+ EXPR_BEG = :EXPR_BEG # the default, used in most places
41
+ EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
42
+
43
+ T_SPACE = :SPACE # atom special
44
+ T_ATOM = :ATOM # atom (subset of astring chars)
45
+ T_NIL = :NIL # subset of atom and label
46
+ T_NUMBER = :NUMBER # subset of atom
47
+ T_LBRA = :LBRA # subset of atom
48
+ T_PLUS = :PLUS # subset of atom; tag special
49
+ T_RBRA = :RBRA # atom special; resp_special; valid astring char
50
+ T_QUOTED = :QUOTED # starts/ends with atom special
51
+ T_BSLASH = :BSLASH # atom special; quoted special
52
+ T_LPAR = :LPAR # atom special; paren list delimiter
53
+ T_RPAR = :RPAR # atom special; paren list delimiter
54
+ T_STAR = :STAR # atom special; list wildcard
55
+ T_PERCENT = :PERCENT # atom special; list wildcard
56
+ T_LITERAL = :LITERAL # starts with atom special
57
+ T_CRLF = :CRLF # atom special; text special; quoted special
58
+ T_TEXT = :TEXT # any char except CRLF
59
+ T_EOF = :EOF # end of response string
60
+
61
+ module Patterns
62
+
63
+ module CharClassSubtraction
64
+ refine Regexp do
65
+ def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end
66
+ end
67
+ end
68
+ using CharClassSubtraction
69
+
70
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
71
+ # >>>
72
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
73
+ # CHAR = %x01-7F
74
+ # CRLF = CR LF
75
+ # ; Internet standard newline
76
+ # CTL = %x00-1F / %x7F
77
+ # ; controls
78
+ # DIGIT = %x30-39
79
+ # ; 0-9
80
+ # DQUOTE = %x22
81
+ # ; " (Double Quote)
82
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
83
+ # OCTET = %x00-FF
84
+ # SP = %x20
85
+ module RFC5234
86
+ ALPHA = /[A-Za-z]/n
87
+ CHAR = /[\x01-\x7f]/n
88
+ CRLF = /\r\n/n
89
+ CTL = /[\x00-\x1F\x7F]/n
90
+ DIGIT = /\d/n
91
+ DQUOTE = /"/n
92
+ HEXDIG = /\h/
93
+ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
94
+ SP = / /n
95
+ end
96
+
97
+ # UTF-8, a transformation format of ISO 10646
98
+ # >>>
99
+ # UTF8-1 = %x00-7F
100
+ # UTF8-tail = %x80-BF
101
+ # UTF8-2 = %xC2-DF UTF8-tail
102
+ # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
103
+ # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
104
+ # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
105
+ # %xF4 %x80-8F 2( UTF8-tail )
106
+ # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
107
+ # UTF8-octets = *( UTF8-char )
108
+ #
109
+ # n.b. String * Integer is used for repetition, rather than /x{3}/,
110
+ # because ruby 3.2's linear-time cache-based optimization doesn't work
111
+ # with "bounded or fixed times repetition nesting in another repetition
112
+ # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
113
+ # believe it is hard to support this case correctly."
114
+ # See https://bugs.ruby-lang.org/issues/19104
115
+ module RFC3629
116
+ UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
117
+ UTF8_TAIL = /[\x80-\xBF]/n
118
+ UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
119
+ UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
120
+ /\xED[\x80-\x9F]#{UTF8_TAIL}/n,
121
+ /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
122
+ /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
123
+ UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
124
+ /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
125
+ /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
126
+ UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
127
+ UTF8_OCTETS = /#{UTF8_CHAR}*/n
128
+ end
129
+
130
+ include RFC5234
131
+ include RFC3629
132
+
133
+ # CHAR8 = %x01-ff
134
+ # ; any OCTET except NUL, %x00
135
+ CHAR8 = /[\x01-\xff]/n
136
+
137
+ # list-wildcards = "%" / "*"
138
+ LIST_WILDCARDS = /[%*]/n
139
+ # quoted-specials = DQUOTE / "\"
140
+ QUOTED_SPECIALS = /["\\]/n
141
+ # resp-specials = "]"
142
+ RESP_SPECIALS = /[\]]/n
143
+
144
+ # atomish = 1*<any ATOM-CHAR except "[">
145
+ # ; We use "atomish" for msg-att and section, in order
146
+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
147
+ #
148
+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
149
+ # quoted-specials / resp-specials
150
+ # ATOM-CHAR = <any CHAR except atom-specials>
151
+ # atom = 1*ATOM-CHAR
152
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
153
+ # tag = 1*<any ASTRING-CHAR except "+">
154
+
155
+ ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
156
+ ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
157
+
158
+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
159
+ ATOM_CHAR = CHAR - ATOM_SPECIALS
160
+
161
+ ATOM = /#{ATOM_CHAR}+/n
162
+ ASTRING_CHARS = /#{ASTRING_CHAR}+/n
163
+ ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
164
+ TAG = /#{ASTRING_CHAR - /[+]/ }+/
165
+
166
+ # TEXT-CHAR = <any CHAR except CR and LF>
167
+ TEXT_CHAR = CHAR - /[\r\n]/
168
+
169
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
170
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
171
+ CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
172
+
173
+ # RFC3501:
174
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
175
+ # "\" quoted-specials
176
+ # RFC9051:
177
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
178
+ # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
179
+ # RFC3501 & RFC9051:
180
+ # quoted = DQUOTE *QUOTED-CHAR DQUOTE
181
+ QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
182
+ QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
183
+ QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
184
+ QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
185
+ UTF8_2, UTF8_3, UTF8_4)
186
+ QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
187
+ QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
188
+
189
+ # RFC3501:
190
+ # text = 1*TEXT-CHAR
191
+ # RFC9051:
192
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
193
+ # ; Non-ASCII text can only be returned
194
+ # ; after ENABLE IMAP4rev2 command
195
+ TEXT_rev1 = /#{TEXT_CHAR}+/
196
+ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
197
+
198
+ # RFC3501:
199
+ # literal = "{" number "}" CRLF *CHAR8
200
+ # ; Number represents the number of CHAR8s
201
+ # RFC9051:
202
+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
203
+ # ; <number64> represents the number of CHAR8s.
204
+ # ; A non-synchronizing literal is distinguished
205
+ # ; from a synchronizing literal by the presence of
206
+ # ; "+" before the closing "}".
207
+ # ; Non-synchronizing literals are not allowed when
208
+ # ; sent from server to the client.
209
+ LITERAL = /\{(\d+)\}\r\n/n
210
+
211
+ module_function
212
+
213
+ def unescape_quoted!(quoted)
214
+ quoted
215
+ &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
216
+ &.force_encoding("UTF-8")
217
+ end
218
+
219
+ def unescape_quoted(quoted)
220
+ quoted
221
+ &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
222
+ &.force_encoding("UTF-8")
223
+ end
224
+
225
+ end
226
+
227
+ # the default, used in most places
62
228
  BEG_REGEXP = /\G(?:\
63
- (?# 1: SPACE )( +)|\
64
- (?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
65
- (?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
66
- (?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\
67
- (?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
68
- (?# 6: LPAR )(\()|\
69
- (?# 7: RPAR )(\))|\
70
- (?# 8: BSLASH )(\\)|\
71
- (?# 9: STAR )(\*)|\
72
- (?# 10: LBRA )(\[)|\
73
- (?# 11: RBRA )(\])|\
74
- (?# 12: LITERAL )\{(\d+)\}\r\n|\
75
- (?# 13: PLUS )(\+)|\
76
- (?# 14: PERCENT )(%)|\
77
- (?# 15: CRLF )(\r\n)|\
78
- (?# 16: EOF )(\z))/ni
79
-
229
+ (?# 1: SPACE )( )|\
230
+ (?# 2: ATOM prefixed with a compatible subtype)\
231
+ ((?:\
232
+ (?# 3: NIL )(NIL)|\
233
+ (?# 4: NUMBER )(\d+)|\
234
+ (?# 5: PLUS )(\+))\
235
+ (?# 6: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
236
+ (?# This enables greedy alternation without lookahead, in linear time.)\
237
+ )|\
238
+ (?# Also need to check for ATOM without a subtype prefix.)\
239
+ (?# 7: ATOM )(#{Patterns::ATOMISH})|\
240
+ (?# 8: QUOTED )#{Patterns::QUOTED_rev2}|\
241
+ (?# 9: LPAR )(\()|\
242
+ (?# 10: RPAR )(\))|\
243
+ (?# 11: BSLASH )(\\)|\
244
+ (?# 12: STAR )(\*)|\
245
+ (?# 13: LBRA )(\[)|\
246
+ (?# 14: RBRA )(\])|\
247
+ (?# 15: LITERAL )#{Patterns::LITERAL}|\
248
+ (?# 16: PERCENT )(%)|\
249
+ (?# 17: CRLF )(\r\n)|\
250
+ (?# 18: EOF )(\z))/ni
251
+
252
+ # envelope, body(structure), namespaces
80
253
  DATA_REGEXP = /\G(?:\
81
254
  (?# 1: SPACE )( )|\
82
255
  (?# 2: NIL )(NIL)|\
83
256
  (?# 3: NUMBER )(\d+)|\
84
- (?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
85
- (?# 5: LITERAL )\{(\d+)\}\r\n|\
257
+ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
258
+ (?# 5: LITERAL )#{Patterns::LITERAL}|\
86
259
  (?# 6: LPAR )(\()|\
87
260
  (?# 7: RPAR )(\)))/ni
88
261
 
89
- TEXT_REGEXP = /\G(?:\
90
- (?# 1: TEXT )([^\x00\r\n]*))/ni
91
-
92
- RTEXT_REGEXP = /\G(?:\
93
- (?# 1: LBRA )(\[)|\
94
- (?# 2: TEXT )([^\x00\r\n]*))/ni
262
+ # text, after 'resp-text-code "]"'
263
+ TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
95
264
 
96
- CTEXT_REGEXP = /\G(?:\
97
- (?# 1: TEXT )([^\x00\r\n\]]*))/ni
265
+ # resp-text-code, after 'atom SP'
266
+ CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
98
267
 
99
268
  Token = Struct.new(:symbol, :value)
100
269
 
270
+ def_char_matchers :SP, " ", :T_SPACE
271
+
272
+ def_char_matchers :lpar, "(", :T_LPAR
273
+ def_char_matchers :rpar, ")", :T_RPAR
274
+
275
+ def_char_matchers :lbra, "[", :T_LBRA
276
+ def_char_matchers :rbra, "]", :T_RBRA
277
+
278
+ # valid number ranges are not enforced by parser
279
+ # number = 1*DIGIT
280
+ # ; Unsigned 32-bit integer
281
+ # ; (0 <= n < 4,294,967,296)
282
+ def_token_matchers :number, T_NUMBER, coerce: Integer
283
+
284
+ def_token_matchers :quoted, T_QUOTED
285
+
286
+ # string = quoted / literal
287
+ def_token_matchers :string, T_QUOTED, T_LITERAL
288
+
289
+ # use where string represents "LABEL" values
290
+ def_token_matchers :case_insensitive__string,
291
+ T_QUOTED, T_LITERAL,
292
+ send: :upcase
293
+
294
+ # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
295
+ # NIL? returns nil when it does *not* match
296
+ def_token_matchers :NIL, T_NIL
297
+
298
+ # In addition to explicit uses of +tagged-ext-label+, use this to match
299
+ # keywords when the grammar has not provided any extension syntax.
300
+ #
301
+ # Do *not* use this for labels where the grammar specifies extensions
302
+ # can be +atom+, even if all currently defined labels would match. For
303
+ # example response codes in +resp-text-code+.
304
+ #
305
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
306
+ # ; Is a valid RFC 3501 "atom".
307
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
308
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
309
+ #
310
+ # TODO: add to lexer and only match tagged-ext-label
311
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
312
+
313
+ # atom = 1*ATOM-CHAR
314
+ # ATOM-CHAR = <any CHAR except atom-specials>
315
+ ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
316
+
317
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
318
+ # resp-specials = "]"
319
+ ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
320
+
321
+ ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
322
+
323
+ # atom = 1*ATOM-CHAR
324
+ #
325
+ # TODO: match atom entirely by regexp (in the "lexer")
326
+ def atom; -combine_adjacent(*ATOM_TOKENS) end
327
+
328
+ # the #accept version of #atom
329
+ def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
330
+
331
+ # Returns <tt>atom.upcase</tt>
332
+ def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
333
+
334
+ # Returns <tt>atom?&.upcase</tt>
335
+ def case_insensitive__atom?
336
+ -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
337
+ end
338
+
339
+ # TODO: handle astring_chars entirely inside the lexer
340
+ def astring_chars
341
+ combine_adjacent(*ASTRING_CHARS_TOKENS)
342
+ end
343
+
344
+ # astring = 1*ASTRING-CHAR / string
345
+ def astring
346
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
347
+ end
348
+
349
+ def astring?
350
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
351
+ end
352
+
353
+ # Use #label or #label_in to assert specific known labels
354
+ # (+tagged-ext-label+ only, not +atom+).
355
+ def label(word)
356
+ (val = tagged_ext_label) == word and return val
357
+ parse_error("unexpected atom %p, expected %p instead", val, word)
358
+ end
359
+
360
+ # nstring = string / nil
361
+ def nstring
362
+ NIL? ? nil : string
363
+ end
364
+
365
+ def nquoted
366
+ NIL? ? nil : quoted
367
+ end
368
+
369
+ # use where nstring represents "LABEL" values
370
+ def case_insensitive__nstring
371
+ NIL? ? nil : case_insensitive__string
372
+ end
373
+
374
+ # valid number ranges are not enforced by parser
375
+ # number64 = 1*DIGIT
376
+ # ; Unsigned 63-bit integer
377
+ # ; (0 <= n <= 9,223,372,036,854,775,807)
378
+ alias number64 number
379
+ alias number64? number?
380
+
101
381
  def response
102
382
  token = lookahead
103
383
  case token.symbol
@@ -159,9 +439,11 @@ module Net
159
439
  when /\A(?:STATUS)\z/ni
160
440
  return status_response
161
441
  when /\A(?:CAPABILITY)\z/ni
162
- return capability_response
442
+ return capability_data__untagged
163
443
  when /\A(?:NOOP)\z/ni
164
444
  return ignored_response
445
+ when /\A(?:ENABLED)\z/ni
446
+ return enable_data
165
447
  else
166
448
  return text_response
167
449
  end
@@ -335,331 +617,258 @@ module Net
335
617
  return name, data
336
618
  end
337
619
 
620
+ # RFC-3501 & RFC-9051:
621
+ # body = "(" (body-type-1part / body-type-mpart) ")"
338
622
  def body
339
623
  @lex_state = EXPR_DATA
340
- token = lookahead
341
- if token.symbol == T_NIL
342
- shift_token
343
- result = nil
344
- else
345
- match(T_LPAR)
346
- token = lookahead
347
- if token.symbol == T_LPAR
348
- result = body_type_mpart
349
- else
350
- result = body_type_1part
351
- end
352
- match(T_RPAR)
353
- end
624
+ lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar
625
+ result
626
+ ensure
354
627
  @lex_state = EXPR_BEG
355
- return result
356
628
  end
629
+ alias lookahead_body? lookahead_lpar?
357
630
 
631
+ # RFC-3501 & RFC9051:
632
+ # body-type-1part = (body-type-basic / body-type-msg / body-type-text)
633
+ # [SP body-ext-1part]
358
634
  def body_type_1part
359
- token = lookahead
360
- case token.value
361
- when /\A(?:TEXT)\z/ni
362
- return body_type_text
363
- when /\A(?:MESSAGE)\z/ni
364
- return body_type_msg
365
- when /\A(?:ATTACHMENT)\z/ni
366
- return body_type_attachment
367
- when /\A(?:MIXED)\z/ni
368
- return body_type_mixed
369
- else
370
- return body_type_basic
371
- end
372
- end
373
-
635
+ # This regexp peek is a performance optimization.
636
+ # The lookahead fallback would work fine too.
637
+ m = peek_re(/\G(?:
638
+ (?<TEXT> "TEXT" \s "[^"]+" )
639
+ |(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
640
+ |(?<BASIC> "[^"]+" \s "[^"]+" )
641
+ |(?<MIXED> "MIXED" )
642
+ )/nix)
643
+ choice = m&.named_captures&.compact&.keys&.first
644
+ # In practice, the following line should never be used. But the ABNF
645
+ # *does* allow literals, and this will handle them.
646
+ choice ||= lookahead_case_insensitive__string!
647
+ case choice
648
+ when "BASIC" then body_type_basic # => BodyTypeBasic
649
+ when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
650
+ when "TEXT" then body_type_text # => BodyTypeText
651
+ when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
652
+ else body_type_basic # might be a bug; server's or ours?
653
+ end
654
+ end
655
+
656
+ # RFC-3501 & RFC9051:
657
+ # body-type-basic = media-basic SP body-fields
374
658
  def body_type_basic
375
- mtype, msubtype = media_type
376
- token = lookahead
377
- if token.symbol == T_RPAR
378
- return BodyTypeBasic.new(mtype, msubtype)
379
- end
380
- match(T_SPACE)
381
- param, content_id, desc, enc, size = body_fields
382
- md5, disposition, language, extension = body_ext_1part
383
- return BodyTypeBasic.new(mtype, msubtype,
384
- param, content_id,
385
- desc, enc, size,
386
- md5, disposition, language, extension)
659
+ type = media_basic # n.b. "basic" type isn't enforced here
660
+ if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid
661
+ SP!; flds = body_fields
662
+ SP? and exts = body_ext_1part
663
+ BodyTypeBasic.new(*type, *flds, *exts)
387
664
  end
388
665
 
666
+ # RFC-3501 & RFC-9051:
667
+ # body-type-text = media-text SP body-fields SP body-fld-lines
389
668
  def body_type_text
390
- mtype, msubtype = media_type
391
- match(T_SPACE)
392
- param, content_id, desc, enc, size = body_fields
393
- match(T_SPACE)
394
- lines = number
395
- md5, disposition, language, extension = body_ext_1part
396
- return BodyTypeText.new(mtype, msubtype,
397
- param, content_id,
398
- desc, enc, size,
399
- lines,
400
- md5, disposition, language, extension)
669
+ type = media_text
670
+ SP!; flds = body_fields
671
+ SP!; lines = body_fld_lines
672
+ SP? and exts = body_ext_1part
673
+ BodyTypeText.new(*type, *flds, lines, *exts)
401
674
  end
402
675
 
676
+ # RFC-3501 & RFC-9051:
677
+ # body-type-msg = media-message SP body-fields SP envelope
678
+ # SP body SP body-fld-lines
403
679
  def body_type_msg
404
- mtype, msubtype = media_type
405
- match(T_SPACE)
406
- param, content_id, desc, enc, size = body_fields
407
-
408
- token = lookahead
409
- if token.symbol == T_RPAR
410
- # If this is not message/rfc822, we shouldn't apply the RFC822
411
- # spec to it. We should handle anything other than
412
- # message/rfc822 using multipart extension data [rfc3501] (i.e.
413
- # the data itself won't be returned, we would have to retrieve it
414
- # with BODYSTRUCTURE instead of with BODY
415
-
416
- # Also, sometimes a message/rfc822 is included as a large
417
- # attachment instead of having all of the other details
418
- # (e.g. attaching a .eml file to an email)
419
- if msubtype == "RFC822"
420
- return BodyTypeMessage.new(mtype, msubtype, param, content_id,
421
- desc, enc, size, nil, nil, nil, nil,
422
- nil, nil, nil)
423
- else
424
- return BodyTypeExtension.new(mtype, msubtype,
425
- param, content_id,
426
- desc, enc, size)
427
- end
428
- end
429
-
430
- match(T_SPACE)
431
- env = envelope
432
- match(T_SPACE)
433
- b = body
434
- match(T_SPACE)
435
- lines = number
436
- md5, disposition, language, extension = body_ext_1part
437
- return BodyTypeMessage.new(mtype, msubtype,
438
- param, content_id,
439
- desc, enc, size,
440
- env, b, lines,
441
- md5, disposition, language, extension)
442
- end
443
-
444
- def body_type_attachment
445
- mtype = case_insensitive_string
446
- match(T_SPACE)
447
- param = body_fld_param
448
- return BodyTypeAttachment.new(mtype, nil, param)
449
- end
450
-
680
+ # n.b. "message/rfc822" type isn't enforced here
681
+ type = media_message
682
+ SP!; flds = body_fields
683
+
684
+ # Sometimes servers send body-type-basic when body-type-msg should be sent.
685
+ # E.g: when a message/rfc822 part has "Content-Disposition: attachment".
686
+ #
687
+ # * SP "(" --> SP envelope --> continue as body-type-msg
688
+ # * ")" --> no body-ext-1part --> completed body-type-basic
689
+ # * SP nstring --> SP body-fld-md5
690
+ # --> SP body-ext-1part --> continue as body-type-basic
691
+ #
692
+ # It's probably better to return BodyTypeBasic---even for
693
+ # "message/rfc822"---than BodyTypeMessage with invalid fields.
694
+ unless peek_str?(" (")
695
+ SP? and exts = body_ext_1part
696
+ return BodyTypeBasic.new(*type, *flds, *exts)
697
+ end
698
+
699
+ SP!; env = envelope
700
+ SP!; bdy = body
701
+ SP!; lines = body_fld_lines
702
+ SP? and exts = body_ext_1part
703
+ BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts)
704
+ end
705
+
706
+ # This is a malformed body-type-mpart with no subparts.
451
707
  def body_type_mixed
452
- mtype = "MULTIPART"
453
- msubtype = case_insensitive_string
454
- param, disposition, language, extension = body_ext_mpart
455
- return BodyTypeBasic.new(mtype, msubtype, param, nil, nil, nil, nil, nil, disposition, language, extension)
708
+ # warn "malformed body-type-mpart: multipart/mixed with no parts."
709
+ type = media_subtype # => "MIXED"
710
+ SP? and exts = body_ext_mpart
711
+ BodyTypeMultipart.new("MULTIPART", type, nil, *exts)
456
712
  end
457
713
 
714
+ # RFC-3501 & RFC-9051:
715
+ # body-type-mpart = 1*body SP media-subtype
716
+ # [SP body-ext-mpart]
458
717
  def body_type_mpart
459
- parts = []
460
- while true
461
- token = lookahead
462
- if token.symbol == T_SPACE
463
- shift_token
464
- break
465
- end
466
- parts.push(body)
467
- end
468
- mtype = "MULTIPART"
469
- msubtype = case_insensitive_string
470
- param, disposition, language, extension = body_ext_mpart
471
- return BodyTypeMultipart.new(mtype, msubtype, parts,
472
- param, disposition, language,
473
- extension)
718
+ parts = [body]; parts << body until SP?; msubtype = media_subtype
719
+ SP? and exts = body_ext_mpart
720
+ BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts)
474
721
  end
475
722
 
723
+ # n.b. this handles both type and subtype
724
+ #
725
+ # RFC-3501 vs RFC-9051:
726
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
727
+ # "MESSAGE" /
728
+ # "VIDEO") DQUOTE) / string) SP media-subtype
729
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
730
+ # "FONT" / "MESSAGE" / "MODEL" /
731
+ # "VIDEO") DQUOTE) / string) SP media-subtype
732
+ #
733
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
734
+ # DQUOTE "RFC822" DQUOTE
735
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
736
+ # DQUOTE ("RFC822" / "GLOBAL") DQUOTE
737
+ #
738
+ # RFC-3501 & RFC-9051:
739
+ # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
740
+ # media-subtype = string
476
741
  def media_type
477
- mtype = case_insensitive_string
478
- token = lookahead
479
- if token.symbol != T_SPACE
480
- return mtype, nil
481
- end
482
- match(T_SPACE)
483
- msubtype = case_insensitive_string
742
+ mtype = case_insensitive__string
743
+ SP? or return mtype, nil # ??? quirky!
744
+ msubtype = media_subtype
484
745
  return mtype, msubtype
485
746
  end
486
747
 
748
+ # TODO: check types
749
+ alias media_basic media_type # */* --- catchall
750
+ alias media_message media_type # message/rfc822, message/global
751
+ alias media_text media_type # text/*
752
+
753
+ alias media_subtype case_insensitive__string
754
+
755
+ # RFC-3501 & RFC-9051:
756
+ # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
757
+ # body-fld-enc SP body-fld-octets
487
758
  def body_fields
488
- param = body_fld_param
489
- match(T_SPACE)
490
- content_id = nstring
491
- match(T_SPACE)
492
- desc = nstring
493
- match(T_SPACE)
494
- enc = case_insensitive_string
495
- match(T_SPACE)
496
- size = number
497
- return param, content_id, desc, enc, size
759
+ fields = []
760
+ fields << body_fld_param; SP!
761
+ fields << body_fld_id; SP!
762
+ fields << body_fld_desc; SP!
763
+ fields << body_fld_enc; SP!
764
+ fields << body_fld_octets
765
+ fields
498
766
  end
499
767
 
768
+ # RFC3501, RFC9051:
769
+ # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
500
770
  def body_fld_param
501
- token = lookahead
502
- if token.symbol == T_NIL
503
- shift_token
504
- return nil
505
- end
506
- match(T_LPAR)
771
+ return if NIL?
507
772
  param = {}
508
- while true
509
- token = lookahead
510
- case token.symbol
511
- when T_RPAR
512
- shift_token
513
- break
514
- when T_SPACE
515
- shift_token
516
- end
517
- name = case_insensitive_string
518
- match(T_SPACE)
519
- val = string
520
- param[name] = val
521
- end
522
- return param
523
- end
524
-
773
+ lpar
774
+ name = case_insensitive__string; SP!; param[name] = string
775
+ while SP?
776
+ name = case_insensitive__string; SP!; param[name] = string
777
+ end
778
+ rpar
779
+ param
780
+ end
781
+
782
+ # RFC2060
783
+ # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
784
+ # [SPACE body_fld_lang
785
+ # [SPACE 1#body_extension]]]
786
+ # ;; MUST NOT be returned on non-extensible
787
+ # ;; "BODY" fetch
788
+ # RFC3501 & RFC9051
789
+ # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
790
+ # [SP body-fld-loc *(SP body-extension)]]]
791
+ # ; MUST NOT be returned on non-extensible
792
+ # ; "BODY" fetch
525
793
  def body_ext_1part
526
- token = lookahead
527
- if token.symbol == T_SPACE
528
- shift_token
529
- else
530
- return nil
531
- end
532
- md5 = nstring
533
-
534
- token = lookahead
535
- if token.symbol == T_SPACE
536
- shift_token
537
- else
538
- return md5
539
- end
540
- disposition = body_fld_dsp
541
-
542
- token = lookahead
543
- if token.symbol == T_SPACE
544
- shift_token
545
- else
546
- return md5, disposition
547
- end
548
- language = body_fld_lang
549
-
550
- token = lookahead
551
- if token.symbol == T_SPACE
552
- shift_token
553
- else
554
- return md5, disposition, language
555
- end
556
-
557
- extension = body_extensions
558
- return md5, disposition, language, extension
559
- end
560
-
794
+ fields = []; fields << body_fld_md5
795
+ SP? or return fields; fields << body_fld_dsp
796
+ SP? or return fields; fields << body_fld_lang
797
+ SP? or return fields; fields << body_fld_loc
798
+ SP? or return fields; fields << body_extensions
799
+ fields
800
+ end
801
+
802
+ # RFC-2060:
803
+ # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
804
+ # [SP 1#body_extension]]
805
+ # ;; MUST NOT be returned on non-extensible
806
+ # ;; "BODY" fetch
807
+ # RFC-3501 & RFC-9051:
808
+ # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
809
+ # [SP body-fld-loc *(SP body-extension)]]]
810
+ # ; MUST NOT be returned on non-extensible
811
+ # ; "BODY" fetch
561
812
  def body_ext_mpart
562
- token = lookahead
563
- if token.symbol == T_SPACE
564
- shift_token
565
- else
566
- return nil
567
- end
568
- param = body_fld_param
569
-
570
- token = lookahead
571
- if token.symbol == T_SPACE
572
- shift_token
573
- else
574
- return param
575
- end
576
- disposition = body_fld_dsp
577
-
578
- token = lookahead
579
- if token.symbol == T_SPACE
580
- shift_token
581
- else
582
- return param, disposition
583
- end
584
- language = body_fld_lang
585
-
586
- token = lookahead
587
- if token.symbol == T_SPACE
588
- shift_token
589
- else
590
- return param, disposition, language
591
- end
592
-
593
- extension = body_extensions
594
- return param, disposition, language, extension
595
- end
596
-
813
+ fields = []; fields << body_fld_param
814
+ SP? or return fields; fields << body_fld_dsp
815
+ SP? or return fields; fields << body_fld_lang
816
+ SP? or return fields; fields << body_fld_loc
817
+ SP? or return fields; fields << body_extensions
818
+ fields
819
+ end
820
+
821
+ alias body_fld_desc nstring
822
+ alias body_fld_id nstring
823
+ alias body_fld_loc nstring
824
+ alias body_fld_lines number64 # number in 3501, number64 in 9051
825
+ alias body_fld_md5 nstring
826
+ alias body_fld_octets number
827
+
828
+ # RFC-3501 & RFC-9051:
829
+ # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
830
+ # "QUOTED-PRINTABLE") DQUOTE) / string
831
+ alias body_fld_enc case_insensitive__string
832
+
833
+ # body-fld-dsp = "(" string SP body-fld-param ")" / nil
597
834
  def body_fld_dsp
598
- token = lookahead
599
- if token.symbol == T_NIL
600
- shift_token
601
- return nil
602
- end
603
- match(T_LPAR)
604
- dsp_type = case_insensitive_string
605
- match(T_SPACE)
606
- param = body_fld_param
607
- match(T_RPAR)
608
- return ContentDisposition.new(dsp_type, param)
835
+ return if NIL?
836
+ lpar; dsp_type = case_insensitive__string
837
+ SP!; param = body_fld_param
838
+ rpar
839
+ ContentDisposition.new(dsp_type, param)
609
840
  end
610
841
 
842
+ # body-fld-lang = nstring / "(" string *(SP string) ")"
611
843
  def body_fld_lang
612
- token = lookahead
613
- if token.symbol == T_LPAR
614
- shift_token
615
- result = []
616
- while true
617
- token = lookahead
618
- case token.symbol
619
- when T_RPAR
620
- shift_token
621
- return result
622
- when T_SPACE
623
- shift_token
624
- end
625
- result.push(case_insensitive_string)
626
- end
844
+ if lpar?
845
+ result = [case_insensitive__string]
846
+ result << case_insensitive__string while SP?
847
+ result
627
848
  else
628
- lang = nstring
629
- if lang
630
- return lang.upcase
631
- else
632
- return lang
633
- end
849
+ case_insensitive__nstring
634
850
  end
635
851
  end
636
852
 
853
+ # body-extension *(SP body-extension)
637
854
  def body_extensions
638
855
  result = []
639
- while true
640
- token = lookahead
641
- case token.symbol
642
- when T_RPAR
643
- return result
644
- when T_SPACE
645
- shift_token
646
- end
647
- result.push(body_extension)
648
- end
856
+ result << body_extension; while SP? do result << body_extension end
857
+ result
649
858
  end
650
859
 
860
+ # body-extension = nstring / number / number64 /
861
+ # "(" body-extension *(SP body-extension) ")"
862
+ # ; Future expansion. Client implementations
863
+ # ; MUST accept body-extension fields. Server
864
+ # ; implementations MUST NOT generate
865
+ # ; body-extension fields except as defined by
866
+ # ; future Standard or Standards Track
867
+ # ; revisions of this specification.
651
868
  def body_extension
652
- token = lookahead
653
- case token.symbol
654
- when T_LPAR
655
- shift_token
656
- result = body_extensions
657
- match(T_RPAR)
658
- return result
659
- when T_NUMBER
660
- return number
661
- else
662
- return nstring
869
+ if (uint = number64?) then uint
870
+ elsif lpar? then exts = body_extensions; rpar; exts
871
+ else nstring
663
872
  end
664
873
  end
665
874
 
@@ -969,29 +1178,38 @@ module Net
969
1178
  return UntaggedResponse.new(name, data, @str)
970
1179
  end
971
1180
 
972
- def capability_response
973
- token = match(T_ATOM)
974
- name = token.value.upcase
975
- match(T_SPACE)
976
- UntaggedResponse.new(name, capability_data, @str)
1181
+ # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
1182
+ # The grammar rule is used by both response-data and resp-text-code.
1183
+ # But this method only returns UntaggedResponse (response-data).
1184
+ #
1185
+ # RFC3501:
1186
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1187
+ # *(SP capability)
1188
+ # RFC9051:
1189
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1190
+ # *(SP capability)
1191
+ def capability_data__untagged
1192
+ UntaggedResponse.new label("CAPABILITY"), capability__list, @str
977
1193
  end
978
1194
 
979
- def capability_data
980
- data = []
981
- while true
982
- token = lookahead
983
- case token.symbol
984
- when T_CRLF, T_RBRA
985
- break
986
- when T_SPACE
987
- shift_token
988
- next
989
- end
990
- data.push(atom.upcase)
991
- end
992
- data
1195
+ # enable-data = "ENABLED" *(SP capability)
1196
+ def enable_data
1197
+ UntaggedResponse.new label("ENABLED"), capability__list, @str
993
1198
  end
994
1199
 
1200
+ # As a workaround for buggy servers, allow a trailing SP:
1201
+ # *(SP capability) [SP]
1202
+ def capability__list
1203
+ data = []; while _ = SP? && capability? do data << _ end; data
1204
+ end
1205
+
1206
+ # capability = ("AUTH=" auth-type) / atom
1207
+ # ; New capabilities MUST begin with "X" or be
1208
+ # ; registered with IANA as standard or
1209
+ # ; standards-track
1210
+ alias capability case_insensitive__atom
1211
+ alias capability? case_insensitive__atom?
1212
+
995
1213
  def id_response
996
1214
  token = match(T_ATOM)
997
1215
  name = token.value.upcase
@@ -1021,86 +1239,89 @@ module Net
1021
1239
  end
1022
1240
  end
1023
1241
 
1242
+ # namespace-response = "NAMESPACE" SP namespace
1243
+ # SP namespace SP namespace
1244
+ # ; The first Namespace is the Personal Namespace(s).
1245
+ # ; The second Namespace is the Other Users'
1246
+ # ; Namespace(s).
1247
+ # ; The third Namespace is the Shared Namespace(s).
1024
1248
  def namespace_response
1249
+ name = label("NAMESPACE")
1025
1250
  @lex_state = EXPR_DATA
1026
- token = lookahead
1027
- token = match(T_ATOM)
1028
- name = token.value.upcase
1029
- match(T_SPACE)
1030
- personal = namespaces
1031
- match(T_SPACE)
1032
- other = namespaces
1033
- match(T_SPACE)
1034
- shared = namespaces
1251
+ data = Namespaces.new((SP!; namespace),
1252
+ (SP!; namespace),
1253
+ (SP!; namespace))
1254
+ UntaggedResponse.new(name, data, @str)
1255
+ ensure
1035
1256
  @lex_state = EXPR_BEG
1036
- data = Namespaces.new(personal, other, shared)
1037
- return UntaggedResponse.new(name, data, @str)
1038
- end
1039
-
1040
- def namespaces
1041
- token = lookahead
1042
- # empty () is not allowed, so nil is functionally identical to empty.
1043
- data = []
1044
- if token.symbol == T_NIL
1045
- shift_token
1046
- else
1047
- match(T_LPAR)
1048
- loop do
1049
- data << namespace
1050
- break unless lookahead.symbol == T_SPACE
1051
- shift_token
1052
- end
1053
- match(T_RPAR)
1054
- end
1055
- data
1056
1257
  end
1057
1258
 
1259
+ # namespace = nil / "(" 1*namespace-descr ")"
1058
1260
  def namespace
1059
- match(T_LPAR)
1060
- prefix = match(T_QUOTED, T_LITERAL).value
1061
- match(T_SPACE)
1062
- delimiter = string
1261
+ NIL? and return []
1262
+ lpar
1263
+ list = [namespace_descr]
1264
+ list << namespace_descr until rpar?
1265
+ list
1266
+ end
1267
+
1268
+ # namespace-descr = "(" string SP
1269
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil)
1270
+ # [namespace-response-extensions] ")"
1271
+ def namespace_descr
1272
+ lpar
1273
+ prefix = string; SP!
1274
+ delimiter = nquoted # n.b: should only accept single char
1063
1275
  extensions = namespace_response_extensions
1064
- match(T_RPAR)
1276
+ rpar
1065
1277
  Namespace.new(prefix, delimiter, extensions)
1066
1278
  end
1067
1279
 
1280
+ # namespace-response-extensions = *namespace-response-extension
1281
+ # namespace-response-extension = SP string SP
1282
+ # "(" string *(SP string) ")"
1068
1283
  def namespace_response_extensions
1069
1284
  data = {}
1070
- token = lookahead
1071
- if token.symbol == T_SPACE
1072
- shift_token
1073
- name = match(T_QUOTED, T_LITERAL).value
1285
+ while SP?
1286
+ name = string; SP!
1287
+ lpar
1074
1288
  data[name] ||= []
1075
- match(T_SPACE)
1076
- match(T_LPAR)
1077
- loop do
1078
- data[name].push match(T_QUOTED, T_LITERAL).value
1079
- break unless lookahead.symbol == T_SPACE
1080
- shift_token
1081
- end
1082
- match(T_RPAR)
1289
+ data[name] << string
1290
+ data[name] << string while SP?
1291
+ rpar
1083
1292
  end
1084
1293
  data
1085
1294
  end
1086
1295
 
1087
- # text = 1*TEXT-CHAR
1088
- # TEXT-CHAR = <any CHAR except CR and LF>
1296
+ # TEXT-CHAR = <any CHAR except CR and LF>
1297
+ # RFC3501:
1298
+ # text = 1*TEXT-CHAR
1299
+ # RFC9051:
1300
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
1301
+ # ; Non-ASCII text can only be returned
1302
+ # ; after ENABLE IMAP4rev2 command
1089
1303
  def text
1090
- match(T_TEXT, lex_state: EXPR_TEXT).value
1304
+ match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8")
1305
+ end
1306
+
1307
+ # an "accept" version of #text
1308
+ def text?
1309
+ accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8")
1091
1310
  end
1092
1311
 
1093
- # resp-text = ["[" resp-text-code "]" SP] text
1312
+ # RFC3501:
1313
+ # resp-text = ["[" resp-text-code "]" SP] text
1314
+ # RFC9051:
1315
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1316
+ #
1317
+ # We leniently re-interpret this as
1318
+ # resp-text = ("[" resp-text-code "]" [SP [text]]) / [text]
1094
1319
  def resp_text
1095
- token = match(T_LBRA, T_TEXT, lex_state: EXPR_RTEXT)
1096
- case token.symbol
1097
- when T_LBRA
1098
- code = resp_text_code
1099
- match(T_RBRA)
1100
- accept_space # violating RFC
1101
- ResponseText.new(code, text)
1102
- when T_TEXT
1103
- ResponseText.new(nil, token.value)
1320
+ if lbra?
1321
+ code = resp_text_code; rbra
1322
+ ResponseText.new(code, SP? && text? || "")
1323
+ else
1324
+ ResponseText.new(nil, text? || "")
1104
1325
  end
1105
1326
  end
1106
1327
 
@@ -1127,7 +1348,7 @@ module Net
1127
1348
  when /\A(?:BADCHARSET)\z/n
1128
1349
  result = ResponseCode.new(name, charset_list)
1129
1350
  when /\A(?:CAPABILITY)\z/ni
1130
- result = ResponseCode.new(name, capability_data)
1351
+ result = ResponseCode.new(name, capability__list)
1131
1352
  when /\A(?:PERMANENTFLAGS)\z/n
1132
1353
  match(T_SPACE)
1133
1354
  result = ResponseCode.new(name, flag_list)
@@ -1142,8 +1363,7 @@ module Net
1142
1363
  token = lookahead
1143
1364
  if token.symbol == T_SPACE
1144
1365
  shift_token
1145
- token = match(T_TEXT, lex_state: EXPR_CTEXT)
1146
- result = ResponseCode.new(name, token.value)
1366
+ result = ResponseCode.new(name, text_chars_except_rbra)
1147
1367
  else
1148
1368
  result = ResponseCode.new(name, nil)
1149
1369
  end
@@ -1151,6 +1371,11 @@ module Net
1151
1371
  return result
1152
1372
  end
1153
1373
 
1374
+ # 1*<any TEXT-CHAR except "]">
1375
+ def text_chars_except_rbra
1376
+ match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')[0]
1377
+ end
1378
+
1154
1379
  def charset_list
1155
1380
  result = []
1156
1381
  if accept(T_SPACE)
@@ -1232,9 +1457,7 @@ module Net
1232
1457
  mailbox = $3
1233
1458
  host = $4
1234
1459
  for s in [name, route, mailbox, host]
1235
- if s
1236
- s.gsub!(/\\(["\\])/n, "\\1")
1237
- end
1460
+ Patterns.unescape_quoted! s
1238
1461
  end
1239
1462
  else
1240
1463
  name = nstring
@@ -1268,84 +1491,6 @@ module Net
1268
1491
  end
1269
1492
  end
1270
1493
 
1271
- def nstring
1272
- token = lookahead
1273
- if token.symbol == T_NIL
1274
- shift_token
1275
- return nil
1276
- else
1277
- return string
1278
- end
1279
- end
1280
-
1281
- def astring
1282
- token = lookahead
1283
- if string_token?(token)
1284
- return string
1285
- else
1286
- return astring_chars
1287
- end
1288
- end
1289
-
1290
- def string
1291
- token = lookahead
1292
- if token.symbol == T_NIL
1293
- shift_token
1294
- return nil
1295
- end
1296
- token = match(T_QUOTED, T_LITERAL)
1297
- return token.value
1298
- end
1299
-
1300
- STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1301
-
1302
- def string_token?(token)
1303
- return STRING_TOKENS.include?(token.symbol)
1304
- end
1305
-
1306
- def case_insensitive_string
1307
- token = lookahead
1308
- if token.symbol == T_NIL
1309
- shift_token
1310
- return nil
1311
- end
1312
- token = match(T_QUOTED, T_LITERAL)
1313
- return token.value.upcase
1314
- end
1315
-
1316
- # atom = 1*ATOM-CHAR
1317
- # ATOM-CHAR = <any CHAR except atom-specials>
1318
- ATOM_TOKENS = [
1319
- T_ATOM,
1320
- T_NUMBER,
1321
- T_NIL,
1322
- T_LBRA,
1323
- T_PLUS
1324
- ]
1325
-
1326
- def atom
1327
- -combine_adjacent(*ATOM_TOKENS)
1328
- end
1329
-
1330
- # ASTRING-CHAR = ATOM-CHAR / resp-specials
1331
- # resp-specials = "]"
1332
- ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
1333
-
1334
- def astring_chars
1335
- combine_adjacent(*ASTRING_CHARS_TOKENS)
1336
- end
1337
-
1338
- def combine_adjacent(*tokens)
1339
- result = "".b
1340
- while token = accept(*tokens)
1341
- result << token.value
1342
- end
1343
- if result.empty?
1344
- parse_error('unexpected token %s (expected %s)',
1345
- lookahead.symbol, args.join(" or "))
1346
- end
1347
- result
1348
- end
1349
1494
 
1350
1495
  # See https://www.rfc-editor.org/errata/rfc3501
1351
1496
  #
@@ -1358,16 +1503,6 @@ module Net
1358
1503
  end
1359
1504
  end
1360
1505
 
1361
- def number
1362
- token = lookahead
1363
- if token.symbol == T_NIL
1364
- shift_token
1365
- return nil
1366
- end
1367
- token = match(T_NUMBER)
1368
- return token.value.to_i
1369
- end
1370
-
1371
1506
  # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1372
1507
  # uid-set = (uniqueid / uid-range) *("," uid-set)
1373
1508
  # uid-range = (uniqueid ":" uniqueid)
@@ -1381,29 +1516,11 @@ module Net
1381
1516
  case token.symbol
1382
1517
  when T_NUMBER then [Integer(token.value)]
1383
1518
  when T_ATOM
1384
- entries = uid_set__ranges(token.value)
1385
- if (count = entries.sum(&:size)) > MAX_UID_SET_SIZE
1386
- parse_error("uid-set is too large: %d > 10k", count)
1387
- end
1388
- entries.flat_map(&:to_a)
1389
- end
1390
- end
1391
-
1392
- # returns an array of ranges
1393
- def uid_set__ranges(uidset)
1394
- entries = []
1395
- uidset.split(",") do |entry|
1396
- uids = entry.split(":", 2).map {|uid|
1397
- unless uid =~ /\A[1-9][0-9]*\z/
1398
- parse_error("invalid uid-set uid: %p", uid)
1399
- end
1400
- uid = Integer(uid)
1401
- NumValidator.ensure_nz_number(uid)
1402
- uid
1519
+ token.value.split(",").flat_map {|range|
1520
+ range = range.split(":").map {|uniqueid| Integer(uniqueid) }
1521
+ range.size == 1 ? range : Range.new(range.min, range.max).to_a
1403
1522
  }
1404
- entries << Range.new(*uids.minmax)
1405
1523
  end
1406
- entries
1407
1524
  end
1408
1525
 
1409
1526
  def nil_atom
@@ -1413,15 +1530,6 @@ module Net
1413
1530
 
1414
1531
  SPACES_REGEXP = /\G */n
1415
1532
 
1416
- # This advances @pos directly so it's safe before changing @lex_state.
1417
- def accept_space
1418
- if @token
1419
- shift_token if @token.symbol == T_SPACE
1420
- elsif @str[@pos] == " "
1421
- @pos += 1
1422
- end
1423
- end
1424
-
1425
1533
  # The RFC is very strict about this and usually we should be too.
1426
1534
  # But skipping spaces is usually a safe workaround for buggy servers.
1427
1535
  #
@@ -1433,46 +1541,6 @@ module Net
1433
1541
  end
1434
1542
  end
1435
1543
 
1436
- def match(*args, lex_state: @lex_state)
1437
- if @token && lex_state != @lex_state
1438
- parse_error("invalid lex_state change to %s with unconsumed token",
1439
- lex_state)
1440
- end
1441
- begin
1442
- @lex_state, original_lex_state = lex_state, @lex_state
1443
- token = lookahead
1444
- unless args.include?(token.symbol)
1445
- parse_error('unexpected token %s (expected %s)',
1446
- token.symbol.id2name,
1447
- args.collect {|i| i.id2name}.join(" or "))
1448
- end
1449
- shift_token
1450
- return token
1451
- ensure
1452
- @lex_state = original_lex_state
1453
- end
1454
- end
1455
-
1456
- # like match, but does not raise error on failure.
1457
- #
1458
- # returns and shifts token on successful match
1459
- # returns nil and leaves @token unshifted on no match
1460
- def accept(*args)
1461
- token = lookahead
1462
- if args.include?(token.symbol)
1463
- shift_token
1464
- token
1465
- end
1466
- end
1467
-
1468
- def lookahead
1469
- @token ||= next_token
1470
- end
1471
-
1472
- def shift_token
1473
- @token = nil
1474
- end
1475
-
1476
1544
  def next_token
1477
1545
  case @lex_state
1478
1546
  when EXPR_BEG
@@ -1480,39 +1548,42 @@ module Net
1480
1548
  @pos = $~.end(0)
1481
1549
  if $1
1482
1550
  return Token.new(T_SPACE, $+)
1483
- elsif $2
1484
- return Token.new(T_NIL, $+)
1551
+ elsif $2 && $6
1552
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
1553
+ return Token.new(T_ATOM, $2)
1485
1554
  elsif $3
1486
- return Token.new(T_NUMBER, $+)
1555
+ return Token.new(T_NIL, $+)
1487
1556
  elsif $4
1488
- return Token.new(T_ATOM, $+)
1557
+ return Token.new(T_NUMBER, $+)
1489
1558
  elsif $5
1490
- return Token.new(T_QUOTED,
1491
- $+.gsub(/\\(["\\])/n, "\\1"))
1492
- elsif $6
1493
- return Token.new(T_LPAR, $+)
1559
+ return Token.new(T_PLUS, $+)
1494
1560
  elsif $7
1495
- return Token.new(T_RPAR, $+)
1561
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
1562
+ return Token.new(T_ATOM, $+)
1496
1563
  elsif $8
1497
- return Token.new(T_BSLASH, $+)
1564
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1498
1565
  elsif $9
1499
- return Token.new(T_STAR, $+)
1566
+ return Token.new(T_LPAR, $+)
1500
1567
  elsif $10
1501
- return Token.new(T_LBRA, $+)
1568
+ return Token.new(T_RPAR, $+)
1502
1569
  elsif $11
1503
- return Token.new(T_RBRA, $+)
1570
+ return Token.new(T_BSLASH, $+)
1504
1571
  elsif $12
1572
+ return Token.new(T_STAR, $+)
1573
+ elsif $13
1574
+ return Token.new(T_LBRA, $+)
1575
+ elsif $14
1576
+ return Token.new(T_RBRA, $+)
1577
+ elsif $15
1505
1578
  len = $+.to_i
1506
1579
  val = @str[@pos, len]
1507
1580
  @pos += len
1508
1581
  return Token.new(T_LITERAL, val)
1509
- elsif $13
1510
- return Token.new(T_PLUS, $+)
1511
- elsif $14
1582
+ elsif $16
1512
1583
  return Token.new(T_PERCENT, $+)
1513
- elsif $15
1584
+ elsif $17
1514
1585
  return Token.new(T_CRLF, $+)
1515
- elsif $16
1586
+ elsif $18
1516
1587
  return Token.new(T_EOF, $+)
1517
1588
  else
1518
1589
  parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
@@ -1531,8 +1602,7 @@ module Net
1531
1602
  elsif $3
1532
1603
  return Token.new(T_NUMBER, $+)
1533
1604
  elsif $4
1534
- return Token.new(T_QUOTED,
1535
- $+.gsub(/\\(["\\])/n, "\\1"))
1605
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1536
1606
  elsif $5
1537
1607
  len = $+.to_i
1538
1608
  val = @str[@pos, len]
@@ -1549,63 +1619,11 @@ module Net
1549
1619
  @str.index(/\S*/n, @pos)
1550
1620
  parse_error("unknown token - %s", $&.dump)
1551
1621
  end
1552
- when EXPR_TEXT
1553
- if @str.index(TEXT_REGEXP, @pos)
1554
- @pos = $~.end(0)
1555
- if $1
1556
- return Token.new(T_TEXT, $+)
1557
- else
1558
- parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid")
1559
- end
1560
- else
1561
- @str.index(/\S*/n, @pos)
1562
- parse_error("unknown token - %s", $&.dump)
1563
- end
1564
- when EXPR_RTEXT
1565
- if @str.index(RTEXT_REGEXP, @pos)
1566
- @pos = $~.end(0)
1567
- if $1
1568
- return Token.new(T_LBRA, $+)
1569
- elsif $2
1570
- return Token.new(T_TEXT, $+)
1571
- else
1572
- parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid")
1573
- end
1574
- else
1575
- @str.index(/\S*/n, @pos)
1576
- parse_error("unknown token - %s", $&.dump)
1577
- end
1578
- when EXPR_CTEXT
1579
- if @str.index(CTEXT_REGEXP, @pos)
1580
- @pos = $~.end(0)
1581
- if $1
1582
- return Token.new(T_TEXT, $+)
1583
- else
1584
- parse_error("[Net::IMAP BUG] CTEXT_REGEXP is invalid")
1585
- end
1586
- else
1587
- @str.index(/\S*/n, @pos) #/
1588
- parse_error("unknown token - %s", $&.dump)
1589
- end
1590
1622
  else
1591
1623
  parse_error("invalid @lex_state - %s", @lex_state.inspect)
1592
1624
  end
1593
1625
  end
1594
1626
 
1595
- def parse_error(fmt, *args)
1596
- if IMAP.debug
1597
- $stderr.printf("@str: %s\n", @str.dump)
1598
- $stderr.printf("@pos: %d\n", @pos)
1599
- $stderr.printf("@lex_state: %s\n", @lex_state)
1600
- if @token
1601
- $stderr.printf("@token.symbol: %s\n", @token.symbol)
1602
- $stderr.printf("@token.value: %s\n", @token.value.inspect)
1603
- end
1604
- end
1605
- raise ResponseParseError, format(fmt, *args)
1606
- end
1607
1627
  end
1608
-
1609
1628
  end
1610
-
1611
1629
  end