net-imap 0.3.8 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of net-imap might be problematic. Click here for more details.

Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/pages.yml +46 -0
  3. data/.github/workflows/test.yml +5 -12
  4. data/Gemfile +1 -0
  5. data/README.md +15 -4
  6. data/Rakefile +0 -7
  7. data/benchmarks/generate_parser_benchmarks +52 -0
  8. data/benchmarks/parser.yml +578 -0
  9. data/benchmarks/stringprep.yml +1 -1
  10. data/lib/net/imap/authenticators.rb +26 -57
  11. data/lib/net/imap/command_data.rb +13 -6
  12. data/lib/net/imap/deprecated_client_options.rb +139 -0
  13. data/lib/net/imap/response_data.rb +46 -41
  14. data/lib/net/imap/response_parser/parser_utils.rb +230 -0
  15. data/lib/net/imap/response_parser.rb +667 -649
  16. data/lib/net/imap/sasl/anonymous_authenticator.rb +68 -0
  17. data/lib/net/imap/sasl/authenticators.rb +112 -0
  18. data/lib/net/imap/{authenticators/cram_md5.rb → sasl/cram_md5_authenticator.rb} +15 -9
  19. data/lib/net/imap/{authenticators/digest_md5.rb → sasl/digest_md5_authenticator.rb} +74 -21
  20. data/lib/net/imap/sasl/external_authenticator.rb +62 -0
  21. data/lib/net/imap/sasl/gs2_header.rb +80 -0
  22. data/lib/net/imap/{authenticators/login.rb → sasl/login_authenticator.rb} +19 -14
  23. data/lib/net/imap/sasl/oauthbearer_authenticator.rb +164 -0
  24. data/lib/net/imap/sasl/plain_authenticator.rb +93 -0
  25. data/lib/net/imap/sasl/scram_algorithm.rb +58 -0
  26. data/lib/net/imap/sasl/scram_authenticator.rb +278 -0
  27. data/lib/net/imap/sasl/stringprep.rb +6 -66
  28. data/lib/net/imap/sasl/xoauth2_authenticator.rb +88 -0
  29. data/lib/net/imap/sasl.rb +139 -44
  30. data/lib/net/imap/stringprep/nameprep.rb +70 -0
  31. data/lib/net/imap/stringprep/saslprep.rb +69 -0
  32. data/lib/net/imap/stringprep/saslprep_tables.rb +96 -0
  33. data/lib/net/imap/stringprep/tables.rb +146 -0
  34. data/lib/net/imap/stringprep/trace.rb +85 -0
  35. data/lib/net/imap/stringprep.rb +159 -0
  36. data/lib/net/imap.rb +967 -588
  37. data/net-imap.gemspec +1 -1
  38. data/rakelib/saslprep.rake +4 -4
  39. data/rakelib/string_prep_tables_generator.rb +82 -60
  40. metadata +30 -12
  41. data/lib/net/imap/authenticators/plain.rb +0 -41
  42. data/lib/net/imap/authenticators/xoauth2.rb +0 -20
  43. data/lib/net/imap/sasl/saslprep.rb +0 -55
  44. data/lib/net/imap/sasl/saslprep_tables.rb +0 -98
  45. data/lib/net/imap/sasl/stringprep_tables.rb +0 -153
@@ -1,13 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "response_parser/parser_utils"
4
5
 
5
6
  module Net
6
7
  class IMAP < Protocol
7
8
 
8
9
  # Parses an \IMAP server response.
9
10
  class ResponseParser
10
- MAX_UID_SET_SIZE = 10_000
11
+ include ParserUtils
12
+ extend ParserUtils::Generator
11
13
 
12
14
  # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
13
15
  def initialize
@@ -35,69 +37,347 @@ module Net
35
37
 
36
38
  # :stopdoc:
37
39
 
38
- EXPR_BEG = :EXPR_BEG
39
- EXPR_DATA = :EXPR_DATA
40
- EXPR_TEXT = :EXPR_TEXT
41
- EXPR_RTEXT = :EXPR_RTEXT
42
- EXPR_CTEXT = :EXPR_CTEXT
43
-
44
- T_SPACE = :SPACE
45
- T_NIL = :NIL
46
- T_NUMBER = :NUMBER
47
- T_ATOM = :ATOM
48
- T_QUOTED = :QUOTED
49
- T_LPAR = :LPAR
50
- T_RPAR = :RPAR
51
- T_BSLASH = :BSLASH
52
- T_STAR = :STAR
53
- T_LBRA = :LBRA
54
- T_RBRA = :RBRA
55
- T_LITERAL = :LITERAL
56
- T_PLUS = :PLUS
57
- T_PERCENT = :PERCENT
58
- T_CRLF = :CRLF
59
- T_EOF = :EOF
60
- T_TEXT = :TEXT
61
-
40
+ EXPR_BEG = :EXPR_BEG # the default, used in most places
41
+ EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
42
+
43
+ T_SPACE = :SPACE # atom special
44
+ T_ATOM = :ATOM # atom (subset of astring chars)
45
+ T_NIL = :NIL # subset of atom and label
46
+ T_NUMBER = :NUMBER # subset of atom
47
+ T_LBRA = :LBRA # subset of atom
48
+ T_PLUS = :PLUS # subset of atom; tag special
49
+ T_RBRA = :RBRA # atom special; resp_special; valid astring char
50
+ T_QUOTED = :QUOTED # starts/end with atom special
51
+ T_BSLASH = :BSLASH # atom special; quoted special
52
+ T_LPAR = :LPAR # atom special; paren list delimiter
53
+ T_RPAR = :RPAR # atom special; paren list delimiter
54
+ T_STAR = :STAR # atom special; list wildcard
55
+ T_PERCENT = :PERCENT # atom special; list wildcard
56
+ T_LITERAL = :LITERAL # starts with atom special
57
+ T_CRLF = :CRLF # atom special; text special; quoted special
58
+ T_TEXT = :TEXT # any char except CRLF
59
+ T_EOF = :EOF # end of response string
60
+
61
+ module Patterns
62
+
63
+ module CharClassSubtraction
64
+ refine Regexp do
65
+ def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end
66
+ end
67
+ end
68
+ using CharClassSubtraction
69
+
70
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
71
+ # >>>
72
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
73
+ # CHAR = %x01-7F
74
+ # CRLF = CR LF
75
+ # ; Internet standard newline
76
+ # CTL = %x00-1F / %x7F
77
+ # ; controls
78
+ # DIGIT = %x30-39
79
+ # ; 0-9
80
+ # DQUOTE = %x22
81
+ # ; " (Double Quote)
82
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
83
+ # OCTET = %x00-FF
84
+ # SP = %x20
85
+ module RFC5234
86
+ ALPHA = /[A-Za-z]/n
87
+ CHAR = /[\x01-\x7f]/n
88
+ CRLF = /\r\n/n
89
+ CTL = /[\x00-\x1F\x7F]/n
90
+ DIGIT = /\d/n
91
+ DQUOTE = /"/n
92
+ HEXDIG = /\h/
93
+ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
94
+ SP = / /n
95
+ end
96
+
97
+ # UTF-8, a transformation format of ISO 10646
98
+ # >>>
99
+ # UTF8-1 = %x00-7F
100
+ # UTF8-tail = %x80-BF
101
+ # UTF8-2 = %xC2-DF UTF8-tail
102
+ # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
103
+ # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
104
+ # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
105
+ # %xF4 %x80-8F 2( UTF8-tail )
106
+ # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
107
+ # UTF8-octets = *( UTF8-char )
108
+ #
109
+ # n.b. String * Integer is used for repetition, rather than /x{3}/,
110
+ # because ruby 3.2's linear-time cache-based optimization doesn't work
111
+ # with "bounded or fixed times repetition nesting in another repetition
112
+ # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
113
+ # believe it is hard to support this case correctly."
114
+ # See https://bugs.ruby-lang.org/issues/19104
115
+ module RFC3629
116
+ UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
117
+ UTF8_TAIL = /[\x80-\xBF]/n
118
+ UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
119
+ UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
120
+ /\xED[\x80-\x9F]#{UTF8_TAIL}/n,
121
+ /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
122
+ /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
123
+ UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
124
+ /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
125
+ /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
126
+ UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
127
+ UTF8_OCTETS = /#{UTF8_CHAR}*/n
128
+ end
129
+
130
+ include RFC5234
131
+ include RFC3629
132
+
133
+ # CHAR8 = %x01-ff
134
+ # ; any OCTET except NUL, %x00
135
+ CHAR8 = /[\x01-\xff]/n
136
+
137
+ # list-wildcards = "%" / "*"
138
+ LIST_WILDCARDS = /[%*]/n
139
+ # quoted-specials = DQUOTE / "\"
140
+ QUOTED_SPECIALS = /["\\]/n
141
+ # resp-specials = "]"
142
+ RESP_SPECIALS = /[\]]/n
143
+
144
+ # atomish = 1*<any ATOM-CHAR except "[">
145
+ # ; We use "atomish" for msg-att and section, in order
146
+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
147
+ #
148
+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
149
+ # quoted-specials / resp-specials
150
+ # ATOM-CHAR = <any CHAR except atom-specials>
151
+ # atom = 1*ATOM-CHAR
152
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
153
+ # tag = 1*<any ASTRING-CHAR except "+">
154
+
155
+ ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
156
+ ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
157
+
158
+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
159
+ ATOM_CHAR = CHAR - ATOM_SPECIALS
160
+
161
+ ATOM = /#{ATOM_CHAR}+/n
162
+ ASTRING_CHARS = /#{ASTRING_CHAR}+/n
163
+ ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
164
+ TAG = /#{ASTRING_CHAR - /[+]/ }+/
165
+
166
+ # TEXT-CHAR = <any CHAR except CR and LF>
167
+ TEXT_CHAR = CHAR - /[\r\n]/
168
+
169
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
170
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
171
+ CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
172
+
173
+ # RFC3501:
174
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
175
+ # "\" quoted-specials
176
+ # RFC9051:
177
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
178
+ # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
179
+ # RFC3501 & RFC9051:
180
+ # quoted = DQUOTE *QUOTED-CHAR DQUOTE
181
+ QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
182
+ QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
183
+ QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
184
+ QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
185
+ UTF8_2, UTF8_3, UTF8_4)
186
+ QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
187
+ QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
188
+
189
+ # RFC3501:
190
+ # text = 1*TEXT-CHAR
191
+ # RFC9051:
192
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
193
+ # ; Non-ASCII text can only be returned
194
+ # ; after ENABLE IMAP4rev2 command
195
+ TEXT_rev1 = /#{TEXT_CHAR}+/
196
+ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
197
+
198
+ # RFC3501:
199
+ # literal = "{" number "}" CRLF *CHAR8
200
+ # ; Number represents the number of CHAR8s
201
+ # RFC9051:
202
+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
203
+ # ; <number64> represents the number of CHAR8s.
204
+ # ; A non-synchronizing literal is distinguished
205
+ # ; from a synchronizing literal by the presence of
206
+ # ; "+" before the closing "}".
207
+ # ; Non-synchronizing literals are not allowed when
208
+ # ; sent from server to the client.
209
+ LITERAL = /\{(\d+)\}\r\n/n
210
+
211
+ module_function
212
+
213
+ def unescape_quoted!(quoted)
214
+ quoted
215
+ &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
216
+ &.force_encoding("UTF-8")
217
+ end
218
+
219
+ def unescape_quoted(quoted)
220
+ quoted
221
+ &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
222
+ &.force_encoding("UTF-8")
223
+ end
224
+
225
+ end
226
+
227
+ # the default, used in most places
62
228
  BEG_REGEXP = /\G(?:\
63
- (?# 1: SPACE )( +)|\
64
- (?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
65
- (?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
66
- (?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\
67
- (?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
68
- (?# 6: LPAR )(\()|\
69
- (?# 7: RPAR )(\))|\
70
- (?# 8: BSLASH )(\\)|\
71
- (?# 9: STAR )(\*)|\
72
- (?# 10: LBRA )(\[)|\
73
- (?# 11: RBRA )(\])|\
74
- (?# 12: LITERAL )\{(\d+)\}\r\n|\
75
- (?# 13: PLUS )(\+)|\
76
- (?# 14: PERCENT )(%)|\
77
- (?# 15: CRLF )(\r\n)|\
78
- (?# 16: EOF )(\z))/ni
79
-
229
+ (?# 1: SPACE )( )|\
230
+ (?# 2: ATOM prefixed with a compatible subtype)\
231
+ ((?:\
232
+ (?# 3: NIL )(NIL)|\
233
+ (?# 4: NUMBER )(\d+)|\
234
+ (?# 5: PLUS )(\+))\
235
+ (?# 6: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
236
+ (?# This enables greedy alternation without lookahead, in linear time.)\
237
+ )|\
238
+ (?# Also need to check for ATOM without a subtype prefix.)\
239
+ (?# 7: ATOM )(#{Patterns::ATOMISH})|\
240
+ (?# 8: QUOTED )#{Patterns::QUOTED_rev2}|\
241
+ (?# 9: LPAR )(\()|\
242
+ (?# 10: RPAR )(\))|\
243
+ (?# 11: BSLASH )(\\)|\
244
+ (?# 12: STAR )(\*)|\
245
+ (?# 13: LBRA )(\[)|\
246
+ (?# 14: RBRA )(\])|\
247
+ (?# 15: LITERAL )#{Patterns::LITERAL}|\
248
+ (?# 16: PERCENT )(%)|\
249
+ (?# 17: CRLF )(\r\n)|\
250
+ (?# 18: EOF )(\z))/ni
251
+
252
+ # envelope, body(structure), namespaces
80
253
  DATA_REGEXP = /\G(?:\
81
254
  (?# 1: SPACE )( )|\
82
255
  (?# 2: NIL )(NIL)|\
83
256
  (?# 3: NUMBER )(\d+)|\
84
- (?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
85
- (?# 5: LITERAL )\{(\d+)\}\r\n|\
257
+ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
258
+ (?# 5: LITERAL )#{Patterns::LITERAL}|\
86
259
  (?# 6: LPAR )(\()|\
87
260
  (?# 7: RPAR )(\)))/ni
88
261
 
89
- TEXT_REGEXP = /\G(?:\
90
- (?# 1: TEXT )([^\x00\r\n]*))/ni
91
-
92
- RTEXT_REGEXP = /\G(?:\
93
- (?# 1: LBRA )(\[)|\
94
- (?# 2: TEXT )([^\x00\r\n]*))/ni
262
+ # text, after 'resp-text-code "]"'
263
+ TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
95
264
 
96
- CTEXT_REGEXP = /\G(?:\
97
- (?# 1: TEXT )([^\x00\r\n\]]*))/ni
265
+ # resp-text-code, after 'atom SP'
266
+ CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
98
267
 
99
268
  Token = Struct.new(:symbol, :value)
100
269
 
270
+ def_char_matchers :SP, " ", :T_SPACE
271
+
272
+ def_char_matchers :lpar, "(", :T_LPAR
273
+ def_char_matchers :rpar, ")", :T_RPAR
274
+
275
+ def_char_matchers :lbra, "[", :T_LBRA
276
+ def_char_matchers :rbra, "]", :T_RBRA
277
+
278
+ # valid number ranges are not enforced by parser
279
+ # number = 1*DIGIT
280
+ # ; Unsigned 32-bit integer
281
+ # ; (0 <= n < 4,294,967,296)
282
+ def_token_matchers :number, T_NUMBER, coerce: Integer
283
+
284
+ def_token_matchers :quoted, T_QUOTED
285
+
286
+ # string = quoted / literal
287
+ def_token_matchers :string, T_QUOTED, T_LITERAL
288
+
289
+ # use where string represents "LABEL" values
290
+ def_token_matchers :case_insensitive__string,
291
+ T_QUOTED, T_LITERAL,
292
+ send: :upcase
293
+
294
+ # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
295
+ # NIL? returns nil when it does *not* match
296
+ def_token_matchers :NIL, T_NIL
297
+
298
+ # In addition to explicit uses of +tagged-ext-label+, use this to match
299
+ # keywords when the grammar has not provided any extension syntax.
300
+ #
301
+ # Do *not* use this for labels where the grammar specifies extensions
302
+ # can be +atom+, even if all currently defined labels would match. For
303
+ # example response codes in +resp-text-code+.
304
+ #
305
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
306
+ # ; Is a valid RFC 3501 "atom".
307
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
308
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
309
+ #
310
+ # TODO: add to lexer and only match tagged-ext-label
311
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
312
+
313
+ # atom = 1*ATOM-CHAR
314
+ # ATOM-CHAR = <any CHAR except atom-specials>
315
+ ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
316
+
317
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
318
+ # resp-specials = "]"
319
+ ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
320
+
321
+ ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
322
+
323
+ # atom = 1*ATOM-CHAR
324
+ #
325
+ # TODO: match atom entirely by regexp (in the "lexer")
326
+ def atom; -combine_adjacent(*ATOM_TOKENS) end
327
+
328
+ # the #accept version of #atom
329
+ def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
330
+
331
+ # Returns <tt>atom.upcase</tt>
332
+ def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
333
+
334
+ # Returns <tt>atom?&.upcase</tt>
335
+ def case_insensitive__atom?
336
+ -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
337
+ end
338
+
339
+ # TODO: handle astring_chars entirely inside the lexer
340
+ def astring_chars
341
+ combine_adjacent(*ASTRING_CHARS_TOKENS)
342
+ end
343
+
344
+ # astring = 1*ASTRING-CHAR / string
345
+ def astring
346
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
347
+ end
348
+
349
+ def astring?
350
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
351
+ end
352
+
353
+ # Use #label or #label_in to assert specific known labels
354
+ # (+tagged-ext-label+ only, not +atom+).
355
+ def label(word)
356
+ (val = tagged_ext_label) == word and return val
357
+ parse_error("unexpected atom %p, expected %p instead", val, word)
358
+ end
359
+
360
+ # nstring = string / nil
361
+ def nstring
362
+ NIL? ? nil : string
363
+ end
364
+
365
+ def nquoted
366
+ NIL? ? nil : quoted
367
+ end
368
+
369
+ # use where nstring represents "LABEL" values
370
+ def case_insensitive__nstring
371
+ NIL? ? nil : case_insensitive__string
372
+ end
373
+
374
+ # valid number ranges are not enforced by parser
375
+ # number64 = 1*DIGIT
376
+ # ; Unsigned 63-bit integer
377
+ # ; (0 <= n <= 9,223,372,036,854,775,807)
378
+ alias number64 number
379
+ alias number64? number?
380
+
101
381
  def response
102
382
  token = lookahead
103
383
  case token.symbol
@@ -159,9 +439,11 @@ module Net
159
439
  when /\A(?:STATUS)\z/ni
160
440
  return status_response
161
441
  when /\A(?:CAPABILITY)\z/ni
162
- return capability_response
442
+ return capability_data__untagged
163
443
  when /\A(?:NOOP)\z/ni
164
444
  return ignored_response
445
+ when /\A(?:ENABLED)\z/ni
446
+ return enable_data
165
447
  else
166
448
  return text_response
167
449
  end
@@ -335,331 +617,258 @@ module Net
335
617
  return name, data
336
618
  end
337
619
 
620
+ # RFC-3501 & RFC-9051:
621
+ # body = "(" (body-type-1part / body-type-mpart) ")"
338
622
  def body
339
623
  @lex_state = EXPR_DATA
340
- token = lookahead
341
- if token.symbol == T_NIL
342
- shift_token
343
- result = nil
344
- else
345
- match(T_LPAR)
346
- token = lookahead
347
- if token.symbol == T_LPAR
348
- result = body_type_mpart
349
- else
350
- result = body_type_1part
351
- end
352
- match(T_RPAR)
353
- end
624
+ lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar
625
+ result
626
+ ensure
354
627
  @lex_state = EXPR_BEG
355
- return result
356
628
  end
629
+ alias lookahead_body? lookahead_lpar?
357
630
 
631
+ # RFC-3501 & RFC9051:
632
+ # body-type-1part = (body-type-basic / body-type-msg / body-type-text)
633
+ # [SP body-ext-1part]
358
634
  def body_type_1part
359
- token = lookahead
360
- case token.value
361
- when /\A(?:TEXT)\z/ni
362
- return body_type_text
363
- when /\A(?:MESSAGE)\z/ni
364
- return body_type_msg
365
- when /\A(?:ATTACHMENT)\z/ni
366
- return body_type_attachment
367
- when /\A(?:MIXED)\z/ni
368
- return body_type_mixed
369
- else
370
- return body_type_basic
371
- end
372
- end
373
-
635
+ # This regexp peek is a performance optimization.
636
+ # The lookahead fallback would work fine too.
637
+ m = peek_re(/\G(?:
638
+ (?<TEXT> "TEXT" \s "[^"]+" )
639
+ |(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
640
+ |(?<BASIC> "[^"]+" \s "[^"]+" )
641
+ |(?<MIXED> "MIXED" )
642
+ )/nix)
643
+ choice = m&.named_captures&.compact&.keys&.first
644
+ # In practice, the following line should never be used. But the ABNF
645
+ # *does* allow literals, and this will handle them.
646
+ choice ||= lookahead_case_insensitive__string!
647
+ case choice
648
+ when "BASIC" then body_type_basic # => BodyTypeBasic
649
+ when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
650
+ when "TEXT" then body_type_text # => BodyTypeText
651
+ when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
652
+ else body_type_basic # might be a bug; server's or ours?
653
+ end
654
+ end
655
+
656
+ # RFC-3501 & RFC9051:
657
+ # body-type-basic = media-basic SP body-fields
374
658
  def body_type_basic
375
- mtype, msubtype = media_type
376
- token = lookahead
377
- if token.symbol == T_RPAR
378
- return BodyTypeBasic.new(mtype, msubtype)
379
- end
380
- match(T_SPACE)
381
- param, content_id, desc, enc, size = body_fields
382
- md5, disposition, language, extension = body_ext_1part
383
- return BodyTypeBasic.new(mtype, msubtype,
384
- param, content_id,
385
- desc, enc, size,
386
- md5, disposition, language, extension)
659
+ type = media_basic # n.b. "basic" type isn't enforced here
660
+ if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid
661
+ SP!; flds = body_fields
662
+ SP? and exts = body_ext_1part
663
+ BodyTypeBasic.new(*type, *flds, *exts)
387
664
  end
388
665
 
666
+ # RFC-3501 & RFC-9051:
667
+ # body-type-text = media-text SP body-fields SP body-fld-lines
389
668
  def body_type_text
390
- mtype, msubtype = media_type
391
- match(T_SPACE)
392
- param, content_id, desc, enc, size = body_fields
393
- match(T_SPACE)
394
- lines = number
395
- md5, disposition, language, extension = body_ext_1part
396
- return BodyTypeText.new(mtype, msubtype,
397
- param, content_id,
398
- desc, enc, size,
399
- lines,
400
- md5, disposition, language, extension)
669
+ type = media_text
670
+ SP!; flds = body_fields
671
+ SP!; lines = body_fld_lines
672
+ SP? and exts = body_ext_1part
673
+ BodyTypeText.new(*type, *flds, lines, *exts)
401
674
  end
402
675
 
676
+ # RFC-3501 & RFC-9051:
677
+ # body-type-msg = media-message SP body-fields SP envelope
678
+ # SP body SP body-fld-lines
403
679
  def body_type_msg
404
- mtype, msubtype = media_type
405
- match(T_SPACE)
406
- param, content_id, desc, enc, size = body_fields
407
-
408
- token = lookahead
409
- if token.symbol == T_RPAR
410
- # If this is not message/rfc822, we shouldn't apply the RFC822
411
- # spec to it. We should handle anything other than
412
- # message/rfc822 using multipart extension data [rfc3501] (i.e.
413
- # the data itself won't be returned, we would have to retrieve it
414
- # with BODYSTRUCTURE instead of with BODY
415
-
416
- # Also, sometimes a message/rfc822 is included as a large
417
- # attachment instead of having all of the other details
418
- # (e.g. attaching a .eml file to an email)
419
- if msubtype == "RFC822"
420
- return BodyTypeMessage.new(mtype, msubtype, param, content_id,
421
- desc, enc, size, nil, nil, nil, nil,
422
- nil, nil, nil)
423
- else
424
- return BodyTypeExtension.new(mtype, msubtype,
425
- param, content_id,
426
- desc, enc, size)
427
- end
428
- end
429
-
430
- match(T_SPACE)
431
- env = envelope
432
- match(T_SPACE)
433
- b = body
434
- match(T_SPACE)
435
- lines = number
436
- md5, disposition, language, extension = body_ext_1part
437
- return BodyTypeMessage.new(mtype, msubtype,
438
- param, content_id,
439
- desc, enc, size,
440
- env, b, lines,
441
- md5, disposition, language, extension)
442
- end
443
-
444
- def body_type_attachment
445
- mtype = case_insensitive_string
446
- match(T_SPACE)
447
- param = body_fld_param
448
- return BodyTypeAttachment.new(mtype, nil, param)
449
- end
450
-
680
+ # n.b. "message/rfc822" type isn't enforced here
681
+ type = media_message
682
+ SP!; flds = body_fields
683
+
684
+ # Sometimes servers send body-type-basic when body-type-msg is expected.
685
+ # E.g: when a message/rfc822 part has "Content-Disposition: attachment".
686
+ #
687
+ # * SP "(" --> SP envelope --> continue as body-type-msg
688
+ # * ")" --> no body-ext-1part --> completed body-type-basic
689
+ # * SP nstring --> SP body-fld-md5
690
+ # --> SP body-ext-1part --> continue as body-type-basic
691
+ #
692
+ # It's probably better to return BodyTypeBasic---even for
693
+ # "message/rfc822"---than BodyTypeMessage with invalid fields.
694
+ unless peek_str?(" (")
695
+ SP? and exts = body_ext_1part
696
+ return BodyTypeBasic.new(*type, *flds, *exts)
697
+ end
698
+
699
+ SP!; env = envelope
700
+ SP!; bdy = body
701
+ SP!; lines = body_fld_lines
702
+ SP? and exts = body_ext_1part
703
+ BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts)
704
+ end
705
+
706
+ # This is a malformed body-type-mpart with no subparts.
451
707
  def body_type_mixed
452
- mtype = "MULTIPART"
453
- msubtype = case_insensitive_string
454
- param, disposition, language, extension = body_ext_mpart
455
- return BodyTypeBasic.new(mtype, msubtype, param, nil, nil, nil, nil, nil, disposition, language, extension)
708
+ # warn "malformed body-type-mpart: multipart/mixed with no parts."
709
+ type = media_subtype # => "MIXED"
710
+ SP? and exts = body_ext_mpart
711
+ BodyTypeMultipart.new("MULTIPART", type, nil, *exts)
456
712
  end
457
713
 
714
+ # RFC-3501 & RFC-9051:
715
+ # body-type-mpart = 1*body SP media-subtype
716
+ # [SP body-ext-mpart]
458
717
  def body_type_mpart
459
- parts = []
460
- while true
461
- token = lookahead
462
- if token.symbol == T_SPACE
463
- shift_token
464
- break
465
- end
466
- parts.push(body)
467
- end
468
- mtype = "MULTIPART"
469
- msubtype = case_insensitive_string
470
- param, disposition, language, extension = body_ext_mpart
471
- return BodyTypeMultipart.new(mtype, msubtype, parts,
472
- param, disposition, language,
473
- extension)
718
+ parts = [body]; parts << body until SP?; msubtype = media_subtype
719
+ SP? and exts = body_ext_mpart
720
+ BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts)
474
721
  end
475
722
 
723
+ # n.b. this handles both type and subtype
724
+ #
725
+ # RFC-3501 vs RFC-9051:
726
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
727
+ # "MESSAGE" /
728
+ # "VIDEO") DQUOTE) / string) SP media-subtype
729
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
730
+ # "FONT" / "MESSAGE" / "MODEL" /
731
+ # "VIDEO") DQUOTE) / string) SP media-subtype
732
+ #
733
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
734
+ # DQUOTE "RFC822" DQUOTE
735
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
736
+ # DQUOTE ("RFC822" / "GLOBAL") DQUOTE
737
+ #
738
+ # RFC-3501 & RFC-9051:
739
+ # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
740
+ # media-subtype = string
476
741
  def media_type
477
- mtype = case_insensitive_string
478
- token = lookahead
479
- if token.symbol != T_SPACE
480
- return mtype, nil
481
- end
482
- match(T_SPACE)
483
- msubtype = case_insensitive_string
742
+ mtype = case_insensitive__string
743
+ SP? or return mtype, nil # ??? quirky!
744
+ msubtype = media_subtype
484
745
  return mtype, msubtype
485
746
  end
486
747
 
748
+ # TODO: check types
749
+ alias media_basic media_type # */* --- catchall
750
+ alias media_message media_type # message/rfc822, message/global
751
+ alias media_text media_type # text/*
752
+
753
+ alias media_subtype case_insensitive__string
754
+
755
+ # RFC-3501 & RFC-9051:
756
+ # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
757
+ # body-fld-enc SP body-fld-octets
487
758
  def body_fields
488
- param = body_fld_param
489
- match(T_SPACE)
490
- content_id = nstring
491
- match(T_SPACE)
492
- desc = nstring
493
- match(T_SPACE)
494
- enc = case_insensitive_string
495
- match(T_SPACE)
496
- size = number
497
- return param, content_id, desc, enc, size
759
+ fields = []
760
+ fields << body_fld_param; SP!
761
+ fields << body_fld_id; SP!
762
+ fields << body_fld_desc; SP!
763
+ fields << body_fld_enc; SP!
764
+ fields << body_fld_octets
765
+ fields
498
766
  end
499
767
 
768
+ # RFC3501, RFC9051:
769
+ # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
500
770
  def body_fld_param
501
- token = lookahead
502
- if token.symbol == T_NIL
503
- shift_token
504
- return nil
505
- end
506
- match(T_LPAR)
771
+ return if NIL?
507
772
  param = {}
508
- while true
509
- token = lookahead
510
- case token.symbol
511
- when T_RPAR
512
- shift_token
513
- break
514
- when T_SPACE
515
- shift_token
516
- end
517
- name = case_insensitive_string
518
- match(T_SPACE)
519
- val = string
520
- param[name] = val
521
- end
522
- return param
523
- end
524
-
773
+ lpar
774
+ name = case_insensitive__string; SP!; param[name] = string
775
+ while SP?
776
+ name = case_insensitive__string; SP!; param[name] = string
777
+ end
778
+ rpar
779
+ param
780
+ end
781
+
782
+ # RFC2060
783
+ # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
784
+ # [SPACE body_fld_lang
785
+ # [SPACE 1#body_extension]]]
786
+ # ;; MUST NOT be returned on non-extensible
787
+ # ;; "BODY" fetch
788
+ # RFC3501 & RFC9051
789
+ # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
790
+ # [SP body-fld-loc *(SP body-extension)]]]
791
+ # ; MUST NOT be returned on non-extensible
792
+ # ; "BODY" fetch
525
793
  def body_ext_1part
526
- token = lookahead
527
- if token.symbol == T_SPACE
528
- shift_token
529
- else
530
- return nil
531
- end
532
- md5 = nstring
533
-
534
- token = lookahead
535
- if token.symbol == T_SPACE
536
- shift_token
537
- else
538
- return md5
539
- end
540
- disposition = body_fld_dsp
541
-
542
- token = lookahead
543
- if token.symbol == T_SPACE
544
- shift_token
545
- else
546
- return md5, disposition
547
- end
548
- language = body_fld_lang
549
-
550
- token = lookahead
551
- if token.symbol == T_SPACE
552
- shift_token
553
- else
554
- return md5, disposition, language
555
- end
556
-
557
- extension = body_extensions
558
- return md5, disposition, language, extension
559
- end
560
-
794
+ fields = []; fields << body_fld_md5
795
+ SP? or return fields; fields << body_fld_dsp
796
+ SP? or return fields; fields << body_fld_lang
797
+ SP? or return fields; fields << body_fld_loc
798
+ SP? or return fields; fields << body_extensions
799
+ fields
800
+ end
801
+
802
+ # RFC-2060:
803
+ # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
804
+ # [SP 1#body_extension]]
805
+ # ;; MUST NOT be returned on non-extensible
806
+ # ;; "BODY" fetch
807
+ # RFC-3501 & RFC-9051:
808
+ # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
809
+ # [SP body-fld-loc *(SP body-extension)]]]
810
+ # ; MUST NOT be returned on non-extensible
811
+ # ; "BODY" fetch
561
812
  def body_ext_mpart
562
- token = lookahead
563
- if token.symbol == T_SPACE
564
- shift_token
565
- else
566
- return nil
567
- end
568
- param = body_fld_param
569
-
570
- token = lookahead
571
- if token.symbol == T_SPACE
572
- shift_token
573
- else
574
- return param
575
- end
576
- disposition = body_fld_dsp
577
-
578
- token = lookahead
579
- if token.symbol == T_SPACE
580
- shift_token
581
- else
582
- return param, disposition
583
- end
584
- language = body_fld_lang
585
-
586
- token = lookahead
587
- if token.symbol == T_SPACE
588
- shift_token
589
- else
590
- return param, disposition, language
591
- end
592
-
593
- extension = body_extensions
594
- return param, disposition, language, extension
595
- end
596
-
813
+ fields = []; fields << body_fld_param
814
+ SP? or return fields; fields << body_fld_dsp
815
+ SP? or return fields; fields << body_fld_lang
816
+ SP? or return fields; fields << body_fld_loc
817
+ SP? or return fields; fields << body_extensions
818
+ fields
819
+ end
820
+
821
+ alias body_fld_desc nstring
822
+ alias body_fld_id nstring
823
+ alias body_fld_loc nstring
824
+ alias body_fld_lines number64 # number in 3501, number64 in 9051
825
+ alias body_fld_md5 nstring
826
+ alias body_fld_octets number
827
+
828
+ # RFC-3501 & RFC-9051:
829
+ # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
830
+ # "QUOTED-PRINTABLE") DQUOTE) / string
831
+ alias body_fld_enc case_insensitive__string
832
+
833
+ # body-fld-dsp = "(" string SP body-fld-param ")" / nil
597
834
  def body_fld_dsp
598
- token = lookahead
599
- if token.symbol == T_NIL
600
- shift_token
601
- return nil
602
- end
603
- match(T_LPAR)
604
- dsp_type = case_insensitive_string
605
- match(T_SPACE)
606
- param = body_fld_param
607
- match(T_RPAR)
608
- return ContentDisposition.new(dsp_type, param)
835
+ return if NIL?
836
+ lpar; dsp_type = case_insensitive__string
837
+ SP!; param = body_fld_param
838
+ rpar
839
+ ContentDisposition.new(dsp_type, param)
609
840
  end
610
841
 
842
+ # body-fld-lang = nstring / "(" string *(SP string) ")"
611
843
  def body_fld_lang
612
- token = lookahead
613
- if token.symbol == T_LPAR
614
- shift_token
615
- result = []
616
- while true
617
- token = lookahead
618
- case token.symbol
619
- when T_RPAR
620
- shift_token
621
- return result
622
- when T_SPACE
623
- shift_token
624
- end
625
- result.push(case_insensitive_string)
626
- end
844
+ if lpar?
845
+ result = [case_insensitive__string]
846
+ result << case_insensitive__string while SP?
847
+ result
627
848
  else
628
- lang = nstring
629
- if lang
630
- return lang.upcase
631
- else
632
- return lang
633
- end
849
+ case_insensitive__nstring
634
850
  end
635
851
  end
636
852
 
853
+ # body-extension *(SP body-extension)
637
854
  def body_extensions
638
855
  result = []
639
- while true
640
- token = lookahead
641
- case token.symbol
642
- when T_RPAR
643
- return result
644
- when T_SPACE
645
- shift_token
646
- end
647
- result.push(body_extension)
648
- end
856
+ result << body_extension; while SP? do result << body_extension end
857
+ result
649
858
  end
650
859
 
860
+ # body-extension = nstring / number / number64 /
861
+ # "(" body-extension *(SP body-extension) ")"
862
+ # ; Future expansion. Client implementations
863
+ # ; MUST accept body-extension fields. Server
864
+ # ; implementations MUST NOT generate
865
+ # ; body-extension fields except as defined by
866
+ # ; future Standard or Standards Track
867
+ # ; revisions of this specification.
651
868
  def body_extension
652
- token = lookahead
653
- case token.symbol
654
- when T_LPAR
655
- shift_token
656
- result = body_extensions
657
- match(T_RPAR)
658
- return result
659
- when T_NUMBER
660
- return number
661
- else
662
- return nstring
869
+ if (uint = number64?) then uint
870
+ elsif lpar? then exts = body_extensions; rpar; exts
871
+ else nstring
663
872
  end
664
873
  end
665
874
 
@@ -969,29 +1178,38 @@ module Net
969
1178
  return UntaggedResponse.new(name, data, @str)
970
1179
  end
971
1180
 
972
- def capability_response
973
- token = match(T_ATOM)
974
- name = token.value.upcase
975
- match(T_SPACE)
976
- UntaggedResponse.new(name, capability_data, @str)
1181
+ # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
1182
+ # The grammar rule is used by both response-data and resp-text-code.
1183
+ # But this method only returns UntaggedResponse (response-data).
1184
+ #
1185
+ # RFC3501:
1186
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1187
+ # *(SP capability)
1188
+ # RFC9051:
1189
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1190
+ # *(SP capability)
1191
+ def capability_data__untagged
1192
+ UntaggedResponse.new label("CAPABILITY"), capability__list, @str
977
1193
  end
978
1194
 
979
- def capability_data
980
- data = []
981
- while true
982
- token = lookahead
983
- case token.symbol
984
- when T_CRLF, T_RBRA
985
- break
986
- when T_SPACE
987
- shift_token
988
- next
989
- end
990
- data.push(atom.upcase)
991
- end
992
- data
1195
+ # enable-data = "ENABLED" *(SP capability)
1196
+ def enable_data
1197
+ UntaggedResponse.new label("ENABLED"), capability__list, @str
993
1198
  end
994
1199
 
1200
+ # As a workaround for buggy servers, allow a trailing SP:
1201
+ # *(SP capability) [SP]
1202
+ def capability__list
1203
+ data = []; while _ = SP? && capability? do data << _ end; data
1204
+ end
1205
+
1206
+ # capability = ("AUTH=" auth-type) / atom
1207
+ # ; New capabilities MUST begin with "X" or be
1208
+ # ; registered with IANA as standard or
1209
+ # ; standards-track
1210
+ alias capability case_insensitive__atom
1211
+ alias capability? case_insensitive__atom?
1212
+
995
1213
  def id_response
996
1214
  token = match(T_ATOM)
997
1215
  name = token.value.upcase
@@ -1021,86 +1239,89 @@ module Net
1021
1239
  end
1022
1240
  end
1023
1241
 
1242
+ # namespace-response = "NAMESPACE" SP namespace
1243
+ # SP namespace SP namespace
1244
+ # ; The first Namespace is the Personal Namespace(s).
1245
+ # ; The second Namespace is the Other Users'
1246
+ # ; Namespace(s).
1247
+ # ; The third Namespace is the Shared Namespace(s).
1024
1248
  def namespace_response
1249
+ name = label("NAMESPACE")
1025
1250
  @lex_state = EXPR_DATA
1026
- token = lookahead
1027
- token = match(T_ATOM)
1028
- name = token.value.upcase
1029
- match(T_SPACE)
1030
- personal = namespaces
1031
- match(T_SPACE)
1032
- other = namespaces
1033
- match(T_SPACE)
1034
- shared = namespaces
1251
+ data = Namespaces.new((SP!; namespace),
1252
+ (SP!; namespace),
1253
+ (SP!; namespace))
1254
+ UntaggedResponse.new(name, data, @str)
1255
+ ensure
1035
1256
  @lex_state = EXPR_BEG
1036
- data = Namespaces.new(personal, other, shared)
1037
- return UntaggedResponse.new(name, data, @str)
1038
- end
1039
-
1040
- def namespaces
1041
- token = lookahead
1042
- # empty () is not allowed, so nil is functionally identical to empty.
1043
- data = []
1044
- if token.symbol == T_NIL
1045
- shift_token
1046
- else
1047
- match(T_LPAR)
1048
- loop do
1049
- data << namespace
1050
- break unless lookahead.symbol == T_SPACE
1051
- shift_token
1052
- end
1053
- match(T_RPAR)
1054
- end
1055
- data
1056
1257
  end
1057
1258
 
1259
+ # namespace = nil / "(" 1*namespace-descr ")"
1058
1260
  def namespace
1059
- match(T_LPAR)
1060
- prefix = match(T_QUOTED, T_LITERAL).value
1061
- match(T_SPACE)
1062
- delimiter = string
1261
+ NIL? and return []
1262
+ lpar
1263
+ list = [namespace_descr]
1264
+ list << namespace_descr until rpar?
1265
+ list
1266
+ end
1267
+
1268
+ # namespace-descr = "(" string SP
1269
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil)
1270
+ # [namespace-response-extensions] ")"
1271
+ def namespace_descr
1272
+ lpar
1273
+ prefix = string; SP!
1274
+ delimiter = nquoted # n.b: should only accept single char
1063
1275
  extensions = namespace_response_extensions
1064
- match(T_RPAR)
1276
+ rpar
1065
1277
  Namespace.new(prefix, delimiter, extensions)
1066
1278
  end
1067
1279
 
1280
+ # namespace-response-extensions = *namespace-response-extension
1281
+ # namespace-response-extension = SP string SP
1282
+ # "(" string *(SP string) ")"
1068
1283
  def namespace_response_extensions
1069
1284
  data = {}
1070
- token = lookahead
1071
- if token.symbol == T_SPACE
1072
- shift_token
1073
- name = match(T_QUOTED, T_LITERAL).value
1285
+ while SP?
1286
+ name = string; SP!
1287
+ lpar
1074
1288
  data[name] ||= []
1075
- match(T_SPACE)
1076
- match(T_LPAR)
1077
- loop do
1078
- data[name].push match(T_QUOTED, T_LITERAL).value
1079
- break unless lookahead.symbol == T_SPACE
1080
- shift_token
1081
- end
1082
- match(T_RPAR)
1289
+ data[name] << string
1290
+ data[name] << string while SP?
1291
+ rpar
1083
1292
  end
1084
1293
  data
1085
1294
  end
1086
1295
 
1087
- # text = 1*TEXT-CHAR
1088
- # TEXT-CHAR = <any CHAR except CR and LF>
1296
+ # TEXT-CHAR = <any CHAR except CR and LF>
1297
+ # RFC3501:
1298
+ # text = 1*TEXT-CHAR
1299
+ # RFC9051:
1300
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
1301
+ # ; Non-ASCII text can only be returned
1302
+ # ; after ENABLE IMAP4rev2 command
1089
1303
  def text
1090
- match(T_TEXT, lex_state: EXPR_TEXT).value
1304
+ match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8")
1305
+ end
1306
+
1307
+ # an "accept" version of #text
1308
+ def text?
1309
+ accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8")
1091
1310
  end
1092
1311
 
1093
- # resp-text = ["[" resp-text-code "]" SP] text
1312
+ # RFC3501:
1313
+ # resp-text = ["[" resp-text-code "]" SP] text
1314
+ # RFC9051:
1315
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1316
+ #
1317
+ # We leniently re-interpret this as
1318
+ # resp-text = ("[" resp-text-code "]" [SP [text]]) / [text]
1094
1319
  def resp_text
1095
- token = match(T_LBRA, T_TEXT, lex_state: EXPR_RTEXT)
1096
- case token.symbol
1097
- when T_LBRA
1098
- code = resp_text_code
1099
- match(T_RBRA)
1100
- accept_space # violating RFC
1101
- ResponseText.new(code, text)
1102
- when T_TEXT
1103
- ResponseText.new(nil, token.value)
1320
+ if lbra?
1321
+ code = resp_text_code; rbra
1322
+ ResponseText.new(code, SP? && text? || "")
1323
+ else
1324
+ ResponseText.new(nil, text? || "")
1104
1325
  end
1105
1326
  end
1106
1327
 
@@ -1127,7 +1348,7 @@ module Net
1127
1348
  when /\A(?:BADCHARSET)\z/n
1128
1349
  result = ResponseCode.new(name, charset_list)
1129
1350
  when /\A(?:CAPABILITY)\z/ni
1130
- result = ResponseCode.new(name, capability_data)
1351
+ result = ResponseCode.new(name, capability__list)
1131
1352
  when /\A(?:PERMANENTFLAGS)\z/n
1132
1353
  match(T_SPACE)
1133
1354
  result = ResponseCode.new(name, flag_list)
@@ -1142,8 +1363,7 @@ module Net
1142
1363
  token = lookahead
1143
1364
  if token.symbol == T_SPACE
1144
1365
  shift_token
1145
- token = match(T_TEXT, lex_state: EXPR_CTEXT)
1146
- result = ResponseCode.new(name, token.value)
1366
+ result = ResponseCode.new(name, text_chars_except_rbra)
1147
1367
  else
1148
1368
  result = ResponseCode.new(name, nil)
1149
1369
  end
@@ -1151,6 +1371,11 @@ module Net
1151
1371
  return result
1152
1372
  end
1153
1373
 
1374
+ # 1*<any TEXT-CHAR except "]">
1375
+ def text_chars_except_rbra
1376
+ match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')[0]
1377
+ end
1378
+
1154
1379
  def charset_list
1155
1380
  result = []
1156
1381
  if accept(T_SPACE)
@@ -1232,9 +1457,7 @@ module Net
1232
1457
  mailbox = $3
1233
1458
  host = $4
1234
1459
  for s in [name, route, mailbox, host]
1235
- if s
1236
- s.gsub!(/\\(["\\])/n, "\\1")
1237
- end
1460
+ Patterns.unescape_quoted! s
1238
1461
  end
1239
1462
  else
1240
1463
  name = nstring
@@ -1268,84 +1491,6 @@ module Net
1268
1491
  end
1269
1492
  end
1270
1493
 
1271
- def nstring
1272
- token = lookahead
1273
- if token.symbol == T_NIL
1274
- shift_token
1275
- return nil
1276
- else
1277
- return string
1278
- end
1279
- end
1280
-
1281
- def astring
1282
- token = lookahead
1283
- if string_token?(token)
1284
- return string
1285
- else
1286
- return astring_chars
1287
- end
1288
- end
1289
-
1290
- def string
1291
- token = lookahead
1292
- if token.symbol == T_NIL
1293
- shift_token
1294
- return nil
1295
- end
1296
- token = match(T_QUOTED, T_LITERAL)
1297
- return token.value
1298
- end
1299
-
1300
- STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1301
-
1302
- def string_token?(token)
1303
- return STRING_TOKENS.include?(token.symbol)
1304
- end
1305
-
1306
- def case_insensitive_string
1307
- token = lookahead
1308
- if token.symbol == T_NIL
1309
- shift_token
1310
- return nil
1311
- end
1312
- token = match(T_QUOTED, T_LITERAL)
1313
- return token.value.upcase
1314
- end
1315
-
1316
- # atom = 1*ATOM-CHAR
1317
- # ATOM-CHAR = <any CHAR except atom-specials>
1318
- ATOM_TOKENS = [
1319
- T_ATOM,
1320
- T_NUMBER,
1321
- T_NIL,
1322
- T_LBRA,
1323
- T_PLUS
1324
- ]
1325
-
1326
- def atom
1327
- -combine_adjacent(*ATOM_TOKENS)
1328
- end
1329
-
1330
- # ASTRING-CHAR = ATOM-CHAR / resp-specials
1331
- # resp-specials = "]"
1332
- ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
1333
-
1334
- def astring_chars
1335
- combine_adjacent(*ASTRING_CHARS_TOKENS)
1336
- end
1337
-
1338
- def combine_adjacent(*tokens)
1339
- result = "".b
1340
- while token = accept(*tokens)
1341
- result << token.value
1342
- end
1343
- if result.empty?
1344
- parse_error('unexpected token %s (expected %s)',
1345
- lookahead.symbol, args.join(" or "))
1346
- end
1347
- result
1348
- end
1349
1494
 
1350
1495
  # See https://www.rfc-editor.org/errata/rfc3501
1351
1496
  #
@@ -1358,16 +1503,6 @@ module Net
1358
1503
  end
1359
1504
  end
1360
1505
 
1361
- def number
1362
- token = lookahead
1363
- if token.symbol == T_NIL
1364
- shift_token
1365
- return nil
1366
- end
1367
- token = match(T_NUMBER)
1368
- return token.value.to_i
1369
- end
1370
-
1371
1506
  # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1372
1507
  # uid-set = (uniqueid / uid-range) *("," uid-set)
1373
1508
  # uid-range = (uniqueid ":" uniqueid)
@@ -1381,29 +1516,11 @@ module Net
1381
1516
  case token.symbol
1382
1517
  when T_NUMBER then [Integer(token.value)]
1383
1518
  when T_ATOM
1384
- entries = uid_set__ranges(token.value)
1385
- if (count = entries.sum(&:count)) > MAX_UID_SET_SIZE
1386
- parse_error("uid-set is too large: %d > 10k", count)
1387
- end
1388
- entries.flat_map(&:to_a)
1389
- end
1390
- end
1391
-
1392
- # returns an array of ranges
1393
- def uid_set__ranges(uidset)
1394
- entries = []
1395
- uidset.split(",") do |entry|
1396
- uids = entry.split(":", 2).map {|uid|
1397
- unless uid =~ /\A[1-9][0-9]*\z/
1398
- parse_error("invalid uid-set uid: %p", uid)
1399
- end
1400
- uid = Integer(uid)
1401
- NumValidator.ensure_nz_number(uid)
1402
- uid
1519
+ token.value.split(",").flat_map {|range|
1520
+ range = range.split(":").map {|uniqueid| Integer(uniqueid) }
1521
+ range.size == 1 ? range : Range.new(range.min, range.max).to_a
1403
1522
  }
1404
- entries << Range.new(*uids.minmax)
1405
1523
  end
1406
- entries
1407
1524
  end
1408
1525
 
1409
1526
  def nil_atom
@@ -1413,15 +1530,6 @@ module Net
1413
1530
 
1414
1531
  SPACES_REGEXP = /\G */n
1415
1532
 
1416
- # This advances @pos directly so it's safe before changing @lex_state.
1417
- def accept_space
1418
- if @token
1419
- shift_token if @token.symbol == T_SPACE
1420
- elsif @str[@pos] == " "
1421
- @pos += 1
1422
- end
1423
- end
1424
-
1425
1533
  # The RFC is very strict about this and usually we should be too.
1426
1534
  # But skipping spaces is usually a safe workaround for buggy servers.
1427
1535
  #
@@ -1433,46 +1541,6 @@ module Net
1433
1541
  end
1434
1542
  end
1435
1543
 
1436
- def match(*args, lex_state: @lex_state)
1437
- if @token && lex_state != @lex_state
1438
- parse_error("invalid lex_state change to %s with unconsumed token",
1439
- lex_state)
1440
- end
1441
- begin
1442
- @lex_state, original_lex_state = lex_state, @lex_state
1443
- token = lookahead
1444
- unless args.include?(token.symbol)
1445
- parse_error('unexpected token %s (expected %s)',
1446
- token.symbol.id2name,
1447
- args.collect {|i| i.id2name}.join(" or "))
1448
- end
1449
- shift_token
1450
- return token
1451
- ensure
1452
- @lex_state = original_lex_state
1453
- end
1454
- end
1455
-
1456
- # like match, but does not raise error on failure.
1457
- #
1458
- # returns and shifts token on successful match
1459
- # returns nil and leaves @token unshifted on no match
1460
- def accept(*args)
1461
- token = lookahead
1462
- if args.include?(token.symbol)
1463
- shift_token
1464
- token
1465
- end
1466
- end
1467
-
1468
- def lookahead
1469
- @token ||= next_token
1470
- end
1471
-
1472
- def shift_token
1473
- @token = nil
1474
- end
1475
-
1476
1544
  def next_token
1477
1545
  case @lex_state
1478
1546
  when EXPR_BEG
@@ -1480,39 +1548,42 @@ module Net
1480
1548
  @pos = $~.end(0)
1481
1549
  if $1
1482
1550
  return Token.new(T_SPACE, $+)
1483
- elsif $2
1484
- return Token.new(T_NIL, $+)
1551
+ elsif $2 && $6
1552
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
1553
+ return Token.new(T_ATOM, $2)
1485
1554
  elsif $3
1486
- return Token.new(T_NUMBER, $+)
1555
+ return Token.new(T_NIL, $+)
1487
1556
  elsif $4
1488
- return Token.new(T_ATOM, $+)
1557
+ return Token.new(T_NUMBER, $+)
1489
1558
  elsif $5
1490
- return Token.new(T_QUOTED,
1491
- $+.gsub(/\\(["\\])/n, "\\1"))
1492
- elsif $6
1493
- return Token.new(T_LPAR, $+)
1559
+ return Token.new(T_PLUS, $+)
1494
1560
  elsif $7
1495
- return Token.new(T_RPAR, $+)
1561
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
1562
+ return Token.new(T_ATOM, $+)
1496
1563
  elsif $8
1497
- return Token.new(T_BSLASH, $+)
1564
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1498
1565
  elsif $9
1499
- return Token.new(T_STAR, $+)
1566
+ return Token.new(T_LPAR, $+)
1500
1567
  elsif $10
1501
- return Token.new(T_LBRA, $+)
1568
+ return Token.new(T_RPAR, $+)
1502
1569
  elsif $11
1503
- return Token.new(T_RBRA, $+)
1570
+ return Token.new(T_BSLASH, $+)
1504
1571
  elsif $12
1572
+ return Token.new(T_STAR, $+)
1573
+ elsif $13
1574
+ return Token.new(T_LBRA, $+)
1575
+ elsif $14
1576
+ return Token.new(T_RBRA, $+)
1577
+ elsif $15
1505
1578
  len = $+.to_i
1506
1579
  val = @str[@pos, len]
1507
1580
  @pos += len
1508
1581
  return Token.new(T_LITERAL, val)
1509
- elsif $13
1510
- return Token.new(T_PLUS, $+)
1511
- elsif $14
1582
+ elsif $16
1512
1583
  return Token.new(T_PERCENT, $+)
1513
- elsif $15
1584
+ elsif $17
1514
1585
  return Token.new(T_CRLF, $+)
1515
- elsif $16
1586
+ elsif $18
1516
1587
  return Token.new(T_EOF, $+)
1517
1588
  else
1518
1589
  parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
@@ -1531,8 +1602,7 @@ module Net
1531
1602
  elsif $3
1532
1603
  return Token.new(T_NUMBER, $+)
1533
1604
  elsif $4
1534
- return Token.new(T_QUOTED,
1535
- $+.gsub(/\\(["\\])/n, "\\1"))
1605
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1536
1606
  elsif $5
1537
1607
  len = $+.to_i
1538
1608
  val = @str[@pos, len]
@@ -1549,63 +1619,11 @@ module Net
1549
1619
  @str.index(/\S*/n, @pos)
1550
1620
  parse_error("unknown token - %s", $&.dump)
1551
1621
  end
1552
- when EXPR_TEXT
1553
- if @str.index(TEXT_REGEXP, @pos)
1554
- @pos = $~.end(0)
1555
- if $1
1556
- return Token.new(T_TEXT, $+)
1557
- else
1558
- parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid")
1559
- end
1560
- else
1561
- @str.index(/\S*/n, @pos)
1562
- parse_error("unknown token - %s", $&.dump)
1563
- end
1564
- when EXPR_RTEXT
1565
- if @str.index(RTEXT_REGEXP, @pos)
1566
- @pos = $~.end(0)
1567
- if $1
1568
- return Token.new(T_LBRA, $+)
1569
- elsif $2
1570
- return Token.new(T_TEXT, $+)
1571
- else
1572
- parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid")
1573
- end
1574
- else
1575
- @str.index(/\S*/n, @pos)
1576
- parse_error("unknown token - %s", $&.dump)
1577
- end
1578
- when EXPR_CTEXT
1579
- if @str.index(CTEXT_REGEXP, @pos)
1580
- @pos = $~.end(0)
1581
- if $1
1582
- return Token.new(T_TEXT, $+)
1583
- else
1584
- parse_error("[Net::IMAP BUG] CTEXT_REGEXP is invalid")
1585
- end
1586
- else
1587
- @str.index(/\S*/n, @pos) #/
1588
- parse_error("unknown token - %s", $&.dump)
1589
- end
1590
1622
  else
1591
1623
  parse_error("invalid @lex_state - %s", @lex_state.inspect)
1592
1624
  end
1593
1625
  end
1594
1626
 
1595
- def parse_error(fmt, *args)
1596
- if IMAP.debug
1597
- $stderr.printf("@str: %s\n", @str.dump)
1598
- $stderr.printf("@pos: %d\n", @pos)
1599
- $stderr.printf("@lex_state: %s\n", @lex_state)
1600
- if @token
1601
- $stderr.printf("@token.symbol: %s\n", @token.symbol)
1602
- $stderr.printf("@token.value: %s\n", @token.value.inspect)
1603
- end
1604
- end
1605
- raise ResponseParseError, format(fmt, *args)
1606
- end
1607
1627
  end
1608
-
1609
1628
  end
1610
-
1611
1629
  end