net-imap 0.3.7 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/pages.yml +46 -0
  3. data/.github/workflows/test.yml +5 -12
  4. data/Gemfile +1 -0
  5. data/README.md +15 -4
  6. data/Rakefile +0 -7
  7. data/benchmarks/generate_parser_benchmarks +52 -0
  8. data/benchmarks/parser.yml +578 -0
  9. data/benchmarks/stringprep.yml +1 -1
  10. data/lib/net/imap/authenticators.rb +26 -57
  11. data/lib/net/imap/command_data.rb +13 -6
  12. data/lib/net/imap/deprecated_client_options.rb +139 -0
  13. data/lib/net/imap/response_data.rb +46 -41
  14. data/lib/net/imap/response_parser/parser_utils.rb +230 -0
  15. data/lib/net/imap/response_parser.rb +665 -627
  16. data/lib/net/imap/sasl/anonymous_authenticator.rb +68 -0
  17. data/lib/net/imap/sasl/authenticators.rb +112 -0
  18. data/lib/net/imap/{authenticators/cram_md5.rb → sasl/cram_md5_authenticator.rb} +15 -9
  19. data/lib/net/imap/{authenticators/digest_md5.rb → sasl/digest_md5_authenticator.rb} +74 -21
  20. data/lib/net/imap/sasl/external_authenticator.rb +62 -0
  21. data/lib/net/imap/sasl/gs2_header.rb +80 -0
  22. data/lib/net/imap/{authenticators/login.rb → sasl/login_authenticator.rb} +19 -14
  23. data/lib/net/imap/sasl/oauthbearer_authenticator.rb +164 -0
  24. data/lib/net/imap/sasl/plain_authenticator.rb +93 -0
  25. data/lib/net/imap/sasl/scram_algorithm.rb +58 -0
  26. data/lib/net/imap/sasl/scram_authenticator.rb +278 -0
  27. data/lib/net/imap/sasl/stringprep.rb +6 -66
  28. data/lib/net/imap/sasl/xoauth2_authenticator.rb +88 -0
  29. data/lib/net/imap/sasl.rb +139 -44
  30. data/lib/net/imap/stringprep/nameprep.rb +70 -0
  31. data/lib/net/imap/stringprep/saslprep.rb +69 -0
  32. data/lib/net/imap/stringprep/saslprep_tables.rb +96 -0
  33. data/lib/net/imap/stringprep/tables.rb +146 -0
  34. data/lib/net/imap/stringprep/trace.rb +85 -0
  35. data/lib/net/imap/stringprep.rb +159 -0
  36. data/lib/net/imap.rb +967 -588
  37. data/net-imap.gemspec +1 -1
  38. data/rakelib/saslprep.rake +4 -4
  39. data/rakelib/string_prep_tables_generator.rb +82 -60
  40. metadata +26 -11
  41. data/lib/net/imap/authenticators/plain.rb +0 -41
  42. data/lib/net/imap/authenticators/xoauth2.rb +0 -20
  43. data/lib/net/imap/sasl/saslprep.rb +0 -55
  44. data/lib/net/imap/sasl/saslprep_tables.rb +0 -98
  45. data/lib/net/imap/sasl/stringprep_tables.rb +0 -153
@@ -1,12 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "response_parser/parser_utils"
4
5
 
5
6
  module Net
6
7
  class IMAP < Protocol
7
8
 
8
9
  # Parses an \IMAP server response.
9
10
  class ResponseParser
11
+ include ParserUtils
12
+ extend ParserUtils::Generator
13
+
10
14
  # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
11
15
  def initialize
12
16
  @str = nil
@@ -33,69 +37,347 @@ module Net
33
37
 
34
38
  # :stopdoc:
35
39
 
36
- EXPR_BEG = :EXPR_BEG
37
- EXPR_DATA = :EXPR_DATA
38
- EXPR_TEXT = :EXPR_TEXT
39
- EXPR_RTEXT = :EXPR_RTEXT
40
- EXPR_CTEXT = :EXPR_CTEXT
41
-
42
- T_SPACE = :SPACE
43
- T_NIL = :NIL
44
- T_NUMBER = :NUMBER
45
- T_ATOM = :ATOM
46
- T_QUOTED = :QUOTED
47
- T_LPAR = :LPAR
48
- T_RPAR = :RPAR
49
- T_BSLASH = :BSLASH
50
- T_STAR = :STAR
51
- T_LBRA = :LBRA
52
- T_RBRA = :RBRA
53
- T_LITERAL = :LITERAL
54
- T_PLUS = :PLUS
55
- T_PERCENT = :PERCENT
56
- T_CRLF = :CRLF
57
- T_EOF = :EOF
58
- T_TEXT = :TEXT
59
-
40
+ EXPR_BEG = :EXPR_BEG # the default, used in most places
41
+ EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
42
+
43
+ T_SPACE = :SPACE # atom special
44
+ T_ATOM = :ATOM # atom (subset of astring chars)
45
+ T_NIL = :NIL # subset of atom and label
46
+ T_NUMBER = :NUMBER # subset of atom
47
+ T_LBRA = :LBRA # subset of atom
48
+ T_PLUS = :PLUS # subset of atom; tag special
49
+ T_RBRA = :RBRA # atom special; resp_special; valid astring char
50
+ T_QUOTED = :QUOTED # starts/ends with atom special
51
+ T_BSLASH = :BSLASH # atom special; quoted special
52
+ T_LPAR = :LPAR # atom special; paren list delimiter
53
+ T_RPAR = :RPAR # atom special; paren list delimiter
54
+ T_STAR = :STAR # atom special; list wildcard
55
+ T_PERCENT = :PERCENT # atom special; list wildcard
56
+ T_LITERAL = :LITERAL # starts with atom special
57
+ T_CRLF = :CRLF # atom special; text special; quoted special
58
+ T_TEXT = :TEXT # any char except CRLF
59
+ T_EOF = :EOF # end of response string
60
+
61
+ module Patterns
62
+
63
+ module CharClassSubtraction
64
+ refine Regexp do
65
+ def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end
66
+ end
67
+ end
68
+ using CharClassSubtraction
69
+
70
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
71
+ # >>>
72
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
73
+ # CHAR = %x01-7F
74
+ # CRLF = CR LF
75
+ # ; Internet standard newline
76
+ # CTL = %x00-1F / %x7F
77
+ # ; controls
78
+ # DIGIT = %x30-39
79
+ # ; 0-9
80
+ # DQUOTE = %x22
81
+ # ; " (Double Quote)
82
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
83
+ # OCTET = %x00-FF
84
+ # SP = %x20
85
+ module RFC5234
86
+ ALPHA = /[A-Za-z]/n
87
+ CHAR = /[\x01-\x7f]/n
88
+ CRLF = /\r\n/n
89
+ CTL = /[\x00-\x1F\x7F]/n
90
+ DIGIT = /\d/n
91
+ DQUOTE = /"/n
92
+ HEXDIG = /\h/
93
+ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
94
+ SP = / /n
95
+ end
96
+
97
+ # UTF-8, a transformation format of ISO 10646
98
+ # >>>
99
+ # UTF8-1 = %x00-7F
100
+ # UTF8-tail = %x80-BF
101
+ # UTF8-2 = %xC2-DF UTF8-tail
102
+ # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
103
+ # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
104
+ # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
105
+ # %xF4 %x80-8F 2( UTF8-tail )
106
+ # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
107
+ # UTF8-octets = *( UTF8-char )
108
+ #
109
+ # n.b. String * Integer is used for repetition, rather than /x{3}/,
110
+ # because ruby 3.2's linear-time cache-based optimization doesn't work
111
+ # with "bounded or fixed times repetition nesting in another repetition
112
+ # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
113
+ # believe it is hard to support this case correctly."
114
+ # See https://bugs.ruby-lang.org/issues/19104
115
+ module RFC3629
116
+ UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
117
+ UTF8_TAIL = /[\x80-\xBF]/n
118
+ UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
119
+ UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
120
+ /\xED[\x80-\x9F]#{UTF8_TAIL}/n,
121
+ /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
122
+ /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
123
+ UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
124
+ /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
125
+ /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
126
+ UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
127
+ UTF8_OCTETS = /#{UTF8_CHAR}*/n
128
+ end
129
+
130
+ include RFC5234
131
+ include RFC3629
132
+
133
+ # CHAR8 = %x01-ff
134
+ # ; any OCTET except NUL, %x00
135
+ CHAR8 = /[\x01-\xff]/n
136
+
137
+ # list-wildcards = "%" / "*"
138
+ LIST_WILDCARDS = /[%*]/n
139
+ # quoted-specials = DQUOTE / "\"
140
+ QUOTED_SPECIALS = /["\\]/n
141
+ # resp-specials = "]"
142
+ RESP_SPECIALS = /[\]]/n
143
+
144
+ # atomish = 1*<any ATOM-CHAR except "[">
145
+ # ; We use "atomish" for msg-att and section, in order
146
+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
147
+ #
148
+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
149
+ # quoted-specials / resp-specials
150
+ # ATOM-CHAR = <any CHAR except atom-specials>
151
+ # atom = 1*ATOM-CHAR
152
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
153
+ # tag = 1*<any ASTRING-CHAR except "+">
154
+
155
+ ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
156
+ ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
157
+
158
+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
159
+ ATOM_CHAR = CHAR - ATOM_SPECIALS
160
+
161
+ ATOM = /#{ATOM_CHAR}+/n
162
+ ASTRING_CHARS = /#{ASTRING_CHAR}+/n
163
+ ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
164
+ TAG = /#{ASTRING_CHAR - /[+]/ }+/
165
+
166
+ # TEXT-CHAR = <any CHAR except CR and LF>
167
+ TEXT_CHAR = CHAR - /[\r\n]/
168
+
169
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
170
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
171
+ CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
172
+
173
+ # RFC3501:
174
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
175
+ # "\" quoted-specials
176
+ # RFC9051:
177
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
178
+ # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
179
+ # RFC3501 & RFC9051:
180
+ # quoted = DQUOTE *QUOTED-CHAR DQUOTE
181
+ QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
182
+ QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
183
+ QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
184
+ QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
185
+ UTF8_2, UTF8_3, UTF8_4)
186
+ QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
187
+ QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
188
+
189
+ # RFC3501:
190
+ # text = 1*TEXT-CHAR
191
+ # RFC9051:
192
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
193
+ # ; Non-ASCII text can only be returned
194
+ # ; after ENABLE IMAP4rev2 command
195
+ TEXT_rev1 = /#{TEXT_CHAR}+/
196
+ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
197
+
198
+ # RFC3501:
199
+ # literal = "{" number "}" CRLF *CHAR8
200
+ # ; Number represents the number of CHAR8s
201
+ # RFC9051:
202
+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
203
+ # ; <number64> represents the number of CHAR8s.
204
+ # ; A non-synchronizing literal is distinguished
205
+ # ; from a synchronizing literal by the presence of
206
+ # ; "+" before the closing "}".
207
+ # ; Non-synchronizing literals are not allowed when
208
+ # ; sent from server to the client.
209
+ LITERAL = /\{(\d+)\}\r\n/n
210
+
211
+ module_function
212
+
213
+ def unescape_quoted!(quoted)
214
+ quoted
215
+ &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
216
+ &.force_encoding("UTF-8")
217
+ end
218
+
219
+ def unescape_quoted(quoted)
220
+ quoted
221
+ &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
222
+ &.force_encoding("UTF-8")
223
+ end
224
+
225
+ end
226
+
227
+ # the default, used in most places
60
228
  BEG_REGEXP = /\G(?:\
61
- (?# 1: SPACE )( +)|\
62
- (?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
63
- (?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
64
- (?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\
65
- (?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
66
- (?# 6: LPAR )(\()|\
67
- (?# 7: RPAR )(\))|\
68
- (?# 8: BSLASH )(\\)|\
69
- (?# 9: STAR )(\*)|\
70
- (?# 10: LBRA )(\[)|\
71
- (?# 11: RBRA )(\])|\
72
- (?# 12: LITERAL )\{(\d+)\}\r\n|\
73
- (?# 13: PLUS )(\+)|\
74
- (?# 14: PERCENT )(%)|\
75
- (?# 15: CRLF )(\r\n)|\
76
- (?# 16: EOF )(\z))/ni
77
-
229
+ (?# 1: SPACE )( )|\
230
+ (?# 2: ATOM prefixed with a compatible subtype)\
231
+ ((?:\
232
+ (?# 3: NIL )(NIL)|\
233
+ (?# 4: NUMBER )(\d+)|\
234
+ (?# 5: PLUS )(\+))\
235
+ (?# 6: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
236
+ (?# This enables greedy alternation without lookahead, in linear time.)\
237
+ )|\
238
+ (?# Also need to check for ATOM without a subtype prefix.)\
239
+ (?# 7: ATOM )(#{Patterns::ATOMISH})|\
240
+ (?# 8: QUOTED )#{Patterns::QUOTED_rev2}|\
241
+ (?# 9: LPAR )(\()|\
242
+ (?# 10: RPAR )(\))|\
243
+ (?# 11: BSLASH )(\\)|\
244
+ (?# 12: STAR )(\*)|\
245
+ (?# 13: LBRA )(\[)|\
246
+ (?# 14: RBRA )(\])|\
247
+ (?# 15: LITERAL )#{Patterns::LITERAL}|\
248
+ (?# 16: PERCENT )(%)|\
249
+ (?# 17: CRLF )(\r\n)|\
250
+ (?# 18: EOF )(\z))/ni
251
+
252
+ # envelope, body(structure), namespaces
78
253
  DATA_REGEXP = /\G(?:\
79
254
  (?# 1: SPACE )( )|\
80
255
  (?# 2: NIL )(NIL)|\
81
256
  (?# 3: NUMBER )(\d+)|\
82
- (?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
83
- (?# 5: LITERAL )\{(\d+)\}\r\n|\
257
+ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
258
+ (?# 5: LITERAL )#{Patterns::LITERAL}|\
84
259
  (?# 6: LPAR )(\()|\
85
260
  (?# 7: RPAR )(\)))/ni
86
261
 
87
- TEXT_REGEXP = /\G(?:\
88
- (?# 1: TEXT )([^\x00\r\n]*))/ni
262
+ # text, after 'resp-text-code "]"'
263
+ TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
89
264
 
90
- RTEXT_REGEXP = /\G(?:\
91
- (?# 1: LBRA )(\[)|\
92
- (?# 2: TEXT )([^\x00\r\n]*))/ni
93
-
94
- CTEXT_REGEXP = /\G(?:\
95
- (?# 1: TEXT )([^\x00\r\n\]]*))/ni
265
+ # resp-text-code, after 'atom SP'
266
+ CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
96
267
 
97
268
  Token = Struct.new(:symbol, :value)
98
269
 
270
+ def_char_matchers :SP, " ", :T_SPACE
271
+
272
+ def_char_matchers :lpar, "(", :T_LPAR
273
+ def_char_matchers :rpar, ")", :T_RPAR
274
+
275
+ def_char_matchers :lbra, "[", :T_LBRA
276
+ def_char_matchers :rbra, "]", :T_RBRA
277
+
278
+ # valid number ranges are not enforced by parser
279
+ # number = 1*DIGIT
280
+ # ; Unsigned 32-bit integer
281
+ # ; (0 <= n < 4,294,967,296)
282
+ def_token_matchers :number, T_NUMBER, coerce: Integer
283
+
284
+ def_token_matchers :quoted, T_QUOTED
285
+
286
+ # string = quoted / literal
287
+ def_token_matchers :string, T_QUOTED, T_LITERAL
288
+
289
+ # use where string represents "LABEL" values
290
+ def_token_matchers :case_insensitive__string,
291
+ T_QUOTED, T_LITERAL,
292
+ send: :upcase
293
+
294
+ # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
295
+ # NIL? returns nil when it does *not* match
296
+ def_token_matchers :NIL, T_NIL
297
+
298
+ # In addition to explicit uses of +tagged-ext-label+, use this to match
299
+ # keywords when the grammar has not provided any extension syntax.
300
+ #
301
+ # Do *not* use this for labels where the grammar specifies extensions
302
+ # can be +atom+, even if all currently defined labels would match. For
303
+ # example response codes in +resp-text-code+.
304
+ #
305
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
306
+ # ; Is a valid RFC 3501 "atom".
307
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
308
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
309
+ #
310
+ # TODO: add to lexer and only match tagged-ext-label
311
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
312
+
313
+ # atom = 1*ATOM-CHAR
314
+ # ATOM-CHAR = <any CHAR except atom-specials>
315
+ ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
316
+
317
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
318
+ # resp-specials = "]"
319
+ ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
320
+
321
+ ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
322
+
323
+ # atom = 1*ATOM-CHAR
324
+ #
325
+ # TODO: match atom entirely by regexp (in the "lexer")
326
+ def atom; -combine_adjacent(*ATOM_TOKENS) end
327
+
328
+ # the #accept version of #atom
329
+ def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
330
+
331
+ # Returns <tt>atom.upcase</tt>
332
+ def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
333
+
334
+ # Returns <tt>atom?&.upcase</tt>
335
+ def case_insensitive__atom?
336
+ -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
337
+ end
338
+
339
+ # TODO: handle astring_chars entirely inside the lexer
340
+ def astring_chars
341
+ combine_adjacent(*ASTRING_CHARS_TOKENS)
342
+ end
343
+
344
+ # astring = 1*ASTRING-CHAR / string
345
+ def astring
346
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
347
+ end
348
+
349
+ def astring?
350
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
351
+ end
352
+
353
+ # Use #label or #label_in to assert specific known labels
354
+ # (+tagged-ext-label+ only, not +atom+).
355
+ def label(word)
356
+ (val = tagged_ext_label) == word and return val
357
+ parse_error("unexpected atom %p, expected %p instead", val, word)
358
+ end
359
+
360
+ # nstring = string / nil
361
+ def nstring
362
+ NIL? ? nil : string
363
+ end
364
+
365
+ def nquoted
366
+ NIL? ? nil : quoted
367
+ end
368
+
369
+ # use where nstring represents "LABEL" values
370
+ def case_insensitive__nstring
371
+ NIL? ? nil : case_insensitive__string
372
+ end
373
+
374
+ # valid number ranges are not enforced by parser
375
+ # number64 = 1*DIGIT
376
+ # ; Unsigned 63-bit integer
377
+ # ; (0 <= n <= 9,223,372,036,854,775,807)
378
+ alias number64 number
379
+ alias number64? number?
380
+
99
381
  def response
100
382
  token = lookahead
101
383
  case token.symbol
@@ -157,9 +439,11 @@ module Net
157
439
  when /\A(?:STATUS)\z/ni
158
440
  return status_response
159
441
  when /\A(?:CAPABILITY)\z/ni
160
- return capability_response
442
+ return capability_data__untagged
161
443
  when /\A(?:NOOP)\z/ni
162
444
  return ignored_response
445
+ when /\A(?:ENABLED)\z/ni
446
+ return enable_data
163
447
  else
164
448
  return text_response
165
449
  end
@@ -333,331 +617,258 @@ module Net
333
617
  return name, data
334
618
  end
335
619
 
620
+ # RFC-3501 & RFC-9051:
621
+ # body = "(" (body-type-1part / body-type-mpart) ")"
336
622
  def body
337
623
  @lex_state = EXPR_DATA
338
- token = lookahead
339
- if token.symbol == T_NIL
340
- shift_token
341
- result = nil
342
- else
343
- match(T_LPAR)
344
- token = lookahead
345
- if token.symbol == T_LPAR
346
- result = body_type_mpart
347
- else
348
- result = body_type_1part
349
- end
350
- match(T_RPAR)
351
- end
624
+ lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar
625
+ result
626
+ ensure
352
627
  @lex_state = EXPR_BEG
353
- return result
354
628
  end
629
+ alias lookahead_body? lookahead_lpar?
355
630
 
631
+ # RFC-3501 & RFC9051:
632
+ # body-type-1part = (body-type-basic / body-type-msg / body-type-text)
633
+ # [SP body-ext-1part]
356
634
  def body_type_1part
357
- token = lookahead
358
- case token.value
359
- when /\A(?:TEXT)\z/ni
360
- return body_type_text
361
- when /\A(?:MESSAGE)\z/ni
362
- return body_type_msg
363
- when /\A(?:ATTACHMENT)\z/ni
364
- return body_type_attachment
365
- when /\A(?:MIXED)\z/ni
366
- return body_type_mixed
367
- else
368
- return body_type_basic
369
- end
370
- end
371
-
635
+ # This regexp peek is a performance optimization.
636
+ # The lookahead fallback would work fine too.
637
+ m = peek_re(/\G(?:
638
+ (?<TEXT> "TEXT" \s "[^"]+" )
639
+ |(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
640
+ |(?<BASIC> "[^"]+" \s "[^"]+" )
641
+ |(?<MIXED> "MIXED" )
642
+ )/nix)
643
+ choice = m&.named_captures&.compact&.keys&.first
644
+ # In practice, the following line should never be used. But the ABNF
645
+ # *does* allow literals, and this will handle them.
646
+ choice ||= lookahead_case_insensitive__string!
647
+ case choice
648
+ when "BASIC" then body_type_basic # => BodyTypeBasic
649
+ when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
650
+ when "TEXT" then body_type_text # => BodyTypeText
651
+ when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
652
+ else body_type_basic # might be a bug; server's or ours?
653
+ end
654
+ end
655
+
656
+ # RFC-3501 & RFC9051:
657
+ # body-type-basic = media-basic SP body-fields
372
658
  def body_type_basic
373
- mtype, msubtype = media_type
374
- token = lookahead
375
- if token.symbol == T_RPAR
376
- return BodyTypeBasic.new(mtype, msubtype)
377
- end
378
- match(T_SPACE)
379
- param, content_id, desc, enc, size = body_fields
380
- md5, disposition, language, extension = body_ext_1part
381
- return BodyTypeBasic.new(mtype, msubtype,
382
- param, content_id,
383
- desc, enc, size,
384
- md5, disposition, language, extension)
659
+ type = media_basic # n.b. "basic" type isn't enforced here
660
+ if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid
661
+ SP!; flds = body_fields
662
+ SP? and exts = body_ext_1part
663
+ BodyTypeBasic.new(*type, *flds, *exts)
385
664
  end
386
665
 
666
+ # RFC-3501 & RFC-9051:
667
+ # body-type-text = media-text SP body-fields SP body-fld-lines
387
668
  def body_type_text
388
- mtype, msubtype = media_type
389
- match(T_SPACE)
390
- param, content_id, desc, enc, size = body_fields
391
- match(T_SPACE)
392
- lines = number
393
- md5, disposition, language, extension = body_ext_1part
394
- return BodyTypeText.new(mtype, msubtype,
395
- param, content_id,
396
- desc, enc, size,
397
- lines,
398
- md5, disposition, language, extension)
669
+ type = media_text
670
+ SP!; flds = body_fields
671
+ SP!; lines = body_fld_lines
672
+ SP? and exts = body_ext_1part
673
+ BodyTypeText.new(*type, *flds, lines, *exts)
399
674
  end
400
675
 
676
+ # RFC-3501 & RFC-9051:
677
+ # body-type-msg = media-message SP body-fields SP envelope
678
+ # SP body SP body-fld-lines
401
679
  def body_type_msg
402
- mtype, msubtype = media_type
403
- match(T_SPACE)
404
- param, content_id, desc, enc, size = body_fields
405
-
406
- token = lookahead
407
- if token.symbol == T_RPAR
408
- # If this is not message/rfc822, we shouldn't apply the RFC822
409
- # spec to it. We should handle anything other than
410
- # message/rfc822 using multipart extension data [rfc3501] (i.e.
411
- # the data itself won't be returned, we would have to retrieve it
412
- # with BODYSTRUCTURE instead of with BODY
413
-
414
- # Also, sometimes a message/rfc822 is included as a large
415
- # attachment instead of having all of the other details
416
- # (e.g. attaching a .eml file to an email)
417
- if msubtype == "RFC822"
418
- return BodyTypeMessage.new(mtype, msubtype, param, content_id,
419
- desc, enc, size, nil, nil, nil, nil,
420
- nil, nil, nil)
421
- else
422
- return BodyTypeExtension.new(mtype, msubtype,
423
- param, content_id,
424
- desc, enc, size)
425
- end
426
- end
427
-
428
- match(T_SPACE)
429
- env = envelope
430
- match(T_SPACE)
431
- b = body
432
- match(T_SPACE)
433
- lines = number
434
- md5, disposition, language, extension = body_ext_1part
435
- return BodyTypeMessage.new(mtype, msubtype,
436
- param, content_id,
437
- desc, enc, size,
438
- env, b, lines,
439
- md5, disposition, language, extension)
440
- end
441
-
442
- def body_type_attachment
443
- mtype = case_insensitive_string
444
- match(T_SPACE)
445
- param = body_fld_param
446
- return BodyTypeAttachment.new(mtype, nil, param)
447
- end
448
-
680
+ # n.b. "message/rfc822" type isn't enforced here
681
+ type = media_message
682
+ SP!; flds = body_fields
683
+
684
+ # Sometimes servers send body-type-basic when body-type-msg is expected.
685
+ # E.g: when a message/rfc822 part has "Content-Disposition: attachment".
686
+ #
687
+ # * SP "(" --> SP envelope --> continue as body-type-msg
688
+ # * ")" --> no body-ext-1part --> completed body-type-basic
689
+ # * SP nstring --> SP body-fld-md5
690
+ # --> SP body-ext-1part --> continue as body-type-basic
691
+ #
692
+ # It's probably better to return BodyTypeBasic---even for
693
+ # "message/rfc822"---than BodyTypeMessage with invalid fields.
694
+ unless peek_str?(" (")
695
+ SP? and exts = body_ext_1part
696
+ return BodyTypeBasic.new(*type, *flds, *exts)
697
+ end
698
+
699
+ SP!; env = envelope
700
+ SP!; bdy = body
701
+ SP!; lines = body_fld_lines
702
+ SP? and exts = body_ext_1part
703
+ BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts)
704
+ end
705
+
706
+ # This is a malformed body-type-mpart with no subparts.
449
707
  def body_type_mixed
450
- mtype = "MULTIPART"
451
- msubtype = case_insensitive_string
452
- param, disposition, language, extension = body_ext_mpart
453
- return BodyTypeBasic.new(mtype, msubtype, param, nil, nil, nil, nil, nil, disposition, language, extension)
708
+ # warn "malformed body-type-mpart: multipart/mixed with no parts."
709
+ type = media_subtype # => "MIXED"
710
+ SP? and exts = body_ext_mpart
711
+ BodyTypeMultipart.new("MULTIPART", type, nil, *exts)
454
712
  end
455
713
 
714
+ # RFC-3501 & RFC-9051:
715
+ # body-type-mpart = 1*body SP media-subtype
716
+ # [SP body-ext-mpart]
456
717
  def body_type_mpart
457
- parts = []
458
- while true
459
- token = lookahead
460
- if token.symbol == T_SPACE
461
- shift_token
462
- break
463
- end
464
- parts.push(body)
465
- end
466
- mtype = "MULTIPART"
467
- msubtype = case_insensitive_string
468
- param, disposition, language, extension = body_ext_mpart
469
- return BodyTypeMultipart.new(mtype, msubtype, parts,
470
- param, disposition, language,
471
- extension)
718
+ parts = [body]; parts << body until SP?; msubtype = media_subtype
719
+ SP? and exts = body_ext_mpart
720
+ BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts)
472
721
  end
473
722
 
723
+ # n.b. this handles both type and subtype
724
+ #
725
+ # RFC-3501 vs RFC-9051:
726
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
727
+ # "MESSAGE" /
728
+ # "VIDEO") DQUOTE) / string) SP media-subtype
729
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
730
+ # "FONT" / "MESSAGE" / "MODEL" /
731
+ # "VIDEO") DQUOTE) / string) SP media-subtype
732
+ #
733
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
734
+ # DQUOTE "RFC822" DQUOTE
735
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
736
+ # DQUOTE ("RFC822" / "GLOBAL") DQUOTE
737
+ #
738
+ # RFC-3501 & RFC-9051:
739
+ # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
740
+ # media-subtype = string
474
741
  def media_type
475
- mtype = case_insensitive_string
476
- token = lookahead
477
- if token.symbol != T_SPACE
478
- return mtype, nil
479
- end
480
- match(T_SPACE)
481
- msubtype = case_insensitive_string
742
+ mtype = case_insensitive__string
743
+ SP? or return mtype, nil # ??? quirky!
744
+ msubtype = media_subtype
482
745
  return mtype, msubtype
483
746
  end
484
747
 
748
+ # TODO: check types
749
+ alias media_basic media_type # */* --- catchall
750
+ alias media_message media_type # message/rfc822, message/global
751
+ alias media_text media_type # text/*
752
+
753
+ alias media_subtype case_insensitive__string
754
+
755
+ # RFC-3501 & RFC-9051:
756
+ # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
757
+ # body-fld-enc SP body-fld-octets
485
758
  def body_fields
486
- param = body_fld_param
487
- match(T_SPACE)
488
- content_id = nstring
489
- match(T_SPACE)
490
- desc = nstring
491
- match(T_SPACE)
492
- enc = case_insensitive_string
493
- match(T_SPACE)
494
- size = number
495
- return param, content_id, desc, enc, size
759
+ fields = []
760
+ fields << body_fld_param; SP!
761
+ fields << body_fld_id; SP!
762
+ fields << body_fld_desc; SP!
763
+ fields << body_fld_enc; SP!
764
+ fields << body_fld_octets
765
+ fields
496
766
  end
497
767
 
768
+ # RFC3501, RFC9051:
769
+ # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
498
770
  def body_fld_param
499
- token = lookahead
500
- if token.symbol == T_NIL
501
- shift_token
502
- return nil
503
- end
504
- match(T_LPAR)
771
+ return if NIL?
505
772
  param = {}
506
- while true
507
- token = lookahead
508
- case token.symbol
509
- when T_RPAR
510
- shift_token
511
- break
512
- when T_SPACE
513
- shift_token
514
- end
515
- name = case_insensitive_string
516
- match(T_SPACE)
517
- val = string
518
- param[name] = val
519
- end
520
- return param
521
- end
522
-
773
+ lpar
774
+ name = case_insensitive__string; SP!; param[name] = string
775
+ while SP?
776
+ name = case_insensitive__string; SP!; param[name] = string
777
+ end
778
+ rpar
779
+ param
780
+ end
781
+
782
+ # RFC2060
783
+ # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
784
+ # [SPACE body_fld_lang
785
+ # [SPACE 1#body_extension]]]
786
+ # ;; MUST NOT be returned on non-extensible
787
+ # ;; "BODY" fetch
788
+ # RFC3501 & RFC9051
789
+ # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
790
+ # [SP body-fld-loc *(SP body-extension)]]]
791
+ # ; MUST NOT be returned on non-extensible
792
+ # ; "BODY" fetch
523
793
  def body_ext_1part
524
- token = lookahead
525
- if token.symbol == T_SPACE
526
- shift_token
527
- else
528
- return nil
529
- end
530
- md5 = nstring
531
-
532
- token = lookahead
533
- if token.symbol == T_SPACE
534
- shift_token
535
- else
536
- return md5
537
- end
538
- disposition = body_fld_dsp
539
-
540
- token = lookahead
541
- if token.symbol == T_SPACE
542
- shift_token
543
- else
544
- return md5, disposition
545
- end
546
- language = body_fld_lang
547
-
548
- token = lookahead
549
- if token.symbol == T_SPACE
550
- shift_token
551
- else
552
- return md5, disposition, language
553
- end
554
-
555
- extension = body_extensions
556
- return md5, disposition, language, extension
557
- end
558
-
794
+ fields = []; fields << body_fld_md5
795
+ SP? or return fields; fields << body_fld_dsp
796
+ SP? or return fields; fields << body_fld_lang
797
+ SP? or return fields; fields << body_fld_loc
798
+ SP? or return fields; fields << body_extensions
799
+ fields
800
+ end
801
+
802
+ # RFC-2060:
803
+ # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
804
+ # [SP 1#body_extension]]
805
+ # ;; MUST NOT be returned on non-extensible
806
+ # ;; "BODY" fetch
807
+ # RFC-3501 & RFC-9051:
808
+ # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
809
+ # [SP body-fld-loc *(SP body-extension)]]]
810
+ # ; MUST NOT be returned on non-extensible
811
+ # ; "BODY" fetch
559
812
  def body_ext_mpart
560
- token = lookahead
561
- if token.symbol == T_SPACE
562
- shift_token
563
- else
564
- return nil
565
- end
566
- param = body_fld_param
567
-
568
- token = lookahead
569
- if token.symbol == T_SPACE
570
- shift_token
571
- else
572
- return param
573
- end
574
- disposition = body_fld_dsp
575
-
576
- token = lookahead
577
- if token.symbol == T_SPACE
578
- shift_token
579
- else
580
- return param, disposition
581
- end
582
- language = body_fld_lang
583
-
584
- token = lookahead
585
- if token.symbol == T_SPACE
586
- shift_token
587
- else
588
- return param, disposition, language
589
- end
590
-
591
- extension = body_extensions
592
- return param, disposition, language, extension
593
- end
594
-
813
+ fields = []; fields << body_fld_param
814
+ SP? or return fields; fields << body_fld_dsp
815
+ SP? or return fields; fields << body_fld_lang
816
+ SP? or return fields; fields << body_fld_loc
817
+ SP? or return fields; fields << body_extensions
818
+ fields
819
+ end
820
+
821
+ alias body_fld_desc nstring
822
+ alias body_fld_id nstring
823
+ alias body_fld_loc nstring
824
+ alias body_fld_lines number64 # number in 3501, number64 in 9051
825
+ alias body_fld_md5 nstring
826
+ alias body_fld_octets number
827
+
828
+ # RFC-3501 & RFC-9051:
829
+ # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
830
+ # "QUOTED-PRINTABLE") DQUOTE) / string
831
+ alias body_fld_enc case_insensitive__string
832
+
833
+ # body-fld-dsp = "(" string SP body-fld-param ")" / nil
595
834
  def body_fld_dsp
596
- token = lookahead
597
- if token.symbol == T_NIL
598
- shift_token
599
- return nil
600
- end
601
- match(T_LPAR)
602
- dsp_type = case_insensitive_string
603
- match(T_SPACE)
604
- param = body_fld_param
605
- match(T_RPAR)
606
- return ContentDisposition.new(dsp_type, param)
835
+ return if NIL?
836
+ lpar; dsp_type = case_insensitive__string
837
+ SP!; param = body_fld_param
838
+ rpar
839
+ ContentDisposition.new(dsp_type, param)
607
840
  end
608
841
 
842
+ # body-fld-lang = nstring / "(" string *(SP string) ")"
609
843
  def body_fld_lang
610
- token = lookahead
611
- if token.symbol == T_LPAR
612
- shift_token
613
- result = []
614
- while true
615
- token = lookahead
616
- case token.symbol
617
- when T_RPAR
618
- shift_token
619
- return result
620
- when T_SPACE
621
- shift_token
622
- end
623
- result.push(case_insensitive_string)
624
- end
844
+ if lpar?
845
+ result = [case_insensitive__string]
846
+ result << case_insensitive__string while SP?
847
+ result
625
848
  else
626
- lang = nstring
627
- if lang
628
- return lang.upcase
629
- else
630
- return lang
631
- end
849
+ case_insensitive__nstring
632
850
  end
633
851
  end
634
852
 
853
+ # body-extension *(SP body-extension)
635
854
  def body_extensions
636
855
  result = []
637
- while true
638
- token = lookahead
639
- case token.symbol
640
- when T_RPAR
641
- return result
642
- when T_SPACE
643
- shift_token
644
- end
645
- result.push(body_extension)
646
- end
856
+ result << body_extension; while SP? do result << body_extension end
857
+ result
647
858
  end
648
859
 
860
+ # body-extension = nstring / number / number64 /
861
+ # "(" body-extension *(SP body-extension) ")"
862
+ # ; Future expansion. Client implementations
863
+ # ; MUST accept body-extension fields. Server
864
+ # ; implementations MUST NOT generate
865
+ # ; body-extension fields except as defined by
866
+ # ; future Standard or Standards Track
867
+ # ; revisions of this specification.
649
868
  def body_extension
650
- token = lookahead
651
- case token.symbol
652
- when T_LPAR
653
- shift_token
654
- result = body_extensions
655
- match(T_RPAR)
656
- return result
657
- when T_NUMBER
658
- return number
659
- else
660
- return nstring
869
+ if (uint = number64?) then uint
870
+ elsif lpar? then exts = body_extensions; rpar; exts
871
+ else nstring
661
872
  end
662
873
  end
663
874
 
@@ -967,29 +1178,38 @@ module Net
967
1178
  return UntaggedResponse.new(name, data, @str)
968
1179
  end
969
1180
 
970
- def capability_response
971
- token = match(T_ATOM)
972
- name = token.value.upcase
973
- match(T_SPACE)
974
- UntaggedResponse.new(name, capability_data, @str)
1181
+ # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
1182
+ # The grammar rule is used by both response-data and resp-text-code.
1183
+ # But this method only returns UntaggedResponse (response-data).
1184
+ #
1185
+ # RFC3501:
1186
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1187
+ # *(SP capability)
1188
+ # RFC9051:
1189
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1190
+ # *(SP capability)
1191
+ def capability_data__untagged
1192
+ UntaggedResponse.new label("CAPABILITY"), capability__list, @str
975
1193
  end
976
1194
 
977
- def capability_data
978
- data = []
979
- while true
980
- token = lookahead
981
- case token.symbol
982
- when T_CRLF, T_RBRA
983
- break
984
- when T_SPACE
985
- shift_token
986
- next
987
- end
988
- data.push(atom.upcase)
989
- end
990
- data
1195
+ # enable-data = "ENABLED" *(SP capability)
1196
+ def enable_data
1197
+ UntaggedResponse.new label("ENABLED"), capability__list, @str
1198
+ end
1199
+
1200
+ # As a workaround for buggy servers, allow a trailing SP:
1201
+ # *(SP capability) [SP]
1202
+ def capability__list
1203
+ data = []; while _ = SP? && capability? do data << _ end; data
991
1204
  end
992
1205
 
1206
+ # capability = ("AUTH=" auth-type) / atom
1207
+ # ; New capabilities MUST begin with "X" or be
1208
+ # ; registered with IANA as standard or
1209
+ # ; standards-track
1210
+ alias capability case_insensitive__atom
1211
+ alias capability? case_insensitive__atom?
1212
+
993
1213
  def id_response
994
1214
  token = match(T_ATOM)
995
1215
  name = token.value.upcase
@@ -1019,86 +1239,89 @@ module Net
1019
1239
  end
1020
1240
  end
1021
1241
 
1242
+ # namespace-response = "NAMESPACE" SP namespace
1243
+ # SP namespace SP namespace
1244
+ # ; The first Namespace is the Personal Namespace(s).
1245
+ # ; The second Namespace is the Other Users'
1246
+ # ; Namespace(s).
1247
+ # ; The third Namespace is the Shared Namespace(s).
1022
1248
  def namespace_response
1249
+ name = label("NAMESPACE")
1023
1250
  @lex_state = EXPR_DATA
1024
- token = lookahead
1025
- token = match(T_ATOM)
1026
- name = token.value.upcase
1027
- match(T_SPACE)
1028
- personal = namespaces
1029
- match(T_SPACE)
1030
- other = namespaces
1031
- match(T_SPACE)
1032
- shared = namespaces
1251
+ data = Namespaces.new((SP!; namespace),
1252
+ (SP!; namespace),
1253
+ (SP!; namespace))
1254
+ UntaggedResponse.new(name, data, @str)
1255
+ ensure
1033
1256
  @lex_state = EXPR_BEG
1034
- data = Namespaces.new(personal, other, shared)
1035
- return UntaggedResponse.new(name, data, @str)
1036
- end
1037
-
1038
- def namespaces
1039
- token = lookahead
1040
- # empty () is not allowed, so nil is functionally identical to empty.
1041
- data = []
1042
- if token.symbol == T_NIL
1043
- shift_token
1044
- else
1045
- match(T_LPAR)
1046
- loop do
1047
- data << namespace
1048
- break unless lookahead.symbol == T_SPACE
1049
- shift_token
1050
- end
1051
- match(T_RPAR)
1052
- end
1053
- data
1054
1257
  end
1055
1258
 
1259
+ # namespace = nil / "(" 1*namespace-descr ")"
1056
1260
  def namespace
1057
- match(T_LPAR)
1058
- prefix = match(T_QUOTED, T_LITERAL).value
1059
- match(T_SPACE)
1060
- delimiter = string
1261
+ NIL? and return []
1262
+ lpar
1263
+ list = [namespace_descr]
1264
+ list << namespace_descr until rpar?
1265
+ list
1266
+ end
1267
+
1268
+ # namespace-descr = "(" string SP
1269
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil)
1270
+ # [namespace-response-extensions] ")"
1271
+ def namespace_descr
1272
+ lpar
1273
+ prefix = string; SP!
1274
+ delimiter = nquoted # n.b: should only accept single char
1061
1275
  extensions = namespace_response_extensions
1062
- match(T_RPAR)
1276
+ rpar
1063
1277
  Namespace.new(prefix, delimiter, extensions)
1064
1278
  end
1065
1279
 
1280
+ # namespace-response-extensions = *namespace-response-extension
1281
+ # namespace-response-extension = SP string SP
1282
+ # "(" string *(SP string) ")"
1066
1283
  def namespace_response_extensions
1067
1284
  data = {}
1068
- token = lookahead
1069
- if token.symbol == T_SPACE
1070
- shift_token
1071
- name = match(T_QUOTED, T_LITERAL).value
1285
+ while SP?
1286
+ name = string; SP!
1287
+ lpar
1072
1288
  data[name] ||= []
1073
- match(T_SPACE)
1074
- match(T_LPAR)
1075
- loop do
1076
- data[name].push match(T_QUOTED, T_LITERAL).value
1077
- break unless lookahead.symbol == T_SPACE
1078
- shift_token
1079
- end
1080
- match(T_RPAR)
1289
+ data[name] << string
1290
+ data[name] << string while SP?
1291
+ rpar
1081
1292
  end
1082
1293
  data
1083
1294
  end
1084
1295
 
1085
- # text = 1*TEXT-CHAR
1086
- # TEXT-CHAR = <any CHAR except CR and LF>
1296
+ # TEXT-CHAR = <any CHAR except CR and LF>
1297
+ # RFC3501:
1298
+ # text = 1*TEXT-CHAR
1299
+ # RFC9051:
1300
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
1301
+ # ; Non-ASCII text can only be returned
1302
+ # ; after ENABLE IMAP4rev2 command
1087
1303
  def text
1088
- match(T_TEXT, lex_state: EXPR_TEXT).value
1304
+ match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8")
1305
+ end
1306
+
1307
+ # an "accept" version of #text
1308
+ def text?
1309
+ accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8")
1089
1310
  end
1090
1311
 
1091
- # resp-text = ["[" resp-text-code "]" SP] text
1312
+ # RFC3501:
1313
+ # resp-text = ["[" resp-text-code "]" SP] text
1314
+ # RFC9051:
1315
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1316
+ #
1317
+ # We leniently re-interpret this as
1318
+ # resp-text = "[" resp-text-code "]" [SP [text]] / [text]
1092
1319
  def resp_text
1093
- token = match(T_LBRA, T_TEXT, lex_state: EXPR_RTEXT)
1094
- case token.symbol
1095
- when T_LBRA
1096
- code = resp_text_code
1097
- match(T_RBRA)
1098
- accept_space # violating RFC
1099
- ResponseText.new(code, text)
1100
- when T_TEXT
1101
- ResponseText.new(nil, token.value)
1320
+ if lbra?
1321
+ code = resp_text_code; rbra
1322
+ ResponseText.new(code, SP? && text? || "")
1323
+ else
1324
+ ResponseText.new(nil, text? || "")
1102
1325
  end
1103
1326
  end
1104
1327
 
@@ -1125,7 +1348,7 @@ module Net
1125
1348
  when /\A(?:BADCHARSET)\z/n
1126
1349
  result = ResponseCode.new(name, charset_list)
1127
1350
  when /\A(?:CAPABILITY)\z/ni
1128
- result = ResponseCode.new(name, capability_data)
1351
+ result = ResponseCode.new(name, capability__list)
1129
1352
  when /\A(?:PERMANENTFLAGS)\z/n
1130
1353
  match(T_SPACE)
1131
1354
  result = ResponseCode.new(name, flag_list)
@@ -1140,8 +1363,7 @@ module Net
1140
1363
  token = lookahead
1141
1364
  if token.symbol == T_SPACE
1142
1365
  shift_token
1143
- token = match(T_TEXT, lex_state: EXPR_CTEXT)
1144
- result = ResponseCode.new(name, token.value)
1366
+ result = ResponseCode.new(name, text_chars_except_rbra)
1145
1367
  else
1146
1368
  result = ResponseCode.new(name, nil)
1147
1369
  end
@@ -1149,6 +1371,11 @@ module Net
1149
1371
  return result
1150
1372
  end
1151
1373
 
1374
+ # 1*<any TEXT-CHAR except "]">
1375
+ def text_chars_except_rbra
1376
+ match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')[0]
1377
+ end
1378
+
1152
1379
  def charset_list
1153
1380
  result = []
1154
1381
  if accept(T_SPACE)
@@ -1230,9 +1457,7 @@ module Net
1230
1457
  mailbox = $3
1231
1458
  host = $4
1232
1459
  for s in [name, route, mailbox, host]
1233
- if s
1234
- s.gsub!(/\\(["\\])/n, "\\1")
1235
- end
1460
+ Patterns.unescape_quoted! s
1236
1461
  end
1237
1462
  else
1238
1463
  name = nstring
@@ -1266,84 +1491,6 @@ module Net
1266
1491
  end
1267
1492
  end
1268
1493
 
1269
- def nstring
1270
- token = lookahead
1271
- if token.symbol == T_NIL
1272
- shift_token
1273
- return nil
1274
- else
1275
- return string
1276
- end
1277
- end
1278
-
1279
- def astring
1280
- token = lookahead
1281
- if string_token?(token)
1282
- return string
1283
- else
1284
- return astring_chars
1285
- end
1286
- end
1287
-
1288
- def string
1289
- token = lookahead
1290
- if token.symbol == T_NIL
1291
- shift_token
1292
- return nil
1293
- end
1294
- token = match(T_QUOTED, T_LITERAL)
1295
- return token.value
1296
- end
1297
-
1298
- STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1299
-
1300
- def string_token?(token)
1301
- return STRING_TOKENS.include?(token.symbol)
1302
- end
1303
-
1304
- def case_insensitive_string
1305
- token = lookahead
1306
- if token.symbol == T_NIL
1307
- shift_token
1308
- return nil
1309
- end
1310
- token = match(T_QUOTED, T_LITERAL)
1311
- return token.value.upcase
1312
- end
1313
-
1314
- # atom = 1*ATOM-CHAR
1315
- # ATOM-CHAR = <any CHAR except atom-specials>
1316
- ATOM_TOKENS = [
1317
- T_ATOM,
1318
- T_NUMBER,
1319
- T_NIL,
1320
- T_LBRA,
1321
- T_PLUS
1322
- ]
1323
-
1324
- def atom
1325
- -combine_adjacent(*ATOM_TOKENS)
1326
- end
1327
-
1328
- # ASTRING-CHAR = ATOM-CHAR / resp-specials
1329
- # resp-specials = "]"
1330
- ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
1331
-
1332
- def astring_chars
1333
- combine_adjacent(*ASTRING_CHARS_TOKENS)
1334
- end
1335
-
1336
- def combine_adjacent(*tokens)
1337
- result = "".b
1338
- while token = accept(*tokens)
1339
- result << token.value
1340
- end
1341
- if result.empty?
1342
- parse_error('unexpected token %s (expected %s)',
1343
- lookahead.symbol, args.join(" or "))
1344
- end
1345
- result
1346
- end
1347
1494
 
1348
1495
  # See https://www.rfc-editor.org/errata/rfc3501
1349
1496
  #
@@ -1356,16 +1503,6 @@ module Net
1356
1503
  end
1357
1504
  end
1358
1505
 
1359
- def number
1360
- token = lookahead
1361
- if token.symbol == T_NIL
1362
- shift_token
1363
- return nil
1364
- end
1365
- token = match(T_NUMBER)
1366
- return token.value.to_i
1367
- end
1368
-
1369
1506
  # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1370
1507
  # uid-set = (uniqueid / uid-range) *("," uid-set)
1371
1508
  # uid-range = (uniqueid ":" uniqueid)
@@ -1393,15 +1530,6 @@ module Net
1393
1530
 
1394
1531
  SPACES_REGEXP = /\G */n
1395
1532
 
1396
- # This advances @pos directly so it's safe before changing @lex_state.
1397
- def accept_space
1398
- if @token
1399
- shift_token if @token.symbol == T_SPACE
1400
- elsif @str[@pos] == " "
1401
- @pos += 1
1402
- end
1403
- end
1404
-
1405
1533
  # The RFC is very strict about this and usually we should be too.
1406
1534
  # But skipping spaces is usually a safe workaround for buggy servers.
1407
1535
  #
@@ -1413,46 +1541,6 @@ module Net
1413
1541
  end
1414
1542
  end
1415
1543
 
1416
- def match(*args, lex_state: @lex_state)
1417
- if @token && lex_state != @lex_state
1418
- parse_error("invalid lex_state change to %s with unconsumed token",
1419
- lex_state)
1420
- end
1421
- begin
1422
- @lex_state, original_lex_state = lex_state, @lex_state
1423
- token = lookahead
1424
- unless args.include?(token.symbol)
1425
- parse_error('unexpected token %s (expected %s)',
1426
- token.symbol.id2name,
1427
- args.collect {|i| i.id2name}.join(" or "))
1428
- end
1429
- shift_token
1430
- return token
1431
- ensure
1432
- @lex_state = original_lex_state
1433
- end
1434
- end
1435
-
1436
- # like match, but does not raise error on failure.
1437
- #
1438
- # returns and shifts token on successful match
1439
- # returns nil and leaves @token unshifted on no match
1440
- def accept(*args)
1441
- token = lookahead
1442
- if args.include?(token.symbol)
1443
- shift_token
1444
- token
1445
- end
1446
- end
1447
-
1448
- def lookahead
1449
- @token ||= next_token
1450
- end
1451
-
1452
- def shift_token
1453
- @token = nil
1454
- end
1455
-
1456
1544
  def next_token
1457
1545
  case @lex_state
1458
1546
  when EXPR_BEG
@@ -1460,39 +1548,42 @@ module Net
1460
1548
  @pos = $~.end(0)
1461
1549
  if $1
1462
1550
  return Token.new(T_SPACE, $+)
1463
- elsif $2
1464
- return Token.new(T_NIL, $+)
1551
+ elsif $2 && $6
1552
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
1553
+ return Token.new(T_ATOM, $2)
1465
1554
  elsif $3
1466
- return Token.new(T_NUMBER, $+)
1555
+ return Token.new(T_NIL, $+)
1467
1556
  elsif $4
1468
- return Token.new(T_ATOM, $+)
1557
+ return Token.new(T_NUMBER, $+)
1469
1558
  elsif $5
1470
- return Token.new(T_QUOTED,
1471
- $+.gsub(/\\(["\\])/n, "\\1"))
1472
- elsif $6
1473
- return Token.new(T_LPAR, $+)
1559
+ return Token.new(T_PLUS, $+)
1474
1560
  elsif $7
1475
- return Token.new(T_RPAR, $+)
1561
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
1562
+ return Token.new(T_ATOM, $+)
1476
1563
  elsif $8
1477
- return Token.new(T_BSLASH, $+)
1564
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1478
1565
  elsif $9
1479
- return Token.new(T_STAR, $+)
1566
+ return Token.new(T_LPAR, $+)
1480
1567
  elsif $10
1481
- return Token.new(T_LBRA, $+)
1568
+ return Token.new(T_RPAR, $+)
1482
1569
  elsif $11
1483
- return Token.new(T_RBRA, $+)
1570
+ return Token.new(T_BSLASH, $+)
1484
1571
  elsif $12
1572
+ return Token.new(T_STAR, $+)
1573
+ elsif $13
1574
+ return Token.new(T_LBRA, $+)
1575
+ elsif $14
1576
+ return Token.new(T_RBRA, $+)
1577
+ elsif $15
1485
1578
  len = $+.to_i
1486
1579
  val = @str[@pos, len]
1487
1580
  @pos += len
1488
1581
  return Token.new(T_LITERAL, val)
1489
- elsif $13
1490
- return Token.new(T_PLUS, $+)
1491
- elsif $14
1582
+ elsif $16
1492
1583
  return Token.new(T_PERCENT, $+)
1493
- elsif $15
1584
+ elsif $17
1494
1585
  return Token.new(T_CRLF, $+)
1495
- elsif $16
1586
+ elsif $18
1496
1587
  return Token.new(T_EOF, $+)
1497
1588
  else
1498
1589
  parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
@@ -1511,8 +1602,7 @@ module Net
1511
1602
  elsif $3
1512
1603
  return Token.new(T_NUMBER, $+)
1513
1604
  elsif $4
1514
- return Token.new(T_QUOTED,
1515
- $+.gsub(/\\(["\\])/n, "\\1"))
1605
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1516
1606
  elsif $5
1517
1607
  len = $+.to_i
1518
1608
  val = @str[@pos, len]
@@ -1529,63 +1619,11 @@ module Net
1529
1619
  @str.index(/\S*/n, @pos)
1530
1620
  parse_error("unknown token - %s", $&.dump)
1531
1621
  end
1532
- when EXPR_TEXT
1533
- if @str.index(TEXT_REGEXP, @pos)
1534
- @pos = $~.end(0)
1535
- if $1
1536
- return Token.new(T_TEXT, $+)
1537
- else
1538
- parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid")
1539
- end
1540
- else
1541
- @str.index(/\S*/n, @pos)
1542
- parse_error("unknown token - %s", $&.dump)
1543
- end
1544
- when EXPR_RTEXT
1545
- if @str.index(RTEXT_REGEXP, @pos)
1546
- @pos = $~.end(0)
1547
- if $1
1548
- return Token.new(T_LBRA, $+)
1549
- elsif $2
1550
- return Token.new(T_TEXT, $+)
1551
- else
1552
- parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid")
1553
- end
1554
- else
1555
- @str.index(/\S*/n, @pos)
1556
- parse_error("unknown token - %s", $&.dump)
1557
- end
1558
- when EXPR_CTEXT
1559
- if @str.index(CTEXT_REGEXP, @pos)
1560
- @pos = $~.end(0)
1561
- if $1
1562
- return Token.new(T_TEXT, $+)
1563
- else
1564
- parse_error("[Net::IMAP BUG] CTEXT_REGEXP is invalid")
1565
- end
1566
- else
1567
- @str.index(/\S*/n, @pos) #/
1568
- parse_error("unknown token - %s", $&.dump)
1569
- end
1570
1622
  else
1571
1623
  parse_error("invalid @lex_state - %s", @lex_state.inspect)
1572
1624
  end
1573
1625
  end
1574
1626
 
1575
- def parse_error(fmt, *args)
1576
- if IMAP.debug
1577
- $stderr.printf("@str: %s\n", @str.dump)
1578
- $stderr.printf("@pos: %d\n", @pos)
1579
- $stderr.printf("@lex_state: %s\n", @lex_state)
1580
- if @token
1581
- $stderr.printf("@token.symbol: %s\n", @token.symbol)
1582
- $stderr.printf("@token.value: %s\n", @token.value.inspect)
1583
- end
1584
- end
1585
- raise ResponseParseError, format(fmt, *args)
1586
- end
1587
1627
  end
1588
-
1589
1628
  end
1590
-
1591
1629
  end