net-imap 0.3.4 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of net-imap might be problematic. Click here for more details.

Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/pages.yml +46 -0
  3. data/.github/workflows/test.yml +12 -12
  4. data/Gemfile +1 -0
  5. data/README.md +15 -4
  6. data/Rakefile +0 -7
  7. data/benchmarks/generate_parser_benchmarks +52 -0
  8. data/benchmarks/parser.yml +578 -0
  9. data/benchmarks/stringprep.yml +1 -1
  10. data/lib/net/imap/authenticators.rb +26 -57
  11. data/lib/net/imap/command_data.rb +13 -6
  12. data/lib/net/imap/data_encoding.rb +3 -3
  13. data/lib/net/imap/deprecated_client_options.rb +139 -0
  14. data/lib/net/imap/response_data.rb +46 -41
  15. data/lib/net/imap/response_parser/parser_utils.rb +230 -0
  16. data/lib/net/imap/response_parser.rb +665 -627
  17. data/lib/net/imap/sasl/anonymous_authenticator.rb +68 -0
  18. data/lib/net/imap/sasl/authentication_exchange.rb +107 -0
  19. data/lib/net/imap/sasl/authenticators.rb +118 -0
  20. data/lib/net/imap/sasl/client_adapter.rb +72 -0
  21. data/lib/net/imap/{authenticators/cram_md5.rb → sasl/cram_md5_authenticator.rb} +15 -9
  22. data/lib/net/imap/sasl/digest_md5_authenticator.rb +168 -0
  23. data/lib/net/imap/sasl/external_authenticator.rb +62 -0
  24. data/lib/net/imap/sasl/gs2_header.rb +80 -0
  25. data/lib/net/imap/{authenticators/login.rb → sasl/login_authenticator.rb} +19 -14
  26. data/lib/net/imap/sasl/oauthbearer_authenticator.rb +164 -0
  27. data/lib/net/imap/sasl/plain_authenticator.rb +93 -0
  28. data/lib/net/imap/sasl/protocol_adapters.rb +45 -0
  29. data/lib/net/imap/sasl/scram_algorithm.rb +58 -0
  30. data/lib/net/imap/sasl/scram_authenticator.rb +278 -0
  31. data/lib/net/imap/sasl/stringprep.rb +6 -66
  32. data/lib/net/imap/sasl/xoauth2_authenticator.rb +88 -0
  33. data/lib/net/imap/sasl.rb +144 -43
  34. data/lib/net/imap/sasl_adapter.rb +21 -0
  35. data/lib/net/imap/stringprep/nameprep.rb +70 -0
  36. data/lib/net/imap/stringprep/saslprep.rb +69 -0
  37. data/lib/net/imap/stringprep/saslprep_tables.rb +96 -0
  38. data/lib/net/imap/stringprep/tables.rb +146 -0
  39. data/lib/net/imap/stringprep/trace.rb +85 -0
  40. data/lib/net/imap/stringprep.rb +159 -0
  41. data/lib/net/imap.rb +976 -590
  42. data/net-imap.gemspec +2 -2
  43. data/rakelib/saslprep.rake +4 -4
  44. data/rakelib/string_prep_tables_generator.rb +82 -60
  45. metadata +31 -12
  46. data/lib/net/imap/authenticators/digest_md5.rb +0 -115
  47. data/lib/net/imap/authenticators/plain.rb +0 -41
  48. data/lib/net/imap/authenticators/xoauth2.rb +0 -20
  49. data/lib/net/imap/sasl/saslprep.rb +0 -55
  50. data/lib/net/imap/sasl/saslprep_tables.rb +0 -98
  51. data/lib/net/imap/sasl/stringprep_tables.rb +0 -153
@@ -1,12 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "errors"
4
+ require_relative "response_parser/parser_utils"
4
5
 
5
6
  module Net
6
7
  class IMAP < Protocol
7
8
 
8
9
  # Parses an \IMAP server response.
9
10
  class ResponseParser
11
+ include ParserUtils
12
+ extend ParserUtils::Generator
13
+
10
14
  # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser
11
15
  def initialize
12
16
  @str = nil
@@ -33,69 +37,347 @@ module Net
33
37
 
34
38
  # :stopdoc:
35
39
 
36
- EXPR_BEG = :EXPR_BEG
37
- EXPR_DATA = :EXPR_DATA
38
- EXPR_TEXT = :EXPR_TEXT
39
- EXPR_RTEXT = :EXPR_RTEXT
40
- EXPR_CTEXT = :EXPR_CTEXT
41
-
42
- T_SPACE = :SPACE
43
- T_NIL = :NIL
44
- T_NUMBER = :NUMBER
45
- T_ATOM = :ATOM
46
- T_QUOTED = :QUOTED
47
- T_LPAR = :LPAR
48
- T_RPAR = :RPAR
49
- T_BSLASH = :BSLASH
50
- T_STAR = :STAR
51
- T_LBRA = :LBRA
52
- T_RBRA = :RBRA
53
- T_LITERAL = :LITERAL
54
- T_PLUS = :PLUS
55
- T_PERCENT = :PERCENT
56
- T_CRLF = :CRLF
57
- T_EOF = :EOF
58
- T_TEXT = :TEXT
59
-
40
+ EXPR_BEG = :EXPR_BEG # the default, used in most places
41
+ EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces
42
+
43
+ T_SPACE = :SPACE # atom special
44
+ T_ATOM = :ATOM # atom (subset of astring chars)
45
+ T_NIL = :NIL # subset of atom and label
46
+ T_NUMBER = :NUMBER # subset of atom
47
+ T_LBRA = :LBRA # subset of atom
48
+ T_PLUS = :PLUS # subset of atom; tag special
49
+ T_RBRA = :RBRA # atom special; resp_special; valid astring char
50
+ T_QUOTED = :QUOTED # starts/end with atom special
51
+ T_BSLASH = :BSLASH # atom special; quoted special
52
+ T_LPAR = :LPAR # atom special; paren list delimiter
53
+ T_RPAR = :RPAR # atom special; paren list delimiter
54
+ T_STAR = :STAR # atom special; list wildcard
55
+ T_PERCENT = :PERCENT # atom special; list wildcard
56
+ T_LITERAL = :LITERAL # starts with atom special
57
+ T_CRLF = :CRLF # atom special; text special; quoted special
58
+ T_TEXT = :TEXT # any char except CRLF
59
+ T_EOF = :EOF # end of response string
60
+
61
+ module Patterns
62
+
63
+ module CharClassSubtraction
64
+ refine Regexp do
65
+ def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end
66
+ end
67
+ end
68
+ using CharClassSubtraction
69
+
70
+ # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF"
71
+ # >>>
72
+ # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z
73
+ # CHAR = %x01-7F
74
+ # CRLF = CR LF
75
+ # ; Internet standard newline
76
+ # CTL = %x00-1F / %x7F
77
+ # ; controls
78
+ # DIGIT = %x30-39
79
+ # ; 0-9
80
+ # DQUOTE = %x22
81
+ # ; " (Double Quote)
82
+ # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
83
+ # OCTET = %x00-FF
84
+ # SP = %x20
85
+ module RFC5234
86
+ ALPHA = /[A-Za-z]/n
87
+ CHAR = /[\x01-\x7f]/n
88
+ CRLF = /\r\n/n
89
+ CTL = /[\x00-\x1F\x7F]/n
90
+ DIGIT = /\d/n
91
+ DQUOTE = /"/n
92
+ HEXDIG = /\h/
93
+ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes
94
+ SP = / /n
95
+ end
96
+
97
+ # UTF-8, a transformation format of ISO 10646
98
+ # >>>
99
+ # UTF8-1 = %x00-7F
100
+ # UTF8-tail = %x80-BF
101
+ # UTF8-2 = %xC2-DF UTF8-tail
102
+ # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
103
+ # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
104
+ # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
105
+ # %xF4 %x80-8F 2( UTF8-tail )
106
+ # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4
107
+ # UTF8-octets = *( UTF8-char )
108
+ #
109
+ # n.b. String * Integer is used for repetition, rather than /x{3}/,
110
+ # because ruby 3.2's linear-time cache-based optimization doesn't work
111
+ # with "bounded or fixed times repetition nesting in another repetition
112
+ # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we
113
+ # believe it is hard to support this case correctly."
114
+ # See https://bugs.ruby-lang.org/issues/19104
115
+ module RFC3629
116
+ UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit
117
+ UTF8_TAIL = /[\x80-\xBF]/n
118
+ UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n
119
+ UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n,
120
+ /\xED[\x80-\x9F]#{UTF8_TAIL}/n,
121
+ /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n,
122
+ /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n)
123
+ UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n,
124
+ /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n,
125
+ /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n)
126
+ UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4)
127
+ UTF8_OCTETS = /#{UTF8_CHAR}*/n
128
+ end
129
+
130
+ include RFC5234
131
+ include RFC3629
132
+
133
+ # CHAR8 = %x01-ff
134
+ # ; any OCTET except NUL, %x00
135
+ CHAR8 = /[\x01-\xff]/n
136
+
137
+ # list-wildcards = "%" / "*"
138
+ LIST_WILDCARDS = /[%*]/n
139
+ # quoted-specials = DQUOTE / "\"
140
+ QUOTED_SPECIALS = /["\\]/n
141
+ # resp-specials = "]"
142
+ RESP_SPECIALS = /[\]]/n
143
+
144
+ # atomish = 1*<any ATOM-CHAR except "[">
145
+ # ; We use "atomish" for msg-att and section, in order
146
+ # ; to simplify "BODY[HEADER.FIELDS (foo bar)]".
147
+ #
148
+ # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards /
149
+ # quoted-specials / resp-specials
150
+ # ATOM-CHAR = <any CHAR except atom-specials>
151
+ # atom = 1*ATOM-CHAR
152
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
153
+ # tag = 1*<any ASTRING-CHAR except "+">
154
+
155
+ ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n
156
+ ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n
157
+
158
+ ASTRING_CHAR = CHAR - ASTRING_SPECIALS
159
+ ATOM_CHAR = CHAR - ATOM_SPECIALS
160
+
161
+ ATOM = /#{ATOM_CHAR}+/n
162
+ ASTRING_CHARS = /#{ASTRING_CHAR}+/n
163
+ ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/
164
+ TAG = /#{ASTRING_CHAR - /[+]/ }+/
165
+
166
+ # TEXT-CHAR = <any CHAR except CR and LF>
167
+ TEXT_CHAR = CHAR - /[\r\n]/
168
+
169
+ # resp-text-code = ... / atom [SP 1*<any TEXT-CHAR except "]">]
170
+ CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS
171
+ CODE_TEXT = /#{CODE_TEXT_CHAR}+/n
172
+
173
+ # RFC3501:
174
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
175
+ # "\" quoted-specials
176
+ # RFC9051:
177
+ # QUOTED-CHAR = <any TEXT-CHAR except quoted-specials> /
178
+ # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4
179
+ # RFC3501 & RFC9051:
180
+ # quoted = DQUOTE *QUOTED-CHAR DQUOTE
181
+ QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS
182
+ QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n
183
+ QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc)
184
+ QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1,
185
+ UTF8_2, UTF8_3, UTF8_4)
186
+ QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n
187
+ QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n
188
+
189
+ # RFC3501:
190
+ # text = 1*TEXT-CHAR
191
+ # RFC9051:
192
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
193
+ # ; Non-ASCII text can only be returned
194
+ # ; after ENABLE IMAP4rev2 command
195
+ TEXT_rev1 = /#{TEXT_CHAR}+/
196
+ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/
197
+
198
+ # RFC3501:
199
+ # literal = "{" number "}" CRLF *CHAR8
200
+ # ; Number represents the number of CHAR8s
201
+ # RFC9051:
202
+ # literal = "{" number64 ["+"] "}" CRLF *CHAR8
203
+ # ; <number64> represents the number of CHAR8s.
204
+ # ; A non-synchronizing literal is distinguished
205
+ # ; from a synchronizing literal by the presence of
206
+ # ; "+" before the closing "}".
207
+ # ; Non-synchronizing literals are not allowed when
208
+ # ; sent from server to the client.
209
+ LITERAL = /\{(\d+)\}\r\n/n
210
+
211
+ module_function
212
+
213
+ def unescape_quoted!(quoted)
214
+ quoted
215
+ &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1")
216
+ &.force_encoding("UTF-8")
217
+ end
218
+
219
+ def unescape_quoted(quoted)
220
+ quoted
221
+ &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1")
222
+ &.force_encoding("UTF-8")
223
+ end
224
+
225
+ end
226
+
227
+ # the default, used in most places
60
228
  BEG_REGEXP = /\G(?:\
61
- (?# 1: SPACE )( +)|\
62
- (?# 2: NIL )(NIL)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
63
- (?# 3: NUMBER )(\d+)(?=[\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+])|\
64
- (?# 4: ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\\[\]+]+)|\
65
- (?# 5: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
66
- (?# 6: LPAR )(\()|\
67
- (?# 7: RPAR )(\))|\
68
- (?# 8: BSLASH )(\\)|\
69
- (?# 9: STAR )(\*)|\
70
- (?# 10: LBRA )(\[)|\
71
- (?# 11: RBRA )(\])|\
72
- (?# 12: LITERAL )\{(\d+)\}\r\n|\
73
- (?# 13: PLUS )(\+)|\
74
- (?# 14: PERCENT )(%)|\
75
- (?# 15: CRLF )(\r\n)|\
76
- (?# 16: EOF )(\z))/ni
77
-
229
+ (?# 1: SPACE )( )|\
230
+ (?# 2: ATOM prefixed with a compatible subtype)\
231
+ ((?:\
232
+ (?# 3: NIL )(NIL)|\
233
+ (?# 4: NUMBER )(\d+)|\
234
+ (?# 5: PLUS )(\+))\
235
+ (?# 6: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
236
+ (?# This enables greedy alternation without lookahead, in linear time.)\
237
+ )|\
238
+ (?# Also need to check for ATOM without a subtype prefix.)\
239
+ (?# 7: ATOM )(#{Patterns::ATOMISH})|\
240
+ (?# 8: QUOTED )#{Patterns::QUOTED_rev2}|\
241
+ (?# 9: LPAR )(\()|\
242
+ (?# 10: RPAR )(\))|\
243
+ (?# 11: BSLASH )(\\)|\
244
+ (?# 12: STAR )(\*)|\
245
+ (?# 13: LBRA )(\[)|\
246
+ (?# 14: RBRA )(\])|\
247
+ (?# 15: LITERAL )#{Patterns::LITERAL}|\
248
+ (?# 16: PERCENT )(%)|\
249
+ (?# 17: CRLF )(\r\n)|\
250
+ (?# 18: EOF )(\z))/ni
251
+
252
+ # envelope, body(structure), namespaces
78
253
  DATA_REGEXP = /\G(?:\
79
254
  (?# 1: SPACE )( )|\
80
255
  (?# 2: NIL )(NIL)|\
81
256
  (?# 3: NUMBER )(\d+)|\
82
- (?# 4: QUOTED )"((?:[^\x00\r\n"\\]|\\["\\])*)"|\
83
- (?# 5: LITERAL )\{(\d+)\}\r\n|\
257
+ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\
258
+ (?# 5: LITERAL )#{Patterns::LITERAL}|\
84
259
  (?# 6: LPAR )(\()|\
85
260
  (?# 7: RPAR )(\)))/ni
86
261
 
87
- TEXT_REGEXP = /\G(?:\
88
- (?# 1: TEXT )([^\x00\r\n]*))/ni
262
+ # text, after 'resp-text-code "]"'
263
+ TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n
89
264
 
90
- RTEXT_REGEXP = /\G(?:\
91
- (?# 1: LBRA )(\[)|\
92
- (?# 2: TEXT )([^\x00\r\n]*))/ni
93
-
94
- CTEXT_REGEXP = /\G(?:\
95
- (?# 1: TEXT )([^\x00\r\n\]]*))/ni
265
+ # resp-text-code, after 'atom SP'
266
+ CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n
96
267
 
97
268
  Token = Struct.new(:symbol, :value)
98
269
 
270
+ def_char_matchers :SP, " ", :T_SPACE
271
+
272
+ def_char_matchers :lpar, "(", :T_LPAR
273
+ def_char_matchers :rpar, ")", :T_RPAR
274
+
275
+ def_char_matchers :lbra, "[", :T_LBRA
276
+ def_char_matchers :rbra, "]", :T_RBRA
277
+
278
+ # valid number ranges are not enforced by parser
279
+ # number = 1*DIGIT
280
+ # ; Unsigned 32-bit integer
281
+ # ; (0 <= n < 4,294,967,296)
282
+ def_token_matchers :number, T_NUMBER, coerce: Integer
283
+
284
+ def_token_matchers :quoted, T_QUOTED
285
+
286
+ # string = quoted / literal
287
+ def_token_matchers :string, T_QUOTED, T_LITERAL
288
+
289
+ # use where string represents "LABEL" values
290
+ def_token_matchers :case_insensitive__string,
291
+ T_QUOTED, T_LITERAL,
292
+ send: :upcase
293
+
294
+ # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success.
295
+ # NIL? returns nil when it does *not* match
296
+ def_token_matchers :NIL, T_NIL
297
+
298
+ # In addition to explicit uses of +tagged-ext-label+, use this to match
299
+ # keywords when the grammar has not provided any extension syntax.
300
+ #
301
+ # Do *not* use this for labels where the grammar specifies extensions
302
+ # can be +atom+, even if all currently defined labels would match. For
303
+ # example response codes in +resp-text-code+.
304
+ #
305
+ # tagged-ext-label = tagged-label-fchar *tagged-label-char
306
+ # ; Is a valid RFC 3501 "atom".
307
+ # tagged-label-fchar = ALPHA / "-" / "_" / "."
308
+ # tagged-label-char = tagged-label-fchar / DIGIT / ":"
309
+ #
310
+ # TODO: add to lexer and only match tagged-ext-label
311
+ def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase
312
+
313
+ # atom = 1*ATOM-CHAR
314
+ # ATOM-CHAR = <any CHAR except atom-specials>
315
+ ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
316
+
317
+ # ASTRING-CHAR = ATOM-CHAR / resp-specials
318
+ # resp-specials = "]"
319
+ ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
320
+
321
+ ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze
322
+
323
+ # atom = 1*ATOM-CHAR
324
+ #
325
+ # TODO: match atom entirely by regexp (in the "lexer")
326
+ def atom; -combine_adjacent(*ATOM_TOKENS) end
327
+
328
+ # the #accept version of #atom
329
+ def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end
330
+
331
+ # Returns <tt>atom.upcase</tt>
332
+ def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end
333
+
334
+ # Returns <tt>atom?&.upcase</tt>
335
+ def case_insensitive__atom?
336
+ -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS)
337
+ end
338
+
339
+ # TODO: handle astring_chars entirely inside the lexer
340
+ def astring_chars
341
+ combine_adjacent(*ASTRING_CHARS_TOKENS)
342
+ end
343
+
344
+ # astring = 1*ASTRING-CHAR / string
345
+ def astring
346
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string
347
+ end
348
+
349
+ def astring?
350
+ lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string?
351
+ end
352
+
353
+ # Use #label or #label_in to assert specific known labels
354
+ # (+tagged-ext-label+ only, not +atom+).
355
+ def label(word)
356
+ (val = tagged_ext_label) == word and return val
357
+ parse_error("unexpected atom %p, expected %p instead", val, word)
358
+ end
359
+
360
+ # nstring = string / nil
361
+ def nstring
362
+ NIL? ? nil : string
363
+ end
364
+
365
+ def nquoted
366
+ NIL? ? nil : quoted
367
+ end
368
+
369
+ # use where nstring represents "LABEL" values
370
+ def case_insensitive__nstring
371
+ NIL? ? nil : case_insensitive__string
372
+ end
373
+
374
+ # valid number ranges are not enforced by parser
375
+ # number64 = 1*DIGIT
376
+ # ; Unsigned 63-bit integer
377
+ # ; (0 <= n <= 9,223,372,036,854,775,807)
378
+ alias number64 number
379
+ alias number64? number?
380
+
99
381
  def response
100
382
  token = lookahead
101
383
  case token.symbol
@@ -157,9 +439,11 @@ module Net
157
439
  when /\A(?:STATUS)\z/ni
158
440
  return status_response
159
441
  when /\A(?:CAPABILITY)\z/ni
160
- return capability_response
442
+ return capability_data__untagged
161
443
  when /\A(?:NOOP)\z/ni
162
444
  return ignored_response
445
+ when /\A(?:ENABLED)\z/ni
446
+ return enable_data
163
447
  else
164
448
  return text_response
165
449
  end
@@ -333,331 +617,258 @@ module Net
333
617
  return name, data
334
618
  end
335
619
 
620
+ # RFC-3501 & RFC-9051:
621
+ # body = "(" (body-type-1part / body-type-mpart) ")"
336
622
  def body
337
623
  @lex_state = EXPR_DATA
338
- token = lookahead
339
- if token.symbol == T_NIL
340
- shift_token
341
- result = nil
342
- else
343
- match(T_LPAR)
344
- token = lookahead
345
- if token.symbol == T_LPAR
346
- result = body_type_mpart
347
- else
348
- result = body_type_1part
349
- end
350
- match(T_RPAR)
351
- end
624
+ lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar
625
+ result
626
+ ensure
352
627
  @lex_state = EXPR_BEG
353
- return result
354
628
  end
629
+ alias lookahead_body? lookahead_lpar?
355
630
 
631
+ # RFC-3501 & RFC9051:
632
+ # body-type-1part = (body-type-basic / body-type-msg / body-type-text)
633
+ # [SP body-ext-1part]
356
634
  def body_type_1part
357
- token = lookahead
358
- case token.value
359
- when /\A(?:TEXT)\z/ni
360
- return body_type_text
361
- when /\A(?:MESSAGE)\z/ni
362
- return body_type_msg
363
- when /\A(?:ATTACHMENT)\z/ni
364
- return body_type_attachment
365
- when /\A(?:MIXED)\z/ni
366
- return body_type_mixed
367
- else
368
- return body_type_basic
369
- end
370
- end
371
-
635
+ # This regexp peek is a performance optimization.
636
+ # The lookahead fallback would work fine too.
637
+ m = peek_re(/\G(?:
638
+ (?<TEXT> "TEXT" \s "[^"]+" )
639
+ |(?<MESSAGE> "MESSAGE" \s "(?:RFC822|GLOBAL)" )
640
+ |(?<BASIC> "[^"]+" \s "[^"]+" )
641
+ |(?<MIXED> "MIXED" )
642
+ )/nix)
643
+ choice = m&.named_captures&.compact&.keys&.first
644
+ # In practice, the following line should never be used. But the ABNF
645
+ # *does* allow literals, and this will handle them.
646
+ choice ||= lookahead_case_insensitive__string!
647
+ case choice
648
+ when "BASIC" then body_type_basic # => BodyTypeBasic
649
+ when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic
650
+ when "TEXT" then body_type_text # => BodyTypeText
651
+ when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug)
652
+ else body_type_basic # might be a bug; server's or ours?
653
+ end
654
+ end
655
+
656
+ # RFC-3501 & RFC9051:
657
+ # body-type-basic = media-basic SP body-fields
372
658
  def body_type_basic
373
- mtype, msubtype = media_type
374
- token = lookahead
375
- if token.symbol == T_RPAR
376
- return BodyTypeBasic.new(mtype, msubtype)
377
- end
378
- match(T_SPACE)
379
- param, content_id, desc, enc, size = body_fields
380
- md5, disposition, language, extension = body_ext_1part
381
- return BodyTypeBasic.new(mtype, msubtype,
382
- param, content_id,
383
- desc, enc, size,
384
- md5, disposition, language, extension)
659
+ type = media_basic # n.b. "basic" type isn't enforced here
660
+ if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid
661
+ SP!; flds = body_fields
662
+ SP? and exts = body_ext_1part
663
+ BodyTypeBasic.new(*type, *flds, *exts)
385
664
  end
386
665
 
666
+ # RFC-3501 & RFC-9051:
667
+ # body-type-text = media-text SP body-fields SP body-fld-lines
387
668
  def body_type_text
388
- mtype, msubtype = media_type
389
- match(T_SPACE)
390
- param, content_id, desc, enc, size = body_fields
391
- match(T_SPACE)
392
- lines = number
393
- md5, disposition, language, extension = body_ext_1part
394
- return BodyTypeText.new(mtype, msubtype,
395
- param, content_id,
396
- desc, enc, size,
397
- lines,
398
- md5, disposition, language, extension)
669
+ type = media_text
670
+ SP!; flds = body_fields
671
+ SP!; lines = body_fld_lines
672
+ SP? and exts = body_ext_1part
673
+ BodyTypeText.new(*type, *flds, lines, *exts)
399
674
  end
400
675
 
676
+ # RFC-3501 & RFC-9051:
677
+ # body-type-msg = media-message SP body-fields SP envelope
678
+ # SP body SP body-fld-lines
401
679
  def body_type_msg
402
- mtype, msubtype = media_type
403
- match(T_SPACE)
404
- param, content_id, desc, enc, size = body_fields
405
-
406
- token = lookahead
407
- if token.symbol == T_RPAR
408
- # If this is not message/rfc822, we shouldn't apply the RFC822
409
- # spec to it. We should handle anything other than
410
- # message/rfc822 using multipart extension data [rfc3501] (i.e.
411
- # the data itself won't be returned, we would have to retrieve it
412
- # with BODYSTRUCTURE instead of with BODY
413
-
414
- # Also, sometimes a message/rfc822 is included as a large
415
- # attachment instead of having all of the other details
416
- # (e.g. attaching a .eml file to an email)
417
- if msubtype == "RFC822"
418
- return BodyTypeMessage.new(mtype, msubtype, param, content_id,
419
- desc, enc, size, nil, nil, nil, nil,
420
- nil, nil, nil)
421
- else
422
- return BodyTypeExtension.new(mtype, msubtype,
423
- param, content_id,
424
- desc, enc, size)
425
- end
426
- end
427
-
428
- match(T_SPACE)
429
- env = envelope
430
- match(T_SPACE)
431
- b = body
432
- match(T_SPACE)
433
- lines = number
434
- md5, disposition, language, extension = body_ext_1part
435
- return BodyTypeMessage.new(mtype, msubtype,
436
- param, content_id,
437
- desc, enc, size,
438
- env, b, lines,
439
- md5, disposition, language, extension)
440
- end
441
-
442
- def body_type_attachment
443
- mtype = case_insensitive_string
444
- match(T_SPACE)
445
- param = body_fld_param
446
- return BodyTypeAttachment.new(mtype, nil, param)
447
- end
448
-
680
+ # n.b. "message/rfc822" type isn't enforced here
681
+ type = media_message
682
+ SP!; flds = body_fields
683
+
684
+ # Sometimes servers send body-type-basic when body-type-msg is expected.
685
+ # E.g: when a message/rfc822 part has "Content-Disposition: attachment".
686
+ #
687
+ # * SP "(" --> SP envelope --> continue as body-type-msg
688
+ # * ")" --> no body-ext-1part --> completed body-type-basic
689
+ # * SP nstring --> SP body-fld-md5
690
+ # --> SP body-ext-1part --> continue as body-type-basic
691
+ #
692
+ # It's probably better to return BodyTypeBasic---even for
693
+ # "message/rfc822"---than BodyTypeMessage with invalid fields.
694
+ unless peek_str?(" (")
695
+ SP? and exts = body_ext_1part
696
+ return BodyTypeBasic.new(*type, *flds, *exts)
697
+ end
698
+
699
+ SP!; env = envelope
700
+ SP!; bdy = body
701
+ SP!; lines = body_fld_lines
702
+ SP? and exts = body_ext_1part
703
+ BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts)
704
+ end
705
+
706
+ # This is a malformed body-type-mpart with no subparts.
449
707
  def body_type_mixed
450
- mtype = "MULTIPART"
451
- msubtype = case_insensitive_string
452
- param, disposition, language, extension = body_ext_mpart
453
- return BodyTypeBasic.new(mtype, msubtype, param, nil, nil, nil, nil, nil, disposition, language, extension)
708
+ # warn "malformed body-type-mpart: multipart/mixed with no parts."
709
+ type = media_subtype # => "MIXED"
710
+ SP? and exts = body_ext_mpart
711
+ BodyTypeMultipart.new("MULTIPART", type, nil, *exts)
454
712
  end
455
713
 
714
+ # RFC-3501 & RFC-9051:
715
+ # body-type-mpart = 1*body SP media-subtype
716
+ # [SP body-ext-mpart]
456
717
  def body_type_mpart
457
- parts = []
458
- while true
459
- token = lookahead
460
- if token.symbol == T_SPACE
461
- shift_token
462
- break
463
- end
464
- parts.push(body)
465
- end
466
- mtype = "MULTIPART"
467
- msubtype = case_insensitive_string
468
- param, disposition, language, extension = body_ext_mpart
469
- return BodyTypeMultipart.new(mtype, msubtype, parts,
470
- param, disposition, language,
471
- extension)
718
+ parts = [body]; parts << body until SP?; msubtype = media_subtype
719
+ SP? and exts = body_ext_mpart
720
+ BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts)
472
721
  end
473
722
 
723
+ # n.b. this handles both type and subtype
724
+ #
725
+ # RFC-3501 vs RFC-9051:
726
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
727
+ # "MESSAGE" /
728
+ # "VIDEO") DQUOTE) / string) SP media-subtype
729
+ # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" /
730
+ # "FONT" / "MESSAGE" / "MODEL" /
731
+ # "VIDEO") DQUOTE) / string) SP media-subtype
732
+ #
733
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
734
+ # DQUOTE "RFC822" DQUOTE
735
+ # media-message = DQUOTE "MESSAGE" DQUOTE SP
736
+ # DQUOTE ("RFC822" / "GLOBAL") DQUOTE
737
+ #
738
+ # RFC-3501 & RFC-9051:
739
+ # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype
740
+ # media-subtype = string
474
741
  def media_type
475
- mtype = case_insensitive_string
476
- token = lookahead
477
- if token.symbol != T_SPACE
478
- return mtype, nil
479
- end
480
- match(T_SPACE)
481
- msubtype = case_insensitive_string
742
+ mtype = case_insensitive__string
743
+ SP? or return mtype, nil # ??? quirky!
744
+ msubtype = media_subtype
482
745
  return mtype, msubtype
483
746
  end
484
747
 
748
+ # TODO: check types
749
+ alias media_basic media_type # */* --- catchall
750
+ alias media_message media_type # message/rfc822, message/global
751
+ alias media_text media_type # text/*
752
+
753
+ alias media_subtype case_insensitive__string
754
+
755
+ # RFC-3501 & RFC-9051:
756
+ # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP
757
+ # body-fld-enc SP body-fld-octets
485
758
  def body_fields
486
- param = body_fld_param
487
- match(T_SPACE)
488
- content_id = nstring
489
- match(T_SPACE)
490
- desc = nstring
491
- match(T_SPACE)
492
- enc = case_insensitive_string
493
- match(T_SPACE)
494
- size = number
495
- return param, content_id, desc, enc, size
759
+ fields = []
760
+ fields << body_fld_param; SP!
761
+ fields << body_fld_id; SP!
762
+ fields << body_fld_desc; SP!
763
+ fields << body_fld_enc; SP!
764
+ fields << body_fld_octets
765
+ fields
496
766
  end
497
767
 
768
+ # RFC3501, RFC9051:
769
+ # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil
498
770
  def body_fld_param
499
- token = lookahead
500
- if token.symbol == T_NIL
501
- shift_token
502
- return nil
503
- end
504
- match(T_LPAR)
771
+ return if NIL?
505
772
  param = {}
506
- while true
507
- token = lookahead
508
- case token.symbol
509
- when T_RPAR
510
- shift_token
511
- break
512
- when T_SPACE
513
- shift_token
514
- end
515
- name = case_insensitive_string
516
- match(T_SPACE)
517
- val = string
518
- param[name] = val
519
- end
520
- return param
521
- end
522
-
773
+ lpar
774
+ name = case_insensitive__string; SP!; param[name] = string
775
+ while SP?
776
+ name = case_insensitive__string; SP!; param[name] = string
777
+ end
778
+ rpar
779
+ param
780
+ end
781
+
782
+ # RFC2060
783
+ # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp
784
+ # [SPACE body_fld_lang
785
+ # [SPACE 1#body_extension]]]
786
+ # ;; MUST NOT be returned on non-extensible
787
+ # ;; "BODY" fetch
788
+ # RFC3501 & RFC9051
789
+ # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang
790
+ # [SP body-fld-loc *(SP body-extension)]]]
791
+ # ; MUST NOT be returned on non-extensible
792
+ # ; "BODY" fetch
523
793
  def body_ext_1part
524
- token = lookahead
525
- if token.symbol == T_SPACE
526
- shift_token
527
- else
528
- return nil
529
- end
530
- md5 = nstring
531
-
532
- token = lookahead
533
- if token.symbol == T_SPACE
534
- shift_token
535
- else
536
- return md5
537
- end
538
- disposition = body_fld_dsp
539
-
540
- token = lookahead
541
- if token.symbol == T_SPACE
542
- shift_token
543
- else
544
- return md5, disposition
545
- end
546
- language = body_fld_lang
547
-
548
- token = lookahead
549
- if token.symbol == T_SPACE
550
- shift_token
551
- else
552
- return md5, disposition, language
553
- end
554
-
555
- extension = body_extensions
556
- return md5, disposition, language, extension
557
- end
558
-
794
+ fields = []; fields << body_fld_md5
795
+ SP? or return fields; fields << body_fld_dsp
796
+ SP? or return fields; fields << body_fld_lang
797
+ SP? or return fields; fields << body_fld_loc
798
+ SP? or return fields; fields << body_extensions
799
+ fields
800
+ end
801
+
802
+ # RFC-2060:
803
+ # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang
804
+ # [SP 1#body_extension]]
805
+ # ;; MUST NOT be returned on non-extensible
806
+ # ;; "BODY" fetch
807
+ # RFC-3501 & RFC-9051:
808
+ # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang
809
+ # [SP body-fld-loc *(SP body-extension)]]]
810
+ # ; MUST NOT be returned on non-extensible
811
+ # ; "BODY" fetch
559
812
  def body_ext_mpart
560
- token = lookahead
561
- if token.symbol == T_SPACE
562
- shift_token
563
- else
564
- return nil
565
- end
566
- param = body_fld_param
567
-
568
- token = lookahead
569
- if token.symbol == T_SPACE
570
- shift_token
571
- else
572
- return param
573
- end
574
- disposition = body_fld_dsp
575
-
576
- token = lookahead
577
- if token.symbol == T_SPACE
578
- shift_token
579
- else
580
- return param, disposition
581
- end
582
- language = body_fld_lang
583
-
584
- token = lookahead
585
- if token.symbol == T_SPACE
586
- shift_token
587
- else
588
- return param, disposition, language
589
- end
590
-
591
- extension = body_extensions
592
- return param, disposition, language, extension
593
- end
594
-
813
+ fields = []; fields << body_fld_param
814
+ SP? or return fields; fields << body_fld_dsp
815
+ SP? or return fields; fields << body_fld_lang
816
+ SP? or return fields; fields << body_fld_loc
817
+ SP? or return fields; fields << body_extensions
818
+ fields
819
+ end
820
+
821
+ alias body_fld_desc nstring
822
+ alias body_fld_id nstring
823
+ alias body_fld_loc nstring
824
+ alias body_fld_lines number64 # number in 3501, number64 in 9051
825
+ alias body_fld_md5 nstring
826
+ alias body_fld_octets number
827
+
828
+ # RFC-3501 & RFC-9051:
829
+ # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/
830
+ # "QUOTED-PRINTABLE") DQUOTE) / string
831
+ alias body_fld_enc case_insensitive__string
832
+
833
+ # body-fld-dsp = "(" string SP body-fld-param ")" / nil
595
834
  def body_fld_dsp
596
- token = lookahead
597
- if token.symbol == T_NIL
598
- shift_token
599
- return nil
600
- end
601
- match(T_LPAR)
602
- dsp_type = case_insensitive_string
603
- match(T_SPACE)
604
- param = body_fld_param
605
- match(T_RPAR)
606
- return ContentDisposition.new(dsp_type, param)
835
+ return if NIL?
836
+ lpar; dsp_type = case_insensitive__string
837
+ SP!; param = body_fld_param
838
+ rpar
839
+ ContentDisposition.new(dsp_type, param)
607
840
  end
608
841
 
842
+ # body-fld-lang = nstring / "(" string *(SP string) ")"
609
843
  def body_fld_lang
610
- token = lookahead
611
- if token.symbol == T_LPAR
612
- shift_token
613
- result = []
614
- while true
615
- token = lookahead
616
- case token.symbol
617
- when T_RPAR
618
- shift_token
619
- return result
620
- when T_SPACE
621
- shift_token
622
- end
623
- result.push(case_insensitive_string)
624
- end
844
+ if lpar?
845
+ result = [case_insensitive__string]
846
+ result << case_insensitive__string while SP?
847
+ result
625
848
  else
626
- lang = nstring
627
- if lang
628
- return lang.upcase
629
- else
630
- return lang
631
- end
849
+ case_insensitive__nstring
632
850
  end
633
851
  end
634
852
 
853
+ # body-extension *(SP body-extension)
635
854
  def body_extensions
636
855
  result = []
637
- while true
638
- token = lookahead
639
- case token.symbol
640
- when T_RPAR
641
- return result
642
- when T_SPACE
643
- shift_token
644
- end
645
- result.push(body_extension)
646
- end
856
+ result << body_extension; while SP? do result << body_extension end
857
+ result
647
858
  end
648
859
 
860
+ # body-extension = nstring / number / number64 /
861
+ # "(" body-extension *(SP body-extension) ")"
862
+ # ; Future expansion. Client implementations
863
+ # ; MUST accept body-extension fields. Server
864
+ # ; implementations MUST NOT generate
865
+ # ; body-extension fields except as defined by
866
+ # ; future Standard or Standards Track
867
+ # ; revisions of this specification.
649
868
  def body_extension
650
- token = lookahead
651
- case token.symbol
652
- when T_LPAR
653
- shift_token
654
- result = body_extensions
655
- match(T_RPAR)
656
- return result
657
- when T_NUMBER
658
- return number
659
- else
660
- return nstring
869
+ if (uint = number64?) then uint
870
+ elsif lpar? then exts = body_extensions; rpar; exts
871
+ else nstring
661
872
  end
662
873
  end
663
874
 
@@ -967,29 +1178,38 @@ module Net
967
1178
  return UntaggedResponse.new(name, data, @str)
968
1179
  end
969
1180
 
970
- def capability_response
971
- token = match(T_ATOM)
972
- name = token.value.upcase
973
- match(T_SPACE)
974
- UntaggedResponse.new(name, capability_data, @str)
1181
+ # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
1182
+ # The grammar rule is used by both response-data and resp-text-code.
1183
+ # But this method only returns UntaggedResponse (response-data).
1184
+ #
1185
+ # RFC3501:
1186
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1"
1187
+ # *(SP capability)
1188
+ # RFC9051:
1189
+ # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2"
1190
+ # *(SP capability)
1191
+ def capability_data__untagged
1192
+ UntaggedResponse.new label("CAPABILITY"), capability__list, @str
975
1193
  end
976
1194
 
977
- def capability_data
978
- data = []
979
- while true
980
- token = lookahead
981
- case token.symbol
982
- when T_CRLF, T_RBRA
983
- break
984
- when T_SPACE
985
- shift_token
986
- next
987
- end
988
- data.push(atom.upcase)
989
- end
990
- data
1195
+ # enable-data = "ENABLED" *(SP capability)
1196
+ def enable_data
1197
+ UntaggedResponse.new label("ENABLED"), capability__list, @str
1198
+ end
1199
+
1200
+ # As a workaround for buggy servers, allow a trailing SP:
1201
+ # *(SP capability) [SP]
1202
+ def capability__list
1203
+ data = []; while _ = SP? && capability? do data << _ end; data
991
1204
  end
992
1205
 
1206
+ # capability = ("AUTH=" auth-type) / atom
1207
+ # ; New capabilities MUST begin with "X" or be
1208
+ # ; registered with IANA as standard or
1209
+ # ; standards-track
1210
+ alias capability case_insensitive__atom
1211
+ alias capability? case_insensitive__atom?
1212
+
993
1213
  def id_response
994
1214
  token = match(T_ATOM)
995
1215
  name = token.value.upcase
@@ -1019,86 +1239,89 @@ module Net
1019
1239
  end
1020
1240
  end
1021
1241
 
1242
+ # namespace-response = "NAMESPACE" SP namespace
1243
+ # SP namespace SP namespace
1244
+ # ; The first Namespace is the Personal Namespace(s).
1245
+ # ; The second Namespace is the Other Users'
1246
+ # ; Namespace(s).
1247
+ # ; The third Namespace is the Shared Namespace(s).
1022
1248
  def namespace_response
1249
+ name = label("NAMESPACE")
1023
1250
  @lex_state = EXPR_DATA
1024
- token = lookahead
1025
- token = match(T_ATOM)
1026
- name = token.value.upcase
1027
- match(T_SPACE)
1028
- personal = namespaces
1029
- match(T_SPACE)
1030
- other = namespaces
1031
- match(T_SPACE)
1032
- shared = namespaces
1251
+ data = Namespaces.new((SP!; namespace),
1252
+ (SP!; namespace),
1253
+ (SP!; namespace))
1254
+ UntaggedResponse.new(name, data, @str)
1255
+ ensure
1033
1256
  @lex_state = EXPR_BEG
1034
- data = Namespaces.new(personal, other, shared)
1035
- return UntaggedResponse.new(name, data, @str)
1036
- end
1037
-
1038
- def namespaces
1039
- token = lookahead
1040
- # empty () is not allowed, so nil is functionally identical to empty.
1041
- data = []
1042
- if token.symbol == T_NIL
1043
- shift_token
1044
- else
1045
- match(T_LPAR)
1046
- loop do
1047
- data << namespace
1048
- break unless lookahead.symbol == T_SPACE
1049
- shift_token
1050
- end
1051
- match(T_RPAR)
1052
- end
1053
- data
1054
1257
  end
1055
1258
 
1259
+ # namespace = nil / "(" 1*namespace-descr ")"
1056
1260
  def namespace
1057
- match(T_LPAR)
1058
- prefix = match(T_QUOTED, T_LITERAL).value
1059
- match(T_SPACE)
1060
- delimiter = string
1261
+ NIL? and return []
1262
+ lpar
1263
+ list = [namespace_descr]
1264
+ list << namespace_descr until rpar?
1265
+ list
1266
+ end
1267
+
1268
+ # namespace-descr = "(" string SP
1269
+ # (DQUOTE QUOTED-CHAR DQUOTE / nil)
1270
+ # [namespace-response-extensions] ")"
1271
+ def namespace_descr
1272
+ lpar
1273
+ prefix = string; SP!
1274
+ delimiter = nquoted # n.b: should only accept single char
1061
1275
  extensions = namespace_response_extensions
1062
- match(T_RPAR)
1276
+ rpar
1063
1277
  Namespace.new(prefix, delimiter, extensions)
1064
1278
  end
1065
1279
 
1280
+ # namespace-response-extensions = *namespace-response-extension
1281
+ # namespace-response-extension = SP string SP
1282
+ # "(" string *(SP string) ")"
1066
1283
  def namespace_response_extensions
1067
1284
  data = {}
1068
- token = lookahead
1069
- if token.symbol == T_SPACE
1070
- shift_token
1071
- name = match(T_QUOTED, T_LITERAL).value
1285
+ while SP?
1286
+ name = string; SP!
1287
+ lpar
1072
1288
  data[name] ||= []
1073
- match(T_SPACE)
1074
- match(T_LPAR)
1075
- loop do
1076
- data[name].push match(T_QUOTED, T_LITERAL).value
1077
- break unless lookahead.symbol == T_SPACE
1078
- shift_token
1079
- end
1080
- match(T_RPAR)
1289
+ data[name] << string
1290
+ data[name] << string while SP?
1291
+ rpar
1081
1292
  end
1082
1293
  data
1083
1294
  end
1084
1295
 
1085
- # text = 1*TEXT-CHAR
1086
- # TEXT-CHAR = <any CHAR except CR and LF>
1296
+ # TEXT-CHAR = <any CHAR except CR and LF>
1297
+ # RFC3501:
1298
+ # text = 1*TEXT-CHAR
1299
+ # RFC9051:
1300
+ # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4)
1301
+ # ; Non-ASCII text can only be returned
1302
+ # ; after ENABLE IMAP4rev2 command
1087
1303
  def text
1088
- match(T_TEXT, lex_state: EXPR_TEXT).value
1304
+ match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8")
1305
+ end
1306
+
1307
+ # an "accept" version of #text
1308
+ def text?
1309
+ accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8")
1089
1310
  end
1090
1311
 
1091
- # resp-text = ["[" resp-text-code "]" SP] text
1312
+ # RFC3501:
1313
+ # resp-text = ["[" resp-text-code "]" SP] text
1314
+ # RFC9051:
1315
+ # resp-text = ["[" resp-text-code "]" SP] [text]
1316
+ #
1317
+ # We leniently re-interpret this as
1318
+ # resp-text = ["[" resp-text-code "]" [SP [text]] / [text]
1092
1319
  def resp_text
1093
- token = match(T_LBRA, T_TEXT, lex_state: EXPR_RTEXT)
1094
- case token.symbol
1095
- when T_LBRA
1096
- code = resp_text_code
1097
- match(T_RBRA)
1098
- accept_space # violating RFC
1099
- ResponseText.new(code, text)
1100
- when T_TEXT
1101
- ResponseText.new(nil, token.value)
1320
+ if lbra?
1321
+ code = resp_text_code; rbra
1322
+ ResponseText.new(code, SP? && text? || "")
1323
+ else
1324
+ ResponseText.new(nil, text? || "")
1102
1325
  end
1103
1326
  end
1104
1327
 
@@ -1125,7 +1348,7 @@ module Net
1125
1348
  when /\A(?:BADCHARSET)\z/n
1126
1349
  result = ResponseCode.new(name, charset_list)
1127
1350
  when /\A(?:CAPABILITY)\z/ni
1128
- result = ResponseCode.new(name, capability_data)
1351
+ result = ResponseCode.new(name, capability__list)
1129
1352
  when /\A(?:PERMANENTFLAGS)\z/n
1130
1353
  match(T_SPACE)
1131
1354
  result = ResponseCode.new(name, flag_list)
@@ -1140,8 +1363,7 @@ module Net
1140
1363
  token = lookahead
1141
1364
  if token.symbol == T_SPACE
1142
1365
  shift_token
1143
- token = match(T_TEXT, lex_state: EXPR_CTEXT)
1144
- result = ResponseCode.new(name, token.value)
1366
+ result = ResponseCode.new(name, text_chars_except_rbra)
1145
1367
  else
1146
1368
  result = ResponseCode.new(name, nil)
1147
1369
  end
@@ -1149,6 +1371,11 @@ module Net
1149
1371
  return result
1150
1372
  end
1151
1373
 
1374
+ # 1*<any TEXT-CHAR except "]">
1375
+ def text_chars_except_rbra
1376
+ match_re(CTEXT_REGEXP, '1*<any TEXT-CHAR except "]">')[0]
1377
+ end
1378
+
1152
1379
  def charset_list
1153
1380
  result = []
1154
1381
  if accept(T_SPACE)
@@ -1230,9 +1457,7 @@ module Net
1230
1457
  mailbox = $3
1231
1458
  host = $4
1232
1459
  for s in [name, route, mailbox, host]
1233
- if s
1234
- s.gsub!(/\\(["\\])/n, "\\1")
1235
- end
1460
+ Patterns.unescape_quoted! s
1236
1461
  end
1237
1462
  else
1238
1463
  name = nstring
@@ -1266,84 +1491,6 @@ module Net
1266
1491
  end
1267
1492
  end
1268
1493
 
1269
- def nstring
1270
- token = lookahead
1271
- if token.symbol == T_NIL
1272
- shift_token
1273
- return nil
1274
- else
1275
- return string
1276
- end
1277
- end
1278
-
1279
- def astring
1280
- token = lookahead
1281
- if string_token?(token)
1282
- return string
1283
- else
1284
- return astring_chars
1285
- end
1286
- end
1287
-
1288
- def string
1289
- token = lookahead
1290
- if token.symbol == T_NIL
1291
- shift_token
1292
- return nil
1293
- end
1294
- token = match(T_QUOTED, T_LITERAL)
1295
- return token.value
1296
- end
1297
-
1298
- STRING_TOKENS = [T_QUOTED, T_LITERAL, T_NIL]
1299
-
1300
- def string_token?(token)
1301
- return STRING_TOKENS.include?(token.symbol)
1302
- end
1303
-
1304
- def case_insensitive_string
1305
- token = lookahead
1306
- if token.symbol == T_NIL
1307
- shift_token
1308
- return nil
1309
- end
1310
- token = match(T_QUOTED, T_LITERAL)
1311
- return token.value.upcase
1312
- end
1313
-
1314
- # atom = 1*ATOM-CHAR
1315
- # ATOM-CHAR = <any CHAR except atom-specials>
1316
- ATOM_TOKENS = [
1317
- T_ATOM,
1318
- T_NUMBER,
1319
- T_NIL,
1320
- T_LBRA,
1321
- T_PLUS
1322
- ]
1323
-
1324
- def atom
1325
- -combine_adjacent(*ATOM_TOKENS)
1326
- end
1327
-
1328
- # ASTRING-CHAR = ATOM-CHAR / resp-specials
1329
- # resp-specials = "]"
1330
- ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA]
1331
-
1332
- def astring_chars
1333
- combine_adjacent(*ASTRING_CHARS_TOKENS)
1334
- end
1335
-
1336
- def combine_adjacent(*tokens)
1337
- result = "".b
1338
- while token = accept(*tokens)
1339
- result << token.value
1340
- end
1341
- if result.empty?
1342
- parse_error('unexpected token %s (expected %s)',
1343
- lookahead.symbol, args.join(" or "))
1344
- end
1345
- result
1346
- end
1347
1494
 
1348
1495
  # See https://www.rfc-editor.org/errata/rfc3501
1349
1496
  #
@@ -1356,16 +1503,6 @@ module Net
1356
1503
  end
1357
1504
  end
1358
1505
 
1359
- def number
1360
- token = lookahead
1361
- if token.symbol == T_NIL
1362
- shift_token
1363
- return nil
1364
- end
1365
- token = match(T_NUMBER)
1366
- return token.value.to_i
1367
- end
1368
-
1369
1506
  # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1370
1507
  # uid-set = (uniqueid / uid-range) *("," uid-set)
1371
1508
  # uid-range = (uniqueid ":" uniqueid)
@@ -1393,15 +1530,6 @@ module Net
1393
1530
 
1394
1531
  SPACES_REGEXP = /\G */n
1395
1532
 
1396
- # This advances @pos directly so it's safe before changing @lex_state.
1397
- def accept_space
1398
- if @token
1399
- shift_token if @token.symbol == T_SPACE
1400
- elsif @str[@pos] == " "
1401
- @pos += 1
1402
- end
1403
- end
1404
-
1405
1533
  # The RFC is very strict about this and usually we should be too.
1406
1534
  # But skipping spaces is usually a safe workaround for buggy servers.
1407
1535
  #
@@ -1413,46 +1541,6 @@ module Net
1413
1541
  end
1414
1542
  end
1415
1543
 
1416
- def match(*args, lex_state: @lex_state)
1417
- if @token && lex_state != @lex_state
1418
- parse_error("invalid lex_state change to %s with unconsumed token",
1419
- lex_state)
1420
- end
1421
- begin
1422
- @lex_state, original_lex_state = lex_state, @lex_state
1423
- token = lookahead
1424
- unless args.include?(token.symbol)
1425
- parse_error('unexpected token %s (expected %s)',
1426
- token.symbol.id2name,
1427
- args.collect {|i| i.id2name}.join(" or "))
1428
- end
1429
- shift_token
1430
- return token
1431
- ensure
1432
- @lex_state = original_lex_state
1433
- end
1434
- end
1435
-
1436
- # like match, but does not raise error on failure.
1437
- #
1438
- # returns and shifts token on successful match
1439
- # returns nil and leaves @token unshifted on no match
1440
- def accept(*args)
1441
- token = lookahead
1442
- if args.include?(token.symbol)
1443
- shift_token
1444
- token
1445
- end
1446
- end
1447
-
1448
- def lookahead
1449
- @token ||= next_token
1450
- end
1451
-
1452
- def shift_token
1453
- @token = nil
1454
- end
1455
-
1456
1544
  def next_token
1457
1545
  case @lex_state
1458
1546
  when EXPR_BEG
@@ -1460,39 +1548,42 @@ module Net
1460
1548
  @pos = $~.end(0)
1461
1549
  if $1
1462
1550
  return Token.new(T_SPACE, $+)
1463
- elsif $2
1464
- return Token.new(T_NIL, $+)
1551
+ elsif $2 && $6
1552
+ # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
1553
+ return Token.new(T_ATOM, $2)
1465
1554
  elsif $3
1466
- return Token.new(T_NUMBER, $+)
1555
+ return Token.new(T_NIL, $+)
1467
1556
  elsif $4
1468
- return Token.new(T_ATOM, $+)
1557
+ return Token.new(T_NUMBER, $+)
1469
1558
  elsif $5
1470
- return Token.new(T_QUOTED,
1471
- $+.gsub(/\\(["\\])/n, "\\1"))
1472
- elsif $6
1473
- return Token.new(T_LPAR, $+)
1559
+ return Token.new(T_PLUS, $+)
1474
1560
  elsif $7
1475
- return Token.new(T_RPAR, $+)
1561
+ # match ATOM, without a NUMBER, NIL, or PLUS prefix
1562
+ return Token.new(T_ATOM, $+)
1476
1563
  elsif $8
1477
- return Token.new(T_BSLASH, $+)
1564
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1478
1565
  elsif $9
1479
- return Token.new(T_STAR, $+)
1566
+ return Token.new(T_LPAR, $+)
1480
1567
  elsif $10
1481
- return Token.new(T_LBRA, $+)
1568
+ return Token.new(T_RPAR, $+)
1482
1569
  elsif $11
1483
- return Token.new(T_RBRA, $+)
1570
+ return Token.new(T_BSLASH, $+)
1484
1571
  elsif $12
1572
+ return Token.new(T_STAR, $+)
1573
+ elsif $13
1574
+ return Token.new(T_LBRA, $+)
1575
+ elsif $14
1576
+ return Token.new(T_RBRA, $+)
1577
+ elsif $15
1485
1578
  len = $+.to_i
1486
1579
  val = @str[@pos, len]
1487
1580
  @pos += len
1488
1581
  return Token.new(T_LITERAL, val)
1489
- elsif $13
1490
- return Token.new(T_PLUS, $+)
1491
- elsif $14
1582
+ elsif $16
1492
1583
  return Token.new(T_PERCENT, $+)
1493
- elsif $15
1584
+ elsif $17
1494
1585
  return Token.new(T_CRLF, $+)
1495
- elsif $16
1586
+ elsif $18
1496
1587
  return Token.new(T_EOF, $+)
1497
1588
  else
1498
1589
  parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")
@@ -1511,8 +1602,7 @@ module Net
1511
1602
  elsif $3
1512
1603
  return Token.new(T_NUMBER, $+)
1513
1604
  elsif $4
1514
- return Token.new(T_QUOTED,
1515
- $+.gsub(/\\(["\\])/n, "\\1"))
1605
+ return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
1516
1606
  elsif $5
1517
1607
  len = $+.to_i
1518
1608
  val = @str[@pos, len]
@@ -1529,63 +1619,11 @@ module Net
1529
1619
  @str.index(/\S*/n, @pos)
1530
1620
  parse_error("unknown token - %s", $&.dump)
1531
1621
  end
1532
- when EXPR_TEXT
1533
- if @str.index(TEXT_REGEXP, @pos)
1534
- @pos = $~.end(0)
1535
- if $1
1536
- return Token.new(T_TEXT, $+)
1537
- else
1538
- parse_error("[Net::IMAP BUG] TEXT_REGEXP is invalid")
1539
- end
1540
- else
1541
- @str.index(/\S*/n, @pos)
1542
- parse_error("unknown token - %s", $&.dump)
1543
- end
1544
- when EXPR_RTEXT
1545
- if @str.index(RTEXT_REGEXP, @pos)
1546
- @pos = $~.end(0)
1547
- if $1
1548
- return Token.new(T_LBRA, $+)
1549
- elsif $2
1550
- return Token.new(T_TEXT, $+)
1551
- else
1552
- parse_error("[Net::IMAP BUG] RTEXT_REGEXP is invalid")
1553
- end
1554
- else
1555
- @str.index(/\S*/n, @pos)
1556
- parse_error("unknown token - %s", $&.dump)
1557
- end
1558
- when EXPR_CTEXT
1559
- if @str.index(CTEXT_REGEXP, @pos)
1560
- @pos = $~.end(0)
1561
- if $1
1562
- return Token.new(T_TEXT, $+)
1563
- else
1564
- parse_error("[Net::IMAP BUG] CTEXT_REGEXP is invalid")
1565
- end
1566
- else
1567
- @str.index(/\S*/n, @pos) #/
1568
- parse_error("unknown token - %s", $&.dump)
1569
- end
1570
1622
  else
1571
1623
  parse_error("invalid @lex_state - %s", @lex_state.inspect)
1572
1624
  end
1573
1625
  end
1574
1626
 
1575
- def parse_error(fmt, *args)
1576
- if IMAP.debug
1577
- $stderr.printf("@str: %s\n", @str.dump)
1578
- $stderr.printf("@pos: %d\n", @pos)
1579
- $stderr.printf("@lex_state: %s\n", @lex_state)
1580
- if @token
1581
- $stderr.printf("@token.symbol: %s\n", @token.symbol)
1582
- $stderr.printf("@token.value: %s\n", @token.value.inspect)
1583
- end
1584
- end
1585
- raise ResponseParseError, format(fmt, *args)
1586
- end
1587
1627
  end
1588
-
1589
1628
  end
1590
-
1591
1629
  end