RubyGems - net-imap - Versions diffs - 0.4.4 → 0.4.7 - Mend

net-imap 0.4.4 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of net-imap might be problematic. Click here for more details.

Files changed (13) hide show

checksums.yaml +4 -4
data/.gitignore +2 -1
data/docs/styles.css +0 -12
data/lib/net/imap/data_encoding.rb +14 -2
data/lib/net/imap/fetch_data.rb +518 -0
data/lib/net/imap/response_data.rb +26 -207
data/lib/net/imap/response_parser/parser_utils.rb +1 -1
data/lib/net/imap/response_parser.rb +491 -270
data/lib/net/imap/sequence_set.rb +67 -0
data/lib/net/imap.rb +82 -17
data/net-imap.gemspec +1 -0
data/rakelib/benchmarks.rake +4 -11
metadata +6 -3

data/lib/net/imap/response_parser.rb CHANGED Viewed

@@ -54,6 +54,7 @@ module Net
       T_STAR     = :STAR         # atom special; list wildcard
       T_PERCENT  = :PERCENT      # atom special; list wildcard
       T_LITERAL  = :LITERAL      # starts with atom special
+      T_LITERAL8 = :LITERAL8     # starts with atom char "~"
       T_CRLF     = :CRLF         # atom special; text special; quoted special
       T_TEXT     = :TEXT         # any char except CRLF
       T_EOF      = :EOF          # end of response string
@@ -197,6 +198,7 @@ module Net
         #                     ; revisions of this specification.
         # flag-keyword    = "$MDNSent" / "$Forwarded" / "$Junk" /
         #                   "$NotJunk" / "$Phishing" / atom
+        #
         # flag-perm       = flag / "\*"
         #
         # Not checking for max one mbx-list-sflag in the parser.
@@ -219,19 +221,15 @@ module Net
         MBX_FLAG          = FLAG_EXTENSION
         # flag-list       = "(" [flag *(SP flag)] ")"
-        #
-        # part of resp-text-code:
-        # >>>
-        #   "PERMANENTFLAGS" SP "(" [flag-perm *(SP flag-perm)] ")"
-        #
-        # parens from mailbox-list are included in the regexp:
-        # >>>
-        #   mbx-list-flags  = *(mbx-list-oflag SP) mbx-list-sflag
-        #                     *(SP mbx-list-oflag) /
-        #                     mbx-list-oflag *(SP mbx-list-oflag)
-        FLAG_LIST      = /\G\((#{FLAG     }(?:#{SP}#{FLAG     })*|)\)/ni
-        FLAG_PERM_LIST = /\G\((#{FLAG_PERM}(?:#{SP}#{FLAG_PERM})*|)\)/ni
-        MBX_LIST_FLAGS = /\G\((#{MBX_FLAG }(?:#{SP}#{MBX_FLAG })*|)\)/ni
+        # resp-text-code =/ "PERMANENTFLAGS" SP
+        #                   "(" [flag-perm *(SP flag-perm)] ")"
+        # mbx-list-flags  = *(mbx-list-oflag SP) mbx-list-sflag
+        #                   *(SP mbx-list-oflag) /
+        #                   mbx-list-oflag *(SP mbx-list-oflag)
+        # (Not checking for max one mbx-list-sflag in the parser.)
+        FLAG_LIST         = /\G\((#{FLAG     }(?:#{SP}#{FLAG     })*|)\)/ni
+        FLAG_PERM_LIST    = /\G\((#{FLAG_PERM}(?:#{SP}#{FLAG_PERM})*|)\)/ni
+        MBX_LIST_FLAGS    = /\G  (#{MBX_FLAG }(?:#{SP}#{MBX_FLAG })*)   /nix
         # RFC3501:
         #   QUOTED-CHAR   = <any TEXT-CHAR except quoted-specials> /
@@ -266,6 +264,56 @@ module Net
         #                      ; Is a valid RFC 3501 "atom".
         TAGGED_EXT_LABEL     = /#{TAGGED_LABEL_FCHAR}#{TAGGED_LABEL_CHAR}*/n
+        # nz-number       = digit-nz *DIGIT
+        #                     ; Non-zero unsigned 32-bit integer
+        #                     ; (0 < n < 4,294,967,296)
+        NZ_NUMBER         = /[1-9]\d*/n
+        # seq-number      = nz-number / "*"
+        #                     ; message sequence number (COPY, FETCH, STORE
+        #                     ; commands) or unique identifier (UID COPY,
+        #                     ; UID FETCH, UID STORE commands).
+        #                     ; * represents the largest number in use.  In
+        #                     ; the case of message sequence numbers, it is
+        #                     ; the number of messages in a non-empty mailbox.
+        #                     ; In the case of unique identifiers, it is the
+        #                     ; unique identifier of the last message in the
+        #                     ; mailbox or, if the mailbox is empty, the
+        #                     ; mailbox's current UIDNEXT value.
+        #                     ; The server should respond with a tagged BAD
+        #                     ; response to a command that uses a message
+        #                     ; sequence number greater than the number of
+        #                     ; messages in the selected mailbox.  This
+        #                     ; includes "*" if the selected mailbox is empty.
+        SEQ_NUMBER        = /#{NZ_NUMBER}|\*/n
+        # seq-range       = seq-number ":" seq-number
+        #                     ; two seq-number values and all values between
+        #                     ; these two regardless of order.
+        #                     ; Example: 2:4 and 4:2 are equivalent and
+        #                     ; indicate values 2, 3, and 4.
+        #                     ; Example: a unique identifier sequence range of
+        #                     ; 3291:* includes the UID of the last message in
+        #                     ; the mailbox, even if that value is less than
+        #                     ; 3291.
+        SEQ_RANGE         = /#{SEQ_NUMBER}:#{SEQ_NUMBER}/n
+        # sequence-set    = (seq-number / seq-range) ["," sequence-set]
+        #                     ; set of seq-number values, regardless of order.
+        #                     ; Servers MAY coalesce overlaps and/or execute
+        #                     ; the sequence in any order.
+        #                     ; Example: a message sequence number set of
+        #                     ; 2,4:7,9,12:* for a mailbox with 15 messages is
+        #                     ; equivalent to 2,4,5,6,7,9,12,13,14,15
+        #                     ; Example: a message sequence number set of
+        #                     ; *:4,5:7 for a mailbox with 10 messages is
+        #                     ; equivalent to 10,9,8,7,6,5,4,5,6,7 and MAY
+        #                     ; be reordered and overlap coalesced to be
+        #                     ; 4,5,6,7,8,9,10.
+        SEQUENCE_SET_ITEM = /#{SEQ_NUMBER}|#{SEQ_RANGE}/n
+        SEQUENCE_SET      = /#{SEQUENCE_SET_ITEM}(?:,#{SEQUENCE_SET_ITEM})*/n
+        SEQUENCE_SET_STR  = /\A#{SEQUENCE_SET}\z/n
         # RFC3501:
         #   literal          = "{" number "}" CRLF *CHAR8
         #                        ; Number represents the number of CHAR8s
@@ -279,6 +327,16 @@ module Net
         #                        ; sent from server to the client.
         LITERAL              = /\{(\d+)\}\r\n/n
+        # RFC3516 (BINARY):
+        #   literal8         =   "~{" number "}" CRLF *OCTET
+        #                        ; <number> represents the number of OCTETs
+        #                        ; in the response string.
+        # RFC9051:
+        #   literal8         =  "~{" number64 "}" CRLF *OCTET
+        #                        ; <number64> represents the number of OCTETs
+        #                        ; in the response string.
+        LITERAL8             = /~\{(\d+)\}\r\n/n
         module_function
         def unescape_quoted!(quoted)
@@ -298,27 +356,28 @@ module Net
       # the default, used in most places
       BEG_REGEXP = /\G(?:\
 (?# 1:  SPACE   )( )|\
-(?# 2:  ATOM prefixed with a compatible subtype)\
+(?# 2:  LITERAL8)#{Patterns::LITERAL8}|\
+(?# 3:  ATOM prefixed with a compatible subtype)\
 ((?:\
-(?# 3:  NIL     )(NIL)|\
-(?# 4:  NUMBER  )(\d+)|\
-(?# 5:  PLUS    )(\+))\
-(?# 6:  ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
+(?# 4:  NIL     )(NIL)|\
+(?# 5:  NUMBER  )(\d+)|\
+(?# 6:  PLUS    )(\+))\
+(?# 7:  ATOM remaining after prefix )(#{Patterns::ATOMISH})?\
 (?# This enables greedy alternation without lookahead, in linear time.)\
 )|\
 (?# Also need to check for ATOM without a subtype prefix.)\
-(?# 7:  ATOM    )(#{Patterns::ATOMISH})|\
-(?# 8:  QUOTED  )#{Patterns::QUOTED_rev2}|\
-(?# 9: LPAR    )(\()|\
-(?# 10: RPAR    )(\))|\
-(?# 11: BSLASH  )(\\)|\
-(?# 12: STAR    )(\*)|\
-(?# 13: LBRA    )(\[)|\
-(?# 14: RBRA    )(\])|\
-(?# 15: LITERAL )#{Patterns::LITERAL}|\
-(?# 16: PERCENT )(%)|\
-(?# 17: CRLF    )(\r\n)|\
-(?# 18: EOF     )(\z))/ni
+(?# 8:  ATOM    )(#{Patterns::ATOMISH})|\
+(?# 9:  QUOTED  )#{Patterns::QUOTED_rev2}|\
+(?# 10: LPAR    )(\()|\
+(?# 11: RPAR    )(\))|\
+(?# 12: BSLASH  )(\\)|\
+(?# 13: STAR    )(\*)|\
+(?# 14: LBRA    )(\[)|\
+(?# 15: RBRA    )(\])|\
+(?# 16: LITERAL )#{Patterns::LITERAL}|\
+(?# 17: PERCENT )(%)|\
+(?# 18: CRLF    )(\r\n)|\
+(?# 19: EOF     )(\z))/ni
       # envelope, body(structure), namespaces
       DATA_REGEXP = /\G(?:\
@@ -359,6 +418,9 @@ module Net
       #   string          = quoted / literal
       def_token_matchers :string,  T_QUOTED, T_LITERAL
+      # used by nstring8 = nstring / literal8
+      def_token_matchers :string8, T_QUOTED, T_LITERAL, T_LITERAL8
       # use where string represents "LABEL" values
       def_token_matchers :case_insensitive__string,
                          T_QUOTED, T_LITERAL,
@@ -390,6 +452,24 @@ module Net
       # ATOM-CHAR       = <any CHAR except atom-specials>
       ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
+      SEQUENCE_SET_TOKENS = [T_ATOM, T_NUMBER, T_STAR]
+      #   sequence-set    = (seq-number / seq-range) ["," sequence-set]
+      #   sequence-set    =/ seq-last-command
+      #                       ; Allow for "result of the last command"
+      #                       ; indicator.
+      #   seq-last-command   = "$"
+      #
+      # *note*: doesn't match seq-last-command
+      def sequence_set
+        str = combine_adjacent(*SEQUENCE_SET_TOKENS)
+        if Patterns::SEQUENCE_SET_STR.match?(str)
+          SequenceSet.new(str)
+        else
+          parse_error("unexpected atom %p, expected sequence-set", str)
+        end
+      end
       # ASTRING-CHAR    = ATOM-CHAR / resp-specials
       # resp-specials   = "]"
       ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
@@ -460,6 +540,10 @@ module Net
         NIL? ? nil : string
       end
+      def nstring8
+        NIL? ? nil : string8
+      end
       def nquoted
         NIL? ? nil : quoted
       end
@@ -469,6 +553,60 @@ module Net
         NIL? ? nil : case_insensitive__string
       end
+      # tagged-ext-comp     = astring /
+      #                       tagged-ext-comp *(SP tagged-ext-comp) /
+      #                       "(" tagged-ext-comp ")"
+      #                       ; Extensions that follow this general
+      #                       ; syntax should use nstring instead of
+      #                       ; astring when appropriate in the context
+      #                       ; of the extension.
+      #                       ; Note that a message set or a "number"
+      #                       ; can always be represented as an "atom".
+      #                       ; A URL should be represented as
+      #                       ; a "quoted" string.
+      def tagged_ext_comp
+        vals = []
+        while true
+          vals << case lookahead!(*ASTRING_TOKENS, T_LPAR).symbol
+                  when T_LPAR   then lpar; ary = tagged_ext_comp; rpar; ary
+                  when T_NUMBER then number
+                  else               astring
+                  end
+          SP? or break
+        end
+        vals
+      end
+      # tagged-ext-simple is a subset of atom
+      # TODO: recognize sequence-set in the lexer
+      #
+      # tagged-ext-simple   = sequence-set / number / number64
+      def tagged_ext_simple
+        number? || sequence_set
+      end
+      # tagged-ext-val      = tagged-ext-simple /
+      #                       "(" [tagged-ext-comp] ")"
+      def tagged_ext_val
+        if lpar?
+          _ = peek_rpar? ? [] : tagged_ext_comp
+          rpar
+          _
+        else
+          tagged_ext_simple
+        end
+      end
+      # mailbox         = "INBOX" / astring
+      #                     ; INBOX is case-insensitive.  All case variants of
+      #                     ; INBOX (e.g., "iNbOx") MUST be interpreted as INBOX
+      #                     ; not as an astring.  An astring which consists of
+      #                     ; the case-insensitive sequence "I" "N" "B" "O" "X"
+      #                     ; is considered to be INBOX and not an astring.
+      #                     ;  Refer to section 5.1 for further
+      #                     ; semantic details of mailbox names.
+      alias mailbox astring
       # valid number ranges are not enforced by parser
       #   number64        = 1*DIGIT
       #                       ; Unsigned 63-bit integer
@@ -494,6 +632,12 @@ module Net
       #                          ; Strictly ascending
       alias uniqueid    nz_number
+      # valid number ranges are not enforced by parser
+      #
+      # The value is a 64-bit unsigned integer: the decimal equivalent of the
+      # ID hex string used in the web interface and the Gmail API.
+      alias x_gm_id     number
       # [RFC3501 & RFC9051:]
       #   response        = *(continue-req / response-data) response-done
       #
@@ -630,34 +774,47 @@ module Net
       # RFC3501 & RFC9051:
       #   response-tagged = tag SP resp-cond-state CRLF
-      #
-      #   resp-cond-state = ("OK" / "NO" / "BAD") SP resp-text
-      #                       ; Status condition
-      #
-      #   tag             = 1*<any ASTRING-CHAR except "+">
       def response_tagged
-        tag  = tag();                 SP!
-        name = resp_cond_state__name; SP!
-        TaggedResponse.new(tag, name, resp_text, @str)
+        TaggedResponse.new(tag, *(SP!; resp_cond_state), @str)
       end
       # RFC3501 & RFC9051:
       #   resp-cond-state  = ("OK" / "NO" / "BAD") SP resp-text
+      #
+      # NOTE: In the spirit of RFC9051 Appx E 23 (and to work around existing
+      # servers), we don't require a final SP and instead parse this as:
+      #
+      #   resp-cond-state = ("OK" / "NO" / "BAD") [SP resp-text]
+      def resp_cond_state
+        [resp_cond_state__name, SP? ? resp_text : ResponseText::EMPTY]
+      end
       def resp_cond_state__untagged
-        name = resp_cond_state__name; SP!
-        UntaggedResponse.new(name, resp_text, @str)
+        UntaggedResponse.new(*resp_cond_state, @str)
       end
       #   resp-cond-auth   = ("OK" / "PREAUTH") SP resp-text
+      #
+      # NOTE: In the spirit of RFC9051 Appx E 23 (and to work around existing
+      # servers), we don't require a final SP and instead parse this as:
+      #
+      #   resp-cond-auth   = ("OK" / "PREAUTH") [SP resp-text]
       def resp_cond_auth
-        name = resp_cond_auth__name; SP!
-        UntaggedResponse.new(name, resp_text, @str)
+        UntaggedResponse.new(resp_cond_auth__name,
+                             SP? ? resp_text : ResponseText::EMPTY,
+                             @str)
       end
       #   resp-cond-bye    = "BYE" SP resp-text
+      #
+      # NOTE: In the spirit of RFC9051 Appx E 23 (and to work around existing
+      # servers), we don't require a final SP and instead parse this as:
+      #
+      #   resp-cond-bye    = "BYE" [SP resp-text]
       def resp_cond_bye
-        name = label(BYE); SP!
-        UntaggedResponse.new(name, resp_text, @str)
+        UntaggedResponse.new(label(BYE),
+                             SP? ? resp_text : ResponseText::EMPTY,
+                             @str)
       end
       #   message-data    = nz-number SP ("EXPUNGE" / ("FETCH" SP msg-att))
@@ -740,10 +897,17 @@ module Net
             when "ENVELOPE"             then envelope
             when "INTERNALDATE"         then date_time
             when "RFC822.SIZE"          then number64
+            when /\ABINARY\[/ni         then nstring8           # BINARY, IMAP4rev2
+            when /\ABINARY\.SIZE\[/ni   then number             # BINARY, IMAP4rev2
             when "RFC822"               then nstring            # not in rev2
             when "RFC822.HEADER"        then nstring            # not in rev2
             when "RFC822.TEXT"          then nstring            # not in rev2
             when "MODSEQ"               then parens__modseq     # CONDSTORE
+            when "EMAILID"              then parens__objectid   # OBJECTID
+            when "THREADID"             then nparens__objectid  # OBJECTID
+            when "X-GM-MSGID"           then x_gm_id            # GMail
+            when "X-GM-THRID"           then x_gm_id            # GMail
+            when "X-GM-LABELS"          then x_gm_labels        # GMail
             else parse_error("unknown attribute `%s' for {%d}", name, n)
             end
           attr[name] = val
@@ -762,46 +926,75 @@ module Net
           lbra? and rbra
         when "BODY"
           peek_lbra? and name << section and
-            peek_str?("<") and name << atom # partial
+            peek_str?("<") and name << gt__number__lt # partial
+        when "BINARY", "BINARY.SIZE"
+          name << section_binary
+          # see https://www.rfc-editor.org/errata/eid7246 and the note above
+          peek_str?("<") and name << gt__number__lt # partial
         end
         name
       end
+      # this represents the partial suffix, "<" number ">" (the origin octet),
+      # for BODY or BINARY
+      alias gt__number__lt atom
+      # RFC3501 & RFC9051:
+      #   envelope        = "(" env-date SP env-subject SP env-from SP
+      #                     env-sender SP env-reply-to SP env-to SP env-cc SP
+      #                     env-bcc SP env-in-reply-to SP env-message-id ")"
       def envelope
         @lex_state = EXPR_DATA
-        token = lookahead
-        if token.symbol == T_NIL
-          shift_token
-          result = nil
-        else
-          match(T_LPAR)
-          date = nstring
-          match(T_SPACE)
-          subject = nstring
-          match(T_SPACE)
-          from = address_list
-          match(T_SPACE)
-          sender = address_list
-          match(T_SPACE)
-          reply_to = address_list
-          match(T_SPACE)
-          to = address_list
-          match(T_SPACE)
-          cc = address_list
-          match(T_SPACE)
-          bcc = address_list
-          match(T_SPACE)
-          in_reply_to = nstring
-          match(T_SPACE)
-          message_id = nstring
-          match(T_RPAR)
-          result = Envelope.new(date, subject, from, sender, reply_to,
-                                to, cc, bcc, in_reply_to, message_id)
-        end
+        lpar; date        = env_date
+        SP!;  subject     = env_subject
+        SP!;  from        = env_from
+        SP!;  sender      = env_sender
+        SP!;  reply_to    = env_reply_to
+        SP!;  to          = env_to
+        SP!;  cc          = env_cc
+        SP!;  bcc         = env_bcc
+        SP!;  in_reply_to = env_in_reply_to
+        SP!;  message_id  = env_message_id
+        rpar
+        Envelope.new(date, subject, from, sender, reply_to,
+                     to, cc, bcc, in_reply_to, message_id)
+      ensure
         @lex_state = EXPR_BEG
-        return result
       end
+      #   env-date        = nstring
+      #   env-subject     = nstring
+      #   env-in-reply-to = nstring
+      #   env-message-id  = nstring
+      alias env_date        nstring
+      alias env_subject     nstring
+      alias env_in_reply_to nstring
+      alias env_message_id  nstring
+      #   env-from        = "(" 1*address ")" / nil
+      #   env-sender      = "(" 1*address ")" / nil
+      #   env-reply-to    = "(" 1*address ")" / nil
+      #   env-to          = "(" 1*address ")" / nil
+      #   env-cc          = "(" 1*address ")" / nil
+      #   env-bcc         = "(" 1*address ")" / nil
+      def nlist__address
+        return if NIL?
+        lpar; list = [address]; list << address until (quirky_SP?; rpar?)
+        list
+      end
+      alias env_from     nlist__address
+      alias env_sender   nlist__address
+      alias env_reply_to nlist__address
+      alias env_to       nlist__address
+      alias env_cc       nlist__address
+      alias env_bcc      nlist__address
+      # Used when servers erroneously send an extra SP.
+      #
+      # As of 2023-11-28, Outlook.com (still) sends SP
+      #   between +address+ in <tt>env-*</tt> lists.
+      alias quirky_SP? SP?
       #   date-time       = DQUOTE date-day-fixed "-" date-month "-" date-year
       #                     SP time SP zone DQUOTE
       alias date_time quoted
@@ -1070,6 +1263,13 @@ module Net
         str << rbra
       end
+      # section-binary  = "[" [section-part] "]"
+      def section_binary
+        str = +lbra
+        str << section_part unless peek_rbra?
+        str << rbra
+      end
       # section-spec    = section-msgtext / (section-part ["." section-text])
       # section-msgtext = "HEADER" /
       #                   "HEADER.FIELDS" [".NOT"] SP header-list /
@@ -1100,6 +1300,11 @@ module Net
         str << rpar
       end
+      # section-part    = nz-number *("." nz-number)
+      #                     ; body part reference.
+      #                     ; Allows for accessing nested body parts.
+      alias section_part atom
       # RFC3501 & RFC9051:
       #   header-fld-name = astring
       #
@@ -1148,18 +1353,17 @@ module Net
       alias mailbox_data__lsub  mailbox_data__list
       alias mailbox_data__xlist mailbox_data__list
+      # mailbox-list    = "(" [mbx-list-flags] ")" SP
+      #                    (DQUOTE QUOTED-CHAR DQUOTE / nil) SP mailbox
+      #                    [SP mbox-list-extended]
+      #             ; This is the list information pointed to by the ABNF
+      #             ; item "mailbox-data", which is defined above
       def mailbox_list
-        attr = flag_list
-        match(T_SPACE)
-        token = match(T_QUOTED, T_NIL)
-        if token.symbol == T_NIL
-          delim = nil
-        else
-          delim = token.value
-        end
-        match(T_SPACE)
-        name = astring
-        return MailboxList.new(attr, delim, name)
+        lpar; attr  = peek_rpar? ? [] : mbx_list_flags; rpar
+        SP!;  delim = nquoted
+        SP!;  name  = mailbox
+        # TODO: mbox-list-extended
+        MailboxList.new(attr, delim, name)
       end
       def getquota_response
@@ -1254,124 +1458,143 @@ module Net
       #   mailbox-data        = obsolete-search-response / ...
       #   obsolete-search-response = "SEARCH" *(SP nz-number)
       def mailbox_data__search
-        token = match(T_ATOM)
-        name = token.value.upcase
-        token = lookahead
-        if token.symbol == T_SPACE
-          shift_token
-          data = []
-          while true
-            token = lookahead
-            case token.symbol
-            when T_CRLF
-              break
-            when T_SPACE
-              shift_token
-            when T_NUMBER
-              data.push(number)
-            when T_LPAR
-              # TODO: include the MODSEQ value in a response
-              shift_token
-              match(T_ATOM)
-              match(T_SPACE)
-              match(T_NUMBER)
-              match(T_RPAR)
-            end
-          end
-        else
-          data = []
+        name = label_in("SEARCH", "SORT")
+        data = []
+        while _ = SP? && nz_number? do data << _ end
+        if lpar?
+          label("MODSEQ"); SP!
+          mod_sequence_value
+          rpar
         end
-        return UntaggedResponse.new(name, data, @str)
+        UntaggedResponse.new(name, data, @str)
       end
       alias sort_data mailbox_data__search
+      # RFC5256: THREAD
+      #   thread-data     = "THREAD" [SP 1*thread-list]
       def thread_data
-        token = match(T_ATOM)
-        name = token.value.upcase
-        token = lookahead
-        if token.symbol == T_SPACE
-          threads = []
-          while true
-            shift_token
-            token = lookahead
-            case token.symbol
-            when T_LPAR
-              threads << thread_branch(token)
-            when T_CRLF
-              break
-            end
-          end
-        else
-          # no member
-          threads = []
+        name    = label("THREAD")
+        threads = []
+        if SP?
+          threads << thread_list while lookahead_thread_list?
         end
-        return UntaggedResponse.new(name, threads, @str)
+        UntaggedResponse.new(name, threads, @str)
       end
-      def thread_branch(token)
-        rootmember = nil
-        lastmember = nil
+      alias lookahead_thread_list?   lookahead_lpar?
+      alias lookahead_thread_nested? lookahead_thread_list?
-        while true
-          shift_token    # ignore first T_LPAR
-          token = lookahead
-          case token.symbol
-          when T_NUMBER
-            # new member
-            newmember = ThreadMember.new(number, [])
-            if rootmember.nil?
-              rootmember = newmember
-            else
-              lastmember.children << newmember
-            end
-            lastmember = newmember
-          when T_SPACE
-            # do nothing
-          when T_LPAR
-            if rootmember.nil?
-              # dummy member
-              lastmember = rootmember = ThreadMember.new(nil, [])
-            end
+      # RFC5256: THREAD
+      #   thread-list     = "(" (thread-members / thread-nested) ")"
+      def thread_list
+        lpar
+        thread = if lookahead_thread_nested?
+                   ThreadMember.new(nil, thread_nested)
+                 else
+                   thread_members
+                 end
+        rpar
+        thread
+      end
-            lastmember.children << thread_branch(token)
-          when T_RPAR
-            break
+      # RFC5256: THREAD
+      #   thread-members  = nz-number *(SP nz-number) [SP thread-nested]
+      def thread_members
+        members = []
+        members << nz_number # thread root
+        while SP?
+          case lookahead!(T_NUMBER, T_LPAR).symbol
+          when T_NUMBER then members << nz_number
+          else               nested = thread_nested; break
           end
         end
+        members.reverse.inject(nested || []) {|subthreads, number|
+          [ThreadMember.new(number, subthreads)]
+        }.first
+      end
-        return rootmember
+      # RFC5256: THREAD
+      #   thread-nested   = 2*thread-list
+      def thread_nested
+        nested = [thread_list, thread_list]
+        while lookahead_thread_list? do nested << thread_list end
+        nested
       end
+      #   mailbox-data    =/ "STATUS" SP mailbox SP "(" [status-att-list] ")"
       def mailbox_data__status
-        token = match(T_ATOM)
-        name = token.value.upcase
-        match(T_SPACE)
-        mailbox = astring
-        match(T_SPACE)
-        match(T_LPAR)
-        attr = {}
-        while true
-          token = lookahead
-          case token.symbol
-          when T_RPAR
-            shift_token
-            break
-          when T_SPACE
-            shift_token
+        resp_name  = label("STATUS"); SP!
+        mbox_name  = mailbox;         SP!
+        lpar; attr = status_att_list; rpar
+        UntaggedResponse.new(resp_name, StatusData.new(mbox_name, attr), @str)
+      end
+      # RFC3501
+      #   status-att-list = status-att SP number *(SP status-att SP number)
+      # RFC4466, RFC9051, and RFC3501 Errata
+      #   status-att-list = status-att-val *(SP status-att-val)
+      def status_att_list
+        attrs = [status_att_val]
+        while SP? do attrs << status_att_val end
+        attrs.to_h
+      end
+      # RFC3501 Errata:
+      # status-att-val  = ("MESSAGES" SP number) / ("RECENT" SP number) /
+      #                   ("UIDNEXT" SP nz-number) / ("UIDVALIDITY" SP nz-number) /
+      #                   ("UNSEEN" SP number)
+      # RFC4466:
+      # status-att-val  = ("MESSAGES" SP number) /
+      #                   ("RECENT" SP number) /
+      #                   ("UIDNEXT" SP nz-number) /
+      #                   ("UIDVALIDITY" SP nz-number) /
+      #                   ("UNSEEN" SP number)
+      #                   ;; Extensions to the STATUS responses
+      #                   ;; should extend this production.
+      #                   ;; Extensions should use the generic
+      #                   ;; syntax defined by tagged-ext.
+      # RFC9051:
+      # status-att-val  = ("MESSAGES" SP number) /
+      #                   ("UIDNEXT" SP nz-number) /
+      #                   ("UIDVALIDITY" SP nz-number) /
+      #                   ("UNSEEN" SP number) /
+      #                   ("DELETED" SP number) /
+      #                   ("SIZE" SP number64)
+      #                     ; Extensions to the STATUS responses
+      #                     ; should extend this production.
+      #                     ; Extensions should use the generic
+      #                     ; syntax defined by tagged-ext.
+      # RFC7162:
+      # status-att-val      =/ "HIGHESTMODSEQ" SP mod-sequence-valzer
+      #                        ;; Extends non-terminal defined in [RFC4466].
+      #                        ;; Value 0 denotes that the mailbox doesn't
+      #                        ;; support persistent mod-sequences
+      #                        ;; as described in Section 3.1.2.2.
+      # RFC7889:
+      # status-att-val =/ "APPENDLIMIT" SP (number / nil)
+      #                 ;; status-att-val is defined in RFC 4466
+      # RFC8438:
+      # status-att-val =/ "SIZE" SP number64
+      # RFC8474:
+      # status-att-val =/ "MAILBOXID" SP "(" objectid ")"
+      #         ; follows tagged-ext production from [RFC4466]
+      def status_att_val
+        key = tagged_ext_label
+        SP!
+        val =
+          case key
+          when "MESSAGES"      then number              # RFC3501, RFC9051
+          when "UNSEEN"        then number              # RFC3501, RFC9051
+          when "DELETED"       then number              # RFC9051
+          when "UIDNEXT"       then nz_number           # RFC3501, RFC9051
+          when "UIDVALIDITY"   then nz_number           # RFC3501, RFC9051
+          when "RECENT"        then number              # RFC3501 (obsolete)
+          when "SIZE"          then number64            # RFC8438, RFC9051
+          when "MAILBOXID"     then parens__objectid    # RFC8474
+          else
+            number? || ExtensionData.new(tagged_ext_val)
           end
-          token = match(T_ATOM)
-          key = token.value.upcase
-          match(T_SPACE)
-          val = number
-          attr[key] = val
-        end
-        data = StatusData.new(mailbox, attr)
-        return UntaggedResponse.new(name, data, @str)
+        [key, val]
       end
       # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.
@@ -1573,6 +1796,9 @@ module Net
       #   resp-text-code   =/ "HIGHESTMODSEQ" SP mod-sequence-value /
       #                       "NOMODSEQ" /
       #                       "MODIFIED" SP sequence-set
+      #
+      # RFC8474: OBJECTID
+      #   resp-text-code   =/ "MAILBOXID" SP "(" objectid ")"
       def resp_text_code
         name = resp_text_code__name
         data =
@@ -1592,6 +1818,7 @@ module Net
             "LIMIT", "OVERQUOTA", "ALREADYEXISTS", "NONEXISTENT", "CLOSED",
             "NOTSAVED", "UIDNOTSTICKY", "UNKNOWN-CTE", "HASCHILDREN"
           when "NOMODSEQ"           # CONDSTORE
+          when "MAILBOXID"          then SP!; parens__objectid     # RFC8474: OBJECTID
           else
             SP? and text_chars_except_rbra
           end
@@ -1638,61 +1865,40 @@ module Net
         UIDPlusData.new(validity, src_uids, dst_uids)
       end
-      def address_list
-        token = lookahead
-        if token.symbol == T_NIL
-          shift_token
-          return nil
-        else
-          result = []
-          match(T_LPAR)
-          while true
-            token = lookahead
-            case token.symbol
-            when T_RPAR
-              shift_token
-              break
-            when T_SPACE
-              shift_token
-            end
-            result.push(address)
-          end
-          return result
-        end
-      end
-      ADDRESS_REGEXP = /\G\
-(?# 1: NAME     )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
-(?# 2: ROUTE    )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
-(?# 3: MAILBOX  )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \
-(?# 4: HOST     )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)")\
-\)/ni
+      ADDRESS_REGEXP = /\G
+        \( (?: NIL | #{Patterns::QUOTED_rev2} )  # 1: NAME
+        \s (?: NIL | #{Patterns::QUOTED_rev2} )  # 2: ROUTE
+        \s (?: NIL | #{Patterns::QUOTED_rev2} )  # 3: MAILBOX
+        \s (?: NIL | #{Patterns::QUOTED_rev2} )  # 4: HOST
+        \)
+      /nix
+      #   address         = "(" addr-name SP addr-adl SP addr-mailbox SP
+      #                     addr-host ")"
+      #   addr-adl        = nstring
+      #   addr-host       = nstring
+      #   addr-mailbox    = nstring
+      #   addr-name       = nstring
       def address
-        match(T_LPAR)
-        if @str.index(ADDRESS_REGEXP, @pos)
-          # address does not include literal.
-          @pos = $~.end(0)
-          name = $1
-          route = $2
-          mailbox = $3
-          host = $4
-          for s in [name, route, mailbox, host]
-            Patterns.unescape_quoted! s
-          end
-        else
-          name = nstring
-          match(T_SPACE)
-          route = nstring
-          match(T_SPACE)
-          mailbox = nstring
-          match(T_SPACE)
-          host = nstring
-          match(T_RPAR)
+        if (match = accept_re(ADDRESS_REGEXP))
+          # note that "NIL" isn't captured by the regexp
+          name, route, mailbox, host = match.captures
+            .map { Patterns.unescape_quoted _1 }
+        else # address may include literals
+          lpar; name    = addr_name
+          SP!;  route   = addr_adl
+          SP!;  mailbox = addr_mailbox
+          SP!;  host    = addr_host
+          rpar
         end
-        return Address.new(name, route, mailbox, host)
+        Address.new(name, route, mailbox, host)
       end
+      alias addr_adl     nstring
+      alias addr_host    nstring
+      alias addr_mailbox nstring
+      alias addr_name    nstring
       # flag-list       = "(" [flag *(SP flag)] ")"
       def flag_list
         match_re(Patterns::FLAG_LIST, "flag-list")[1]
@@ -1707,22 +1913,23 @@ module Net
           .map! { _1.start_with?("\\") ? _1[1..].capitalize.to_sym : _1 }
       end
-      # Not checking for max one mbx-list-sflag in the parser.
-      # >>>
-      #   mbx-list-flags  = *(mbx-list-oflag SP) mbx-list-sflag
-      #                     *(SP mbx-list-oflag) /
-      #                     mbx-list-oflag *(SP mbx-list-oflag)
-      #   mbx-list-oflag  = "\Noinferiors" / child-mbox-flag /
-      #                     "\Subscribed" / "\Remote" / flag-extension
-      #                  ; Other flags; multiple from this list are
-      #                  ; possible per LIST response, but each flag
-      #                  ; can only appear once per LIST response
-      #   mbx-list-sflag  = "\NonExistent" / "\Noselect" / "\Marked" /
-      #                     "\Unmarked"
-      #                  ; Selectability flags; only one per LIST response
-      def parens__mbx_list_flags
+      # See Patterns::MBX_LIST_FLAGS
+      def mbx_list_flags
         match_re(Patterns::MBX_LIST_FLAGS, "mbx-list-flags")[1]
-          .split(nil).map! { _1.capitalize.to_sym }
+          .split(nil).map! { _1[1..].capitalize.to_sym }
+      end
+      # See https://developers.google.com/gmail/imap/imap-extensions
+      def x_gm_label; accept(T_BSLASH) ? atom.capitalize.to_sym : astring end
+      # See https://developers.google.com/gmail/imap/imap-extensions
+      def x_gm_labels
+        lpar; return [] if rpar?
+        labels = []
+        labels << x_gm_label
+        labels << x_gm_label while SP?
+        rpar
+        labels
       end
       # See https://www.rfc-editor.org/errata/rfc3501
@@ -1744,6 +1951,15 @@ module Net
       def parens__modseq; lpar; _ = permsg_modsequence; rpar; _ end
+      # RFC8474:
+      # objectid = 1*255(ALPHA / DIGIT / "_" / "-")
+      #         ; characters in object identifiers are case
+      #         ; significant
+      alias objectid atom
+      def parens__objectid; lpar; _ = objectid; rpar; _ end
+      def nparens__objectid; NIL? ? nil : parens__objectid end
       # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
       #      uid-set         = (uniqueid / uid-range) *("," uid-set)
       #      uid-range       = (uniqueid ":" uniqueid)
@@ -1789,42 +2005,47 @@ module Net
             @pos = $~.end(0)
             if $1
               return Token.new(T_SPACE, $+)
-            elsif $2 && $6
+            elsif $2
+              len = $+.to_i
+              val = @str[@pos, len]
+              @pos += len
+              return Token.new(T_LITERAL8, val)
+            elsif $3 && $7
               # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS.
-              return Token.new(T_ATOM, $2)
-            elsif $3
-              return Token.new(T_NIL, $+)
+              return Token.new(T_ATOM, $3)
             elsif $4
-              return Token.new(T_NUMBER, $+)
+              return Token.new(T_NIL, $+)
             elsif $5
+              return Token.new(T_NUMBER, $+)
+            elsif $6
               return Token.new(T_PLUS, $+)
-            elsif $7
+            elsif $8
               # match ATOM, without a NUMBER, NIL, or PLUS prefix
               return Token.new(T_ATOM, $+)
-            elsif $8
-              return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
             elsif $9
-              return Token.new(T_LPAR, $+)
+              return Token.new(T_QUOTED, Patterns.unescape_quoted($+))
             elsif $10
-              return Token.new(T_RPAR, $+)
+              return Token.new(T_LPAR, $+)
             elsif $11
-              return Token.new(T_BSLASH, $+)
+              return Token.new(T_RPAR, $+)
             elsif $12
-              return Token.new(T_STAR, $+)
+              return Token.new(T_BSLASH, $+)
             elsif $13
-              return Token.new(T_LBRA, $+)
+              return Token.new(T_STAR, $+)
             elsif $14
-              return Token.new(T_RBRA, $+)
+              return Token.new(T_LBRA, $+)
             elsif $15
+              return Token.new(T_RBRA, $+)
+            elsif $16
               len = $+.to_i
               val = @str[@pos, len]
               @pos += len
               return Token.new(T_LITERAL, val)
-            elsif $16
-              return Token.new(T_PERCENT, $+)
             elsif $17
-              return Token.new(T_CRLF, $+)
+              return Token.new(T_PERCENT, $+)
             elsif $18
+              return Token.new(T_CRLF, $+)
+            elsif $19
               return Token.new(T_EOF, $+)
             else
               parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid")