violet 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/violet +34 -0
- data/lib/violet.rb +22 -0
- data/lib/violet/lexer.rb +671 -0
- data/lib/violet/parser.rb +1440 -0
- data/lib/violet/token.rb +78 -0
- data/test/test_assertions.rb +550 -0
- data/test/test_violet.rb +36 -0
- metadata +90 -0
data/bin/violet
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/env ruby

module Violet
  # Load the library relative to this executable so the binary works from a
  # source checkout without RubyGems, then pull in the `pal` REPL gem.
  require File.expand_path("../lib/violet", File.dirname(__FILE__))
  require "pal"

  # Internal: Defines the commands and variables available to the interactive
  # shell.
  class Command < Pal::Context
    # Public: Lexes a string of JavaScript source code.
    #
    # source   - The source `String`.
    # patterns - Boolean arguments that correspond to each lexed token and
    #   specify if the `/` and `/=` tokens may be interpreted as regular
    #   expressions (`true`) or division operators (`false`).
    #
    # Returns an `Array` of `Token`s.
    def lex(source, *patterns)
      Lexer.new(source).tokens(*patterns)
    end

    # Public: Parses a string of JavaScript source code.
    #
    # source - The source `String`.
    #
    # Returns the result of `Parser.parse`. NOTE(review): the original comment
    # claimed an `Array` of `Token`s, copied from `#lex`; `Parser.parse` is not
    # visible here — confirm its actual return value.
    def parse(source)
      Parser.parse(source)
    end
  end

  # Start the interactive shell; blocks until the user exits.
  Pal::REPL.new("violet", Command.new).loop
end
|
data/lib/violet.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-

module Violet
  # Public: Contains the version information.
  module Version
    # Public: The current version of Violet. The major, minor, and patch
    # versions are exposed as individual constants, and comprise the
    # semantic version string.
    MAJOR = 0
    MINOR = 0
    PATCH = 1
    STRING = [MAJOR, MINOR, PATCH].join(".")
  end

  # Internal: A named `Error` class, used for reporting parse errors.
  Error = Class.new(StandardError)

  # Prepend the `lib` directory to the load path to facilitate loading Violet
  # without RubyGems. Modules and classes will be loaded as needed.
  $:.unshift File.expand_path(File.dirname(__FILE__))

  autoload :Token, "violet/token"
  autoload :Lexer, "violet/lexer"
  autoload :Parser, "violet/parser"
end
|
data/lib/violet/lexer.rb
ADDED
@@ -0,0 +1,671 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-

module Violet
  # Internal: Records exceptions emitted by the lexer. Subclasses the
  # library-wide `Violet::Error` defined in `lib/violet.rb`.
  LexerError = Class.new(Error)

  # Public: Lexes a JavaScript source string.
  class Lexer
    # Public: Matches line terminators: line feeds, carriage returns, line
    # separators, and paragraph separators. See section 7.3 of the ES 5.1 spec.
    LINE_TERMINATORS = /[\n\r\u2028\u2029]/
|
12
|
+
|
13
|
+
# Public: Matches line separators, paragraph separators, and carriage
|
14
|
+
# returns not followed by line separators. Used to convert all line
|
15
|
+
# terminators to line feeds. CRLF line endings are preserved.
|
16
|
+
NORMALIZE_LINE_ENDINGS = /[\u2028\u2029]|(?:\r[^\n])/
|
17
|
+
|
18
|
+
# Public: Matches Unicode letters, `$`, `_`, and Unicode escape sequences.
|
19
|
+
# See section 7.6.
|
20
|
+
IDENTIFIER_START = /[$_\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]/
|
21
|
+
|
22
|
+
    # Public: Matches any character valid *inside* an identifier: identifier
    # starting characters, Unicode combining marks (Mn, Mc), Unicode digits
    # (Nd), Unicode connector punctuators (Pc), zero-width non-joiners
    # (U+200C), and zero-width joiners (U+200D). This is the `IdentifierPart`
    # production of section 7.6.
    IDENTIFIER_FRAGMENT = Regexp.union(IDENTIFIER_START, /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}\u200c\u200d]/)
|
26
|
+
|
27
|
+
    # Public: Matches an ECMAScript token. This is a superset of the `Token`
    # production defined in section 7.5 of the spec. Every capture is
    # optional, so the pattern always matches; `#lex` dispatches on whichever
    # named capture is non-nil.
    TOKEN = %r(
      # Whitespace characters: tab, vertical tab, form feed, space,
      # non-breaking space, byte-order mark, and other Unicode space
      # separators (category Z, matched by \p{Z}). See section 7.2.
      # NOTE(review): \uffff is not ES whitespace — confirm it is intended
      # here as a sentinel.
      (?<whitespace>[\t\v\f\ufeff\uffff\p{Z}])?
      # Line terminators. See section 7.3.
      (?<line_terminator>#{LINE_TERMINATORS})?
      # Line and block comments. See section 7.4.
      (?<line_comment>//)?
      (?<block_comment>/\*)?
      # Single- and double-quoted string literals. See section 7.8.4.
      (?<single_quoted_string>')?
      (?<double_quoted_string>")?
      # Numeric literals (leading digit or `.digit`). See section 7.8.3.
      (?<number>\.?[0-9])?
      # RegExp literals. See section 7.8.5. This capture may also match the
      # `DivPunctuator` production; `#lex` decides which via its `pattern`
      # argument. The `[^=]` excludes `/=` from this capture.
      (?:(?<pattern>/)[^=])?
      # Punctuators, longest first. See section 7.7. NOTE(review): `\|`
      # appears twice among the single-character alternatives — harmless
      # duplication.
      (?<punctuator>\>>>=|===|!==|>>>|<<=|>>=|<=|>=|==|!=|\+\+|--|<<|>>|&&|
      \|\||\+=|-=|\*=|%=|&=|\|=|\^=|/=|\{|\}|\(|\)|\[|\]|\.|;|,|<|>|\+|-|
      \*|%|\||&|\||\^|!|~|\?|:|=|/)?
    )x
|
53
|
+
|
54
|
+
# Internal: The `true`, `false`, and `null` literals, as well as the
|
55
|
+
# `undefined` value. The lexer marks these four values as primitives.
|
56
|
+
LITERALS = %w( undefined null true false )
|
57
|
+
|
58
|
+
# Internal: A `Hash` that contains the quote character, token kind, and the
|
59
|
+
# unterminated string and invalid line continuation error messages for
|
60
|
+
# single- and double-quoted string tokens.
|
61
|
+
STRINGS = %w( single ' double " ).each_slice(2).with_object({}) do |(kind, quote), value|
|
62
|
+
value[kind.to_sym] = {
|
63
|
+
:quote => quote,
|
64
|
+
:kind => "#{kind}_quoted_string".to_sym,
|
65
|
+
:unterminated_string_error => "Unterminated #{kind}-quoted string literal.",
|
66
|
+
:invalid_continuation_error => "Unescaped line terminators are not permitted within #{kind}-quoted string literals."
|
67
|
+
}
|
68
|
+
end
|
69
|
+
|
70
|
+
    # Public: Gets the source string.
    attr_reader :source

    # Public: Gets the current line (zero-based; advanced as line terminators
    # are lexed).
    attr_reader :line

    # Public: Gets the current column (zero-based; reset to 0 after each line
    # terminator).
    attr_reader :column
|
78
|
+
|
79
|
+
# Public: Creates a new `Lexer` with a source string.
|
80
|
+
#
|
81
|
+
# source - The source `String`.
|
82
|
+
def initialize(source)
|
83
|
+
@source = source
|
84
|
+
# Replace all line terminators with a single line feed, but preserve CRLF
|
85
|
+
# line endings.
|
86
|
+
@normalized_source = @source.gsub(NORMALIZE_LINE_ENDINGS, ?\n)
|
87
|
+
reset!
|
88
|
+
end
|
89
|
+
|
90
|
+
# Public: Resets the lexer to its original position and clears the token
|
91
|
+
# stream.
|
92
|
+
def reset!
|
93
|
+
@index = @line = @column = 0
|
94
|
+
@terminated = false
|
95
|
+
(@tokens ||= []).clear
|
96
|
+
end
|
97
|
+
|
98
|
+
# Public: Produces a complete token stream from the source. This method
|
99
|
+
# resets the lexer prior to lexing the source string.
|
100
|
+
#
|
101
|
+
# patterns - Zero or more boolean arguments that correspond to each lexed
|
102
|
+
# token and specify if the `/` and `/=` tokens may be interpreted as
|
103
|
+
# regular expressions (`true`) or division operators (`false`). This
|
104
|
+
# flag only applies to division and regular expression tokens; setting
|
105
|
+
# it for other tokens has no effect.
|
106
|
+
def tokens(*patterns)
|
107
|
+
reset!
|
108
|
+
index = -1
|
109
|
+
# Lex tokens until the end-of-file mark is reached.
|
110
|
+
loop { break unless lex patterns[index += 1] }
|
111
|
+
@tokens
|
112
|
+
end
|
113
|
+
|
114
|
+
    # Public: Inserts a new token into the token stream, before a reference
    # token. If the reference token is the end-of-file mark, the token is
    # appended instead.
    #
    # token    - The `Token` to be inserted into the token stream.
    # original - The reference `Token` before which the new `Token` is
    #            inserted.
    #
    # NOTE(review): the non-eof branch does not shift the array — it writes
    # `token` into `original`'s slot and writes `original` into the *next*
    # slot, overwriting whatever token was there. This is only safe when
    # `original` is the last element of the stream; later tokens' `:index`
    # entries are also left stale. Confirm the parser only ever inserts
    # before the most recently lexed token.
    #
    # Returns the new `Token`.
    def insert_before(token, original)
      if original[:name] == Token::Types[:eof]
        token[:index] = @tokens.size
        @tokens << token
      else
        token[:index] = original[:index]
        @tokens[token[:index]] = token
        original[:index] += 1
        @tokens[original[:index]] = original
      end
      token
    end
|
134
|
+
|
135
|
+
    # Internal: Returns the maximum number of characters, relative to the
    # current scan pointer, that may be parsed as valid identifier
    # characters. The scan pointer is not advanced.
    #
    # lex_as_fragment - A boolean that specifies whether the identifier may
    #   be lexed as a fragment (`IdentifierPart`). When false, the first
    #   character must satisfy the stricter `IdentifierStart` production
    #   (default: false).
    #
    # NOTE(review): the loop condition `until eof?` tests `@index`, which
    # never changes inside this method — termination actually relies on
    # `@source[size]` returning nil past the end of the string, which fails
    # both `=~` tests and triggers a `break`. The condition is effectively
    # "until the lexer was already at eof on entry".
    #
    # Returns the matched length as an `Integer` (zero if no identifier).
    def match_identifier?(lex_as_fragment = false)
      size = @index
      until eof?
        # Unicode escape sequences (\uXXXX) may occur anywhere within an
        # identifier; consume all six characters at once.
        if /^\\u\h{4}$/ =~ @source[size, 6]
          size += 6
        else
          character = @source[size]
          if lex_as_fragment
            # Use the full `IdentifierPart` production.
            break unless character =~ IDENTIFIER_FRAGMENT
          else
            # The initial character must conform to the more restrictive
            # `IdentifierStart` production.
            break unless character =~ IDENTIFIER_START
            # All subsequent characters may be lexed as identifier fragments.
            lex_as_fragment = true
          end
          size += 1
        end
      end
      size - @index
    end
|
169
|
+
|
170
|
+
# Internal: Returns the maximum number of characters, relative to the
|
171
|
+
# current scan pointer, that may be parsed as valid decimal characters.
|
172
|
+
# The scan pointer is not advanced.
|
173
|
+
def match_decimal?
|
174
|
+
size = @index
|
175
|
+
size += 1 until eof? || @source[size] !~ /\d/
|
176
|
+
size - @index
|
177
|
+
end
|
178
|
+
|
179
|
+
# Public: Returns `true` if the lexer has reached the end of the source
|
180
|
+
# string.
|
181
|
+
def eof?
|
182
|
+
@terminated || @index >= @source.size
|
183
|
+
end
|
184
|
+
|
185
|
+
    # Public: Lexes a single token at the current scan position.
    #
    # pattern - If the token is `/` or `/=`, specifies whether it may be
    #   lexed as part of a regular expression. If `false`, the token will be
    #   lexed as a division operator instead (default: true).
    #
    # Returns the lexed `Token`, or `nil` if the lexer has finished scanning
    # the source.
    def lex(pattern = true)
      return if @terminated
      if eof?
        # Produce the end-of-file mark once; subsequent calls return nil.
        @terminated ||= true
        token = Token.new(self, :eof, @source.size...@source.size)
        # NOTE(review): this early return skips the append below, so the eof
        # token is returned to the caller but never stored in @tokens —
        # confirm this is intended.
        return token
      end
      # TOKEN always matches (every capture is optional); the block receives
      # the MatchData and its result becomes `token`.
      token = TOKEN.match(@source, @index) do |match|
        case
        # Produces a whitespace, line terminator, line comment (`// ...`), or
        # block comment (`/* ... */`) token.
        when match[:whitespace] then lex_whitespace
        when match[:line_terminator] then lex_line_terminator
        when match[:line_comment] then lex_line_comment
        when match[:block_comment] then lex_block_comment
        # Produces a single- or double-quoted string token. A single method
        # is used to produce both kinds of tokens.
        when match[:single_quoted_string] then lex_string :single
        when match[:double_quoted_string] then lex_string :double
        # Produces a hexadecimal or decimal token. Octal numbers produce an
        # error, as they are prohibited in ES 5.
        when match[:number] then lex_number
        # `/` and `/=` may be interpreted as either regular expressions or
        # division operators. The `pattern` argument specifies whether these
        # tokens should be lexed as RegExps or punctuators.
        when pattern && match[:pattern] then lex_pattern
        else
          # The `<pattern>` capture may contain the `/` and `/=` tokens.
          if result = match[:pattern] || match[:punctuator]
            token = Token.new(self, :punctuator, @index...@index += result.size)
            @column += token.size
            token
          else
            # Lex the token as an identifier.
            lex_identifier
          end
        end
      end
      # Record the position of the token in the token stream.
      token[:index] = @tokens.size
      @tokens << token
      token
    end
|
236
|
+
|
237
|
+
# Internal: Lexes a whitespace token at the current scan position.
|
238
|
+
#
|
239
|
+
# Returns the lexed `Token`.
|
240
|
+
def lex_whitespace
|
241
|
+
token = Token.new(self, :whitespace, @index...@index += 1)
|
242
|
+
token[:isWhite] = true
|
243
|
+
@column += 1
|
244
|
+
token
|
245
|
+
end
|
246
|
+
|
247
|
+
# Internal: Lexes a line terminator at the current scan position: either a
|
248
|
+
# line feed, carriage return, line separator, or paragraph separator. See
|
249
|
+
# section 7.3 of the spec.
|
250
|
+
#
|
251
|
+
# Returns the lexed `Token`.
|
252
|
+
def lex_line_terminator
|
253
|
+
character = @source[@index]
|
254
|
+
stop = @index + 1
|
255
|
+
# If the current character is a carriage return and the next character is
|
256
|
+
# a line feed, the source string contains CRLF line endings. The `stop`
|
257
|
+
# position is advanced one additional character, so that "\r\n" is treated
|
258
|
+
# as a single terminator.
|
259
|
+
stop += 1 if character == ?\r && @source[stop] == ?\n
|
260
|
+
# Advance the current index past the terminator.
|
261
|
+
token = Token.new(self, :line_terminator, @index...@index = stop)
|
262
|
+
token[:lines] = 1
|
263
|
+
token[:isWhite] = true
|
264
|
+
@line += 1
|
265
|
+
@column = 0
|
266
|
+
token
|
267
|
+
end
|
268
|
+
|
269
|
+
    # Internal: Lexes a line comment (`// ...`) at the current scan position.
    # The comment extends to the next line terminator (found via the
    # normalized source) or to the end of the source.
    #
    # NOTE(review): `@column` is assigned the *absolute source index* of the
    # newline here, not a column number — every other method treats @column
    # as a per-line offset. Confirm whether this is intentional.
    #
    # Returns the lexed `Token`.
    def lex_line_comment
      @column = @normalized_source.index(?\n, @index) || @source.length
      token = Token.new(self, :line_comment, @index...@index = @column)
      token[:isComment] = token[:isWhite] = true
      token
    end
|
278
|
+
|
279
|
+
    # Internal: Lexes a block comment (`/* ... */`) at the current scan
    # position.
    #
    # Returns the lexed `Token` (`:block_comment`, or `:error` when the
    # comment is unterminated).
    def lex_block_comment
      start = @index
      # Mark the ending position of the comment.
      stop = @source.index("*/", start)
      if stop
        # Advance the current position past the two-character "*/" closer.
        @index = stop + 2
        token = Token.new(self, :block_comment, start...@index)
        token[:isComment] = token[:isWhite] = true
        # Block comments trigger automatic semicolon insertion only if they
        # span multiple lines. The normalized source is used to quickly
        # detect line terminators.
        index = lines = 0
        # Count "\n" occurrences inside the comment span. NOTE(review): the
        # search starts at substring offset `index + 1`, so a terminator at
        # offset 0 would be missed — benign here, since the span always
        # begins with "/*".
        lines += 1 while index = @normalized_source[start...@index].index(?\n, index + 1)
        if lines.zero?
          # For single-line block comments, increase the column by the size
          # of the token.
          @column += token[:size]
        else
          # For multiline block comments, record the number of lines
          # comprising the comment and reset the column.
          @line += token[:lines] = lines
          @column = 0
        end
      else
        # Unterminated block comment. If a line terminator is found, the
        # comment is assumed to end immediately before it. Otherwise, the
        # comment is assumed to end two characters after the current scan
        # position.
        stop = @normalized_source.index(?\n, @index)
        @index = stop || @index + 2
        token = Token.new(self, :error, start...@index)
        token[:error] = "Unterminated block comment."
        token[:isComment] = token[:isWhite] = token[:tokenError] = true
        @column += token[:size]
      end
      token
    end
|
320
|
+
|
321
|
+
# Internal: Lexes a single- or double-quoted string primitive at the
|
322
|
+
# current scan position.
|
323
|
+
#
|
324
|
+
# style - A `Symbol` that specifies the quoting style. The quoting style
|
325
|
+
# must be defined as a key in the `Lexer::STRINGS` hash.
|
326
|
+
#
|
327
|
+
# Returns the lexed `Token`.
|
328
|
+
# Raises `KeyError` if the quoting style is not defined in the hash.
|
329
|
+
def lex_string(style)
|
330
|
+
style = STRINGS.fetch(style)
|
331
|
+
start = @index
|
332
|
+
lines = 0
|
333
|
+
loop do
|
334
|
+
# Parse escape sequences in strings.
|
335
|
+
until eof? || @source[@index += 1] != ?\\
|
336
|
+
# Record the number of new lines if the string contains linefeeds. The shadow input is
|
337
|
+
# used to avoid repeatedly normalizing line endings.
|
338
|
+
@line += (lines += 1) if @normalized_source[@index + 1] == ?\n
|
339
|
+
# Advance to the next character.
|
340
|
+
@index += 1
|
341
|
+
end
|
342
|
+
# If the string contains an unescaped line terminator, it is a syntax error. Some
|
343
|
+
# environments permit unescaped new lines in strings; however, the spec disallows them.
|
344
|
+
if @source[@index] =~ LINE_TERMINATORS
|
345
|
+
token = Token.new(self, :error, start...@index)
|
346
|
+
token[:error] = style[:invalid_continuation_error]
|
347
|
+
token[:isString] = token[:tokenError] = true
|
348
|
+
break
|
349
|
+
end
|
350
|
+
# Consume escape sequences until either the end of the source or the end-of-string character
|
351
|
+
# is reached.
|
352
|
+
break if eof? || @source[@index] == style[:quote]
|
353
|
+
end
|
354
|
+
# If the end of the source is reached without consuming the end-of-string character, the
|
355
|
+
# source contains an unterminated string literal.
|
356
|
+
if @source[@index] == style[:quote]
|
357
|
+
# Advance the index past the end-of-string character.
|
358
|
+
@index += 1
|
359
|
+
token = Token.new(self, style[:kind], start...@index)
|
360
|
+
token[:isPrimitive] = token[:isString] = true
|
361
|
+
# Update the line and column entries accordingly.
|
362
|
+
if lines.zero?
|
363
|
+
@column += token[:size]
|
364
|
+
else
|
365
|
+
token[:lines] = lines
|
366
|
+
@column = 0
|
367
|
+
end
|
368
|
+
else
|
369
|
+
token = Token.new(self, :error, start...@index)
|
370
|
+
token[:error] = style[:unterminated_string_error]
|
371
|
+
token[:isString] = token[:tokenError] = true
|
372
|
+
@column += token[:size]
|
373
|
+
end
|
374
|
+
token
|
375
|
+
end
|
376
|
+
|
377
|
+
    # Internal: Lexes a decimal or hexadecimal numeric value. See section
    # 7.8.3. Octal literals (leading zero followed by a digit) produce an
    # error token, as they are prohibited in ES 5.
    #
    # Returns the lexed `Token`.
    def lex_number
      start = @index
      @index += 1
      # If the token begins with `0x`/`0X`, parse the remainder as a
      # hexadecimal value.
      if @source[start..@index] =~ /0[xX]/
        position = @index += 1
        # Consume characters until the end of the string or a non-hexdigit
        # character is encountered.
        @index += 1 until eof? || @source[@index] !~ /\h/
        # If no additional characters were consumed, the hex value is invalid
        # (a bare "0x").
        if position == @index
          token = Token.new(self, :error, start...@index)
          token[:error] = "Invalid hexdigit value."
          token[:isNumber] = token[:tokenError] = true
        else
          # Syntactically sound; the parser judges semantics.
          token = Token.new(self, :hexadecimal_number, start...@index)
          token[:isPrimitive] = token[:isNumber] = true
        end
      else
        # Determine if an octal literal is being parsed (a leading zero
        # followed by a decimal digit).
        is_octal = @source[start..@index] =~ /0\d/
        # Parse the integral component before the decimal point (skipped
        # when the literal starts with `.`).
        unless @source[start] == ?.
          @index += match_decimal?
          # Advance past the decimal point.
          @index += 1 if @source[@index] == ?.
        end
        # Parse the fractional component.
        @index += match_decimal?
        # Parse the exponent.
        if @source[@index] =~ /[eE]/
          # Advance past the optional sign.
          @index += 1 if @source[@index += 1] =~ /[+-]/
          # Mark the current position and consume decimal digits after the
          # exponent marker.
          position = @index
          @index += match_decimal?
          # An exponent marker with no digits is invalid.
          # NOTE(review): unlike the other numeric errors this token does not
          # set :isNumber — confirm whether that asymmetry is intended.
          if position == @index
            token = Token.new(self, :error, start...@index)
            token[:error] = "Exponents may not be empty."
            token[:tokenError] = true
          end
        end
        # `token` is still nil here unless the exponent branch produced an
        # error token.
        unless token
          # Octal literals are invalid in ES 5.
          if is_octal
            token = Token.new(self, :error, start...@index)
            token[:error] = "Invalid octal escape sequence."
            token[:isNumber] = token[:isOctal] = token[:tokenError] = true
          else
            # Syntactically valid decimal value. As with hexdigits, the
            # parser will determine if the lexed value is semantically sound.
            token = Token.new(self, :decimal_number, start...@index)
            token[:isPrimitive] = token[:isNumber] = true
          end
        end
      end
      @column += token[:size]
      token
    end
|
446
|
+
|
447
|
+
    # Internal: Lexes a regular expression literal. See section 7.8.5.
    #
    # Returns the lexed `Token` (`:pattern`, or `:error` for malformed or
    # unterminated literals).
    def lex_pattern
      start = @index
      # Maps the initial position of each balanced construct (capturing
      # group, character class, quantifier braces) to its terminal position,
      # and vice versa. Positions are relative to `start`.
      balanced = {}
      # Stack of open capturing-group positions; must be empty at the end.
      groups = []
      # A flag that specifies if the regular expression is terminated.
      terminated = false
      # Only the last syntax error is preserved for improperly constructed
      # regular expressions.
      syntax_error = nil
      loop do
        @index += 1
        break if eof?
        # Use the normalized input to quickly detect line terminators.
        case character = @normalized_source[@index]
        when ?\n
          # Line terminators cannot occur within RegExp literals.
          # NOTE(review): this error token — like the unbalanced-bracket one
          # below — can be overwritten after the loop, since the `terminated`
          # branch unconditionally builds a new token. Confirm the intended
          # precedence.
          token = Token.new(self, :error, start...@index)
          token[:error] = "Line terminators are not permitted within RegExp literals."
          token[:tokenError] = token[:errorHasContent] = true
          # Avoid emitting a second unterminated RegExp error once lexing is
          # complete.
          terminated = true
          break
        when ?/
          # An unescaped `/` marks the end of the regular expression.
          terminated = true
          break
        when /[?*+]/
          syntax_error = "`?`, `*`, and `+` require a value to repeat."
        when ?^
          # `^` may only occur immediately following `|`, or at the beginning
          # of either the pattern, a capturing group, or a lookahead
          # assertion (`?:`, `?=`, or `?!`). Character classes are lexed
          # separately below.
          unless @source[@index - 1] =~ %r{[/|(]} || @source[@index - 3, 3] =~ /\(\?[:!=]/
            syntax_error = "`^` may not occur here."
          end
        when ?$
          # `$` may only occur immediately before `|`, or at the end of
          # either the pattern, a capturing group, or a lookahead assertion.
          unless @source[@index + 1] =~ %r{[/|)]}
            syntax_error = "`$` may not occur here."
          end
        when ?}
          # All unescaped braces are assumed to delimit quantifiers (as in
          # the ZeParser tokenizer), so a bare `}` is a mismatch.
          syntax_error = "Mismatched `}`."
        else
          # Lex capturing groups.
          if character == ?(
            # Mark the initial position of the capturing group.
            groups << @index - start
          elsif character == ?)
            if groups.empty?
              syntax_error = "Capturing group parentheses must be balanced."
            else
              # Record the initial and terminal positions of the parentheses
              # delimiting the group.
              terminal = @index - start
              balanced[initial = groups.pop] = terminal
              balanced[terminal] = initial
            end
          end

          # Character classes.
          if character == ?[
            # Record the initial position of the character class.
            initial = @index - start
            # Characters in character classes are treated literally; only
            # line terminators and unescaped closing brackets end the scan
            # (they are not part of `RegularExpressionClassChar`).
            loop do
              @index += 1
              break if eof? || @normalized_source[@index] == ?\n || @source[@index] == ?]
              if @source[@index] == ?\\
                if @normalized_source[@index + 1] == ?\n
                  # Abort lexing if a line terminator is encountered.
                  break
                else
                  # Skip the escaped character so `\]` is lexed correctly.
                  @index += 1
                end
              end
            end
            if @source[@index] == ?]
              # Record the initial and terminal positions of the brackets
              # delimiting the class.
              terminal = @index - start
              balanced[initial] = terminal
              balanced[terminal] = initial
            else
              token = Token.new(self, :error, start...@index)
              token[:error] = "Character class brackets must be balanced."
              token[:tokenError] = true
              # Avoid emitting an unterminated RegExp error once lexing is
              # complete.
              terminated = true
              break
            end
          # Escape sequences may occur anywhere within the RegExp and mark
          # the following character as literal (unless it is a terminator).
          elsif character == ?\\ && @normalized_source[@index + 1] != ?\n
            @index += 1
          end

          # Lookahead assertions and quantifiers.
          if character == ?(
            # Lex a non-capturing group, positive lookahead, or negative
            # lookahead marker.
            @index += 2 if @source[@index + 1, 2] =~ /\?[:=!]/
          else
            # Lex quantifiers.
            case @source[@index + 1]
            when ??
              # `?` matches the preceding character zero or one times.
              @index += 1
            when /[*+]/
              # `*` matches zero or more, `+` one or more occurrences; a
              # trailing `?` makes the match non-greedy.
              @index += 1 if @source[@index += 1] == ??
            when ?{
              # Advance one character and mark the initial position of the
              # `{n}` / `{n,}` / `{n,m}` quantifier.
              @index += 1
              initial = @index - start
              unless @source[@index += 1] =~ /\d/
                syntax_error = "Quantifier curly requires at least one digit before the comma"
              end
              # Lex the `n` value.
              @index += match_decimal?
              # Lex the `m` value, if any, if a comma is specified.
              @index += match_decimal? if @source[@index += 1] == ?,
              # Quantifier braces must be balanced.
              if @source[@index + 1] == ?}
                @index += 1
                terminal = @index - start
                balanced[initial] = terminal
                balanced[terminal] = initial
                # A trailing `?` indicates a non-greedy match.
                @index += 1 if @source[@index + 1] == ??
              else
                syntax_error = "Quantifier curly requires to be closed"
              end
            end
          end
        end
      end

      # Construct the token.
      unless terminated
        token = Token.new(self, :error, start...@index)
        token[:error] = "Unterminated RegExp literal."
        token[:tokenError] = true
      else
        # Advance past the closing `/` and lex the regular expression flags,
        # if any, as an identifier fragment (the `RegularExpressionFlags`
        # grammar is that of `IdentifierPart`).
        @index += 1
        @index += match_identifier? :fragment
        if !groups.empty?
          # At least one set of capturing group parentheses was unbalanced.
          token = Token.new(self, :error, start...@index)
          token[:tokenError] = true
          token[:error] = "Mismatched `(` or `)`."
        elsif syntax_error
          # Report the last recorded syntax error.
          token = Token.new(self, :error, start...@index)
          token[:tokenError] = token[:errorHasContent] = true
          token[:error] = syntax_error
        else
          token = Token.new(self, :pattern, start...@index)
          token[:isPrimitive] = true
          token[:pairs] = balanced
        end
      end
      @column += @index - start
      token
    end
|
647
|
+
|
648
|
+
# Internal: Lexes a regular expression literal. See sections 7.1 and 7.6.
|
649
|
+
#
|
650
|
+
# Returns the lexed `Token`.
|
651
|
+
def lex_identifier
|
652
|
+
size = match_identifier?
|
653
|
+
if size.zero?
|
654
|
+
character = @source[@index]
|
655
|
+
token = Token.new(self, :error, @index...@index += 1)
|
656
|
+
token[:tokenError] = true
|
657
|
+
token[:error] = if character == ?\\
|
658
|
+
@source[@index] == ?u ? "Invalid Unicode escape sequence." : "Illegal escape sequence."
|
659
|
+
else
|
660
|
+
"Invalid token."
|
661
|
+
end
|
662
|
+
else
|
663
|
+
token = Token.new(self, :identifier, @index...@index += size)
|
664
|
+
# Mark the token as a primitive if it is in the `Lexer::LITERALS` array.
|
665
|
+
token[:isPrimitive] = LITERALS.include? token[:value]
|
666
|
+
end
|
667
|
+
@column += token[:size]
|
668
|
+
token
|
669
|
+
end
|
670
|
+
end
|
671
|
+
end
|