violet 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ module Violet
4
+ require File.expand_path("../lib/violet", File.dirname(__FILE__))
5
+ require "pal"
6
+
7
+ # Internal: Defines the commands and variables available to the interactive
8
+ # shell.
9
+ class Command < Pal::Context
10
+
11
+ # Public: Lexes a string of JavaScript source code.
12
+ #
13
+ # source - The source `String`.
14
+ # patterns - Boolean arguments that correspond to each lexed token and
15
+ # specify if the `/` and `/=` tokens may be interpreted as regular
16
+ # expressions (`true`) or division operators (`false`).
17
+ #
18
+ # Returns an `Array` of `Token`s.
19
+ def lex(source, *patterns)
20
+ Lexer.new(source).tokens(*patterns)
21
+ end
22
+
23
+ # Public: Parses a string of JavaScript source code.
24
+ #
25
+ # source - The source `String`.
26
+ #
27
+ # Returns the result of `Parser.parse`.
28
+ def parse(source)
29
+ Parser.parse(source)
30
+ end
31
+ end
32
+
33
+ Pal::REPL.new("violet", Command.new).loop
34
+ end
@@ -0,0 +1,22 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ module Violet
4
+ # Public: Contains the version information.
5
+ module Version
6
+ # Public: The current version of Violet. The major, minor, and patch
7
+ # versions are exposed as individual constants, and comprise the
8
+ # semantic version string.
9
+ STRING = (MAJOR, MINOR, PATCH = 0, 0, 1) * "."
10
+ end
11
+
12
+ # Internal: A named `Error` class, used for reporting parse errors.
13
+ Error = Class.new(StandardError)
14
+
15
+ # Prepend the `lib` directory to the load path to facilitate loading Violet
16
+ # without RubyGems. Modules and classes will be loaded as needed.
17
+ $:.unshift File.expand_path(File.dirname(__FILE__))
18
+
19
+ autoload :Token, "violet/token"
20
+ autoload :Lexer, "violet/lexer"
21
+ autoload :Parser, "violet/parser"
22
+ end
@@ -0,0 +1,671 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ module Violet
4
+ # Internal: Records exceptions emitted by the lexer.
5
+ LexerError = Class.new(Error)
6
+
7
+ # Public: Lexes a JavaScript source string.
8
+ class Lexer
9
+ # Public: Matches line terminators: line feeds, carriage returns, line
10
+ # separators, and paragraph separators. See section 7.3 of the ES 5.1 spec.
11
+ LINE_TERMINATORS = /[\n\r\u2028\u2029]/
12
+
13
+ # Public: Matches line separators, paragraph separators, and carriage
14
+ # returns not followed by line feeds. Used to convert all line
15
+ # terminators to line feeds. CRLF line endings are preserved.
16
+ NORMALIZE_LINE_ENDINGS = /[\u2028\u2029]|(?:\r[^\n])/
17
+
18
+ # Public: Matches Unicode letters, `$`, `_`, and Unicode escape sequences.
19
+ # See section 7.6.
20
+ IDENTIFIER_START = /[$_\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]/
21
+
22
+ # Public: Matches identifier starting characters, Unicode combining marks,
23
+ # Unicode digits, Unicode connector punctuators, zero-width non-joiners, and
24
+ # zero-width joiners. See section 7.1.
25
+ IDENTIFIER_FRAGMENT = Regexp.union(IDENTIFIER_START, /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}\u200c\u200d]/)
26
+
27
+ # Public: Matches an ECMAScript token. This is a superset of the `Token`
28
+ # production defined in section 7.5 of the spec.
29
+ TOKEN = %r(
30
+ ## Whitespace characters: tab, vertical tab, form feed, space,
31
+ # non-breaking space, byte-order mark, and other Unicode space separators
32
+ # (Category Z). The space and non-breaking space characters are matched by
33
+ # the \p{Z} Unicode category class. See section 7.2 of the ES spec.
34
+ (?<whitespace>[\t\v\f\ufeff\uffff\p{Z}])?
35
+ # Line terminators. See section 7.3.
36
+ (?<line_terminator>#{LINE_TERMINATORS})?
37
+ # Line and block comments. See section 7.4.
38
+ (?<line_comment>//)?
39
+ (?<block_comment>/\*)?
40
+ # Single- and double-quoted string literals. See section 7.8.4.
41
+ (?<single_quoted_string>')?
42
+ (?<double_quoted_string>")?
43
+ # Numeric literals. See section 7.8.3.
44
+ (?<number>\.?[0-9])?
45
+ # RegExp literals. See section 7.8.5. This capture may also match the
46
+ # `DivPunctuator` production.
47
+ (?:(?<pattern>/)[^=])?
48
+ # Punctuators. See section 7.7.
49
+ (?<punctuator>\>>>=|===|!==|>>>|<<=|>>=|<=|>=|==|!=|\+\+|--|<<|>>|&&|
50
+ \|\||\+=|-=|\*=|%=|&=|\|=|\^=|/=|\{|\}|\(|\)|\[|\]|\.|;|,|<|>|\+|-|
51
+ \*|%|\||&|\||\^|!|~|\?|:|=|/)?
52
+ )x
53
+
54
+ # Internal: The `true`, `false`, and `null` literals, as well as the
55
+ # `undefined` value. The lexer marks these four values as primitives.
56
+ LITERALS = %w( undefined null true false )
57
+
58
+ # Internal: A `Hash` that contains the quote character, token kind, and the
59
+ # unterminated string and invalid line continuation error messages for
60
+ # single- and double-quoted string tokens.
61
+ STRINGS = %w( single ' double " ).each_slice(2).with_object({}) do |(kind, quote), value|
62
+ value[kind.to_sym] = {
63
+ :quote => quote,
64
+ :kind => "#{kind}_quoted_string".to_sym,
65
+ :unterminated_string_error => "Unterminated #{kind}-quoted string literal.",
66
+ :invalid_continuation_error => "Unescaped line terminators are not permitted within #{kind}-quoted string literals."
67
+ }
68
+ end
69
+
70
+ # Public: Gets the source string.
71
+ attr_reader :source
72
+
73
+ # Public: Gets the current line.
74
+ attr_reader :line
75
+
76
+ # Public: Gets the current column.
77
+ attr_reader :column
78
+
79
+ # Public: Creates a new `Lexer` with a source string.
80
+ #
81
+ # source - The source `String`.
82
+ def initialize(source)
83
+ @source = source
84
+ # Replace all line terminators with a single line feed, but preserve CRLF
85
+ # line endings.
86
+ @normalized_source = @source.gsub(NORMALIZE_LINE_ENDINGS, ?\n)
87
+ reset!
88
+ end
89
+
90
+ # Public: Resets the lexer to its original position and clears the token
91
+ # stream.
92
+ def reset!
93
+ @index = @line = @column = 0
94
+ @terminated = false
95
+ (@tokens ||= []).clear
96
+ end
97
+
98
+ # Public: Produces a complete token stream from the source. This method
99
+ # resets the lexer prior to lexing the source string.
100
+ #
101
+ # patterns - Zero or more boolean arguments that correspond to each lexed
102
+ # token and specify if the `/` and `/=` tokens may be interpreted as
103
+ # regular expressions (`true`) or division operators (`false`). This
104
+ # flag only applies to division and regular expression tokens; setting
105
+ # it for other tokens has no effect.
106
+ def tokens(*patterns)
107
+ reset!
108
+ index = -1
109
+ # Lex tokens until the end-of-file mark is reached.
110
+ loop { break unless lex patterns[index += 1] }
111
+ @tokens
112
+ end
113
+
114
+ # Public: Inserts a new token into the token stream, before a reference
115
+ # token. If the reference token is the end-of-file mark, the token is
116
+ # appended instead.
117
+ #
118
+ # token - The `Token` to be inserted into the token stream.
119
+ # original - The reference `Token` before which the new `Token` is inserted.
120
+ #
121
+ # Returns the new `Token`.
122
+ def insert_before(token, original)
123
+ if original[:name] == Token::Types[:eof]
124
+ token[:index] = @tokens.size
125
+ @tokens << token
126
+ else
127
+ token[:index] = original[:index]
128
+ @tokens[token[:index]] = token
129
+ original[:index] += 1
130
+ @tokens[original[:index]] = original
131
+ end
132
+ token
133
+ end
134
+
135
+ # Internal: Returns the maximum number of characters, relative to the
136
+ # current scan pointer, that may be parsed as valid identifier
137
+ # characters. The scan pointer is not advanced.
138
+ #
139
+ # lex_as_fragment - A boolean that specifies whether the identifier may be
140
+ # lexed as a fragment. Certain productions allow identifier fragments,
141
+ # while others require that the identifier begin with a subset of valid
142
+ # fragment characters (default: false).
143
+ def match_identifier?(lex_as_fragment = false)
144
+ size = @index
145
+ # Identifier starting characters are restricted to a subset of valid
146
+ # identifier fragment characters.
147
+ until eof?
148
+ # Unicode escape sequences may occur anywhere within an identifier.
149
+ if /^\\u\h{4}$/ =~ @source[size, 6]
150
+ # Advance the scan pointer past the Unicode escape sequence.
151
+ size += 6
152
+ else
153
+ character = @source[size]
154
+ if lex_as_fragment
155
+ # Use the full `IdentifierPart` production.
156
+ break unless character =~ IDENTIFIER_FRAGMENT
157
+ else
158
+ # The initial character must conform to the more restrictive
159
+ # `IdentifierStart` production.
160
+ break unless character =~ IDENTIFIER_START
161
+ # All subsequent characters may be lexed as identifier fragments.
162
+ lex_as_fragment = true
163
+ end
164
+ size += 1
165
+ end
166
+ end
167
+ size - @index
168
+ end
169
+
170
+ # Internal: Returns the maximum number of characters, relative to the
171
+ # current scan pointer, that may be parsed as valid decimal characters.
172
+ # The scan pointer is not advanced.
173
+ def match_decimal?
174
+ size = @index
175
+ size += 1 until eof? || @source[size] !~ /\d/
176
+ size - @index
177
+ end
178
+
179
+ # Public: Returns `true` if the lexer has reached the end of the source
180
+ # string.
181
+ def eof?
182
+ @terminated || @index >= @source.size
183
+ end
184
+
185
+ # Public: Lexes a token.
186
+ #
187
+ # pattern - If the token is `/` or `/=`, specifies whether it may be lexed
188
+ # as part of a regular expression. If `false`, the token will be lexed as
189
+ # a division operator instead (default: true).
190
+ #
191
+ # Returns the lexed `Token`, or `nil` if the lexer has finished scanning the
192
+ # source.
193
+ def lex(pattern = true)
194
+ return if @terminated
195
+ if eof?
196
+ @terminated ||= true
197
+ token = Token.new(self, :eof, @source.size...@source.size)
198
+ return token
199
+ end
200
+ token = TOKEN.match(@source, @index) do |match|
201
+ case
202
+ # Produces a whitespace, line terminator, line comment (`// ...`), or
203
+ # block comment (`/* ... */`) token.
204
+ when match[:whitespace] then lex_whitespace
205
+ when match[:line_terminator] then lex_line_terminator
206
+ when match[:line_comment] then lex_line_comment
207
+ when match[:block_comment] then lex_block_comment
208
+ # Produces a single- or double-quoted string token. A single method is
209
+ # used to produce both kinds of tokens.
210
+ when match[:single_quoted_string] then lex_string :single
211
+ when match[:double_quoted_string] then lex_string :double
212
+ # Produces a hexadecimal or decimal token. Octal numbers produce an
213
+ # error, as they are prohibited in ES 5.
214
+ when match[:number] then lex_number
215
+ # `/` and `/=` may be interpreted as either regular expressions or
216
+ # division operators. The `pattern` argument specifies whether
217
+ # these tokens should be lexed as RegExps or punctuators.
218
+ when pattern && match[:pattern] then lex_pattern
219
+ else
220
+ # The `<pattern>` capture may contain the `/` and `/=` tokens.
221
+ if result = match[:pattern] || match[:punctuator]
222
+ token = Token.new(self, :punctuator, @index...@index += result.size)
223
+ @column += token.size
224
+ token
225
+ else
226
+ # Lex the token as an identifier.
227
+ lex_identifier
228
+ end
229
+ end
230
+ end
231
+ # Record the position of the token in the token stream.
232
+ token[:index] = @tokens.size
233
+ @tokens << token
234
+ token
235
+ end
236
+
237
+ # Internal: Lexes a whitespace token at the current scan position.
238
+ #
239
+ # Returns the lexed `Token`.
240
+ def lex_whitespace
241
+ token = Token.new(self, :whitespace, @index...@index += 1)
242
+ token[:isWhite] = true
243
+ @column += 1
244
+ token
245
+ end
246
+
247
+ # Internal: Lexes a line terminator at the current scan position: either a
248
+ # line feed, carriage return, line separator, or paragraph separator. See
249
+ # section 7.3 of the spec.
250
+ #
251
+ # Returns the lexed `Token`.
252
+ def lex_line_terminator
253
+ character = @source[@index]
254
+ stop = @index + 1
255
+ # If the current character is a carriage return and the next character is
256
+ # a line feed, the source string contains CRLF line endings. The `stop`
257
+ # position is advanced one additional character, so that "\r\n" is treated
258
+ # as a single terminator.
259
+ stop += 1 if character == ?\r && @source[stop] == ?\n
260
+ # Advance the current index past the terminator.
261
+ token = Token.new(self, :line_terminator, @index...@index = stop)
262
+ token[:lines] = 1
263
+ token[:isWhite] = true
264
+ @line += 1
265
+ @column = 0
266
+ token
267
+ end
268
+
269
+ # Internal: Lexes a line comment at the current scan position.
270
+ #
271
+ # Returns the lexed `Token`.
272
+ def lex_line_comment
273
+ @column = @normalized_source.index(?\n, @index) || @source.length
274
+ token = Token.new(self, :line_comment, @index...@index = @column)
275
+ token[:isComment] = token[:isWhite] = true
276
+ token
277
+ end
278
+
279
+ # Internal: Lexes a block comment at the current scan position.
280
+ #
281
+ # Returns the lexed `Token`.
282
+ def lex_block_comment
283
+ start = @index
284
+ # Mark the ending position of the comment.
285
+ stop = @source.index("*/", start)
286
+ if stop
287
+ # Advance the current position past the end of the comment.
288
+ @index = stop + 2
289
+ token = Token.new(self, :block_comment, start...@index)
290
+ token[:isComment] = token[:isWhite] = true
291
+ # Block comments trigger automatic semicolon insertion only if they
292
+ # span multiple lines. The normalized source is used to quickly
293
+ # detect line terminators.
294
+ index = lines = 0
295
+ # Advance the current line.
296
+ lines += 1 while index = @normalized_source[start...@index].index(?\n, index + 1)
297
+ if lines.zero?
298
+ # For single-line block comments, increase the column by the size of
299
+ # the token.
300
+ @column += token[:size]
301
+ else
302
+ # For multiline block comments, record the number of lines comprising
303
+ # the comment and reset the column.
304
+ @line += token[:lines] = lines
305
+ @column = 0
306
+ end
307
+ else
308
+ # Unterminated block comment. If a line terminator is found, the comment
309
+ # is assumed to end immediately before it. Otherwise, the comment is
310
+ # assumed to end two characters after the current scan position.
311
+ stop = @normalized_source.index(?\n, @index)
312
+ @index = stop || @index + 2
313
+ token = Token.new(self, :error, start...@index)
314
+ token[:error] = "Unterminated block comment."
315
+ token[:isComment] = token[:isWhite] = token[:tokenError] = true
316
+ @column += token[:size]
317
+ end
318
+ token
319
+ end
320
+
321
+ # Internal: Lexes a single- or double-quoted string primitive at the
322
+ # current scan position.
323
+ #
324
+ # style - A `Symbol` that specifies the quoting style. The quoting style
325
+ # must be defined as a key in the `Lexer::STRINGS` hash.
326
+ #
327
+ # Returns the lexed `Token`.
328
+ # Raises `KeyError` if the quoting style is not defined in the hash.
329
+ def lex_string(style)
330
+ style = STRINGS.fetch(style)
331
+ start = @index
332
+ lines = 0
333
+ loop do
334
+ # Parse escape sequences in strings.
335
+ until eof? || @source[@index += 1] != ?\\
336
+ # Record the number of new lines if the string contains line feeds. The normalized source is
337
+ # used to avoid repeatedly normalizing line endings.
338
+ @line += (lines += 1) if @normalized_source[@index + 1] == ?\n
339
+ # Advance to the next character.
340
+ @index += 1
341
+ end
342
+ # If the string contains an unescaped line terminator, it is a syntax error. Some
343
+ # environments permit unescaped new lines in strings; however, the spec disallows them.
344
+ if @source[@index] =~ LINE_TERMINATORS
345
+ token = Token.new(self, :error, start...@index)
346
+ token[:error] = style[:invalid_continuation_error]
347
+ token[:isString] = token[:tokenError] = true
348
+ break
349
+ end
350
+ # Consume escape sequences until either the end of the source or the end-of-string character
351
+ # is reached.
352
+ break if eof? || @source[@index] == style[:quote]
353
+ end
354
+ # If the end of the source is reached without consuming the end-of-string character, the
355
+ # source contains an unterminated string literal.
356
+ if @source[@index] == style[:quote]
357
+ # Advance the index past the end-of-string character.
358
+ @index += 1
359
+ token = Token.new(self, style[:kind], start...@index)
360
+ token[:isPrimitive] = token[:isString] = true
361
+ # Update the line and column entries accordingly.
362
+ if lines.zero?
363
+ @column += token[:size]
364
+ else
365
+ token[:lines] = lines
366
+ @column = 0
367
+ end
368
+ else
369
+ token = Token.new(self, :error, start...@index)
370
+ token[:error] = style[:unterminated_string_error]
371
+ token[:isString] = token[:tokenError] = true
372
+ @column += token[:size]
373
+ end
374
+ token
375
+ end
376
+
377
+ # Internal: Lexes a decimal or hexadecimal numeric value. See section 7.8.3.
378
+ #
379
+ # Returns the lexed `Token`.
380
+ def lex_number
381
+ start = @index
382
+ @index += 1
383
+ # If the token begins with a `0x`, parse the remainder as a hexadecimal value.
384
+ if @source[start..@index] =~ /0[xX]/
385
+ position = @index += 1
386
+ # Consume characters until the end of the string or a non-hexdigit
387
+ # character is encountered.
388
+ @index += 1 until eof? || @source[@index] !~ /\h/
389
+ # If no additional characters were consumed, the hex value is invalid.
390
+ if position == @index
391
+ token = Token.new(self, :error, start...@index)
392
+ token[:error] = "Invalid hexdigit value."
393
+ token[:isNumber] = token[:tokenError] = true
394
+ else
395
+ # The value is syntactically sound.
396
+ token = Token.new(self, :hexadecimal_number, start...@index)
397
+ token[:isPrimitive] = token[:isNumber] = true
398
+ end
399
+ else
400
+ # Determine if an octal escape sequence is being parsed (i.e., a leading
401
+ # zero followed by a decimal digit).
402
+ is_octal = @source[start..@index] =~ /0\d/
403
+ # Parse the integral expression before the decimal point.
404
+ unless @source[start] == ?.
405
+ # Consume characters until the end of the string or a non-decimal
406
+ # character is encountered.
407
+ @index += match_decimal?
408
+ # Advance past the decimal point.
409
+ @index += 1 if @source[@index] == ?.
410
+ end
411
+ # Parse the decimal component.
412
+ @index += match_decimal?
413
+ # Parse the exponent.
414
+ if @source[@index] =~ /[eE]/
415
+ # Advance past the sign.
416
+ @index += 1 if @source[@index += 1] =~ /[+-]/
417
+ # Mark the current position and consume decimal digits past the
418
+ # exponential.
419
+ position = @index
420
+ @index += match_decimal?
421
+ # If no additional characters were consumed but an exponent was lexed,
422
+ # the decimal value is invalid.
423
+ if position == @index
424
+ token = Token.new(self, :error, start...@index)
425
+ token[:error] = "Exponents may not be empty."
426
+ token[:tokenError] = true
427
+ end
428
+ end
429
+ unless token
430
+ # Octal literals are invalid in ES 5.
431
+ if is_octal
432
+ token = Token.new(self, :error, start...@index)
433
+ token[:error] = "Invalid octal escape sequence."
434
+ token[:isNumber] = token[:isOctal] = token[:tokenError] = true
435
+ else
436
+ # Syntactically valid decimal value. As with hexdigits, the parser
437
+ # will determine if the lexed value is semantically sound.
438
+ token = Token.new(self, :decimal_number, start...@index)
439
+ token[:isPrimitive] = token[:isNumber] = true
440
+ end
441
+ end
442
+ end
443
+ @column += token[:size]
444
+ token
445
+ end
446
+
447
+ # Internal: Lexes a regular expression literal. See section 7.8.5.
448
+ #
449
+ # Returns the lexed `Token`.
450
+ def lex_pattern
451
+ start = @index
452
+ # Maintains a hash of the initial and terminal positions of balanced
453
+ # regular expression characters: grouping parentheses, character class
454
+ # brackets, and quantifier braces.
455
+ balanced = {}
456
+ # Ensures that all capturing groups in the pattern are balanced.
457
+ groups = []
458
+ # A flag that specifies if the regular expression is terminated.
459
+ terminated = false
460
+ # Only the last syntax error is preserved for improperly constructed
461
+ # regular expressions.
462
+ syntax_error = nil
463
+ loop do
464
+ @index += 1
465
+ break if eof?
466
+ # Use the normalized input to quickly detect line terminators.
467
+ case character = @normalized_source[@index]
468
+ when ?\n
469
+ # Line terminators cannot occur within RegExp literals.
470
+ token = Token.new(self, :error, start...@index)
471
+ token[:error] = "Line terminators are not permitted within RegExp literals."
472
+ token[:tokenError] = token[:errorHasContent] = true
473
+ # Avoid emitting a second unterminated RegExp error once lexing is
474
+ # complete.
475
+ terminated = true
476
+ break
477
+ when ?/
478
+ # An unescaped `/` marks the end of the regular expression.
479
+ terminated = true
480
+ break
481
+ when /[?*+]/
482
+ syntax_error = "`?`, `*`, and `+` require a value to repeat."
483
+ when ?^
484
+ # `^` may only occur immediately following `|`, or at the beginning
485
+ # of either the pattern, a capturing group, or a lookahead assertion
486
+ # (`?:`, `?=`, or `?!`). Note that `^` may also negate a character
487
+ # class; however, character classes have different semantics and are
488
+ # lexed separately.
489
+ unless @source[@index - 1] =~ %r{[/|(]} || @source[@index - 3, 3] =~ /\(\?[:!=]/
490
+ syntax_error = "`^` may not occur here."
491
+ end
492
+ when ?$
493
+ # `$` may only occur immediately before `|`, or at the end of either
494
+ # the pattern, a capturing group, or a lookahead assertion.
495
+ unless @source[@index + 1] =~ %r{[/|)]}
496
+ syntax_error = "`$` may not occur here."
497
+ end
498
+ when ?}
499
+ # Interpreters can distinguish between and automatically escape braces
500
+ # not used to delimit quantifiers. Nevertheless, it's considered a bad
501
+ # practice to leave special characters unescaped in RegExps. Both the
502
+ # Violet lexer and the ZeParser tokenizer assume that all unescaped
503
+ # braces delimit quantifiers, and emit errors accordingly.
504
+ syntax_error = "Mismatched `}`."
505
+ else
506
+ # Lex capturing groups.
507
+ if character == ?(
508
+ # Mark the initial position of the capturing group.
509
+ groups << @index - start
510
+ elsif character == ?)
511
+ if groups.empty?
512
+ syntax_error = "Capturing group parentheses must be balanced."
513
+ else
514
+ # Record the initial and terminal positions of the parentheses delimiting the group.
515
+ terminal = @index - start
516
+ balanced[initial = groups.pop] = terminal
517
+ balanced[terminal] = initial
518
+ end
519
+ end
520
+
521
+ # Character Classes.
522
+ # ------------------
523
+ if character == ?[
524
+ # Record the initial position of the character class.
525
+ initial = @index - start
526
+ # Characters in character classes are treated literally, so there
527
+ # is no need to escape them. The exceptions are line terminators and
528
+ # unescaped closing brackets, which are not part of the
529
+ # `RegularExpressionClassChar` grammar.
530
+ loop do
531
+ @index += 1
532
+ break if eof? || @normalized_source[@index] == ?\n || @source[@index] == ?]
533
+ if @source[@index] == ?\\
534
+ if @normalized_source[@index + 1] == ?\n
535
+ # Abort lexing if a line terminator is encountered.
536
+ break
537
+ else
538
+ # Skip lexing the subsequent escaped character. This ensures
539
+ # that escaped closing brackets (`\]`) are lexed correctly.
540
+ @index += 1
541
+ end
542
+ end
543
+ end
544
+ if @source[@index] == ?]
545
+ # Record the initial and terminal positions of the brackets
546
+ # delimiting the class.
547
+ terminal = @index - start
548
+ balanced[initial] = terminal
549
+ balanced[terminal] = initial
550
+ else
551
+ token = Token.new(self, :error, start...@index)
552
+ token[:error] = "Character class brackets must be balanced."
553
+ token[:tokenError] = true
554
+ # Avoid emitting an unterminated RegExp error once lexing is
555
+ # complete.
556
+ terminated = true
557
+ break
558
+ end
559
+ # Lex escaped characters. Escape sequences may occur anywhere within
560
+ # the RegExp, and indicate that the following character should be
561
+ # interpreted literally.
562
+ elsif character == ?\\ && @normalized_source[@index + 1] != ?\n
563
+ @index += 1
564
+ end
565
+
566
+ # Lookahead Assertions and Quantifiers.
567
+ # -------------------------------------
568
+ if character == ?(
569
+ # Lex a non-capturing group, positive lookahead, or negative lookahead.
570
+ @index += 2 if @source[@index + 1, 2] =~ /\?[:=!]/
571
+ else
572
+ # Lex quantifiers.
573
+ case @source[@index + 1]
574
+ when ??
575
+ # The `?` quantifier matches the preceding character zero or one
576
+ # times.
577
+ @index += 1
578
+ when /[*+]/
579
+ # The `*` quantifier matches the preceding character zero or more
580
+ # times; `+` matches a character one or more times. `*?` and `+?`
581
+ # indicate a non-greedy match.
582
+ @index += 1 if @source[@index += 1] == ??
583
+ when ?{
584
+ # Advance one character and mark the initial position of the
585
+ # quantifier.
586
+ @index += 1
587
+ initial = @index - start
588
+ # The `{n}` quantifier matches the preceding character exactly
589
+ # `n` times. `{n,}` matches at least `n` occurrences of the
590
+ # preceding character. `{n,m}` matches at least `n` and at most
591
+ # `m` occurrences.
592
+ unless @source[@index += 1] =~ /\d/
593
+ syntax_error = "Quantifier curly requires at least one digit before the comma"
594
+ end
595
+ # Lex the `n` value.
596
+ @index += match_decimal?
597
+ # Lex the `m` value, if any, if a comma is specified.
598
+ @index += match_decimal? if @source[@index += 1] == ?,
599
+ # Quantifier braces must be balanced.
600
+ if @source[@index + 1] == ?}
601
+ @index += 1
602
+ terminal = @index - start
603
+ balanced[initial] = terminal
604
+ balanced[terminal] = initial
605
+ # A trailing `?` indicates a non-greedy match.
606
+ @index += 1 if @source[@index + 1] == ??
607
+ else
608
+ syntax_error = "Quantifier curly requires to be closed"
609
+ end
610
+ end
611
+ end
612
+ end
613
+ end
614
+
615
+ # Construct the token.
616
+ # --------------------
617
+ unless terminated
618
+ token = Token.new(self, :error, start...@index)
619
+ token[:error] = "Unterminated RegExp literal."
620
+ token[:tokenError] = true
621
+ else
622
+ # Advance one character and lex the regular expression flags, if any,
623
+ # as an identifier fragment (the grammar for `RegularExpressionFlags`
624
+ # is that of `IdentifierPart`).
625
+ @index += 1
626
+ @index += match_identifier? :fragment
627
+ if !groups.empty?
628
+ # If the `groups` list is not empty, at least one set of capturing
629
+ # group parentheses was not balanced.
630
+ token = Token.new(self, :error, start...@index)
631
+ token[:tokenError] = true
632
+ token[:error] = "Mismatched `(` or `)`."
633
+ elsif syntax_error
634
+ # Add the last syntax error to the stack.
635
+ token = Token.new(self, :error, start...@index)
636
+ token[:tokenError] = token[:errorHasContent] = true
637
+ token[:error] = syntax_error
638
+ else
639
+ token = Token.new(self, :pattern, start...@index)
640
+ token[:isPrimitive] = true
641
+ token[:pairs] = balanced
642
+ end
643
+ end
644
+ @column += @index - start
645
+ token
646
+ end
647
+
648
+ # Internal: Lexes an identifier. See sections 7.1 and 7.6.
649
+ #
650
+ # Returns the lexed `Token`.
651
+ def lex_identifier
652
+ size = match_identifier?
653
+ if size.zero?
654
+ character = @source[@index]
655
+ token = Token.new(self, :error, @index...@index += 1)
656
+ token[:tokenError] = true
657
+ token[:error] = if character == ?\\
658
+ @source[@index] == ?u ? "Invalid Unicode escape sequence." : "Illegal escape sequence."
659
+ else
660
+ "Invalid token."
661
+ end
662
+ else
663
+ token = Token.new(self, :identifier, @index...@index += size)
664
+ # Mark the token as a primitive if it is in the `Lexer::LITERALS` array.
665
+ token[:isPrimitive] = LITERALS.include? token[:value]
666
+ end
667
+ @column += token[:size]
668
+ token
669
+ end
670
+ end
671
+ end