rip-lang 2.9.2 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lexer.js CHANGED
@@ -1,3259 +1,1552 @@
1
- var BALANCED_PAIRS, BOM, BOOL, CALLABLE, CALL_CLOSERS, CODE, COMMENT, COMPARABLE_LEFT_SIDE, COMPARE, COMPOUND_ASSIGN, CONTROL_IN_IMPLICIT, DISCARDED, EXPRESSION_CLOSE, EXPRESSION_END, EXPRESSION_START, HERECOMMENT_ILLEGAL, HEREDOC_DOUBLE, HEREDOC_INDENT, HEREDOC_SINGLE, HEREGEX, HEREGEX_COMMENT, HERE_JSTOKEN, IDENTIFIER, IMPLICIT_CALL, IMPLICIT_COMMA_BEFORE_ARROW, IMPLICIT_END, IMPLICIT_FUNC, IMPLICIT_UNSPACED_CALL, INDENTABLE_CLOSERS, INDEXABLE, INVERSES, JSTOKEN, JS_KEYWORDS, LINEBREAKS, LINE_BREAK, LINE_CONTINUER, MATH, MULTI_DENT, NOT_REGEX, NUMBER, OPERATOR, POSSIBLY_DIVISION, REGEX, REGEX_FLAGS, REGEX_ILLEGAL, REGEX_INVALID_ESCAPE, RELATION, RESERVED, RIP_ALIASES, RIP_ALIAS_MAP, RIP_KEYWORDS, Rewriter, SHIFT, SINGLE_CLOSERS, SINGLE_LINERS, STRICT_PROSCRIBED, STRING_DOUBLE, STRING_INVALID_ESCAPE, STRING_SINGLE, STRING_START, TRAILING_SPACES, UNARY, UNARY_MATH, UNFINISHED, VALID_FLAGS, WHITESPACE, addTokenData, generate, isForFrom, k, key, left, len, moveComments, right, indexOf = [].indexOf, slice = [].slice, hasProp = {}.hasOwnProperty;
2
-
3
- // The Rip Lexer. Uses a series of token-matching regexes to attempt
4
- // matches against the beginning of the source code. When a match is found,
5
- // a token is produced, we consume the match, and start again. Tokens are in the
6
- // form:
1
+ // ==========================================================================
2
+ // Rip Lexer — Clean reimplementation (2026)
3
+ // ==========================================================================
7
4
  //
8
- // [tag, value, locationData]
5
+ // Tokenizes Rip source into a stream of tagged tokens, then rewrites
6
+ // the stream to insert implicit syntax (calls, objects, blocks).
9
7
  //
10
- // where locationData is {first_line, first_column, last_line, last_column, last_line_exclusive, last_column_exclusive}.
11
- // These are read by the parser in the `parser.lexer` function defined in rip.rip.
12
-
13
- // Helper Functions (inlined from helpers.js)
14
- // --------------------------------------------
15
-
16
- // Repeat a string `n` times.
17
- var repeat = function(str, n) {
18
- var res = '';
19
- while (n > 0) {
20
- if (n & 1) res += str;
21
- n >>>= 1;
22
- str += str;
23
- }
24
- return res;
8
+ // Design principles:
9
+ // - Every token carries .pre (whitespace count before it)
10
+ // - Every token carries .data (metadata: await, predicate, quote, etc.)
11
+ // - Every token carries .loc (location: row, col, len)
12
+ // - Indentation is derived from .pre, not tracked during lexing
13
+ // - Token categories use Sets for O(1) membership tests
14
+ // - All let, no const — simplicity over ceremony
15
+ // - Parser reads .data directly — no new String() wrapping needed
16
+ //
17
+ // Token format:
18
+ // [tag, val] — minimal array (compatible with parser)
19
+ // token.pre — whitespace characters before this token
20
+ // token.data — metadata object (may be null)
21
+ // token.loc — { r: row, c: col, n: length }
22
+ // token.spaced — true if preceded by whitespace (sugar for .pre > 0)
23
+ // token.newLine — true if preceded by a newline
24
+ //
25
+ // Identifier suffixes:
26
+ // ! — dammit operator: fetch!() → await fetch()
27
+ // ? — predicate: empty? → isEmpty (returns boolean convention)
28
+ //
29
+ // The 9 tokenizer methods (in priority order):
30
+ // 1. identifier — variables, keywords, properties, ! and ? suffixes
31
+ // 2. comment — # line and ### block comments
32
+ // 3. whitespace — spaces/tabs between tokens on a line
33
+ // 4. line — newlines (records .pre for next line)
34
+ // 5. string — ' " ''' """ (with interpolation)
35
+ // 6. number — decimal, hex, octal, binary, bigint
36
+ // 7. regex — /pattern/flags and ///heregex///flags
37
+ // 8. js — `embedded javascript`
38
+ // 9. literal — operators, punctuation, everything else
39
+ //
40
+ // ==========================================================================
41
+
42
+ // ==========================================================================
43
+ // Token Category Sets
44
+ // ==========================================================================
45
+
46
+ // Keywords shared with JavaScript
47
+ let JS_KEYWORDS = new Set([
48
+ 'true', 'false', 'null', 'this',
49
+ 'new', 'delete', 'typeof', 'in', 'instanceof',
50
+ 'return', 'throw', 'break', 'continue', 'debugger',
51
+ 'yield', 'await',
52
+ 'if', 'else', 'switch', 'for', 'while', 'do',
53
+ 'try', 'catch', 'finally',
54
+ 'class', 'extends', 'super',
55
+ 'import', 'export', 'default',
56
+ ]);
57
+
58
+ // Rip-only keywords
59
+ let RIP_KEYWORDS = new Set([
60
+ 'undefined', 'Infinity', 'NaN',
61
+ 'then', 'unless', 'until', 'loop', 'of', 'by', 'when', 'def',
62
+ ]);
63
+
64
+ // Rip aliases: word → operator/value
65
+ let ALIASES = {
66
+ and: '&&',
67
+ or: '||',
68
+ is: '==',
69
+ isnt: '!=',
70
+ not: '!',
71
+ yes: 'true',
72
+ no: 'false',
73
+ on: 'true',
74
+ off: 'false',
25
75
  };
26
76
 
27
- // Count the number of occurrences of a string in a string.
28
- var count = function(string, substr) {
29
- var num = 0, pos = 0;
30
- if (!substr.length) return 1 / 0;
31
- while (pos = 1 + string.indexOf(substr, pos)) num++;
32
- return num;
77
+ let ALIAS_WORDS = new Set(Object.keys(ALIASES));
78
+
79
+ // Reserved words cannot be used as identifiers
80
+ let RESERVED = new Set([
81
+ 'case', 'function', 'var', 'void', 'with', 'const', 'let',
82
+ 'enum', 'native', 'implements', 'interface', 'package',
83
+ 'private', 'protected', 'public', 'static',
84
+ ]);
85
+
86
+ // Words that become STATEMENT tokens
87
+ let STATEMENTS = new Set(['break', 'continue', 'debugger']);
88
+
89
+ // Words that become UNARY tokens
90
+ let UNARY_WORDS = new Set(['NEW', 'TYPEOF', 'DELETE']);
91
+
92
+ // Relation keywords (in, of, instanceof)
93
+ let RELATIONS = new Set(['IN', 'OF', 'INSTANCEOF']);
94
+
95
+ // Tokens that can precede a function call (implicit call detection)
96
+ let CALLABLE = new Set([
97
+ 'IDENTIFIER', 'PROPERTY', ')', ']', '@', 'THIS', 'SUPER',
98
+ 'DYNAMIC_IMPORT', '?.',
99
+ ]);
100
+
101
+ // Tokens that can be indexed
102
+ let INDEXABLE = new Set([
103
+ ...CALLABLE,
104
+ 'NUMBER', 'INFINITY', 'NAN', 'STRING', 'STRING_END',
105
+ 'REGEX', 'REGEX_END', 'BOOL', 'NULL', 'UNDEFINED', '}',
106
+ ]);
107
+
108
+ // Tokens that can follow IMPLICIT_FUNC to start an implicit call
109
+ let IMPLICIT_CALL = new Set([
110
+ 'IDENTIFIER', 'PROPERTY', 'NUMBER', 'INFINITY', 'NAN',
111
+ 'STRING', 'STRING_START', 'REGEX', 'REGEX_START', 'JS',
112
+ 'NEW', 'PARAM_START', 'CLASS', 'IF', 'TRY', 'SWITCH',
113
+ 'THIS', 'DYNAMIC_IMPORT', 'IMPORT_META', 'NEW_TARGET',
114
+ 'UNDEFINED', 'NULL', 'BOOL', 'UNARY', 'DO', 'DO_IIFE',
115
+ 'YIELD', 'AWAIT', 'UNARY_MATH', 'SUPER', 'THROW',
116
+ '@', '->', '=>', '[', '(', '{', '--', '++',
117
+ ]);
118
+
119
+ // Tokens that can start an implicit call (unspaced, like +/-)
120
+ let IMPLICIT_UNSPACED_CALL = new Set(['+', '-']);
121
+
122
+ // Tokens that end an implicit call
123
+ let IMPLICIT_END = new Set([
124
+ 'POST_IF', 'POST_UNLESS', 'FOR', 'WHILE', 'UNTIL',
125
+ 'WHEN', 'BY', 'LOOP', 'TERMINATOR', '||', '&&',
126
+ ]);
127
+
128
+ // Tokens that trigger implicit comma insertion before arrows
129
+ let IMPLICIT_COMMA_BEFORE_ARROW = new Set([
130
+ 'STRING', 'STRING_END', 'REGEX', 'REGEX_END', 'NUMBER',
131
+ 'BOOL', 'NULL', 'UNDEFINED', 'INFINITY', 'NAN', ']', '}',
132
+ ]);
133
+
134
+ // Tokens that start/end balanced pairs
135
+ let EXPRESSION_START = new Set(['(', '[', '{', 'INDENT', 'CALL_START', 'PARAM_START', 'INDEX_START', 'STRING_START', 'INTERPOLATION_START', 'REGEX_START']);
136
+ let EXPRESSION_END = new Set([')', ']', '}', 'OUTDENT', 'CALL_END', 'PARAM_END', 'INDEX_END', 'STRING_END', 'INTERPOLATION_END', 'REGEX_END']);
137
+
138
+ // Balanced pair inverses
139
+ let INVERSES = {
140
+ '(': ')', ')': '(',
141
+ '[': ']', ']': '[',
142
+ '{': '}', '}': '{',
143
+ 'INDENT': 'OUTDENT', 'OUTDENT': 'INDENT',
144
+ 'CALL_START': 'CALL_END', 'CALL_END': 'CALL_START',
145
+ 'PARAM_START': 'PARAM_END', 'PARAM_END': 'PARAM_START',
146
+ 'INDEX_START': 'INDEX_END', 'INDEX_END': 'INDEX_START',
147
+ 'STRING_START': 'STRING_END', 'STRING_END': 'STRING_START',
148
+ 'INTERPOLATION_START': 'INTERPOLATION_END', 'INTERPOLATION_END': 'INTERPOLATION_START',
149
+ 'REGEX_START': 'REGEX_END', 'REGEX_END': 'REGEX_START',
33
150
  };
34
151
 
35
- // Extend a source object with the properties of another object (shallow copy).
36
- var extend = function(object, properties) {
37
- for (var key in properties) {
38
- object[key] = properties[key];
152
+ // Tokens that close a clause (for normalizeLines)
153
+ let EXPRESSION_CLOSE = new Set(['CATCH', 'THEN', 'ELSE', 'FINALLY', ...EXPRESSION_END]);
154
+
155
+ // Tokens that act as implicit function call starters
156
+ let IMPLICIT_FUNC = new Set([
157
+ 'IDENTIFIER', 'PROPERTY', 'SUPER', ')', 'CALL_END', ']', 'INDEX_END', '@', 'THIS',
158
+ ]);
159
+
160
+ // Control flow tokens that don't end implicit calls/objects
161
+ let CONTROL_IN_IMPLICIT = new Set(['IF', 'TRY', 'FINALLY', 'CATCH', 'CLASS', 'SWITCH']);
162
+
163
+ // Single-liner keywords that get implicit INDENT/OUTDENT
164
+ let SINGLE_LINERS = new Set(['ELSE', '->', '=>', 'TRY', 'FINALLY', 'THEN']);
165
+
166
+ // Tokens that close a single-liner
167
+ let SINGLE_CLOSERS = new Set(['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN']);
168
+
169
+ // Tokens that indicate end-of-line
170
+ let LINE_BREAK = new Set(['INDENT', 'OUTDENT', 'TERMINATOR']);
171
+
172
+ // Tokens that close implicit calls when following a newline
173
+ let CALL_CLOSERS = new Set(['.', '?.']);
174
+
175
+ // Tokens that suppress a following TERMINATOR/INDENT
176
+ let UNFINISHED = new Set([
177
+ '\\', '.', '?.', 'UNARY', 'DO', 'DO_IIFE',
178
+ 'MATH', 'UNARY_MATH', '+', '-', '**', 'SHIFT', 'RELATION',
179
+ 'COMPARE', '&', '^', '|', '&&', '||', 'SPACE?', 'EXTENDS',
180
+ ]);
181
+
182
+ // Tokens that are not followed by regex (division context)
183
+ let NOT_REGEX = new Set([...INDEXABLE, '++', '--']);
184
+
185
+ // Compound assignment operators
186
+ let COMPOUND_ASSIGN = new Set([
187
+ '-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '??=',
188
+ '<<=', '>>=', '>>>=', '&=', '^=', '|=', '**=', '//=', '%%=',
189
+ ]);
190
+
191
+ // Math operators
192
+ let MATH = new Set(['*', '/', '%', '//', '%%']);
193
+
194
+ // Comparison operators
195
+ let COMPARE = new Set(['==', '!=', '===', '!==', '<', '>', '<=', '>=', '=~']);
196
+
197
+ // Shift operators
198
+ let SHIFT = new Set(['<<', '>>', '>>>']);
199
+
200
+ // Unary non-word operators
201
+ let UNARY_MATH = new Set(['!', '~']);
202
+
203
+ // ==========================================================================
204
+ // Regex Patterns
205
+ // ==========================================================================
206
+
207
+ // Identifier: word chars + optional trailing ! (await) or ? (predicate)
208
+ // The ? suffix is only captured when NOT followed by . ? [ ( to avoid
209
+ // conflict with ?. (optional chaining), ?? (nullish), ?.( and ?.[
210
+ let IDENTIFIER_RE = /^(?!\d)((?:(?!\s)[$\w\x7f-\uffff])+(?:!|[?](?![.?[(]))?)([^\n\S]*:(?![=:]))?/;
211
+ let NUMBER_RE = /^0b[01](?:_?[01])*n?|^0o[0-7](?:_?[0-7])*n?|^0x[\da-f](?:_?[\da-f])*n?|^\d+(?:_\d+)*n|^(?:\d+(?:_\d+)*)?\.?\d+(?:_\d+)*(?:e[+-]?\d+(?:_\d+)*)?/i;
212
+ let OPERATOR_RE = /^(?:<=>|[-=]>|~>|~=|:=|=!|===|!==|!\?|\?\?|=~|[-+*\/%<>&|^!?=]=|>>>=?|([-+:])\1|([&|<>*\/%])\2=?|\?\.?|\.{2,3})/;
213
+ let WHITESPACE_RE = /^[^\n\S]+/;
214
+ let NEWLINE_RE = /^(?:\n[^\n\S]*)+/;
215
+ let COMMENT_RE = /^(\s*)###([^#][\s\S]*?)(?:###([^\n\S]*)|###$)|^((?:\s*#(?!##[^#]).*)+)/;
216
+ let CODE_RE = /^[-=]>/;
217
+ let REACTIVE_RE = /^(?:~[=>]|=!)/;
218
+ let STRING_START_RE = /^(?:'''|"""|'|")/;
219
+ let STRING_SINGLE_RE = /^(?:[^\\']|\\[\s\S])*/;
220
+ let STRING_DOUBLE_RE = /^(?:[^\\"#$]|\\[\s\S]|\#(?!\{)|\$(?!\{))*/;
221
+ let HEREDOC_SINGLE_RE = /^(?:[^\\']|\\[\s\S]|'(?!''))*/;
222
+ let HEREDOC_DOUBLE_RE = /^(?:[^\\"#$]|\\[\s\S]|"(?!"")|\#(?!\{)|\$(?!\{))*/;
223
+ let HEREDOC_INDENT_RE = /\n+([^\n\S]*)(?=\S)/g;
224
+ let REGEX_RE = /^\/(?!\/)((?:[^[\/\n\\]|\\[^\n]|\[(?:\\[^\n]|[^\]\n\\])*\])*)(\/)?/;
225
+ let REGEX_FLAGS_RE = /^\w*/;
226
+ let VALID_FLAGS_RE = /^(?!.*(.).*\1)[gimsuy]*$/;
227
+ let HEREGEX_RE = /^(?:[^\\\/#\s]|\\[\s\S]|\/(?!\/\/)|\#(?!\{)|\s+(?:#(?!\{).*)?)*/;
228
+ let JSTOKEN_RE = /^`(?!``)((?:[^`\\]|\\[\s\S])*)`/;
229
+ let HERE_JSTOKEN_RE = /^```((?:[^`\\]|\\[\s\S]|`(?!``))*)```/;
230
+ let TRAILING_SPACES_RE = /\s+$/;
231
+ let LINE_CONTINUER_RE = /^\s*(?:,|\??\.(?![.\d]))/;
232
+ let BOM = 65279;
233
+
234
+ // ==========================================================================
235
+ // Helpers
236
+ // ==========================================================================
237
+
238
+ // Create a token: [tag, val] with .pre, .data, .loc, .spaced, .newLine
239
+ function tok(tag, val, {pre = 0, row = 0, col = 0, len = 0, data = null} = {}) {
240
+ let t = [tag, val];
241
+ t.pre = pre;
242
+ t.data = data;
243
+ t.loc = {r: row, c: col, n: len};
244
+ t.spaced = pre > 0;
245
+ t.newLine = false;
246
+ return t;
247
+ }
248
+
249
+ // Create a generated token (for rewriter insertions)
250
+ function gen(tag, val, origin) {
251
+ let t = tok(tag, val);
252
+ t.generated = true;
253
+ if (origin) t.origin = origin;
254
+ return t;
255
+ }
256
+
257
+ // Throw a syntax error with location info
258
+ function syntaxError(message, {row = 0, col = 0, len = 1} = {}) {
259
+ let err = new SyntaxError(message);
260
+ err.location = {first_line: row, first_column: col, last_column: col + len - 1};
261
+ throw err;
262
+ }
263
+
264
+ // Parse a number literal to its numeric value
265
+ function parseNumber(str) {
266
+ if (str == null) return NaN;
267
+ switch (str.charAt(1)) {
268
+ case 'b': return parseInt(str.slice(2).replace(/_/g, ''), 2);
269
+ case 'o': return parseInt(str.slice(2).replace(/_/g, ''), 8);
270
+ case 'x': return parseInt(str.slice(2).replace(/_/g, ''), 16);
271
+ default: return parseFloat(str.replace(/_/g, ''));
39
272
  }
40
- return object;
41
- };
273
+ }
42
274
 
43
- // Merge objects, returning a fresh copy with attributes from both sides.
44
- var merge = function(options, overrides) {
45
- return extend(extend({}, options), overrides);
46
- };
275
+ // ==========================================================================
276
+ // Lexer
277
+ // ==========================================================================
47
278
 
48
- // Return a flattened version of an array.
49
- var flatten = function(array) {
50
- return array.flat(2e308);
51
- };
279
+ export class Lexer {
52
280
 
53
- // Build a list of all comments attached to tokens.
54
- var extractAllCommentTokens = function(tokens) {
55
- var allCommentsObj = {}, sortedKeys, results = [];
56
- for (var i = 0; i < tokens.length; i++) {
57
- var token = tokens[i];
58
- if (token.comments) {
59
- for (var j = 0; j < token.comments.length; j++) {
60
- var comment = token.comments[j];
61
- var commentKey = comment.locationData.range[0];
62
- allCommentsObj[commentKey] = comment;
281
+ // --------------------------------------------------------------------------
282
+ // Main entry point
283
+ // --------------------------------------------------------------------------
284
+
285
+ tokenize(code, opts = {}) {
286
+ this.code = code;
287
+ this.tokens = [];
288
+ this.ends = []; // Balanced pair stack
289
+ this.chunk = ''; // Remaining source
290
+ this.pos = 0; // Current position in source
291
+ this.row = opts.row || 0;
292
+ this.col = opts.col || 0;
293
+ this.indent = 0; // Current indentation level (derived from .pre)
294
+ this.indents = []; // Indent stack for INDENT/OUTDENT
295
+ this.seenFor = false;
296
+ this.seenImport = false;
297
+ this.seenExport = false;
298
+ this.importSpecifierList = false;
299
+ this.exportSpecifierList = false;
300
+
301
+ // Clean source
302
+ code = this.clean(code);
303
+ this.code = code;
304
+
305
+ // Main tokenization loop
306
+ while (this.pos < code.length) {
307
+ this.chunk = code.slice(this.pos);
308
+ let consumed =
309
+ this.identifierToken() ||
310
+ this.commentToken() ||
311
+ this.whitespaceToken() ||
312
+ this.lineToken() ||
313
+ this.stringToken() ||
314
+ this.numberToken() ||
315
+ this.regexToken() ||
316
+ this.jsToken() ||
317
+ this.literalToken();
318
+
319
+ if (consumed === 0) {
320
+ syntaxError(`unexpected character: ${this.chunk.charAt(0)}`, {
321
+ row: this.row, col: this.col,
322
+ });
323
+ }
324
+
325
+ this.advance(consumed);
326
+
327
+ // Support untilBalanced mode (for string interpolation sub-lexing)
328
+ if (opts.untilBalanced && this.ends.length === 0) {
329
+ return { tokens: this.tokens, index: this.pos };
63
330
  }
64
331
  }
65
- }
66
- sortedKeys = Object.keys(allCommentsObj).sort((a, b) => a - b);
67
- for (var k = 0; k < sortedKeys.length; k++) {
68
- results.push(allCommentsObj[sortedKeys[k]]);
69
- }
70
- return results;
71
- };
72
332
 
73
- // Attach comments to a node.
74
- var attachCommentsToNode = function(comments, node) {
75
- if (!comments || comments.length === 0) return;
76
- if (!node.comments) node.comments = [];
77
- node.comments.push(...comments);
78
- };
333
+ // Close any remaining indentation
334
+ this.closeIndentation();
79
335
 
80
- // Parse number literals including binary, octal, hex.
81
- var parseNumber = function(string) {
82
- if (string == null) return 0/0;
83
- var base = null;
84
- switch (string.charAt(1)) {
85
- case 'b': base = 2; break;
86
- case 'o': base = 8; break;
87
- case 'x': base = 16; break;
88
- }
89
- if (base != null) {
90
- return parseInt(string.slice(2).replace(/_/g, ''), base);
91
- } else {
92
- return parseFloat(string.replace(/_/g, ''));
93
- }
94
- };
336
+ // Check for unclosed pairs
337
+ if (this.ends.length > 0) {
338
+ let unclosed = this.ends[this.ends.length - 1];
339
+ syntaxError(`missing ${unclosed.tag}`, {row: this.row, col: this.col});
340
+ }
95
341
 
96
- // Syntax error formatting and throwing.
97
- var syntaxErrorToString = function() {
98
- if (!(this.code && this.location)) {
99
- return Error.prototype.toString.call(this);
100
- }
101
- var {first_line, first_column, last_line, last_column} = this.location;
102
- if (last_line == null) last_line = first_line;
103
- if (last_column == null) last_column = first_column;
104
-
105
- var filename = this.filename || '[stdin]';
106
- if (filename.startsWith('<anonymous')) filename = '[stdin]';
107
-
108
- var codeLine = this.code.split('\n')[first_line];
109
- var start = first_column;
110
- var end = first_line === last_line ? last_column + 1 : codeLine.length;
111
- var marker = codeLine.slice(0, start).replace(/[^\s]/g, ' ') + repeat('^', end - start);
112
-
113
- // Check for color support
114
- var colorsEnabled = typeof process !== "undefined" && process !== null &&
115
- process.stdout?.isTTY && !process.env?.NODE_DISABLE_COLORS;
116
-
117
- if (this.colorful != null ? this.colorful : colorsEnabled) {
118
- var colorize = (str) => `\x1B[1;31m${str}\x1B[0m`;
119
- codeLine = codeLine.slice(0, start) + colorize(codeLine.slice(start, end)) + codeLine.slice(end);
120
- marker = colorize(marker);
342
+ // Rewrite (unless disabled)
343
+ if (opts.rewrite === false) return this.tokens;
344
+ return this.rewrite(this.tokens);
121
345
  }
122
346
 
123
- return `${filename}:${first_line + 1}:${first_column + 1}: error: ${this.message}\n${codeLine}\n${marker}`;
124
- };
347
+ // --------------------------------------------------------------------------
348
+ // Source preprocessing
349
+ // --------------------------------------------------------------------------
125
350
 
126
- var throwSyntaxError = function(message, location) {
127
- var error = new SyntaxError(message);
128
- error.location = location;
129
- error.toString = syntaxErrorToString;
130
- error.stack = error.toString();
131
- throw error;
132
- };
351
+ clean(code) {
352
+ // Strip BOM
353
+ if (code.charCodeAt(0) === BOM) code = code.slice(1);
354
+ // Normalize line endings
355
+ code = code.replace(/\r\n?/g, '\n');
356
+ // Strip trailing whitespace
357
+ code = code.replace(TRAILING_SPACES_RE, '');
358
+ // Ensure leading newline if code starts with whitespace
359
+ if (/^[^\n\S]/.test(code)) code = '\n' + code;
360
+ return code;
361
+ }
133
362
 
134
- // Unicode code point handling for regex.
135
- var UNICODE_CODE_POINT_ESCAPE = /(\\\\)|\\u\{([\da-fA-F]+)\}/g;
363
+ // --------------------------------------------------------------------------
364
+ // Position tracking
365
+ // --------------------------------------------------------------------------
136
366
 
137
- var unicodeCodePointToUnicodeEscapes = function(codePoint) {
138
- var toUnicodeEscape = function(val) {
139
- var str = val.toString(16);
140
- return `\\u${repeat('0', 4 - str.length)}${str}`;
141
- };
142
- if (codePoint < 0x10000) {
143
- return toUnicodeEscape(codePoint);
367
+ advance(n) {
368
+ let consumed = this.code.slice(this.pos, this.pos + n);
369
+ for (let i = 0; i < consumed.length; i++) {
370
+ if (consumed[i] === '\n') {
371
+ this.row++;
372
+ this.col = 0;
373
+ } else {
374
+ this.col++;
375
+ }
376
+ }
377
+ this.pos += n;
144
378
  }
145
- // surrogate pair
146
- var high = Math.floor((codePoint - 0x10000) / 0x400) + 0xD800;
147
- var low = (codePoint - 0x10000) % 0x400 + 0xDC00;
148
- return `${toUnicodeEscape(high)}${toUnicodeEscape(low)}`;
149
- };
150
379
 
151
- var replaceUnicodeCodePointEscapes = function(str, {flags, error, delimiter = ''} = {}) {
152
- var shouldReplace = (flags != null) && indexOf.call(flags, 'u') < 0;
153
- return str.replace(UNICODE_CODE_POINT_ESCAPE, function(match, escapedBackslash, codePointHex, offset) {
154
- if (escapedBackslash) return escapedBackslash;
155
- var codePointDecimal = parseInt(codePointHex, 16);
156
- if (codePointDecimal > 0x10ffff) {
157
- error("unicode code point escapes greater than \\u{10ffff} are not allowed", {
158
- offset: offset + delimiter.length,
159
- length: codePointHex.length + 4
160
- });
161
- }
162
- if (!shouldReplace) return match;
163
- return unicodeCodePointToUnicodeEscapes(codePointDecimal);
164
- });
165
- };
380
+ // --------------------------------------------------------------------------
381
+ // Token helpers
382
+ // --------------------------------------------------------------------------
383
+
384
+ // Push a token onto the stream
385
+ emit(tag, val, {len, data, pre} = {}) {
386
+ let t = tok(tag, val, {
387
+ pre: pre ?? 0,
388
+ row: this.row,
389
+ col: this.col,
390
+ len: len ?? (typeof val === 'string' ? val.length : 0),
391
+ data: data,
392
+ });
393
+ this.tokens.push(t);
394
+ return t;
395
+ }
166
396
 
167
- // The Lexer Class
168
- // ---------------
169
-
170
- // The Lexer class reads a stream of Rip and divvies it up into tagged
171
- // tokens. Some potential ambiguity in the grammar has been avoided by
172
- // pushing some extra smarts into the Lexer.
173
- export var Lexer = class Lexer {
174
- constructor() {
175
- // Throws an error at either a given offset from the current chunk or at the
176
- // location of a token (`token[2]`).
177
- this.error = this.error.bind(this);
397
+ // Get the previous token (or undefined)
398
+ prev() {
399
+ return this.tokens[this.tokens.length - 1];
178
400
  }
179
401
 
180
- // **tokenize** is the Lexer's main method. Scan by attempting to match tokens
181
- // one at a time, using a regular expression anchored at the start of the
182
- // remaining code, or a custom recursive token-matching method
183
- // (for interpolations). When the next token has been recorded, we move forward
184
- // within the code past the token, and begin again.
185
- //
186
- // Each tokenizing method is responsible for returning the number of characters
187
- // it has consumed.
188
- //
189
- // Before returning the token stream, run it through the [Rewriter](rewriter.html).
190
- tokenize(code, opts = {}) {
191
- var consumed, end, i, ref;
192
- this.indent = 0; // The current indentation level.
193
- this.baseIndent = 0; // The overall minimum indentation level.
194
- this.overIndent = 0; // The over-indentation at the current level.
195
- this.outdebt = 0; // The under-outdentation at the current level.
196
- this.indents = []; // The stack of all current indentation levels.
197
- this.indentLiteral = ''; // The indentation.
198
- this.ends = []; // The stack for pairing up tokens.
199
- this.tokens = []; // Stream of parsed tokens in the form `['TYPE', value, location data]`.
200
- this.seenFor = false; // Used to recognize `FORIN`, `FOROF` and `FORFROM` tokens.
201
- this.seenImport = false; // Used to recognize `IMPORT FROM? AS?` tokens.
202
- this.seenExport = false; // Used to recognize `EXPORT FROM? AS?` tokens.
203
- this.importSpecifierList = false; // Used to identify when in an `IMPORT {...} FROM? ...`.
204
- this.exportSpecifierList = false; // Used to identify when in an `EXPORT {...} FROM? ...`.
205
- this.chunkLine = opts.line || 0; // The start line for the current @chunk.
206
- this.chunkColumn = opts.column || 0; // The start column of the current @chunk.
207
- this.chunkOffset = opts.offset || 0; // The start offset for the current @chunk.
208
- this.locTweaks = opts.locTweaks || {};
209
- code = this.clean(code); // The stripped, cleaned original source code.
210
-
211
- // At every position, run through this list of attempted matches,
212
- // short-circuiting if any of them succeed. Their order determines precedence:
213
- // `@literalToken` is the fallback catch-all.
214
- i = 0;
215
- while (this.chunk = code.slice(i)) {
216
- consumed = this.identifierToken() || this.commentToken() || this.whitespaceToken() || this.lineToken() || this.stringToken() || this.numberToken() || this.regexToken() || this.jsToken() || this.literalToken();
217
- // Update position.
218
- [this.chunkLine, this.chunkColumn, this.chunkOffset] = this.getLineAndColumnFromChunk(consumed);
219
- i += consumed;
220
- if (opts.untilBalanced && this.ends.length === 0) {
221
- return {
222
- tokens: this.tokens,
223
- index: i
224
- };
225
- }
226
- }
227
- this.closeIndentation();
228
- if (end = this.ends.pop()) {
229
- this.error(`missing ${end.tag}`, ((ref = end.origin) != null ? ref : end)[2]);
230
- }
231
- if (opts.rewrite === false) {
232
- return this.tokens;
233
- }
234
- return (new Rewriter()).rewrite(this.tokens);
402
+ // Get the previous token's tag
403
+ prevTag() {
404
+ let p = this.prev();
405
+ return p ? p[0] : undefined;
235
406
  }
236
407
 
237
- // Preprocess the code to remove leading and trailing whitespace, carriage
238
- // returns, etc.
239
- clean(code) {
240
- var base, thusFar;
241
- thusFar = 0;
242
- if (code.charCodeAt(0) === BOM) {
243
- code = code.slice(1);
244
- this.locTweaks[0] = 1;
245
- thusFar += 1;
246
- }
247
- if (WHITESPACE.test(code)) {
248
- code = `\n${code}`;
249
- this.chunkLine--;
250
- if ((base = this.locTweaks)[0] == null) {
251
- base[0] = 0;
252
- }
253
- this.locTweaks[0] -= 1;
254
- }
255
- return code.replace(/\r/g, (match, offset) => {
256
- this.locTweaks[thusFar + offset] = 1;
257
- return '';
258
- }).replace(TRAILING_SPACES, '');
408
+ // Get the previous token's value
409
+ prevVal() {
410
+ let p = this.prev();
411
+ return p ? p[1] : undefined;
259
412
  }
260
413
 
261
- // Tokenizers
262
- // ----------
414
+ // --------------------------------------------------------------------------
415
+ // 1. Identifier Token
416
+ // --------------------------------------------------------------------------
417
+ //
418
+ // Handles: variables, keywords, properties, aliases
419
+ //
420
+ // Suffix operators on identifiers:
421
+ // ! → dammit operator (await): fetch!() → await fetch()
422
+ // ? → predicate (boolean): empty? → isEmpty
423
+ //
424
+ // The ? suffix is captured by IDENTIFIER_RE only when NOT followed by
425
+ // . ? [ ( — so x?.y (optional chaining) and x?? (nullish coalescing)
426
+ // are never ambiguous.
427
+ //
428
+ // --------------------------------------------------------------------------
263
429
 
264
- // Matches identifying literals: variables, keywords, method names, etc.
265
- // Check to ensure that JavaScript reserved words aren't being used as
266
- // identifiers. Because Rip reserves a handful of keywords that are
267
- // allowed in JavaScript, we're careful not to tag them as keywords when
268
- // referenced as property names here, so you can still do `jQuery.is()` even
269
- // though `is` means `===` otherwise.
270
430
  identifierToken() {
271
- var afterNot, alias, colon, colonOffset, colonToken, id, idLength, input, match, poppedToken, prev, prevprev, ref, ref1, ref10, ref11, ref12, ref2, ref3, ref4, ref5, ref6, ref7, ref8, ref9, regExSuper, sup, tag, tagToken, tokenData;
272
- // Check for reactive operators before treating ~ as identifier
273
- if (/^~[=>]/.test(this.chunk) || /^=!/.test(this.chunk)) {
274
- return 0; // Let literalToken handle these
275
- }
276
- if (!(match = IDENTIFIER.exec(this.chunk))) {
277
- return 0;
278
- }
279
- [input, id, colon] = match;
280
- // Preserve length of id for location data
281
- idLength = id.length;
282
- poppedToken = void 0;
283
- if (id === 'own' && this.tag() === 'FOR') {
284
- this.token('OWN', id);
285
- return id.length;
431
+ // Reactive operators let literalToken handle these
432
+ if (REACTIVE_RE.test(this.chunk)) return 0;
433
+
434
+ let match = IDENTIFIER_RE.exec(this.chunk);
435
+ if (!match) return 0;
436
+
437
+ let [input, id, colon] = match;
438
+ let idLen = id.length;
439
+ let data = {};
440
+ let tag;
441
+
442
+ // --- Contextual keyword handling ---
443
+
444
+ // 'own' after FOR
445
+ if (id === 'own' && this.prevTag() === 'FOR') {
446
+ this.emit('OWN', id, {len: idLen});
447
+ return idLen;
286
448
  }
287
- if (id === 'from' && this.tag() === 'YIELD') {
288
- this.token('FROM', id);
289
- return id.length;
449
+
450
+ // 'from' after YIELD
451
+ if (id === 'from' && this.prevTag() === 'YIELD') {
452
+ this.emit('FROM', id, {len: idLen});
453
+ return idLen;
290
454
  }
291
- if (id === 'as' && this.seenImport) {
292
- if (this.value() === '*') {
293
- this.tokens[this.tokens.length - 1][0] = 'IMPORT_ALL';
294
- } else if (ref = this.value(true), indexOf.call(RIP_KEYWORDS, ref) >= 0) {
295
- prev = this.prev();
296
- [prev[0], prev[1]] = ['IDENTIFIER', this.value(true)];
455
+
456
+ // 'as' in import/export context (not in for-loop context)
457
+ if (id === 'as' && !this.seenFor && (this.seenImport || this.seenExport)) {
458
+ if (this.seenImport) {
459
+ if (this.prevVal() === '*') this.prev()[0] = 'IMPORT_ALL';
297
460
  }
298
- if ((ref1 = this.tag()) === 'DEFAULT' || ref1 === 'IMPORT_ALL' || ref1 === 'IDENTIFIER') {
299
- this.token('AS', id);
300
- return id.length;
461
+ let pt = this.prevTag();
462
+ if (pt === 'DEFAULT' || pt === 'IMPORT_ALL' || pt === 'IDENTIFIER') {
463
+ this.emit('AS', id, {len: idLen});
464
+ return idLen;
301
465
  }
302
466
  }
303
- if (id === 'as' && this.seenExport) {
304
- if ((ref2 = this.tag()) === 'IDENTIFIER' || ref2 === 'DEFAULT') {
305
- this.token('AS', id);
306
- return id.length;
307
- }
308
- if (ref3 = this.value(true), indexOf.call(RIP_KEYWORDS, ref3) >= 0) {
309
- prev = this.prev();
310
- [prev[0], prev[1]] = ['IDENTIFIER', this.value(true)];
311
- this.token('AS', id);
312
- return id.length;
313
- }
467
+
468
+ // 'as' in for loops → FORAS (for x as iterable ES6 for-of iteration)
469
+ // 'as!' in for loops → FORASAWAIT (for x as! iterable — async iteration shorthand)
470
+ if ((id === 'as' || id === 'as!') && this.seenFor) {
471
+ this.seenFor = false;
472
+ this.emit(id === 'as!' ? 'FORASAWAIT' : 'FORAS', 'as', {len: idLen});
473
+ return idLen;
314
474
  }
315
- if (id === 'default' && this.seenExport && ((ref4 = this.tag()) === 'EXPORT' || ref4 === 'AS')) {
316
- this.token('DEFAULT', id);
317
- return id.length;
475
+
476
+ // 'default' in export
477
+ if (id === 'default' && this.seenExport && (this.prevTag() === 'EXPORT' || this.prevTag() === 'AS')) {
478
+ this.emit('DEFAULT', id, {len: idLen});
479
+ return idLen;
318
480
  }
319
- // REMOVED: assert keyword handling
320
- // Modern JS uses 'with' for import attributes, not 'assert'
321
- // Codegen auto-adds 'with { type: "json" }' for .json files
322
- // So grammar doesn't need ASSERT token at all
323
- if (id === 'do' && (regExSuper = /^(\s*super)(?!\(\))/.exec(this.chunk.slice(3)))) {
324
- this.token('SUPER', 'super');
325
- this.token('CALL_START', '(');
326
- this.token('CALL_END', ')');
327
- [input, sup] = regExSuper;
328
- return sup.length + 3;
481
+
482
+ // 'do super' shorthand
483
+ if (id === 'do' && /^(\s*super)(?!\(\))/.test(this.chunk.slice(3))) {
484
+ let m = /^(\s*super)(?!\(\))/.exec(this.chunk.slice(3));
485
+ this.emit('SUPER', 'super');
486
+ this.emit('CALL_START', '(');
487
+ this.emit('CALL_END', ')');
488
+ return m[1].length + 3;
329
489
  }
330
- prev = this.prev();
331
- // Don't treat colon as property when in ternary context (after SPACE?)
332
- // This allows `a ? b : c` to parse correctly instead of treating `b :` as property
333
- if (colon && prev && prev[0] === 'SPACE?') {
334
- colon = null;
490
+
491
+ // --- Determine tag ---
492
+
493
+ let prev = this.prev();
494
+
495
+ // Don't treat colon as property when in ternary context
496
+ if (colon && prev && prev[0] === 'SPACE?') colon = null;
497
+
498
+ // Property vs identifier
499
+ if (colon || (prev && (prev[0] === '.' || prev[0] === '?.' || (!prev.spaced && prev[0] === '@')))) {
500
+ tag = 'PROPERTY';
501
+ } else {
502
+ tag = 'IDENTIFIER';
335
503
  }
336
- tag = colon || (prev != null) && (((ref5 = prev[0]) === '.' || ref5 === '?.' || ref5 === '::' || ref5 === '?::') || !prev.spaced && prev[0] === '@') ? 'PROPERTY' : 'IDENTIFIER';
337
- tokenData = {};
338
- if (tag === 'IDENTIFIER' && (indexOf.call(JS_KEYWORDS, id) >= 0 || indexOf.call(RIP_KEYWORDS, id) >= 0) && !(this.exportSpecifierList && indexOf.call(RIP_KEYWORDS, id) >= 0)) {
339
- tag = id.toUpperCase();
340
- if (tag === 'WHEN' && (ref6 = this.tag(), indexOf.call(LINE_BREAK, ref6) >= 0)) {
341
- tag = 'LEADING_WHEN';
342
- } else if (tag === 'FOR') {
343
- this.seenFor = {
344
- endsLength: this.ends.length
345
- };
346
- } else if (tag === 'UNLESS') {
347
- // Keep UNLESS as-is (don't convert to IF)
348
- // Rip's grammar and codegen handle unless properly with negation
349
- // tag = 'IF'; // ← DISABLED - was losing negation!
350
- } else if (tag === 'IMPORT') {
351
- this.seenImport = true;
352
- } else if (tag === 'EXPORT') {
353
- this.seenExport = true;
354
- } else if (indexOf.call(UNARY, tag) >= 0) {
355
- tag = 'UNARY';
356
- } else if (indexOf.call(RELATION, tag) >= 0) {
357
- if (tag !== 'INSTANCEOF' && this.seenFor) {
358
- tag = 'FOR' + tag;
359
- this.seenFor = false;
360
- } else {
361
- tag = 'RELATION';
362
- if (this.value() === '!') {
363
- poppedToken = this.tokens.pop();
364
- tokenData.invert = (ref7 = (ref8 = poppedToken.data) != null ? ref8.original : void 0) != null ? ref7 : poppedToken[1];
365
- }
366
- }
367
- }
368
- } else if (tag === 'IDENTIFIER' && this.seenFor && id === 'from' && isForFrom(prev)) {
369
- tag = 'FORFROM';
370
- this.seenFor = false;
371
- // Throw an error on attempts to use `get` or `set` as keywords, or
372
- // what Rip would normally interpret as calls to functions named
373
- // `get` or `set`, i.e. `get({foo: function () {}})`.
374
- } else if (tag === 'PROPERTY' && prev) {
375
- if (prev.spaced && (ref9 = prev[0], indexOf.call(CALLABLE, ref9) >= 0) && /^[gs]et$/.test(prev[1]) && this.tokens.length > 1 && ((ref10 = this.tokens[this.tokens.length - 2][0]) !== '.' && ref10 !== '?.' && ref10 !== '@')) {
376
- this.error(`'${prev[1]}' cannot be used as a keyword, or as a function call without parentheses`, prev[2]);
377
- } else if (prev[0] === '.' && this.tokens.length > 1 && (prevprev = this.tokens[this.tokens.length - 2])[0] === 'UNARY' && prevprev[1] === 'new') {
378
- prevprev[0] = 'NEW_TARGET';
379
- } else if (prev[0] === '.' && this.tokens.length > 1 && (prevprev = this.tokens[this.tokens.length - 2])[0] === 'IMPORT' && prevprev[1] === 'import') {
380
- this.seenImport = false;
381
- prevprev[0] = 'IMPORT_META';
382
- } else if (this.tokens.length > 2) {
383
- prevprev = this.tokens[this.tokens.length - 2];
384
- if (((ref11 = prev[0]) === '@' || ref11 === 'THIS') && prevprev && prevprev.spaced && /^[gs]et$/.test(prevprev[1]) && ((ref12 = this.tokens[this.tokens.length - 3][0]) !== '.' && ref12 !== '?.' && ref12 !== '@')) {
385
- this.error(`'${prevprev[1]}' cannot be used as a keyword, or as a function call without parentheses`, prevprev[2]);
386
- }
504
+
505
+ // Keyword classification (skip for words with ! or ? suffix)
506
+ let baseId = id.endsWith('!') || id.endsWith('?') ? id.slice(0, -1) : id;
507
+ if (tag === 'IDENTIFIER' && !id.endsWith('!') && !id.endsWith('?') &&
508
+ (JS_KEYWORDS.has(id) || RIP_KEYWORDS.has(id) || ALIAS_WORDS.has(id)) &&
509
+ !(this.exportSpecifierList && ALIAS_WORDS.has(id))) {
510
+
511
+ // Apply aliases
512
+ if (ALIASES[id] !== undefined) {
513
+ data.original = id;
514
+ id = ALIASES[id];
387
515
  }
516
+
517
+ // Map aliased values to their token types
518
+ tag = this.classifyKeyword(id, tag, data);
388
519
  }
389
- if (tag === 'IDENTIFIER' && indexOf.call(RESERVED, id) >= 0) {
390
- this.error(`reserved word '${id}'`, {
391
- length: id.length
392
- });
520
+
521
+ // Reserved words (check the base form, not the suffixed form)
522
+ if (tag === 'IDENTIFIER' && RESERVED.has(baseId)) {
523
+ syntaxError(`reserved word '${baseId}'`, {row: this.row, col: this.col, len: idLen});
393
524
  }
394
- if (!(tag === 'PROPERTY' || this.exportSpecifierList || this.importSpecifierList)) {
395
- // Transform 'is not' → 'isnt' for cleaner syntax (before alias processing)
396
- // Only transform when 'not' is followed by a non-boolean value to avoid breaking chains
397
- if (id === 'is' && this.chunk.slice(idLength, idLength + 4) === ' not') {
398
- // Look ahead to see what comes after ' not '
399
- afterNot = this.chunk.slice(idLength + 4).trim();
400
- // Only transform if NOT followed by 'false', 'true' (which could be part of chains)
401
- if (!afterNot.match(/^(false|true)\s+(is|isnt|==|!=)/)) {
402
- id = 'isnt';
403
- idLength += 4; // Consume ' not' as well
525
+
526
+ // Property-specific checks (new.target, import.meta)
527
+ if (tag === 'PROPERTY' && prev) {
528
+ if (prev[0] === '.' && this.tokens.length > 1) {
529
+ let pp = this.tokens[this.tokens.length - 2];
530
+ if (pp[0] === 'UNARY' && pp[1] === 'new') pp[0] = 'NEW_TARGET';
531
+ if (pp[0] === 'IMPORT' && pp[1] === 'import') {
532
+ this.seenImport = false;
533
+ pp[0] = 'IMPORT_META';
404
534
  }
405
535
  }
406
- if (indexOf.call(RIP_ALIASES, id) >= 0) {
407
- alias = id;
408
- id = RIP_ALIAS_MAP[id];
409
- tokenData.original = alias;
410
- }
411
- tag = (function() {
412
- switch (id) {
413
- case '!':
414
- return 'UNARY';
415
- case '==':
416
- case '!=':
417
- return 'COMPARE';
418
- case 'true':
419
- case 'false':
420
- return 'BOOL';
421
- case 'break':
422
- case 'continue':
423
- case 'debugger':
424
- return 'STATEMENT';
425
- case '&&':
426
- case '||':
427
- return id;
428
- default:
429
- return tag;
430
- }
431
- })();
432
536
  }
433
537
 
434
- // Check for async sigils on identifiers
435
- // Trailing ! (dammit operator) - forces await on call (.await = true)
436
- // Leading & (punt operator) - prevents await on call (.await = false) (future feature)
437
- // No sigil - use default mode (.await = undefined)
438
- const originalIdLength = idLength; // Keep original length for consumption
439
-
440
- // Only check for trailing ! if id is more than just '!' (to avoid aliased 'not')
538
+ // --- Dammit operator: trailing ! → await ---
441
539
  if (id.length > 1 && id.endsWith('!')) {
442
- tokenData.await = true; // Force await
443
- id = id.slice(0, -1); // Strip ! from identifier name
444
- }
445
- // TODO: Punt operator (when implemented)
446
- // if (id.startsWith('&')) {
447
- // tokenData.await = false; // Prevent await
448
- // id = id.slice(1);
449
- // }
450
-
451
- tagToken = this.token(tag, id, {
452
- length: originalIdLength, // Use original length (includes sigils)
453
- data: tokenData
454
- });
455
- if (alias) {
456
- tagToken.origin = [tag, alias, tagToken[2]];
457
- }
458
- if (poppedToken) {
459
- [tagToken[2].first_line, tagToken[2].first_column, tagToken[2].range[0]] = [poppedToken[2].first_line, poppedToken[2].first_column, poppedToken[2].range[0]];
540
+ data.await = true;
541
+ id = id.slice(0, -1);
460
542
  }
461
- if (colon) {
462
- colonOffset = input.lastIndexOf(':');
463
- colonToken = this.token(':', ':', {
464
- offset: colonOffset
465
- });
543
+
544
+ // --- Predicate operator: trailing ? → boolean convention ---
545
+ // empty? → isEmpty, active? → isActive, valid? → isValid
546
+ if (id.length > 1 && id.endsWith('?')) {
547
+ data.predicate = true;
548
+ id = id.slice(0, -1);
466
549
  }
467
- // Return the actual consumed length (accounts for 'is not' → 'isnt' transformation and sigils)
550
+
551
+ // --- Emit ---
552
+ let t = this.emit(tag, id, {len: idLen, data: Object.keys(data).length ? data : null});
553
+
468
554
  if (colon) {
469
- return originalIdLength + colon.length;
470
- } else {
471
- return originalIdLength;
555
+ this.emit(':', ':', {len: 1});
556
+ return idLen + colon.length;
472
557
  }
558
+
559
+ return idLen;
473
560
  }
474
561
 
475
- // Matches and consumes comments. The comments are taken out of the token
476
- // stream and saved for later, to be reinserted into the output after
477
- // everything has been parsed and the JavaScript code generated.
478
- commentToken(chunk = this.chunk, {heregex, returnCommentTokens = false, offsetInChunk = 0} = {}) {
479
- var commentAttachment, commentAttachments, commentWithSurroundingWhitespace, content, contents, getIndentSize, hasSeenFirstCommentLine, hereComment, hereLeadingWhitespace, hereTrailingWhitespace, i, indentSize, leadingNewline, leadingNewlineOffset, leadingNewlines, leadingWhitespace, length, lineComment, match, matchIllegal, noIndent, nonInitial, placeholderToken, precededByBlankLine, precedingNonCommentLines, prev;
480
- if (!(match = chunk.match(COMMENT))) {
481
- return 0;
482
- }
483
- [commentWithSurroundingWhitespace, hereLeadingWhitespace, hereComment, hereTrailingWhitespace, lineComment] = match;
484
- contents = null;
485
- // Does this comment follow code on the same line?
486
- leadingNewline = /^\s*\n+\s*#/.test(commentWithSurroundingWhitespace);
487
- if (hereComment) {
488
- matchIllegal = HERECOMMENT_ILLEGAL.exec(hereComment);
489
- if (matchIllegal) {
490
- this.error(`block comments cannot contain ${matchIllegal[0]}`, {
491
- offset: '###'.length + matchIllegal.index,
492
- length: matchIllegal[0].length
493
- });
494
- }
495
- // Parse indentation or outdentation as if this block comment didn't exist.
496
- chunk = chunk.replace(`###${hereComment}###`, '');
497
- // Remove leading newlines, like `Rewriter::removeLeadingNewlines`, to
498
- // avoid the creation of unwanted `TERMINATOR` tokens.
499
- chunk = chunk.replace(/^\n+/, '');
500
- this.lineToken({chunk});
501
- // Pull out the ###-style comment's content, and format it.
502
- content = hereComment;
503
- contents = [
504
- {
505
- content,
506
- length: commentWithSurroundingWhitespace.length - hereLeadingWhitespace.length - hereTrailingWhitespace.length,
507
- leadingWhitespace: hereLeadingWhitespace
508
- }
509
- ];
510
- } else {
511
- // The `COMMENT` regex captures successive line comments as one token.
512
- // Remove any leading newlines before the first comment, but preserve
513
- // blank lines between line comments.
514
- leadingNewlines = '';
515
- content = lineComment.replace(/^(\n*)/, function(leading) {
516
- leadingNewlines = leading;
517
- return '';
518
- });
519
- precedingNonCommentLines = '';
520
- hasSeenFirstCommentLine = false;
521
- contents = content.split('\n').map(function(line, index) {
522
- var comment, leadingWhitespace;
523
- if (!(line.indexOf('#') > -1)) {
524
- precedingNonCommentLines += `\n${line}`;
525
- return;
526
- }
527
- leadingWhitespace = '';
528
- content = line.replace(/^([ |\t]*)#/, function(_, whitespace) {
529
- leadingWhitespace = whitespace;
530
- return '';
531
- });
532
- comment = {
533
- content,
534
- length: '#'.length + content.length,
535
- leadingWhitespace: `${!hasSeenFirstCommentLine ? leadingNewlines : ''}${precedingNonCommentLines}${leadingWhitespace}`,
536
- precededByBlankLine: !!precedingNonCommentLines
537
- };
538
- hasSeenFirstCommentLine = true;
539
- precedingNonCommentLines = '';
540
- return comment;
541
- }).filter(function(comment) {
542
- return comment;
543
- });
544
- }
545
- getIndentSize = function({leadingWhitespace, nonInitial}) {
546
- var lastNewlineIndex;
547
- lastNewlineIndex = leadingWhitespace.lastIndexOf('\n');
548
- if ((hereComment != null) || !nonInitial) {
549
- if (!(lastNewlineIndex > -1)) {
550
- return null;
551
- }
552
- } else {
553
- if (lastNewlineIndex == null) {
554
- lastNewlineIndex = -1;
555
- }
562
+ // Classify a keyword/alias into its token tag
563
+ classifyKeyword(id, fallback, data) {
564
+ switch (id) {
565
+ case '!': return 'UNARY';
566
+ case '==': case '!=': return 'COMPARE';
567
+ case 'true': case 'false': return 'BOOL';
568
+ case '&&': case '||': return id;
569
+ }
570
+ if (STATEMENTS.has(id)) return 'STATEMENT';
571
+
572
+ // Uppercase keyword mapping
573
+ let upper = id.toUpperCase();
574
+ if (upper === 'WHEN' && LINE_BREAK.has(this.prevTag())) return 'LEADING_WHEN';
575
+ if (upper === 'FOR') { this.seenFor = {endsLength: this.ends.length}; return 'FOR'; }
576
+ if (upper === 'UNLESS') return 'UNLESS';
577
+ if (upper === 'IMPORT') { this.seenImport = true; return 'IMPORT'; }
578
+ if (upper === 'EXPORT') { this.seenExport = true; return 'EXPORT'; }
579
+ if (UNARY_WORDS.has(upper)) return 'UNARY';
580
+
581
+ if (RELATIONS.has(upper)) {
582
+ if (upper !== 'INSTANCEOF' && this.seenFor) {
583
+ this.seenFor = false;
584
+ return 'FOR' + upper;
556
585
  }
557
- return leadingWhitespace.length - 1 - lastNewlineIndex;
558
- };
559
- commentAttachments = (function() {
560
- var k, len, results;
561
- results = [];
562
- for (i = k = 0, len = contents.length; k < len; i = ++k) {
563
- ({content, length, leadingWhitespace, precededByBlankLine} = contents[i]);
564
- nonInitial = i !== 0;
565
- leadingNewlineOffset = nonInitial ? 1 : 0;
566
- offsetInChunk += leadingNewlineOffset + leadingWhitespace.length;
567
- indentSize = getIndentSize({leadingWhitespace, nonInitial});
568
- noIndent = (indentSize == null) || indentSize === -1;
569
- commentAttachment = {
570
- content,
571
- here: hereComment != null,
572
- newLine: leadingNewline || nonInitial, // Line comments after the first one start new lines, by definition.
573
- locationData: this.makeLocationData({offsetInChunk, length}),
574
- precededByBlankLine,
575
- indentSize,
576
- indented: !noIndent && indentSize > this.indent,
577
- outdented: !noIndent && indentSize < this.indent
578
- };
579
- if (heregex) {
580
- commentAttachment.heregex = true;
581
- }
582
- offsetInChunk += length;
583
- results.push(commentAttachment);
586
+ // Handle 'not in', 'not of', 'not instanceof' — pop the '!' and record inversion
587
+ if (this.prevVal() === '!') {
588
+ let popped = this.tokens.pop();
589
+ data.invert = popped.data?.original || popped[1];
584
590
  }
585
- return results;
586
- }).call(this);
587
- prev = this.prev();
588
- if (!prev) {
589
- // If there's no previous token, create a placeholder token to attach
590
- // this comment to; and follow with a newline.
591
- commentAttachments[0].newLine = true;
592
- this.lineToken({
593
- chunk: this.chunk.slice(commentWithSurroundingWhitespace.length),
594
- offset: commentWithSurroundingWhitespace.length // Set the indent.
595
- });
596
- placeholderToken = this.makeToken('JS', '', {
597
- offset: commentWithSurroundingWhitespace.length,
598
- generated: true
599
- });
600
- placeholderToken.comments = commentAttachments;
601
- this.tokens.push(placeholderToken);
602
- this.newlineToken(commentWithSurroundingWhitespace.length);
603
- } else {
604
- attachCommentsToNode(commentAttachments, prev);
591
+ return 'RELATION';
605
592
  }
606
- if (returnCommentTokens) {
607
- return commentAttachments;
608
- }
609
- return commentWithSurroundingWhitespace.length;
593
+
594
+ // If it's a known JS/Rip keyword, uppercase it
595
+ if (JS_KEYWORDS.has(id) || RIP_KEYWORDS.has(id)) return upper;
596
+
597
+ return fallback;
598
+ }
599
+
600
+ // --------------------------------------------------------------------------
601
+ // 2. Comment Token
602
+ // --------------------------------------------------------------------------
603
+
604
+ commentToken() {
605
+ let match = COMMENT_RE.exec(this.chunk);
606
+ if (!match) return 0;
607
+ // For now, consume the comment and discard it
608
+ // TODO: attach comments to adjacent tokens for source map support
609
+ return match[0].length;
610
610
  }
611
611
 
612
- // Matches and consumes non-meaningful whitespace. Tag the previous token
613
- // as being "spaced", because there are some cases where it makes a difference.
612
+ // --------------------------------------------------------------------------
613
+ // 3. Whitespace Token
614
+ // --------------------------------------------------------------------------
615
+
614
616
  whitespaceToken() {
615
- var match, nline, prev;
616
- if (!((match = WHITESPACE.exec(this.chunk)) || (nline = this.chunk.charAt(0) === '\n'))) {
617
- return 0;
618
- }
619
- prev = this.prev();
617
+ let match = WHITESPACE_RE.exec(this.chunk);
618
+ if (!match && this.chunk[0] !== '\n') return 0;
619
+
620
+ let prev = this.prev();
620
621
  if (prev) {
621
- prev[match ? 'spaced' : 'newLine'] = true;
622
- }
623
- if (match) {
624
- return match[0].length;
625
- } else {
626
- return 0;
622
+ if (match) {
623
+ prev.spaced = true;
624
+ prev.pre = match[0].length;
625
+ } else {
626
+ prev.newLine = true;
627
+ }
627
628
  }
629
+
630
+ return match ? match[0].length : 0;
628
631
  }
629
632
 
630
- // Matches newlines, indents, and outdents, and determines which is which.
631
- // If we can detect that the current line is continued onto the next line,
632
- // then the newline is suppressed:
633
- //
634
- // elements
635
- // .each( ... )
636
- // .map( ... )
637
- //
638
- // Keeps track of the level of indentation, because a single outdent token
639
- // can close multiple indents, so we need to know how far in we happen to be.
640
- lineToken({chunk = this.chunk, offset = 0} = {}) {
641
- var backslash, diff, endsContinuationLineIndentation, indent, match, minLiteralLength, newIndentLiteral, noNewlines, prev, ref, size;
642
- if (!(match = MULTI_DENT.exec(chunk))) {
643
- return 0;
644
- }
645
- indent = match[0];
646
- prev = this.prev();
647
- backslash = (prev != null ? prev[0] : void 0) === '\\';
648
- if (!((backslash || ((ref = this.seenFor) != null ? ref.endsLength : void 0) < this.ends.length) && this.seenFor)) {
649
- this.seenFor = false;
650
- }
651
- if (!((backslash && this.seenImport) || this.importSpecifierList)) {
652
- this.seenImport = false;
653
- }
654
- if (!((backslash && this.seenExport) || this.exportSpecifierList)) {
655
- this.seenExport = false;
656
- }
657
- size = indent.length - 1 - indent.lastIndexOf('\n');
658
- noNewlines = this.unfinished();
659
- newIndentLiteral = size > 0 ? indent.slice(-size) : '';
660
- if (!/^(.?)\1*$/.exec(newIndentLiteral)) {
661
- this.error('mixed indentation', {
662
- offset: indent.length
663
- });
633
+ // --------------------------------------------------------------------------
634
+ // 4. Line Token (newlines and indentation)
635
+ // --------------------------------------------------------------------------
636
+
637
+ lineToken() {
638
+ let match = NEWLINE_RE.exec(this.chunk);
639
+ if (!match) return 0;
640
+
641
+ let indent = match[0];
642
+ let size = indent.length - 1 - indent.lastIndexOf('\n');
643
+
644
+ // If we're in an unfinished expression, suppress the newline
645
+ if (this.isUnfinished()) {
646
+ // Exception: comma at a lower indent continues the outer call, not the block
647
+ if (size < this.indent && /^\s*,/.test(this.chunk) && !UNFINISHED.has(this.prevTag())) {
648
+ this.outdentTo(size, indent.length);
649
+ if (this.prevTag() === 'TERMINATOR') this.tokens.pop();
650
+ return indent.length;
651
+ }
664
652
  return indent.length;
665
653
  }
666
- minLiteralLength = Math.min(newIndentLiteral.length, this.indentLiteral.length);
667
- if (newIndentLiteral.slice(0, minLiteralLength) !== this.indentLiteral.slice(0, minLiteralLength)) {
668
- this.error('indentation mismatch', {
669
- offset: indent.length
670
- });
671
- return indent.length;
654
+
655
+ // Reset for-loop state on newlines (unless inside brackets)
656
+ if (this.seenFor && !(this.seenFor.endsLength < this.ends.length)) {
657
+ this.seenFor = false;
672
658
  }
673
- if (size - this.overIndent === this.indent) {
674
- if (noNewlines) {
675
- this.suppressNewlines();
676
- } else {
677
- this.newlineToken(offset);
678
- }
659
+ if (!this.importSpecifierList) this.seenImport = false;
660
+ if (!this.exportSpecifierList) this.seenExport = false;
661
+
662
+ // Same indentation → emit TERMINATOR
663
+ if (size === this.indent) {
664
+ this.emitNewline();
679
665
  return indent.length;
680
666
  }
667
+
668
+ // Increased indentation → emit INDENT
681
669
  if (size > this.indent) {
682
- if (noNewlines) {
683
- if (!backslash) {
684
- this.overIndent = size - this.indent;
685
- }
686
- if (this.overIndent) {
687
- prev.continuationLineIndent = this.indent + this.overIndent;
688
- }
689
- this.suppressNewlines();
690
- return indent.length;
691
- }
692
670
  if (!this.tokens.length) {
693
- this.baseIndent = this.indent = size;
694
- this.indentLiteral = newIndentLiteral;
671
+ // First line set base indent
672
+ this.indent = size;
695
673
  return indent.length;
696
674
  }
697
- diff = size - this.indent + this.outdebt;
698
- this.token('INDENT', diff, {
699
- offset: offset + indent.length - size,
700
- length: size
701
- });
675
+ let diff = size - this.indent;
676
+ this.emit('INDENT', diff, {len: size});
702
677
  this.indents.push(diff);
703
- this.ends.push({
704
- tag: 'OUTDENT'
705
- });
706
- this.outdebt = this.overIndent = 0;
678
+ this.ends.push({tag: 'OUTDENT'});
707
679
  this.indent = size;
708
- this.indentLiteral = newIndentLiteral;
709
- } else if (size < this.baseIndent) {
710
- this.error('missing indentation', {
711
- offset: offset + indent.length
712
- });
713
- } else {
714
- endsContinuationLineIndentation = this.overIndent > 0;
715
- this.overIndent = 0;
716
- this.outdentToken({
717
- moveOut: this.indent - size,
718
- noNewlines,
719
- outdentLength: indent.length,
720
- offset,
721
- indentSize: size,
722
- endsContinuationLineIndentation
723
- });
680
+ return indent.length;
724
681
  }
682
+
683
+ // Decreased indentation → emit OUTDENT(s)
684
+ this.outdentTo(size, indent.length);
725
685
  return indent.length;
726
686
  }
727
687
 
728
- // Helper: Get closing delimiter column position if it has only whitespace before it
729
- getHeredocClosingColumn(end, quoteLength) {
730
- const closingPos = end - quoteLength;
688
+ // Emit OUTDENT tokens to reach target indent level
689
+ outdentTo(targetSize, outdentLength = 0) {
690
+ let moveOut = this.indent - targetSize;
691
+ while (moveOut > 0) {
692
+ let lastIndent = this.indents[this.indents.length - 1];
693
+ if (!lastIndent) {
694
+ moveOut = 0;
695
+ } else {
696
+ this.indents.pop();
697
+ this.pair('OUTDENT');
698
+ this.emit('OUTDENT', moveOut, {len: outdentLength});
699
+ moveOut -= lastIndent;
700
+ }
701
+ }
702
+ this.emitNewline();
703
+ this.indent = targetSize;
704
+ }
705
+
706
+ // Close all remaining indentation at end of file
707
+ closeIndentation() {
708
+ this.outdentTo(0);
709
+ }
731
710
 
732
- // Find line start
733
- let lineStart = closingPos - 1;
734
- while (lineStart >= 0 && this.chunk[lineStart] !== '\n') {
735
- lineStart--;
711
+ // Emit a TERMINATOR if one isn't already there
712
+ emitNewline() {
713
+ if (this.prevTag() !== 'TERMINATOR') {
714
+ this.emit('TERMINATOR', '\n', {len: 0});
736
715
  }
737
- lineStart++;
716
+ }
738
717
 
739
- // Check if only whitespace before closing
740
- const beforeClosing = this.chunk.slice(lineStart, closingPos);
741
- return /^\s*$/.test(beforeClosing) ? beforeClosing.length : null;
718
+ // Check if the current line is unfinished (continuation)
719
+ isUnfinished() {
720
+ return LINE_CONTINUER_RE.test(this.chunk) || UNFINISHED.has(this.prevTag());
742
721
  }
743
722
 
744
- // Helper: Extract heredoc content from tokens
745
- extractHeredocContent(tokens) {
746
- const parts = [];
747
- for (let i = 0; i < tokens.length; i++) {
748
- if (tokens[i][0] === 'NEOSTRING') {
749
- parts.push(tokens[i][1]);
723
+ // Match balanced pairs
724
+ pair(tag) {
725
+ let expected = this.ends[this.ends.length - 1];
726
+ if (!expected || tag !== expected.tag) {
727
+ if (expected?.tag === 'OUTDENT') {
728
+ // Auto-close INDENT
729
+ let lastIndent = this.indents[this.indents.length - 1];
730
+ if (lastIndent) {
731
+ this.outdentTo(this.indent - lastIndent);
732
+ }
733
+ return this.pair(tag);
750
734
  }
735
+ syntaxError(`unmatched ${tag}`, {row: this.row, col: this.col});
751
736
  }
752
- return parts.join('#{}');
737
+ return this.ends.pop();
753
738
  }
754
739
 
755
- // Helper: Find minimum indentation in heredoc content
756
- findMinimumIndent(doc) {
740
+ // --------------------------------------------------------------------------
741
+ // 5. String Token
742
+ // --------------------------------------------------------------------------
743
+
744
+ stringToken() {
745
+ let m = STRING_START_RE.exec(this.chunk);
746
+ if (!m) return 0;
747
+
748
+ let quote = m[0];
749
+ let prev = this.prev();
750
+
751
+ // Tag 'from' in import/export context
752
+ if (prev && this.prevVal() === 'from' && (this.seenImport || this.seenExport)) {
753
+ prev[0] = 'FROM';
754
+ }
755
+
756
+ let regex;
757
+ switch (quote) {
758
+ case "'": regex = STRING_SINGLE_RE; break;
759
+ case '"': regex = STRING_DOUBLE_RE; break;
760
+ case "'''": regex = HEREDOC_SINGLE_RE; break;
761
+ case '"""': regex = HEREDOC_DOUBLE_RE; break;
762
+ }
763
+
764
+ let {tokens: parts, index: end} = this.matchWithInterpolations(regex, quote);
765
+ let heredoc = quote.length === 3;
766
+
767
+ // Heredoc indent processing
757
768
  let indent = null;
758
- let match;
759
- while (match = HEREDOC_INDENT.exec(doc)) {
760
- const attempt = match[1];
761
- if (indent === null || (0 < attempt.length && attempt.length < indent.length)) {
762
- indent = attempt;
763
- }
769
+ if (heredoc) {
770
+ indent = this.processHeredocIndent(end, quote, parts);
764
771
  }
765
- return indent;
772
+
773
+ // Merge interpolation tokens into the stream
774
+ this.mergeInterpolationTokens(parts, {quote, indent, endOffset: end});
775
+
776
+ return end;
766
777
  }
767
778
 
768
- // Helper: Choose between closing column and minimum indent
769
- selectHeredocIndent(closingColumn, minIndent) {
770
- if (closingColumn === null) {
771
- // No closing column detected, use minimum
772
- return minIndent;
773
- }
779
+ // Process heredoc indentation based on closing delimiter position
780
+ processHeredocIndent(end, quote, tokens) {
781
+ // Find closing delimiter column
782
+ let closingPos = end - quote.length;
783
+ let lineStart = closingPos - 1;
784
+ while (lineStart >= 0 && this.chunk[lineStart] !== '\n') lineStart--;
785
+ lineStart++;
786
+
787
+ let beforeClosing = this.chunk.slice(lineStart, closingPos);
788
+ let closingColumn = /^\s*$/.test(beforeClosing) ? beforeClosing.length : null;
774
789
 
775
- if (minIndent === null) {
776
- // No content indent (empty or whitespace-only), use closing
777
- return ' '.repeat(closingColumn);
790
+ // Get content for minimum indent analysis
791
+ let doc = '';
792
+ for (let t of tokens) {
793
+ if (t[0] === 'NEOSTRING') doc += t[1];
778
794
  }
779
795
 
780
- if (closingColumn <= minIndent.length) {
781
- // Closing at or left of content minimum - use closing
782
- return ' '.repeat(closingColumn);
796
+ // Find minimum indent in content
797
+ let minIndent = null;
798
+ let m;
799
+ HEREDOC_INDENT_RE.lastIndex = 0;
800
+ while (m = HEREDOC_INDENT_RE.exec(doc)) {
801
+ if (minIndent === null || (m[1].length > 0 && m[1].length < minIndent.length)) {
802
+ minIndent = m[1];
803
+ }
783
804
  }
784
805
 
785
- // Closing right of content - use minimum (old behavior)
806
+ // Choose indent baseline
807
+ if (closingColumn === null) return minIndent;
808
+ if (minIndent === null) return ' '.repeat(closingColumn);
809
+ if (closingColumn <= minIndent.length) return ' '.repeat(closingColumn);
786
810
  return minIndent;
787
811
  }
788
812
 
789
- // Helper: Remove trailing whitespace-only line from tokens
790
- removeTrailingWhitespaceLine(tokens) {
791
- if (tokens.length === 0) return;
813
+ // Match string/regex content with interpolation support
814
+ matchWithInterpolations(regex, delimiter, closingDelimiter, interpolators) {
815
+ if (!closingDelimiter) closingDelimiter = delimiter;
816
+ if (!interpolators) interpolators = /^[#$]\{/;
817
+
818
+ let tokens = [];
819
+ let offset = delimiter.length;
820
+
821
+ if (this.chunk.slice(0, offset) !== delimiter) return null;
822
+
823
+ let str = this.chunk.slice(offset);
824
+
825
+ while (true) {
826
+ let [strPart] = regex.exec(str);
827
+
828
+ tokens.push(['NEOSTRING', strPart, {offset}]);
829
+ str = str.slice(strPart.length);
830
+ offset += strPart.length;
831
+
832
+ // Check for interpolation start
833
+ let m = interpolators.exec(str);
834
+ if (!m) break;
835
+
836
+ let interpolator = m[0];
837
+ let interpOffset = interpolator.length - 1;
838
+
839
+ // Recursively lex the interpolated expression
840
+ let rest = str.slice(interpOffset);
841
+ let nested = new Lexer().tokenize(rest, {
842
+ row: this.row,
843
+ col: this.col + offset + interpOffset,
844
+ untilBalanced: true,
845
+ rewrite: false,
846
+ });
847
+
848
+ let index = nested.index + interpOffset;
792
849
 
793
- const lastToken = tokens[tokens.length - 1];
794
- if (lastToken[0] !== 'NEOSTRING') return;
850
+ // Tag opening/closing as interpolation markers
851
+ if (str[index - 1] === '}') {
852
+ let open = nested.tokens[0];
853
+ let close = nested.tokens[nested.tokens.length - 1];
854
+ open[0] = 'INTERPOLATION_START';
855
+ open[1] = '(';
856
+ close[0] = 'INTERPOLATION_END';
857
+ close[1] = ')';
858
+ }
795
859
 
796
- // Check if last line is whitespace-only
797
- const lines = lastToken[1].split('\n');
798
- const lastLine = lines[lines.length - 1];
860
+ // Clean up leading TERMINATOR and trailing INDENT/OUTDENT
861
+ if (nested.tokens[1]?.[0] === 'TERMINATOR') nested.tokens.splice(1, 1);
862
+ let ntl = nested.tokens.length;
863
+ if (ntl > 2 && nested.tokens[ntl - 3]?.[0] === 'INDENT' && nested.tokens[ntl - 2]?.[0] === 'OUTDENT') {
864
+ nested.tokens.splice(ntl - 3, 2);
865
+ }
799
866
 
800
- if (/^\s*$/.test(lastLine)) {
801
- // Remove the trailing whitespace line
802
- lines.pop();
803
- lastToken[1] = lines.join('\n');
867
+ tokens.push(['TOKENS', nested.tokens]);
868
+ str = str.slice(index);
869
+ offset += index;
804
870
  }
805
- }
806
871
 
807
- // Matches strings, including multiline strings, as well as heredocs, with or without
808
- // interpolation.
809
- stringToken() {
810
- var attempt, delimiter, doc, end, heredoc, i, indent, match, prev, quote, ref, regex, token, tokens;
811
- [quote] = STRING_START.exec(this.chunk) || [];
812
- if (!quote) {
813
- return 0;
872
+ if (str.slice(0, closingDelimiter.length) !== closingDelimiter) {
873
+ syntaxError(`missing ${closingDelimiter}`, {row: this.row, col: this.col});
814
874
  }
815
- // If the preceding token is `from` and this is an import or export statement,
816
- // properly tag the `from`.
817
- prev = this.prev();
818
- if (prev && this.value() === 'from' && (this.seenImport || this.seenExport)) {
819
- prev[0] = 'FROM';
875
+
876
+ return { tokens, index: offset + closingDelimiter.length };
877
+ }
878
+
879
+ // Merge NEOSTRING/TOKENS into the real token stream
880
+ mergeInterpolationTokens(tokens, {quote, indent, endOffset}) {
881
+ if (tokens.length > 1) {
882
+ this.emit('STRING_START', '(', {len: quote?.length || 0, data: {quote}});
820
883
  }
821
- regex = (function() {
822
- switch (quote) {
823
- case "'":
824
- return STRING_SINGLE;
825
- case '"':
826
- return STRING_DOUBLE;
827
- case "'''":
828
- return HEREDOC_SINGLE;
829
- case '"""':
830
- return HEREDOC_DOUBLE;
831
- }
832
- })();
833
- ({
834
- tokens,
835
- index: end
836
- } = this.matchWithInterpolations(regex, quote));
837
- heredoc = quote.length === 3;
838
- if (heredoc) {
839
- // Detect closing delimiter position for visual baseline control
840
- const closingColumn = this.getHeredocClosingColumn(end, quote.length);
841
884
 
842
- // Get document content for analysis
843
- doc = this.extractHeredocContent(tokens);
885
+ for (let i = 0; i < tokens.length; i++) {
886
+ let [tag, val] = tokens[i];
887
+
888
+ if (tag === 'TOKENS') {
889
+ for (let nested of val) this.tokens.push(nested);
890
+ } else if (tag === 'NEOSTRING') {
891
+ let processed = val;
892
+
893
+ // Strip heredoc indent
894
+ if (indent) {
895
+ let indentRe = new RegExp('\\n' + indent, 'g');
896
+ processed = processed.replace(indentRe, '\n');
897
+ }
844
898
 
845
- // Calculate minimum indentation from content
846
- indent = this.findMinimumIndent(doc);
899
+ // Strip leading newline for heredocs
900
+ if (i === 0 && quote?.length === 3) {
901
+ processed = processed.replace(/^\n/, '');
902
+ }
847
903
 
848
- // Choose dedenting baseline intelligently
849
- indent = this.selectHeredocIndent(closingColumn, indent);
904
+ // Strip trailing newline for heredocs
905
+ if (i === tokens.length - 1 && quote?.length === 3) {
906
+ processed = processed.replace(/\n[^\S\n]*$/, '');
907
+ }
850
908
 
851
- // Clean up trailing whitespace when using minimum indent
852
- if (closingColumn !== null && indent !== null && closingColumn > indent.length) {
853
- this.removeTrailingWhitespaceLine(tokens);
909
+ this.emit('STRING', `"${processed}"`, {len: val.length, data: {quote}});
854
910
  }
855
911
  }
856
- delimiter = quote.charAt(0);
857
- this.mergeInterpolationTokens(tokens, {
858
- quote,
859
- indent,
860
- endOffset: end
861
- }, (value) => {
862
- return this.validateUnicodeCodePointEscapes(value, {
863
- delimiter: quote
864
- });
865
- });
866
- return end;
912
+
913
+ if (tokens.length > 1) {
914
+ this.emit('STRING_END', ')', {len: quote?.length || 0});
915
+ }
916
+
917
+ return endOffset;
867
918
  }
868
919
 
869
- // Matches numbers, including decimals, hex, and exponential notation.
870
- // Be careful not to interfere with ranges in progress.
920
+ // --------------------------------------------------------------------------
921
+ // 6. Number Token
922
+ // --------------------------------------------------------------------------
923
+
871
924
  numberToken() {
872
- var lexedLength, match, number, parsedValue, tag, tokenData;
873
- if (!(match = NUMBER.exec(this.chunk))) {
874
- return 0;
875
- }
876
- number = match[0];
877
- lexedLength = number.length;
878
- switch (false) {
879
- case !/^0[BOX]/.test(number):
880
- this.error(`radix prefix in '${number}' must be lowercase`, {
881
- offset: 1
882
- });
883
- break;
884
- case !/^0\d*[89]/.test(number):
885
- this.error(`decimal literal '${number}' must not be prefixed with '0'`, {
886
- length: lexedLength
887
- });
888
- break;
889
- case !/^0\d+/.test(number):
890
- this.error(`octal literal '${number}' must be prefixed with '0o'`, {
891
- length: lexedLength
892
- });
925
+ let match = NUMBER_RE.exec(this.chunk);
926
+ if (!match) return 0;
927
+
928
+ let number = match[0];
929
+ let len = number.length;
930
+
931
+ // Validate
932
+ let loc = {row: this.row, col: this.col};
933
+
934
+ if (/^0[BOX]/.test(number)) {
935
+ syntaxError(`radix prefix in '${number}' must be lowercase`, {...loc, col: loc.col + 1});
893
936
  }
894
- parsedValue = parseNumber(number);
895
- tokenData = {parsedValue};
896
- tag = parsedValue === 2e308 ? 'INFINITY' : 'NUMBER';
897
- if (tag === 'INFINITY') {
898
- tokenData.original = number;
937
+ if (/^0\d*[89]/.test(number)) {
938
+ syntaxError(`decimal literal '${number}' must not be prefixed with '0'`, {...loc, len});
899
939
  }
900
- this.token(tag, number, {
901
- length: lexedLength,
902
- data: tokenData
903
- });
904
- return lexedLength;
940
+ if (/^0\d+/.test(number)) {
941
+ syntaxError(`octal literal '${number}' must be prefixed with '0o'`, {...loc, len});
942
+ }
943
+
944
+ let parsed = parseNumber(number);
945
+ let tag = parsed === Infinity ? 'INFINITY' : 'NUMBER';
946
+ let data = {parsedValue: parsed};
947
+ if (tag === 'INFINITY') data.original = number;
948
+
949
+ this.emit(tag, number, {len, data});
950
+ return len;
905
951
  }
906
952
 
907
- // Matches regular expression literals, as well as multiline extended ones.
908
- // Lexing regular expressions is difficult to distinguish from division, so we
909
- // borrow some basic heuristics from JavaScript and Ruby.
953
+ // --------------------------------------------------------------------------
954
+ // 7. Regex Token
955
+ // --------------------------------------------------------------------------
956
+
910
957
  regexToken() {
911
- var body, closed, comment, commentIndex, commentOpts, commentTokens, comments, delimiter, end, flags, fullMatch, index, leadingWhitespace, match, matchedComment, origin, prev, ref, ref1, regex, tokens;
912
- switch (false) {
913
- case !(match = REGEX_ILLEGAL.exec(this.chunk)):
914
- this.error(`regular expressions cannot begin with ${match[2]}`, {
915
- offset: match.index + match[1].length
916
- });
917
- break;
918
- case !(match = this.matchWithInterpolations(HEREGEX, '///')):
919
- ({tokens, index} = match);
920
- comments = [];
921
- while (matchedComment = HEREGEX_COMMENT.exec(this.chunk.slice(0, index))) {
922
- ({
923
- index: commentIndex
924
- } = matchedComment);
925
- [fullMatch, leadingWhitespace, comment] = matchedComment;
926
- comments.push({
927
- comment,
928
- offsetInChunk: commentIndex + leadingWhitespace.length
929
- });
930
- }
931
- commentTokens = flatten((function() {
932
- var k, len, results;
933
- results = [];
934
- for (k = 0, len = comments.length; k < len; k++) {
935
- commentOpts = comments[k];
936
- results.push(this.commentToken(commentOpts.comment, Object.assign(commentOpts, {
937
- heregex: true,
938
- returnCommentTokens: true
939
- })));
940
- }
941
- return results;
942
- }).call(this));
943
- break;
944
- case !(match = REGEX.exec(this.chunk)):
945
- [regex, body, closed] = match;
946
- this.validateEscapes(body, {
947
- isRegex: true,
948
- offsetInChunk: 1
949
- });
950
- index = regex.length;
951
- prev = this.prev();
952
- if (prev) {
953
- if (prev.spaced && (ref = prev[0], indexOf.call(CALLABLE, ref) >= 0)) {
954
- if (!closed || POSSIBLY_DIVISION.test(regex)) {
955
- return 0;
956
- }
957
- } else if (ref1 = prev[0], indexOf.call(NOT_REGEX, ref1) >= 0) {
958
- return 0;
959
- }
960
- }
961
- if (!closed) {
962
- this.error('missing / (unclosed regex)');
963
- }
964
- break;
965
- default:
966
- return 0;
967
- }
968
- [flags] = REGEX_FLAGS.exec(this.chunk.slice(index));
969
- end = index + flags.length;
970
- origin = this.makeToken('REGEX', null, {
971
- length: end
972
- });
973
- switch (false) {
974
- case !!VALID_FLAGS.test(flags):
975
- this.error(`invalid regular expression flags ${flags}`, {
976
- offset: index,
977
- length: flags.length
978
- });
979
- break;
980
- case !(regex || tokens.length === 1):
981
- delimiter = body ? '/' : '///';
982
- if (body == null) {
983
- body = tokens[0][1];
984
- }
985
- this.validateUnicodeCodePointEscapes(body, {delimiter});
986
- // For heregex (delimiter === '///'), mark it with heregex metadata
987
- const tokenData = {delimiter};
988
- if (delimiter === '///') {
989
- tokenData.heregex = {flags};
990
- }
991
- this.token('REGEX', `/${body}/${flags}`, {
992
- length: end,
993
- origin,
994
- data: tokenData
995
- });
996
- break;
997
- default:
998
- this.token('REGEX_START', '(', {
999
- length: 0,
1000
- origin,
1001
- generated: true
1002
- });
1003
- this.token('IDENTIFIER', 'RegExp', {
1004
- length: 0,
1005
- generated: true
1006
- });
1007
- this.token('CALL_START', '(', {
1008
- length: 0,
1009
- generated: true
1010
- });
1011
- this.mergeInterpolationTokens(tokens, {
1012
- double: true,
1013
- heregex: {flags},
1014
- endOffset: end - flags.length,
1015
- quote: '///'
1016
- }, (str) => {
1017
- return this.validateUnicodeCodePointEscapes(str, {delimiter});
1018
- });
958
+ // Try heregex first (///)
959
+ let hm = this.matchWithInterpolations(HEREGEX_RE, '///');
960
+ if (hm) {
961
+ let {tokens: parts, index} = hm;
962
+ let [flags] = REGEX_FLAGS_RE.exec(this.chunk.slice(index));
963
+ let end = index + flags.length;
964
+
965
+ if (parts.length === 1 || !parts.some(p => p[0] === 'TOKENS')) {
966
+ // Simple heregex (no interpolations)
967
+ let body = parts[0]?.[1] || '';
968
+ this.emit('REGEX', `/${body}/${flags}`, {len: end, data: {delimiter: '///', heregex: {flags}}});
969
+ } else {
970
+ // Complex heregex with interpolations
971
+ this.emit('REGEX_START', '(', {len: 0});
972
+ this.emit('IDENTIFIER', 'RegExp', {len: 0});
973
+ this.emit('CALL_START', '(', {len: 0});
974
+ this.mergeInterpolationTokens(parts, {quote: '///', endOffset: end - flags.length});
1019
975
  if (flags) {
1020
- this.token(',', ',', {
1021
- offset: index - 1,
1022
- length: 0,
1023
- generated: true
1024
- });
1025
- this.token('STRING', '"' + flags + '"', {
1026
- offset: index,
1027
- length: flags.length
1028
- });
976
+ this.emit(',', ',', {len: 0});
977
+ this.emit('STRING', `"${flags}"`, {len: flags.length});
1029
978
  }
1030
- this.token(')', ')', {
1031
- offset: end,
1032
- length: 0,
1033
- generated: true
1034
- });
1035
- this.token('REGEX_END', ')', {
1036
- offset: end,
1037
- length: 0,
1038
- generated: true
1039
- });
979
+ this.emit(')', ')', {len: 0});
980
+ this.emit('REGEX_END', ')', {len: 0});
981
+ }
982
+ return end;
1040
983
  }
1041
- // Explicitly attach any heregex comments to the REGEX/REGEX_END token.
1042
- if (commentTokens != null ? commentTokens.length : void 0) {
1043
- addTokenData(this.tokens[this.tokens.length - 1], {
1044
- heregexCommentTokens: commentTokens
1045
- });
984
+
985
+ // Try simple regex
986
+ let match = REGEX_RE.exec(this.chunk);
987
+ if (!match) return 0;
988
+
989
+ let [regex, body, closed] = match;
990
+ let prev = this.prev();
991
+
992
+ // Division disambiguation
993
+ if (prev) {
994
+ if (prev.spaced && CALLABLE.has(prev[0]) && (!closed || /^\/=?\s/.test(regex))) return 0;
995
+ if (NOT_REGEX.has(prev[0]) && !(prev.spaced && CALLABLE.has(prev[0]))) return 0;
996
+ }
997
+
998
+ if (!closed) syntaxError('missing / (unclosed regex)', {row: this.row, col: this.col});
999
+
1000
+ let index = regex.length;
1001
+ let [flags] = REGEX_FLAGS_RE.exec(this.chunk.slice(index));
1002
+ let end = index + flags.length;
1003
+
1004
+ if (!VALID_FLAGS_RE.test(flags)) {
1005
+ syntaxError(`invalid regular expression flags ${flags}`, {row: this.row, col: this.col + index, len: flags.length});
1046
1006
  }
1007
+
1008
+ this.emit('REGEX', `/${body}/${flags}`, {len: end, data: {delimiter: '/'}});
1047
1009
  return end;
1048
1010
  }
1049
1011
 
1050
- // Matches JavaScript interpolated directly into the source via backticks.
1012
+ // --------------------------------------------------------------------------
1013
+ // 8. JS Token (embedded JavaScript)
1014
+ // --------------------------------------------------------------------------
1015
+
1051
1016
  jsToken() {
1052
- var length, match, matchedHere, script;
1053
- if (!(this.chunk.charAt(0) === '`' && (match = (matchedHere = HERE_JSTOKEN.exec(this.chunk)) || JSTOKEN.exec(this.chunk)))) {
1054
- return 0;
1055
- }
1056
- // Convert escaped backticks to backticks, and escaped backslashes
1057
- // just before escaped backticks to backslashes
1058
- script = match[1];
1059
- ({length} = match[0]);
1060
- this.token('JS', script, {
1061
- length,
1062
- data: {
1063
- here: !!matchedHere
1064
- }
1065
- });
1066
- return length;
1017
+ if (this.chunk[0] !== '`') return 0;
1018
+
1019
+ let match = HERE_JSTOKEN_RE.exec(this.chunk) || JSTOKEN_RE.exec(this.chunk);
1020
+ if (!match) return 0;
1021
+
1022
+ let script = match[1];
1023
+ let len = match[0].length;
1024
+ this.emit('JS', script, {len, data: {here: match[0].startsWith('```')}});
1025
+ return len;
1067
1026
  }
1068
1027
 
1069
- // We treat all other single characters as a token. E.g.: `( ) , . !`
1070
- // Multi-character operators are also literal tokens, so that the parser can assign
1071
- // the proper order of operations. There are some symbols that we tag specially
1072
- // here. `;` and newlines are both treated as a `TERMINATOR`, we distinguish
1073
- // parentheses that indicate a method call from regular parentheses, and so on.
1028
+ // --------------------------------------------------------------------------
1029
+ // 9. Literal Token (operators, punctuation, everything else)
1030
+ // --------------------------------------------------------------------------
1031
+
1074
1032
  literalToken() {
1075
- var match, message, origin, prev, ref, ref1, ref2, ref3, ref4, ref5, skipToken, tag, token, value;
1076
- if (match = OPERATOR.exec(this.chunk)) {
1077
- [value] = match;
1078
- if (CODE.test(value)) {
1079
- this.tagParameters();
1080
- }
1081
- } else {
1082
- value = this.chunk.charAt(0);
1083
- }
1084
- tag = value;
1085
- prev = this.prev();
1086
- if (prev && indexOf.call(['=', ...COMPOUND_ASSIGN], value) >= 0) {
1087
- skipToken = false;
1088
- if (value === '=' && ((ref = prev[1]) === '||' || ref === '&&' || ref === '??') && !prev.spaced) {
1033
+ let match = OPERATOR_RE.exec(this.chunk);
1034
+ let val = match ? match[0] : this.chunk.charAt(0);
1035
+ let tag = val;
1036
+ let prev = this.prev();
1037
+
1038
+ // Arrow functions → tag parameters
1039
+ if (CODE_RE.test(val)) this.tagParameters();
1040
+
1041
+ // Compound assignment merging: ||= &&= ??=
1042
+ if (prev && (val === '=' || COMPOUND_ASSIGN.has(val))) {
1043
+ if (val === '=' && (prev[1] === '||' || prev[1] === '&&' || prev[1] === '??') && !prev.spaced) {
1089
1044
  prev[0] = 'COMPOUND_ASSIGN';
1090
1045
  prev[1] += '=';
1091
- if ((ref1 = prev.data) != null ? ref1.original : void 0) {
1092
- prev.data.original += '=';
1093
- }
1094
- prev[2].range = [prev[2].range[0], prev[2].range[1] + 1];
1095
- prev[2].last_column += 1;
1096
- prev[2].last_column_exclusive += 1;
1097
- prev = this.tokens[this.tokens.length - 2];
1098
- skipToken = true;
1046
+ return val.length;
1099
1047
  }
1100
- if (prev && prev[0] !== 'PROPERTY') {
1101
- origin = (ref2 = prev.origin) != null ? ref2 : prev;
1102
- message = isUnassignable(prev[1], origin[1]);
1103
- if (message) {
1104
- this.error(message, origin[2]);
1105
- }
1106
- }
1107
- if (skipToken) {
1108
- return value.length;
1109
- }
1110
- }
1111
- if (value === '(' && (prev != null ? prev[0] : void 0) === 'IMPORT') {
1112
- prev[0] = 'DYNAMIC_IMPORT';
1113
1048
  }
1114
- if (value === '{' && this.seenImport) {
1115
- this.importSpecifierList = true;
1116
- } else if (this.importSpecifierList && value === '}') {
1117
- this.importSpecifierList = false;
1118
- } else if (value === '{' && (prev != null ? prev[0] : void 0) === 'EXPORT') {
1119
- this.exportSpecifierList = true;
1120
- } else if (this.exportSpecifierList && value === '}') {
1121
- this.exportSpecifierList = false;
1122
- }
1123
- if (value === ';') {
1124
- if (ref3 = prev != null ? prev[0] : void 0, indexOf.call(['=', ...UNFINISHED], ref3) >= 0) {
1125
- this.error('unexpected ;');
1126
- }
1049
+
1050
+ // Dynamic import
1051
+ if (val === '(' && prev?.[0] === 'IMPORT') prev[0] = 'DYNAMIC_IMPORT';
1052
+
1053
+ // Import/export specifier list tracking
1054
+ if (val === '{' && this.seenImport) this.importSpecifierList = true;
1055
+ if (val === '}' && this.importSpecifierList) this.importSpecifierList = false;
1056
+ if (val === '{' && prev?.[0] === 'EXPORT') this.exportSpecifierList = true;
1057
+ if (val === '}' && this.exportSpecifierList) this.exportSpecifierList = false;
1058
+
1059
+ // Semicolons TERMINATOR
1060
+ if (val === ';') {
1127
1061
  this.seenFor = this.seenImport = this.seenExport = false;
1128
1062
  tag = 'TERMINATOR';
1129
- } else if (value === '*' && (prev != null ? prev[0] : void 0) === 'EXPORT') {
1130
- tag = 'EXPORT_ALL';
1131
- } else if (value === '<=>') {
1132
- tag = 'BIND';
1133
- } else if (value === '~=') {
1134
- tag = 'COMPUTED_ASSIGN';
1135
- } else if (value === ':=') {
1136
- tag = 'REACTIVE_ASSIGN';
1137
- } else if (value === '~>') {
1138
- tag = 'REACT_ASSIGN';
1139
- } else if (value === '=!') {
1140
- tag = 'READONLY_ASSIGN';
1141
- } else if (indexOf.call(MATH, value) >= 0) {
1142
- tag = 'MATH';
1143
- } else if (indexOf.call(COMPARE, value) >= 0) {
1144
- tag = 'COMPARE';
1145
- } else if (indexOf.call(COMPOUND_ASSIGN, value) >= 0) {
1146
- tag = 'COMPOUND_ASSIGN';
1147
- } else if (indexOf.call(UNARY, value) >= 0) {
1148
- tag = 'UNARY';
1149
- } else if (indexOf.call(UNARY_MATH, value) >= 0) {
1150
- tag = 'UNARY_MATH';
1151
- } else if (indexOf.call(SHIFT, value) >= 0) {
1152
- tag = 'SHIFT';
1153
- } else if (value === '?' && (prev != null ? prev.spaced : void 0)) {
1154
- tag = 'SPACE?'; // ? with space before it (ie - 'x ?' not 'x?')
1155
- } else if (prev) {
1156
- if (value === '(' && !prev.spaced && (ref4 = prev[0], indexOf.call(CALLABLE, ref4) >= 0)) {
1157
- if (prev[0] === '?') {
1158
- prev[0] = 'FUNC_EXIST';
1159
- } else if (prev[0] === '?.') {
1160
- prev[0] = 'ES6_OPTIONAL_CALL';
1161
- }
1063
+ }
1064
+ // Reactive operators
1065
+ else if (val === '~=') tag = 'COMPUTED_ASSIGN';
1066
+ else if (val === ':=') tag = 'REACTIVE_ASSIGN';
1067
+ else if (val === '~>') tag = 'REACT_ASSIGN';
1068
+ else if (val === '=!') tag = 'READONLY_ASSIGN';
1069
+ // Export all
1070
+ else if (val === '*' && prev?.[0] === 'EXPORT') tag = 'EXPORT_ALL';
1071
+ // Operator classification
1072
+ else if (MATH.has(val)) tag = 'MATH';
1073
+ else if (COMPARE.has(val)) tag = 'COMPARE';
1074
+ else if (COMPOUND_ASSIGN.has(val)) tag = 'COMPOUND_ASSIGN';
1075
+ else if (UNARY_MATH.has(val)) tag = 'UNARY_MATH';
1076
+ else if (SHIFT.has(val)) tag = 'SHIFT';
1077
+ // Spaced ? SPACE? (ternary)
1078
+ else if (val === '?' && prev?.spaced) tag = 'SPACE?';
1079
+ // Call/index context (ES6 optional chaining only)
1080
+ else if (prev) {
1081
+ if (val === '(' && !prev.spaced && CALLABLE.has(prev[0])) {
1082
+ if (prev[0] === '?.') prev[0] = 'ES6_OPTIONAL_CALL';
1162
1083
  tag = 'CALL_START';
1163
- } else if (value === '[' && (((ref5 = prev[0], indexOf.call(INDEXABLE, ref5) >= 0) && !prev.spaced) || (prev[0] === '::'))) { // `.prototype` can't be a method you can call.
1084
+ }
1085
+ if (val === '[' && !prev.spaced && INDEXABLE.has(prev[0])) {
1164
1086
  tag = 'INDEX_START';
1165
- switch (prev[0]) {
1166
- case '?':
1167
- prev[0] = 'INDEX_SOAK';
1168
- break;
1169
- case '?.':
1170
- prev[0] = 'ES6_OPTIONAL_INDEX';
1171
- break;
1172
- }
1087
+ if (prev[0] === '?.') prev[0] = 'ES6_OPTIONAL_INDEX';
1173
1088
  }
1174
1089
  }
1175
- token = this.makeToken(tag, value);
1176
- switch (value) {
1177
- case '(':
1178
- case '{':
1179
- case '[':
1180
- this.ends.push({
1181
- tag: INVERSES[value],
1182
- origin: token
1183
- });
1184
- break;
1185
- case ')':
1186
- case '}':
1187
- case ']':
1188
- this.pair(value);
1189
- }
1190
- this.tokens.push(this.makeToken(tag, value));
1191
- return value.length;
1192
- }
1193
1090
 
1194
- // Record an outdent token or multiple tokens, if we happen to be moving back
1195
- // inwards past several recorded indents. Sets new @indent value.
1196
- outdentToken({moveOut, noNewlines, outdentLength = 0, offset = 0, indentSize, endsContinuationLineIndentation}) {
1197
- var decreasedIndent, dent, lastIndent, ref, terminatorToken;
1198
- decreasedIndent = this.indent - moveOut;
1199
- while (moveOut > 0) {
1200
- lastIndent = this.indents[this.indents.length - 1];
1201
- if (!lastIndent) {
1202
- this.outdebt = moveOut = 0;
1203
- } else if (this.outdebt && moveOut <= this.outdebt) {
1204
- this.outdebt -= moveOut;
1205
- moveOut = 0;
1206
- } else {
1207
- dent = this.indents.pop() + this.outdebt;
1208
- if (outdentLength && (ref = this.chunk[outdentLength], indexOf.call(INDENTABLE_CLOSERS, ref) >= 0)) {
1209
- decreasedIndent -= dent - moveOut;
1210
- moveOut = dent;
1211
- }
1212
- this.outdebt = 0;
1213
- // pair might call outdentToken, so preserve decreasedIndent
1214
- this.pair('OUTDENT');
1215
- this.token('OUTDENT', moveOut, {
1216
- length: outdentLength,
1217
- indentSize: indentSize + moveOut - dent
1218
- });
1219
- moveOut -= dent;
1220
- }
1221
- }
1222
- if (dent) {
1223
- this.outdebt -= moveOut;
1224
- }
1225
- this.suppressSemicolons();
1226
- if (!(this.tag() === 'TERMINATOR' || noNewlines)) {
1227
- terminatorToken = this.token('TERMINATOR', '\n', {
1228
- offset: offset + outdentLength,
1229
- length: 0
1230
- });
1231
- if (endsContinuationLineIndentation) {
1232
- terminatorToken.endsContinuationLineIndentation = {
1233
- preContinuationLineIndent: this.indent
1234
- };
1235
- }
1091
+ // Balanced pair tracking
1092
+ if (val === '(' || val === '{' || val === '[') {
1093
+ this.ends.push({tag: INVERSES[val], origin: [tag, val]});
1094
+ } else if (val === ')' || val === '}' || val === ']') {
1095
+ this.pair(val);
1236
1096
  }
1237
- this.indent = decreasedIndent;
1238
- this.indentLiteral = this.indentLiteral.slice(0, decreasedIndent);
1239
- return this;
1240
- }
1241
1097
 
1242
- // Generate a newline token. Consecutive newlines get merged together.
1243
- newlineToken(offset) {
1244
- this.suppressSemicolons();
1245
- if (this.tag() !== 'TERMINATOR') {
1246
- this.token('TERMINATOR', '\n', {
1247
- offset,
1248
- length: 0
1249
- });
1250
- }
1251
- return this;
1098
+ this.emit(tag, val, {len: val.length});
1099
+ return val.length;
1252
1100
  }
1253
1101
 
1254
- // Use a `\` at a line-ending to suppress the newline.
1255
- // The slash is removed here once its job is done.
1256
- suppressNewlines() {
1257
- var prev;
1258
- prev = this.prev();
1259
- if (prev[1] === '\\') {
1260
- if (prev.comments && this.tokens.length > 1) {
1261
- // `@tokens.length` should be at least 2 (some code, then `\`).
1262
- // If something puts a `\` after nothing, they deserve to lose any
1263
- // comments that trail it.
1264
- attachCommentsToNode(prev.comments, this.tokens[this.tokens.length - 2]);
1102
+ // Walk back to tag parameters for arrow functions
1103
+ tagParameters() {
1104
+ if (this.prevTag() !== ')') return this.tagDoIife();
1105
+
1106
+ let i = this.tokens.length - 1;
1107
+ let stack = [];
1108
+ this.tokens[i][0] = 'PARAM_END';
1109
+
1110
+ while (i-- > 0) {
1111
+ let tok = this.tokens[i];
1112
+ if (tok[0] === ')') {
1113
+ stack.push(tok);
1114
+ } else if (tok[0] === '(' || tok[0] === 'CALL_START') {
1115
+ if (stack.length) {
1116
+ stack.pop();
1117
+ } else if (tok[0] === '(') {
1118
+ tok[0] = 'PARAM_START';
1119
+ return this.tagDoIife(i - 1);
1120
+ } else {
1121
+ this.tokens[this.tokens.length - 1][0] = 'CALL_END';
1122
+ return;
1123
+ }
1265
1124
  }
1266
- this.tokens.pop();
1267
1125
  }
1268
- return this;
1269
1126
  }
1270
1127
 
1271
- // Token Manipulators
1272
- // ------------------
1273
-
1274
- // A source of ambiguity in our grammar used to be parameter lists in function
1275
- // definitions versus argument lists in function calls. Walk backwards, tagging
1276
- // parameters specially in order to make things easier for the parser.
1277
- tagParameters() {
1278
- var i, paramEndToken, stack, tok, tokens;
1279
- if (this.tag() !== ')') {
1280
- return this.tagDoIife();
1281
- }
1282
- stack = [];
1283
- ({tokens} = this);
1284
- i = tokens.length;
1285
- paramEndToken = tokens[--i];
1286
- paramEndToken[0] = 'PARAM_END';
1287
- while (tok = tokens[--i]) {
1288
- switch (tok[0]) {
1289
- case ')':
1290
- stack.push(tok);
1291
- break;
1292
- case '(':
1293
- case 'CALL_START':
1294
- if (stack.length) {
1295
- stack.pop();
1296
- } else if (tok[0] === '(') {
1297
- tok[0] = 'PARAM_START';
1298
- return this.tagDoIife(i - 1);
1299
- } else {
1300
- paramEndToken[0] = 'CALL_END';
1301
- return this;
1302
- }
1303
- }
1304
- }
1305
- return this;
1128
+ // Tag 'do' before function as DO_IIFE
1129
+ tagDoIife(index) {
1130
+ let t = this.tokens[index ?? this.tokens.length - 1];
1131
+ if (t?.[0] === 'DO') t[0] = 'DO_IIFE';
1306
1132
  }
1307
1133
 
1308
- // Tag `do` followed by a function differently than `do` followed by eg an
1309
- // identifier to allow for different grammar precedence
1310
- tagDoIife(tokenIndex) {
1311
- var tok;
1312
- tok = this.tokens[tokenIndex != null ? tokenIndex : this.tokens.length - 1];
1313
- if ((tok != null ? tok[0] : void 0) !== 'DO') {
1314
- return this;
1315
- }
1316
- tok[0] = 'DO_IIFE';
1317
- return this;
1134
+ // ==========================================================================
1135
+ // Rewriter 7 passes
1136
+ // ==========================================================================
1137
+
1138
+ rewrite(tokens) {
1139
+ this.tokens = tokens;
1140
+ this.removeLeadingNewlines();
1141
+ this.closeOpenCalls();
1142
+ this.closeOpenIndexes();
1143
+ this.normalizeLines();
1144
+ this.tagPostfixConditionals();
1145
+ this.addImplicitBracesAndParens();
1146
+ this.addImplicitCallCommas();
1147
+ return this.tokens;
1318
1148
  }
1319
1149
 
1320
- // Close up all remaining open blocks at the end of the file.
1321
- closeIndentation() {
1322
- return this.outdentToken({
1323
- moveOut: this.indent,
1324
- indentSize: 0
1325
- });
1150
+ // --- Rewriter passes ---
1151
+
1152
+ removeLeadingNewlines() {
1153
+ let i = 0;
1154
+ while (this.tokens[i]?.[0] === 'TERMINATOR') i++;
1155
+ if (i > 0) this.tokens.splice(0, i);
1326
1156
  }
1327
1157
 
1328
- // Match the contents of a delimited token and expand variables and expressions
1329
- // inside it using Ruby-like notation for substitution of arbitrary
1330
- // expressions.
1331
- //
1332
- // "Hello #{name.capitalize()}."
1333
- //
1334
- // If it encounters an interpolation, this method will recursively create a new
1335
- // Lexer and tokenize until the `{` of `#{` is balanced with a `}`.
1336
- //
1337
- // - `regex` matches the contents of a token (but not `delimiter`, and not
1338
- // `#{` if interpolations are desired).
1339
- // - `delimiter` is the delimiter of the token. Examples are `'`, `"`, `'''`,
1340
- // `"""` and `///`.
1341
- // - `closingDelimiter` can be customized
1342
- // - `interpolators` matches the start of an interpolation
1343
- //
1344
- // This method allows us to have strings within interpolations within strings,
1345
- // ad infinitum.
1346
- matchWithInterpolations(regex, delimiter, closingDelimiter = delimiter, interpolators = /^[#$]\{/) {
1347
- var braceInterpolator, close, column, index, interpolationOffset, interpolator, line, match, nested, offset, offsetInChunk, open, ref, ref1, rest, str, strPart, tokens;
1348
- tokens = [];
1349
- offsetInChunk = delimiter.length;
1350
- if (this.chunk.slice(0, offsetInChunk) !== delimiter) {
1351
- return null;
1352
- }
1353
- str = this.chunk.slice(offsetInChunk);
1354
- while (true) {
1355
- [strPart] = regex.exec(str);
1356
- this.validateEscapes(strPart, {
1357
- isRegex: delimiter.charAt(0) === '/',
1358
- offsetInChunk
1359
- });
1360
- // Push a fake `'NEOSTRING'` token, which will get turned into a real string later.
1361
- tokens.push(this.makeToken('NEOSTRING', strPart, {
1362
- offset: offsetInChunk
1363
- }));
1364
- str = str.slice(strPart.length);
1365
- offsetInChunk += strPart.length;
1366
- if (!(match = interpolators.exec(str))) {
1367
- break;
1368
- }
1369
- [interpolator] = match;
1370
- // To remove the `#` in `#{`.
1371
- interpolationOffset = interpolator.length - 1;
1372
- [line, column, offset] = this.getLineAndColumnFromChunk(offsetInChunk + interpolationOffset);
1373
- rest = str.slice(interpolationOffset);
1374
- ({
1375
- tokens: nested,
1376
- index
1377
- } = new Lexer().tokenize(rest, {
1378
- line,
1379
- column,
1380
- offset,
1381
- untilBalanced: true,
1382
- locTweaks: this.locTweaks
1383
- }));
1384
- // Account for the `#` in `#{`.
1385
- index += interpolationOffset;
1386
- braceInterpolator = str[index - 1] === '}';
1387
- if (braceInterpolator) {
1388
- // Turn the leading and trailing `{` and `}` into parentheses. Unnecessary
1389
- // parentheses will be removed later.
1390
- [open] = nested, [close] = slice.call(nested, -1);
1391
- open[0] = 'INTERPOLATION_START';
1392
- open[1] = '(';
1393
- open[2].first_column -= interpolationOffset;
1394
- open[2].range = [open[2].range[0] - interpolationOffset, open[2].range[1]];
1395
- close[0] = 'INTERPOLATION_END';
1396
- close[1] = ')';
1397
- close.origin = ['', 'end of interpolation', close[2]];
1158
+ closeOpenCalls() {
1159
+ this.scanTokens((token, i) => {
1160
+ if (token[0] === 'CALL_START') {
1161
+ this.detectEnd(i + 1,
1162
+ t => t[0] === ')' || t[0] === 'CALL_END',
1163
+ t => t[0] = 'CALL_END'
1164
+ );
1398
1165
  }
1399
- if (((ref = nested[1]) != null ? ref[0] : void 0) === 'TERMINATOR') {
1400
- // Remove leading `'TERMINATOR'` (if any).
1401
- nested.splice(1, 1);
1402
- }
1403
- if (((ref1 = nested[nested.length - 3]) != null ? ref1[0] : void 0) === 'INDENT' && nested[nested.length - 2][0] === 'OUTDENT') {
1404
- // Remove trailing `'INDENT'/'OUTDENT'` pair (if any).
1405
- nested.splice(-3, 2);
1406
- }
1407
- if (!braceInterpolator) {
1408
- // We are not using `{` and `}`, so wrap the interpolated tokens instead.
1409
- open = this.makeToken('INTERPOLATION_START', '(', {
1410
- offset: offsetInChunk,
1411
- length: 0,
1412
- generated: true
1413
- });
1414
- close = this.makeToken('INTERPOLATION_END', ')', {
1415
- offset: offsetInChunk + index,
1416
- length: 0,
1417
- generated: true
1418
- });
1419
- nested = [open, ...nested, close];
1420
- }
1421
- // Push a fake `'TOKENS'` token, which will get turned into real tokens later.
1422
- tokens.push(['TOKENS', nested]);
1423
- str = str.slice(index);
1424
- offsetInChunk += index;
1425
- }
1426
- if (str.slice(0, closingDelimiter.length) !== closingDelimiter) {
1427
- this.error(`missing ${closingDelimiter}`, {
1428
- length: delimiter.length
1429
- });
1430
- }
1431
- return {
1432
- tokens,
1433
- index: offsetInChunk + closingDelimiter.length
1434
- };
1166
+ return 1;
1167
+ });
1435
1168
  }
1436
1169
 
1437
- // Merge the array `tokens` of the fake token types `'TOKENS'` and `'NEOSTRING'`
1438
- // (as returned by `matchWithInterpolations`) into the token stream. The value
1439
- // of `'NEOSTRING'`s are converted using `fn` and turned into strings using
1440
- // `options` first.
1441
- mergeInterpolationTokens(tokens, options, fn) {
1442
- var $, converted, double, endOffset, firstIndex, heregex, i, indent, k, l, lastToken, len, len1, locationToken, lparen, placeholderToken, quote, ref, ref1, rparen, tag, token, tokensToPush, val, value;
1443
- ({quote, indent, double, heregex, endOffset} = options);
1444
- if (tokens.length > 1) {
1445
- lparen = this.token('STRING_START', '(', {
1446
- length: (ref = quote != null ? quote.length : void 0) != null ? ref : 0,
1447
- data: {quote},
1448
- generated: !(quote != null ? quote.length : void 0)
1449
- });
1450
- }
1451
- firstIndex = this.tokens.length;
1452
- $ = tokens.length - 1;
1453
- for (i = k = 0, len = tokens.length; k < len; i = ++k) {
1454
- token = tokens[i];
1455
- [tag, value] = token;
1456
- switch (tag) {
1457
- case 'TOKENS':
1458
- // There are comments (and nothing else) in this interpolation.
1459
- if (value.length === 2 && (value[0].comments || value[1].comments)) {
1460
- placeholderToken = this.makeToken('JS', '', {
1461
- generated: true
1462
- });
1463
- // Use the same location data as the first parenthesis.
1464
- placeholderToken[2] = value[0][2];
1465
- for (l = 0, len1 = value.length; l < len1; l++) {
1466
- val = value[l];
1467
- if (!val.comments) {
1468
- continue;
1469
- }
1470
- if (placeholderToken.comments == null) {
1471
- placeholderToken.comments = [];
1472
- }
1473
- placeholderToken.comments.push(...val.comments);
1474
- }
1475
- value.splice(1, 0, placeholderToken);
1476
- }
1477
- // Push all the tokens in the fake `'TOKENS'` token. These already have
1478
- // sane location data.
1479
- locationToken = value[0];
1480
- tokensToPush = value;
1481
- break;
1482
- case 'NEOSTRING':
1483
- // Convert `'NEOSTRING'` into `'STRING'`.
1484
- converted = fn.call(this, token[1], i);
1485
- if (i === 0) {
1486
- addTokenData(token, {
1487
- initialChunk: true
1488
- });
1489
- }
1490
- if (i === $) {
1491
- addTokenData(token, {
1492
- finalChunk: true
1493
- });
1494
- }
1495
- addTokenData(token, {indent, quote, double});
1496
- if (heregex) {
1497
- addTokenData(token, {heregex});
1498
- }
1499
- token[0] = 'STRING';
1500
- token[1] = '"' + converted + '"';
1501
- if (tokens.length === 1 && (quote != null)) {
1502
- token[2].first_column -= quote.length;
1503
- if (token[1].substr(-2, 1) === '\n') {
1504
- token[2].last_line += 1;
1505
- token[2].last_column = quote.length - 1;
1170
+ closeOpenIndexes() {
1171
+ this.scanTokens((token, i) => {
1172
+ if (token[0] === 'INDEX_START') {
1173
+ this.detectEnd(i + 1,
1174
+ t => t[0] === ']' || t[0] === 'INDEX_END',
1175
+ (t, idx) => {
1176
+ if (this.tokens[idx + 1]?.[0] === ':') {
1177
+ token[0] = '[';
1178
+ t[0] = ']';
1506
1179
  } else {
1507
- token[2].last_column += quote.length;
1508
- if (token[1].length === 2) {
1509
- token[2].last_column -= 1;
1510
- }
1180
+ t[0] = 'INDEX_END';
1511
1181
  }
1512
- token[2].last_column_exclusive += quote.length;
1513
- token[2].range = [token[2].range[0] - quote.length, token[2].range[1] + quote.length];
1514
1182
  }
1515
- locationToken = token;
1516
- tokensToPush = [token];
1517
- }
1518
- this.tokens.push(...tokensToPush);
1519
- }
1520
- if (lparen) {
1521
- [lastToken] = slice.call(tokens, -1);
1522
- lparen.origin = [
1523
- 'STRING',
1524
- null,
1525
- {
1526
- first_line: lparen[2].first_line,
1527
- first_column: lparen[2].first_column,
1528
- last_line: lastToken[2].last_line,
1529
- last_column: lastToken[2].last_column,
1530
- last_line_exclusive: lastToken[2].last_line_exclusive,
1531
- last_column_exclusive: lastToken[2].last_column_exclusive,
1532
- range: [lparen[2].range[0],
1533
- lastToken[2].range[1]]
1534
- }
1535
- ];
1536
- if (!(quote != null ? quote.length : void 0)) {
1537
- lparen[2] = lparen.origin[2];
1538
- }
1539
- return rparen = this.token('STRING_END', ')', {
1540
- offset: endOffset - (quote != null ? quote : '').length,
1541
- length: (ref1 = quote != null ? quote.length : void 0) != null ? ref1 : 0,
1542
- generated: !(quote != null ? quote.length : void 0)
1543
- });
1544
- }
1545
- }
1546
-
1547
- // Pairs up a closing token, ensuring that all listed pairs of tokens are
1548
- // correctly balanced throughout the course of the token stream.
1549
- pair(tag) {
1550
- var lastIndent, prev, ref, ref1, wanted;
1551
- ref = this.ends, [prev] = slice.call(ref, -1);
1552
- if (tag !== (wanted = prev != null ? prev.tag : void 0)) {
1553
- if ('OUTDENT' !== wanted) {
1554
- this.error(`unmatched ${tag}`);
1555
- }
1556
- // Auto-close `INDENT` to support syntax like this:
1557
- //
1558
- // el.click((event) ->
1559
- // el.hide())
1560
- //
1561
- ref1 = this.indents, [lastIndent] = slice.call(ref1, -1);
1562
- this.outdentToken({
1563
- moveOut: lastIndent,
1564
- noNewlines: true
1565
- });
1566
- return this.pair(tag);
1567
- }
1568
- return this.ends.pop();
1569
- }
1570
-
1571
- // Helpers
1572
- // -------
1573
-
1574
- // Compensate for the things we strip out initially (e.g. carriage returns)
1575
- // so that location data stays accurate with respect to the original source file.
1576
- getLocationDataCompensation(start, end) {
1577
- var compensation, current, initialEnd, totalCompensation;
1578
- totalCompensation = 0;
1579
- initialEnd = end;
1580
- current = start;
1581
- while (current <= end) {
1582
- if (current === end && start !== initialEnd) {
1583
- break;
1584
- }
1585
- compensation = this.locTweaks[current];
1586
- if (compensation != null) {
1587
- totalCompensation += compensation;
1588
- end += compensation;
1183
+ );
1589
1184
  }
1590
- current++;
1591
- }
1592
- return totalCompensation;
1185
+ return 1;
1186
+ });
1593
1187
  }
1594
1188
 
1595
- // Returns the line and column number from an offset into the current chunk.
1596
- //
1597
- // `offset` is a number of characters into `@chunk`.
1598
- getLineAndColumnFromChunk(offset) {
1599
- var column, columnCompensation, compensation, lastLine, lineCount, previousLinesCompensation, ref, string;
1600
- compensation = this.getLocationDataCompensation(this.chunkOffset, this.chunkOffset + offset);
1601
- if (offset === 0) {
1602
- return [this.chunkLine, this.chunkColumn + compensation, this.chunkOffset + compensation];
1603
- }
1604
- if (offset >= this.chunk.length) {
1605
- string = this.chunk;
1606
- } else {
1607
- string = this.chunk.slice(0, +(offset - 1) + 1 || 9e9);
1608
- }
1609
- lineCount = count(string, '\n');
1610
- column = this.chunkColumn;
1611
- if (lineCount > 0) {
1612
- ref = string.split('\n'), [lastLine] = slice.call(ref, -1);
1613
- column = lastLine.length;
1614
- previousLinesCompensation = this.getLocationDataCompensation(this.chunkOffset, this.chunkOffset + offset - column);
1615
- if (previousLinesCompensation < 0) {
1616
- // Don't recompensate for initially inserted newline.
1617
- previousLinesCompensation = 0;
1618
- }
1619
- columnCompensation = this.getLocationDataCompensation(this.chunkOffset + offset + previousLinesCompensation - column, this.chunkOffset + offset + previousLinesCompensation);
1620
- } else {
1621
- column += string.length;
1622
- columnCompensation = compensation;
1623
- }
1624
- return [this.chunkLine + lineCount, column + columnCompensation, this.chunkOffset + offset + compensation];
1625
- }
1189
+ normalizeLines() {
1190
+ let starter = null;
1191
+ let indent = null;
1192
+ let outdent = null;
1193
+ let bodyStart = null;
1194
+
1195
+ let condition = (token, i) => {
1196
+ return token[1] !== ';' && SINGLE_CLOSERS.has(token[0]) &&
1197
+ !(token[0] === 'TERMINATOR' && EXPRESSION_CLOSE.has(this.tokens[i + 1]?.[0])) &&
1198
+ !(token[0] === 'ELSE' && starter !== 'THEN') ||
1199
+ token[0] === ',' && (starter === '->' || starter === '=>') &&
1200
+ !(bodyStart != null && IMPLICIT_FUNC.has(this.tokens[bodyStart]?.[0]) && this.tokens[bodyStart]?.spaced &&
1201
+ (IMPLICIT_CALL.has(this.tokens[bodyStart + 1]?.[0]) || (this.tokens[bodyStart + 1]?.[0] === '...' && IMPLICIT_CALL.has(this.tokens[bodyStart + 2]?.[0])))) ||
1202
+ CALL_CLOSERS.has(token[0]) && (this.tokens[i - 1]?.newLine || this.tokens[i - 1]?.[0] === 'OUTDENT');
1203
+ };
1626
1204
 
1627
- makeLocationData({offsetInChunk, length}) {
1628
- var endOffset, lastCharacter, locationData;
1629
- locationData = {
1630
- range: []
1205
+ let action = (token, i) => {
1206
+ let idx = this.tokens[i - 1]?.[0] === ',' ? i - 1 : i;
1207
+ this.tokens.splice(idx, 0, outdent);
1631
1208
  };
1632
- [locationData.first_line, locationData.first_column, locationData.range[0]] = this.getLineAndColumnFromChunk(offsetInChunk);
1633
- // Use length - 1 for the final offset - we're supplying the last_line and the last_column,
1634
- // so if last_column == first_column, then we're looking at a character of length 1.
1635
- lastCharacter = length > 0 ? length - 1 : 0;
1636
- [locationData.last_line, locationData.last_column, endOffset] = this.getLineAndColumnFromChunk(offsetInChunk + lastCharacter);
1637
- [locationData.last_line_exclusive, locationData.last_column_exclusive] = this.getLineAndColumnFromChunk(offsetInChunk + lastCharacter + (length > 0 ? 1 : 0));
1638
- locationData.range[1] = length > 0 ? endOffset + 1 : endOffset;
1639
- return locationData;
1640
- }
1641
1209
 
1642
- // Same as `token`, except this just returns the token without adding it
1643
- // to the results.
1644
- makeToken(tag, value, {
1645
- offset: offsetInChunk = 0,
1646
- length = value.length,
1647
- origin,
1648
- generated,
1649
- indentSize
1650
- } = {}) {
1651
- var token;
1652
- token = [tag, value, this.makeLocationData({offsetInChunk, length})];
1653
- if (origin) {
1654
- token.origin = origin;
1655
- }
1656
- if (generated) {
1657
- token.generated = true;
1658
- }
1659
- if (indentSize != null) {
1660
- token.indentSize = indentSize;
1661
- }
1662
- return token;
1663
- }
1210
+ this.scanTokens((token, i, tokens) => {
1211
+ let [tag] = token;
1664
1212
 
1665
- // Add a token to the results.
1666
- // `offset` is the offset into the current `@chunk` where the token starts.
1667
- // `length` is the length of the token in the `@chunk`, after the offset. If
1668
- // not specified, the length of `value` will be used.
1669
- //
1670
- // Returns the new token.
1671
- token(tag, value, {offset, length, origin, data, generated, indentSize} = {}) {
1672
- var token;
1673
- token = this.makeToken(tag, value, {offset, length, origin, generated, indentSize});
1674
- if (data) {
1675
- addTokenData(token, data);
1676
- }
1677
- this.tokens.push(token);
1678
- return token;
1679
- }
1213
+ if (tag === 'TERMINATOR') {
1214
+ if (this.tokens[i + 1]?.[0] === 'ELSE' && this.tokens[i - 1]?.[0] !== 'OUTDENT') {
1215
+ tokens.splice(i, 1, ...this.makeIndentation());
1216
+ return 1;
1217
+ }
1218
+ if (EXPRESSION_CLOSE.has(this.tokens[i + 1]?.[0])) {
1219
+ tokens.splice(i, 1);
1220
+ return 0;
1221
+ }
1222
+ }
1680
1223
 
1681
- // Peek at the last tag in the token stream.
1682
- tag() {
1683
- var ref, token;
1684
- ref = this.tokens, [token] = slice.call(ref, -1);
1685
- return token != null ? token[0] : void 0;
1686
- }
1224
+ if (tag === 'CATCH') {
1225
+ for (let j = 1; j <= 2; j++) {
1226
+ let nextTag = this.tokens[i + j]?.[0];
1227
+ if (nextTag === 'OUTDENT' || nextTag === 'TERMINATOR' || nextTag === 'FINALLY') {
1228
+ tokens.splice(i + j, 0, ...this.makeIndentation());
1229
+ return 2 + j;
1230
+ }
1231
+ }
1232
+ }
1687
1233
 
1688
- // Peek at the last value in the token stream.
1689
- value(useOrigin = false) {
1690
- var ref, token;
1691
- ref = this.tokens, [token] = slice.call(ref, -1);
1692
- if (useOrigin && ((token != null ? token.origin : void 0) != null)) {
1693
- return token.origin[1];
1694
- } else {
1695
- return token != null ? token[1] : void 0;
1696
- }
1697
- }
1234
+ if ((tag === '->' || tag === '=>') && (this.tokens[i + 1]?.[0] === ',' || this.tokens[i + 1]?.[0] === ']')) {
1235
+ [indent, outdent] = this.makeIndentation();
1236
+ tokens.splice(i + 1, 0, indent, outdent);
1237
+ return 1;
1238
+ }
1698
1239
 
1699
- // Get the previous token in the token stream.
1700
- prev() {
1701
- return this.tokens[this.tokens.length - 1];
1702
- }
1240
+ if (SINGLE_LINERS.has(tag) && this.tokens[i + 1]?.[0] !== 'INDENT' &&
1241
+ !(tag === 'ELSE' && this.tokens[i + 1]?.[0] === 'IF')) {
1242
+ starter = tag;
1243
+ bodyStart = i + 2;
1244
+ [indent, outdent] = this.makeIndentation();
1245
+ if (tag === 'THEN') indent.fromThen = true;
1246
+ tokens.splice(i + 1, 0, indent);
1247
+ this.detectEnd(i + 2, condition, action);
1248
+ if (tag === 'THEN') tokens.splice(i, 1);
1249
+ return 1;
1250
+ }
1703
1251
 
1704
- // Are we in the midst of an unfinished expression?
1705
- unfinished() {
1706
- var ref;
1707
- return LINE_CONTINUER.test(this.chunk) || (ref = this.tag(), indexOf.call(UNFINISHED, ref) >= 0);
1252
+ return 1;
1253
+ });
1708
1254
  }
1709
1255
 
1710
- validateUnicodeCodePointEscapes(str, options) {
1711
- return replaceUnicodeCodePointEscapes(str, merge(options, {error: this.error}));
1712
- }
1256
+ tagPostfixConditionals() {
1257
+ let original = null;
1713
1258
 
1714
- // Validates escapes in strings and regexes.
1715
- validateEscapes(str, options = {}) {
1716
- var before, hex, invalidEscape, invalidEscapeRegex, match, message, octal, ref, unicode, unicodeCodePoint;
1717
- invalidEscapeRegex = options.isRegex ? REGEX_INVALID_ESCAPE : STRING_INVALID_ESCAPE;
1718
- match = invalidEscapeRegex.exec(str);
1719
- if (!match) {
1720
- return;
1721
- }
1722
- match[0], before = match[1], octal = match[2], hex = match[3], unicodeCodePoint = match[4], unicode = match[5];
1723
- message = octal ? "octal escape sequences are not allowed" : "invalid escape sequence";
1724
- invalidEscape = `\\${octal || hex || unicodeCodePoint || unicode}`;
1725
- return this.error(`${message} ${invalidEscape}`, {
1726
- offset: ((ref = options.offsetInChunk) != null ? ref : 0) + match.index + before.length,
1727
- length: invalidEscape.length
1728
- });
1729
- }
1259
+ let condition = (token, i) => {
1260
+ return token[0] === 'TERMINATOR' ||
1261
+ (token[0] === 'INDENT' && !SINGLE_LINERS.has(this.tokens[i - 1]?.[0]));
1262
+ };
1730
1263
 
1731
- suppressSemicolons() {
1732
- var ref, ref1, results;
1733
- results = [];
1734
- while (this.value() === ';') {
1735
- this.tokens.pop();
1736
- if (ref = (ref1 = this.prev()) != null ? ref1[0] : void 0, indexOf.call(['=', ...UNFINISHED], ref) >= 0) {
1737
- results.push(this.error('unexpected ;'));
1738
- } else {
1739
- results.push(void 0);
1264
+ let action = (token) => {
1265
+ if (token[0] !== 'INDENT' || (token.generated && !token.fromThen)) {
1266
+ original[0] = 'POST_' + original[0];
1740
1267
  }
1741
- }
1742
- return results;
1743
- }
1268
+ };
1744
1269
 
1745
- error(message, options = {}) {
1746
- var first_column, first_line, location, ref, ref1;
1747
- location = 'first_line' in options ? options : ([first_line, first_column] = this.getLineAndColumnFromChunk((ref = options.offset) != null ? ref : 0), {
1748
- first_line,
1749
- first_column,
1750
- last_column: first_column + ((ref1 = options.length) != null ? ref1 : 1) - 1
1270
+ this.scanTokens((token, i) => {
1271
+ if (token[0] !== 'IF' && token[0] !== 'UNLESS') return 1;
1272
+ original = token;
1273
+ this.detectEnd(i + 1, condition, action);
1274
+ return 1;
1751
1275
  });
1752
- return throwSyntaxError(message, location);
1753
1276
  }
1754
1277
 
1755
- };
1756
-
1757
- // Helper functions
1758
- // ----------------
1759
- export var isUnassignable = function(name, displayName = name) {
1760
- switch (false) {
1761
- case indexOf.call([...JS_KEYWORDS, ...RIP_KEYWORDS], name) < 0:
1762
- return `keyword '${displayName}' can't be assigned`;
1763
- case indexOf.call(STRICT_PROSCRIBED, name) < 0:
1764
- return `'${displayName}' can't be assigned`;
1765
- case indexOf.call(RESERVED, name) < 0:
1766
- return `reserved word '${displayName}' can't be assigned`;
1767
- default:
1768
- return false;
1769
- }
1770
- };
1278
+ addImplicitBracesAndParens() {
1279
+ let stack = [];
1280
+ let inTernary = false;
1281
+
1282
+ this.scanTokens((token, i, tokens) => {
1283
+ let [tag] = token;
1284
+ let prevToken = tokens[i - 1] || [];
1285
+ let nextToken = tokens[i + 1] || [];
1286
+ let [prevTag] = prevToken;
1287
+ let [nextTag] = nextToken;
1288
+ let startIdx = i;
1289
+
1290
+ let forward = (n) => i - startIdx + n;
1291
+ let stackTop = () => stack[stack.length - 1];
1292
+ let isImplicit = (s) => s?.[2]?.ours;
1293
+ let inImplicitCall = () => isImplicit(stackTop()) && stackTop()?.[0] === '(';
1294
+ let inImplicitObject = () => isImplicit(stackTop()) && stackTop()?.[0] === '{';
1295
+
1296
+ let startImplicitCall = (idx) => {
1297
+ stack.push(['(', idx, {ours: true}]);
1298
+ tokens.splice(idx, 0, gen('CALL_START', '('));
1299
+ };
1771
1300
 
1772
- // `from` isn't a Rip keyword, but it behaves like one in `import` and
1773
- // `export` statements (handled above) and in the declaration line of a `for`
1774
- // loop. Try to detect when `from` is a variable identifier and when it is this
1775
- // "sometimes" keyword.
1776
- isForFrom = function(prev) {
1777
- var ref;
1778
- // `for i from iterable`
1779
- if (prev[0] === 'IDENTIFIER') {
1780
- return true;
1781
- // `for from…`
1782
- } else if (prev[0] === 'FOR') {
1783
- return false;
1784
- // `for {from}…`, `for [from]…`, `for {a, from}…`, `for {a: from}…`
1785
- } else if ((ref = prev[1]) === '{' || ref === '[' || ref === ',' || ref === ':') {
1786
- return false;
1787
- } else {
1788
- return true;
1789
- }
1790
- };
1301
+ let endImplicitCall = () => {
1302
+ stack.pop();
1303
+ tokens.splice(i, 0, gen('CALL_END', ')'));
1304
+ i += 1;
1305
+ };
1791
1306
 
1792
- addTokenData = function(token, data) {
1793
- return Object.assign((token.data != null ? token.data : token.data = {}), data);
1794
- };
1307
+ let startImplicitObject = (idx, opts = {}) => {
1308
+ stack.push(['{', idx, {sameLine: true, startsLine: opts.startsLine ?? true, ours: true}]);
1309
+ let t = gen('{', '{');
1310
+ if (!t.data) t.data = {};
1311
+ t.data.generated = true;
1312
+ tokens.splice(idx, 0, t);
1313
+ };
1795
1314
 
1796
- // Constants
1797
- // ---------
1798
-
1799
- // Keywords that Rip shares in common with JavaScript.
1800
- JS_KEYWORDS = ['true', 'false', 'null', 'this', 'new', 'delete', 'typeof', 'in', 'instanceof', 'return', 'throw', 'break', 'continue', 'debugger', 'yield', 'await', 'if', 'else', 'switch', 'for', 'while', 'do', 'try', 'catch', 'finally', 'class', 'extends', 'super', 'import', 'export', 'default'];
1801
-
1802
- // Rip-only keywords.
1803
- RIP_KEYWORDS = ['undefined', 'Infinity', 'NaN', 'then', 'unless', 'until', 'loop', 'of', 'by', 'when', 'def'];
1804
-
1805
- RIP_ALIAS_MAP = {
1806
- and: '&&',
1807
- or: '||',
1808
- is: '==', // Lexer maps to ==, codegen converts to === (strict)
1809
- isnt: '!=', // Lexer maps to !=, codegen converts to !== (strict)
1810
- not: '!',
1811
- yes: 'true',
1812
- no: 'false',
1813
- on: 'true',
1814
- off: 'false'
1815
- };
1315
+ let endImplicitObject = (j) => {
1316
+ j = j ?? i;
1317
+ stack.pop();
1318
+ tokens.splice(j, 0, gen('}', '}'));
1319
+ i += 1;
1320
+ };
1816
1321
 
1817
- RIP_ALIASES = (function() {
1818
- var results;
1819
- results = [];
1820
- for (key in RIP_ALIAS_MAP) {
1821
- results.push(key);
1822
- }
1823
- return results;
1824
- })();
1322
+ // Don't end implicit on INDENT for control flow inside implicit
1323
+ if ((inImplicitCall() || inImplicitObject()) && CONTROL_IN_IMPLICIT.has(tag)) {
1324
+ stack.push(['CONTROL', i, {ours: true}]);
1325
+ return forward(1);
1326
+ }
1825
1327
 
1826
- RIP_KEYWORDS = RIP_KEYWORDS.concat(RIP_ALIASES);
1328
+ // INDENT closes implicit call (usually)
1329
+ if (tag === 'INDENT' && isImplicit(stackTop())) {
1330
+ if (prevTag !== '=>' && prevTag !== '->' && prevTag !== '[' && prevTag !== '(' && prevTag !== ',' && prevTag !== '{' && prevTag !== 'ELSE' && prevTag !== '=') {
1331
+ while (inImplicitCall() || (inImplicitObject() && prevTag !== ':')) {
1332
+ if (inImplicitCall()) endImplicitCall();
1333
+ else endImplicitObject();
1334
+ }
1335
+ }
1336
+ if (stackTop()?.[2]?.ours && stackTop()[0] === 'CONTROL') stack.pop();
1337
+ stack.push([tag, i]);
1338
+ return forward(1);
1339
+ }
1827
1340
 
1828
- // The list of keywords that are reserved by JavaScript, but not used, or are
1829
- // used by Rip internally. We throw an error when these are encountered,
1830
- // to avoid having a JavaScript error at runtime.
1831
- RESERVED = ['case', 'function', 'var', 'void', 'with', 'const', 'let', 'enum', 'native', 'implements', 'interface', 'package', 'private', 'protected', 'public', 'static'];
1341
+ // Explicit expression start
1342
+ if (EXPRESSION_START.has(tag)) {
1343
+ stack.push([tag, i]);
1344
+ return forward(1);
1345
+ }
1832
1346
 
1833
- STRICT_PROSCRIBED = ['arguments', 'eval'];
1347
+ // Explicit expression end — close all implicit inside
1348
+ if (EXPRESSION_END.has(tag)) {
1349
+ while (isImplicit(stackTop())) {
1350
+ if (inImplicitCall()) endImplicitCall();
1351
+ else if (inImplicitObject()) endImplicitObject();
1352
+ else stack.pop();
1353
+ }
1354
+ stack.pop();
1355
+ }
1834
1356
 
1835
- // The superset of both JavaScript keywords and reserved words, none of which may
1836
- // be used as identifiers or properties.
1837
- export var JS_FORBIDDEN = JS_KEYWORDS.concat(RESERVED).concat(STRICT_PROSCRIBED);
1357
+ // Detect implicit function calls
1358
+ if (IMPLICIT_FUNC.has(tag) && token.spaced &&
1359
+ (IMPLICIT_CALL.has(nextTag) || (nextTag === '...' && IMPLICIT_CALL.has(tokens[i + 2]?.[0])) ||
1360
+ (IMPLICIT_UNSPACED_CALL.has(nextTag) && !nextToken.spaced && !nextToken.newLine)) &&
1361
+ !((tag === ']' || tag === '}') && (nextTag === '->' || nextTag === '=>'))) {
1362
+ startImplicitCall(i + 1);
1363
+ return forward(2);
1364
+ }
1838
1365
 
1839
- // The character code of the nasty Microsoft madness otherwise known as the BOM.
1840
- BOM = 65279;
1366
+ // Detect implicit function call with indented object
1367
+ if (IMPLICIT_FUNC.has(tag) && this.tokens[i + 1]?.[0] === 'INDENT' &&
1368
+ this.looksObjectish(i + 2) &&
1369
+ !this.findTagsBackwards(i, ['CLASS', 'EXTENDS', 'IF', 'CATCH', 'SWITCH', 'LEADING_WHEN', 'FOR', 'WHILE', 'UNTIL'])) {
1370
+ startImplicitCall(i + 1);
1371
+ stack.push(['INDENT', i + 2]);
1372
+ return forward(3);
1373
+ }
1841
1374
 
1842
- // Token matching regexes.
1843
- IDENTIFIER = /^(?!\d)((?:(?!\s)[$\w\x7f-\uffff])+!?)([^\n\S]*:(?![=:]))?/; // rip: allow optional trailing ! for async calls; exclude := and ::
1844
- // Is this a property name?
1375
+ // Track ternary
1376
+ if (tag === 'SPACE?') inTernary = true;
1845
1377
 
1846
- NUMBER = /^0b[01](?:_?[01])*n?|^0o[0-7](?:_?[0-7])*n?|^0x[\da-f](?:_?[\da-f])*n?|^\d+(?:_\d+)*n|^(?:\d+(?:_\d+)*)?\.?\d+(?:_\d+)*(?:e[+-]?\d+(?:_\d+)*)?/i; // binary
1847
- // octal
1848
- // hex
1849
- // decimal bigint
1850
- // decimal
1851
- // decimal without support for numeric literal separators for reference:
1852
- // \d*\.?\d+ (?:e[+-]?\d+)?
1378
+ // Implicit objects start at ':'
1379
+ if (tag === ':') {
1380
+ if (inTernary) {
1381
+ inTernary = false;
1382
+ return forward(1);
1383
+ }
1853
1384
 
1854
- OPERATOR = /^(?:<=>|[-=]>|~>|~=|:=|=!|===|!==|!\?|\?\?|=~|[-+*\/%<>&|^!?=]=|>>>=?|([-+:])\1|([&|<>*\/%])\2=?|\?(\.|::)|\.{2,3})/; // function
1855
- // := is reactive state assignment
1856
- // ~= is computed assign (reactive computed values)
1857
- // =! is readonly assign (reactive constant)
1858
- // Added === and !== for explicit strict equality (compiles same as == and !=)
1859
- // !? (otherwise operator) must come before ?? and before !=
1860
- // ?? must come before single ? to match correctly
1861
- // regex match operator
1862
- // compound assign / compare / strict equality
1863
- // zero-fill right shift
1864
- // doubles
1865
- // logic / shift / power / floor division / modulo
1866
- // soak access
1867
- // range or splat
1385
+ // Find the start of this key
1386
+ let s = EXPRESSION_END.has(this.tokens[i - 1]?.[0]) ? stack[stack.length - 1]?.[1] ?? i - 1 : i - 1;
1387
+ if (this.tokens[i - 2]?.[0] === '@') s = i - 2;
1868
1388
 
1869
- WHITESPACE = /^[^\n\S]+/;
1389
+ let startsLine = s <= 0 || LINE_BREAK.has(this.tokens[s - 1]?.[0]) || this.tokens[s - 1]?.newLine;
1870
1390
 
1871
- COMMENT = /^(\s*)###([^#][\s\S]*?)(?:###([^\n\S]*)|###$)|^((?:\s*#(?!##[^#]).*)+)/;
1391
+ // Check if we're continuing an existing object
1392
+ if (stackTop()) {
1393
+ let [stackTag, stackIdx] = stackTop();
1394
+ let stackNext = stack[stack.length - 2];
1395
+ if ((stackTag === '{' || (stackTag === 'INDENT' && stackNext?.[0] === '{' && !isImplicit(stackNext))) &&
1396
+ (startsLine || this.tokens[s - 1]?.[0] === ',' || this.tokens[s - 1]?.[0] === '{' || this.tokens[s]?.[0] === '{')) {
1397
+ return forward(1);
1398
+ }
1399
+ }
1872
1400
 
1873
- CODE = /^[-=]>/;
1401
+ startImplicitObject(s, {startsLine: !!startsLine});
1402
+ return forward(2);
1403
+ }
1874
1404
 
1875
- MULTI_DENT = /^(?:\n[^\n\S]*)+/;
1405
+ // Mark implicit objects as not sameLine on newlines
1406
+ if (LINE_BREAK.has(tag)) {
1407
+ for (let k = stack.length - 1; k >= 0; k--) {
1408
+ if (!isImplicit(stack[k])) break;
1409
+ if (stack[k][0] === '{') stack[k][2].sameLine = false;
1410
+ }
1411
+ }
1876
1412
 
1877
- JSTOKEN = /^`(?!``)((?:[^`\\]|\\[\s\S])*)`/;
1413
+ // End implicit calls/objects
1414
+ let newLine = prevTag === 'OUTDENT' || prevToken.newLine;
1415
+ let isLogicalOp = tag === '||' || tag === '&&';
1416
+ let logicalKeep = false;
1417
+ if (isLogicalOp) {
1418
+ // Don't close implicit call when more comma-separated args follow
1419
+ let j = i + 1, t = tokens[j]?.[0];
1420
+ if (t === '(' || t === '[' || t === '{') {
1421
+ for (let d = 1; ++j < tokens.length && d > 0;) {
1422
+ t = tokens[j][0];
1423
+ if (t === '(' || t === '[' || t === '{') d++;
1424
+ else if (t === ')' || t === ']' || t === '}') d--;
1425
+ }
1426
+ } else if (t && t !== 'TERMINATOR' && t !== 'OUTDENT' && t !== ',') j++;
1427
+ logicalKeep = tokens[j]?.[0] === ',';
1428
+ }
1429
+ if ((IMPLICIT_END.has(tag) && !logicalKeep) || (CALL_CLOSERS.has(tag) && newLine)) {
1430
+ while (isImplicit(stackTop())) {
1431
+ let [stackTag, , {sameLine, startsLine}] = stackTop();
1432
+ if (inImplicitCall() && prevTag !== ',') {
1433
+ endImplicitCall();
1434
+ } else if (inImplicitObject() && !isLogicalOp && sameLine && tag !== 'TERMINATOR' && prevTag !== ':') {
1435
+ endImplicitObject();
1436
+ } else if (inImplicitObject() && tag === 'TERMINATOR' && prevTag !== ',' && !(startsLine && this.looksObjectish(i + 1))) {
1437
+ endImplicitObject();
1438
+ } else if (stackTop()?.[2]?.ours && stackTop()[0] === 'CONTROL' && tokens[stackTop()[1]]?.[0] === 'CLASS' && tag === 'TERMINATOR') {
1439
+ stack.pop();
1440
+ } else {
1441
+ break;
1442
+ }
1443
+ }
1444
+ }
1878
1445
 
1879
- HERE_JSTOKEN = /^```((?:[^`\\]|\\[\s\S]|`(?!``))*)```/;
1446
+ // Close implicit object on comma when next doesn't look objectish
1447
+ if (tag === ',' && !this.looksObjectish(i + 1) && inImplicitObject() &&
1448
+ (nextTag !== 'TERMINATOR' || !this.looksObjectish(i + 2))) {
1449
+ let offset = nextTag === 'OUTDENT' ? 1 : 0;
1450
+ while (inImplicitObject()) endImplicitObject(i + offset);
1451
+ }
1880
1452
 
1881
- // String-matching-regexes.
1882
- STRING_START = /^(?:'''|"""|'|")/;
1453
+ return forward(1);
1454
+ });
1455
+ }
1883
1456
 
1884
- STRING_SINGLE = /^(?:[^\\']|\\[\s\S])*/;
1457
+ // Insert commas before arrows inside implicit calls: fn "arg" -> body
1458
+ addImplicitCallCommas() {
1459
+ let callDepth = 0;
1460
+ let i = 0;
1461
+ let tokens = this.tokens;
1885
1462
 
1886
- STRING_DOUBLE = /^(?:[^\\"#$]|\\[\s\S]|\#(?!\{)|\$(?!\{))*/;
1463
+ while (i < tokens.length) {
1464
+ let tag = tokens[i][0];
1465
+ let prevTag = i > 0 ? tokens[i - 1][0] : null;
1887
1466
 
1888
- HEREDOC_SINGLE = /^(?:[^\\']|\\[\s\S]|'(?!''))*/;
1467
+ if (tag === 'CALL_START' || tag === '(') callDepth++;
1468
+ if (tag === 'CALL_END' || tag === ')') callDepth--;
1889
1469
 
1890
- HEREDOC_DOUBLE = /^(?:[^\\"#$]|\\[\s\S]|"(?!"")|\#(?!\{)|\$(?!\{))*/;
1891
-
1892
- HEREDOC_INDENT = /\n+([^\n\S]*)(?=\S)/g;
1893
-
1894
- // Regex-matching-regexes.
1895
- REGEX = /^\/(?!\/)((?:[^[\/\n\\]|\\[^\n]|\[(?:\\[^\n]|[^\]\n\\])*\])*)(\/)?/; // Every other thing.
1896
- // Anything but newlines escaped.
1897
- // Character class.
1898
-
1899
- REGEX_FLAGS = /^\w*/;
1900
-
1901
- VALID_FLAGS = /^(?!.*(.).*\1)[gimsuy]*$/;
1902
-
1903
- HEREGEX = /^(?:[^\\\/#\s]|\\[\s\S]|\/(?!\/\/)|\#(?!\{)|\s+(?:#(?!\{).*)?)*/; // Match any character, except those that need special handling below.
1904
- // Match `\` followed by any character.
1905
- // Match any `/` except `///`.
1906
- // Match `#` which is not part of interpolation, e.g. `#{}`.
1907
- // Comments consume everything until the end of the line, including `///`.
1908
-
1909
- HEREGEX_COMMENT = /(\s+)(#(?!{).*)/gm;
1910
-
1911
- REGEX_ILLEGAL = /^(\/|\/{3}\s*)(\*)/;
1912
-
1913
- POSSIBLY_DIVISION = /^\/=?\s/;
1914
-
1915
- // Other regexes.
1916
- HERECOMMENT_ILLEGAL = /\*\//;
1917
-
1918
- LINE_CONTINUER = /^\s*(?:,|\??\.(?![.\d])|\??::)/;
1919
-
1920
- STRING_INVALID_ESCAPE = /((?:^|[^\\])(?:\\\\)*)\\(?:(0\d|[1-7])|(x(?![\da-fA-F]{2}).{0,2})|(u\{(?![\da-fA-F]{1,}\})[^}]*\}?)|(u(?!\{|[\da-fA-F]{4}).{0,4}))/; // Make sure the escape isn't escaped.
1921
- // octal escape
1922
- // hex escape
1923
- // unicode code point escape
1924
- // unicode escape
1925
-
1926
- REGEX_INVALID_ESCAPE = /((?:^|[^\\])(?:\\\\)*)\\(?:(0\d)|(x(?![\da-fA-F]{2}).{0,2})|(u\{(?![\da-fA-F]{1,}\})[^}]*\}?)|(u(?!\{|[\da-fA-F]{4}).{0,4}))/; // Make sure the escape isn't escaped.
1927
- // octal escape
1928
- // hex escape
1929
- // unicode code point escape
1930
- // unicode escape
1931
-
1932
- TRAILING_SPACES = /\s+$/;
1933
-
1934
- // Compound assignment tokens.
1935
- COMPOUND_ASSIGN = ['-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '??=', '<<=', '>>=', '>>>=', '&=', '^=', '|=', '**=', '//=', '%%='];
1936
-
1937
- // Unary tokens.
1938
- UNARY = ['NEW', 'TYPEOF', 'DELETE'];
1939
-
1940
- UNARY_MATH = ['!', '~'];
1941
-
1942
- // Bit-shifting tokens.
1943
- SHIFT = ['<<', '>>', '>>>'];
1944
-
1945
- // Comparison tokens.
1946
- COMPARE = ['==', '!=', '===', '!==', '<', '>', '<=', '>=', '=~'];
1947
-
1948
- // Mathematical tokens.
1949
- MATH = ['*', '/', '%', '//', '%%'];
1950
-
1951
- // Relational tokens that are negatable with `not` prefix.
1952
- RELATION = ['IN', 'OF', 'INSTANCEOF'];
1953
-
1954
- // Boolean tokens.
1955
- BOOL = ['TRUE', 'FALSE'];
1956
-
1957
- // Tokens which could legitimately be invoked or indexed. An opening
1958
- // parentheses or bracket following these tokens will be recorded as the start
1959
- // of a function invocation or indexing operation.
1960
- CALLABLE = ['IDENTIFIER', 'PROPERTY', ')', ']', '?', '@', 'THIS', 'SUPER', 'DYNAMIC_IMPORT', '?.'];
1961
-
1962
- INDEXABLE = CALLABLE.concat(['NUMBER', 'INFINITY', 'NAN', 'STRING', 'STRING_END', 'REGEX', 'REGEX_END', 'BOOL', 'NULL', 'UNDEFINED', '}', '::', '?.']);
1963
-
1964
- // Tokens which can be the left-hand side of a less-than comparison, i.e. `a<b`.
1965
- COMPARABLE_LEFT_SIDE = ['IDENTIFIER', ')', ']', 'NUMBER'];
1966
-
1967
- // Tokens which a regular expression will never immediately follow (except spaced
1968
- // CALLABLEs in some cases), but which a division operator can.
1969
- //
1970
- // See: http://www-archive.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
1971
- NOT_REGEX = INDEXABLE.concat(['++', '--']);
1972
-
1973
- // Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN`
1974
- // occurs at the start of a line. We disambiguate these from trailing whens to
1975
- // avoid an ambiguity in the grammar.
1976
- LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR'];
1977
-
1978
- // Additional indent in front of these is ignored.
1979
- INDENTABLE_CLOSERS = [')', '}', ']'];
1980
-
1981
- // ==============================================================================
1982
- // Rewriter
1983
- // ==============================================================================
1984
-
1985
- // The Rip language has a good deal of optional syntax, implicit syntax,
1986
- // and shorthand syntax. This can greatly complicate a grammar and bloat
1987
- // the resulting parse table. Instead of making the parser handle it all, we take
1988
- // a series of passes over the token stream, using this **Rewriter** to convert
1989
- // shorthand into the unambiguous long form, add implicit indentation and
1990
- // parentheses, and generally clean things up.
1991
-
1992
- // Move attached comments from one token to another.
1993
- moveComments = function(fromToken, toToken) {
1994
- var comment, k, len, ref, unshiftedComments;
1995
- if (!fromToken.comments) {
1996
- return;
1997
- }
1998
- if (toToken.comments && toToken.comments.length !== 0) {
1999
- unshiftedComments = [];
2000
- ref = fromToken.comments;
2001
- for (k = 0, len = ref.length; k < len; k++) {
2002
- comment = ref[k];
2003
- if (comment.unshift) {
2004
- unshiftedComments.push(comment);
2005
- } else {
2006
- toToken.comments.push(comment);
1470
+ if (callDepth > 0 && (tag === '->' || tag === '=>') && IMPLICIT_COMMA_BEFORE_ARROW.has(prevTag)) {
1471
+ tokens.splice(i, 0, gen(',', ','));
1472
+ i++;
2007
1473
  }
1474
+ i++;
2008
1475
  }
2009
- toToken.comments = unshiftedComments.concat(toToken.comments);
2010
- } else {
2011
- toToken.comments = fromToken.comments;
2012
1476
  }
2013
- return delete fromToken.comments;
2014
- };
2015
-
2016
- // Create a generated token: one that exists due to a use of implicit syntax.
2017
- // Optionally have this new token take the attached comments from another token.
2018
- generate = function(tag, value, origin, commentsToken) {
2019
- var token;
2020
- token = [tag, value];
2021
- token.generated = true;
2022
- if (origin) {
2023
- token.origin = origin;
2024
- }
2025
- if (commentsToken) {
2026
- moveComments(commentsToken, token);
2027
- }
2028
- return token;
2029
- };
2030
-
2031
- Rewriter = (function() {
2032
- // The **Rewriter** class is used by the [Lexer](lexer.html), directly against
2033
- // its internal array of tokens.
2034
- class Rewriter {
2035
- // Rewrite the token stream in multiple passes, one logical filter at
2036
- // a time. This could certainly be changed into a single pass through the
2037
- // stream, with a big ol' efficient switch, but it's much nicer to work with
2038
- // like this. The order of these passes matters—indentation must be
2039
- // corrected before implicit parentheses can be wrapped around blocks of code.
2040
- rewrite(tokens1) {
2041
- var ref, ref1, t;
2042
- this.tokens = tokens1;
2043
- // Set environment variable `DEBUG_TOKEN_STREAM` to `true` to output token
2044
- // debugging info. Also set `DEBUG_REWRITTEN_TOKEN_STREAM` to `true` to
2045
- // output the token stream after it has been rewritten by this file.
2046
- if (typeof process !== "undefined" && process !== null ? (ref = process.env) != null ? ref.DEBUG_TOKEN_STREAM : void 0 : void 0) {
2047
- if (process.env.DEBUG_REWRITTEN_TOKEN_STREAM) {
2048
- console.log('Initial token stream:');
2049
- }
2050
- console.log(((function() {
2051
- var k, len, ref1, results;
2052
- ref1 = this.tokens;
2053
- results = [];
2054
- for (k = 0, len = ref1.length; k < len; k++) {
2055
- t = ref1[k];
2056
- results.push(t[0] + '/' + t[1] + (t.comments ? '*' : ''));
2057
- }
2058
- return results;
2059
- }).call(this)).join(' '));
2060
- }
2061
- this.removeLeadingNewlines();
2062
- this.closeOpenCalls();
2063
- this.closeOpenIndexes();
2064
- this.normalizeLines();
2065
- this.convertPostfixSpreadRest();
2066
- this.tagPostfixConditionals();
2067
- this.addImplicitBracesAndParens();
2068
- this.addImplicitCallCommas();
2069
- this.rescueStowawayComments();
2070
- this.addLocationDataToGeneratedTokens();
2071
- this.fixIndentationLocationData();
2072
- this.exposeTokenDataToGrammar();
2073
- if (typeof process !== "undefined" && process !== null ? (ref1 = process.env) != null ? ref1.DEBUG_REWRITTEN_TOKEN_STREAM : void 0 : void 0) {
2074
- if (process.env.DEBUG_TOKEN_STREAM) {
2075
- console.log('Rewritten token stream:');
2076
- }
2077
- console.log(((function() {
2078
- var k, len, ref2, results;
2079
- ref2 = this.tokens;
2080
- results = [];
2081
- for (k = 0, len = ref2.length; k < len; k++) {
2082
- t = ref2[k];
2083
- results.push(t[0] + '/' + t[1] + (t.comments ? '*' : ''));
2084
- }
2085
- return results;
2086
- }).call(this)).join(' '));
2087
- }
2088
- return this.tokens;
2089
- }
2090
-
2091
- // Rewrite the token stream, looking one token ahead and behind.
2092
- // Allow the return value of the block to tell us how many tokens to move
2093
- // forwards (or backwards) in the stream, to make sure we don't miss anything
2094
- // as tokens are inserted and removed, and the stream changes length under
2095
- // our feet.
2096
- scanTokens(block) {
2097
- var i, token, tokens;
2098
- ({tokens} = this);
2099
- i = 0;
2100
- while (token = tokens[i]) {
2101
- i += block.call(this, token, i, tokens);
2102
- }
2103
- return true;
2104
- }
2105
-
2106
- detectEnd(i, condition, action, opts = {}) {
2107
- var levels, ref, ref1, token, tokens;
2108
- ({tokens} = this);
2109
- levels = 0;
2110
- while (token = tokens[i]) {
2111
- if (levels === 0 && condition.call(this, token, i)) {
2112
- return action.call(this, token, i);
2113
- }
2114
- if (ref = token[0], indexOf.call(EXPRESSION_START, ref) >= 0) {
2115
- levels += 1;
2116
- } else if (ref1 = token[0], indexOf.call(EXPRESSION_END, ref1) >= 0) {
2117
- levels -= 1;
2118
- }
2119
- if (levels < 0) {
2120
- if (opts.returnOnNegativeLevel) {
2121
- return;
2122
- }
2123
- return action.call(this, token, i);
2124
- }
2125
- i += 1;
2126
- }
2127
- return i - 1;
2128
- }
2129
-
2130
- // Leading newlines would introduce an ambiguity in the grammar, so we
2131
- // dispatch them here.
2132
- removeLeadingNewlines() {
2133
- var i, k, l, leadingNewlineToken, len, len1, ref, ref1, tag;
2134
- ref = this.tokens;
2135
- for (i = k = 0, len = ref.length; k < len; i = ++k) {
2136
- [tag] = ref[i];
2137
- if (tag !== 'TERMINATOR') {
2138
- // Find the index of the first non-`TERMINATOR` token.
2139
- break;
2140
- }
2141
- }
2142
- if (i === 0) {
2143
- return;
2144
- }
2145
- ref1 = this.tokens.slice(0, i);
2146
- // If there are any comments attached to the tokens we're about to discard,
2147
- // shift them forward to what will become the new first token.
2148
- for (l = 0, len1 = ref1.length; l < len1; l++) {
2149
- leadingNewlineToken = ref1[l];
2150
- moveComments(leadingNewlineToken, this.tokens[i]);
2151
- }
2152
- // Discard all the leading newline tokens.
2153
- return this.tokens.splice(0, i);
2154
- }
2155
-
2156
- // The lexer has tagged the opening parenthesis of a method call. Match it with
2157
- // its paired close.
2158
- closeOpenCalls() {
2159
- var action, condition;
2160
- condition = function(token, i) {
2161
- var ref;
2162
- return (ref = token[0]) === ')' || ref === 'CALL_END';
2163
- };
2164
- action = function(token, i) {
2165
- return token[0] = 'CALL_END';
2166
- };
2167
- return this.scanTokens(function(token, i) {
2168
- if (token[0] === 'CALL_START') {
2169
- this.detectEnd(i + 1, condition, action);
2170
- }
2171
- return 1;
2172
- });
2173
- }
2174
-
2175
- // The lexer has tagged the opening bracket of an indexing operation call.
2176
- // Match it with its paired close.
2177
- closeOpenIndexes() {
2178
- var action, condition, startToken;
2179
- startToken = null;
2180
- condition = function(token, i) {
2181
- var ref;
2182
- return (ref = token[0]) === ']' || ref === 'INDEX_END';
2183
- };
2184
- action = function(token, i) {
2185
- if (this.tokens.length >= i && this.tokens[i + 1][0] === ':') {
2186
- startToken[0] = '[';
2187
- return token[0] = ']';
2188
- } else {
2189
- return token[0] = 'INDEX_END';
2190
- }
2191
- };
2192
- return this.scanTokens(function(token, i) {
2193
- if (token[0] === 'INDEX_START') {
2194
- startToken = token;
2195
- this.detectEnd(i + 1, condition, action);
2196
- }
2197
- return 1;
2198
- });
2199
- }
2200
1477
 
2201
- // Match tags in token stream starting at `i` with `pattern`.
2202
- // `pattern` may consist of strings (equality), an array of strings (one of)
2203
- // or null (wildcard). Returns the index of the match or -1 if no match.
2204
- indexOfTag(i, ...pattern) {
2205
- var fuzz, j, k, ref, ref1;
2206
- fuzz = 0;
2207
- for (j = k = 0, ref = pattern.length; (0 <= ref ? k < ref : k > ref); j = 0 <= ref ? ++k : --k) {
2208
- if (pattern[j] == null) {
2209
- continue;
2210
- }
2211
- if (typeof pattern[j] === 'string') {
2212
- pattern[j] = [pattern[j]];
2213
- }
2214
- if (ref1 = this.tag(i + j + fuzz), indexOf.call(pattern[j], ref1) < 0) {
2215
- return -1;
2216
- }
2217
- }
2218
- return i + j + fuzz - 1;
2219
- }
1478
+ // --- Rewriter helpers ---
2220
1479
 
2221
- // Returns `yes` if standing in front of something looking like
2222
- // `@<x>:`, `<x>:` or `<EXPRESSION_START><x>...<EXPRESSION_END>:`.
2223
- looksObjectish(j) {
2224
- var end, index;
2225
- if (this.indexOfTag(j, '@', null, ':') !== -1 || this.indexOfTag(j, null, ':') !== -1) {
2226
- return true;
2227
- }
2228
- index = this.indexOfTag(j, EXPRESSION_START);
2229
- if (index !== -1) {
2230
- end = null;
2231
- this.detectEnd(index + 1, (function(token) {
2232
- var ref;
2233
- return ref = token[0], indexOf.call(EXPRESSION_END, ref) >= 0;
2234
- }), (function(token, i) {
2235
- return end = i;
2236
- }));
2237
- if (this.tag(end + 1) === ':') {
2238
- return true;
2239
- }
2240
- }
2241
- return false;
2242
- }
2243
-
2244
- // Returns `yes` if current line of tokens contain an element of tags on same
2245
- // expression level. Stop searching at `LINEBREAKS` or explicit start of
2246
- // containing balanced expression.
2247
- findTagsBackwards(i, tags) {
2248
- var backStack, ref, ref1, ref2, ref3, ref4, ref5;
2249
- backStack = [];
2250
- while (i >= 0 && (backStack.length || (ref2 = this.tag(i), indexOf.call(tags, ref2) < 0) && ((ref3 = this.tag(i), indexOf.call(EXPRESSION_START, ref3) < 0) || this.tokens[i].generated) && (ref4 = this.tag(i), indexOf.call(LINEBREAKS, ref4) < 0))) {
2251
- if (ref = this.tag(i), indexOf.call(EXPRESSION_END, ref) >= 0) {
2252
- backStack.push(this.tag(i));
2253
- }
2254
- if ((ref1 = this.tag(i), indexOf.call(EXPRESSION_START, ref1) >= 0) && backStack.length) {
2255
- backStack.pop();
2256
- }
2257
- i -= 1;
2258
- }
2259
- return ref5 = this.tag(i), indexOf.call(tags, ref5) >= 0;
2260
- }
2261
-
2262
- // Look for signs of implicit calls and objects in the token stream and
2263
- // add them.
2264
- addImplicitBracesAndParens() {
2265
- var stack, start, inTernary;
2266
- // Track current balancing depth (both implicit and explicit) on stack.
2267
- stack = [];
2268
- start = null;
2269
- // Track if we're in ternary mode (saw ? waiting for :)
2270
- inTernary = false;
2271
- return this.scanTokens(function(token, i, tokens) {
2272
- var endImplicitCall, endImplicitObject, forward, implicitObjectContinues, implicitObjectIndent, inControlFlow, inImplicit, inImplicitCall, inImplicitControl, inImplicitObject, isImplicit, isImplicitCall, isImplicitObject, k, newLine, nextTag, nextToken, offset, preContinuationLineIndent, preObjectToken, prevTag, prevToken, ref, ref1, ref2, ref3, ref4, ref5, s, sameLine, stackIdx, stackItem, stackNext, stackTag, stackTop, startIdx, startImplicitCall, startImplicitObject, startIndex, startTag, startsLine, tag;
2273
- [tag] = token;
2274
- [prevTag] = prevToken = i > 0 ? tokens[i - 1] : [];
2275
- [nextTag] = nextToken = i < tokens.length - 1 ? tokens[i + 1] : [];
2276
- stackTop = function() {
2277
- return stack[stack.length - 1];
2278
- };
2279
- startIdx = i;
2280
- // Helper function, used for keeping track of the number of tokens consumed
2281
- // and spliced, when returning for getting a new token.
2282
- forward = function(n) {
2283
- return i - startIdx + n;
2284
- };
2285
- // Helper functions
2286
- isImplicit = function(stackItem) {
2287
- var ref;
2288
- return stackItem != null ? (ref = stackItem[2]) != null ? ref.ours : void 0 : void 0;
2289
- };
2290
- isImplicitObject = function(stackItem) {
2291
- return isImplicit(stackItem) && (stackItem != null ? stackItem[0] : void 0) === '{';
2292
- };
2293
- isImplicitCall = function(stackItem) {
2294
- return isImplicit(stackItem) && (stackItem != null ? stackItem[0] : void 0) === '(';
2295
- };
2296
- inImplicit = function() {
2297
- return isImplicit(stackTop());
2298
- };
2299
- inImplicitCall = function() {
2300
- return isImplicitCall(stackTop());
2301
- };
2302
- inImplicitObject = function() {
2303
- return isImplicitObject(stackTop());
2304
- };
2305
- // Unclosed control statement inside implicit parens (like
2306
- // class declaration or if-conditionals).
2307
- inImplicitControl = function() {
2308
- var ref;
2309
- return inImplicit() && ((ref = stackTop()) != null ? ref[0] : void 0) === 'CONTROL';
2310
- };
2311
- startImplicitCall = function(idx) {
2312
- stack.push([
2313
- '(',
2314
- idx,
2315
- {
2316
- ours: true
2317
- }
2318
- ]);
2319
- return tokens.splice(idx, 0, generate('CALL_START', '(', ['', 'implicit function call', token[2]], prevToken));
2320
- };
2321
- endImplicitCall = function() {
2322
- stack.pop();
2323
- tokens.splice(i, 0, generate('CALL_END', ')', ['', 'end of input', token[2]], prevToken));
2324
- return i += 1;
2325
- };
2326
- startImplicitObject = function(idx, {startsLine = true, continuationLineIndent} = {}) {
2327
- var val;
2328
- stack.push([
2329
- '{',
2330
- idx,
2331
- {
2332
- sameLine: true,
2333
- startsLine: startsLine,
2334
- ours: true,
2335
- continuationLineIndent: continuationLineIndent
2336
- }
2337
- ]);
2338
- val = new String('{');
2339
- val.generated = true;
2340
- return tokens.splice(idx, 0, generate('{', val, token, prevToken));
2341
- };
2342
- endImplicitObject = function(j) {
2343
- j = j != null ? j : i;
2344
- stack.pop();
2345
- tokens.splice(j, 0, generate('}', '}', token, prevToken));
2346
- return i += 1;
2347
- };
2348
- implicitObjectContinues = (j) => {
2349
- var nextTerminatorIdx;
2350
- nextTerminatorIdx = null;
2351
- this.detectEnd(j, function(token) {
2352
- return token[0] === 'TERMINATOR';
2353
- }, function(token, i) {
2354
- return nextTerminatorIdx = i;
2355
- }, {
2356
- returnOnNegativeLevel: true
2357
- });
2358
- if (nextTerminatorIdx == null) {
2359
- return false;
2360
- }
2361
- return this.looksObjectish(nextTerminatorIdx + 1);
2362
- };
2363
- // Don't end an implicit call/object on next indent if any of these are in an argument/value.
2364
- if ((inImplicitCall() || inImplicitObject()) && indexOf.call(CONTROL_IN_IMPLICIT, tag) >= 0 || inImplicitObject() && prevTag === ':' && tag === 'FOR') {
2365
- stack.push([
2366
- 'CONTROL',
2367
- i,
2368
- {
2369
- ours: true
2370
- }
2371
- ]);
2372
- return forward(1);
2373
- }
2374
- if (tag === 'INDENT' && inImplicit()) {
2375
- // An `INDENT` closes an implicit call unless
2376
- //
2377
- // 1. We have seen a `CONTROL` argument on the line.
2378
- // 2. The last token before the indent is part of the list below.
2379
- if (prevTag !== '=>' && prevTag !== '->' && prevTag !== '[' && prevTag !== '(' && prevTag !== ',' && prevTag !== '{' && prevTag !== 'ELSE' && prevTag !== '=') {
2380
- while (inImplicitCall() || inImplicitObject() && prevTag !== ':') {
2381
- if (inImplicitCall()) {
2382
- endImplicitCall();
2383
- } else {
2384
- endImplicitObject();
2385
- }
2386
- }
2387
- }
2388
- if (inImplicitControl()) {
2389
- stack.pop();
2390
- }
2391
- stack.push([tag, i]);
2392
- return forward(1);
2393
- }
2394
- // Straightforward start of explicit expression.
2395
- if (indexOf.call(EXPRESSION_START, tag) >= 0) {
2396
- stack.push([tag, i]);
2397
- return forward(1);
2398
- }
2399
- // Close all implicit expressions inside of explicitly closed expressions.
2400
- if (indexOf.call(EXPRESSION_END, tag) >= 0) {
2401
- while (inImplicit()) {
2402
- if (inImplicitCall()) {
2403
- endImplicitCall();
2404
- } else if (inImplicitObject()) {
2405
- endImplicitObject();
2406
- } else {
2407
- stack.pop();
2408
- }
2409
- }
2410
- start = stack.pop();
2411
- }
2412
- inControlFlow = () => {
2413
- var controlFlow, isFunc, seenFor, tagCurrentLine;
2414
- seenFor = this.findTagsBackwards(i, ['FOR']) && this.findTagsBackwards(i, ['FORIN', 'FOROF', 'FORFROM']);
2415
- controlFlow = seenFor || this.findTagsBackwards(i, ['WHILE', 'UNTIL', 'LOOP', 'LEADING_WHEN']);
2416
- if (!controlFlow) {
2417
- return false;
2418
- }
2419
- isFunc = false;
2420
- tagCurrentLine = token[2].first_line;
2421
- this.detectEnd(i, function(token, i) {
2422
- var ref;
2423
- return ref = token[0], indexOf.call(LINEBREAKS, ref) >= 0;
2424
- }, function(token, i) {
2425
- var first_line;
2426
- [prevTag, , {first_line}] = tokens[i - 1] || [];
2427
- return isFunc = tagCurrentLine === first_line && (prevTag === '->' || prevTag === '=>');
2428
- }, {
2429
- returnOnNegativeLevel: true
2430
- });
2431
- return isFunc;
2432
- };
2433
- // Recognize standard implicit calls like
2434
- // f a, f() b, f? c, h[0] d etc.
2435
- // Added support for spread dots on the left side: f ...a
2436
- // Don't treat `] ->` or `} ->` as implicit calls (these become args with comma insertion)
2437
- if ((indexOf.call(IMPLICIT_FUNC, tag) >= 0 && token.spaced || tag === '?' && i > 0 && !tokens[i - 1].spaced) && (indexOf.call(IMPLICIT_CALL, nextTag) >= 0 || (nextTag === '...' && (ref = this.tag(i + 2), indexOf.call(IMPLICIT_CALL, ref) >= 0) && !this.findTagsBackwards(i, ['INDEX_START', '['])) || indexOf.call(IMPLICIT_UNSPACED_CALL, nextTag) >= 0 && !nextToken.spaced && !nextToken.newLine) && !inControlFlow() && !((tag === ']' || tag === '}') && (nextTag === '->' || nextTag === '=>'))) {
2438
- if (tag === '?') {
2439
- tag = token[0] = 'FUNC_EXIST';
2440
- }
2441
- startImplicitCall(i + 1);
2442
- return forward(2);
2443
- }
2444
- // Implicit call taking an implicit indented object as first argument.
2445
- //
2446
- // f
2447
- // a: b
2448
- // c: d
2449
- //
2450
- // Don't accept implicit calls of this type, when on the same line
2451
- // as the control structures below as that may misinterpret constructs like:
2452
- //
2453
- // if f
2454
- // a: 1
2455
- // as
2456
- //
2457
- // if f(a: 1)
2458
- //
2459
- // which is probably always unintended.
2460
- // Furthermore don't allow this in the first line of a literal array
2461
- // or explicit object, as that creates grammatical ambiguities (#5368).
2462
- if (indexOf.call(IMPLICIT_FUNC, tag) >= 0 && this.indexOfTag(i + 1, 'INDENT') > -1 && this.looksObjectish(i + 2) && !this.findTagsBackwards(i, ['CLASS', 'EXTENDS', 'IF', 'CATCH', 'SWITCH', 'LEADING_WHEN', 'FOR', 'WHILE', 'UNTIL']) && !(((ref1 = (s = (ref2 = stackTop()) != null ? ref2[0] : void 0)) === '{' || ref1 === '[') && !isImplicit(stackTop()) && this.findTagsBackwards(i, s))) {
2463
- startImplicitCall(i + 1);
2464
- stack.push(['INDENT', i + 2]);
2465
- return forward(3);
2466
- }
2467
- // Track ternary operator: when we see SPACE?, next : is part of ternary
2468
- if (tag === 'SPACE?') {
2469
- inTernary = true;
2470
- }
2471
- // Implicit objects start here.
2472
- if (tag === ':') {
2473
- // If in ternary mode, skip implicit object creation
2474
- if (inTernary) {
2475
- inTernary = false; // Reset for next statement
2476
- return forward(1);
2477
- }
2478
- // Go back to the (implicit) start of the object.
2479
- s = (function() {
2480
- var ref3;
2481
- switch (false) {
2482
- case ref3 = this.tag(i - 1), indexOf.call(EXPRESSION_END, ref3) < 0:
2483
- [startTag, startIndex] = start;
2484
- if (startTag === '[' && startIndex > 0 && this.tag(startIndex - 1) === '@' && !tokens[startIndex - 1].spaced) {
2485
- return startIndex - 1;
2486
- } else {
2487
- return startIndex;
2488
- }
2489
- break;
2490
- case this.tag(i - 2) !== '@':
2491
- return i - 2;
2492
- default:
2493
- return i - 1;
2494
- }
2495
- }).call(this);
2496
- startsLine = s <= 0 || (ref3 = this.tag(s - 1), indexOf.call(LINEBREAKS, ref3) >= 0) || tokens[s - 1].newLine;
2497
- // Are we just continuing an already declared object?
2498
- // Including the case where we indent on the line after an explicit '{'.
2499
- if (stackTop()) {
2500
- [stackTag, stackIdx] = stackTop();
2501
- stackNext = stack[stack.length - 2];
2502
- if ((stackTag === '{' || stackTag === 'INDENT' && (stackNext != null ? stackNext[0] : void 0) === '{' && !isImplicit(stackNext) && this.findTagsBackwards(stackIdx - 1, ['{'])) && (startsLine || this.tag(s - 1) === ',' || this.tag(s - 1) === '{') && (ref4 = this.tag(s - 1), indexOf.call(UNFINISHED, ref4) < 0)) {
2503
- return forward(1);
2504
- }
2505
- }
2506
- preObjectToken = i > 1 ? tokens[i - 2] : [];
2507
- startImplicitObject(s, {
2508
- startsLine: !!startsLine,
2509
- continuationLineIndent: preObjectToken.continuationLineIndent
2510
- });
2511
- return forward(2);
2512
- }
2513
- // End implicit calls when chaining method calls
2514
- // like e.g.:
2515
- //
2516
- // f ->
2517
- // a
2518
- // .g b, ->
2519
- // c
2520
- // .h a
2521
- //
2522
- // and also
2523
- //
2524
- // f a
2525
- // .g b
2526
- // .h a
2527
-
2528
- // Mark all enclosing objects as not sameLine
2529
- if (indexOf.call(LINEBREAKS, tag) >= 0) {
2530
- for (k = stack.length - 1; k >= 0; k += -1) {
2531
- stackItem = stack[k];
2532
- if (!isImplicit(stackItem)) {
2533
- break;
2534
- }
2535
- if (isImplicitObject(stackItem)) {
2536
- stackItem[2].sameLine = false;
2537
- }
2538
- }
2539
- }
2540
- // End indented-continuation-line implicit objects once that indentation is over.
2541
- if (tag === 'TERMINATOR' && token.endsContinuationLineIndentation) {
2542
- ({preContinuationLineIndent} = token.endsContinuationLineIndentation);
2543
- while (inImplicitObject() && ((implicitObjectIndent = stackTop()[2].continuationLineIndent) != null) && implicitObjectIndent > preContinuationLineIndent) {
2544
- endImplicitObject();
2545
- }
2546
- }
2547
- newLine = prevTag === 'OUTDENT' || prevToken.newLine;
2548
- // || and && only end implicit calls, not implicit objects (allows `a: x or 'default'` in objects)
2549
- var isLogicalOp = tag === '||' || tag === '&&';
2550
- // For || and &&, check if there's an immediate comma after the next value
2551
- // e.g., `new Response body or 'default', { status: 200 }` - comma right after 'default'
2552
- // but NOT `get 'x' or set 'x', 42` - comma is inside nested set() call
2553
- var logicalOpHasMoreArgs = false;
2554
- if (isLogicalOp && i + 1 < tokens.length) {
2555
- var nextTok = tokens[i + 1][0];
2556
- // Skip one value token (STRING, NUMBER, IDENTIFIER, etc) or bracketed expression
2557
- var j = i + 1;
2558
- if (nextTok === '(' || nextTok === '[' || nextTok === '{') {
2559
- // Skip bracketed expression
2560
- var depth = 1;
2561
- j++;
2562
- while (j < tokens.length && depth > 0) {
2563
- var lt = tokens[j][0];
2564
- if (lt === '(' || lt === '[' || lt === '{') depth++;
2565
- else if (lt === ')' || lt === ']' || lt === '}') depth--;
2566
- j++;
2567
- }
2568
- } else if (nextTok !== 'TERMINATOR' && nextTok !== 'OUTDENT' && nextTok !== ',') {
2569
- // Simple value token - skip it
2570
- j++;
2571
- }
2572
- // Now check if next token is a comma
2573
- if (j < tokens.length && tokens[j][0] === ',') {
2574
- logicalOpHasMoreArgs = true;
2575
- }
2576
- }
2577
- if ((indexOf.call(IMPLICIT_END, tag) >= 0 && !(isLogicalOp && logicalOpHasMoreArgs)) || (indexOf.call(CALL_CLOSERS, tag) >= 0 && newLine) || ((tag === '..' || tag === '...') && this.findTagsBackwards(i, ["INDEX_START"]))) {
2578
- while (inImplicit()) {
2579
- [stackTag, stackIdx, {sameLine, startsLine}] = stackTop();
2580
- // Close implicit calls when reached end of argument list
2581
- if (inImplicitCall() && prevTag !== ',' || (prevTag === ',' && tag === 'TERMINATOR' && (nextTag == null))) {
2582
- endImplicitCall();
2583
- // Close implicit objects such as:
2584
- // return a: 1, b: 2 unless true
2585
- // But NOT for || or && which should stay inside objects
2586
- } else if (inImplicitObject() && !isLogicalOp && sameLine && tag !== 'TERMINATOR' && prevTag !== ':' && !((tag === 'POST_IF' || tag === 'POST_UNLESS' || tag === 'FOR' || tag === 'WHILE' || tag === 'UNTIL') && startsLine && implicitObjectContinues(i + 1))) {
2587
- endImplicitObject();
2588
- // Close implicit objects when at end of line, line didn't end with a comma
2589
- // and the implicit object didn't start the line or the next line doesn't look like
2590
- // the continuation of an object.
2591
- } else if (inImplicitObject() && tag === 'TERMINATOR' && prevTag !== ',' && !(startsLine && this.looksObjectish(i + 1))) {
2592
- endImplicitObject();
2593
- } else if (inImplicitControl() && tokens[stackTop()[1]][0] === 'CLASS' && tag === 'TERMINATOR') {
2594
- stack.pop();
2595
- } else {
2596
- break;
2597
- }
2598
- }
2599
- }
2600
- // Close implicit object if comma is the last character
2601
- // and what comes after doesn't look like it belongs.
2602
- // This is used for trailing commas and calls, like:
2603
- //
2604
- // x =
2605
- // a: b,
2606
- // c: d,
2607
- // e = 2
2608
- //
2609
- // and
2610
- //
2611
- // f a, b: c, d: e, f, g: h: i, j
2612
- //
2613
- if (tag === ',' && !this.looksObjectish(i + 1) && inImplicitObject() && !((ref5 = this.tag(i + 2)) === 'FOROF' || ref5 === 'FORIN') && (nextTag !== 'TERMINATOR' || !this.looksObjectish(i + 2))) {
2614
- // When nextTag is OUTDENT the comma is insignificant and
2615
- // should just be ignored so embed it in the implicit object.
2616
- //
2617
- // When it isn't the comma go on to play a role in a call or
2618
- // array further up the stack, so give it a chance.
2619
- offset = nextTag === 'OUTDENT' ? 1 : 0;
2620
- while (inImplicitObject()) {
2621
- endImplicitObject(i + offset);
2622
- }
2623
- }
2624
- return forward(1);
2625
- });
1480
+ scanTokens(fn) {
1481
+ let i = 0;
1482
+ while (i < this.tokens.length) {
1483
+ i += fn.call(this, this.tokens[i], i, this.tokens);
2626
1484
  }
1485
+ }
2627
1486
 
2628
- // Insert commas before arrow functions in implicit calls.
2629
- // Allows: get '/users' -> ... instead of: get '/users', -> ...
2630
- // Works with literals that would otherwise be syntax errors before arrows.
2631
- addImplicitCallCommas() {
2632
- var callDepth, i, prevTag, ref, tag, tokens;
2633
- tokens = this.tokens;
2634
- callDepth = 0;
2635
- i = 0;
2636
- while (i < tokens.length) {
2637
- tag = tokens[i][0];
2638
- prevTag = i > 0 ? tokens[i - 1][0] : null;
2639
- // Track call depth
2640
- if (tag === 'CALL_START' || tag === '(') {
2641
- callDepth++;
2642
- } else if (tag === 'CALL_END' || tag === ')') {
2643
- callDepth--;
2644
- }
2645
- // Inside a call, if we see -> or => preceded by a literal value, insert comma
2646
- if (callDepth > 0 && (tag === '->' || tag === '=>') &&
2647
- (ref = prevTag, indexOf.call(IMPLICIT_COMMA_BEFORE_ARROW, ref) >= 0)) {
2648
- tokens.splice(i, 0, generate(',', ',', tokens[i], tokens[i - 1]));
2649
- i++; // Skip past the inserted comma
2650
- }
2651
- i++;
1487
+ detectEnd(i, condition, action, opts = {}) {
1488
+ let levels = 0;
1489
+ while (i < this.tokens.length) {
1490
+ let token = this.tokens[i];
1491
+ if (levels === 0 && condition.call(this, token, i)) {
1492
+ return action.call(this, token, i);
2652
1493
  }
2653
- }
2654
-
2655
- // Not all tokens survive processing by the parser. To avoid comments getting
2656
- // lost into the ether, find comments attached to doomed tokens and move them
2657
- // to a token that will make it to the other side.
2658
- rescueStowawayComments() {
2659
- var dontShiftForward, insertPlaceholder, shiftCommentsBackward, shiftCommentsForward;
2660
- insertPlaceholder = function(token, j, tokens, method) {
2661
- if (tokens[j][0] !== 'TERMINATOR') {
2662
- tokens[method](generate('TERMINATOR', '\n', tokens[j]));
2663
- }
2664
- return tokens[method](generate('JS', '', tokens[j], token));
2665
- };
2666
- dontShiftForward = function(i, tokens) {
2667
- var j, ref;
2668
- j = i + 1;
2669
- while (j !== tokens.length && (ref = tokens[j][0], indexOf.call(DISCARDED, ref) >= 0)) {
2670
- if (tokens[j][0] === 'INTERPOLATION_END') {
2671
- return true;
2672
- }
2673
- j++;
2674
- }
2675
- return false;
2676
- };
2677
- shiftCommentsForward = function(token, i, tokens) {
2678
- var comment, j, k, len, ref, ref1, ref2;
2679
- // Find the next surviving token and attach this token's comments to it,
2680
- // with a flag that we know to output such comments *before* that
2681
- // token's own compilation. (Otherwise comments are output following
2682
- // the token they're attached to.)
2683
- j = i;
2684
- while (j !== tokens.length && (ref = tokens[j][0], indexOf.call(DISCARDED, ref) >= 0)) {
2685
- j++;
2686
- }
2687
- if (!(j === tokens.length || (ref1 = tokens[j][0], indexOf.call(DISCARDED, ref1) >= 0))) {
2688
- ref2 = token.comments;
2689
- for (k = 0, len = ref2.length; k < len; k++) {
2690
- comment = ref2[k];
2691
- comment.unshift = true;
2692
- }
2693
- moveComments(token, tokens[j]);
2694
- return 1; // All following tokens are doomed!
2695
- } else {
2696
- j = tokens.length - 1;
2697
- insertPlaceholder(token, j, tokens, 'push');
2698
- // The generated tokens were added to the end, not inline, so we don't skip.
2699
- return 1;
2700
- }
2701
- };
2702
- shiftCommentsBackward = function(token, i, tokens) {
2703
- var j, ref, ref1;
2704
- // Find the last surviving token and attach this token's comments to it.
2705
- j = i;
2706
- while (j !== -1 && (ref = tokens[j][0], indexOf.call(DISCARDED, ref) >= 0)) {
2707
- j--;
2708
- }
2709
- if (!(j === -1 || (ref1 = tokens[j][0], indexOf.call(DISCARDED, ref1) >= 0))) {
2710
- moveComments(token, tokens[j]);
2711
- return 1; // All previous tokens are doomed!
2712
- } else {
2713
- insertPlaceholder(token, 0, tokens, 'unshift');
2714
- // We added two tokens, so shift forward to account for the insertion.
2715
- return 3;
2716
- }
2717
- };
2718
- return this.scanTokens(function(token, i, tokens) {
2719
- var dummyToken, j, ref, ref1, ret;
2720
- if (!token.comments) {
2721
- return 1;
2722
- }
2723
- ret = 1;
2724
- if (ref = token[0], indexOf.call(DISCARDED, ref) >= 0) {
2725
- // This token won't survive passage through the parser, so we need to
2726
- // rescue its attached tokens and redistribute them to nearby tokens.
2727
- // Comments that don't start a new line can shift backwards to the last
2728
- // safe token, while other tokens should shift forward.
2729
- dummyToken = {
2730
- comments: []
2731
- };
2732
- j = token.comments.length - 1;
2733
- while (j !== -1) {
2734
- if (token.comments[j].newLine === false && token.comments[j].here === false) {
2735
- dummyToken.comments.unshift(token.comments[j]);
2736
- token.comments.splice(j, 1);
2737
- }
2738
- j--;
2739
- }
2740
- if (dummyToken.comments.length !== 0) {
2741
- ret = shiftCommentsBackward(dummyToken, i - 1, tokens);
2742
- }
2743
- if (token.comments.length !== 0) {
2744
- shiftCommentsForward(token, i, tokens);
2745
- }
2746
- } else if (!dontShiftForward(i, tokens)) {
2747
- // If any of this token's comments start a line—there's only
2748
- // whitespace between the preceding newline and the start of the
2749
- // comment—and this isn't one of the special `JS` tokens, then
2750
- // shift this comment forward to precede the next valid token.
2751
- // `Block.compileComments` also has logic to make sure that
2752
- // "starting new line" comments follow or precede the nearest
2753
- // newline relative to the token that the comment is attached to,
2754
- // but that newline might be inside a `}` or `)` or other generated
2755
- // token that we really want this comment to output after. Therefore
2756
- // we need to shift the comments here, avoiding such generated and
2757
- // discarded tokens.
2758
- dummyToken = {
2759
- comments: []
2760
- };
2761
- j = token.comments.length - 1;
2762
- while (j !== -1) {
2763
- if (token.comments[j].newLine && !token.comments[j].unshift && !(token[0] === 'JS' && token.generated)) {
2764
- dummyToken.comments.unshift(token.comments[j]);
2765
- token.comments.splice(j, 1);
2766
- }
2767
- j--;
2768
- }
2769
- if (dummyToken.comments.length !== 0) {
2770
- ret = shiftCommentsForward(dummyToken, i + 1, tokens);
2771
- }
2772
- }
2773
- if (((ref1 = token.comments) != null ? ref1.length : void 0) === 0) {
2774
- delete token.comments;
2775
- }
2776
- return ret;
2777
- });
2778
- }
2779
-
2780
- // Add location data to all tokens generated by the rewriter.
2781
- addLocationDataToGeneratedTokens() {
2782
- return this.scanTokens(function(token, i, tokens) {
2783
- var column, line, nextLocation, prevLocation, rangeIndex, ref, ref1;
2784
- if (token[2]) {
2785
- return 1;
2786
- }
2787
- if (!(token.generated || token.explicit)) {
2788
- return 1;
2789
- }
2790
- if (token.fromThen && token[0] === 'INDENT') {
2791
- token[2] = token.origin[2];
2792
- return 1;
2793
- }
2794
- if (token[0] === '{' && (nextLocation = (ref = tokens[i + 1]) != null ? ref[2] : void 0)) {
2795
- ({
2796
- first_line: line,
2797
- first_column: column,
2798
- range: [rangeIndex]
2799
- } = nextLocation);
2800
- } else if (prevLocation = (ref1 = tokens[i - 1]) != null ? ref1[2] : void 0) {
2801
- ({
2802
- last_line: line,
2803
- last_column: column,
2804
- range: [, rangeIndex]
2805
- } = prevLocation);
2806
- column += 1;
2807
- } else {
2808
- line = column = 0;
2809
- rangeIndex = 0;
2810
- }
2811
- token[2] = {
2812
- first_line: line,
2813
- first_column: column,
2814
- last_line: line,
2815
- last_column: column,
2816
- last_line_exclusive: line,
2817
- last_column_exclusive: column,
2818
- range: [rangeIndex, rangeIndex]
2819
- };
2820
- return 1;
2821
- });
2822
- }
2823
-
2824
- // `OUTDENT` tokens should always be positioned at the last character of the
2825
- // previous token, so that AST nodes ending in an `OUTDENT` token end up with a
2826
- // location corresponding to the last "real" token under the node.
2827
- fixIndentationLocationData() {
2828
- var findPrecedingComment;
2829
- if (this.allComments == null) {
2830
- this.allComments = extractAllCommentTokens(this.tokens);
1494
+ if (EXPRESSION_START.has(token[0])) levels++;
1495
+ if (EXPRESSION_END.has(token[0])) levels--;
1496
+ if (levels < 0) {
1497
+ if (opts.returnOnNegativeLevel) return;
1498
+ return action.call(this, token, i);
2831
1499
  }
2832
- findPrecedingComment = (token, {afterPosition, indentSize, first, indented}) => {
2833
- var comment, k, l, lastMatching, matches, ref, ref1, tokenStart;
2834
- tokenStart = token[2].range[0];
2835
- matches = function(comment) {
2836
- if (comment.outdented) {
2837
- if (!((indentSize != null) && comment.indentSize > indentSize)) {
2838
- return false;
2839
- }
2840
- }
2841
- if (indented && !comment.indented) {
2842
- return false;
2843
- }
2844
- if (!(comment.locationData.range[0] < tokenStart)) {
2845
- return false;
2846
- }
2847
- if (!(comment.locationData.range[0] > afterPosition)) {
2848
- return false;
2849
- }
2850
- return true;
2851
- };
2852
- if (first) {
2853
- lastMatching = null;
2854
- ref = this.allComments;
2855
- for (k = ref.length - 1; k >= 0; k += -1) {
2856
- comment = ref[k];
2857
- if (matches(comment)) {
2858
- lastMatching = comment;
2859
- } else if (lastMatching) {
2860
- return lastMatching;
2861
- }
2862
- }
2863
- return lastMatching;
2864
- }
2865
- ref1 = this.allComments;
2866
- for (l = ref1.length - 1; l >= 0; l += -1) {
2867
- comment = ref1[l];
2868
- if (matches(comment)) {
2869
- return comment;
2870
- }
2871
- }
2872
- return null;
2873
- };
2874
- return this.scanTokens(function(token, i, tokens) {
2875
- var isIndent, nextToken, nextTokenIndex, precedingComment, prevLocationData, prevToken, ref, ref1, ref2, useNextToken;
2876
- if (!(((ref = token[0]) === 'INDENT' || ref === 'OUTDENT') || (token.generated && token[0] === 'CALL_END' && !((ref1 = token.data) != null ? ref1.closingTagNameToken : void 0)) || (token.generated && token[0] === '}'))) {
2877
- return 1;
2878
- }
2879
- isIndent = token[0] === 'INDENT';
2880
- prevToken = (ref2 = token.prevToken) != null ? ref2 : tokens[i - 1];
2881
- prevLocationData = prevToken[2];
2882
- // addLocationDataToGeneratedTokens() set the outdent's location data
2883
- // to the preceding token's, but in order to detect comments inside an
2884
- // empty "block" we want to look for comments preceding the next token.
2885
- useNextToken = token.explicit || token.generated;
2886
- if (useNextToken) {
2887
- nextToken = token;
2888
- nextTokenIndex = i;
2889
- while ((nextToken.explicit || nextToken.generated) && nextTokenIndex !== tokens.length - 1) {
2890
- nextToken = tokens[nextTokenIndex++];
2891
- }
2892
- }
2893
- precedingComment = findPrecedingComment(useNextToken ? nextToken : token, {
2894
- afterPosition: prevLocationData.range[0],
2895
- indentSize: token.indentSize,
2896
- first: isIndent,
2897
- indented: useNextToken
2898
- });
2899
- if (isIndent) {
2900
- if (!(precedingComment != null ? precedingComment.newLine : void 0)) {
2901
- return 1;
2902
- }
2903
- }
2904
- if (token.generated && token[0] === 'CALL_END' && (precedingComment != null ? precedingComment.indented : void 0)) {
2905
- // We don't want e.g. an implicit call at the end of an `if` condition to
2906
- // include a following indented comment.
2907
- return 1;
2908
- }
2909
- if (precedingComment != null) {
2910
- prevLocationData = precedingComment.locationData;
2911
- }
2912
- token[2] = {
2913
- first_line: precedingComment != null ? prevLocationData.first_line : prevLocationData.last_line,
2914
- first_column: precedingComment != null ? isIndent ? 0 : prevLocationData.first_column : prevLocationData.last_column,
2915
- last_line: prevLocationData.last_line,
2916
- last_column: prevLocationData.last_column,
2917
- last_line_exclusive: prevLocationData.last_line_exclusive,
2918
- last_column_exclusive: prevLocationData.last_column_exclusive,
2919
- range: isIndent && (precedingComment != null) ? [prevLocationData.range[0] - precedingComment.indentSize, prevLocationData.range[1]] : prevLocationData.range
2920
- };
2921
- return 1;
2922
- });
2923
- }
2924
-
2925
- // Because our grammar is LALR(1), it can't handle some single-line
2926
- // expressions that lack ending delimiters. The **Rewriter** adds the implicit
2927
- // blocks, so it doesn't need to. To keep the grammar clean and tidy, trailing
2928
- // newlines within expressions are removed and the indentation tokens of empty
2929
- // blocks are added.
2930
- normalizeLines() {
2931
- var action, closeElseTag, condition, ifThens, indent, leading_if_then, leading_switch_when, outdent, starter;
2932
- starter = indent = outdent = null;
2933
- leading_switch_when = null;
2934
- leading_if_then = null;
2935
- // Count `THEN` tags
2936
- ifThens = [];
2937
- condition = function(token, i) {
2938
- var ref, ref1, ref2, ref3;
2939
- return token[1] !== ';' && (ref = token[0], indexOf.call(SINGLE_CLOSERS, ref) >= 0) && !(token[0] === 'TERMINATOR' && (ref1 = this.tag(i + 1), indexOf.call(EXPRESSION_CLOSE, ref1) >= 0)) && !(token[0] === 'ELSE' && (starter !== 'THEN' || (leading_if_then || leading_switch_when))) && !(((ref2 = token[0]) === 'CATCH' || ref2 === 'FINALLY') && (starter === '->' || starter === '=>')) || (ref3 = token[0], indexOf.call(CALL_CLOSERS, ref3) >= 0) && (this.tokens[i - 1].newLine || this.tokens[i - 1][0] === 'OUTDENT');
2940
- };
2941
- action = function(token, i) {
2942
- if (token[0] === 'ELSE' && starter === 'THEN') {
2943
- ifThens.pop();
2944
- }
2945
- return this.tokens.splice((this.tag(i - 1) === ',' ? i - 1 : i), 0, outdent);
2946
- };
2947
- closeElseTag = (tokens, i) => {
2948
- var lastThen, outdentElse, tlen;
2949
- tlen = ifThens.length;
2950
- if (!(tlen > 0)) {
2951
- return i;
2952
- }
2953
- lastThen = ifThens.pop();
2954
- [, outdentElse] = this.indentation(tokens[lastThen]);
2955
- // Insert `OUTDENT` to close inner `IF`.
2956
- outdentElse[1] = tlen * 2;
2957
- tokens.splice(i, 0, outdentElse);
2958
- // Insert `OUTDENT` to close outer `IF`.
2959
- outdentElse[1] = 2;
2960
- tokens.splice(i + 1, 0, outdentElse);
2961
- // Remove outdents from the end.
2962
- this.detectEnd(i + 2, function(token, i) {
2963
- var ref;
2964
- return (ref = token[0]) === 'OUTDENT' || ref === 'TERMINATOR';
2965
- }, function(token, i) {
2966
- if (this.tag(i) === 'OUTDENT' && this.tag(i + 1) === 'OUTDENT') {
2967
- return tokens.splice(i, 2);
2968
- }
2969
- });
2970
- return i + 2;
2971
- };
2972
- return this.scanTokens(function(token, i, tokens) {
2973
- var conditionTag, j, k, ref, ref1, ref2, tag;
2974
- [tag] = token;
2975
- conditionTag = (tag === '->' || tag === '=>') && this.findTagsBackwards(i, ['IF', 'WHILE', 'FOR', 'UNTIL', 'SWITCH', 'WHEN', 'LEADING_WHEN', '[', 'INDEX_START']) && !(this.findTagsBackwards(i, ['THEN', '..', '...']));
2976
- if (tag === 'TERMINATOR') {
2977
- if (this.tag(i + 1) === 'ELSE' && this.tag(i - 1) !== 'OUTDENT') {
2978
- tokens.splice(i, 1, ...this.indentation());
2979
- return 1;
2980
- }
2981
- if (ref = this.tag(i + 1), indexOf.call(EXPRESSION_CLOSE, ref) >= 0) {
2982
- if (token[1] === ';' && this.tag(i + 1) === 'OUTDENT') {
2983
- tokens[i + 1].prevToken = token;
2984
- moveComments(token, tokens[i + 1]);
2985
- }
2986
- tokens.splice(i, 1);
2987
- return 0;
2988
- }
2989
- }
2990
- if (tag === 'CATCH') {
2991
- for (j = k = 1; k <= 2; j = ++k) {
2992
- if (!((ref1 = this.tag(i + j)) === 'OUTDENT' || ref1 === 'TERMINATOR' || ref1 === 'FINALLY')) {
2993
- continue;
2994
- }
2995
- tokens.splice(i + j, 0, ...this.indentation());
2996
- return 2 + j;
2997
- }
2998
- }
2999
- if ((tag === '->' || tag === '=>') && (((ref2 = this.tag(i + 1)) === ',' || ref2 === ']') || this.tag(i + 1) === '.' && token.newLine)) {
3000
- [indent, outdent] = this.indentation(tokens[i]);
3001
- tokens.splice(i + 1, 0, indent, outdent);
3002
- return 1;
3003
- }
3004
- if (indexOf.call(SINGLE_LINERS, tag) >= 0 && this.tag(i + 1) !== 'INDENT' && !(tag === 'ELSE' && this.tag(i + 1) === 'IF') && !conditionTag) {
3005
- starter = tag;
3006
- [indent, outdent] = this.indentation(tokens[i]);
3007
- if (starter === 'THEN') {
3008
- indent.fromThen = true;
3009
- }
3010
- if (tag === 'THEN') {
3011
- leading_switch_when = this.findTagsBackwards(i, ['LEADING_WHEN']) && this.tag(i + 1) === 'IF';
3012
- leading_if_then = this.findTagsBackwards(i, ['IF']) && this.tag(i + 1) === 'IF';
3013
- }
3014
- if (tag === 'THEN' && this.findTagsBackwards(i, ['IF'])) {
3015
- ifThens.push(i);
3016
- }
3017
- // `ELSE` tag is not closed.
3018
- if (tag === 'ELSE' && this.tag(i - 1) !== 'OUTDENT') {
3019
- i = closeElseTag(tokens, i);
3020
- }
3021
- tokens.splice(i + 1, 0, indent);
3022
- this.detectEnd(i + 2, condition, action);
3023
- if (tag === 'THEN') {
3024
- tokens.splice(i, 1);
3025
- }
3026
- return 1;
3027
- }
3028
- return 1;
3029
- });
3030
- }
3031
-
3032
- // =========================================================================
3033
- // BACKWARDS COMPATIBILITY / TRANSITION SUPPORT
3034
- // =========================================================================
3035
- // Convert CoffeeScript postfix spread/rest to ES6 prefix syntax.
3036
- // In CoffeeScript, spread/rest can appear AFTER the identifier (e.g.,
3037
- // "args..." for rest params, "arr..." for spread). ES6 requires the prefix
3038
- // form ("...args", "...arr").
3039
- //
3040
- // This rewriter converts postfix spread/rest tokens to prefix form by
3041
- // swapping token positions, while carefully preserving range operators
3042
- // (which also use ... but in different contexts).
3043
- //
3044
- // Examples:
3045
- // [a, rest...] → [a, ...rest] (destructuring rest, converted)
3046
- // (args...) -> → (...args) => (rest params, converted)
3047
- // [arr...] → [...arr] (array spread, converted)
3048
- // fn(arr...) → fn(...arr) (call spread, converted)
3049
- // [1...10] → [1...10] (range operator, unchanged)
3050
- // arr[0...5] → arr[0...5] (slice operator, unchanged)
3051
- //
3052
- // NOTE: This is for COMPATIBILITY and TRANSITION purposes only.
3053
- // New code should use ES6 prefix syntax (...x) for clarity and consistency.
3054
- // =========================================================================
3055
- convertPostfixSpreadRest() {
3056
- return this.scanTokens(function(token, i, tokens) {
3057
- var definiteSpreadNext, inIndexContext, lastIndexEnd, lastIndexStart, next, nextTag, prev, prevTag, ref, validPostfixTokens;
3058
- // Only process ... and .. tokens
3059
- if (token[0] !== '...' && token[0] !== '..') {
3060
- return 1;
3061
- }
3062
- // Check if we're inside an OPEN (unmatched) INDEX_START...INDEX_END context
3063
- // Count bracket depth to handle nested cases like arr[_[0].length..]
3064
- // where the inner _[0] has matching brackets but we're still inside the outer [...]
3065
- let bracketDepth = 0;
3066
- for (let j = i - 1; j >= 0; j--) {
3067
- if (tokens[j][0] === 'INDEX_END') {
3068
- bracketDepth++; // Closing bracket adds to depth (going backwards)
3069
- }
3070
- if (tokens[j][0] === 'INDEX_START') {
3071
- bracketDepth--; // Opening bracket reduces depth
3072
- }
3073
- }
3074
- // If bracketDepth < 0, we have more INDEX_START than INDEX_END = we're inside [...]
3075
- inIndexContext = bracketDepth < 0;
3076
- if (inIndexContext) {
3077
- return 1; // It's a range/slice operator, leave unchanged
3078
- }
3079
- prev = tokens[i - 1];
3080
- next = tokens[i + 1];
3081
- if (!prev || !next) {
3082
- return 1;
3083
- }
3084
- prevTag = prev[0];
3085
- nextTag = next[0];
3086
- // Skip standalone expansion marker: , ... ,
3087
- // This is used in function params like (a, ..., b) for expansion
3088
- if (prevTag === ',' && nextTag === ',') {
3089
- return 1; // Expansion marker, leave unchanged
3090
- }
3091
- // Don't transform if previous token is NUMBER (that's a range)
3092
- // Examples: [1...10], arr[5...], for i in [0...10]
3093
- if (prevTag === 'NUMBER') {
3094
- return 1; // Range operator, leave unchanged
3095
- }
3096
- // Don't transform if already in prefix position
3097
- // Check if next token is what would follow in prefix form
3098
- // Example: [...arr] should not be transformed (already prefix)
3099
- validPostfixTokens = ['IDENTIFIER', 'PROPERTY', ')', ']', 'THIS', '@'];
3100
- // If previous token can have postfix spread, check if next confirms it
3101
- if (ref = prevTag, indexOf.call(validPostfixTokens, ref) >= 0) {
3102
- // Check if next token confirms this is spread (not range)
3103
- // After spread, we expect: , (separator), ) (end param/call), ] (end array), } (end object)
3104
- definiteSpreadNext = [',', ']', ')', '}', 'CALL_END', 'INDEX_END', 'PARAM_END', 'TERMINATOR', 'OUTDENT'];
3105
- if (ref = nextTag, indexOf.call(definiteSpreadNext, ref) >= 0) {
3106
- // This is postfix spread/rest - TRANSFORM by swapping tokens
3107
- tokens[i - 1] = token; // Move ... to before
3108
- tokens[i] = prev; // Move identifier to after
3109
- return 1;
3110
- }
3111
- // If next is IDENTIFIER or NUMBER, it's likely a range (x...y)
3112
- // Don't transform
3113
- }
3114
- return 1;
3115
- });
3116
- }
3117
-
3118
- // Tag postfix conditionals as such, so that we can parse them with a
3119
- // different precedence.
3120
- tagPostfixConditionals() {
3121
- var action, condition, original;
3122
- original = null;
3123
- condition = function(token, i) {
3124
- var prevTag, tag;
3125
- [tag] = token;
3126
- [prevTag] = this.tokens[i - 1];
3127
- return tag === 'TERMINATOR' || (tag === 'INDENT' && indexOf.call(SINGLE_LINERS, prevTag) < 0);
3128
- };
3129
- action = function(token, i) {
3130
- if (token[0] !== 'INDENT' || (token.generated && !token.fromThen)) {
3131
- return original[0] = 'POST_' + original[0];
3132
- }
3133
- };
3134
- return this.scanTokens(function(token, i) {
3135
- if (token[0] !== 'IF' && token[0] !== 'UNLESS') {
3136
- return 1;
3137
- }
3138
- original = token;
3139
- this.detectEnd(i + 1, condition, action);
3140
- return 1;
3141
- });
1500
+ i++;
3142
1501
  }
1502
+ }
3143
1503
 
3144
- // For tokens with extra data, we want to make that data visible to the grammar
3145
- // by wrapping the token value as a String() object and setting the data as
3146
- // properties of that object. The grammar should then be responsible for
3147
- // cleaning this up for the node constructor: unwrapping the token value to a
3148
- // primitive string and separately passing any expected token data properties
3149
- exposeTokenDataToGrammar() {
3150
- return this.scanTokens(function(token, i) {
3151
- var ref, ref1, val;
3152
- if (token.generated || (token.data && Object.keys(token.data).length !== 0)) {
3153
- token[1] = new String(token[1]);
3154
- ref1 = (ref = token.data) != null ? ref : {};
3155
- for (key in ref1) {
3156
- if (!hasProp.call(ref1, key)) continue;
3157
- val = ref1[key];
3158
- token[1][key] = val;
3159
- }
3160
- if (token.generated) {
3161
- token[1].generated = true;
3162
- }
3163
- }
3164
- return 1;
3165
- });
1504
+ looksObjectish(j) {
1505
+ if (!this.tokens[j]) return false;
1506
+ if (this.tokens[j]?.[0] === '@' && this.tokens[j + 2]?.[0] === ':') return true;
1507
+ if (this.tokens[j + 1]?.[0] === ':') return true;
1508
+ if (EXPRESSION_START.has(this.tokens[j]?.[0])) {
1509
+ let end = null;
1510
+ this.detectEnd(j + 1,
1511
+ t => EXPRESSION_END.has(t[0]),
1512
+ (t, i) => end = i
1513
+ );
1514
+ if (end && this.tokens[end + 1]?.[0] === ':') return true;
3166
1515
  }
1516
+ return false;
1517
+ }
3167
1518
 
3168
- // Generate the indentation tokens, based on another token on the same line.
3169
- indentation(origin) {
3170
- var indent, outdent;
3171
- indent = ['INDENT', 2];
3172
- outdent = ['OUTDENT', 2];
3173
- if (origin) {
3174
- indent.generated = outdent.generated = true;
3175
- indent.origin = outdent.origin = origin;
3176
- } else {
3177
- indent.explicit = outdent.explicit = true;
3178
- }
3179
- return [indent, outdent];
1519
+ findTagsBackwards(i, tags) {
1520
+ let tagSet = new Set(tags);
1521
+ let backStack = [];
1522
+ while (i >= 0) {
1523
+ let tag = this.tokens[i]?.[0];
1524
+ if (!backStack.length && tagSet.has(tag)) return true;
1525
+ if (EXPRESSION_END.has(tag)) backStack.push(tag);
1526
+ if (EXPRESSION_START.has(tag) && backStack.length) backStack.pop();
1527
+ if (!backStack.length && (EXPRESSION_START.has(tag) && !this.tokens[i]?.generated || LINE_BREAK.has(tag))) break;
1528
+ i--;
3180
1529
  }
1530
+ return false;
1531
+ }
3181
1532
 
3182
- // Look up a tag by token index.
3183
- tag(i) {
3184
- var ref;
3185
- return (ref = this.tokens[i]) != null ? ref[0] : void 0;
1533
+ makeIndentation(origin) {
1534
+ let indent = gen('INDENT', 2);
1535
+ let outdent = gen('OUTDENT', 2);
1536
+ if (origin) {
1537
+ indent.generated = outdent.generated = true;
1538
+ indent.origin = outdent.origin = origin;
1539
+ } else {
1540
+ indent.explicit = outdent.explicit = true;
3186
1541
  }
3187
-
3188
- };
3189
-
3190
- Rewriter.prototype.generate = generate;
3191
-
3192
- return Rewriter;
3193
-
3194
- }).call(this);
3195
-
3196
- // Constants
3197
- // ---------
3198
-
3199
- // List of the token pairs that must be balanced.
3200
- BALANCED_PAIRS = [['(', ')'], ['[', ']'], ['{', '}'], ['INDENT', 'OUTDENT'], ['CALL_START', 'CALL_END'], ['PARAM_START', 'PARAM_END'], ['INDEX_START', 'INDEX_END'], ['STRING_START', 'STRING_END'], ['INTERPOLATION_START', 'INTERPOLATION_END'], ['REGEX_START', 'REGEX_END']];
3201
-
3202
- // The inverse mappings of `BALANCED_PAIRS` we're trying to fix up, so we can
3203
- // look things up from either end.
3204
- INVERSES = {};
3205
-
3206
- // The tokens that signal the start/end of a balanced pair.
3207
- EXPRESSION_START = [];
3208
-
3209
- EXPRESSION_END = [];
3210
-
3211
- for (k = 0, len = BALANCED_PAIRS.length; k < len; k++) {
3212
- [left, right] = BALANCED_PAIRS[k];
3213
- EXPRESSION_START.push(INVERSES[right] = left);
3214
- EXPRESSION_END.push(INVERSES[left] = right);
1542
+ return [indent, outdent];
1543
+ }
3215
1544
  }
3216
1545
 
3217
- // Tokens that indicate the close of a clause of an expression.
3218
- EXPRESSION_CLOSE = ['CATCH', 'THEN', 'ELSE', 'FINALLY'].concat(EXPRESSION_END);
3219
-
3220
- // Tokens that, if followed by an `IMPLICIT_CALL`, indicate a function invocation.
3221
- IMPLICIT_FUNC = ['IDENTIFIER', 'PROPERTY', 'SUPER', ')', 'CALL_END', ']', 'INDEX_END', '@', 'THIS'];
3222
-
3223
- // If preceded by an `IMPLICIT_FUNC`, indicates a function invocation.
3224
- IMPLICIT_CALL = ['IDENTIFIER', 'PROPERTY', 'NUMBER', 'INFINITY', 'NAN', 'STRING', 'STRING_START', 'REGEX', 'REGEX_START', 'JS', 'NEW', 'PARAM_START', 'CLASS', 'IF', 'TRY', 'SWITCH', 'THIS', 'DYNAMIC_IMPORT', 'IMPORT_META', 'NEW_TARGET', 'UNDEFINED', 'NULL', 'BOOL', 'UNARY', 'DO', 'DO_IIFE', 'YIELD', 'AWAIT', 'UNARY_MATH', 'SUPER', 'THROW', '@', '->', '=>', '[', '(', '{', '--', '++'];
3225
-
3226
- IMPLICIT_UNSPACED_CALL = ['+', '-'];
1546
+ // ==========================================================================
1547
+ // Convenience export
1548
+ // ==========================================================================
3227
1549
 
3228
- // Tokens that always mark the end of an implicit call for single-liners.
3229
- // Includes || and && so that `read 'body' or ''` parses as `read('body') || ''`
3230
- IMPLICIT_END = ['POST_IF', 'POST_UNLESS', 'FOR', 'WHILE', 'UNTIL', 'WHEN', 'BY', 'LOOP', 'TERMINATOR', '||', '&&'];
3231
-
3232
- // Literals that trigger comma insertion before arrows: get '/path' -> ... becomes get('/path', -> ...)
3233
- IMPLICIT_COMMA_BEFORE_ARROW = ['STRING', 'STRING_END', 'REGEX', 'REGEX_END', 'NUMBER', 'BOOL', 'NULL', 'UNDEFINED', 'INFINITY', 'NAN', ']', '}'];
3234
-
3235
- // Single-line flavors of block expressions that have unclosed endings.
3236
- // The grammar can't disambiguate them, so we insert the implicit indentation.
3237
- SINGLE_LINERS = ['ELSE', '->', '=>', 'TRY', 'FINALLY', 'THEN'];
3238
-
3239
- SINGLE_CLOSERS = ['TERMINATOR', 'CATCH', 'FINALLY', 'ELSE', 'OUTDENT', 'LEADING_WHEN'];
3240
-
3241
- // Tokens that end a line.
3242
- LINEBREAKS = ['TERMINATOR', 'INDENT', 'OUTDENT'];
3243
-
3244
- // Tokens that close open calls when they follow a newline.
3245
- CALL_CLOSERS = ['.', '?.', '::', '?::'];
3246
-
3247
- // Tokens that prevent a subsequent indent from ending implicit calls/objects
3248
- CONTROL_IN_IMPLICIT = ['IF', 'TRY', 'FINALLY', 'CATCH', 'CLASS', 'SWITCH'];
3249
-
3250
- // Tokens that are swallowed up by the parser, never leading to code generation.
3251
- // You can spot these in `grammar.rip` because the `o` function second
3252
- // argument doesn't contain a `new` call for these tokens.
3253
- // `STRING_START` isn't on this list because its `locationData` matches that of
3254
- // the node that becomes `StringWithInterpolations`, and therefore
3255
- // `addDataToNode` attaches `STRING_START`'s tokens to that node.
3256
- DISCARDED = ['(', ')', '[', ']', '{', '}', ':', '.', '..', '...', ',', '=', '++', '--', '?', 'AS', 'AWAIT', 'CALL_START', 'CALL_END', 'DEFAULT', 'DO', 'DO_IIFE', 'ELSE', 'EXTENDS', 'EXPORT', 'FORIN', 'FOROF', 'FORFROM', 'IMPORT', 'INDENT', 'INDEX_SOAK', 'INTERPOLATION_START', 'INTERPOLATION_END', 'LEADING_WHEN', 'OUTDENT', 'PARAM_END', 'REGEX_START', 'REGEX_END', 'RETURN', 'STRING_END', 'THROW', 'UNARY', 'YIELD'].concat(IMPLICIT_UNSPACED_CALL.concat(IMPLICIT_END.concat(CALL_CLOSERS.concat(CONTROL_IN_IMPLICIT))));
3257
-
3258
- // Tokens that, when appearing at the end of a line, suppress a following TERMINATOR/INDENT token
3259
- UNFINISHED = ['\\', '.', '?.', '?::', 'UNARY', 'DO', 'DO_IIFE', 'MATH', 'UNARY_MATH', '+', '-', '**', 'SHIFT', 'RELATION', 'COMPARE', '&', '^', '|', '&&', '||', 'SPACE?', 'EXTENDS'];
1550
/**
 * Convenience wrapper: build a fresh `Lexer` and run its `tokenize` method.
 *
 * @param {*} code - Passed through unchanged to `Lexer#tokenize`
 *   (presumably the Rip source text; confirm against the Lexer class).
 * @param {*} [opts] - Passed through unchanged to `Lexer#tokenize`.
 * @returns {*} Whatever `Lexer#tokenize` returns.
 */
export function tokenize(code, opts) {
  const lexer = new Lexer();
  return lexer.tokenize(code, opts);
}