tova 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +112 -0
- package/bin/tova.js +1530 -0
- package/package.json +38 -0
- package/src/analyzer/analyzer.js +2053 -0
- package/src/analyzer/scope.js +60 -0
- package/src/codegen/base-codegen.js +1351 -0
- package/src/codegen/client-codegen.js +876 -0
- package/src/codegen/codegen.js +148 -0
- package/src/codegen/server-codegen.js +2506 -0
- package/src/codegen/shared-codegen.js +29 -0
- package/src/diagnostics/formatter.js +139 -0
- package/src/formatter/formatter.js +559 -0
- package/src/index.js +6 -0
- package/src/lexer/lexer.js +886 -0
- package/src/lexer/tokens.js +214 -0
- package/src/lsp/server.js +738 -0
- package/src/parser/ast.js +1135 -0
- package/src/parser/parser.js +2803 -0
- package/src/runtime/db.js +106 -0
- package/src/runtime/embedded.js +8 -0
- package/src/runtime/reactivity.js +1366 -0
- package/src/runtime/router.js +200 -0
- package/src/runtime/rpc.js +46 -0
- package/src/runtime/ssr.js +134 -0
- package/src/runtime/string-proto.js +27 -0
- package/src/stdlib/collections.js +90 -0
- package/src/stdlib/core.js +98 -0
- package/src/stdlib/inline.js +172 -0
- package/src/stdlib/string.js +100 -0
|
@@ -0,0 +1,886 @@
|
|
|
1
|
+
import { TokenType, Keywords, Token } from './tokens.js';
|
|
2
|
+
|
|
3
|
+
export class Lexer {
|
|
4
|
+
static MAX_INTERPOLATION_DEPTH = 64;
|
|
5
|
+
|
|
6
|
+
constructor(source, filename = '<stdin>', lineOffset = 0, columnOffset = 0, _depth = 0) {
|
|
7
|
+
this.source = source;
|
|
8
|
+
this.filename = filename;
|
|
9
|
+
this.tokens = [];
|
|
10
|
+
this.pos = 0;
|
|
11
|
+
this.line = 1 + lineOffset;
|
|
12
|
+
this.column = 1 + columnOffset;
|
|
13
|
+
this.length = source.length;
|
|
14
|
+
this._depth = _depth;
|
|
15
|
+
|
|
16
|
+
// JSX context tracking for unquoted text support
|
|
17
|
+
this._jsxStack = []; // stack of 'tag' or 'cfblock' entries
|
|
18
|
+
this._jsxTagOpening = false; // true when < starts a JSX opening tag
|
|
19
|
+
this._jsxSelfClosing = false; // true when / seen inside JSX tag (before >)
|
|
20
|
+
this._jsxClosingTag = false; // true when </ detected
|
|
21
|
+
this._jsxExprDepth = 0; // brace depth for {expr} inside JSX
|
|
22
|
+
this._jsxControlFlowPending = false; // true after if/for/elif/else keyword in JSX
|
|
23
|
+
this._cfParenDepth = 0; // () and [] nesting in control flow condition
|
|
24
|
+
this._cfBraceDepth = 0; // {} nesting for expression braces (key={...})
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
/**
 * Abort lexing by throwing an Error prefixed with the current position.
 *
 * @param {string} message - Description of the problem.
 * @throws {Error} Always; the text is `filename:line:column — message`.
 */
error(message) {
  const where = `${this.filename}:${this.line}:${this.column}`;
  throw new Error(`${where} — ${message}`);
}
|
|
30
|
+
|
|
31
|
+
peek(offset = 0) {
|
|
32
|
+
const idx = this.pos + offset;
|
|
33
|
+
return idx < this.length ? this.source[idx] : '\0';
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
advance() {
|
|
37
|
+
const ch = this.source[this.pos];
|
|
38
|
+
this.pos++;
|
|
39
|
+
if (ch === '\n') {
|
|
40
|
+
this.line++;
|
|
41
|
+
this.column = 1;
|
|
42
|
+
} else {
|
|
43
|
+
this.column++;
|
|
44
|
+
}
|
|
45
|
+
return ch;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
match(expected) {
|
|
49
|
+
if (this.pos < this.length && this.source[this.pos] === expected) {
|
|
50
|
+
this.advance();
|
|
51
|
+
return true;
|
|
52
|
+
}
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
addToken(type, value) {
|
|
57
|
+
this.tokens.push(new Token(type, value, this.line, this.column));
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
isDigit(ch) {
|
|
61
|
+
return ch >= '0' && ch <= '9';
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
isAlpha(ch) {
|
|
65
|
+
if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '_') return true;
|
|
66
|
+
// Unicode letter support
|
|
67
|
+
if (ch > '\x7f') return /\p{Letter}/u.test(ch);
|
|
68
|
+
return false;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
isAlphaNumeric(ch) {
|
|
72
|
+
if (this.isAlpha(ch) || this.isDigit(ch)) return true;
|
|
73
|
+
// Unicode continue characters (combining marks, etc.)
|
|
74
|
+
if (ch > '\x7f') return /[\p{Letter}\p{Number}\p{Mark}]/u.test(ch);
|
|
75
|
+
return false;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
isWhitespace(ch) {
|
|
79
|
+
return ch === ' ' || ch === '\t' || ch === '\r';
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
_isJSXStart() {
|
|
83
|
+
const nextCh = this.peek();
|
|
84
|
+
if (!this.isAlpha(nextCh)) return false;
|
|
85
|
+
// Check the token BEFORE < (LESS was already pushed, so it's at length-2)
|
|
86
|
+
const prev = this.tokens.length > 1 ? this.tokens[this.tokens.length - 2] : null;
|
|
87
|
+
if (!prev) return true;
|
|
88
|
+
const valueTypes = [TokenType.IDENTIFIER, TokenType.NUMBER, TokenType.STRING,
|
|
89
|
+
TokenType.STRING_TEMPLATE, TokenType.RPAREN, TokenType.RBRACKET,
|
|
90
|
+
TokenType.TRUE, TokenType.FALSE, TokenType.NIL];
|
|
91
|
+
return !valueTypes.includes(prev.type);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
tokenize() {
|
|
95
|
+
while (this.pos < this.length) {
|
|
96
|
+
this.scanToken();
|
|
97
|
+
}
|
|
98
|
+
this.tokens.push(new Token(TokenType.EOF, null, this.line, this.column));
|
|
99
|
+
return this.tokens;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
scanToken() {
|
|
103
|
+
// In JSX children mode, scan raw text instead of normal tokens
|
|
104
|
+
if (this._jsxStack.length > 0 && this._jsxExprDepth === 0 &&
|
|
105
|
+
!this._jsxTagOpening && !this._jsxClosingTag &&
|
|
106
|
+
!this._jsxControlFlowPending) {
|
|
107
|
+
return this._scanInJSXChildren();
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
const ch = this.peek();
|
|
111
|
+
|
|
112
|
+
// Skip whitespace (not newlines)
|
|
113
|
+
if (this.isWhitespace(ch)) {
|
|
114
|
+
this.advance();
|
|
115
|
+
return;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
// Newlines
|
|
119
|
+
if (ch === '\n') {
|
|
120
|
+
this.tokens.push(new Token(TokenType.NEWLINE, '\n', this.line, this.column));
|
|
121
|
+
this.advance();
|
|
122
|
+
return;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// Comments
|
|
126
|
+
if (ch === '/' && this.peek(1) === '/') {
|
|
127
|
+
this.scanComment();
|
|
128
|
+
return;
|
|
129
|
+
}
|
|
130
|
+
if (ch === '/' && this.peek(1) === '*') {
|
|
131
|
+
this.scanBlockComment();
|
|
132
|
+
return;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
// Regex literals: /pattern/flags
|
|
136
|
+
// Must not be /=, //, /*, and must be in a context where regex makes sense
|
|
137
|
+
if (ch === '/' && this.peek(1) !== '/' && this.peek(1) !== '*' && this.peek(1) !== '='
|
|
138
|
+
&& this._jsxStack.length === 0) {
|
|
139
|
+
let prev = null;
|
|
140
|
+
for (let i = this.tokens.length - 1; i >= 0; i--) {
|
|
141
|
+
if (this.tokens[i].type !== TokenType.NEWLINE) {
|
|
142
|
+
prev = this.tokens[i];
|
|
143
|
+
break;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
// Only treat as regex after tokens that clearly start an expression context
|
|
147
|
+
const regexPreceders = [
|
|
148
|
+
TokenType.ASSIGN, TokenType.LPAREN, TokenType.LBRACKET, TokenType.LBRACE,
|
|
149
|
+
TokenType.COMMA, TokenType.COLON, TokenType.SEMICOLON,
|
|
150
|
+
TokenType.RETURN, TokenType.ARROW, TokenType.PIPE,
|
|
151
|
+
TokenType.EQUAL, TokenType.NOT_EQUAL,
|
|
152
|
+
TokenType.AND, TokenType.OR, TokenType.AND_AND, TokenType.OR_OR,
|
|
153
|
+
TokenType.NOT, TokenType.BANG,
|
|
154
|
+
TokenType.PLUS_ASSIGN, TokenType.MINUS_ASSIGN, TokenType.STAR_ASSIGN, TokenType.SLASH_ASSIGN,
|
|
155
|
+
];
|
|
156
|
+
if (prev && regexPreceders.includes(prev.type)) {
|
|
157
|
+
this.scanRegex();
|
|
158
|
+
return;
|
|
159
|
+
}
|
|
160
|
+
// At start of file (no prev token), treat / as regex if followed by a non-space, non-special char
|
|
161
|
+
if (!prev && this.pos + 1 < this.length && !/[\s\/*=]/.test(this.peek(1))) {
|
|
162
|
+
this.scanRegex();
|
|
163
|
+
return;
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
// Numbers
|
|
168
|
+
if (this.isDigit(ch)) {
|
|
169
|
+
this.scanNumber();
|
|
170
|
+
return;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// Strings
|
|
174
|
+
if (ch === '"') {
|
|
175
|
+
this.scanString();
|
|
176
|
+
return;
|
|
177
|
+
}
|
|
178
|
+
if (ch === "'") {
|
|
179
|
+
this.scanSimpleString();
|
|
180
|
+
return;
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
// Identifiers and keywords
|
|
184
|
+
if (this.isAlpha(ch)) {
|
|
185
|
+
this.scanIdentifier();
|
|
186
|
+
return;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
// Operators and delimiters
|
|
190
|
+
this.scanOperator();
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
_scanInJSXChildren() {
|
|
194
|
+
const ch = this.peek();
|
|
195
|
+
|
|
196
|
+
// Close control flow block: } when top of stack is 'cfblock'
|
|
197
|
+
if (ch === '}' && this._jsxStack.length > 0 && this._jsxStack[this._jsxStack.length - 1] === 'cfblock') {
|
|
198
|
+
this._jsxStack.pop();
|
|
199
|
+
this.scanOperator(); // emits RBRACE
|
|
200
|
+
return;
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
// Skip whitespace/newlines silently when followed by structural chars
|
|
204
|
+
if (this.isWhitespace(ch) || ch === '\n') {
|
|
205
|
+
let pp = this.pos;
|
|
206
|
+
while (pp < this.length && (this.source[pp] === ' ' || this.source[pp] === '\t' || this.source[pp] === '\r' || this.source[pp] === '\n')) {
|
|
207
|
+
pp++;
|
|
208
|
+
}
|
|
209
|
+
const nextNonWs = pp < this.length ? this.source[pp] : '\0';
|
|
210
|
+
// Skip whitespace if next meaningful char is structural
|
|
211
|
+
if (nextNonWs === '<' || nextNonWs === '{' || nextNonWs === '}' || nextNonWs === '"' || nextNonWs === "'" || pp >= this.length) {
|
|
212
|
+
while (this.pos < pp) this.advance();
|
|
213
|
+
return;
|
|
214
|
+
}
|
|
215
|
+
// Check if next non-ws starts a keyword (if/for/elif/else)
|
|
216
|
+
if (this.isAlpha(nextNonWs)) {
|
|
217
|
+
let word = '', wp = pp;
|
|
218
|
+
while (wp < this.length && this.isAlphaNumeric(this.source[wp])) {
|
|
219
|
+
word += this.source[wp]; wp++;
|
|
220
|
+
}
|
|
221
|
+
if (['if', 'for', 'elif', 'else'].includes(word)) {
|
|
222
|
+
while (this.pos < pp) this.advance();
|
|
223
|
+
return;
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
// Otherwise, fall through to collect as JSX text
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
if (ch === '{') {
|
|
230
|
+
this.scanOperator();
|
|
231
|
+
this._jsxExprDepth = 1;
|
|
232
|
+
return;
|
|
233
|
+
}
|
|
234
|
+
if (ch === '<') {
|
|
235
|
+
// In JSX children, set flags directly (heuristic may fail after STRING tokens)
|
|
236
|
+
const nextCh = this.peek(1);
|
|
237
|
+
if (nextCh === '/') {
|
|
238
|
+
this._jsxClosingTag = true;
|
|
239
|
+
} else if (this.isAlpha(nextCh)) {
|
|
240
|
+
this._jsxTagOpening = true;
|
|
241
|
+
}
|
|
242
|
+
this.scanOperator();
|
|
243
|
+
return;
|
|
244
|
+
}
|
|
245
|
+
if (ch === '"') { this.scanString(); return; }
|
|
246
|
+
if (ch === "'") { this.scanSimpleString(); return; }
|
|
247
|
+
|
|
248
|
+
// Check for JSX control flow keywords: if, for, elif, else
|
|
249
|
+
if (this.isAlpha(ch)) {
|
|
250
|
+
let word = '', peekPos = this.pos;
|
|
251
|
+
while (peekPos < this.length && this.isAlphaNumeric(this.source[peekPos])) {
|
|
252
|
+
word += this.source[peekPos]; peekPos++;
|
|
253
|
+
}
|
|
254
|
+
if (['if', 'for', 'elif', 'else'].includes(word)) {
|
|
255
|
+
this.scanIdentifier();
|
|
256
|
+
// After keyword, enter control flow pending mode for normal scanning
|
|
257
|
+
this._jsxControlFlowPending = true;
|
|
258
|
+
this._cfParenDepth = 0;
|
|
259
|
+
this._cfBraceDepth = 0;
|
|
260
|
+
return;
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// Everything else: scan as raw JSX text
|
|
265
|
+
this._scanJSXText();
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
_scanJSXText() {
|
|
269
|
+
const startLine = this.line, startCol = this.column;
|
|
270
|
+
let text = '';
|
|
271
|
+
while (this.pos < this.length) {
|
|
272
|
+
const ch = this.peek();
|
|
273
|
+
if (ch === '<' || ch === '{' || ch === '"' || ch === "'") break;
|
|
274
|
+
// Stop at keywords if, for, elif, else preceded by whitespace
|
|
275
|
+
if (this.isAlpha(ch) && text.length > 0 && /\s$/.test(text)) {
|
|
276
|
+
let word = '', pp = this.pos;
|
|
277
|
+
while (pp < this.length && this.isAlphaNumeric(this.source[pp])) {
|
|
278
|
+
word += this.source[pp]; pp++;
|
|
279
|
+
}
|
|
280
|
+
if (['if', 'for', 'elif', 'else'].includes(word)) break;
|
|
281
|
+
}
|
|
282
|
+
text += this.advance();
|
|
283
|
+
}
|
|
284
|
+
if (text.length > 0) {
|
|
285
|
+
this.tokens.push(new Token(TokenType.JSX_TEXT, text, startLine, startCol));
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
scanComment() {
|
|
290
|
+
const startLine = this.line;
|
|
291
|
+
const startCol = this.column;
|
|
292
|
+
this.advance(); // /
|
|
293
|
+
this.advance(); // /
|
|
294
|
+
|
|
295
|
+
// Check for docstring ///
|
|
296
|
+
const isDocstring = this.peek() === '/';
|
|
297
|
+
if (isDocstring) this.advance();
|
|
298
|
+
|
|
299
|
+
let value = '';
|
|
300
|
+
while (this.pos < this.length && this.peek() !== '\n') {
|
|
301
|
+
value += this.advance();
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
if (isDocstring) {
|
|
305
|
+
this.tokens.push(new Token(TokenType.DOCSTRING, value.trim(), startLine, startCol));
|
|
306
|
+
}
|
|
307
|
+
// Regular comments are discarded
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
scanBlockComment() {
|
|
311
|
+
this.advance(); // /
|
|
312
|
+
this.advance(); // *
|
|
313
|
+
let depth = 1;
|
|
314
|
+
|
|
315
|
+
while (this.pos < this.length && depth > 0) {
|
|
316
|
+
if (this.peek() === '/' && this.peek(1) === '*') {
|
|
317
|
+
depth++;
|
|
318
|
+
this.advance();
|
|
319
|
+
this.advance();
|
|
320
|
+
} else if (this.peek() === '*' && this.peek(1) === '/') {
|
|
321
|
+
depth--;
|
|
322
|
+
this.advance();
|
|
323
|
+
this.advance();
|
|
324
|
+
} else {
|
|
325
|
+
this.advance();
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
|
|
329
|
+
if (depth !== 0) {
|
|
330
|
+
this.error('Unterminated block comment');
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
scanNumber() {
|
|
335
|
+
const startLine = this.line;
|
|
336
|
+
const startCol = this.column;
|
|
337
|
+
let value = '';
|
|
338
|
+
|
|
339
|
+
// Handle hex, octal, binary
|
|
340
|
+
if (this.peek() === '0') {
|
|
341
|
+
const next = this.peek(1);
|
|
342
|
+
if (next === 'x' || next === 'X') {
|
|
343
|
+
this.advance(); // 0
|
|
344
|
+
this.advance(); // x
|
|
345
|
+
while (this.pos < this.length && /[0-9a-fA-F_]/.test(this.peek())) {
|
|
346
|
+
const ch = this.advance();
|
|
347
|
+
if (ch !== '_') value += ch;
|
|
348
|
+
}
|
|
349
|
+
if (!value) this.error('Expected hex digits after 0x');
|
|
350
|
+
this.tokens.push(new Token(TokenType.NUMBER, parseInt(value, 16), startLine, startCol));
|
|
351
|
+
return;
|
|
352
|
+
}
|
|
353
|
+
if (next === 'b' || next === 'B') {
|
|
354
|
+
this.advance(); // 0
|
|
355
|
+
this.advance(); // b
|
|
356
|
+
while (this.pos < this.length && /[01_]/.test(this.peek())) {
|
|
357
|
+
const ch = this.advance();
|
|
358
|
+
if (ch !== '_') value += ch;
|
|
359
|
+
}
|
|
360
|
+
if (!value) this.error('Expected binary digits after 0b');
|
|
361
|
+
this.tokens.push(new Token(TokenType.NUMBER, parseInt(value, 2), startLine, startCol));
|
|
362
|
+
return;
|
|
363
|
+
}
|
|
364
|
+
if (next === 'o' || next === 'O') {
|
|
365
|
+
this.advance(); // 0
|
|
366
|
+
this.advance(); // o
|
|
367
|
+
while (this.pos < this.length && /[0-7_]/.test(this.peek())) {
|
|
368
|
+
const ch = this.advance();
|
|
369
|
+
if (ch !== '_') value += ch;
|
|
370
|
+
}
|
|
371
|
+
if (!value) this.error('Expected octal digits after 0o');
|
|
372
|
+
this.tokens.push(new Token(TokenType.NUMBER, parseInt(value, 8), startLine, startCol));
|
|
373
|
+
return;
|
|
374
|
+
}
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
// Decimal
|
|
378
|
+
while (this.pos < this.length && (this.isDigit(this.peek()) || this.peek() === '_')) {
|
|
379
|
+
const ch = this.advance();
|
|
380
|
+
if (ch !== '_') value += ch;
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
// Decimal point
|
|
384
|
+
if (this.peek() === '.' && this.peek(1) !== '.') {
|
|
385
|
+
value += this.advance(); // .
|
|
386
|
+
while (this.pos < this.length && (this.isDigit(this.peek()) || this.peek() === '_')) {
|
|
387
|
+
const ch = this.advance();
|
|
388
|
+
if (ch !== '_') value += ch;
|
|
389
|
+
}
|
|
390
|
+
}
|
|
391
|
+
|
|
392
|
+
// Exponent
|
|
393
|
+
if (this.peek() === 'e' || this.peek() === 'E') {
|
|
394
|
+
const savedPos = this.pos;
|
|
395
|
+
const savedCol = this.column;
|
|
396
|
+
let expPart = this.advance(); // consume 'e'/'E'
|
|
397
|
+
if (this.peek() === '+' || this.peek() === '-') {
|
|
398
|
+
expPart += this.advance();
|
|
399
|
+
}
|
|
400
|
+
if (this.pos < this.length && this.isDigit(this.peek())) {
|
|
401
|
+
value += expPart;
|
|
402
|
+
while (this.pos < this.length && this.isDigit(this.peek())) {
|
|
403
|
+
value += this.advance();
|
|
404
|
+
}
|
|
405
|
+
} else {
|
|
406
|
+
// No digits after exponent — backtrack, treat 'e' as separate token
|
|
407
|
+
this.pos = savedPos;
|
|
408
|
+
this.column = savedCol;
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
this.tokens.push(new Token(TokenType.NUMBER, parseFloat(value), startLine, startCol));
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
scanString() {
|
|
416
|
+
const startLine = this.line;
|
|
417
|
+
const startCol = this.column;
|
|
418
|
+
this.advance(); // opening "
|
|
419
|
+
|
|
420
|
+
const parts = [];
|
|
421
|
+
let current = '';
|
|
422
|
+
|
|
423
|
+
while (this.pos < this.length && this.peek() !== '"') {
|
|
424
|
+
// Escape sequences
|
|
425
|
+
if (this.peek() === '\\') {
|
|
426
|
+
this.advance();
|
|
427
|
+
if (this.pos >= this.length) {
|
|
428
|
+
this.error('Unterminated string');
|
|
429
|
+
}
|
|
430
|
+
const esc = this.advance();
|
|
431
|
+
switch (esc) {
|
|
432
|
+
case 'n': current += '\n'; break;
|
|
433
|
+
case 't': current += '\t'; break;
|
|
434
|
+
case 'r': current += '\r'; break;
|
|
435
|
+
case '\\': current += '\\'; break;
|
|
436
|
+
case '"': current += '"'; break;
|
|
437
|
+
case '{': current += '{'; break;
|
|
438
|
+
default: current += '\\' + esc;
|
|
439
|
+
}
|
|
440
|
+
continue;
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
// String interpolation: {expr}
|
|
444
|
+
if (this.peek() === '{') {
|
|
445
|
+
this.advance(); // {
|
|
446
|
+
if (current.length > 0) {
|
|
447
|
+
parts.push({ type: 'text', value: current });
|
|
448
|
+
current = '';
|
|
449
|
+
}
|
|
450
|
+
|
|
451
|
+
// Lex the interpolation expression, respecting nested strings
|
|
452
|
+
const exprStartLine = this.line - 1; // 0-based offset for sub-lexer
|
|
453
|
+
const exprStartCol = this.column - 1;
|
|
454
|
+
let depth = 1;
|
|
455
|
+
let exprSource = '';
|
|
456
|
+
while (this.pos < this.length && depth > 0) {
|
|
457
|
+
const ch = this.peek();
|
|
458
|
+
// Skip over string literals so braces inside them don't affect depth
|
|
459
|
+
if (ch === '"' || ch === "'" || ch === '`') {
|
|
460
|
+
const quote = ch;
|
|
461
|
+
exprSource += this.advance(); // opening quote
|
|
462
|
+
let strDepth = 0; // track interpolation depth inside nested strings
|
|
463
|
+
while (this.pos < this.length) {
|
|
464
|
+
if (this.peek() === '\\') {
|
|
465
|
+
exprSource += this.advance(); // backslash
|
|
466
|
+
if (this.pos < this.length) exprSource += this.advance(); // escaped char
|
|
467
|
+
} else if (quote === '"' && this.peek() === '{') {
|
|
468
|
+
strDepth++;
|
|
469
|
+
exprSource += this.advance();
|
|
470
|
+
} else if (quote === '"' && this.peek() === '}' && strDepth > 0) {
|
|
471
|
+
strDepth--;
|
|
472
|
+
exprSource += this.advance();
|
|
473
|
+
} else if (this.peek() === quote && strDepth === 0) {
|
|
474
|
+
break;
|
|
475
|
+
} else {
|
|
476
|
+
exprSource += this.advance();
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
if (this.pos < this.length) exprSource += this.advance(); // closing quote
|
|
480
|
+
continue;
|
|
481
|
+
}
|
|
482
|
+
if (ch === '{') depth++;
|
|
483
|
+
if (ch === '}') {
|
|
484
|
+
depth--;
|
|
485
|
+
if (depth === 0) break;
|
|
486
|
+
}
|
|
487
|
+
exprSource += this.advance();
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
if (this.peek() !== '}') {
|
|
491
|
+
this.error('Unterminated string interpolation');
|
|
492
|
+
}
|
|
493
|
+
this.advance(); // }
|
|
494
|
+
|
|
495
|
+
// Sub-lex the expression with correct file position offsets
|
|
496
|
+
if (this._depth + 1 > Lexer.MAX_INTERPOLATION_DEPTH) {
|
|
497
|
+
this.error('String interpolation nested too deeply (max ' + Lexer.MAX_INTERPOLATION_DEPTH + ' levels)');
|
|
498
|
+
}
|
|
499
|
+
const subLexer = new Lexer(exprSource, this.filename, exprStartLine, exprStartCol, this._depth + 1);
|
|
500
|
+
const exprTokens = subLexer.tokenize();
|
|
501
|
+
// Remove the EOF token
|
|
502
|
+
exprTokens.pop();
|
|
503
|
+
|
|
504
|
+
parts.push({ type: 'expr', tokens: exprTokens, source: exprSource });
|
|
505
|
+
continue;
|
|
506
|
+
}
|
|
507
|
+
|
|
508
|
+
current += this.advance();
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
if (this.pos >= this.length) {
|
|
512
|
+
this.error('Unterminated string');
|
|
513
|
+
}
|
|
514
|
+
this.advance(); // closing "
|
|
515
|
+
|
|
516
|
+
// If there are no interpolation parts, emit a simple string
|
|
517
|
+
if (parts.length === 0) {
|
|
518
|
+
this.tokens.push(new Token(TokenType.STRING, current, startLine, startCol));
|
|
519
|
+
} else {
|
|
520
|
+
if (current.length > 0) {
|
|
521
|
+
parts.push({ type: 'text', value: current });
|
|
522
|
+
}
|
|
523
|
+
this.tokens.push(new Token(TokenType.STRING_TEMPLATE, parts, startLine, startCol));
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
|
|
527
|
+
scanSimpleString() {
|
|
528
|
+
const startLine = this.line;
|
|
529
|
+
const startCol = this.column;
|
|
530
|
+
this.advance(); // opening '
|
|
531
|
+
|
|
532
|
+
let value = '';
|
|
533
|
+
while (this.pos < this.length && this.peek() !== "'") {
|
|
534
|
+
if (this.peek() === '\\') {
|
|
535
|
+
this.advance();
|
|
536
|
+
if (this.pos >= this.length) {
|
|
537
|
+
this.error('Unterminated string');
|
|
538
|
+
}
|
|
539
|
+
const esc = this.advance();
|
|
540
|
+
switch (esc) {
|
|
541
|
+
case 'n': value += '\n'; break;
|
|
542
|
+
case 't': value += '\t'; break;
|
|
543
|
+
case 'r': value += '\r'; break;
|
|
544
|
+
case '\\': value += '\\'; break;
|
|
545
|
+
case "'": value += "'"; break;
|
|
546
|
+
default: value += '\\' + esc;
|
|
547
|
+
}
|
|
548
|
+
} else {
|
|
549
|
+
value += this.advance();
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
if (this.pos >= this.length) {
|
|
554
|
+
this.error('Unterminated string');
|
|
555
|
+
}
|
|
556
|
+
this.advance(); // closing '
|
|
557
|
+
|
|
558
|
+
this.tokens.push(new Token(TokenType.STRING, value, startLine, startCol));
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
scanRegex() {
|
|
562
|
+
const startLine = this.line;
|
|
563
|
+
const startCol = this.column;
|
|
564
|
+
this.advance(); // opening /
|
|
565
|
+
|
|
566
|
+
let pattern = '';
|
|
567
|
+
let escaped = false;
|
|
568
|
+
let inCharClass = false;
|
|
569
|
+
|
|
570
|
+
while (this.pos < this.length) {
|
|
571
|
+
const ch = this.peek();
|
|
572
|
+
if (ch === '\n') {
|
|
573
|
+
this.error('Unterminated regex literal');
|
|
574
|
+
}
|
|
575
|
+
if (escaped) {
|
|
576
|
+
pattern += ch;
|
|
577
|
+
this.advance();
|
|
578
|
+
escaped = false;
|
|
579
|
+
continue;
|
|
580
|
+
}
|
|
581
|
+
if (ch === '\\') {
|
|
582
|
+
pattern += ch;
|
|
583
|
+
this.advance();
|
|
584
|
+
escaped = true;
|
|
585
|
+
continue;
|
|
586
|
+
}
|
|
587
|
+
if (ch === '[') inCharClass = true;
|
|
588
|
+
if (ch === ']') inCharClass = false;
|
|
589
|
+
if (ch === '/' && !inCharClass) break;
|
|
590
|
+
pattern += this.advance();
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
if (this.pos >= this.length || this.peek() !== '/') {
|
|
594
|
+
this.error('Unterminated regex literal');
|
|
595
|
+
}
|
|
596
|
+
this.advance(); // closing /
|
|
597
|
+
|
|
598
|
+
// Read flags
|
|
599
|
+
let flags = '';
|
|
600
|
+
while (this.pos < this.length && /[gimsuydv]/.test(this.peek())) {
|
|
601
|
+
flags += this.advance();
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
this.tokens.push(new Token(TokenType.REGEX, { pattern, flags }, startLine, startCol));
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
scanIdentifier() {
|
|
608
|
+
const startLine = this.line;
|
|
609
|
+
const startCol = this.column;
|
|
610
|
+
let value = '';
|
|
611
|
+
|
|
612
|
+
while (this.pos < this.length && this.isAlphaNumeric(this.peek())) {
|
|
613
|
+
value += this.advance();
|
|
614
|
+
}
|
|
615
|
+
|
|
616
|
+
// Raw string: r"no\escapes"
|
|
617
|
+
if (value === 'r' && this.pos < this.length && this.peek() === '"') {
|
|
618
|
+
this.advance(); // opening "
|
|
619
|
+
let raw = '';
|
|
620
|
+
while (this.pos < this.length && this.peek() !== '"') {
|
|
621
|
+
raw += this.advance();
|
|
622
|
+
}
|
|
623
|
+
if (this.pos >= this.length) {
|
|
624
|
+
this.error('Unterminated raw string');
|
|
625
|
+
}
|
|
626
|
+
this.advance(); // closing "
|
|
627
|
+
this.tokens.push(new Token(TokenType.STRING, raw, startLine, startCol));
|
|
628
|
+
return;
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
// Special case: "style {" → read raw CSS block
|
|
632
|
+
if (value === 'style') {
|
|
633
|
+
const savedPos = this.pos;
|
|
634
|
+
const savedLine = this.line;
|
|
635
|
+
const savedCol = this.column;
|
|
636
|
+
// Skip whitespace (including newlines) to check for {
|
|
637
|
+
while (this.pos < this.length && (this.isWhitespace(this.peek()) || this.peek() === '\n')) {
|
|
638
|
+
this.advance();
|
|
639
|
+
}
|
|
640
|
+
if (this.peek() === '{') {
|
|
641
|
+
this.advance(); // skip {
|
|
642
|
+
let depth = 1;
|
|
643
|
+
let css = '';
|
|
644
|
+
while (depth > 0 && this.pos < this.length) {
|
|
645
|
+
const ch = this.peek();
|
|
646
|
+
if (ch === '{') depth++;
|
|
647
|
+
else if (ch === '}') {
|
|
648
|
+
depth--;
|
|
649
|
+
if (depth === 0) { this.advance(); break; }
|
|
650
|
+
}
|
|
651
|
+
css += this.advance();
|
|
652
|
+
}
|
|
653
|
+
if (depth > 0) {
|
|
654
|
+
this.error('Unterminated style block');
|
|
655
|
+
}
|
|
656
|
+
this.tokens.push(new Token(TokenType.STYLE_BLOCK, css.trim(), startLine, startCol));
|
|
657
|
+
return;
|
|
658
|
+
}
|
|
659
|
+
// Not a style block — restore position
|
|
660
|
+
this.pos = savedPos;
|
|
661
|
+
this.line = savedLine;
|
|
662
|
+
this.column = savedCol;
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
// Check if it's a keyword
|
|
666
|
+
const type = Keywords[value] || TokenType.IDENTIFIER;
|
|
667
|
+
this.tokens.push(new Token(type, value, startLine, startCol));
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
scanOperator() {
|
|
671
|
+
const startLine = this.line;
|
|
672
|
+
const startCol = this.column;
|
|
673
|
+
const ch = this.advance();
|
|
674
|
+
|
|
675
|
+
switch (ch) {
|
|
676
|
+
case '(':
|
|
677
|
+
this.tokens.push(new Token(TokenType.LPAREN, '(', startLine, startCol));
|
|
678
|
+
if (this._jsxControlFlowPending) this._cfParenDepth++;
|
|
679
|
+
break;
|
|
680
|
+
case ')':
|
|
681
|
+
this.tokens.push(new Token(TokenType.RPAREN, ')', startLine, startCol));
|
|
682
|
+
if (this._jsxControlFlowPending && this._cfParenDepth > 0) this._cfParenDepth--;
|
|
683
|
+
break;
|
|
684
|
+
case '{':
|
|
685
|
+
this.tokens.push(new Token(TokenType.LBRACE, '{', startLine, startCol));
|
|
686
|
+
if (this._jsxControlFlowPending) {
|
|
687
|
+
if (this._cfBraceDepth > 0) {
|
|
688
|
+
// Nested brace inside expression (e.g., key={obj.field})
|
|
689
|
+
this._cfBraceDepth++;
|
|
690
|
+
} else if (this._cfParenDepth > 0) {
|
|
691
|
+
// Inside parens, this is an expression brace
|
|
692
|
+
this._cfBraceDepth++;
|
|
693
|
+
} else {
|
|
694
|
+
// Check if prev token is ASSIGN (key={...}) or FOR (destructuring: for {a,b} in ...)
|
|
695
|
+
const prev = this.tokens.length > 1 ? this.tokens[this.tokens.length - 2] : null;
|
|
696
|
+
if (prev && (prev.type === TokenType.ASSIGN || prev.type === TokenType.FOR)) {
|
|
697
|
+
this._cfBraceDepth++;
|
|
698
|
+
} else {
|
|
699
|
+
// This is the block opener for the control flow body
|
|
700
|
+
this._jsxControlFlowPending = false;
|
|
701
|
+
this._jsxStack.push('cfblock');
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
} else if (this._jsxExprDepth > 0) {
|
|
705
|
+
this._jsxExprDepth++;
|
|
706
|
+
}
|
|
707
|
+
break;
|
|
708
|
+
case '}':
|
|
709
|
+
this.tokens.push(new Token(TokenType.RBRACE, '}', startLine, startCol));
|
|
710
|
+
if (this._jsxControlFlowPending && this._cfBraceDepth > 0) {
|
|
711
|
+
this._cfBraceDepth--;
|
|
712
|
+
} else if (this._jsxExprDepth > 0) {
|
|
713
|
+
this._jsxExprDepth--;
|
|
714
|
+
}
|
|
715
|
+
break;
|
|
716
|
+
case '[':
|
|
717
|
+
this.tokens.push(new Token(TokenType.LBRACKET, '[', startLine, startCol));
|
|
718
|
+
if (this._jsxControlFlowPending) this._cfParenDepth++;
|
|
719
|
+
break;
|
|
720
|
+
case ']':
|
|
721
|
+
this.tokens.push(new Token(TokenType.RBRACKET, ']', startLine, startCol));
|
|
722
|
+
if (this._jsxControlFlowPending && this._cfParenDepth > 0) this._cfParenDepth--;
|
|
723
|
+
break;
|
|
724
|
+
case ',':
|
|
725
|
+
this.tokens.push(new Token(TokenType.COMMA, ',', startLine, startCol));
|
|
726
|
+
break;
|
|
727
|
+
case ';':
|
|
728
|
+
this.tokens.push(new Token(TokenType.SEMICOLON, ';', startLine, startCol));
|
|
729
|
+
break;
|
|
730
|
+
|
|
731
|
+
case '+':
|
|
732
|
+
if (this.match('=')) {
|
|
733
|
+
this.tokens.push(new Token(TokenType.PLUS_ASSIGN, '+=', startLine, startCol));
|
|
734
|
+
} else {
|
|
735
|
+
this.tokens.push(new Token(TokenType.PLUS, '+', startLine, startCol));
|
|
736
|
+
}
|
|
737
|
+
break;
|
|
738
|
+
|
|
739
|
+
case '-':
|
|
740
|
+
if (this.match('>')) {
|
|
741
|
+
this.tokens.push(new Token(TokenType.THIN_ARROW, '->', startLine, startCol));
|
|
742
|
+
} else if (this.match('=')) {
|
|
743
|
+
this.tokens.push(new Token(TokenType.MINUS_ASSIGN, '-=', startLine, startCol));
|
|
744
|
+
} else {
|
|
745
|
+
this.tokens.push(new Token(TokenType.MINUS, '-', startLine, startCol));
|
|
746
|
+
}
|
|
747
|
+
break;
|
|
748
|
+
|
|
749
|
+
case '*':
|
|
750
|
+
if (this.match('*')) {
|
|
751
|
+
this.tokens.push(new Token(TokenType.POWER, '**', startLine, startCol));
|
|
752
|
+
} else if (this.match('=')) {
|
|
753
|
+
this.tokens.push(new Token(TokenType.STAR_ASSIGN, '*=', startLine, startCol));
|
|
754
|
+
} else {
|
|
755
|
+
this.tokens.push(new Token(TokenType.STAR, '*', startLine, startCol));
|
|
756
|
+
}
|
|
757
|
+
break;
|
|
758
|
+
|
|
759
|
+
case '/':
|
|
760
|
+
if (this.match('=')) {
|
|
761
|
+
this.tokens.push(new Token(TokenType.SLASH_ASSIGN, '/=', startLine, startCol));
|
|
762
|
+
} else {
|
|
763
|
+
this.tokens.push(new Token(TokenType.SLASH, '/', startLine, startCol));
|
|
764
|
+
if (this._jsxTagOpening) this._jsxSelfClosing = true;
|
|
765
|
+
}
|
|
766
|
+
break;
|
|
767
|
+
|
|
768
|
+
case '%':
|
|
769
|
+
this.tokens.push(new Token(TokenType.PERCENT, '%', startLine, startCol));
|
|
770
|
+
break;
|
|
771
|
+
|
|
772
|
+
case '=':
|
|
773
|
+
if (this.match('=')) {
|
|
774
|
+
this.tokens.push(new Token(TokenType.EQUAL, '==', startLine, startCol));
|
|
775
|
+
} else if (this.match('>')) {
|
|
776
|
+
this.tokens.push(new Token(TokenType.ARROW, '=>', startLine, startCol));
|
|
777
|
+
} else {
|
|
778
|
+
this.tokens.push(new Token(TokenType.ASSIGN, '=', startLine, startCol));
|
|
779
|
+
}
|
|
780
|
+
break;
|
|
781
|
+
|
|
782
|
+
case '!':
|
|
783
|
+
if (this.match('=')) {
|
|
784
|
+
this.tokens.push(new Token(TokenType.NOT_EQUAL, '!=', startLine, startCol));
|
|
785
|
+
} else {
|
|
786
|
+
this.tokens.push(new Token(TokenType.BANG, '!', startLine, startCol));
|
|
787
|
+
}
|
|
788
|
+
break;
|
|
789
|
+
|
|
790
|
+
case '<':
|
|
791
|
+
if (this.match('=')) {
|
|
792
|
+
this.tokens.push(new Token(TokenType.LESS_EQUAL, '<=', startLine, startCol));
|
|
793
|
+
} else {
|
|
794
|
+
this.tokens.push(new Token(TokenType.LESS, '<', startLine, startCol));
|
|
795
|
+
// Don't override flags already set by _scanInJSXChildren
|
|
796
|
+
if (!this._jsxClosingTag && !this._jsxTagOpening) {
|
|
797
|
+
if (this.peek() === '/') {
|
|
798
|
+
this._jsxClosingTag = true;
|
|
799
|
+
} else if (this._isJSXStart()) {
|
|
800
|
+
this._jsxTagOpening = true;
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
break;
|
|
805
|
+
|
|
806
|
+
case '>':
|
|
807
|
+
if (this.match('=')) {
|
|
808
|
+
this.tokens.push(new Token(TokenType.GREATER_EQUAL, '>=', startLine, startCol));
|
|
809
|
+
} else {
|
|
810
|
+
this.tokens.push(new Token(TokenType.GREATER, '>', startLine, startCol));
|
|
811
|
+
// JSX state transitions on >
|
|
812
|
+
if (this._jsxSelfClosing) {
|
|
813
|
+
// Self-closing tag: <br/> — don't push to stack
|
|
814
|
+
this._jsxTagOpening = false;
|
|
815
|
+
this._jsxSelfClosing = false;
|
|
816
|
+
} else if (this._jsxClosingTag) {
|
|
817
|
+
// Closing tag: </div> — pop 'tag' from stack
|
|
818
|
+
this._jsxClosingTag = false;
|
|
819
|
+
if (this._jsxStack.length > 0) this._jsxStack.pop();
|
|
820
|
+
} else if (this._jsxTagOpening) {
|
|
821
|
+
// Opening tag: <div> — push 'tag' to stack (entering children mode)
|
|
822
|
+
this._jsxTagOpening = false;
|
|
823
|
+
this._jsxStack.push('tag');
|
|
824
|
+
}
|
|
825
|
+
}
|
|
826
|
+
break;
|
|
827
|
+
|
|
828
|
+
case '&':
|
|
829
|
+
if (this.match('&')) {
|
|
830
|
+
this.tokens.push(new Token(TokenType.AND_AND, '&&', startLine, startCol));
|
|
831
|
+
} else if (this._jsxStack.length > 0) {
|
|
832
|
+
// Inside JSX, & is valid text - should not reach here normally
|
|
833
|
+
// but handle gracefully by treating as text
|
|
834
|
+
this.tokens.push(new Token(TokenType.JSX_TEXT, '&', startLine, startCol));
|
|
835
|
+
} else {
|
|
836
|
+
this.error(`Unexpected character: '&'. Did you mean '&&'?`);
|
|
837
|
+
}
|
|
838
|
+
break;
|
|
839
|
+
|
|
840
|
+
case '|':
|
|
841
|
+
if (this.match('>')) {
|
|
842
|
+
this.tokens.push(new Token(TokenType.PIPE, '|>', startLine, startCol));
|
|
843
|
+
} else if (this.match('|')) {
|
|
844
|
+
this.tokens.push(new Token(TokenType.OR_OR, '||', startLine, startCol));
|
|
845
|
+
} else {
|
|
846
|
+
this.error(`Unexpected character: '|'. Did you mean '|>' or '||'?`);
|
|
847
|
+
}
|
|
848
|
+
break;
|
|
849
|
+
|
|
850
|
+
case '.':
|
|
851
|
+
if (this.match('.')) {
|
|
852
|
+
if (this.match('=')) {
|
|
853
|
+
this.tokens.push(new Token(TokenType.DOT_DOT_EQUAL, '..=', startLine, startCol));
|
|
854
|
+
} else if (this.match('.')) {
|
|
855
|
+
this.tokens.push(new Token(TokenType.SPREAD, '...', startLine, startCol));
|
|
856
|
+
} else {
|
|
857
|
+
this.tokens.push(new Token(TokenType.DOT_DOT, '..', startLine, startCol));
|
|
858
|
+
}
|
|
859
|
+
} else {
|
|
860
|
+
this.tokens.push(new Token(TokenType.DOT, '.', startLine, startCol));
|
|
861
|
+
}
|
|
862
|
+
break;
|
|
863
|
+
|
|
864
|
+
case ':':
|
|
865
|
+
if (this.match(':')) {
|
|
866
|
+
this.tokens.push(new Token(TokenType.DOUBLE_COLON, '::', startLine, startCol));
|
|
867
|
+
} else {
|
|
868
|
+
this.tokens.push(new Token(TokenType.COLON, ':', startLine, startCol));
|
|
869
|
+
}
|
|
870
|
+
break;
|
|
871
|
+
|
|
872
|
+
case '?':
|
|
873
|
+
if (this.match('.')) {
|
|
874
|
+
this.tokens.push(new Token(TokenType.QUESTION_DOT, '?.', startLine, startCol));
|
|
875
|
+
} else if (this.match('?')) {
|
|
876
|
+
this.tokens.push(new Token(TokenType.QUESTION_QUESTION, '??', startLine, startCol));
|
|
877
|
+
} else {
|
|
878
|
+
this.tokens.push(new Token(TokenType.QUESTION, '?', startLine, startCol));
|
|
879
|
+
}
|
|
880
|
+
break;
|
|
881
|
+
|
|
882
|
+
default:
|
|
883
|
+
this.error(`Unexpected character: '${ch}'`);
|
|
884
|
+
}
|
|
885
|
+
}
|
|
886
|
+
}
|