stone-lang 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -0
- package/StoneEngine.js +879 -0
- package/StoneEngineService.js +1727 -0
- package/adapters/FileSystemAdapter.js +230 -0
- package/adapters/OutputAdapter.js +208 -0
- package/adapters/index.js +6 -0
- package/cli/CLIOutputAdapter.js +196 -0
- package/cli/DaemonClient.js +349 -0
- package/cli/JSONOutputAdapter.js +135 -0
- package/cli/ReplSession.js +567 -0
- package/cli/ViewerServer.js +590 -0
- package/cli/commands/check.js +84 -0
- package/cli/commands/daemon.js +189 -0
- package/cli/commands/kill.js +66 -0
- package/cli/commands/package.js +713 -0
- package/cli/commands/ps.js +65 -0
- package/cli/commands/run.js +537 -0
- package/cli/entry.js +169 -0
- package/cli/index.js +14 -0
- package/cli/stonec.js +358 -0
- package/cli/test-compiler.js +181 -0
- package/cli/viewer/index.html +495 -0
- package/daemon/IPCServer.js +455 -0
- package/daemon/ProcessManager.js +327 -0
- package/daemon/ProcessRunner.js +307 -0
- package/daemon/daemon.js +398 -0
- package/daemon/index.js +16 -0
- package/frontend/analysis/index.js +5 -0
- package/frontend/analysis/livenessAnalyzer.js +568 -0
- package/frontend/analysis/treeShaker.js +265 -0
- package/frontend/index.js +20 -0
- package/frontend/parsing/astBuilder.js +2196 -0
- package/frontend/parsing/index.js +7 -0
- package/frontend/parsing/sonParser.js +592 -0
- package/frontend/parsing/stoneAstTypes.js +703 -0
- package/frontend/parsing/terminal-registry.js +435 -0
- package/frontend/parsing/tokenizer.js +692 -0
- package/frontend/type-checker/OverloadedFunctionType.js +43 -0
- package/frontend/type-checker/TypeEnvironment.js +165 -0
- package/frontend/type-checker/bidirectionalInference.js +149 -0
- package/frontend/type-checker/index.js +10 -0
- package/frontend/type-checker/moduleAnalysis.js +248 -0
- package/frontend/type-checker/operatorMappings.js +35 -0
- package/frontend/type-checker/overloadResolution.js +605 -0
- package/frontend/type-checker/typeChecker.js +452 -0
- package/frontend/type-checker/typeCompatibility.js +389 -0
- package/frontend/type-checker/visitors/controlFlow.js +483 -0
- package/frontend/type-checker/visitors/functions.js +604 -0
- package/frontend/type-checker/visitors/index.js +38 -0
- package/frontend/type-checker/visitors/literals.js +341 -0
- package/frontend/type-checker/visitors/modules.js +159 -0
- package/frontend/type-checker/visitors/operators.js +109 -0
- package/frontend/type-checker/visitors/statements.js +768 -0
- package/frontend/types/index.js +5 -0
- package/frontend/types/operatorMap.js +134 -0
- package/frontend/types/types.js +2046 -0
- package/frontend/utils/errorCollector.js +244 -0
- package/frontend/utils/index.js +5 -0
- package/frontend/utils/moduleResolver.js +479 -0
- package/package.json +50 -0
- package/packages/browserCache.js +359 -0
- package/packages/fetcher.js +236 -0
- package/packages/index.js +130 -0
- package/packages/lockfile.js +271 -0
- package/packages/manifest.js +291 -0
- package/packages/packageResolver.js +356 -0
- package/packages/resolver.js +310 -0
- package/packages/semver.js +635 -0
|
@@ -0,0 +1,692 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stone Language Lexer (Tokenizer)
|
|
3
|
+
*
|
|
4
|
+
* Converts Stone source code into a stream of tokens.
|
|
5
|
+
* This is the first phase of parsing.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// Token types.
// Every token type is a string constant whose value equals its own name, so
// the table is generated from a single ordered list of names.
export const TokenType = Object.fromEntries(
  [
    // Literals
    "NUMBER",
    "IMAGINARY", // 4i or 4j (imaginary part of complex)
    "STRING",
    "TRUE",
    "FALSE",
    "NONE",

    // Identifiers and keywords
    "IDENTIFIER",

    // Keywords
    "FN",
    "RETURN",
    "IF",
    "ELIF",
    "ELSE",
    "FOR",
    "WHILE",
    "IN",
    "BY",
    "BREAK",
    "CONTINUE",
    "STATE",
    "STATIC",
    "AS",

    // Module keywords
    "IMPORT",
    "EXPORT",
    "FROM",

    // Type keywords
    "TYPE",

    // Extension keyword
    "EXTENSION",

    // Operators
    "PLUS", // +
    "MINUS", // -
    "STAR", // *
    "SLASH", // /
    "PERCENT", // %
    "CARET", // ^ (power)
    "AT", // @ (matrix multiplication)

    // Element-wise operators (for arrays)
    "DOT_PLUS", // .+
    "DOT_MINUS", // .-
    "DOT_STAR", // .*
    "DOT_SLASH", // ./
    "DOT_CARET", // .^

    // Logical operators
    "AND", // &&
    "OR", // ||
    "NOT", // !
    "PIPE", // | (union type)

    // Comparison
    "LT", // <
    "GT", // >
    "LTE", // <=
    "GTE", // >=
    "EQ", // ==
    "NEQ", // !=

    // Assignment
    "ASSIGN", // =
    "COLON_ASSIGN", // :=
    "LEFT_ARROW", // <- (state transition operator)
    "PLUS_ASSIGN", // +=
    "MINUS_ASSIGN", // -=
    "STAR_ASSIGN", // *=
    "SLASH_ASSIGN", // /=

    // Increment/Decrement
    "INCREMENT", // ++
    "DECREMENT", // --

    // Range operators
    "RANGE", // ..
    "RANGE_EXCLUSIVE", // ..<
    "RANGE_INCLUSIVE", // ..=

    // Delimiters
    "LPAREN", // (
    "RPAREN", // )
    "LBRACE", // {
    "RBRACE", // }
    "LBRACKET", // [
    "RBRACKET", // ]

    // Punctuation
    "COMMA", // ,
    "COLON", // :
    "DOUBLE_COLON", // ::
    "SEMICOLON", // ;
    "DOT", // .
    "ARROW", // ->
    "FAT_ARROW", // =>
    "QUESTION", // ?
    "TILDE", // ~

    // Special
    "NEWLINE",
    "EOF",
    "COMMENT",

    // String interpolation
    "F_STRING_START",
    "INTERPOLATION_START",
    "INTERPOLATION_END",
  ].map((name) => [name, name])
);
|
|
125
|
+
|
|
126
|
+
// Reserved keywords, mapped from their source spelling to a token type.
// The table is created without a prototype so that a lookup such as
// KEYWORDS["constructor"] or KEYWORDS["toString"] yields undefined instead of
// an inherited Object.prototype member; identifiers with those names must
// stay plain identifiers. It is frozen because it is a shared constant.
const KEYWORDS = Object.freeze(
  Object.assign(Object.create(null), {
    fn: TokenType.FN,
    return: TokenType.RETURN,
    if: TokenType.IF,
    elif: TokenType.ELIF,
    else: TokenType.ELSE,
    for: TokenType.FOR,
    while: TokenType.WHILE,
    in: TokenType.IN,
    by: TokenType.BY,
    break: TokenType.BREAK,
    continue: TokenType.CONTINUE,
    true: TokenType.TRUE,
    false: TokenType.FALSE,
    none: TokenType.NONE,
    // Note: 'state' is NOT a keyword - it can be used as a regular identifier
    static: TokenType.STATIC,
    // 'as' is NOT a keyword - it's a contextual identifier recognized only in alias positions
    // This allows: as = [1, 2, 3] while still supporting: x as alias := obj
    // Module keywords
    import: TokenType.IMPORT,
    export: TokenType.EXPORT,
    from: TokenType.FROM,
    // Type keywords
    type: TokenType.TYPE,
    // Extension keyword
    ext: TokenType.EXTENSION,
  })
);
|
|
155
|
+
|
|
156
|
+
/**
 * A single lexical token.
 *
 * Plain data holder: `type` is the token type, `value` is the payload the
 * lexer attached to it (for example the lexeme text, a parsed number, an
 * array of f-string parts, or null for the EOF token), and `line` / `column`
 * record where the token starts.
 */
export class Token {
  /**
   * @param {string} type - Token type (one of the TokenType values).
   * @param {*} value - Payload carried by the token.
   * @param {number} line - Line on which the token starts.
   * @param {number} column - Column at which the token starts.
   */
  constructor(type, value, line, column) {
    this.type = type;
    this.value = value;
    this.line = line;
    this.column = column;
  }

  /**
   * Format the token for debugging as `Token(TYPE, value, line:column)`.
   *
   * Strings longer than 100 characters are truncated, objects and arrays are
   * shown as `[Object]`, and everything else is JSON-encoded.
   *
   * @returns {string} Human-readable description of the token.
   */
  toString() {
    let valueStr;
    try {
      if (typeof this.value === "string" && this.value.length > 100) {
        valueStr = `"${this.value.substring(0, 100)}..."`;
      } else if (this.value !== null && typeof this.value === "object") {
        // `typeof null` is "object" too; null (the EOF payload) must fall
        // through to JSON.stringify so it prints as `null`, not `[Object]`.
        valueStr = "[Object]";
      } else {
        valueStr = JSON.stringify(this.value);
      }
    } catch (error) {
      // JSON.stringify can throw (e.g. on BigInt); a debug string should not.
      valueStr = "[Error stringifying value]";
    }
    return `Token(${this.type}, ${valueStr}, ${this.line}:${this.column})`;
  }
}
|
|
183
|
+
|
|
184
|
+
/**
 * Lexer for Stone source code.
 *
 * Walks the source string character by character, tracking the current
 * line and column, and produces Token objects. Whitespace (including
 * newlines) and `//` line comments are skipped and produce no tokens.
 */
export class Lexer {
  /**
   * @param {string} source - Source text to tokenize.
   * @param {string} [filename="<stdin>"] - Name of the source; stored on the
   *   instance.
   */
  constructor(source, filename = "<stdin>") {
    this.source = source;
    this.filename = filename;
    this.pos = 0;
    this.line = 1;
    this.column = 1;
    this.tokens = [];
  }

  /**
   * Tokenize the rest of the source.
   *
   * @returns {Token[]} All tokens read so far, terminated by an EOF token.
   * @throws {Error} On an unexpected character, a malformed number, or an
   *   unterminated string / f-string.
   */
  tokenize() {
    while (this.pos < this.source.length) {
      this.skipWhitespace();

      if (this.pos >= this.source.length) {
        break;
      }

      // Line comments produce no token.
      if (this.peek() === "/" && this.peek(1) === "/") {
        this.skipComment();
        continue;
      }

      const token = this.nextToken();
      if (token) {
        this.tokens.push(token);
      }
    }

    this.tokens.push(new Token(TokenType.EOF, null, this.line, this.column));
    return this.tokens;
  }

  /**
   * Read the token that starts at the current position.
   *
   * The caller is expected to have skipped whitespace and comments first.
   *
   * @returns {Token} The token read.
   * @throws {Error} If the current character cannot start any token.
   */
  nextToken() {
    const char = this.peek();
    const startLine = this.line;
    const startColumn = this.column;

    if (this.isDigit(char)) {
      return this.readNumber(startLine, startColumn);
    }

    if (char === '"' || char === "'") {
      return this.readString(char, startLine, startColumn);
    }

    // F-strings (string interpolation). This check runs before the
    // identifier check, so `f'` always starts a single-quoted f-string and is
    // never read as the identifier `f` followed by a prime.
    if (char === "f" && (this.peek(1) === '"' || this.peek(1) === "'")) {
      return this.readFString(startLine, startColumn);
    }

    if (this.isAlpha(char) || char === "_") {
      return this.readIdentifier(startLine, startColumn);
    }

    // Consume `lexeme` and build its token at the recorded start position.
    const emit = (type, lexeme) => {
      this.advance(lexeme.length);
      return new Token(type, lexeme, startLine, startColumn);
    };

    // Two-character operators take priority over their one-character prefix.
    const twoChar = `${char}${this.peek(1)}`;
    switch (twoChar) {
      case "&&":
        return emit(TokenType.AND, twoChar);
      case "||":
        return emit(TokenType.OR, twoChar);
      case ":=":
        return emit(TokenType.COLON_ASSIGN, twoChar);
      case "::":
        return emit(TokenType.DOUBLE_COLON, twoChar);
      // Element-wise operators: .+, .-, .*, ./, .^
      case ".+":
        return emit(TokenType.DOT_PLUS, twoChar);
      case ".-":
        return emit(TokenType.DOT_MINUS, twoChar);
      case ".*":
        return emit(TokenType.DOT_STAR, twoChar);
      case "./":
        return emit(TokenType.DOT_SLASH, twoChar);
      case ".^":
        return emit(TokenType.DOT_CARET, twoChar);
      case "..":
        // `..` may extend to the three-character forms `..<` and `..=`.
        if (this.peek(2) === "<") {
          return emit(TokenType.RANGE_EXCLUSIVE, "..<");
        }
        if (this.peek(2) === "=") {
          return emit(TokenType.RANGE_INCLUSIVE, "..=");
        }
        return emit(TokenType.RANGE, twoChar);
      case "<-":
        return emit(TokenType.LEFT_ARROW, twoChar);
      case "->":
        return emit(TokenType.ARROW, twoChar);
      case "=>":
        return emit(TokenType.FAT_ARROW, twoChar);
      case "<=":
        return emit(TokenType.LTE, twoChar);
      case ">=":
        return emit(TokenType.GTE, twoChar);
      case "==":
        return emit(TokenType.EQ, twoChar);
      case "!=":
        return emit(TokenType.NEQ, twoChar);
      case "++":
        return emit(TokenType.INCREMENT, twoChar);
      case "--":
        return emit(TokenType.DECREMENT, twoChar);
      case "+=":
        return emit(TokenType.PLUS_ASSIGN, twoChar);
      case "-=":
        return emit(TokenType.MINUS_ASSIGN, twoChar);
      case "*=":
        return emit(TokenType.STAR_ASSIGN, twoChar);
      case "/=":
        return emit(TokenType.SLASH_ASSIGN, twoChar);
      default:
        break;
    }

    // Single-character tokens
    switch (char) {
      case "+":
        return emit(TokenType.PLUS, char);
      case "-":
        return emit(TokenType.MINUS, char);
      case "*":
        return emit(TokenType.STAR, char);
      case "/":
        return emit(TokenType.SLASH, char);
      case "%":
        return emit(TokenType.PERCENT, char);
      case "^":
        return emit(TokenType.CARET, char);
      case "@":
        return emit(TokenType.AT, char);
      case "|":
        return emit(TokenType.PIPE, char);
      case "<":
        return emit(TokenType.LT, char);
      case ">":
        return emit(TokenType.GT, char);
      case "=":
        return emit(TokenType.ASSIGN, char);
      case "!":
        return emit(TokenType.NOT, char);
      case "(":
        return emit(TokenType.LPAREN, char);
      case ")":
        return emit(TokenType.RPAREN, char);
      case "{":
        return emit(TokenType.LBRACE, char);
      case "}":
        return emit(TokenType.RBRACE, char);
      case "[":
        return emit(TokenType.LBRACKET, char);
      case "]":
        return emit(TokenType.RBRACKET, char);
      case ",":
        return emit(TokenType.COMMA, char);
      case ":":
        return emit(TokenType.COLON, char);
      case ";":
        return emit(TokenType.SEMICOLON, char);
      case ".":
        return emit(TokenType.DOT, char);
      case "?":
        return emit(TokenType.QUESTION, char);
      case "~":
        return emit(TokenType.TILDE, char);
      default:
        // Step past the offending character before reporting it, so a caller
        // that catches the error does not see the same character again.
        this.advance();
        throw new Error(
          `Unexpected character '${char}' at ${startLine}:${startColumn}`
        );
    }
  }

  /**
   * Read a number (int or float), with optional scientific notation.
   * Also handles imaginary literals: 4i or 4j.
   *
   * @param {number} line - Line where the number starts.
   * @param {number} column - Column where the number starts.
   * @returns {Token} A NUMBER or IMAGINARY token whose value is the parsed
   *   number.
   * @throws {Error} On a second decimal point or an exponent without digits.
   */
  readNumber(line, column) {
    let num = "";
    let isFloat = false;

    while (this.isDigit(this.peek()) || this.peek() === ".") {
      if (this.peek() === ".") {
        // A dot followed by '.', '<' or '=' is left for the next token so
        // that range expressions such as `1..5` lex as NUMBER RANGE NUMBER.
        const next = this.peek(1);
        if (next === "." || next === "<" || next === "=") {
          break;
        }
        if (isFloat) {
          throw new Error(
            `Invalid number: multiple decimal points at ${line}:${column}`
          );
        }
        isFloat = true;
      }
      num += this.peek();
      this.advance();
    }

    // Scientific notation (e.g., 1e-12, 3.14e+5, 2.5E10)
    if (this.peek() === "e" || this.peek() === "E") {
      num += this.peek();
      this.advance();
      isFloat = true;

      // Optional sign after e/E
      if (this.peek() === "+" || this.peek() === "-") {
        num += this.peek();
        this.advance();
      }

      // Must have at least one digit after e/E
      if (!this.isDigit(this.peek())) {
        throw new Error(
          `Invalid number: expected digit after exponent at ${line}:${column}`
        );
      }

      while (this.isDigit(this.peek())) {
        num += this.peek();
        this.advance();
      }
    }

    const value = isFloat ? parseFloat(num) : parseInt(num, 10);

    // Imaginary suffix (i or j). It only counts when it is not the start of
    // a longer word: `4in` is the number 4 followed by `in`, not `4i` + `n`.
    const suffix = this.peek();
    const following = this.peek(1);
    const startsWord = this.isAlphaNumeric(following) || following === "_";
    if ((suffix === "i" || suffix === "j") && !startsWord) {
      this.advance(); // consume the i/j
      return new Token(TokenType.IMAGINARY, value, line, column);
    }

    return new Token(TokenType.NUMBER, value, line, column);
  }

  /**
   * Consume a backslash escape and return the character it stands for.
   *
   * Must be called with the current character being the backslash. `\n`,
   * `\t` and `\r` map to newline, tab and carriage return; any other escaped
   * character (backslash, quotes, braces, ...) stands for itself.
   *
   * @returns {string|null} The decoded character, or null when the backslash
   *   is the last character of the source.
   */
  readEscapeSequence() {
    this.advance(); // Skip the backslash
    const escaped = this.peek();
    this.advance();

    switch (escaped) {
      case "n":
        return "\n";
      case "t":
        return "\t";
      case "r":
        return "\r";
      default:
        return escaped;
    }
  }

  /**
   * Read a string literal delimited by `quote`, decoding backslash escapes.
   *
   * @param {string} quote - The opening quote character (`"` or `'`).
   * @param {number} line - Line where the string starts.
   * @param {number} column - Column where the string starts.
   * @returns {Token} A STRING token whose value is the decoded text.
   * @throws {Error} If the source ends before the closing quote.
   */
  readString(quote, line, column) {
    this.advance(); // Skip opening quote
    let str = "";

    while (this.pos < this.source.length && this.peek() !== quote) {
      if (this.peek() === "\\") {
        str += this.readEscapeSequence();
      } else {
        str += this.peek();
        this.advance();
      }
    }

    if (this.peek() !== quote) {
      throw new Error(`Unterminated string at ${line}:${column}`);
    }

    this.advance(); // Skip closing quote
    return new Token(TokenType.STRING, str, line, column);
  }

  /**
   * Read an f-string (string interpolation).
   *
   * The token value is an array of parts in source order, each either
   * `{ type: "literal", value }` (text with backslash escapes decoded, as in
   * plain strings) or `{ type: "interpolation", value }` (the raw, untokenized
   * text between `{` and its matching `}`).
   *
   * @param {number} line - Line where the f-string starts.
   * @param {number} column - Column where the f-string starts.
   * @returns {Token} An F_STRING_START token carrying the parts array.
   * @throws {Error} If the source ends before the closing quote.
   */
  readFString(line, column) {
    this.advance(); // Skip 'f'
    const quote = this.peek();
    this.advance(); // Skip opening quote

    const parts = [];
    let literal = "";

    // Empty literal runs are not recorded as parts.
    const flushLiteral = () => {
      if (literal) {
        parts.push({ type: "literal", value: literal });
        literal = "";
      }
    };

    while (this.pos < this.source.length && this.peek() !== quote) {
      const char = this.peek();
      if (char === "\\") {
        // Without this, an escaped quote would end the f-string early.
        literal += this.readEscapeSequence();
      } else if (char === "{") {
        flushLiteral();
        parts.push({ type: "interpolation", value: this.readInterpolation() });
      } else {
        literal += char;
        this.advance();
      }
    }

    flushLiteral();

    if (this.peek() !== quote) {
      throw new Error(`Unterminated f-string at ${line}:${column}`);
    }

    this.advance(); // Skip closing quote

    return new Token(TokenType.F_STRING_START, parts, line, column);
  }

  /**
   * Read one `{ ... }` interpolation of an f-string.
   *
   * Must be called with the current character being the opening `{`.
   * Nested braces are balanced by counting; reading stops after the matching
   * `}` or at the end of the source.
   *
   * @returns {string} The raw text between the outer braces.
   */
  readInterpolation() {
    this.advance(); // Skip {
    let expr = "";
    let depth = 1;

    while (depth > 0 && this.pos < this.source.length) {
      const char = this.peek();
      if (char === "{") {
        depth++;
      }
      if (char === "}") {
        depth--;
      }
      // The closing brace of the interpolation itself is not part of it.
      if (depth > 0) {
        expr += char;
      }
      this.advance();
    }

    return expr;
  }

  /**
   * Read an identifier or keyword.
   * Supports prime notation: identifier followed by ' (apostrophe).
   * Examples: sum', count', x'
   *
   * @param {number} line - Line where the identifier starts.
   * @param {number} column - Column where the identifier starts.
   * @returns {Token} A keyword token if the text is a key of KEYWORDS,
   *   otherwise an IDENTIFIER token; the value is the text read.
   */
  readIdentifier(line, column) {
    let id = "";

    while (this.isAlphaNumeric(this.peek()) || this.peek() === "_") {
      id += this.peek();
      this.advance();
    }

    // Prime notation (apostrophe after identifier), used for loop next-state
    // assignments: sum' = sum + x
    // Multiple primes are allowed: x', x'', x''' etc.
    while (this.peek() === "'") {
      id += "'";
      this.advance();
    }

    // Own-property check: names such as `constructor` or `toString` exist on
    // Object.prototype and must not be mistaken for keywords.
    const isKeyword = Object.prototype.hasOwnProperty.call(KEYWORDS, id);
    const type = isKeyword ? KEYWORDS[id] : TokenType.IDENTIFIER;
    return new Token(type, id, line, column);
  }

  /**
   * Skip spaces, tabs, carriage returns and newlines, keeping the line and
   * column counters up to date.
   */
  skipWhitespace() {
    while (this.pos < this.source.length) {
      const char = this.peek();
      if (char !== " " && char !== "\t" && char !== "\r" && char !== "\n") {
        break;
      }
      this.advance();
    }
  }

  /**
   * Skip comment (// to end of line). The terminating newline is left in
   * place.
   */
  skipComment() {
    while (this.peek() !== "\n" && this.pos < this.source.length) {
      this.advance();
    }
  }

  /**
   * Peek at character at current position + offset without consuming it.
   *
   * @param {number} [offset=0] - Distance from the current position.
   * @returns {string|null} The character, or null past the end of the source.
   */
  peek(offset = 0) {
    const pos = this.pos + offset;
    return pos < this.source.length ? this.source[pos] : null;
  }

  /**
   * Advance position by n characters, stopping at the end of the source.
   * A newline moves to column 1 of the next line; any other character moves
   * one column to the right.
   *
   * @param {number} [n=1] - Number of characters to consume.
   */
  advance(n = 1) {
    for (let i = 0; i < n; i++) {
      if (this.pos < this.source.length) {
        if (this.source[this.pos] === "\n") {
          this.line++;
          this.column = 1;
        } else {
          this.column++;
        }
        this.pos++;
      }
    }
  }

  /** @returns {boolean} True for the ASCII digits 0-9. */
  isDigit(char) {
    if (char === null || char === undefined) return false;
    return char >= "0" && char <= "9";
  }

  /** @returns {boolean} True for the ASCII letters a-z and A-Z. */
  isAlpha(char) {
    if (char === null || char === undefined) return false;
    return (char >= "a" && char <= "z") || (char >= "A" && char <= "Z");
  }

  /** @returns {boolean} True for ASCII letters and digits. */
  isAlphaNumeric(char) {
    if (char === null || char === undefined) return false;
    return this.isAlpha(char) || this.isDigit(char);
  }
}
|
|
683
|
+
|
|
684
|
+
/**
 * Convenience wrapper: build a Lexer for `source` and run it to completion.
 *
 * @param {string} source - Source text to tokenize.
 * @param {string} [filename] - Name of the source, passed through to Lexer.
 * @returns {Token[]} Whatever `Lexer#tokenize` returns for this source.
 */
export function tokenizeSource(source, filename) {
  return new Lexer(source, filename).tokenize();
}

// Alias of tokenizeSource under a shorter name.
export const tokenize = tokenizeSource;
|