novac 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,506 @@
1
+ /**
2
+ * Nova Lexer
3
+ * ------------------------------
4
+ * Converts source code into structured tokens:
5
+ * { type, value, line, column, isUnary, isPostfix, precedence }
6
+ */
7
+
8
/**
 * Words that the lexer turns into LITERAL tokens instead of identifiers.
 *
 * `true`, `false` and `null` map to sentinel Symbols; `nstr` maps to the empty
 * string; `nfunc` maps to a function that returns a fresh "NOVA_NULL" Symbol on
 * every call (so its result is not identical to `LITERALS.null`).
 */
const LITERALS = {
  // Use unique sentinel values for literals the Executor will wrap
  true: Symbol("NOVA_TRUE"),
  false: Symbol("NOVA_FALSE"),
  null: Symbol("NOVA_NULL"),
  nstr: "",
  nfunc: () => Symbol("NOVA_NULL"),
};
16
+
17
/**
 * Reserved words. An identifier found in this set is emitted as a KEYWORD
 * token; any other word (that is not a literal or a word operator) is emitted
 * as an IDENTIFIER token.
 */
const KEYWORDS = new Set([
  "var",
  "let",
  "const",
  "class",
  "if",
  "else",
  "for",
  "repeat",
  "unless",
  "until",
  "throw",
  "try",
  "catch",
  "finally",
  "func",
  "function",
  "return",
  "give",
  "async",
  "await",
]);
39
+
40
+ const { CustomError, formatError } = require("./error");
41
+
42
/**
 * Operator table consumed by the lexer and handed on to the parser.
 *
 * Keys are operator spellings (symbolic or word-like, e.g. "delete"); values
 * carry `precedence` plus the optional `isUnary` / `isPostfix` flags that are
 * copied onto every OPERATOR token. The table is intentionally mutable: the
 * `#register operator` directive adds entries to it while lexing.
 */
const OPERATORS = new Map([
  ["+", { precedence: 5, isUnary: true }],
  ["-", { precedence: 5, isUnary: true }],
  ["*", { precedence: 6, isUnary: true }],
  ["/", { precedence: 6 }],
  ["%", { precedence: 6 }],
  ["==", { precedence: 3 }],
  ["!=", { precedence: 3 }],
  ["<", { precedence: 4 }],
  ["<=", { precedence: 4 }],
  [">", { precedence: 4 }],
  [">=", { precedence: 4 }],
  ["&&", { precedence: 2 }],
  ["||", { precedence: 1 }],
  ["delete", { precedence: 8, isUnary: true }],
  ["!", { precedence: 7, isUnary: true }],
  ["++", { precedence: 7, isPostfix: true }],
  ["--", { precedence: 7, isPostfix: true }],
  ["=>", { precedence: 0 }],
  ["=", { precedence: 0 }],
]);
64
+
65
/**
 * Single-character punctuation. A character that is not (part of) an operator
 * but appears in this set is emitted as a PUNCTUATION token.
 */
const PUNCTUATION = new Set(["(", ")", "{", "}", "[", "]", ",", ".", ";", ":"]);
68
+
69
+ // ... (rest of file)
70
+
71
/**
 * Nova lexer: converts source text into a flat array of tokens.
 *
 * Plain tokens have the shape `{ type, value, line, column }`; OPERATOR tokens
 * additionally carry `{ precedence, isUnary, isPostfix }`. Lines and columns
 * are 1-based and `column` always refers to the first character of the token.
 *
 * The lexer also runs a small line-oriented preprocessor. Lines starting with
 * `#` are directives: `define`, `undef`, `ifdef`, `ifndef`, `endif`,
 * `register operator` and `inject`.
 */
class Lexer {
  /**
   * @param {string} source - Source text to tokenize.
   */
  constructor(source) {
    this.source = source;
    // Untouched copy kept for diagnostics: `source` is rewritten whenever a
    // macro is expanded, so it no longer lines up with `line` afterwards.
    this.originalSource = source;
    this.position = 0;
    this.line = 1;
    this.column = 1;
    this.tokens = [];

    // Preprocessor state.
    this.definitions = new Map(); // name -> replacement text, or `true` for a bare flag
    this.isSkipping = false; // true while inside an inactive #ifdef / #ifndef region
    this.skipDepth = 0; // conditionals opened *inside* the inactive region
    this.activeExpansions = []; // [{ name, end }] macros whose injected text is still being lexed
  }

  /**
   * Consumes a run of `[A-Za-z0-9_]` characters.
   * @returns {string} The characters read (possibly empty).
   */
  readIdentifier() {
    let value = "";
    while (!this.isAtEnd() && this.isAlphaNumeric(this.peek())) {
      value += this.advance();
    }
    return value;
  }

  /**
   * Consumes everything up to (not including) the next newline.
   * @returns {string} The consumed text with surrounding whitespace trimmed.
   */
  readRestOfLine() {
    let value = "";
    while (!this.isAtEnd() && this.peek() !== "\n") {
      value += this.advance();
    }
    return value.trim();
  }

  /** Consumes spaces, tabs and carriage returns; never crosses a newline. */
  skipInlineWhitespace() {
    while (this.isWhitespace(this.peek())) {
      this.advance();
    }
  }

  /** Moves the cursor to the start of the next line, updating line/column. */
  skipLine() {
    while (!this.isAtEnd() && this.peek() !== "\n") {
      this.advance();
    }
    if (this.peek() === "\n") {
      this.line++;
      this.column = 1;
      this.position++; // Consume the newline
    }
  }

  /**
   * Parses and executes the directive starting at the current `#`.
   *
   * While inside an inactive conditional region only `ifdef` / `ifndef` /
   * `endif` are interpreted (to track nesting); every other directive in that
   * region is ignored, exactly like the code around it.
   *
   * An unknown directive name emits a LITERAL token holding the word that
   * follows it and lets normal lexing continue on the same line.
   */
  handleDirective() {
    this.advance(); // consume '#'
    this.skipInlineWhitespace();
    const dirName = this.readIdentifier();

    if (this.isSkipping) {
      if (dirName === "ifdef" || dirName === "ifndef") {
        this.skipDepth++;
      } else if (dirName === "endif") {
        if (this.skipDepth > 0) {
          this.skipDepth--; // closes a conditional nested in the inactive region
        } else {
          this.isSkipping = false; // closes the conditional that started skipping
        }
      }
      this.skipLine();
      return;
    }

    this.skipInlineWhitespace();
    const dirArg = this.readIdentifier();
    let shouldSkip = true;

    switch (dirName) {
      case "define": {
        if (dirArg) {
          const value = this.readRestOfLine();
          // readRestOfLine() never returns null/undefined, so test for the
          // empty string explicitly: a bare `#define NAME` is a flag (true)
          // and must not erase later uses of NAME.
          this.definitions.set(dirArg, value === "" ? true : value);
        }
        break;
      }

      case "undef": {
        if (dirArg) this.definitions.delete(dirArg);
        break;
      }

      case "register": {
        if (dirArg === "operator") this.registerOperator();
        break;
      }

      case "inject": {
        this.tokens.push(this.parseDirectiveJson("inject"));
        break;
      }

      case "ifdef": {
        this.isSkipping = !this.definitions.has(dirArg);
        this.skipDepth = 0;
        break;
      }

      case "ifndef": {
        this.isSkipping = this.definitions.has(dirArg);
        this.skipDepth = 0;
        break;
      }

      case "endif": {
        // Closes an active conditional (or is stray): nothing to undo.
        break;
      }

      default: {
        this.addToken("LITERAL", dirArg);
        shouldSkip = false;
        break;
      }
    }

    if (shouldSkip) this.skipLine();
  }

  /**
   * Handles the tail of `#register operator <name> <json>`.
   *
   * The name is the run of characters up to the next whitespace or `{`, so both
   * word operators and symbolic ones (e.g. `**`) can be registered. The JSON
   * object is stored in the shared OPERATORS table with `custom: true` added.
   *
   * @throws LexError (via `error`) on a missing name or a non-object payload.
   */
  registerOperator() {
    this.skipInlineWhitespace();

    let name = "";
    while (!this.isAtEnd()) {
      const ch = this.peek();
      if (ch === "\n" || ch === "{" || this.isWhitespace(ch)) break;
      name += this.advance();
    }
    if (name === "") {
      throw this.error("#register operator requires an operator name");
    }

    const info = this.parseDirectiveJson("register operator");
    if (info === null || typeof info !== "object" || Array.isArray(info)) {
      throw this.error("#register operator expects a JSON object");
    }
    info.custom = true;
    OPERATORS.set(name, info);
  }

  /**
   * Reads the rest of the line and parses it as JSON.
   * @param {string} directive - Directive name, used in the error message.
   * @returns {*} The parsed JSON value.
   * @throws LexError (via `error`) when the payload is not valid JSON.
   */
  parseDirectiveJson(directive) {
    const payload = this.readRestOfLine();
    try {
      return JSON.parse(payload);
    } catch (err) {
      throw this.error(`Invalid JSON payload for #${directive}: ${err.message}`);
    }
  }

  /**
   * Tokenizes the whole source.
   * @returns {Array<object>} All tokens, terminated by an EOF token.
   * @throws LexError (via `error`) on unterminated literals/comments or
   *   unexpected characters.
   */
  tokenize() {
    while (!this.isAtEnd()) {
      const char = this.peek();

      // 1. Preprocessor directive
      if (char === "#") {
        this.handleDirective();
        continue;
      }

      // 2. Inactive conditional region: advance without producing tokens
      if (this.isSkipping) {
        if (char === "\n") {
          this.line++;
          this.column = 1;
        } else if (char === "\t") {
          this.column += 3;
        } else {
          this.column++;
        }
        this.position++;
        continue;
      }

      // 3. Normal lexing; `char` is consumed from here on
      this.advance();

      if (this.isWhitespace(char)) {
        if (char === "\t") this.column += 3; // a tab counts as 4 columns in total
        continue;
      }

      if (char === "\n") {
        this.line++;
        this.column = 1;
        continue;
      }

      if (char === "/") {
        const next = this.peek();

        // Line comments: `/!` (Nova style) and `//`
        if (next === "!" || next === "/") {
          this.advance();
          this.skipComment();
          continue;
        }

        // Block comment: /* ... */
        if (next === "*") {
          this.advance(); // consume '*'
          this.skipBlockComment();
          continue;
        }
      }

      if (char === '"' || char === "'") {
        this.string(char);
        continue;
      }

      if (char === "`") {
        this.templateLiteral();
        continue;
      }

      if (this.isDigit(char)) {
        this.number(char);
        continue;
      }

      if (this.isAlpha(char)) {
        // Remember where the word starts *before* reading the rest of it, so
        // the token column and the expansion bookkeeping use the first char.
        const startColumn = this.column - 1;
        const startPosition = this.position - 1;

        let ident = char;
        while (this.isAlphaNumeric(this.peek())) ident += this.advance();

        if (this.shouldExpand(ident, startPosition)) {
          this.injectReplacement(this.definitions.get(ident), ident, startColumn);
          continue; // restart lexing on the injected text
        }

        this.identifier(ident, startColumn);
        continue;
      }

      this.symbol(char);
    }

    this.addToken("EOF", null, this.column);
    return this.tokens;
  }

  /**
   * Decides whether `ident` must be replaced by its `#define` text.
   *
   * A name is not expanded when it is undefined, when it is a bare flag
   * (`true`), or when the lexer is currently inside that same macro's injected
   * text; the last rule is what stops `#define A A + 1` from looping forever.
   *
   * @param {string} ident - The word just read.
   * @param {number} startPosition - Index in `source` of its first character.
   * @returns {boolean}
   */
  shouldExpand(ident, startPosition) {
    // Forget expansions whose injected text has been fully consumed.
    this.activeExpansions = this.activeExpansions.filter(
      (expansion) => expansion.end > startPosition,
    );

    if (!this.definitions.has(ident)) return false;
    if (this.definitions.get(ident) === true) return false;
    return !this.activeExpansions.some((expansion) => expansion.name === ident);
  }

  /**
   * Emits the token for a word: LITERAL, word OPERATOR, KEYWORD or IDENTIFIER.
   *
   * @param {string} firstChar - The already-consumed start of the word; either
   *   just its first character or the whole word. Any alphanumeric characters
   *   still ahead of the cursor are appended.
   * @param {number} [startColumn] - Column of the word's first character.
   *   Defaults to the current column minus the characters already consumed.
   */
  identifier(firstChar, startColumn = this.column - firstChar.length) {
    let value = firstChar;
    while (this.isAlphaNumeric(this.peek())) value += this.advance();

    if (Object.prototype.hasOwnProperty.call(LITERALS, value)) {
      this.addToken("LITERAL", LITERALS[value], startColumn);
      return;
    }

    if (OPERATORS.has(value)) {
      this.addOperatorToken(value, startColumn);
      return;
    }

    const type = KEYWORDS.has(value) ? "KEYWORD" : "IDENTIFIER";
    this.addToken(type, value, startColumn);
  }

  /**
   * Replaces the already-consumed part of the source with `str`, so lexing
   * continues on the replacement text followed by the remaining source.
   *
   * @param {*} str - Replacement text (coerced to a string).
   * @param {?string} [name=null] - Macro being expanded; when given, that name
   *   is not expanded again until the injected text has been consumed.
   * @param {number} [startColumn=1] - Column to resume at; callers pass the
   *   column of the replaced word so injected tokens point at it.
   */
  injectReplacement(str, name = null, startColumn = 1) {
    const text = String(str);
    const consumed = this.position;

    // Positions are about to be rebased to 0: shift the end of every expansion
    // that is still open so it keeps covering the same characters.
    for (const expansion of this.activeExpansions) {
      expansion.end = expansion.end - consumed + text.length;
    }
    if (name !== null) {
      this.activeExpansions.push({ name, end: text.length });
    }

    this.source = text + this.source.slice(consumed);
    this.position = 0;
    this.column = startColumn;
  }

  /**
   * Lexes a decimal number with an optional fractional part.
   * @param {string} firstChar - The already-consumed first digit.
   */
  number(firstChar) {
    const startColumn = this.column - 1;
    let value = firstChar;
    while (this.isDigit(this.peek())) value += this.advance();

    // Only take the '.' when a digit follows, so `1.foo` stays NUMBER, '.', ...
    if (this.peek() === "." && this.isDigit(this.peekNext())) {
      value += this.advance();
      while (this.isDigit(this.peek())) value += this.advance();
    }

    this.addToken("NUMBER", parseFloat(value), startColumn);
  }

  /**
   * Skips a block comment whose opening delimiter was already consumed.
   * @throws LexError (via `error`) when the closing delimiter is missing.
   */
  skipBlockComment() {
    while (!this.isAtEnd()) {
      const ch = this.advance();
      if (ch === "*" && this.peek() === "/") {
        this.advance(); // consume '/'
        return;
      }
      if (ch === "\n") {
        this.line++;
        this.column = 1;
      }
    }
    throw this.error("Unterminated block comment");
  }

  /**
   * Lexes a quoted string whose opening quote was already consumed. The text is
   * taken verbatim up to the next matching quote (no escape processing).
   *
   * @param {string} quote - The opening quote character.
   * @throws LexError (via `error`) when the closing quote is missing.
   */
  string(quote) {
    const startLine = this.line;
    const startColumn = this.column - 1; // column of the opening quote
    let value = "";

    while (!this.isAtEnd() && this.peek() !== quote) {
      const ch = this.advance();
      if (ch === "\n") {
        this.line++;
        this.column = 1;
      }
      value += ch;
    }
    if (this.isAtEnd()) throw this.error("Unterminated string literal");

    this.advance(); // closing quote
    this.addToken("STRING", value, startColumn, startLine);
  }

  /**
   * Lexes a template literal whose opening backtick was already consumed.
   *
   * Emits TEMPLATE_START, then STRING_PART tokens for literal text and, for
   * each `&{ ... }`, INTERPOLATION_START + the tokens of the embedded
   * expression + INTERPOLATION_END, and finally TEMPLATE_END.
   *
   * The embedded expression is lexed by a nested Lexer that starts at the
   * expression's real line/column and shares this lexer's `#define` table.
   *
   * @throws LexError (via `error`) when the template or an interpolation is
   *   not closed.
   */
  templateLiteral() {
    this.addToken("TEMPLATE_START", "`", this.column - 1);

    let part = "";
    let partLine = this.line;
    let partColumn = this.column;

    const flushStringPart = () => {
      if (part.length > 0) {
        this.addToken("STRING_PART", part, partColumn, partLine);
        part = "";
      }
    };

    while (!this.isAtEnd()) {
      const chLine = this.line;
      const chColumn = this.column;
      const ch = this.advance();

      // Interpolation: &{ expression }
      if (ch === "&" && this.peek() === "{") {
        this.advance(); // consume '{'
        flushStringPart();
        this.addToken("INTERPOLATION_START", "&{", this.column - 2);

        const exprLine = this.line;
        const exprColumn = this.column;

        // Collect up to the matching '}' (nested braces are balanced).
        let depth = 1;
        let expr = "";
        while (!this.isAtEnd() && depth > 0) {
          const inner = this.advance();
          if (inner === "{") {
            depth++;
          } else if (inner === "}") {
            depth--;
          } else if (inner === "\n") {
            this.line++;
            this.column = 1;
          }
          if (depth > 0) expr += inner;
        }
        if (depth > 0) throw this.error("Unterminated template literal");

        const innerLexer = new Lexer(expr);
        innerLexer.line = exprLine;
        innerLexer.column = exprColumn;
        innerLexer.definitions = this.definitions;
        innerLexer.originalSource = this.originalSource ?? this.source;

        const innerTokens = innerLexer.tokenize();
        innerTokens.pop(); // drop the nested EOF
        for (const token of innerTokens) this.tokens.push(token);

        this.addToken("INTERPOLATION_END", "}", this.column - 1);
        continue;
      }

      // End of template
      if (ch === "`") {
        flushStringPart();
        this.addToken("TEMPLATE_END", "`", this.column - 1);
        return;
      }

      // Literal text
      if (part.length === 0) {
        partLine = chLine;
        partColumn = chColumn;
      }
      if (ch === "\n") {
        this.line++;
        this.column = 1;
      }
      part += ch;
    }

    throw this.error("Unterminated template literal");
  }

  /**
   * Lexes an operator or punctuation mark; two-character operators win over
   * one-character ones.
   *
   * @param {string} ch - The already-consumed character.
   * @throws LexError (via `error`) for a character that is neither.
   */
  symbol(ch) {
    const startColumn = this.column - 1;
    const twoChar = ch + this.peek();

    if (OPERATORS.has(twoChar)) {
      this.advance();
      this.addOperatorToken(twoChar, startColumn);
      return;
    }

    if (OPERATORS.has(ch)) {
      this.addOperatorToken(ch, startColumn);
      return;
    }

    if (PUNCTUATION.has(ch)) {
      this.addToken("PUNCTUATION", ch, startColumn);
      return;
    }

    throw this.error(`Unexpected character '${ch}'`);
  }

  /**
   * Pushes an OPERATOR token enriched with the OPERATORS table metadata.
   * @param {string} op - Operator spelling.
   * @param {number} startColumn - Column of its first character.
   */
  addOperatorToken(op, startColumn) {
    const info = OPERATORS.get(op) || {};
    this.tokens.push({
      type: "OPERATOR",
      value: op,
      line: this.line,
      column: startColumn,
      precedence: info.precedence ?? 0,
      isUnary: !!info.isUnary,
      isPostfix: !!info.isPostfix,
    });
  }

  /** Skips to the end of the current line, leaving the newline unconsumed. */
  skipComment() {
    while (!this.isAtEnd() && this.peek() !== "\n") this.advance();
  }

  /** @returns {string} The current character, or "\0" at end of input. */
  peek() {
    return this.source[this.position] ?? "\0";
  }

  /** @returns {string} The character after the current one, or "\0". */
  peekNext() {
    return this.source[this.position + 1] ?? "\0";
  }

  /**
   * Consumes one character and bumps the column.
   * @returns {string} The consumed character, or "\0" at end of input.
   */
  advance() {
    const ch = this.source[this.position++] ?? "\0";
    this.column++;
    return ch;
  }

  /** @returns {boolean} True once every character has been consumed. */
  isAtEnd() {
    return this.position >= this.source.length;
  }

  /** @returns {boolean} True for space, tab and carriage return (not "\n"). */
  isWhitespace(ch) {
    return ch === " " || ch === "\t" || ch === "\r";
  }

  /** @returns {boolean} True for an ASCII digit. */
  isDigit(ch) {
    return ch >= "0" && ch <= "9";
  }

  /** @returns {boolean} True for an ASCII letter or underscore. */
  isAlpha(ch) {
    return /[A-Za-z_]/.test(ch);
  }

  /** @returns {boolean} True for an ASCII letter, digit or underscore. */
  isAlphaNumeric(ch) {
    return /[A-Za-z0-9_]/.test(ch);
  }

  /**
   * Pushes a plain token.
   * @param {string} type - Token type.
   * @param {*} value - Token value.
   * @param {?number} [startColumn=null] - Column; defaults to the current one.
   * @param {?number} [startLine=null] - Line; defaults to the current one.
   */
  addToken(type, value, startColumn = null, startLine = null) {
    this.tokens.push({
      type,
      value,
      line: startLine ?? this.line,
      column: startColumn ?? this.column,
    });
  }

  /**
   * Builds and throws a LexError for the current position. It never returns;
   * call sites write `throw this.error(...)` only to make the control flow
   * obvious to readers.
   *
   * @param {string} msg - Human-readable description.
   * @throws LexError always.
   */
  error(msg) {
    // Format against the text as written by the user: `source` is truncated
    // and rewritten by macro expansion while `line` keeps counting lines of the
    // original. NOTE(review): assumes formatError uses `src` to quote the
    // offending line - confirm against ./error.
    const src = this.originalSource ?? this.source;
    const formatted = formatError("Lexer", msg, this.line, this.column, src);
    throw new (CustomError("LexError"))(formatted);
  }
}
505
+
506
// Public surface of the lexer module: the Lexer class plus the lookup tables
// it consults while tokenizing.
module.exports = { Lexer, KEYWORDS, OPERATORS, PUNCTUATION };