@nova-lang/cli 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/lexer.js ADDED
@@ -0,0 +1,315 @@
1
+ const { TokenType, Token } = require("./types");
2
+
3
/**
 * Maps the character that follows a backslash inside a string literal to the
 * character it stands for. Characters that are not listed here have no
 * mapping and are handled by the caller.
 *
 * Frozen because the table is shared by every Lexer instance; an accidental
 * write would silently change how all string literals are decoded.
 */
const ESCAPE_MAP = Object.freeze({
  n: "\n",
  t: "\t",
  r: "\r",
  '"': '"',
  "'": "'",
  "\\": "\\",
  "#": "#",
  $: "$",
});
13
+
14
/**
 * Error raised when the lexer meets input it cannot tokenize.
 *
 * The message keeps the `SyntaxError: <msg> at line <line>, col <col>` format;
 * the position is additionally exposed as `line` and `col` properties so
 * callers do not have to parse it back out of the message text.
 */
class LexerError extends Error {
  /**
   * @param {string} msg - Description of the problem.
   * @param {number} line - Line number reported for the problem.
   * @param {number} col - Column number reported for the problem.
   */
  constructor(msg, line, col) {
    super(`SyntaxError: ${msg} at line ${line}, col ${col}`);
    this.name = "LexerError";
    this.line = line;
    this.col = col;
  }
}
20
+
21
/**
 * Single-character tokens: source character -> TokenType key and token value.
 * Types are stored by key name and resolved through TokenType at emit time.
 * Some tokens carry their character as value and some carry an empty string;
 * both are kept exactly as the lexer has always emitted them.
 */
const SINGLE_CHAR_TOKENS = new Map([
  ["(", { type: "lparen", value: "" }],
  [")", { type: "rparen", value: "" }],
  ["{", { type: "lbrace", value: "" }],
  ["}", { type: "rbrace", value: "" }],
  ["[", { type: "lbracket", value: "" }],
  ["]", { type: "rbracket", value: "" }],
  [":", { type: "colon", value: ":" }],
  [",", { type: "comma", value: "" }],
  ["-", { type: "dash", value: "-" }],
  ["*", { type: "star", value: "*" }],
  ["+", { type: "plus", value: "+" }],
  ["|", { type: "pipe", value: "|" }],
  ["=", { type: "equals", value: "=" }],
  [".", { type: "dot", value: "." }],
  ["?", { type: "question", value: "?" }],
  ["<", { type: "lt", value: "<" }],
  [">", { type: "gt", value: ">" }],
]);

/** Characters that always end a run of plain text. */
const TEXT_TERMINATORS = "@{}$\n\"'";

const DIGIT = /[0-9]/;
const HEX_DIGIT = /[0-9a-fA-F]/;
const IDENT_START = /[a-zA-Z_]/;
const IDENT_PART = /[a-zA-Z0-9_-]/;

/**
 * Indentation-aware tokenizer.
 *
 * Every character is consumed through `advance()` so that `line` and `col`
 * stay accurate, and every token is stamped with the position of its first
 * character.
 */
class Lexer {
  /**
   * @param {string} source - Text to tokenize.
   */
  constructor(source) {
    this.source = source;
    this.pos = 0;
    this.line = 1;
    this.col = 1;
    this.tokens = [];
    this.indentStack = [0];
    this.lineStart = true;
  }

  /**
   * Looks at a character without consuming it.
   *
   * @param {number} [offset=0] - Distance from the current position.
   * @returns {string} The character, or "\0" when past the end of the source.
   */
  peek(offset = 0) {
    const idx = this.pos + offset;
    return idx < this.source.length ? this.source[idx] : "\0";
  }

  /**
   * Consumes one character and updates the line/column counters.
   *
   * @returns {string} The consumed character.
   */
  advance() {
    const ch = this.source[this.pos++];
    if (ch === "\n") {
      this.line++;
      this.col = 1;
    } else {
      this.col++;
    }
    return ch;
  }

  /**
   * Consumes characters until `pos` reaches `index`, keeping line/column in
   * sync. Does nothing when `index` is not ahead of the current position.
   *
   * @param {number} index - Absolute source offset to stop at.
   */
  advanceTo(index) {
    while (this.pos < index) this.advance();
  }

  /**
   * Throws a LexerError.
   *
   * @param {string} msg - Description of the problem.
   * @param {number} [line] - Line to report; defaults to the current line.
   * @param {number} [col] - Column to report; defaults to the current column.
   * @throws {LexerError} Always.
   */
  error(msg, line = this.line, col = this.col) {
    throw new LexerError(msg, line, col);
  }

  /**
   * Appends a token to `this.tokens`.
   *
   * @param {*} type - A TokenType member.
   * @param {string} [value=""] - Token text.
   * @param {number} [line] - Token line; defaults to the current line.
   * @param {number} [col] - Token column; defaults to the current column.
   */
  emit(type, value = "", line = this.line, col = this.col) {
    this.tokens.push(new Token(type, value, line, col));
  }

  /**
   * Tokenizes the whole source into `this.tokens`, closing any open
   * indentation levels with dedent tokens and finishing with an eof token.
   *
   * @returns {Array} `this.tokens`, for convenience.
   * @throws {LexerError} On unterminated math, strings or interpolations and
   *   on inconsistent indentation.
   */
  tokenize() {
    const length = this.source.length;

    while (this.pos < length) {
      if (this.lineStart) {
        this.lineStart = false;
        this.handleIndent();
        if (this.pos >= length) break;
      }

      const ch = this.peek(0);
      // Position of the token's first character.
      const line = this.line;
      const col = this.col;

      if (ch === "\n") {
        this.advance();
        this.lineStart = true;
        this.emit(TokenType.newline, "", line, col);
        continue;
      }

      if (ch === "/" && (this.peek(1) === "/" || this.peek(1) === "*")) {
        this.skipComment();
        continue;
      }

      // "\r" is skipped here so CRLF input does not leak "\r" text tokens.
      if (ch === " " || ch === "\t" || ch === "\r") {
        this.advance();
        continue;
      }

      if (ch === "$") {
        this.readMath();
        continue;
      }

      if (ch === "@") {
        this.advance();
        this.emit(TokenType.at, "", line, col);
        continue;
      }

      // A "#" that does not open an interpolation falls through to plain text.
      if (ch === "#" && this.peek(1) === "{") {
        this.readInterpolation();
        continue;
      }

      if (ch === '"' || ch === "'") {
        this.advance();
        this.readStringInterp(ch, line, col);
        continue;
      }

      const single = SINGLE_CHAR_TOKENS.get(ch);
      if (single !== undefined) {
        this.advance();
        this.emit(TokenType[single.type], single.value, line, col);
        continue;
      }

      // "+" and "-" are always emitted as their own tokens above, so only a
      // digit can start a number here.
      if (DIGIT.test(ch)) {
        this.emit(TokenType.number, this.readNumber(), line, col);
        continue;
      }

      if (IDENT_START.test(ch)) {
        const ident = this.readIdent();
        if (ident === "true" || ident === "false") {
          this.emit(TokenType.bool, ident, line, col);
        } else if (ident === "null") {
          this.emit(TokenType.null, ident, line, col);
        } else {
          this.emit(TokenType.ident, ident, line, col);
        }
        continue;
      }

      const text = this.readText();
      if (text.length > 0) {
        this.emit(TokenType.text, text, line, col);
        continue;
      }

      // Defensive: always make progress even if nothing matched.
      this.advance();
    }

    while (this.indentStack.length > 1) {
      this.indentStack.pop();
      this.emit(TokenType.dedent);
    }
    this.emit(TokenType.eof);
    return this.tokens;
  }

  /**
   * Reads `$...$` (inline) or `$$...$$` (display) math starting at the
   * opening "$" and emits the raw body as a single token.
   *
   * @throws {LexerError} When the closing delimiter is missing; the error is
   *   reported at the opening delimiter.
   */
  readMath() {
    const line = this.line;
    const col = this.col;
    const isDisplay = this.peek(1) === "$";
    const delimiter = isDisplay ? "$$" : "$";
    const bodyStart = this.pos + delimiter.length;
    const bodyEnd = this.source.indexOf(delimiter, bodyStart);

    // Termination is decided by whether a closer was found, not by where the
    // cursor ends up, so math that closes on the last character is accepted.
    if (bodyEnd === -1) {
      this.error(isDisplay ? "Unterminated display math" : "Unterminated inline math");
    }

    const body = this.source.slice(bodyStart, bodyEnd);
    this.advanceTo(bodyEnd + delimiter.length);
    this.emit(isDisplay ? TokenType.math_display : TokenType.math_inline, body, line, col);
  }

  /**
   * Reads a `#{...}` interpolation starting at the "#", honouring nested
   * braces, and emits its raw body as an interp_start token.
   *
   * @throws {LexerError} When the closing "}" is missing.
   */
  readInterpolation() {
    const line = this.line;
    const col = this.col;
    const bodyStart = this.pos + 2;
    let depth = 1;
    let cursor = bodyStart;

    while (cursor < this.source.length && depth > 0) {
      const c = this.source[cursor++];
      if (c === "{") depth++;
      else if (c === "}") depth--;
    }
    if (depth > 0) this.error("Unterminated interpolation", line, col);

    // cursor sits just past the closing "}"; the body excludes it.
    const body = this.source.slice(bodyStart, cursor - 1);
    this.advanceTo(cursor);
    this.emit(TokenType.interp_start, body, line, col);
  }

  /**
   * Skips a `//` line comment (up to, not including, the newline) or a
   * nestable block comment starting at the current "/". An unterminated
   * block comment runs to the end of input without raising an error.
   */
  skipComment() {
    const marker = this.peek(1);

    if (marker === "/") {
      const newlineAt = this.source.indexOf("\n", this.pos);
      this.advanceTo(newlineAt === -1 ? this.source.length : newlineAt);
      return;
    }

    if (marker === "*") {
      this.advanceTo(this.pos + 2);
      let depth = 1;
      while (depth > 0 && this.pos < this.source.length) {
        if (this.peek(0) === "/" && this.peek(1) === "*") {
          this.advanceTo(this.pos + 2);
          depth++;
        } else if (this.peek(0) === "*" && this.peek(1) === "/") {
          this.advanceTo(this.pos + 2);
          depth--;
        } else {
          this.advance();
        }
      }
      return;
    }

    // Not a comment opener: consume just the "/".
    this.advance();
  }

  /**
   * Reads the remainder of a string literal whose opening quote has already
   * been consumed. Emits string tokens for the literal segments and
   * interp_start tokens for embedded `#{...}` parts. A literal without any
   * content or interpolation emits one empty string token, so `""` is not
   * lost from the token stream.
   *
   * @param {string} quote - The quote character that closes the literal.
   * @param {number} [line] - Line of the opening quote (defaults to current).
   * @param {number} [col] - Column of the opening quote (defaults to current).
   * @throws {LexerError} When the input ends before the closing quote.
   */
  readStringInterp(quote, line = this.line, col = this.col) {
    let segmentLine = line;
    let segmentCol = col;
    let buf = "";
    let emittedPart = false;

    while (this.pos < this.source.length) {
      const ch = this.peek(0);

      if (ch === "\\") {
        this.advance();
        // A backslash as the last character leaves the literal unterminated.
        if (this.pos >= this.source.length) break;
        const next = this.advance();
        const escaped = ESCAPE_MAP[next];
        // Store the decoded character only; unknown escapes keep the
        // escaped character itself.
        buf += escaped !== undefined ? escaped : next;
        continue;
      }

      if (ch === "#" && this.peek(1) === "{") {
        if (buf.length > 0) {
          this.emit(TokenType.string, buf, segmentLine, segmentCol);
          buf = "";
        }
        this.readInterpolation();
        emittedPart = true;
        segmentLine = this.line;
        segmentCol = this.col;
        continue;
      }

      this.advance();
      if (ch === quote) {
        if (buf.length > 0 || !emittedPart) {
          this.emit(TokenType.string, buf, segmentLine, segmentCol);
        }
        return;
      }
      buf += ch;
    }

    this.error("Unterminated string", line, col);
  }

  /**
   * Reads a numeric literal at the current position: an optional sign, then
   * either hexadecimal (`0x` followed by at least one hex digit) or decimal
   * digits with at most one decimal point and an optional exponent. An "e"
   * is only treated as an exponent when digits follow, so `2em` yields `2`.
   *
   * @returns {string} The literal's source text.
   */
  readNumber() {
    const src = this.source;
    const start = this.pos;
    const isDigitAt = (i) => i < src.length && DIGIT.test(src[i]);
    const isHexAt = (i) => i < src.length && HEX_DIGIT.test(src[i]);

    let end = start;
    if (src[end] === "+" || src[end] === "-") end++;

    if (src[end] === "0" && (src[end + 1] === "x" || src[end + 1] === "X") && isHexAt(end + 2)) {
      end += 2;
      while (isHexAt(end)) end++;
    } else {
      while (isDigitAt(end)) end++;
      if (src[end] === ".") {
        end++;
        while (isDigitAt(end)) end++;
      }
      if (src[end] === "e" || src[end] === "E") {
        let expEnd = end + 1;
        if (src[expEnd] === "+" || src[expEnd] === "-") expEnd++;
        if (isDigitAt(expEnd)) {
          while (isDigitAt(expEnd)) expEnd++;
          end = expEnd;
        }
      }
    }

    this.advanceTo(end);
    return src.slice(start, end);
  }

  /**
   * Reads a run of identifier characters (letters, digits, "_" and "-").
   *
   * @returns {string} The identifier text.
   */
  readIdent() {
    const start = this.pos;
    let end = start;
    while (end < this.source.length && IDENT_PART.test(this.source[end])) end++;
    this.advanceTo(end);
    return this.source.slice(start, end);
  }

  /**
   * Reads plain text up to the next character that starts another construct:
   * one of `@ { } $ " '`, a newline (including the "\r" of a CRLF pair), a
   * comment opener, or a `#{` interpolation.
   *
   * @returns {string} The text, possibly empty.
   */
  readText() {
    const src = this.source;
    const start = this.pos;
    let end = start;

    while (end < src.length) {
      const ch = src[end];
      const next = end + 1 < src.length ? src[end + 1] : "\0";
      if (TEXT_TERMINATORS.includes(ch)) break;
      if (ch === "\r" && next === "\n") break;
      if (ch === "/" && (next === "/" || next === "*")) break;
      if (ch === "#" && next === "{") break;
      end++;
    }

    this.advanceTo(end);
    return src.slice(start, end);
  }

  /**
   * Consumes leading whitespace at the start of a line and emits indent or
   * dedent tokens by comparing its width with the indentation stack. A space
   * counts as 1 and a tab as 2, in any order. Blank lines, the end of input,
   * and lines whose first non-blank character is "#" leave the indentation
   * state untouched.
   *
   * @throws {LexerError} When a dedent does not land on an open level.
   */
  handleIndent() {
    let width = 0;
    while (this.pos < this.source.length) {
      const ws = this.source[this.pos];
      if (ws === " ") width += 1;
      else if (ws === "\t") width += 2;
      else break;
      this.advance();
    }

    if (this.pos >= this.source.length) return;
    const first = this.peek(0);
    if (first === "\n" || (first === "\r" && this.peek(1) === "\n")) return;
    if (first === "#") return;

    const currentLevel = () => this.indentStack[this.indentStack.length - 1];

    if (width > currentLevel()) {
      this.indentStack.push(width);
      this.emit(TokenType.indent);
      return;
    }

    // The stack bottom is 0 and width is never negative, so this loop cannot
    // empty the stack.
    while (width < currentLevel()) {
      this.indentStack.pop();
      this.emit(TokenType.dedent);
    }
    if (width !== currentLevel()) {
      this.error("Inconsistent indentation");
    }
  }
}
308
+
309
/**
 * Convenience wrapper: runs a fresh Lexer over `source` and hands back the
 * tokens it collected.
 *
 * @param {string} source - Text to tokenize.
 * @returns {Array} The lexer's `tokens` array.
 */
function tokenize(source) {
  const instance = new Lexer(source);
  instance.tokenize();
  const { tokens } = instance;
  return tokens;
}
314
+
315
// LexerError is exported alongside the lexer so callers can recognise lexing
// failures with `instanceof` instead of matching on the message text.
module.exports = { Lexer, LexerError, tokenize };