@fc-components/monaco-editor 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,377 @@
1
+ /**
2
+ * Expr-lang lexer/tokenizer
3
+ * Handles: identifiers, numbers, strings, bytes literals, operators, comments
4
+ */
5
+
6
+ import { Token, TokenKind, EOF_TOKEN } from './types';
7
+
8
/**
 * Words that satisfy the identifier grammar but must be emitted as
 * `TokenKind.Operator` instead of `TokenKind.Identifier`.
 *
 * The lexer looks every scanned word up in this set, so membership is
 * case-sensitive and exact. The set is typed read-only because it is a shared
 * module-level table that nothing should mutate at runtime.
 */
const OPERATOR_TOKENS: ReadonlySet<string> = new Set<string>([
  // Binding and conditional keywords
  'let', 'if', 'else',
  // Logical word operators
  'not', 'and', 'or',
  // Membership and string-matching operators
  'in', 'matches', 'contains',
  'startsWith', 'endsWith',
  'hasPrefix', 'hasSuffix',
]);
16
+
17
/**
 * Pull-style tokenizer for expr-lang source text.
 *
 * Usage: call `reset(source)` once, then call `next()` repeatedly; after the
 * input is exhausted every further call returns an EOF token.
 *
 * Positions: `start`/`end` are UTF-16 offsets into the input (`end` is
 * exclusive); `line`/`column` are 1-based and refer to the first character of
 * the token. Whitespace and `//` / block comments never produce tokens.
 *
 * The lexer never throws: unterminated strings and comments simply run to the
 * end of the input, and malformed escapes are kept as literal characters.
 */
export class Lexer {
  /** Three-character operators, tried before the two-character ones. */
  private static readonly THREE_CHAR_OPS: readonly string[] = ['...', '<<=', '>>='];

  /** Two-character operators, tried before falling back to one character. */
  private static readonly TWO_CHAR_OPS: readonly string[] = [
    '==', '!=', '<=', '>=', '&&', '||', '??', '?.', '..', '**', '//', '::', '->',
  ];

  /** Characters emitted as `TokenKind.Bracket`. */
  private static readonly BRACKETS = '()[]{}';

  /** Matches a non-empty run consisting only of hexadecimal digits. */
  private static readonly HEX_DIGITS = /^[0-9a-fA-F]+$/;

  private input = '';
  private pos = 0;
  private line = 1;
  private column = 1;
  // Line/column of the token currently being scanned; read by makeToken().
  private startLine = 1;
  private startColumn = 1;

  /**
   * Loads new source text and rewinds the cursor to offset 0, line 1, column 1.
   *
   * @param input Complete source text to tokenize.
   */
  reset(input: string): void {
    this.input = input;
    this.pos = 0;
    this.line = 1;
    this.column = 1;
    this.startLine = 1;
    this.startColumn = 1;
  }

  /**
   * Scans and returns the next token, skipping leading whitespace and comments.
   *
   * @returns The next token, or a copy of `EOF_TOKEN` positioned at the end of
   *          the input once nothing is left.
   */
  next(): Token {
    this.skipWhitespace();
    this.skipComments();

    if (this.pos >= this.input.length) {
      return { ...EOF_TOKEN, start: this.pos, end: this.pos, line: this.line, column: this.column };
    }

    this.startLine = this.line;
    this.startColumn = this.column;

    // charAt() yields '' past the end, so the look-ahead needs no bounds check.
    const ch = this.input.charAt(this.pos);
    const following = this.input.charAt(this.pos + 1);

    // String literals
    if (ch === '"' || ch === "'") {
      return this.readString(ch);
    }
    if (ch === '`') {
      return this.readBacktickString();
    }

    // Byte literals: b"..." or b'...' or B"..." or B'...'
    if ((ch === 'b' || ch === 'B') && (following === '"' || following === "'")) {
      return this.readByteLiteral();
    }

    // Numbers, including the leading-dot form ".5"
    if (this.isDigit(ch) || (ch === '.' && this.isDigit(following))) {
      return this.readNumber();
    }

    // Identifiers and keywords
    if (this.isIdentStart(ch)) {
      return this.readIdentOrKeyword();
    }

    // Operators and brackets
    return this.readOperatorOrBracket();
  }

  /** Moves the cursor `count` characters forward on the current line. */
  private advance(count: number): void {
    this.pos += count;
    this.column += count;
  }

  /** Skips spaces, tabs, carriage returns and newlines, tracking line/column. */
  private skipWhitespace(): void {
    while (this.pos < this.input.length) {
      const ch = this.input.charAt(this.pos);
      if (ch === ' ' || ch === '\t' || ch === '\r') {
        this.advance(1);
      } else if (ch === '\n') {
        this.pos++;
        this.line++;
        this.column = 1;
      } else {
        break;
      }
    }
  }

  /**
   * Skips any run of line (`// ...`) and block comments, together with the
   * whitespace that follows each one. An unterminated block comment consumes
   * the rest of the input.
   */
  private skipComments(): void {
    while (this.input.charAt(this.pos) === '/') {
      const marker = this.input.charAt(this.pos + 1);

      if (marker === '/') {
        this.advance(2);
        // Stop in front of the newline; skipWhitespace() accounts for it.
        while (this.pos < this.input.length && this.input.charAt(this.pos) !== '\n') {
          this.advance(1);
        }
      } else if (marker === '*') {
        this.advance(2);
        while (this.pos < this.input.length) {
          if (this.input.charAt(this.pos) === '*' && this.input.charAt(this.pos + 1) === '/') {
            this.advance(2);
            break;
          }
          if (this.input.charAt(this.pos) === '\n') {
            this.line++;
            this.column = 1;
          } else {
            this.column++;
          }
          this.pos++;
        }
      } else {
        // A lone '/' is an operator, not a comment.
        return;
      }

      this.skipWhitespace();
    }
  }

  /**
   * Reads a single- or double-quoted string starting at the opening quote.
   *
   * @param quote The opening quote character, which also terminates the string.
   * @returns A String token whose value has escape sequences decoded.
   */
  private readString(quote: string): Token {
    const start = this.pos;
    this.advance(1); // skip opening quote
    return this.readQuoted(quote, start, false);
  }

  /**
   * Reads a backtick-delimited raw string: no escape processing, newlines
   * allowed. An unterminated string runs to the end of the input.
   */
  private readBacktickString(): Token {
    const start = this.pos;
    this.advance(1); // skip opening backtick

    let value = '';
    while (this.pos < this.input.length) {
      const ch = this.input.charAt(this.pos);
      if (ch === '`') {
        this.advance(1);
        break;
      }
      value += ch;
      this.pos++;
      if (ch === '\n') {
        this.line++;
        this.column = 1;
      } else {
        this.column++;
      }
    }

    return this.makeToken(TokenKind.String, value, start);
  }

  /**
   * Reads a byte literal (b"..." / b'...' / B"..." / B'...') starting at the
   * `b` prefix. The result is reported as a String token.
   */
  private readByteLiteral(): Token {
    const start = this.pos;
    this.advance(1); // skip 'b' or 'B'
    const quote = this.input.charAt(this.pos);
    this.advance(1); // skip opening quote
    return this.readQuoted(quote, start, true);
  }

  /**
   * Shared body scanner for quoted string and byte literals. The cursor must
   * be just past the opening quote.
   *
   * @param quote   Terminating quote character.
   * @param start   Offset of the first character of the literal (quote or `b`).
   * @param isBytes Selects byte-literal escape rules (octal, no `\u`).
   * @returns A String token; if the closing quote is missing the token extends
   *          to the end of the input (or to a lone trailing backslash).
   */
  private readQuoted(quote: string, start: number, isBytes: boolean): Token {
    let value = '';
    while (this.pos < this.input.length) {
      const ch = this.input.charAt(this.pos);
      if (ch === quote) {
        this.advance(1); // skip closing quote
        break;
      }
      if (ch === '\\') {
        // A backslash as the very last character cannot start an escape;
        // leave it in place so next() reports it as its own token.
        if (this.pos + 1 >= this.input.length) break;
        value += this.readEscape(isBytes);
        continue;
      }
      value += ch;
      this.pos++;
      if (ch === '\n') {
        this.line++;
        this.column = 1;
      } else {
        this.column++;
      }
    }

    return this.makeToken(TokenKind.String, value, start);
  }

  /**
   * Decodes one escape sequence. The cursor must be on the backslash and at
   * least one character must follow it.
   *
   * Supported: `\n`, `\t`, `\r`, `\xHH`, `\uHHHH` (strings only) and one to
   * three octal digits (byte literals only). Any other escaped character,
   * including the quote and the backslash, stands for itself. `\x` / `\u`
   * without the full count of hex digits are treated the same way, so a
   * malformed escape can never swallow the closing quote.
   *
   * @param isBytes Whether byte-literal escape rules apply.
   * @returns The decoded character; the cursor is left after the escape.
   */
  private readEscape(isBytes: boolean): string {
    const src = this.input;
    const code = src.charAt(this.pos + 1);

    if (isBytes && code >= '0' && code <= '7') {
      // Take up to three octal digits, but only digits that are really there.
      const limit = Math.min(this.pos + 4, src.length);
      let end = this.pos + 2;
      while (end < limit && src.charAt(end) >= '0' && src.charAt(end) <= '7') {
        end++;
      }
      const decoded = String.fromCharCode(parseInt(src.slice(this.pos + 1, end), 8));
      this.advance(end - this.pos);
      return decoded;
    }

    let hexLength = 0;
    if (code === 'x') {
      hexLength = 2;
    } else if (code === 'u' && !isBytes) {
      hexLength = 4;
    }
    if (hexLength > 0) {
      const digits = src.slice(this.pos + 2, this.pos + 2 + hexLength);
      if (digits.length === hexLength && Lexer.HEX_DIGITS.test(digits)) {
        // Backslash + letter + digits: 4 characters for \xHH, 6 for \uHHHH.
        this.advance(2 + hexLength);
        return String.fromCharCode(parseInt(digits, 16));
      }
      // Malformed: fall through and keep the letter literally.
    }

    this.pos += 2;
    if (code === '\n') {
      // Backslash followed by a raw newline still ends the source line.
      this.line++;
      this.column = 1;
    } else {
      this.column += 2;
    }

    switch (code) {
      case 'n': return '\n';
      case 't': return '\t';
      case 'r': return '\r';
      default: return code;
    }
  }

  /** Returns the offset of the first character at/after `from` that is neither a digit nor `_`. */
  private scanDigits(from: number): number {
    let end = from;
    while (end < this.input.length) {
      const ch = this.input.charAt(end);
      if (!this.isDigit(ch) && ch !== '_') break;
      end++;
    }
    return end;
  }

  /**
   * Reads a numeric literal; the token value is the raw source text.
   *
   * Accepted shapes:
   *  - `0x` / `0o` / `0b` prefix (either case) followed by hex digits or `_`
   *    (digits are not validated against the radix);
   *  - decimal digits with `_` separators, an optional fraction (`.` followed
   *    by a digit) and an optional exponent (`e`/`E`, optional sign, digits).
   *
   * A sign is only consumed inside an exponent and a `.` only when a digit
   * follows, so `1+2` and `1..5` are each split into three tokens.
   */
  private readNumber(): Token {
    const start = this.pos;
    const src = this.input;
    const radixMark = src.charAt(start + 1);
    let end = start;

    // `radixMark !== ''` matters: ''.includes-style checks succeed on ''.
    if (src.charAt(start) === '0' && radixMark !== '' && 'xXoObB'.includes(radixMark)) {
      end = start + 2;
      while (end < src.length) {
        const ch = src.charAt(end);
        if (ch !== '_' && !Lexer.HEX_DIGITS.test(ch)) break;
        end++;
      }
    } else {
      end = this.scanDigits(end);

      if (src.charAt(end) === '.' && this.isDigit(src.charAt(end + 1))) {
        end = this.scanDigits(end + 1);
      }

      const marker = src.charAt(end);
      if (marker === 'e' || marker === 'E') {
        let digitsAt = end + 1;
        const sign = src.charAt(digitsAt);
        if (sign === '+' || sign === '-') digitsAt++;
        // Commit to the exponent only if at least one digit is present.
        if (this.isDigit(src.charAt(digitsAt))) {
          end = this.scanDigits(digitsAt);
        }
      }
    }

    const value = src.slice(start, end);
    this.advance(end - start);
    return this.makeToken(TokenKind.Number, value, start);
  }

  /**
   * Reads an identifier; words listed in `OPERATOR_TOKENS` are returned as
   * Operator tokens, everything else as Identifier tokens.
   */
  private readIdentOrKeyword(): Token {
    const start = this.pos;
    let end = start;
    while (end < this.input.length && this.isIdentPart(this.input.charAt(end))) {
      end++;
    }

    const value = this.input.slice(start, end);
    this.advance(end - start);

    const kind = OPERATOR_TOKENS.has(value) ? TokenKind.Operator : TokenKind.Identifier;
    return this.makeToken(kind, value, start);
  }

  /**
   * Reads a bracket or an operator using longest match (three, then two, then
   * one character). Any character that is not otherwise recognized is emitted
   * as a one-character Operator token so the lexer always makes progress.
   */
  private readOperatorOrBracket(): Token {
    const start = this.pos;
    const ch = this.input.charAt(start);

    if (Lexer.BRACKETS.includes(ch)) {
      this.advance(1);
      return this.makeToken(TokenKind.Bracket, ch, start);
    }

    const threeChar = this.input.slice(start, start + 3);
    if (Lexer.THREE_CHAR_OPS.includes(threeChar)) {
      this.advance(3);
      return this.makeToken(TokenKind.Operator, threeChar, start);
    }

    const twoChar = this.input.slice(start, start + 2);
    if (Lexer.TWO_CHAR_OPS.includes(twoChar)) {
      this.advance(2);
      return this.makeToken(TokenKind.Operator, twoChar, start);
    }

    // Known single-character operators and unrecognized characters are
    // reported identically: one character, Operator kind.
    this.advance(1);
    return this.makeToken(TokenKind.Operator, ch, start);
  }

  /** True for ASCII `0`-`9`; false for the empty string. */
  private isDigit(ch: string): boolean {
    return ch >= '0' && ch <= '9';
  }

  /** True for ASCII letters, `_` and `$`. */
  private isIdentStart(ch: string): boolean {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch === '_' || ch === '$';
  }

  /** True for any character allowed after the first one in an identifier. */
  private isIdentPart(ch: string): boolean {
    return this.isIdentStart(ch) || this.isDigit(ch);
  }

  /**
   * Builds a token that ends at the current cursor and starts at the
   * line/column recorded by `next()` before scanning began.
   */
  private makeToken(kind: TokenKind, value: string, start: number): Token {
    return {
      kind,
      value,
      start,
      end: this.pos,
      line: this.startLine,
      column: this.startColumn,
    };
  }
}