kimchilang 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/ci.yml +66 -0
- package/README.md +1547 -0
- package/create-kimchi-app/README.md +44 -0
- package/create-kimchi-app/index.js +214 -0
- package/create-kimchi-app/package.json +22 -0
- package/editors/README.md +121 -0
- package/editors/sublime/KimchiLang.sublime-syntax +138 -0
- package/editors/vscode/README.md +90 -0
- package/editors/vscode/kimchilang-1.1.0.vsix +0 -0
- package/editors/vscode/language-configuration.json +37 -0
- package/editors/vscode/package.json +55 -0
- package/editors/vscode/src/extension.js +354 -0
- package/editors/vscode/syntaxes/kimchi.tmLanguage.json +215 -0
- package/examples/api/client.km +36 -0
- package/examples/async_pipe.km +58 -0
- package/examples/basic.kimchi +109 -0
- package/examples/cli_framework/README.md +92 -0
- package/examples/cli_framework/calculator.km +61 -0
- package/examples/cli_framework/deploy.km +126 -0
- package/examples/cli_framework/greeter.km +26 -0
- package/examples/config.static +27 -0
- package/examples/config.static.js +10 -0
- package/examples/env_test.km +37 -0
- package/examples/fibonacci.kimchi +17 -0
- package/examples/greeter.km +15 -0
- package/examples/hello.js +1 -0
- package/examples/hello.kimchi +3 -0
- package/examples/js_interop.km +42 -0
- package/examples/logger_example.km +34 -0
- package/examples/memo_fibonacci.km +17 -0
- package/examples/myapp/lib/http.js +14 -0
- package/examples/myapp/lib/http.km +16 -0
- package/examples/myapp/main.km +16 -0
- package/examples/myapp/main_with_mock.km +42 -0
- package/examples/myapp/services/api.js +18 -0
- package/examples/myapp/services/api.km +18 -0
- package/examples/new_features.kimchi +52 -0
- package/examples/project_example.static +20 -0
- package/examples/readme_examples.km +240 -0
- package/examples/reduce_pattern_match.km +85 -0
- package/examples/regex_match.km +46 -0
- package/examples/sample.js +45 -0
- package/examples/sample.km +39 -0
- package/examples/secrets.static +35 -0
- package/examples/secrets.static.js +30 -0
- package/examples/shell-example.mjs +144 -0
- package/examples/shell_example.km +19 -0
- package/examples/stdlib_test.km +22 -0
- package/examples/test_example.km +69 -0
- package/examples/testing/README.md +88 -0
- package/examples/testing/http_client.km +18 -0
- package/examples/testing/math.km +48 -0
- package/examples/testing/math.test.km +93 -0
- package/examples/testing/user_service.km +29 -0
- package/examples/testing/user_service.test.km +72 -0
- package/examples/use-config.mjs +141 -0
- package/examples/use_config.km +13 -0
- package/install.sh +59 -0
- package/package.json +29 -0
- package/pantry/acorn/index.km +1 -0
- package/pantry/is_number/index.km +1 -0
- package/pantry/is_odd/index.km +2 -0
- package/project.static +6 -0
- package/src/cli.js +1245 -0
- package/src/generator.js +1241 -0
- package/src/index.js +141 -0
- package/src/js2km.js +568 -0
- package/src/lexer.js +822 -0
- package/src/linter.js +810 -0
- package/src/package-manager.js +307 -0
- package/src/parser.js +1876 -0
- package/src/static-parser.js +500 -0
- package/src/typechecker.js +950 -0
- package/stdlib/array.km +0 -0
- package/stdlib/bitwise.km +38 -0
- package/stdlib/console.km +49 -0
- package/stdlib/date.km +97 -0
- package/stdlib/function.km +44 -0
- package/stdlib/http.km +197 -0
- package/stdlib/http.md +333 -0
- package/stdlib/index.km +26 -0
- package/stdlib/json.km +17 -0
- package/stdlib/logger.js +114 -0
- package/stdlib/logger.km +104 -0
- package/stdlib/math.km +120 -0
- package/stdlib/object.km +41 -0
- package/stdlib/promise.km +33 -0
- package/stdlib/string.km +93 -0
- package/stdlib/testing.md +265 -0
- package/test/test.js +599 -0
package/src/lexer.js
ADDED
|
@@ -0,0 +1,822 @@
|
|
|
1
|
+
// KimchiLang Lexer - Tokenizes source code into tokens
|
|
2
|
+
|
|
3
|
+
/**
 * Every token kind the lexer can emit. Each key maps to a string equal to its
 * own name, so token types stay readable when printed or compared.
 */
export const TokenType = {
  // Literals
  NUMBER: 'NUMBER',
  STRING: 'STRING',
  IDENTIFIER: 'IDENTIFIER',
  BOOLEAN: 'BOOLEAN',
  NULL: 'NULL',

  // Keywords
  EXPOSE: 'EXPOSE',
  DEC: 'DEC',
  FN: 'FN',
  MEMO: 'MEMO',
  RETURN: 'RETURN',
  IF: 'IF',
  ELSE: 'ELSE',
  ELIF: 'ELIF',
  WHILE: 'WHILE',
  FOR: 'FOR',
  IN: 'IN',
  BREAK: 'BREAK',
  CONTINUE: 'CONTINUE',
  AS: 'AS',
  ASYNC: 'ASYNC',
  AWAIT: 'AWAIT',
  TRY: 'TRY',
  CATCH: 'CATCH',
  FINALLY: 'FINALLY',
  THROW: 'THROW',
  PRINT: 'PRINT',
  DEP: 'DEP',
  ARG: 'ARG',
  ENV: 'ENV',
  SECRET: 'SECRET',
  IS: 'IS',
  ENUM: 'ENUM',
  JS: 'JS',
  JS_CONTENT: 'JS_CONTENT',
  SHELL: 'SHELL',
  SHELL_CONTENT: 'SHELL_CONTENT',
  TEST: 'TEST',
  DESCRIBE: 'DESCRIBE',
  EXPECT: 'EXPECT',
  ASSERT: 'ASSERT',

  // Operators
  PLUS: 'PLUS',
  MINUS: 'MINUS',
  STAR: 'STAR',
  SLASH: 'SLASH',
  PERCENT: 'PERCENT',
  POWER: 'POWER',
  ASSIGN: 'ASSIGN',
  PLUS_ASSIGN: 'PLUS_ASSIGN',
  MINUS_ASSIGN: 'MINUS_ASSIGN',
  STAR_ASSIGN: 'STAR_ASSIGN',
  SLASH_ASSIGN: 'SLASH_ASSIGN',
  EQ: 'EQ',
  NEQ: 'NEQ',
  LT: 'LT',
  GT: 'GT',
  LTE: 'LTE',
  GTE: 'GTE',
  AND: 'AND',
  OR: 'OR',
  NOT: 'NOT',
  BITOR: 'BITOR',
  // The lexer emits this type for `<<`; without the entry those tokens
  // would carry `type === undefined`.
  LSHIFT: 'LSHIFT',
  ARROW: 'ARROW',
  FAT_ARROW: 'FAT_ARROW',
  FLOW: 'FLOW',
  PIPE: 'PIPE',
  MATCH: 'MATCH',
  QUESTION: 'QUESTION',
  COLON: 'COLON',
  DOUBLE_COLON: 'DOUBLE_COLON',
  RANGE: 'RANGE',
  SPREAD: 'SPREAD',

  // Delimiters
  LPAREN: 'LPAREN',
  RPAREN: 'RPAREN',
  LBRACE: 'LBRACE',
  RBRACE: 'RBRACE',
  LBRACKET: 'LBRACKET',
  RBRACKET: 'RBRACKET',
  COMMA: 'COMMA',
  DOT: 'DOT',
  SEMICOLON: 'SEMICOLON',
  NEWLINE: 'NEWLINE',

  // Special
  EOF: 'EOF',
  COMMENT: 'COMMENT',
  TEMPLATE_STRING: 'TEMPLATE_STRING',
  REGEX: 'REGEX',
  AT: 'AT',
};
|
|
100
|
+
|
|
101
|
+
// Reserved words and the token type each one produces.
//
// The table is built on a null prototype so that a lookup such as
// KEYWORDS['constructor'] or KEYWORDS['toString'] yields undefined instead of
// an inherited Object.prototype member; otherwise those identifiers would be
// given a non-string token type.
const KEYWORDS = Object.assign(Object.create(null), {
  expose: TokenType.EXPOSE,
  dec: TokenType.DEC,
  fn: TokenType.FN,
  memo: TokenType.MEMO,
  return: TokenType.RETURN,
  if: TokenType.IF,
  else: TokenType.ELSE,
  elif: TokenType.ELIF,
  while: TokenType.WHILE,
  for: TokenType.FOR,
  in: TokenType.IN,
  break: TokenType.BREAK,
  continue: TokenType.CONTINUE,
  as: TokenType.AS,
  async: TokenType.ASYNC,
  await: TokenType.AWAIT,
  try: TokenType.TRY,
  catch: TokenType.CATCH,
  finally: TokenType.FINALLY,
  throw: TokenType.THROW,
  dep: TokenType.DEP,
  arg: TokenType.ARG,
  env: TokenType.ENV,
  secret: TokenType.SECRET,
  is: TokenType.IS,
  enum: TokenType.ENUM,
  js: TokenType.JS,
  shell: TokenType.SHELL,
  test: TokenType.TEST,
  describe: TokenType.DESCRIBE,
  expect: TokenType.EXPECT,
  assert: TokenType.ASSERT,
  print: TokenType.PRINT,
  // Literal words share the generic literal token types.
  true: TokenType.BOOLEAN,
  false: TokenType.BOOLEAN,
  null: TokenType.NULL,
  // Word forms of the logical operators.
  and: TokenType.AND,
  or: TokenType.OR,
  not: TokenType.NOT,
});
|
|
142
|
+
|
|
143
|
+
/**
 * A single lexical unit produced by the lexer.
 */
export class Token {
  /**
   * @param {string} type - token kind
   * @param {*} value - lexeme or decoded payload
   * @param {number} line - line recorded for the token
   * @param {number} column - column recorded for the token
   * @param {number} [sourcePos=0] - position in source for raw extraction
   */
  constructor(type, value, line, column, sourcePos = 0) {
    Object.assign(this, { type, value, line, column, sourcePos });
  }

  /**
   * @returns {string} debug form: `Token(TYPE, <JSON value>, line:column)`
   */
  toString() {
    const payload = JSON.stringify(this.value);
    const where = `${this.line}:${this.column}`;
    return `Token(${this.type}, ${payload}, ${where})`;
  }
}
|
|
156
|
+
|
|
157
|
+
/**
 * Hand-written scanner that turns KimchiLang source text into a flat array of
 * Token objects.
 *
 * State: `pos` is the index of the next unread character, `line` / `column`
 * are the 1-based coordinates of that character, and `tokens` accumulates the
 * output. `peek()` reports end of input with the sentinel character '\0'.
 */
export class Lexer {
  /**
   * @param {string} source - complete program text to scan
   */
  constructor(source) {
    this.source = source;
    this.pos = 0;
    this.line = 1;
    this.column = 1;
    this.tokens = [];
  }

  /**
   * Abort scanning with an error located at the current position.
   * @param {string} message
   * @throws {Error} always
   */
  error(message) {
    throw new Error(`Lexer Error at ${this.line}:${this.column}: ${message}`);
  }

  /**
   * Look at a character without consuming it.
   * @param {number} [offset=0] - distance ahead of the current position
   * @returns {string} the character, or '\0' past the end of the source
   */
  peek(offset = 0) {
    const pos = this.pos + offset;
    if (pos >= this.source.length) return '\0';
    return this.source[pos];
  }

  /**
   * Consume one character and keep line/column in step with it.
   * @returns {string} the consumed character ('\0' at end of input)
   */
  advance() {
    const char = this.peek();
    this.pos++;
    if (char === '\n') {
      this.line++;
      this.column = 1;
    } else {
      this.column++;
    }
    return char;
  }

  /**
   * Consume the next character only when it equals `expected`.
   * @param {string} expected
   * @returns {boolean} whether a character was consumed
   */
  match(expected) {
    if (this.peek() !== expected) return false;
    this.advance();
    return true;
  }

  /** Skip spaces, tabs and carriage returns (newlines are tokens). */
  skipWhitespace() {
    while (this.peek() === ' ' || this.peek() === '\t' || this.peek() === '\r') {
      this.advance();
    }
  }

  /** Skip to (but not past) the end of the current line. */
  skipLineComment() {
    while (this.peek() !== '\n' && this.peek() !== '\0') {
      this.advance();
    }
  }

  /**
   * Skip a block comment. The caller has consumed the leading '/', so the
   * current character is the opening '*'.
   * @throws {Error} when the input ends before the closing delimiter
   */
  skipBlockComment() {
    this.advance(); // opening *
    while (!(this.peek() === '*' && this.peek(1) === '/')) {
      if (this.peek() === '\0') {
        this.error('Unterminated block comment');
      }
      this.advance();
    }
    this.advance(); // closing *
    this.advance(); // closing /
  }

  /**
   * Read a quoted string whose opening quote has already been consumed (so
   * the recorded column is that of the first character after the quote).
   *
   * Escape sequences are decoded. Each `${ ... }` span is kept verbatim and
   * wrapped in '\x00INTERP_START\x00' / '\x00INTERP_END\x00' markers; a
   * string with at least one such span becomes a TEMPLATE_STRING token.
   *
   * @param {string} quote - the quote character that opened the string
   * @returns {Token} STRING or TEMPLATE_STRING
   * @throws {Error} on an unterminated string or interpolation
   */
  readString(quote) {
    const startLine = this.line;
    const startColumn = this.column;
    let value = '';
    let hasInterpolation = false;

    while (this.peek() !== quote) {
      const ch = this.peek();
      if (ch === '\0') {
        this.error('Unterminated string');
      }

      if (ch === '\\') {
        this.advance();
        const escaped = this.advance();
        switch (escaped) {
          case 'n': value += '\n'; break;
          case 't': value += '\t'; break;
          case 'r': value += '\r'; break;
          case '0': value += '\0'; break;
          // \\ \" \' \` \$ (which prevents interpolation) and any unknown
          // escape all produce the escaped character itself.
          default: value += escaped;
        }
        continue;
      }

      if (ch === '$' && this.peek(1) === '{') {
        hasInterpolation = true;
        value += '\x00INTERP_START\x00';
        this.advance(); // $
        this.advance(); // {

        // Copy the expression text up to the brace matching the opener.
        let braceDepth = 1;
        while (braceDepth > 0 && this.peek() !== '\0') {
          if (this.peek() === '{') braceDepth++;
          if (this.peek() === '}') braceDepth--;
          if (braceDepth > 0) {
            value += this.advance();
          }
        }
        if (this.peek() !== '}') {
          this.error('Unterminated interpolation in string');
        }
        this.advance(); // closing }
        value += '\x00INTERP_END\x00';
        continue;
      }

      value += this.advance();
    }
    this.advance(); // closing quote

    const type = hasInterpolation ? TokenType.TEMPLATE_STRING : TokenType.STRING;
    return new Token(type, value, startLine, startColumn);
  }

  /**
   * Read a backtick string whose opening backtick has already been consumed.
   * The text is copied verbatim (escape pairs included) and the token value
   * keeps both surrounding backticks.
   *
   * @returns {Token} a STRING token
   * @throws {Error} on an unterminated template string
   */
  readTemplateString() {
    const startLine = this.line;
    const startColumn = this.column;
    let value = '`';

    while (this.peek() !== '`') {
      if (this.peek() === '\0') {
        this.error('Unterminated template string');
      }
      if (this.peek() === '\\') {
        value += this.advance(); // backslash
      }
      value += this.advance();
    }
    value += this.advance(); // closing backtick

    return new Token(TokenType.STRING, value, startLine, startColumn);
  }

  /**
   * Read a numeric literal: 0x / 0b / 0o prefixed integers, or a decimal with
   * optional fraction and exponent. The token value is the literal's text.
   *
   * @returns {Token} a NUMBER token
   */
  readNumber() {
    const startLine = this.line;
    const startColumn = this.column;
    let value = '';

    const takeWhile = (pattern) => {
      while (pattern.test(this.peek())) {
        value += this.advance();
      }
    };

    // Prefixed integer forms
    if (this.peek() === '0') {
      value += this.advance();
      const marker = this.peek();
      if (marker === 'x' || marker === 'X') {
        value += this.advance();
        takeWhile(/[0-9a-fA-F]/);
        return new Token(TokenType.NUMBER, value, startLine, startColumn);
      }
      if (marker === 'b' || marker === 'B') {
        value += this.advance();
        takeWhile(/[01]/);
        return new Token(TokenType.NUMBER, value, startLine, startColumn);
      }
      if (marker === 'o' || marker === 'O') {
        value += this.advance();
        takeWhile(/[0-7]/);
        return new Token(TokenType.NUMBER, value, startLine, startColumn);
      }
    }

    // Integer part
    takeWhile(/[0-9]/);

    // Fraction: only when a digit follows the dot, so `1..5` stays a range
    if (this.peek() === '.' && /[0-9]/.test(this.peek(1))) {
      value += this.advance(); // .
      takeWhile(/[0-9]/);
    }

    // Exponent: only when digits actually follow the marker, so a stray `e`
    // after a number is not absorbed into an invalid literal such as `5e`.
    if (this.peek() === 'e' || this.peek() === 'E') {
      const next = this.peek(1);
      const signed = next === '+' || next === '-';
      const digitAhead = signed ? this.peek(2) : next;
      if (/[0-9]/.test(digitAhead)) {
        value += this.advance(); // e / E
        if (signed) {
          value += this.advance();
        }
        takeWhile(/[0-9]/);
      }
    }

    return new Token(TokenType.NUMBER, value, startLine, startColumn);
  }

  /**
   * Decide whether a '/' at the current point starts a regex literal rather
   * than a division, based on the previously emitted token.
   *
   * @returns {boolean} false after a token that produces a value, else true
   */
  canStartRegex() {
    if (this.tokens.length === 0) return true;

    const previous = this.tokens[this.tokens.length - 1];
    switch (previous.type) {
      case TokenType.NUMBER:
      case TokenType.STRING:
      case TokenType.TEMPLATE_STRING:
      case TokenType.IDENTIFIER:
      case TokenType.BOOLEAN:
      case TokenType.NULL:
      case TokenType.RPAREN:
      case TokenType.RBRACKET:
      case TokenType.RBRACE:
      case TokenType.REGEX:
        // A value precedes the slash, so it is a division.
        return false;
      default:
        // Operators, keywords, opening brackets, newlines: operand expected.
        return true;
    }
  }

  /**
   * Read a regex literal whose opening '/' has already been consumed.
   * A '/' inside a character class (`[...]`) does not end the literal.
   *
   * @returns {Token} a REGEX token whose value is `{ pattern, flags }`
   * @throws {Error} when the line or input ends before the closing '/'
   */
  readRegex() {
    const startLine = this.line;
    const startColumn = this.column;
    let pattern = '';
    let flags = '';
    let inClass = false;

    while (this.peek() !== '\0' && this.peek() !== '\n') {
      const ch = this.peek();
      if (ch === '/' && !inClass) break;

      if (ch === '\\') {
        pattern += this.advance(); // backslash
        if (this.peek() !== '\0' && this.peek() !== '\n') {
          pattern += this.advance(); // escaped char
        }
        continue;
      }

      if (ch === '[') {
        inClass = true;
      } else if (ch === ']') {
        inClass = false;
      }
      pattern += this.advance();
    }

    if (this.peek() !== '/') {
      this.error('Unterminated regex literal');
    }
    this.advance(); // closing /

    // Flags (g, i, m, s, u, y)
    while (/[gimsuy]/.test(this.peek())) {
      flags += this.advance();
    }

    return new Token(TokenType.REGEX, { pattern, flags }, startLine, startColumn);
  }

  /**
   * Read an identifier or keyword starting at the current character.
   *
   * The `js` and `shell` keywords are followed by raw foreign code, so they
   * are handed to readRawBlock(), which pushes its tokens itself.
   *
   * @returns {Token|null} the token, or null when tokens were already pushed
   */
  readIdentifier() {
    const startLine = this.line;
    const startColumn = this.column;
    let value = '';

    while (/[a-zA-Z0-9_$]/.test(this.peek())) {
      value += this.advance();
    }

    // Own-property check: names such as `constructor` or `toString` must not
    // pick up inherited object members as their token type.
    const type = Object.prototype.hasOwnProperty.call(KEYWORDS, value)
      ? KEYWORDS[value]
      : TokenType.IDENTIFIER;

    if (type === TokenType.JS) {
      return this.readRawBlock(type, value, TokenType.JS_CONTENT, startLine, startColumn);
    }
    if (type === TokenType.SHELL) {
      return this.readRawBlock(type, value, TokenType.SHELL_CONTENT, startLine, startColumn);
    }

    return new Token(type, value, startLine, startColumn);
  }

  /**
   * Tokenize `keyword [(id, id, ...)] { raw text }` for the js/shell forms.
   *
   * Pushes the keyword token, the optional parenthesised identifier list, and
   * - when a '{' follows - LBRACE, one content token holding the trimmed raw
   * text up to the matching '}', and RBRACE. Braces are counted textually, so
   * a brace inside a string or comment of the raw text also counts.
   *
   * When no '{' follows, only the keyword (and any input list) is emitted,
   * exactly once, and newlines after it are left to be tokenized normally.
   *
   * @param {string} keywordType - token type of the keyword
   * @param {string} keyword - keyword text as written
   * @param {string} contentType - token type for the raw content
   * @param {number} startLine - line of the keyword
   * @param {number} startColumn - column of the keyword
   * @returns {null} always; every token is pushed directly onto this.tokens
   */
  readRawBlock(keywordType, keyword, contentType, startLine, startColumn) {
    this.tokens.push(new Token(keywordType, keyword, startLine, startColumn));
    this.skipWhitespace();

    // Optional (inputs)
    if (this.peek() === '(') {
      this.tokens.push(new Token(TokenType.LPAREN, '(', this.line, this.column));
      this.advance();

      while (this.peek() !== ')' && this.peek() !== '\0') {
        this.skipWhitespace();
        if (this.peek() === ',') {
          this.tokens.push(new Token(TokenType.COMMA, ',', this.line, this.column));
          this.advance();
          continue;
        }
        if (!/[a-zA-Z_$]/.test(this.peek())) {
          break;
        }
        const idLine = this.line;
        const idColumn = this.column;
        let id = '';
        while (/[a-zA-Z0-9_$]/.test(this.peek())) {
          id += this.advance();
        }
        this.tokens.push(new Token(TokenType.IDENTIFIER, id, idLine, idColumn));
      }

      if (this.peek() === ')') {
        this.tokens.push(new Token(TokenType.RPAREN, ')', this.line, this.column));
        this.advance();
      }
      this.skipWhitespace();
    }

    // The opening brace may sit on a later line. Remember where we are so
    // the newlines can be given back if no brace turns up.
    const resume = { pos: this.pos, line: this.line, column: this.column };
    while (this.peek() === '\n') {
      this.advance();
    }
    this.skipWhitespace();

    if (this.peek() !== '{') {
      this.pos = resume.pos;
      this.line = resume.line;
      this.column = resume.column;
      return null;
    }

    this.tokens.push(new Token(TokenType.LBRACE, '{', this.line, this.column));
    this.advance();

    // Raw content up to the brace matching the opener
    const contentLine = this.line;
    const contentColumn = this.column;
    let content = '';
    let braceDepth = 1;

    while (braceDepth > 0 && this.peek() !== '\0') {
      const ch = this.peek();
      if (ch === '{') {
        braceDepth++;
      } else if (ch === '}') {
        braceDepth--;
        if (braceDepth === 0) break; // leave the final } for RBRACE below
      }
      content += this.advance();
    }

    this.tokens.push(new Token(contentType, content.trim(), contentLine, contentColumn));

    // At end of input there is no closing brace to emit.
    if (this.peek() === '}') {
      this.tokens.push(new Token(TokenType.RBRACE, '}', this.line, this.column));
      this.advance();
    }

    return null;
  }

  /**
   * Append a token located at the current (already advanced) position.
   * @param {string} type
   * @param {*} [value=null]
   * @returns {Token} the token that was appended
   */
  addToken(type, value = null) {
    const token = new Token(type, value, this.line, this.column);
    this.tokens.push(token);
    return token;
  }

  /**
   * Scan the whole source.
   *
   * Comments are dropped. Runs of newlines collapse into one NEWLINE token
   * and newlines before the first token are not emitted. The result always
   * ends with an EOF token.
   *
   * @returns {Token[]} this.tokens
   * @throws {Error} on an unexpected character, an unterminated construct,
   *   or an unsupported bitwise operator
   */
  tokenize() {
    while (this.pos < this.source.length) {
      this.skipWhitespace();

      if (this.pos >= this.source.length) break;

      const startLine = this.line;
      const startColumn = this.column;
      const char = this.peek();
      const emit = (type, value) => {
        this.tokens.push(new Token(type, value, startLine, startColumn));
      };

      // Comments
      if (char === '/' && this.peek(1) === '/') {
        this.advance();
        this.advance();
        this.skipLineComment();
        continue;
      }

      if (char === '/' && this.peek(1) === '*') {
        this.advance();
        this.skipBlockComment();
        continue;
      }

      // Newlines (significant in KimchiLang)
      if (char === '\n') {
        this.advance();
        const last = this.tokens[this.tokens.length - 1];
        if (this.tokens.length > 0 && last.type !== TokenType.NEWLINE) {
          emit(TokenType.NEWLINE, '\n');
        }
        continue;
      }

      // Strings
      if (char === '"' || char === "'") {
        this.advance();
        this.tokens.push(this.readString(char));
        continue;
      }

      // Template strings
      if (char === '`') {
        this.advance();
        this.tokens.push(this.readTemplateString());
        continue;
      }

      // Numbers
      if (/[0-9]/.test(char)) {
        this.tokens.push(this.readNumber());
        continue;
      }

      // Identifiers and keywords
      if (/[a-zA-Z_$]/.test(char)) {
        const token = this.readIdentifier();
        if (token) {
          this.tokens.push(token);
        }
        continue;
      }

      // Operators and delimiters
      this.advance();

      switch (char) {
        case '+':
          if (this.match('=')) {
            emit(TokenType.PLUS_ASSIGN, '+=');
          } else {
            emit(TokenType.PLUS, '+');
          }
          break;
        case '-':
          if (this.match('>')) {
            emit(TokenType.ARROW, '->');
          } else if (this.match('=')) {
            emit(TokenType.MINUS_ASSIGN, '-=');
          } else {
            emit(TokenType.MINUS, '-');
          }
          break;
        case '*':
          if (this.match('*')) {
            emit(TokenType.POWER, '**');
          } else if (this.match('=')) {
            emit(TokenType.STAR_ASSIGN, '*=');
          } else {
            emit(TokenType.STAR, '*');
          }
          break;
        case '/':
          if (this.match('=')) {
            emit(TokenType.SLASH_ASSIGN, '/=');
          } else if (this.canStartRegex()) {
            this.tokens.push(this.readRegex());
          } else {
            emit(TokenType.SLASH, '/');
          }
          break;
        case '%':
          emit(TokenType.PERCENT, '%');
          break;
        case '=':
          if (this.match('=')) {
            emit(TokenType.EQ, '==');
          } else if (this.match('>')) {
            emit(TokenType.FAT_ARROW, '=>');
          } else {
            emit(TokenType.ASSIGN, '=');
          }
          break;
        case '!':
          if (this.match('=')) {
            emit(TokenType.NEQ, '!=');
          } else {
            emit(TokenType.NOT, '!');
          }
          break;
        case '<':
          if (this.match('=')) {
            emit(TokenType.LTE, '<=');
          } else if (this.match('<')) {
            // NOTE(review): relies on the token table having an LSHIFT entry.
            emit(TokenType.LSHIFT, '<<');
          } else {
            emit(TokenType.LT, '<');
          }
          break;
        case '>':
          if (this.match('=')) {
            emit(TokenType.GTE, '>=');
          } else if (this.match('>')) {
            emit(TokenType.FLOW, '>>');
          } else {
            emit(TokenType.GT, '>');
          }
          break;
        case '&':
          if (this.match('&')) {
            emit(TokenType.AND, '&&');
          } else {
            this.error('Bitwise operators not supported. Use stdlib.bitwise instead.');
          }
          break;
        case '|':
          if (this.match('|')) {
            emit(TokenType.OR, '||');
          } else {
            emit(TokenType.BITOR, '|');
          }
          break;
        case '^':
          this.error('Bitwise operators not supported. Use stdlib.bitwise instead.');
          break;
        case '~':
          if (this.match('>')) {
            emit(TokenType.PIPE, '~>');
          } else {
            emit(TokenType.MATCH, '~');
          }
          break;
        case '?':
          emit(TokenType.QUESTION, '?');
          break;
        case ':':
          if (this.match(':')) {
            emit(TokenType.DOUBLE_COLON, '::');
          } else {
            emit(TokenType.COLON, ':');
          }
          break;
        case '.':
          if (this.match('.')) {
            if (this.match('.')) {
              emit(TokenType.SPREAD, '...');
            } else {
              emit(TokenType.RANGE, '..');
            }
          } else {
            emit(TokenType.DOT, '.');
          }
          break;
        case ',':
          emit(TokenType.COMMA, ',');
          break;
        case ';':
          emit(TokenType.SEMICOLON, ';');
          break;
        case '(':
          emit(TokenType.LPAREN, '(');
          break;
        case ')':
          emit(TokenType.RPAREN, ')');
          break;
        case '{':
          emit(TokenType.LBRACE, '{');
          break;
        case '}':
          emit(TokenType.RBRACE, '}');
          break;
        case '[':
          emit(TokenType.LBRACKET, '[');
          break;
        case ']':
          emit(TokenType.RBRACKET, ']');
          break;
        case '@':
          emit(TokenType.AT, '@');
          break;
        default:
          this.error(`Unexpected character: ${char}`);
      }
    }

    this.tokens.push(new Token(TokenType.EOF, null, this.line, this.column));
    return this.tokens;
  }
}
|
|
818
|
+
|
|
819
|
+
/**
 * Convenience wrapper: scan `source` with a fresh Lexer.
 *
 * @param {string} source - program text
 * @returns {Token[]} whatever the Lexer's tokenize() returns
 */
export function tokenize(source) {
  return new Lexer(source).tokenize();
}
|