mimo-lang 1.1.1 → 2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +24 -0
- package/LICENSE +21 -0
- package/README.md +71 -39
- package/adapters/browserAdapter.js +86 -0
- package/adapters/nodeAdapter.js +101 -0
- package/bin/cli.js +80 -0
- package/bin/commands/convert.js +27 -0
- package/bin/commands/doctor.js +139 -0
- package/bin/commands/eval.js +39 -0
- package/bin/commands/fmt.js +109 -0
- package/bin/commands/help.js +72 -0
- package/bin/commands/lint.js +117 -0
- package/bin/commands/repl.js +24 -0
- package/bin/commands/run.js +64 -0
- package/bin/commands/test.js +126 -0
- package/bin/utils/colors.js +38 -0
- package/bin/utils/formatError.js +47 -0
- package/bin/utils/fs.js +57 -0
- package/bin/utils/version.js +8 -0
- package/build.js +18 -0
- package/bun.lock +74 -0
- package/index.js +48 -77
- package/index.web.js +364 -0
- package/interpreter/BuiltinFunction.js +32 -0
- package/interpreter/ErrorHandler.js +120 -0
- package/interpreter/ExpressionEvaluator.js +106 -0
- package/interpreter/Interpreter.js +172 -0
- package/interpreter/MimoError.js +112 -0
- package/interpreter/ModuleLoader.js +236 -0
- package/interpreter/StatementExecutor.js +107 -0
- package/interpreter/Utils.js +82 -0
- package/interpreter/Values.js +87 -0
- package/interpreter/coreBuiltins.js +490 -0
- package/interpreter/environment.js +99 -0
- package/interpreter/evaluators/binaryExpressionEvaluator.js +111 -0
- package/interpreter/evaluators/collectionEvaluator.js +151 -0
- package/interpreter/evaluators/functionCallEvaluator.js +76 -0
- package/interpreter/evaluators/literalEvaluator.js +27 -0
- package/interpreter/evaluators/moduleAccessEvaluator.js +25 -0
- package/interpreter/evaluators/templateLiteralEvaluator.js +20 -0
- package/interpreter/executors/BaseExecutor.js +37 -0
- package/interpreter/executors/ControlFlowExecutor.js +206 -0
- package/interpreter/executors/FunctionExecutor.js +126 -0
- package/interpreter/executors/PatternMatchExecutor.js +93 -0
- package/interpreter/executors/VariableExecutor.js +144 -0
- package/interpreter/index.js +8 -0
- package/interpreter/stdlib/array/accessFunctions.js +61 -0
- package/interpreter/stdlib/array/arrayUtils.js +36 -0
- package/interpreter/stdlib/array/higherOrderFunctions.js +285 -0
- package/interpreter/stdlib/array/searchFunctions.js +77 -0
- package/interpreter/stdlib/array/setFunctions.js +49 -0
- package/interpreter/stdlib/array/transformationFunctions.js +68 -0
- package/interpreter/stdlib/array.js +85 -0
- package/interpreter/stdlib/assert.js +143 -0
- package/interpreter/stdlib/datetime.js +170 -0
- package/interpreter/stdlib/env.js +54 -0
- package/interpreter/stdlib/fs.js +161 -0
- package/interpreter/stdlib/http.js +92 -0
- package/interpreter/stdlib/json.js +70 -0
- package/interpreter/stdlib/math.js +309 -0
- package/interpreter/stdlib/object.js +142 -0
- package/interpreter/stdlib/path.js +69 -0
- package/interpreter/stdlib/regex.js +134 -0
- package/interpreter/stdlib/string.js +260 -0
- package/interpreter/suggestions.js +46 -0
- package/lexer/Lexer.js +245 -0
- package/lexer/TokenTypes.js +131 -0
- package/lexer/createToken.js +11 -0
- package/lexer/tokenizers/commentTokenizer.js +45 -0
- package/lexer/tokenizers/literalTokenizer.js +163 -0
- package/lexer/tokenizers/symbolTokenizer.js +69 -0
- package/lexer/tokenizers/whitespaceTokenizer.js +36 -0
- package/package.json +29 -13
- package/parser/ASTNodes.js +448 -0
- package/parser/Parser.js +188 -0
- package/parser/expressions/atomicExpressions.js +165 -0
- package/parser/expressions/conditionalExpressions.js +0 -0
- package/parser/expressions/operatorExpressions.js +79 -0
- package/parser/expressions/primaryExpressions.js +77 -0
- package/parser/parseStatement.js +184 -0
- package/parser/parserExpressions.js +115 -0
- package/parser/parserUtils.js +19 -0
- package/parser/statements/controlFlowParsers.js +106 -0
- package/parser/statements/functionParsers.js +314 -0
- package/parser/statements/moduleParsers.js +57 -0
- package/parser/statements/patternMatchParsers.js +124 -0
- package/parser/statements/variableParsers.js +155 -0
- package/repl.js +325 -0
- package/test.js +47 -0
- package/tools/PrettyPrinter.js +3 -0
- package/tools/convert/Args.js +46 -0
- package/tools/convert/Registry.js +91 -0
- package/tools/convert/Transpiler.js +78 -0
- package/tools/convert/plugins/README.md +66 -0
- package/tools/convert/plugins/alya/index.js +10 -0
- package/tools/convert/plugins/alya/to_alya.js +289 -0
- package/tools/convert/plugins/alya/visitors/expressions.js +257 -0
- package/tools/convert/plugins/alya/visitors/statements.js +403 -0
- package/tools/convert/plugins/base_converter.js +228 -0
- package/tools/convert/plugins/javascript/index.js +10 -0
- package/tools/convert/plugins/javascript/mimo_runtime.js +265 -0
- package/tools/convert/plugins/javascript/to_js.js +155 -0
- package/tools/convert/plugins/javascript/visitors/expressions.js +197 -0
- package/tools/convert/plugins/javascript/visitors/patterns.js +102 -0
- package/tools/convert/plugins/javascript/visitors/statements.js +236 -0
- package/tools/convert/plugins/python/index.js +10 -0
- package/tools/convert/plugins/python/mimo_runtime.py +811 -0
- package/tools/convert/plugins/python/to_py.js +329 -0
- package/tools/convert/plugins/python/visitors/expressions.js +272 -0
- package/tools/convert/plugins/python/visitors/patterns.js +100 -0
- package/tools/convert/plugins/python/visitors/statements.js +257 -0
- package/tools/convert.js +102 -0
- package/tools/format/CommentAttacher.js +190 -0
- package/tools/format/CommentLexer.js +152 -0
- package/tools/format/Printer.js +849 -0
- package/tools/format/config.js +107 -0
- package/tools/formatter.js +169 -0
- package/tools/lint/Linter.js +391 -0
- package/tools/lint/config.js +114 -0
- package/tools/lint/rules/consistent-return.js +62 -0
- package/tools/lint/rules/max-depth.js +56 -0
- package/tools/lint/rules/no-empty-function.js +45 -0
- package/tools/lint/rules/no-magic-numbers.js +46 -0
- package/tools/lint/rules/no-shadow.js +113 -0
- package/tools/lint/rules/no-unused-vars.js +26 -0
- package/tools/lint/rules/prefer-const.js +19 -0
- package/tools/linter.js +261 -0
- package/tools/replFormatter.js +93 -0
- package/tools/stamp-version.js +32 -0
- package/web/index.js +9 -0
- package/bun.lockb +0 -0
- package/cli.js +0 -84
- package/compiler/execute/interpreter.js +0 -68
- package/compiler/execute/interpreters/binary.js +0 -12
- package/compiler/execute/interpreters/call.js +0 -10
- package/compiler/execute/interpreters/if.js +0 -10
- package/compiler/execute/interpreters/try-catch.js +0 -10
- package/compiler/execute/interpreters/while.js +0 -8
- package/compiler/execute/utils/createfunction.js +0 -11
- package/compiler/execute/utils/evaluate.js +0 -20
- package/compiler/execute/utils/operate.js +0 -23
- package/compiler/lexer/processToken.js +0 -40
- package/compiler/lexer/tokenTypes.js +0 -4
- package/compiler/lexer/tokenizer.js +0 -74
- package/compiler/parser/expression/comparison.js +0 -18
- package/compiler/parser/expression/identifier.js +0 -29
- package/compiler/parser/expression/number.js +0 -10
- package/compiler/parser/expression/operator.js +0 -21
- package/compiler/parser/expression/punctuation.js +0 -31
- package/compiler/parser/expression/string.js +0 -6
- package/compiler/parser/parseExpression.js +0 -27
- package/compiler/parser/parseStatement.js +0 -34
- package/compiler/parser/parser.js +0 -45
- package/compiler/parser/statement/call.js +0 -26
- package/compiler/parser/statement/function.js +0 -29
- package/compiler/parser/statement/if.js +0 -34
- package/compiler/parser/statement/return.js +0 -10
- package/compiler/parser/statement/set.js +0 -11
- package/compiler/parser/statement/show.js +0 -10
- package/compiler/parser/statement/try-catch.js +0 -25
- package/compiler/parser/statement/while.js +0 -22
- package/converter/go/convert.js +0 -110
- package/converter/js/convert.js +0 -107
- package/jsconfig.json +0 -27
- package/vite.config.js +0 -17
package/lexer/Lexer.js
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
import { TokenType } from "./TokenTypes.js";
|
|
2
|
+
import { createToken as _createTokenRaw } from "./createToken.js"; // <--- Add this import
|
|
3
|
+
import { skipWhitespace } from "./tokenizers/whitespaceTokenizer.js";
|
|
4
|
+
import {
|
|
5
|
+
isLiteralStart,
|
|
6
|
+
readIdentifier,
|
|
7
|
+
readNumber,
|
|
8
|
+
readString,
|
|
9
|
+
isAlpha,
|
|
10
|
+
isDigit,
|
|
11
|
+
} from "./tokenizers/literalTokenizer.js";
|
|
12
|
+
import { readSymbol } from "./tokenizers/symbolTokenizer.js";
|
|
13
|
+
import { MimoError } from "../interpreter/MimoError.js";
|
|
14
|
+
|
|
15
|
+
export class Lexer {
|
|
16
|
+
constructor(source, filePath = "unknown") {
|
|
17
|
+
// Add filePath
|
|
18
|
+
this.source = source;
|
|
19
|
+
this.filePath = filePath; // Store filePath
|
|
20
|
+
this.position = 0;
|
|
21
|
+
this.line = 1;
|
|
22
|
+
this.column = 1;
|
|
23
|
+
this.previousToken = null;
|
|
24
|
+
|
|
25
|
+
// State for template literals
|
|
26
|
+
// 0: Normal
|
|
27
|
+
// 1: Inside template, expecting StringFragment or ${ or `
|
|
28
|
+
// 2: Inside template, just saw ${, expecting expression (normal tokenizing)
|
|
29
|
+
// 3: Inside template, just saw }, expecting StringFragment or ${ or `
|
|
30
|
+
this.templateLiteralState = 0;
|
|
31
|
+
this.templateLiteralDepth = 0; // To handle nested templates if ever supported
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
isAtEnd() {
|
|
35
|
+
return this.position >= this.source.length;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
peek(offset = 0) {
|
|
39
|
+
return this.position + offset < this.source.length
|
|
40
|
+
? this.source[this.position + offset]
|
|
41
|
+
: null;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
advance() {
|
|
45
|
+
if (!this.isAtEnd()) {
|
|
46
|
+
const char = this.source[this.position];
|
|
47
|
+
if (char === "\n") {
|
|
48
|
+
// <--- FIXED: Now checks for actual newline character
|
|
49
|
+
this.line++;
|
|
50
|
+
this.column = 1;
|
|
51
|
+
} else {
|
|
52
|
+
this.column++;
|
|
53
|
+
}
|
|
54
|
+
this.position++;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Internal helper for Lexer's own token creation (e.g., for template literals)
|
|
59
|
+
// and for external tokenizers to use.
|
|
60
|
+
_createToken(type, value, startLine, startColumn, startPosition, length) {
|
|
61
|
+
// Calls the raw createToken from lexer/createToken.js
|
|
62
|
+
return _createTokenRaw(
|
|
63
|
+
type,
|
|
64
|
+
value,
|
|
65
|
+
startLine,
|
|
66
|
+
startColumn,
|
|
67
|
+
startPosition,
|
|
68
|
+
length,
|
|
69
|
+
this.filePath
|
|
70
|
+
);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Helper for consistent error reporting (as discussed before)
|
|
74
|
+
error(message, code = "LEX000", suggestion = "") {
|
|
75
|
+
const errorToken = {
|
|
76
|
+
// Create a token-like object for error location
|
|
77
|
+
value:
|
|
78
|
+
this.peek() ||
|
|
79
|
+
this.source.substring(this.position - 1, this.position) ||
|
|
80
|
+
"",
|
|
81
|
+
line: this.line,
|
|
82
|
+
column: this.column,
|
|
83
|
+
start: this.position, // Point to the current position
|
|
84
|
+
length: 1,
|
|
85
|
+
file: this.filePath,
|
|
86
|
+
};
|
|
87
|
+
// CORRECTED: Pass arguments in the order expected by MimoError.lexerError(code, message, token, suggestion)
|
|
88
|
+
throw MimoError.lexerError(code, message, errorToken, suggestion);
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
nextToken() {
|
|
92
|
+
const startLine = this.line;
|
|
93
|
+
const startColumn = this.column;
|
|
94
|
+
const startPosition = this.position; // Capture start position for the token
|
|
95
|
+
|
|
96
|
+
skipWhitespace(this);
|
|
97
|
+
|
|
98
|
+
if (this.isAtEnd()) return null;
|
|
99
|
+
|
|
100
|
+
const char = this.peek(); // char needs to be defined here to be used later
|
|
101
|
+
|
|
102
|
+
// Handle template literal states (State 1 or 3)
|
|
103
|
+
if (this.templateLiteralState === 1 || this.templateLiteralState === 3) {
|
|
104
|
+
if (this.peek() === "`") {
|
|
105
|
+
this.advance();
|
|
106
|
+
this.templateLiteralState = 0;
|
|
107
|
+
this.templateLiteralDepth--;
|
|
108
|
+
return this._createToken(
|
|
109
|
+
TokenType.Backtick,
|
|
110
|
+
"`",
|
|
111
|
+
startLine,
|
|
112
|
+
startColumn,
|
|
113
|
+
startPosition,
|
|
114
|
+
1
|
|
115
|
+
);
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
if (this.peek() === "$" && this.peek(1) === "{") {
|
|
119
|
+
this.advance();
|
|
120
|
+
this.advance();
|
|
121
|
+
this.templateLiteralState = 2;
|
|
122
|
+
return this._createToken(
|
|
123
|
+
TokenType.InterpolationStart,
|
|
124
|
+
"${",
|
|
125
|
+
startLine,
|
|
126
|
+
startColumn,
|
|
127
|
+
startPosition,
|
|
128
|
+
2
|
|
129
|
+
);
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
let fragment = "";
|
|
133
|
+
// Loop to read string fragment, handling escapes
|
|
134
|
+
while (
|
|
135
|
+
!this.isAtEnd() &&
|
|
136
|
+
this.peek() !== "`" &&
|
|
137
|
+
!(this.peek() === "$" && this.peek(1) === "{")
|
|
138
|
+
) {
|
|
139
|
+
const currentFragmentChar = this.peek(); // Renamed to avoid conflict with outer 'char'
|
|
140
|
+
if (currentFragmentChar === "\\") {
|
|
141
|
+
// <--- Correctly checking for backslash
|
|
142
|
+
this.advance(); // consume backslash
|
|
143
|
+
if (this.isAtEnd()) {
|
|
144
|
+
this.error(
|
|
145
|
+
"Unterminated escape sequence in template fragment.",
|
|
146
|
+
"LEX004",
|
|
147
|
+
"Complete the escape sequence or close the template literal."
|
|
148
|
+
);
|
|
149
|
+
}
|
|
150
|
+
const escapedChar = this.peek();
|
|
151
|
+
switch (escapedChar) {
|
|
152
|
+
case "n":
|
|
153
|
+
fragment += "\n";
|
|
154
|
+
break; // <--- FIXED: store actual newline
|
|
155
|
+
case "t":
|
|
156
|
+
fragment += "\t";
|
|
157
|
+
break; // <--- FIXED: store actual tab
|
|
158
|
+
case "r":
|
|
159
|
+
fragment += "\r";
|
|
160
|
+
break; // <--- FIXED: store actual carriage return
|
|
161
|
+
case "\\":
|
|
162
|
+
fragment += "\\";
|
|
163
|
+
break; // <--- FIXED: store actual backslash
|
|
164
|
+
case "`":
|
|
165
|
+
fragment += "`";
|
|
166
|
+
break;
|
|
167
|
+
case "$":
|
|
168
|
+
fragment += "$";
|
|
169
|
+
break;
|
|
170
|
+
case "{":
|
|
171
|
+
fragment += "{";
|
|
172
|
+
break;
|
|
173
|
+
default:
|
|
174
|
+
this.error(
|
|
175
|
+
`Unrecognized escape sequence in template: '\\${escapedChar}'.`,
|
|
176
|
+
"LEX005",
|
|
177
|
+
"Use valid escape sequences like '\\n', '\\t', '\\\\', '\\`', '\\$'."
|
|
178
|
+
);
|
|
179
|
+
}
|
|
180
|
+
this.advance(); // consume escaped character
|
|
181
|
+
} else if (currentFragmentChar === "\n") {
|
|
182
|
+
// <--- Handle literal newlines inside template fragments
|
|
183
|
+
fragment += currentFragmentChar;
|
|
184
|
+
this.advance();
|
|
185
|
+
} else {
|
|
186
|
+
fragment += currentFragmentChar;
|
|
187
|
+
this.advance();
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
if (fragment.length > 0) {
|
|
191
|
+
// Calculate length of the *original* source consumed for this fragment
|
|
192
|
+
return this._createToken(
|
|
193
|
+
TokenType.StringFragment,
|
|
194
|
+
fragment,
|
|
195
|
+
startLine,
|
|
196
|
+
startColumn,
|
|
197
|
+
startPosition,
|
|
198
|
+
this.position - startPosition
|
|
199
|
+
);
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
// Handle template literal start (` `)
|
|
204
|
+
if (char === "`") {
|
|
205
|
+
this.advance();
|
|
206
|
+
this.templateLiteralState = 1;
|
|
207
|
+
this.templateLiteralDepth++;
|
|
208
|
+
return this._createToken(
|
|
209
|
+
TokenType.Backtick,
|
|
210
|
+
"`",
|
|
211
|
+
startLine,
|
|
212
|
+
startColumn,
|
|
213
|
+
startPosition,
|
|
214
|
+
1
|
|
215
|
+
);
|
|
216
|
+
}
|
|
217
|
+
// Handle template interpolation end (})
|
|
218
|
+
if (this.templateLiteralState === 2 && char === "}") {
|
|
219
|
+
this.advance();
|
|
220
|
+
this.templateLiteralState = 3;
|
|
221
|
+
return this._createToken(
|
|
222
|
+
TokenType.InterpolationEnd,
|
|
223
|
+
"}",
|
|
224
|
+
startLine,
|
|
225
|
+
startColumn,
|
|
226
|
+
startPosition,
|
|
227
|
+
1
|
|
228
|
+
);
|
|
229
|
+
}
|
|
230
|
+
|
|
231
|
+
// Normal tokenizing mode
|
|
232
|
+
let token;
|
|
233
|
+
if (isAlpha(char)) {
|
|
234
|
+
token = readIdentifier(this);
|
|
235
|
+
} else if (isDigit(char)) {
|
|
236
|
+
token = readNumber(this);
|
|
237
|
+
} else if (char === '"') {
|
|
238
|
+
// Assuming only double quotes for strings based on current `readString`
|
|
239
|
+
token = readString(this);
|
|
240
|
+
} else {
|
|
241
|
+
token = readSymbol(this);
|
|
242
|
+
}
|
|
243
|
+
return token; // Tokenizers should return the token
|
|
244
|
+
}
|
|
245
|
+
}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
 * Canonical token-type tags produced by the lexer. Values are plain strings
 * so tokens serialize cleanly and compare with ===.
 */
export const TokenType = {
  Keyword: "keyword",
  Identifier: "identifier",
  Number: "number",
  String: "string",
  Boolean: "boolean",
  Null: "null",
  Operator: "operator",
  LParen: "lparen",
  RParen: "rparen",
  LBracket: "lbracket",
  RBracket: "rbracket",
  LBrace: "lbrace",
  RBrace: "rbrace",
  Comma: "comma",
  Range: "range",
  Slice: "slice",
  Spread: "spread",
  Backtick: "backtick",
  StringFragment: "string_fragment",
  InterpolationStart: "interpolation_start",
  InterpolationEnd: "interpolation_end",
  Colon: "colon",
  At: "at",
};

// Keywords that terminate (or branch) a block body.
export const END_KEYWORDS = [
  'end', 'else'
];

// Every reserved word recognized by readIdentifier.
export const KEYWORDS = [
  "set",
  "let",
  "const",
  "global",
  "destructure",
  "from",
  "if",
  "guard",
  "then",
  "when",
  "while",
  "function",
  "fn",
  "call",
  "show",
  "return",
  "try",
  "catch",
  "throw",
  "for",
  "in",
  "match",
  "case",
  "default",
  "break",
  "continue",
  "loop",
  "true",
  "false",
  "null",
  "import",
  "as",
  "export",
  "not",
  "and",
  "or",
  ...END_KEYWORDS
];

// Named punctuation characters (string forms, not token types).
export const Punctuators = {
  Semicolon: ";",
  Comma: ",",
  Dot: ".",
  Question: "?",
  Spread: "...",
  Colon: ":",
  LParen: "(",
  RParen: ")",
  LBracket: "[",
  RBracket: "]",
  LBrace: "{",
  RBrace: "}",
  Pipe: "|",
  Ampersand: "&",
};

// Keyword -> token-type lookup.
// FIX: several entries previously referenced nonexistent TokenType members
// (TokenType.If, .True, .False, .Import, .Export), so their values were
// `undefined`. Now consistent with readIdentifier: reserved words tag as
// Keyword, literal keywords as Boolean/Null.
export const Keywords = {
  set: TokenType.Keyword,
  let: TokenType.Keyword,
  const: TokenType.Keyword,
  global: TokenType.Keyword,
  if: TokenType.Keyword,
  while: TokenType.Keyword,
  function: TokenType.Keyword,
  call: TokenType.Keyword,
  show: TokenType.Keyword,
  return: TokenType.Keyword,
  try: TokenType.Keyword,
  catch: TokenType.Keyword,
  throw: TokenType.Keyword,
  for: TokenType.Keyword,
  in: TokenType.Keyword,
  match: TokenType.Keyword,
  case: TokenType.Keyword,
  default: TokenType.Keyword,
  break: TokenType.Keyword,
  continue: TokenType.Keyword,
  loop: TokenType.Keyword,
  true: TokenType.Boolean,
  false: TokenType.Boolean,
  null: TokenType.Null,
  import: TokenType.Keyword,
  export: TokenType.Keyword,
};

// Operator -> token-type lookup.
// FIX: every entry previously referenced nonexistent TokenType members
// (TokenType.Plus, .Equal, .And, ...), so all values were `undefined`.
// All operators now tag as Operator, matching the symbol tokenizer's
// SYMBOLS table.
export const Operators = {
  "+": TokenType.Operator,
  "-": TokenType.Operator,
  "=": TokenType.Operator,
  "==": TokenType.Operator,
  "===": TokenType.Operator,
  "!=": TokenType.Operator,
  "!==": TokenType.Operator,
  ">": TokenType.Operator,
  "<": TokenType.Operator,
  ">=": TokenType.Operator,
  "<=": TokenType.Operator,
  "&&": TokenType.Operator,
  "||": TokenType.Operator,
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Build a plain token record.
 *
 * @param {string} type - Token type tag (see TokenTypes.js).
 * @param {*} value - The token's decoded value.
 * @param {number} line - 1-based line where the token starts.
 * @param {number} column - 1-based column where the token starts.
 * @param {number} start - Absolute character index in the source string.
 * @param {number} length - Number of source characters the token spans.
 * @param {string} [file='unknown'] - Path of the originating file.
 * @returns {object} A token object carrying all of the above fields.
 */
export function createToken(type, value, line, column, start, length, file = 'unknown') {
  return { type, value, line, column, start, length, file };
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
 * True when the lexer is positioned at the start of a `//` or a multi-line
 * comment.
 */
export function isCommentStart(lexer) {
  if (lexer.peek() !== "/") return false;
  const next = lexer.peek(1);
  return next === "/" || next === "*";
}

/**
 * Consume a `//` comment up to — but not including — the terminating newline
 * (or end of file).
 */
export function skipSingleLineComment(lexer) {
  lexer.advance(); // first '/'
  lexer.advance(); // second '/'
  while (!lexer.isAtEnd() && lexer.peek() !== "\n") {
    lexer.advance();
  }
}

/**
 * Consume a multi-line comment including its closing delimiter. An
 * unterminated comment is consumed to end of file without error
 * (best-effort, matching lexer behavior elsewhere).
 */
export function skipMultiLineComment(lexer) {
  lexer.advance(); // '/'
  lexer.advance(); // '*'
  while (!lexer.isAtEnd()) {
    const atClose = lexer.peek() === "*" && lexer.peek(1) === "/";
    lexer.advance();
    if (atClose) {
      lexer.advance(); // past the closing '/'
      break;
    }
  }
}

/** Consume any run of consecutive comments of either kind. */
export function skipComments(lexer) {
  while (!lexer.isAtEnd()) {
    if (lexer.peek() !== "/") break;
    const next = lexer.peek(1);
    if (next === "/") {
      skipSingleLineComment(lexer);
    } else if (next === "*") {
      skipMultiLineComment(lexer);
    } else {
      break;
    }
  }
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { KEYWORDS, TokenType } from "../TokenTypes.js";
|
|
2
|
+
import { createToken } from "../createToken.js";
|
|
3
|
+
|
|
4
|
+
/**
 * True when `char` can begin a literal: an identifier/keyword, a number,
 * or a double-quoted string.
 */
export function isLiteralStart(char) {
  return isAlpha(char) || isDigit(char) || char === '"';
}

/**
 * Read an identifier, keyword, or word literal (true/false/null) starting at
 * the lexer's current position.
 *
 * @param {object} lexer - Lexer providing peek/advance/_createToken.
 * @returns {object} A Boolean, Null, Keyword, or Identifier token.
 */
export function readIdentifier(lexer) {
  const startLine = lexer.line;
  const startColumn = lexer.column;
  const startPosition = lexer.position; // absolute source index of the token
  let value = "";

  // Consume [a-zA-Z0-9_]+ (first char was already validated by the caller).
  while (!lexer.isAtEnd() && isAlphaNumeric(lexer.peek())) {
    value += lexer.peek();
    lexer.advance();
  }

  const length = value.length; // identifiers span exactly value.length chars

  // Word literals get dedicated token types with decoded JS values.
  if (value === "true") {
    return lexer._createToken(TokenType.Boolean, true, startLine, startColumn, startPosition, length);
  }
  if (value === "false") {
    return lexer._createToken(TokenType.Boolean, false, startLine, startColumn, startPosition, length);
  }
  if (value === "null") {
    return lexer._createToken(TokenType.Null, null, startLine, startColumn, startPosition, length);
  }

  // KEYWORDS is an array, so membership is an .includes() check.
  if (KEYWORDS.includes(value)) {
    return lexer._createToken(TokenType.Keyword, value, startLine, startColumn, startPosition, length);
  }

  // Anything else is a plain identifier.
  return lexer._createToken(TokenType.Identifier, value, startLine, startColumn, startPosition, length);
}

/**
 * Read a numeric literal: integer, decimal, or exponent form (1e5, 1.5e-3).
 *
 * Note the control flow: the main loop appends the current char and advances
 * at the bottom, EXCEPT in the exponent branch, which advances manually and
 * then `break`s (so the bottom advance must not run again for those chars).
 *
 * NOTE(review): an exponent marker followed by a sign but no digits (e.g.
 * "1e+") is consumed into `value`; parseFloat then parses only the mantissa.
 * Confirm this lenient behavior is intended.
 *
 * @param {object} lexer - Lexer providing peek/advance/_createToken.
 * @returns {object} A Number token whose value is parseFloat of the lexeme.
 */
export function readNumber(lexer) {
  const startLine = lexer.line;
  const startCol = lexer.column;
  const startPosition = lexer.position; // absolute source index of the token
  let value = "";
  let hasDecimal = false;
  let hasExponent = false;

  while (!lexer.isAtEnd()) {
    const char = lexer.peek();
    if (isDigit(char)) {
      value += char;
    } else if (char === "." && !hasDecimal && !hasExponent && isDigit(lexer.peek(1))) {
      // A '.' only continues the number when followed by a digit, so ranges
      // like "1..5" are not swallowed.
      value += char;
      hasDecimal = true;
    } else if ((char === "e" || char === "E") && !hasExponent) {
      // Only treat 'e'/'E' as an exponent when a sign or digit follows.
      const nextChar = lexer.peek(1);
      if (nextChar === '+' || nextChar === '-' || isDigit(nextChar)) {
        value += char; // add 'e' or 'E'
        lexer.advance(); // consume 'e' or 'E'
        if (nextChar === '+' || nextChar === '-') {
          value += nextChar; // add sign
          lexer.advance(); // consume sign
        }
        // Read the exponent digits.
        while (!lexer.isAtEnd() && isDigit(lexer.peek())) {
          value += lexer.peek();
          lexer.advance();
        }
        hasExponent = true;
        // Everything for the exponent was advanced manually above; break so
        // the loop-bottom advance() does not skip an extra character.
        break;
      } else {
        // "123e" followed by something else: the 'e' is not part of the number.
        break;
      }
    } else {
      break;
    }
    lexer.advance();
  }
  // value.length equals the number of source characters consumed here.
  return lexer._createToken(TokenType.Number, parseFloat(value), startLine, startCol, startPosition, value.length);
}

/**
 * Read a quoted string literal starting at the opening quote. Supports the
 * escapes \n, \t, \r, \\ and \" and rejects raw newlines inside the string.
 *
 * @param {object} lexer - Lexer providing peek/advance/_createToken/error.
 * @returns {object} A String token; the token length includes both quotes
 *   and counts source characters (escapes are two source chars each).
 * @throws Via lexer.error on raw newline, bad escape, or unterminated string.
 */
export function readString(lexer) {
  const startLine = lexer.line;
  const startCol = lexer.column;
  const startPos = lexer.position;
  const quote = lexer.peek(); // remember which quote opened the string
  lexer.advance(); // consume opening quote

  let value = "";

  while (!lexer.isAtEnd()) {
    const char = lexer.peek();

    if (char === quote) {
      // Closing quote: token spans from opening to closing quote inclusive.
      lexer.advance();
      const tokenLength = lexer.position - startPos;
      return lexer._createToken(TokenType.String, value, startLine, startCol, startPos, tokenLength);
    }

    if (char === '\n') {
      // Raw newlines are not allowed inside single-line string literals.
      lexer.error(
        "Unterminated string literal. Newline encountered.",
        'LEX004',
        "String literals must be closed on the same line or use '\\n' for a newline character."
      );
    }

    if (char === '\\') {
      lexer.advance(); // consume the backslash

      if (lexer.isAtEnd()) {
        lexer.error("Unterminated escape sequence at end of file.", 'LEX005');
      }

      // Decode the escape into the actual character.
      const escapedChar = lexer.peek();
      switch (escapedChar) {
        case 'n': value += '\n'; break;
        case 't': value += '\t'; break;
        case 'r': value += '\r'; break;
        case '\\': value += '\\'; break;
        case '"': value += '"'; break; // handles \"
        // NOTE(review): \' is not supported; add here if single quotes are.
        default:
          lexer.error(`Invalid escape sequence: \\${escapedChar}`, 'LEX003');
      }
    } else {
      // Regular character: copied verbatim.
      value += char;
    }

    lexer.advance(); // move past the (escaped or plain) character
  }

  // Loop fell through: end of file before the closing quote.
  lexer.error(
    "Unterminated string literal.",
    'LEX005',
    `A string starting with ${quote} was not properly closed.`
  );
}

// Character-class helpers (single-character tests; underscore counts as a
// letter for identifiers).
export function isAlpha(char) {
  return /^[a-zA-Z_]$/.test(char);
}

export function isAlphaNumeric(char) {
  return /^[a-zA-Z0-9_]$/.test(char);
}

export function isDigit(char) {
  return /^[0-9]$/.test(char);
}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import { TokenType } from "../TokenTypes.js";
|
|
2
|
+
import { createToken } from "../createToken.js";
|
|
3
|
+
|
|
4
|
+
/** Every punctuation/operator lexeme the lexer recognizes, mapped to its token type. */
export const SYMBOLS = {
  "(": TokenType.LParen,
  ")": TokenType.RParen,
  "[": TokenType.LBracket,
  "]": TokenType.RBracket,
  "{": TokenType.LBrace,
  "}": TokenType.RBrace,
  ",": TokenType.Comma,
  "?.": TokenType.Operator,
  ".": TokenType.Operator,
  "+": TokenType.Operator,
  "-": TokenType.Operator,
  "*": TokenType.Operator,
  "/": TokenType.Operator,
  "%": TokenType.Operator,
  ">": TokenType.Operator,
  "<": TokenType.Operator,
  "=": TokenType.Operator,
  "==": TokenType.Operator,
  "===": TokenType.Operator,
  "!": TokenType.Operator,
  "!=": TokenType.Operator,
  "!==": TokenType.Operator,
  ">=": TokenType.Operator,
  "<=": TokenType.Operator,
  "&&": TokenType.Operator,
  "||": TokenType.Operator,
  "|>": TokenType.Operator, // Pipe operator
  "->": TokenType.Operator, // Arrow operator
  "??": TokenType.Operator, // Null coalescing

  "...": TokenType.Spread, // Spread operator
  "..": TokenType.Range, // Range operator
  ":": TokenType.Colon,
  "@": TokenType.At,
};

// Symbol lexemes sorted longest-first so multi-character operators ("===",
// "...") match before their prefixes ("==", "..").
// FIX: hoisted to module level — previously this sort ran on every single
// readSymbol call.
const SORTED_SYMBOLS = Object.keys(SYMBOLS).sort((a, b) => b.length - a.length);

/**
 * Read a punctuation/operator token at the lexer's current position.
 *
 * @param {object} lexer - Lexer providing source/peek/advance/_createToken/error.
 * @returns {object} The matched symbol token.
 * @throws Via lexer.error (LEX007) when no known symbol starts here.
 */
export function readSymbol(lexer) {
  const startLine = lexer.line;
  const startColumn = lexer.column;
  const startPosition = lexer.position; // absolute source index of the token

  for (const symbolString of SORTED_SYMBOLS) {
    if (lexer.source.startsWith(symbolString, lexer.position)) {
      // Consume each character through advance() so line/column stay correct.
      for (let i = 0; i < symbolString.length; i++) {
        lexer.advance();
      }
      return lexer._createToken(
        SYMBOLS[symbolString],
        symbolString,
        startLine,
        startColumn,
        startPosition,
        symbolString.length
      );
    }
  }

  // No known symbol starts at this position: report the offending character.
  const char = lexer.peek();
  lexer.error(
    `Unrecognized symbol or character: '${char}'.`,
    'LEX007',
    `The symbol or character '${char}' is not recognized. Check for typos or unsupported operators.`
  );
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { skipComments } from "./commentTokenizer.js";
|
|
2
|
+
|
|
3
|
+
/** True for the characters the lexer treats as insignificant whitespace. */
export function isWhitespace(char) {
  return char === " " || char === "\t" || char === "\n" || char === "\r";
}

/**
 * Skip whitespace AND comments until the next significant character.
 *
 * FIX: now also skips "\r". Previously only " ", "\t" and "\n" were skipped
 * even though isWhitespace accepts "\r", so CRLF-formatted sources sent a
 * stray carriage return to the symbol tokenizer ("Unrecognized symbol").
 *
 * @param {object} lexer - Lexer providing isAtEnd/peek/advance.
 */
export function skipWhitespace(lexer) {
  while (!lexer.isAtEnd()) {
    const char = lexer.peek();

    if (isWhitespace(char)) {
      lexer.advance();
    } else if (char === "/" && (lexer.peek(1) === "/" || lexer.peek(1) === "*")) {
      // Comments are treated as whitespace by the lexer.
      skipComments(lexer);
    } else {
      break;
    }
  }
}

/**
 * Skip whitespace only — no comment handling. Same "\r" fix as above.
 *
 * @param {object} lexer - Lexer providing isAtEnd/peek/advance.
 */
export function skipSimpleWhitespace(lexer) {
  while (!lexer.isAtEnd() && isWhitespace(lexer.peek())) {
    lexer.advance();
  }
}
|