mimo-lang 1.1.1 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165)
  1. package/.gitattributes +24 -0
  2. package/LICENSE +21 -0
  3. package/README.md +71 -39
  4. package/adapters/browserAdapter.js +86 -0
  5. package/adapters/nodeAdapter.js +101 -0
  6. package/bin/cli.js +80 -0
  7. package/bin/commands/convert.js +27 -0
  8. package/bin/commands/doctor.js +139 -0
  9. package/bin/commands/eval.js +39 -0
  10. package/bin/commands/fmt.js +109 -0
  11. package/bin/commands/help.js +72 -0
  12. package/bin/commands/lint.js +117 -0
  13. package/bin/commands/repl.js +24 -0
  14. package/bin/commands/run.js +64 -0
  15. package/bin/commands/test.js +126 -0
  16. package/bin/utils/colors.js +38 -0
  17. package/bin/utils/formatError.js +47 -0
  18. package/bin/utils/fs.js +57 -0
  19. package/bin/utils/version.js +8 -0
  20. package/build.js +18 -0
  21. package/bun.lock +74 -0
  22. package/index.js +48 -77
  23. package/index.web.js +364 -0
  24. package/interpreter/BuiltinFunction.js +32 -0
  25. package/interpreter/ErrorHandler.js +120 -0
  26. package/interpreter/ExpressionEvaluator.js +106 -0
  27. package/interpreter/Interpreter.js +172 -0
  28. package/interpreter/MimoError.js +112 -0
  29. package/interpreter/ModuleLoader.js +236 -0
  30. package/interpreter/StatementExecutor.js +107 -0
  31. package/interpreter/Utils.js +82 -0
  32. package/interpreter/Values.js +87 -0
  33. package/interpreter/coreBuiltins.js +490 -0
  34. package/interpreter/environment.js +99 -0
  35. package/interpreter/evaluators/binaryExpressionEvaluator.js +111 -0
  36. package/interpreter/evaluators/collectionEvaluator.js +151 -0
  37. package/interpreter/evaluators/functionCallEvaluator.js +76 -0
  38. package/interpreter/evaluators/literalEvaluator.js +27 -0
  39. package/interpreter/evaluators/moduleAccessEvaluator.js +25 -0
  40. package/interpreter/evaluators/templateLiteralEvaluator.js +20 -0
  41. package/interpreter/executors/BaseExecutor.js +37 -0
  42. package/interpreter/executors/ControlFlowExecutor.js +206 -0
  43. package/interpreter/executors/FunctionExecutor.js +126 -0
  44. package/interpreter/executors/PatternMatchExecutor.js +93 -0
  45. package/interpreter/executors/VariableExecutor.js +144 -0
  46. package/interpreter/index.js +8 -0
  47. package/interpreter/stdlib/array/accessFunctions.js +61 -0
  48. package/interpreter/stdlib/array/arrayUtils.js +36 -0
  49. package/interpreter/stdlib/array/higherOrderFunctions.js +285 -0
  50. package/interpreter/stdlib/array/searchFunctions.js +77 -0
  51. package/interpreter/stdlib/array/setFunctions.js +49 -0
  52. package/interpreter/stdlib/array/transformationFunctions.js +68 -0
  53. package/interpreter/stdlib/array.js +85 -0
  54. package/interpreter/stdlib/assert.js +143 -0
  55. package/interpreter/stdlib/datetime.js +170 -0
  56. package/interpreter/stdlib/env.js +54 -0
  57. package/interpreter/stdlib/fs.js +161 -0
  58. package/interpreter/stdlib/http.js +92 -0
  59. package/interpreter/stdlib/json.js +70 -0
  60. package/interpreter/stdlib/math.js +309 -0
  61. package/interpreter/stdlib/object.js +142 -0
  62. package/interpreter/stdlib/path.js +69 -0
  63. package/interpreter/stdlib/regex.js +134 -0
  64. package/interpreter/stdlib/string.js +260 -0
  65. package/interpreter/suggestions.js +46 -0
  66. package/lexer/Lexer.js +245 -0
  67. package/lexer/TokenTypes.js +131 -0
  68. package/lexer/createToken.js +11 -0
  69. package/lexer/tokenizers/commentTokenizer.js +45 -0
  70. package/lexer/tokenizers/literalTokenizer.js +163 -0
  71. package/lexer/tokenizers/symbolTokenizer.js +69 -0
  72. package/lexer/tokenizers/whitespaceTokenizer.js +36 -0
  73. package/package.json +29 -13
  74. package/parser/ASTNodes.js +448 -0
  75. package/parser/Parser.js +188 -0
  76. package/parser/expressions/atomicExpressions.js +165 -0
  77. package/parser/expressions/conditionalExpressions.js +0 -0
  78. package/parser/expressions/operatorExpressions.js +79 -0
  79. package/parser/expressions/primaryExpressions.js +77 -0
  80. package/parser/parseStatement.js +184 -0
  81. package/parser/parserExpressions.js +115 -0
  82. package/parser/parserUtils.js +19 -0
  83. package/parser/statements/controlFlowParsers.js +106 -0
  84. package/parser/statements/functionParsers.js +314 -0
  85. package/parser/statements/moduleParsers.js +57 -0
  86. package/parser/statements/patternMatchParsers.js +124 -0
  87. package/parser/statements/variableParsers.js +155 -0
  88. package/repl.js +325 -0
  89. package/test.js +47 -0
  90. package/tools/PrettyPrinter.js +3 -0
  91. package/tools/convert/Args.js +46 -0
  92. package/tools/convert/Registry.js +91 -0
  93. package/tools/convert/Transpiler.js +78 -0
  94. package/tools/convert/plugins/README.md +66 -0
  95. package/tools/convert/plugins/alya/index.js +10 -0
  96. package/tools/convert/plugins/alya/to_alya.js +289 -0
  97. package/tools/convert/plugins/alya/visitors/expressions.js +257 -0
  98. package/tools/convert/plugins/alya/visitors/statements.js +403 -0
  99. package/tools/convert/plugins/base_converter.js +228 -0
  100. package/tools/convert/plugins/javascript/index.js +10 -0
  101. package/tools/convert/plugins/javascript/mimo_runtime.js +265 -0
  102. package/tools/convert/plugins/javascript/to_js.js +155 -0
  103. package/tools/convert/plugins/javascript/visitors/expressions.js +197 -0
  104. package/tools/convert/plugins/javascript/visitors/patterns.js +102 -0
  105. package/tools/convert/plugins/javascript/visitors/statements.js +236 -0
  106. package/tools/convert/plugins/python/index.js +10 -0
  107. package/tools/convert/plugins/python/mimo_runtime.py +811 -0
  108. package/tools/convert/plugins/python/to_py.js +329 -0
  109. package/tools/convert/plugins/python/visitors/expressions.js +272 -0
  110. package/tools/convert/plugins/python/visitors/patterns.js +100 -0
  111. package/tools/convert/plugins/python/visitors/statements.js +257 -0
  112. package/tools/convert.js +102 -0
  113. package/tools/format/CommentAttacher.js +190 -0
  114. package/tools/format/CommentLexer.js +152 -0
  115. package/tools/format/Printer.js +849 -0
  116. package/tools/format/config.js +107 -0
  117. package/tools/formatter.js +169 -0
  118. package/tools/lint/Linter.js +391 -0
  119. package/tools/lint/config.js +114 -0
  120. package/tools/lint/rules/consistent-return.js +62 -0
  121. package/tools/lint/rules/max-depth.js +56 -0
  122. package/tools/lint/rules/no-empty-function.js +45 -0
  123. package/tools/lint/rules/no-magic-numbers.js +46 -0
  124. package/tools/lint/rules/no-shadow.js +113 -0
  125. package/tools/lint/rules/no-unused-vars.js +26 -0
  126. package/tools/lint/rules/prefer-const.js +19 -0
  127. package/tools/linter.js +261 -0
  128. package/tools/replFormatter.js +93 -0
  129. package/tools/stamp-version.js +32 -0
  130. package/web/index.js +9 -0
  131. package/bun.lockb +0 -0
  132. package/cli.js +0 -84
  133. package/compiler/execute/interpreter.js +0 -68
  134. package/compiler/execute/interpreters/binary.js +0 -12
  135. package/compiler/execute/interpreters/call.js +0 -10
  136. package/compiler/execute/interpreters/if.js +0 -10
  137. package/compiler/execute/interpreters/try-catch.js +0 -10
  138. package/compiler/execute/interpreters/while.js +0 -8
  139. package/compiler/execute/utils/createfunction.js +0 -11
  140. package/compiler/execute/utils/evaluate.js +0 -20
  141. package/compiler/execute/utils/operate.js +0 -23
  142. package/compiler/lexer/processToken.js +0 -40
  143. package/compiler/lexer/tokenTypes.js +0 -4
  144. package/compiler/lexer/tokenizer.js +0 -74
  145. package/compiler/parser/expression/comparison.js +0 -18
  146. package/compiler/parser/expression/identifier.js +0 -29
  147. package/compiler/parser/expression/number.js +0 -10
  148. package/compiler/parser/expression/operator.js +0 -21
  149. package/compiler/parser/expression/punctuation.js +0 -31
  150. package/compiler/parser/expression/string.js +0 -6
  151. package/compiler/parser/parseExpression.js +0 -27
  152. package/compiler/parser/parseStatement.js +0 -34
  153. package/compiler/parser/parser.js +0 -45
  154. package/compiler/parser/statement/call.js +0 -26
  155. package/compiler/parser/statement/function.js +0 -29
  156. package/compiler/parser/statement/if.js +0 -34
  157. package/compiler/parser/statement/return.js +0 -10
  158. package/compiler/parser/statement/set.js +0 -11
  159. package/compiler/parser/statement/show.js +0 -10
  160. package/compiler/parser/statement/try-catch.js +0 -25
  161. package/compiler/parser/statement/while.js +0 -22
  162. package/converter/go/convert.js +0 -110
  163. package/converter/js/convert.js +0 -107
  164. package/jsconfig.json +0 -27
  165. package/vite.config.js +0 -17
package/lexer/Lexer.js ADDED
@@ -0,0 +1,245 @@
1
+ import { TokenType } from "./TokenTypes.js";
2
+ import { createToken as _createTokenRaw } from "./createToken.js"; // <--- Add this import
3
+ import { skipWhitespace } from "./tokenizers/whitespaceTokenizer.js";
4
+ import {
5
+ isLiteralStart,
6
+ readIdentifier,
7
+ readNumber,
8
+ readString,
9
+ isAlpha,
10
+ isDigit,
11
+ } from "./tokenizers/literalTokenizer.js";
12
+ import { readSymbol } from "./tokenizers/symbolTokenizer.js";
13
+ import { MimoError } from "../interpreter/MimoError.js";
14
+
15
// Lexer for the Mimo language: converts source text into a stream of tokens.
// Template literals are handled with a small state machine:
//   0: normal tokenizing
//   1: inside a template, expecting StringFragment, "${", or closing "`"
//   2: inside "${...}", tokenizing a normal expression
//   3: just saw "}", expecting StringFragment, "${", or closing "`"
export class Lexer {
  /**
   * @param {string} source   Program text to tokenize.
   * @param {string} filePath Path reported in token and error locations.
   */
  constructor(source, filePath = "unknown") {
    this.source = source;
    this.filePath = filePath;
    this.position = 0;
    this.line = 1;
    this.column = 1;
    this.previousToken = null;

    // Template-literal state machine (see class comment).
    this.templateLiteralState = 0;
    this.templateLiteralDepth = 0; // for nested templates, if ever supported
  }

  /** True when the whole source has been consumed. */
  isAtEnd() {
    return this.position >= this.source.length;
  }

  /** Look ahead `offset` characters without consuming; null past EOF. */
  peek(offset = 0) {
    return this.position + offset < this.source.length
      ? this.source[this.position + offset]
      : null;
  }

  /** Consume one character, maintaining the line/column counters. */
  advance() {
    if (!this.isAtEnd()) {
      const char = this.source[this.position];
      if (char === "\n") {
        this.line++;
        this.column = 1;
      } else {
        this.column++;
      }
      this.position++;
    }
  }

  // Internal helper for the lexer's own token creation (e.g. template
  // literal tokens); external tokenizers also call this so every token
  // carries this lexer's file path.
  _createToken(type, value, startLine, startColumn, startPosition, length) {
    return _createTokenRaw(
      type,
      value,
      startLine,
      startColumn,
      startPosition,
      length,
      this.filePath
    );
  }

  /**
   * Raise a lexer error at the current position.
   * @throws {MimoError} always
   */
  error(message, code = "LEX000", suggestion = "") {
    // Token-like object so MimoError can report a source location.
    const errorToken = {
      value:
        this.peek() ||
        this.source.substring(this.position - 1, this.position) ||
        "",
      line: this.line,
      column: this.column,
      start: this.position,
      length: 1,
      file: this.filePath,
    };
    // Argument order: MimoError.lexerError(code, message, token, suggestion).
    throw MimoError.lexerError(code, message, errorToken, suggestion);
  }

  /** Produce the next token, or null at end of input. */
  nextToken() {
    // FIXED: inside a template fragment (states 1/3) whitespace is part of
    // the string, so it must NOT be skipped; previously leading spaces at
    // the start of a fragment were silently dropped.
    if (this.templateLiteralState !== 1 && this.templateLiteralState !== 3) {
      skipWhitespace(this);
    }

    if (this.isAtEnd()) return null;

    // FIXED: capture the token's start AFTER whitespace/comments have been
    // skipped; previously token positions (and StringFragment lengths,
    // computed as position - startPosition) included the skipped whitespace.
    const startLine = this.line;
    const startColumn = this.column;
    const startPosition = this.position;

    const char = this.peek();

    // Template literal states 1/3: fragment, interpolation start, or close.
    if (this.templateLiteralState === 1 || this.templateLiteralState === 3) {
      if (this.peek() === "`") {
        this.advance();
        this.templateLiteralState = 0;
        this.templateLiteralDepth--;
        return this._createToken(
          TokenType.Backtick,
          "`",
          startLine,
          startColumn,
          startPosition,
          1
        );
      }

      if (this.peek() === "$" && this.peek(1) === "{") {
        this.advance();
        this.advance();
        this.templateLiteralState = 2;
        return this._createToken(
          TokenType.InterpolationStart,
          "${",
          startLine,
          startColumn,
          startPosition,
          2
        );
      }

      // Read a raw string fragment, decoding escape sequences as we go.
      let fragment = "";
      while (
        !this.isAtEnd() &&
        this.peek() !== "`" &&
        !(this.peek() === "$" && this.peek(1) === "{")
      ) {
        const fragmentChar = this.peek();
        if (fragmentChar === "\\") {
          this.advance(); // consume backslash
          if (this.isAtEnd()) {
            this.error(
              "Unterminated escape sequence in template fragment.",
              "LEX004",
              "Complete the escape sequence or close the template literal."
            );
          }
          const escapedChar = this.peek();
          switch (escapedChar) {
            case "n":
              fragment += "\n";
              break;
            case "t":
              fragment += "\t";
              break;
            case "r":
              fragment += "\r";
              break;
            case "\\":
              fragment += "\\";
              break;
            case "`":
              fragment += "`";
              break;
            case "$":
              fragment += "$";
              break;
            case "{":
              fragment += "{";
              break;
            default:
              this.error(
                `Unrecognized escape sequence in template: '\\${escapedChar}'.`,
                "LEX005",
                "Use valid escape sequences like '\\n', '\\t', '\\\\', '\\`', '\\$'."
              );
          }
          this.advance(); // consume escaped character
        } else {
          // Literal characters — including raw newlines — are kept verbatim.
          fragment += fragmentChar;
          this.advance();
        }
      }
      if (fragment.length > 0) {
        // Length is the span of *source* characters consumed, so escape
        // sequences count as two characters.
        return this._createToken(
          TokenType.StringFragment,
          fragment,
          startLine,
          startColumn,
          startPosition,
          this.position - startPosition
        );
      }
    }

    // Template literal start (`).
    if (char === "`") {
      this.advance();
      this.templateLiteralState = 1;
      this.templateLiteralDepth++;
      return this._createToken(
        TokenType.Backtick,
        "`",
        startLine,
        startColumn,
        startPosition,
        1
      );
    }

    // End of an interpolation expression (}).
    if (this.templateLiteralState === 2 && char === "}") {
      this.advance();
      this.templateLiteralState = 3;
      return this._createToken(
        TokenType.InterpolationEnd,
        "}",
        startLine,
        startColumn,
        startPosition,
        1
      );
    }

    // Normal tokenizing: dispatch on the first character.
    if (isAlpha(char)) {
      return readIdentifier(this);
    }
    if (isDigit(char)) {
      return readNumber(this);
    }
    if (char === '"') {
      // Only double-quoted strings are supported by readString.
      return readString(this);
    }
    return readSymbol(this);
  }
}
@@ -0,0 +1,131 @@
1
// Token type tags shared by the lexer, parser, and tools.
export const TokenType = {
  Keyword: "keyword",
  Identifier: "identifier",
  Number: "number",
  String: "string",
  Boolean: "boolean",
  Null: "null",
  Operator: "operator",
  LParen: "lparen",
  RParen: "rparen",
  LBracket: "lbracket",
  RBracket: "rbracket",
  LBrace: "lbrace",
  RBrace: "rbrace",
  Comma: "comma",
  Range: "range",
  Slice: "slice",
  Spread: "spread",
  Backtick: "backtick",
  StringFragment: "string_fragment",
  InterpolationStart: "interpolation_start",
  InterpolationEnd: "interpolation_end",
  Colon: "colon",
  At: "at",
};

// Keywords that terminate a block.
export const END_KEYWORDS = ["end", "else"];

// Every reserved word of the language (includes the block terminators).
export const KEYWORDS = [
  "set",
  "let",
  "const",
  "global",
  "destructure",
  "from",
  "if",
  "guard",
  "then",
  "when",
  "while",
  "function",
  "fn",
  "call",
  "show",
  "return",
  "try",
  "catch",
  "throw",
  "for",
  "in",
  "match",
  "case",
  "default",
  "break",
  "continue",
  "loop",
  "true",
  "false",
  "null",
  "import",
  "as",
  "export",
  "not",
  "and",
  "or",
  ...END_KEYWORDS,
];

// Punctuation characters by name.
export const Punctuators = {
  Semicolon: ";",
  Comma: ",",
  Dot: ".",
  Question: "?",
  Spread: "...",
  Colon: ":",
  LParen: "(",
  RParen: ")",
  LBracket: "[",
  RBracket: "]",
  LBrace: "{",
  RBrace: "}",
  Pipe: "|",
  Ampersand: "&",
};

// Keyword -> token type lookup.
// FIXED: several entries previously referenced TokenType members that do not
// exist (TokenType.If, TokenType.True, TokenType.False, TokenType.Import,
// TokenType.Export), so those keywords mapped to `undefined`. They now map
// to the token types that actually exist (literals keep their literal types).
export const Keywords = {
  set: TokenType.Keyword,
  let: TokenType.Keyword,
  const: TokenType.Keyword,
  global: TokenType.Keyword,
  if: TokenType.Keyword,
  while: TokenType.Keyword,
  function: TokenType.Keyword,
  call: TokenType.Keyword,
  show: TokenType.Keyword,
  return: TokenType.Keyword,
  try: TokenType.Keyword,
  catch: TokenType.Keyword,
  throw: TokenType.Keyword,
  for: TokenType.Keyword,
  in: TokenType.Keyword,
  match: TokenType.Keyword,
  case: TokenType.Keyword,
  default: TokenType.Keyword,
  break: TokenType.Keyword,
  continue: TokenType.Keyword,
  loop: TokenType.Keyword,
  true: TokenType.Boolean,
  false: TokenType.Boolean,
  null: TokenType.Null,
  import: TokenType.Keyword,
  export: TokenType.Keyword,
};

// Operator -> token type lookup.
// FIXED: every entry previously referenced a nonexistent TokenType member
// (TokenType.Plus, TokenType.EqualEqual, ...) and was therefore `undefined`;
// the lexer's symbolTokenizer tags all of these as TokenType.Operator, so
// this table now agrees with it.
export const Operators = {
  "+": TokenType.Operator,
  "-": TokenType.Operator,
  "=": TokenType.Operator,
  "==": TokenType.Operator,
  "===": TokenType.Operator,
  "!=": TokenType.Operator,
  "!==": TokenType.Operator,
  ">": TokenType.Operator,
  "<": TokenType.Operator,
  ">=": TokenType.Operator,
  "<=": TokenType.Operator,
  "&&": TokenType.Operator,
  "||": TokenType.Operator,
};
@@ -0,0 +1,11 @@
1
/**
 * Build a plain token record.
 *
 * @param {string} type   Token type tag (see TokenTypes.js).
 * @param {*}      value  Decoded token value (string, number, boolean, null).
 * @param {number} line   1-based line of the token's first character.
 * @param {number} column 1-based column of the token's first character.
 * @param {number} start  Starting character index in the source string.
 * @param {number} length Number of source characters the token spans.
 * @param {string} [file] Path of the source file the token came from.
 * @returns {object} the token record
 */
export function createToken(type, value, line, column, start, length, file = 'unknown') {
  const token = {
    type,
    value,
    line,
    column,
    start,
    length,
    file,
  };
  return token;
}
@@ -0,0 +1,45 @@
1
/** True when the lexer is positioned at the start of a `//` or slash-star comment. */
export function isCommentStart(lexer) {
  if (lexer.peek() !== "/") return false;
  const next = lexer.peek(1);
  return next === "/" || next === "*";
}

/**
 * Consume a `//` comment up to (but not including) the terminating newline
 * or end of file.
 */
export function skipSingleLineComment(lexer) {
  lexer.advance(); // '/'
  lexer.advance(); // '/'
  for (;;) {
    if (lexer.isAtEnd() || lexer.peek() === "\n") break;
    lexer.advance();
  }
}

/**
 * Consume a slash-star comment including its closing marker. An unterminated
 * comment is consumed to end of file without error.
 */
export function skipMultiLineComment(lexer) {
  lexer.advance(); // '/'
  lexer.advance(); // '*'
  let closed = false;
  while (!lexer.isAtEnd() && !closed) {
    if (lexer.peek() === "*" && lexer.peek(1) === "/") {
      lexer.advance();
      lexer.advance();
      closed = true;
    } else {
      lexer.advance();
    }
  }
}

/**
 * Consume any run of consecutive comments starting at the current position.
 * Stops at the first non-comment character (a newline after a `//` comment
 * is left unconsumed).
 */
export function skipComments(lexer) {
  for (;;) {
    if (lexer.isAtEnd() || lexer.peek() !== "/") break;
    const next = lexer.peek(1);
    if (next === "/") {
      skipSingleLineComment(lexer);
    } else if (next === "*") {
      skipMultiLineComment(lexer);
    } else {
      break;
    }
  }
}
@@ -0,0 +1,163 @@
1
+ import { KEYWORDS, TokenType } from "../TokenTypes.js";
2
+ import { createToken } from "../createToken.js";
3
+
4
// Character-class helpers ----------------------------------------------------

/** Letters and underscore may begin an identifier. */
export function isAlpha(char) {
  return /^[a-zA-Z_]$/.test(char);
}

/** Letters, digits, and underscore may continue an identifier. */
export function isAlphaNumeric(char) {
  return /^[a-zA-Z0-9_]$/.test(char);
}

/** ASCII decimal digit. */
export function isDigit(char) {
  return /^[0-9]$/.test(char);
}

/** True when `char` can begin an identifier, number, or double-quoted string. */
export function isLiteralStart(char) {
  return isAlpha(char) || isDigit(char) || char === '"';
}

// Escape letters valid inside "..." string literals and their decoded values.
const STRING_ESCAPES = { n: "\n", t: "\t", r: "\r", "\\": "\\", '"': '"' };

/**
 * Read an identifier or word-like literal starting at the lexer's position.
 * `true`/`false`/`null` produce literal tokens carrying their runtime value;
 * other reserved words produce Keyword tokens; everything else is Identifier.
 */
export function readIdentifier(lexer) {
  const line = lexer.line;
  const column = lexer.column;
  const start = lexer.position;

  let word = "";
  while (!lexer.isAtEnd() && isAlphaNumeric(lexer.peek())) {
    word += lexer.peek();
    lexer.advance();
  }

  const len = word.length;

  // Literal keywords carry their decoded value, not the raw word.
  if (word === "true" || word === "false") {
    return lexer._createToken(TokenType.Boolean, word === "true", line, column, start, len);
  }
  if (word === "null") {
    return lexer._createToken(TokenType.Null, null, line, column, start, len);
  }

  const type = KEYWORDS.includes(word) ? TokenType.Keyword : TokenType.Identifier;
  return lexer._createToken(type, word, line, column, start, len);
}

/**
 * Read a numeric literal: integer, decimal, and optional `e`/`E` exponent
 * with optional sign. The token value is the parsed number.
 */
export function readNumber(lexer) {
  const line = lexer.line;
  const column = lexer.column;
  const start = lexer.position;
  let digits = "";
  let seenDot = false;

  while (!lexer.isAtEnd()) {
    const ch = lexer.peek();

    if (isDigit(ch)) {
      digits += ch;
      lexer.advance();
      continue;
    }

    // A dot continues the number only when it is the first dot and is
    // followed by a digit, so "1..5" still lexes as a range, not "1.".
    if (ch === "." && !seenDot && isDigit(lexer.peek(1))) {
      digits += ch;
      seenDot = true;
      lexer.advance();
      continue;
    }

    // Exponent part: 'e'/'E' followed by an optional sign and digits.
    // An 'e' NOT followed by a sign/digit is left unconsumed for the
    // next token (e.g. "123e" -> number 123, identifier would follow).
    if (ch === "e" || ch === "E") {
      const after = lexer.peek(1);
      if (after === "+" || after === "-" || isDigit(after)) {
        digits += ch;
        lexer.advance(); // consume 'e'/'E'
        if (after === "+" || after === "-") {
          digits += after;
          lexer.advance(); // consume sign
        }
        while (!lexer.isAtEnd() && isDigit(lexer.peek())) {
          digits += lexer.peek();
          lexer.advance();
        }
      }
      break; // the exponent (valid or not) always ends the number
    }

    break;
  }

  return lexer._createToken(TokenType.Number, parseFloat(digits), line, column, start, digits.length);
}

/**
 * Read a double-quoted string literal, decoding escape sequences.
 * Raises a lexer error on raw newlines, invalid escapes, or an
 * unterminated literal. Token length covers the quotes and escapes
 * as they appear in the source.
 */
export function readString(lexer) {
  const line = lexer.line;
  const column = lexer.column;
  const start = lexer.position;
  const quote = lexer.peek();
  lexer.advance(); // consume opening quote

  let text = "";

  while (!lexer.isAtEnd()) {
    const ch = lexer.peek();

    if (ch === quote) {
      lexer.advance(); // consume closing quote
      return lexer._createToken(TokenType.String, text, line, column, start, lexer.position - start);
    }

    if (ch === '\n') {
      lexer.error(
        "Unterminated string literal. Newline encountered.",
        'LEX004',
        "String literals must be closed on the same line or use '\\n' for a newline character."
      );
    }

    if (ch === '\\') {
      lexer.advance(); // consume the backslash
      if (lexer.isAtEnd()) {
        lexer.error("Unterminated escape sequence at end of file.", 'LEX005');
      }
      const esc = lexer.peek();
      if (Object.hasOwn(STRING_ESCAPES, esc)) {
        text += STRING_ESCAPES[esc];
      } else {
        lexer.error(`Invalid escape sequence: \\${esc}`, 'LEX003');
      }
    } else {
      text += ch;
    }

    lexer.advance(); // move past the (escaped or plain) character
  }

  // Loop exhausted the source without finding a closing quote.
  lexer.error(
    "Unterminated string literal.",
    'LEX005',
    `A string starting with ${quote} was not properly closed.`
  );
}
@@ -0,0 +1,69 @@
1
+ import { TokenType } from "../TokenTypes.js";
2
+ import { createToken } from "../createToken.js";
3
+
4
// Symbol/operator strings and their token types.
export const SYMBOLS = {
  "(": TokenType.LParen,
  ")": TokenType.RParen,
  "[": TokenType.LBracket,
  "]": TokenType.RBracket,
  "{": TokenType.LBrace,
  "}": TokenType.RBrace,
  ",": TokenType.Comma,
  "?.": TokenType.Operator,
  ".": TokenType.Operator,
  "+": TokenType.Operator,
  "-": TokenType.Operator,
  "*": TokenType.Operator,
  "/": TokenType.Operator,
  "%": TokenType.Operator,
  ">": TokenType.Operator,
  "<": TokenType.Operator,
  "=": TokenType.Operator,
  "==": TokenType.Operator,
  "===": TokenType.Operator,
  "!": TokenType.Operator,
  "!=": TokenType.Operator,
  "!==": TokenType.Operator,
  ">=": TokenType.Operator,
  "<=": TokenType.Operator,
  "&&": TokenType.Operator,
  "||": TokenType.Operator,
  "|>": TokenType.Operator, // Pipe operator
  "->": TokenType.Operator, // Arrow operator
  "??": TokenType.Operator, // Null coalescing

  "...": TokenType.Spread, // Spread operator
  "..": TokenType.Range, // Range operator
  ":": TokenType.Colon,
  "@": TokenType.At,
};

// Symbol strings ordered longest-first so multi-character operators
// (e.g. "===", "...") match before their prefixes ("==", "=", "..").
// FIXED: computed once at module load instead of re-sorting the table on
// every readSymbol call (readSymbol runs once per non-literal token).
const SYMBOLS_BY_LENGTH = Object.keys(SYMBOLS).sort(
  (a, b) => b.length - a.length
);

/**
 * Read the longest recognized symbol/operator at the lexer's position.
 * @throws a lexer error (LEX007) when no known symbol starts here.
 */
export function readSymbol(lexer) {
  const startLine = lexer.line;
  const startColumn = lexer.column;
  const startPosition = lexer.position;

  for (const symbolString of SYMBOLS_BY_LENGTH) {
    if (lexer.source.startsWith(symbolString, lexer.position)) {
      // Consume the matched characters (advance keeps line/column correct).
      for (let i = 0; i < symbolString.length; i++) {
        lexer.advance();
      }
      return lexer._createToken(
        SYMBOLS[symbolString],
        symbolString,
        startLine,
        startColumn,
        startPosition,
        symbolString.length
      );
    }
  }

  // No recognized symbol prefix at this position.
  const char = lexer.peek();
  lexer.error(
    `Unrecognized symbol or character: '${char}'.`,
    'LEX007',
    `The symbol or character '${char}' is not recognized. Check for typos or unsupported operators.`
  );
}
@@ -0,0 +1,36 @@
1
+ import { skipComments } from "./commentTokenizer.js";
2
+
3
/** True for the whitespace characters the lexer skips between tokens. */
export function isWhitespace(char) {
  return char === " " || char === "\t" || char === "\n" || char === "\r";
}

/**
 * Skip whitespace AND comments before the next token.
 * FIXED: previously "\r" was not skipped (despite isWhitespace accepting it),
 * so CRLF line endings reached readSymbol and raised "Unrecognized symbol".
 */
export function skipWhitespace(lexer) {
  while (!lexer.isAtEnd()) {
    const char = lexer.peek();

    if (isWhitespace(char)) {
      lexer.advance();
    } else if (char === "/" && (lexer.peek(1) === "/" || lexer.peek(1) === "*")) {
      // Comments count as whitespace between tokens.
      skipComments(lexer);
    } else {
      break;
    }
  }
}

/**
 * Skip whitespace only (no comment handling).
 * FIXED: also skips "\r" now, matching isWhitespace.
 */
export function skipSimpleWhitespace(lexer) {
  while (!lexer.isAtEnd() && isWhitespace(lexer.peek())) {
    lexer.advance();
  }
}