mimo-lang 1.1.1 → 2.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. package/.gitattributes +24 -0
  2. package/LICENSE +21 -0
  3. package/README.md +71 -39
  4. package/adapters/browserAdapter.js +86 -0
  5. package/adapters/nodeAdapter.js +101 -0
  6. package/bin/cli.js +80 -0
  7. package/bin/commands/convert.js +27 -0
  8. package/bin/commands/doctor.js +139 -0
  9. package/bin/commands/eval.js +39 -0
  10. package/bin/commands/fmt.js +109 -0
  11. package/bin/commands/help.js +72 -0
  12. package/bin/commands/lint.js +117 -0
  13. package/bin/commands/repl.js +24 -0
  14. package/bin/commands/run.js +64 -0
  15. package/bin/commands/test.js +126 -0
  16. package/bin/utils/colors.js +38 -0
  17. package/bin/utils/formatError.js +47 -0
  18. package/bin/utils/fs.js +57 -0
  19. package/bin/utils/version.js +8 -0
  20. package/build.js +18 -0
  21. package/bun.lock +74 -0
  22. package/index.js +48 -77
  23. package/index.web.js +364 -0
  24. package/interpreter/BuiltinFunction.js +32 -0
  25. package/interpreter/ErrorHandler.js +120 -0
  26. package/interpreter/ExpressionEvaluator.js +106 -0
  27. package/interpreter/Interpreter.js +172 -0
  28. package/interpreter/MimoError.js +112 -0
  29. package/interpreter/ModuleLoader.js +236 -0
  30. package/interpreter/StatementExecutor.js +107 -0
  31. package/interpreter/Utils.js +82 -0
  32. package/interpreter/Values.js +87 -0
  33. package/interpreter/coreBuiltins.js +490 -0
  34. package/interpreter/environment.js +99 -0
  35. package/interpreter/evaluators/binaryExpressionEvaluator.js +111 -0
  36. package/interpreter/evaluators/collectionEvaluator.js +151 -0
  37. package/interpreter/evaluators/functionCallEvaluator.js +76 -0
  38. package/interpreter/evaluators/literalEvaluator.js +27 -0
  39. package/interpreter/evaluators/moduleAccessEvaluator.js +25 -0
  40. package/interpreter/evaluators/templateLiteralEvaluator.js +20 -0
  41. package/interpreter/executors/BaseExecutor.js +37 -0
  42. package/interpreter/executors/ControlFlowExecutor.js +206 -0
  43. package/interpreter/executors/FunctionExecutor.js +126 -0
  44. package/interpreter/executors/PatternMatchExecutor.js +93 -0
  45. package/interpreter/executors/VariableExecutor.js +144 -0
  46. package/interpreter/index.js +8 -0
  47. package/interpreter/stdlib/array/accessFunctions.js +61 -0
  48. package/interpreter/stdlib/array/arrayUtils.js +36 -0
  49. package/interpreter/stdlib/array/higherOrderFunctions.js +285 -0
  50. package/interpreter/stdlib/array/searchFunctions.js +77 -0
  51. package/interpreter/stdlib/array/setFunctions.js +49 -0
  52. package/interpreter/stdlib/array/transformationFunctions.js +68 -0
  53. package/interpreter/stdlib/array.js +85 -0
  54. package/interpreter/stdlib/assert.js +143 -0
  55. package/interpreter/stdlib/datetime.js +170 -0
  56. package/interpreter/stdlib/env.js +54 -0
  57. package/interpreter/stdlib/fs.js +161 -0
  58. package/interpreter/stdlib/http.js +92 -0
  59. package/interpreter/stdlib/json.js +70 -0
  60. package/interpreter/stdlib/math.js +309 -0
  61. package/interpreter/stdlib/object.js +142 -0
  62. package/interpreter/stdlib/path.js +69 -0
  63. package/interpreter/stdlib/regex.js +134 -0
  64. package/interpreter/stdlib/string.js +260 -0
  65. package/interpreter/suggestions.js +46 -0
  66. package/lexer/Lexer.js +245 -0
  67. package/lexer/TokenTypes.js +131 -0
  68. package/lexer/createToken.js +11 -0
  69. package/lexer/tokenizers/commentTokenizer.js +45 -0
  70. package/lexer/tokenizers/literalTokenizer.js +163 -0
  71. package/lexer/tokenizers/symbolTokenizer.js +69 -0
  72. package/lexer/tokenizers/whitespaceTokenizer.js +36 -0
  73. package/package.json +29 -13
  74. package/parser/ASTNodes.js +448 -0
  75. package/parser/Parser.js +188 -0
  76. package/parser/expressions/atomicExpressions.js +165 -0
  77. package/parser/expressions/conditionalExpressions.js +0 -0
  78. package/parser/expressions/operatorExpressions.js +79 -0
  79. package/parser/expressions/primaryExpressions.js +77 -0
  80. package/parser/parseStatement.js +184 -0
  81. package/parser/parserExpressions.js +115 -0
  82. package/parser/parserUtils.js +19 -0
  83. package/parser/statements/controlFlowParsers.js +106 -0
  84. package/parser/statements/functionParsers.js +314 -0
  85. package/parser/statements/moduleParsers.js +57 -0
  86. package/parser/statements/patternMatchParsers.js +124 -0
  87. package/parser/statements/variableParsers.js +155 -0
  88. package/repl.js +325 -0
  89. package/test.js +47 -0
  90. package/tools/PrettyPrinter.js +3 -0
  91. package/tools/convert/Args.js +46 -0
  92. package/tools/convert/Registry.js +91 -0
  93. package/tools/convert/Transpiler.js +78 -0
  94. package/tools/convert/plugins/README.md +66 -0
  95. package/tools/convert/plugins/alya/index.js +10 -0
  96. package/tools/convert/plugins/alya/to_alya.js +289 -0
  97. package/tools/convert/plugins/alya/visitors/expressions.js +257 -0
  98. package/tools/convert/plugins/alya/visitors/statements.js +403 -0
  99. package/tools/convert/plugins/base_converter.js +228 -0
  100. package/tools/convert/plugins/javascript/index.js +10 -0
  101. package/tools/convert/plugins/javascript/mimo_runtime.js +265 -0
  102. package/tools/convert/plugins/javascript/to_js.js +155 -0
  103. package/tools/convert/plugins/javascript/visitors/expressions.js +197 -0
  104. package/tools/convert/plugins/javascript/visitors/patterns.js +102 -0
  105. package/tools/convert/plugins/javascript/visitors/statements.js +236 -0
  106. package/tools/convert/plugins/python/index.js +10 -0
  107. package/tools/convert/plugins/python/mimo_runtime.py +811 -0
  108. package/tools/convert/plugins/python/to_py.js +329 -0
  109. package/tools/convert/plugins/python/visitors/expressions.js +272 -0
  110. package/tools/convert/plugins/python/visitors/patterns.js +100 -0
  111. package/tools/convert/plugins/python/visitors/statements.js +257 -0
  112. package/tools/convert.js +102 -0
  113. package/tools/format/CommentAttacher.js +190 -0
  114. package/tools/format/CommentLexer.js +152 -0
  115. package/tools/format/Printer.js +849 -0
  116. package/tools/format/config.js +107 -0
  117. package/tools/formatter.js +169 -0
  118. package/tools/lint/Linter.js +391 -0
  119. package/tools/lint/config.js +114 -0
  120. package/tools/lint/rules/consistent-return.js +62 -0
  121. package/tools/lint/rules/max-depth.js +56 -0
  122. package/tools/lint/rules/no-empty-function.js +45 -0
  123. package/tools/lint/rules/no-magic-numbers.js +46 -0
  124. package/tools/lint/rules/no-shadow.js +113 -0
  125. package/tools/lint/rules/no-unused-vars.js +26 -0
  126. package/tools/lint/rules/prefer-const.js +19 -0
  127. package/tools/linter.js +261 -0
  128. package/tools/replFormatter.js +93 -0
  129. package/tools/stamp-version.js +32 -0
  130. package/web/index.js +9 -0
  131. package/bun.lockb +0 -0
  132. package/cli.js +0 -84
  133. package/compiler/execute/interpreter.js +0 -68
  134. package/compiler/execute/interpreters/binary.js +0 -12
  135. package/compiler/execute/interpreters/call.js +0 -10
  136. package/compiler/execute/interpreters/if.js +0 -10
  137. package/compiler/execute/interpreters/try-catch.js +0 -10
  138. package/compiler/execute/interpreters/while.js +0 -8
  139. package/compiler/execute/utils/createfunction.js +0 -11
  140. package/compiler/execute/utils/evaluate.js +0 -20
  141. package/compiler/execute/utils/operate.js +0 -23
  142. package/compiler/lexer/processToken.js +0 -40
  143. package/compiler/lexer/tokenTypes.js +0 -4
  144. package/compiler/lexer/tokenizer.js +0 -74
  145. package/compiler/parser/expression/comparison.js +0 -18
  146. package/compiler/parser/expression/identifier.js +0 -29
  147. package/compiler/parser/expression/number.js +0 -10
  148. package/compiler/parser/expression/operator.js +0 -21
  149. package/compiler/parser/expression/punctuation.js +0 -31
  150. package/compiler/parser/expression/string.js +0 -6
  151. package/compiler/parser/parseExpression.js +0 -27
  152. package/compiler/parser/parseStatement.js +0 -34
  153. package/compiler/parser/parser.js +0 -45
  154. package/compiler/parser/statement/call.js +0 -26
  155. package/compiler/parser/statement/function.js +0 -29
  156. package/compiler/parser/statement/if.js +0 -34
  157. package/compiler/parser/statement/return.js +0 -10
  158. package/compiler/parser/statement/set.js +0 -11
  159. package/compiler/parser/statement/show.js +0 -10
  160. package/compiler/parser/statement/try-catch.js +0 -25
  161. package/compiler/parser/statement/while.js +0 -22
  162. package/converter/go/convert.js +0 -110
  163. package/converter/js/convert.js +0 -107
  164. package/jsconfig.json +0 -27
  165. package/vite.config.js +0 -17
@@ -0,0 +1,257 @@
1
+ /**
2
+ * Statement visitors for the Mimo → Python converter.
3
+ * Mixed into MimoToPyConverter via Object.assign.
4
+ */
5
export const statementVisitors = {
    /** Emit `mimo.show(<expr>)` for a `show` statement. */
    visitShowStatement(node) {
        this.write(`${this.currentIndent}mimo.show(`);
        this.visitNode(node.expression);
        this.write(')\n');
    },

    /** Emit `<identifier> = <value>`. */
    visitVariableDeclaration(node) {
        this.write(`${this.currentIndent}${node.identifier} = `);
        this.visitNode(node.value);
        this.write('\n');
    },

    /**
     * Emit a Python `def`, an optional `global` line, the body, and any
     * decorator applications (as `name = decorator(name)` re-assignments).
     */
    visitFunctionDeclaration(node) {
        const decorators = node.decorators || [];
        const params = node.params || [];

        // Build parameter list with defaults and rest params
        const defaults = node.defaults || {};
        const paramParts = params.map((p) => {
            const defaultNode = defaults[p.name];
            if (defaultNode !== undefined && defaultNode !== null) {
                return `${p.name}=${this._exprToString(defaultNode)}`;
            }
            return p.name;
        });
        if (node.restParam) {
            paramParts.push(`*${node.restParam.name}`);
        }

        // Parameters shadow module-level names, so they never need `global`.
        const paramNames = new Set(params.map((p) => p.name));
        if (node.restParam) paramNames.add(node.restParam.name);

        // Collected once and materialized so it can be reused below.
        const assignedInBody = [...this._collectAssignedVars(node.body)];

        // Module-level vars that are assigned inside this function body
        const globalsNeeded = assignedInBody.filter(
            (v) => this._moduleVars.has(v) && !paramNames.has(v)
        );

        this.writeLine(`def ${node.name}(${paramParts.join(', ')}):`);
        // Emit global declarations before the body
        if (globalsNeeded.length > 0) {
            this.indent();
            this.writeLine(`global ${globalsNeeded.join(', ')}`);
            this.dedent();
        }

        // Track variables declared in this function for nested closure detection
        const funcVars = new Set([...paramNames, ...assignedInBody]);
        this._enclosingFunctionVars.push(funcVars);
        this.visitBlock(node.body);
        this._enclosingFunctionVars.pop();
        this.writeLine();

        // Apply decorators after the function definition, innermost first.
        // While hoisted functions are being emitted the lines are queued in
        // `_deferredDecorators` instead of being written immediately.
        for (const dec of [...decorators].reverse()) {
            const decName = typeof dec.name === 'string' ? dec.name : dec.name?.name;
            const decArgs = dec.arguments || dec.args || [];
            let decLine;
            if (decArgs.length > 0) {
                const argStr = decArgs.map((a) => this._exprToString(a)).join(', ');
                decLine = `${node.name} = ${decName}(${argStr})(${node.name})`;
            } else {
                decLine = `${node.name} = ${decName}(${node.name})`;
            }
            if (this._emittingHoistedFunctions) {
                this._deferredDecorators.push(decLine);
            } else {
                this.writeLine(decLine);
            }
        }
        if (decorators.length > 0 && !this._emittingHoistedFunctions) this.writeLine();
    },

    /** Emit a call, optionally assigning its result to `node.destination`. */
    visitCallStatement(node) {
        if (node.destination) {
            const destName = node.destination.name;
            this.write(`${this.currentIndent}${destName} = `);
        } else {
            this.write(this.currentIndent);
        }

        this.visitCallee(node.callee);
        this.write('(');
        this.emitArgs(node.arguments);
        this.write(')\n');
    },

    /** Emit `return` with an optional value. */
    visitReturnStatement(node) {
        this.write(`${this.currentIndent}return`);
        if (node.argument) {
            this.write(' ');
            this.visitNode(node.argument);
        }
        this.write('\n');
    },

    /**
     * Emit an `if` / `elif` / `else` chain.
     *
     * The chain is walked iteratively so each `elif` is written as a single
     * keyword after the indent. Writing an `el` prefix and then re-entering
     * this visitor would put the indent between `el` and `if`, which is
     * invalid Python at any non-zero indentation.
     */
    visitIfStatement(node) {
        let branch = node;
        let keyword = 'if';
        while (branch) {
            this.write(`${this.currentIndent}${keyword} `);
            this.visitNode(branch.condition);
            this.write(':\n');
            this.visitBlock(branch.consequent);

            const alternate = branch.alternate;
            if (alternate && alternate.type === 'IfStatement') {
                branch = alternate;
                keyword = 'elif';
                continue;
            }
            if (alternate) {
                this.writeLine('else:');
                this.visitBlock(alternate);
            }
            branch = null;
        }
    },

    visitGuardStatement(node) {
        // guard cond else ... end  →  if not (cond): <alternate-block>
        this.write(`${this.currentIndent}if not (`);
        this.visitNode(node.condition);
        this.write('):\n');
        this.visitBlock(node.alternate || node.elseBlock || []);
    },

    /** Emit `while <condition>:` followed by the body. */
    visitWhileStatement(node) {
        this.write(`${this.currentIndent}while `);
        this.visitNode(node.condition);
        this.write(':\n');
        this.visitBlock(node.body);
    },

    /** Emit `for <variable> in <iterable>:` followed by the body. */
    visitForStatement(node) {
        this.write(`${this.currentIndent}for ${node.variable.name} in `);
        this.visitNode(node.iterable);
        this.write(':\n');
        this.visitBlock(node.body);
    },

    /** Emit an unconditional loop as `while True:`; a label becomes a comment. */
    visitLoopStatement(node) {
        if (node.label) this.writeLine(`# label: ${node.label}`);
        this.writeLine('while True:');
        this.visitBlock(node.body);
    },

    /** Emit the label as a comment, then the labeled statement itself. */
    visitLabeledStatement(node) {
        this.writeLine(`# label: ${node.label}`);
        this.visitNode(node.statement);
    },

    visitBreakStatement(node) {
        // Python doesn't support labeled break; emit a comment if label present
        if (node.label) this.writeLine(`# break ${node.label} (labeled break not supported in Python)`);
        this.writeLine('break');
    },

    visitContinueStatement(node) {
        // Same limitation as labeled break: the label is only kept as a comment.
        if (node.label) this.writeLine(`# continue ${node.label} (labeled continue not supported in Python)`);
        this.writeLine('continue');
    },

    /**
     * Emit `try:` and, when a catch block exists, an `except Exception` clause
     * that rebinds the caught exception to the catch variable as a string.
     */
    visitTryStatement(node) {
        this.writeLine('try:');
        this.visitBlock(node.tryBlock);
        // NOTE(review): when `catchBlock` is missing only `try:` is emitted, and
        // Python rejects a `try` without `except`/`finally`. Confirm the parser
        // always supplies a catch block.
        if (node.catchBlock) {
            const errName = node.catchVar?.name || '_err';
            const tmpName = `_exc_${errName}`;
            this.writeLine(`except Exception as ${tmpName}:`);
            this.indent();
            // Coerce the exception to a string so it behaves like in Mimo/JS
            this.writeLine(`${errName} = str(${tmpName})`);
            (node.catchBlock || []).forEach((stmt) => this.visitNode(stmt));
            this.dedent();
        }
    },

    /** Emit `raise Exception(<argument>)`. */
    visitThrowStatement(node) {
        this.write(`${this.currentIndent}raise Exception(`);
        this.visitNode(node.argument);
        this.write(')\n');
    },

    /** Record the alias → path mapping and bind the alias in the output. */
    visitImportStatement(node) {
        this.moduleAliases.set(node.alias, node.path);
        if (this.isStdlibModule(node.path)) {
            // Bind the stdlib sub-object from the mimo runtime
            this.writeLine(`${node.alias} = mimo.${node.path}`);
        } else {
            // External import was already emitted as a top-level `import` statement.
            // Just bind the alias name so the rest of the code works.
            this.writeLine(`${node.alias} = ${node.alias}`);
        }
    },

    /**
     * Emit assignments for object, array, or other destructuring patterns.
     *
     * When more than one name is extracted, the source expression is bound to
     * a temporary first so it is evaluated exactly once.
     */
    visitDestructuringAssignment(node) {
        if (node.pattern.type === 'ObjectPattern') {
            // Python can't unpack dicts like JS, emit individual assignments
            const propertyCount = (node.pattern.properties || []).length;
            if (propertyCount > 1) {
                // Inlining the expression per property would re-run it (and any
                // side effects, e.g. a call) once for every extracted name.
                const tmpVar = `__tmp_${this._matchCounter++}`;
                this.write(`${this.currentIndent}${tmpVar} = `);
                this.visitNode(node.expression);
                this.write('\n');
                this._emitObjectDestructuring(node.pattern, tmpVar);
            } else {
                const exprStr = this._exprToString(node.expression);
                this._emitObjectDestructuring(node.pattern, exprStr);
            }
        } else if (node.pattern.type === 'ArrayPattern') {
            // Array pattern: emit individual index assignments to handle extra elements
            // e.g. [x, y] = arr  →  __tmp = arr; x = mimo.get(__tmp, 0); y = mimo.get(__tmp, 1)
            const elements = node.pattern.elements || [];
            if (elements.length === 0) return;
            const tmpVar = `__tmp_${this._matchCounter++}`;
            this.write(`${this.currentIndent}${tmpVar} = `);
            this.visitNode(node.expression);
            this.write('\n');
            elements.forEach((el, i) => {
                if (el && el.name) {
                    this.writeLine(`${el.name} = mimo.get(${tmpVar}, ${i})`);
                }
            });
        } else {
            this.write(this.currentIndent);
            this.visitNode(node.pattern);
            this.write(' = ');
            this.visitNode(node.expression);
            this.write('\n');
        }
    },

    /** Emit `<object>["<property>"] = <value>`. */
    visitPropertyAssignment(node) {
        this.write(this.currentIndent);
        this.visitNode(node.object);
        this.write(`["${node.property}"] = `);
        this.visitNode(node.value);
        this.write('\n');
    },

    /** Emit `<object>[<index>] = <value>`. */
    visitBracketAssignment(node) {
        this.write(this.currentIndent);
        this.visitNode(node.object);
        this.write('[');
        this.visitNode(node.index);
        this.write('] = ');
        this.visitNode(node.value);
        this.write('\n');
    },

    /**
     * Emit a Python object destructuring assignment.
     * Python can't do `a, b = obj` for dicts, so we expand it as:
     *   a = obj.get("a")
     *   b = obj.get("b")
     *
     * `exprStr` is inlined once per property; pass a plain name when the
     * source expression must not be evaluated repeatedly.
     */
    _emitObjectDestructuring(pattern, exprStr) {
        pattern.properties.forEach((prop) => {
            const key = prop.key || prop.name;
            this.writeLine(`${prop.name} = (${exprStr}).get("${key}")`);
        });
    },
};
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Mimo Language Converter - Standardized Entry Point
4
+ */
5
+ import fs from 'node:fs';
6
+ import path from 'node:path';
7
+ import { fileURLToPath } from 'node:url';
8
+
9
+ import { ConverterRegistry } from './convert/Registry.js';
10
+ import { parseArgs, determineTarget } from './convert/Args.js';
11
+ import { Transpiler } from './convert/Transpiler.js';
12
+
13
+ const converterRegistry = new ConverterRegistry();
14
+
15
/**
 * Read all of standard input as UTF-8 text.
 *
 * @returns {Promise<string>} Resolves with the concatenated input once the
 *   stream ends; rejects if the stream emits an error.
 */
async function readStdin() {
    return new Promise((resolve, reject) => {
        let data = '';
        process.stdin.setEncoding('utf-8');
        process.stdin.on('data', (chunk) => { data += chunk; });
        process.stdin.on('end', () => { resolve(data); });
        // Without this listener a failing stream would leave the promise pending forever.
        process.stdin.on('error', reject);
    });
}
23
+
24
/**
 * Run the converter CLI.
 *
 * Parses the arguments, resolves the target language, and transpiles either a
 * single file / STDIN into one output file, or an input file into an output
 * directory (copying the converter's runtime file there when it exists).
 * Usage and validation errors are printed to stderr and end the process with
 * exit code 1.
 *
 * @param {string[]} [providedArgs] CLI arguments; defaults to `process.argv.slice(2)`.
 * @returns {Promise<void>}
 */
async function main(providedArgs) {
    const args = providedArgs || process.argv.slice(2);
    const options = parseArgs(args);

    // Initialize the registry by discovering plugins
    await converterRegistry.discoverConverters();

    if ((!options.in && process.stdin.isTTY) || !options.out) {
        const availableLanguages = converterRegistry.getLanguages().join(', ');
        console.error('Usage: mimo convert --in <infile> --out <outfile|outdir> [--to <language>]');
        console.error('Or pipe to stdin: echo "..." | mimo convert --out <outfile> [--to <language>]');
        console.error(`Available target languages: ${availableLanguages}`);
        process.exit(1);
    }

    let targetConfig;
    try {
        targetConfig = determineTarget(options, converterRegistry);
    } catch (error) {
        console.error(`Error: ${error.message}`);
        process.exit(1);
    }

    const { language, converterInfo, targetExtension } = targetConfig;
    const transpiler = new Transpiler();

    // An --out path carrying the target extension means "write exactly this file";
    // anything else is treated as an output directory.
    const isFileOutput = path.extname(options.out) === targetExtension;

    // Reject the unsupported STDIN → directory combination before printing the
    // progress banner or creating the output directory.
    if (!isFileOutput && !options.in) {
        console.error('Error: Directory output requires an input file to resolve dependencies.');
        process.exit(1);
    }

    const outDir = isFileOutput ? path.dirname(options.out) : options.out;
    const sourceLabel = options.in ? `'${options.in}'` : 'STDIN';
    if (isFileOutput) {
        console.log(`Converting ${sourceLabel} to ${language.toUpperCase()} file '${options.out}'...`);
    } else {
        console.log(`Converting ${sourceLabel} to ${language.toUpperCase()} in directory '${options.out}'...`);
    }

    if (!fs.existsSync(outDir)) {
        fs.mkdirSync(outDir, { recursive: true });
    }

    if (isFileOutput) {
        if (options.in) {
            transpiler.transpileMainFile(options.in, options.out, converterInfo);
        } else {
            const source = await readStdin();
            const output = transpiler.transpileSource(source, 'stdin', converterInfo);
            fs.writeFileSync(options.out, output, 'utf-8');
        }
    } else {
        transpiler.transpileFile(options.in, outDir, converterInfo, targetExtension);

        // Copy runtime if exists
        if (converterInfo.runtimeFile) {
            const moduleDir = path.dirname(fileURLToPath(import.meta.url));
            const runtimeSourcePath = path.join(moduleDir, 'convert', 'plugins', converterInfo.runtimeFile);
            const runtimeFileName = path.basename(converterInfo.runtimeFile);

            if (fs.existsSync(runtimeSourcePath)) {
                fs.copyFileSync(runtimeSourcePath, path.join(outDir, runtimeFileName));
                console.log(`  -> Copied runtime: ${runtimeFileName}`);
            }
        }
    }

    console.log(`✅ Conversion to ${language.toUpperCase()} successful!`);
}
97
+
98
+ export { main as runConverter };
99
+
100
// Run the CLI only when this module is executed directly, not when imported.
if (process.argv[1] === fileURLToPath(import.meta.url)) {
    // Report failures explicitly instead of leaving an unhandled rejection.
    main().catch((error) => {
        console.error(`Error: ${error?.message ?? error}`);
        process.exit(1);
    });
}
@@ -0,0 +1,190 @@
1
+ /**
2
+ * tools/format/CommentAttacher.js
3
+ *
4
+ * Attaches extracted comments to the nearest AST nodes so the Printer
5
+ * can re-emit them in the correct positions.
6
+ *
7
+ * Attachment rules (line-number based):
8
+ *
9
+ * leadingComments — a comment is "leading" for node N if:
10
+ * - The comment ends on the line immediately before N starts, OR
11
+ * - The comment is a block of consecutive line-comments that sits above N
12
+ * with no intervening blank lines between the block and N.
13
+ *
14
+ * trailingComment — a comment is "trailing" for node N if:
15
+ * - It starts on the same line that N ends on (inline comment).
16
+ * - Only the first such comment is attached (one trailing comment per node).
17
+ *
18
+ * After attachment the comment is marked `attached = true` so it is not
19
+ * double-emitted. Any remaining unattached comments are prepended to the
20
+ * Program body as synthetic leading comments on a virtual sentinel node
21
+ * (the Program node itself gets them as `node.leadingComments`).
22
+ *
23
+ * Limitations (acceptable for v1):
24
+ * - Comments inside expressions (e.g. inside array literals) are treated as
25
+ * leading comments of the next sibling statement, not the sub-expression.
26
+ * - Only top-level Program.body and inner block arrays are walked; deeply
27
+ * nested expression trees are not traversed for attachment purposes.
28
+ */
29
+
30
/**
 * Attach comments to the statements of a parsed program, mutating the AST.
 *
 * Each comment is shallow-copied with an `attached` flag before the statement
 * lists are walked, so the objects in `comments` are not modified. Copies that
 * are still unattached after the walk are appended to `ast.leadingComments`.
 *
 * @param {object}   ast       Parsed Program node (its `body` is walked)
 * @param {object[]} comments  Extracted comment records
 */
export function attachComments(ast, comments) {
    if (!comments.length) return;

    // Flagged copies: the walk marks each one it places on a node.
    const tracked = comments.map((comment) => ({ ...comment, attached: false }));

    _attachToStatementList(ast.body, tracked);

    // Whatever no statement claimed is kept on the Program node itself.
    const leftovers = tracked.filter(({ attached }) => !attached);
    if (leftovers.length === 0) return;

    const existing = ast.leadingComments ?? [];
    ast.leadingComments = existing.concat(leftovers);
}
52
+
53
+ // ── Internal helpers ──────────────────────────────────────────────────────────
54
+
55
/**
 * Approximate the line a node ends on.
 *
 * Only the node's own `line` is consulted, so this is the same value as the
 * start line; `1` is used when `line` is null or undefined.
 */
function _nodeEndLine(node) {
    const { line } = node;
    return line ?? 1;
}
62
+
63
/** Line a node starts on; `1` when the node carries no `line`. */
function _nodeStartLine(node) {
    return node.line ?? 1;
}

/**
 * Attach comments to a flat list of statement nodes, then recurse into the
 * statement lists owned by each node.
 *
 * For every node:
 *   - unattached comments on one of the 20 lines above it are candidates for
 *     `leadingComments`; only the run accepted by `_filterContiguousLeading`
 *     is attached,
 *   - the first unattached `line` comment on the node's own line becomes its
 *     `trailingComment`.
 *
 * A comment is flagged `attached` only when it is actually stored on a node.
 * Candidates rejected by the contiguity filter stay unattached, so later code
 * can still place them instead of the comment disappearing from the output.
 *
 * @param {object[]|null|undefined} statements  Statement nodes (may be empty/absent)
 * @param {object[]} pool  Comment records carrying `line`, `kind`, `attached`
 */
function _attachToStatementList(statements, pool) {
    if (!statements?.length) return;

    // Leading comments are only looked for within this many lines above a node.
    const lookbackLines = 20;

    for (const node of statements) {
        const startLine = _nodeStartLine(node);

        // ── Leading comments ──────────────────────────────────────────────
        const candidates = pool.filter(
            (c) => !c.attached && c.line < startLine && c.line >= startLine - lookbackLines
        );
        const leading = _filterContiguousLeading(candidates, startLine);
        for (const c of leading) {
            c.attached = true;
        }
        if (leading.length) {
            node.leadingComments = leading;
        }

        // ── Trailing comment ──────────────────────────────────────────────
        const trailing = pool.find(
            (c) => !c.attached && c.line === startLine && c.kind === 'line'
        );
        if (trailing) {
            node.trailingComment = trailing;
            trailing.attached = true;
        }

        // ── Recurse into child blocks ─────────────────────────────────────
        _recurseIntoNode(node, pool);
    }
}

/**
 * Keep the run of candidates that reaches up from just above `startLine`.
 *
 * Candidates are scanned from last to first. A candidate is kept while its
 * line is no more than one line above the expected position (so a single
 * skipped line is tolerated); the first larger gap ends the scan and every
 * earlier candidate is dropped.
 *
 * @param {object[]} candidates  Comments ordered ascending by `line`
 * @param {number}   startLine   Start line of the node being decorated
 * @returns {object[]} The accepted comments, in their original order
 */
function _filterContiguousLeading(candidates, startLine) {
    const result = [];
    let expectedLine = startLine - 1;
    for (let i = candidates.length - 1; i >= 0; i--) {
        const c = candidates[i];
        if (c.line < expectedLine - 1) {
            // Gap found — stop
            break;
        }
        result.unshift(c);
        expectedLine = c.line - 1;
    }
    return result;
}

/**
 * Recurse into the child statement lists of block-owning nodes.
 * Node types without a case below are left untouched.
 */
function _recurseIntoNode(node, pool) {
    switch (node.type) {
        case 'FunctionDeclaration':
        case 'AnonymousFunction':
        case 'WhileStatement':
        case 'ForStatement':
        case 'LoopStatement':
            _attachToStatementList(node.body, pool);
            break;
        case 'IfStatement':
            _attachToStatementList(node.consequent, pool);
            if (Array.isArray(node.alternate)) {
                _attachToStatementList(node.alternate, pool);
            } else if (node.alternate) {
                // A non-array alternate is a nested node (e.g. an else-if).
                _recurseIntoNode(node.alternate, pool);
            }
            break;
        case 'GuardStatement':
            _attachToStatementList(node.alternate, pool);
            break;
        case 'TryStatement':
            _attachToStatementList(node.tryBlock, pool);
            _attachToStatementList(node.catchBlock, pool);
            break;
        case 'MatchStatement':
            for (const matchCase of (node.cases ?? [])) {
                _attachToStatementList(matchCase.consequent, pool);
            }
            break;
        case 'LabeledStatement':
            if (node.statement) _recurseIntoNode(node.statement, pool);
            break;
        default:
            break;
    }
}
@@ -0,0 +1,152 @@
1
+ /**
2
+ * tools/format/CommentLexer.js
3
+ *
4
+ * A lightweight, self-contained scanner that extracts all comments from raw
5
+ * Mimo source without going through the main Lexer/Parser pipeline.
6
+ *
7
+ * This is the formatter-only path for comment preservation (A1).
8
+ * It deliberately does NOT touch the main lexer or any interpreter code.
9
+ *
10
+ * Returns:
11
+ * {
12
+ * comments: Array<{ kind: 'line'|'block', value: string, line: number,
13
+ * column: number, startOffset: number, endOffset: number }>,
14
+ * lineOffsets: number[] // lineOffsets[n] = char offset where line n+1 starts
15
+ * }
16
+ */
17
+
18
+ export function extractComments(source) {
19
+ const comments = [];
20
+ // lineOffsets[i] = char index of the first character of line (i+1).
21
+ // lineOffsets[0] = 0 (line 1 starts at offset 0)
22
+ const lineOffsets = [0];
23
+
24
+ let i = 0;
25
+ let line = 1;
26
+ let column = 1;
27
+
28
+ // Track whether we are inside a string or template literal so we don't
29
+ // mistake comment-like sequences inside strings for actual comments.
30
+ let inString = false;
31
+ let inTemplate = 0; // nesting depth for backtick templates
32
+ let inInterpolation = 0; // depth of ${ } inside templates
33
+
34
+ function advance() {
35
+ const ch = source[i];
36
+ i++;
37
+ if (ch === '\n') {
38
+ lineOffsets.push(i);
39
+ line++;
40
+ column = 1;
41
+ } else {
42
+ column++;
43
+ }
44
+ return ch;
45
+ }
46
+
47
+ function peek(offset = 0) {
48
+ return source[i + offset] ?? null;
49
+ }
50
+
51
+ while (i < source.length) {
52
+ const ch = peek();
53
+
54
+ // ── Template literals ────────────────────────────────────────────────
55
+ if (!inString && ch === '`') {
56
+ advance();
57
+ inTemplate++;
58
+ continue;
59
+ }
60
+ if (inTemplate > 0 && !inString) {
61
+ if (ch === '`') {
62
+ advance();
63
+ inTemplate--;
64
+ continue;
65
+ }
66
+ if (ch === '$' && peek(1) === '{') {
67
+ advance(); advance();
68
+ inInterpolation++;
69
+ continue;
70
+ }
71
+ if (inInterpolation === 0) {
72
+ // inside template fragment — consume (handle escapes)
73
+ if (ch === '\\') { advance(); advance(); continue; }
74
+ advance();
75
+ continue;
76
+ }
77
+ // inside interpolation — fall through to normal processing
78
+ if (ch === '}') {
79
+ advance();
80
+ inInterpolation--;
81
+ continue;
82
+ }
83
+ }
84
+
85
+ // ── Regular strings ──────────────────────────────────────────────────
86
+ if (!inString && (ch === '"' || ch === "'")) {
87
+ const quote = ch;
88
+ advance();
89
+ inString = true;
90
+ while (i < source.length) {
91
+ const sc = peek();
92
+ if (sc === '\\') { advance(); advance(); continue; }
93
+ if (sc === quote) { advance(); inString = false; break; }
94
+ if (sc === '\n') { advance(); break; } // unterminated — let the real lexer handle it
95
+ advance();
96
+ }
97
+ continue;
98
+ }
99
+
100
+ // ── Line comment: // ─────────────────────────────────────────────────
101
+ if (ch === '/' && peek(1) === '/') {
102
+ const startOffset = i;
103
+ const startLine = line;
104
+ const startCol = column;
105
+ advance(); advance(); // consume '//'
106
+ let value = '';
107
+ while (i < source.length && peek() !== '\n') {
108
+ value += peek();
109
+ advance();
110
+ }
111
+ comments.push({
112
+ kind: 'line',
113
+ value: value.trim(),
114
+ line: startLine,
115
+ column: startCol,
116
+ startOffset,
117
+ endOffset: i,
118
+ });
119
+ continue;
120
+ }
121
+
122
+ // ── Block comment: /* … */ ───────────────────────────────────────────
123
+ if (ch === '/' && peek(1) === '*') {
124
+ const startOffset = i;
125
+ const startLine = line;
126
+ const startCol = column;
127
+ advance(); advance(); // consume '/*'
128
+ let value = '';
129
+ while (i < source.length) {
130
+ if (peek() === '*' && peek(1) === '/') {
131
+ advance(); advance();
132
+ break;
133
+ }
134
+ value += peek();
135
+ advance();
136
+ }
137
+ comments.push({
138
+ kind: 'block',
139
+ value: value.trim(),
140
+ line: startLine,
141
+ column: startCol,
142
+ startOffset,
143
+ endOffset: i,
144
+ });
145
+ continue;
146
+ }
147
+
148
+ advance();
149
+ }
150
+
151
+ return { comments, lineOffsets };
152
+ }