machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,1738 @@
1
+ """HIR to MIR Lowering.
2
+
3
+ This module implements the translation from HIR (desugared AST) to MIR
4
+ (Three-Address Code representation).
5
+ """
6
+
7
+ from machine_dialect.ast import (
8
+ ActionStatement,
9
+ Arguments,
10
+ ASTNode,
11
+ BlankLiteral,
12
+ BlockStatement,
13
+ CallStatement,
14
+ CollectionAccessExpression,
15
+ CollectionMutationStatement,
16
+ ConditionalExpression,
17
+ DefineStatement,
18
+ EmptyLiteral,
19
+ ErrorExpression,
20
+ ErrorStatement,
21
+ Expression,
22
+ ExpressionStatement,
23
+ FloatLiteral,
24
+ ForEachStatement,
25
+ FunctionStatement,
26
+ FunctionVisibility,
27
+ Identifier,
28
+ IfStatement,
29
+ InfixExpression,
30
+ InteractionStatement,
31
+ NamedListLiteral,
32
+ OrderedListLiteral,
33
+ Parameter,
34
+ PrefixExpression,
35
+ Program,
36
+ ReturnStatement,
37
+ SayStatement,
38
+ SetStatement,
39
+ Statement,
40
+ StringLiteral,
41
+ UnorderedListLiteral,
42
+ URLLiteral,
43
+ UtilityStatement,
44
+ WhileStatement,
45
+ WholeNumberLiteral,
46
+ YesNoLiteral,
47
+ )
48
+ from machine_dialect.mir.basic_block import BasicBlock
49
+ from machine_dialect.mir.debug_info import DebugInfoBuilder
50
+ from machine_dialect.mir.mir_function import MIRFunction
51
+ from machine_dialect.mir.mir_instructions import (
52
+ ArrayAppend,
53
+ ArrayClear,
54
+ ArrayCreate,
55
+ ArrayFindIndex,
56
+ ArrayGet,
57
+ ArrayInsert,
58
+ ArrayLength,
59
+ ArrayRemove,
60
+ ArraySet,
61
+ Assert,
62
+ BinaryOp,
63
+ Call,
64
+ ConditionalJump,
65
+ Copy,
66
+ DictCreate,
67
+ Jump,
68
+ LoadConst,
69
+ MIRInstruction,
70
+ Pop,
71
+ Print,
72
+ Return,
73
+ Scope,
74
+ Select,
75
+ StoreVar,
76
+ UnaryOp,
77
+ )
78
+ from machine_dialect.mir.mir_module import MIRModule
79
+ from machine_dialect.mir.mir_types import MIRType, MIRUnionType, ast_type_to_mir_type
80
+ from machine_dialect.mir.mir_values import (
81
+ Constant,
82
+ FunctionRef,
83
+ MIRValue,
84
+ ScopedVariable,
85
+ Temp,
86
+ Variable,
87
+ VariableScope,
88
+ )
89
+ from machine_dialect.mir.ssa_construction import construct_ssa
90
+ from machine_dialect.mir.type_inference import TypeInferencer, infer_ast_expression_type
91
+
92
+
93
+ class HIRToMIRLowering:
94
+ """Lowers HIR (desugared AST) to MIR representation."""
95
+
96
def __init__(self) -> None:
    """Create a lowering context with nothing active yet.

    The module, function, and block slots start out as ``None``; the
    bookkeeping dictionaries start out empty.
    """
    # Debug information tracking
    self.debug_builder = DebugInfoBuilder()

    # Counter starts at zero for every new lowering context.
    self.label_counter = 0

    # Name -> variable object currently known to the lowering.
    self.variable_map: dict[str, Variable | ScopedVariable] = {}

    # Track variable types; union types are additionally tracked separately.
    self.type_context: dict[str, MIRType | MIRUnionType] = {}
    self.union_type_context: dict[str, MIRUnionType] = {}

    # Nothing is being lowered until one of the lower_* entry points runs.
    self.module: MIRModule | None = None
    self.current_function: MIRFunction | None = None
    self.current_block: BasicBlock | None = None
106
+
107
def _add_instruction(self, instruction: "MIRInstruction", ast_node: ASTNode) -> None:
    """Tag an instruction with a source location and emit it.

    The location is read from ``ast_node`` and stored on the instruction.
    The instruction is then appended to the current block, if there is one.

    Args:
        instruction: The MIR instruction to add.
        ast_node: The AST node to extract location from (required).

    Raises:
        ValueError: If ``ast_node`` reports no source location.
    """
    source_loc = ast_node.get_source_location()
    if source_loc is None:
        # Nodes without location info are rejected outright, which forces
        # every node to carry proper location tracking.
        node_kind = type(ast_node).__name__
        raise ValueError(f"AST node {node_kind} missing source location")

    instruction.source_location = source_loc

    target_block = self.current_block
    if target_block is None:
        # No active block: the instruction is tagged but not appended.
        return
    target_block.add_instruction(instruction)
122
+
123
def lower_program(self, program: Program, module_name: str = "__main__") -> MIRModule:
    """Lower a whole program into a MIR module.

    The program is desugared, its function-like definitions are lowered
    first, and any remaining top-level statements are wrapped in an
    implicit ``__main__`` function. SSA construction and type inference
    are then applied to the resulting module.

    Args:
        program: The program to lower.
        module_name: Name passed to the created MIR module.

    Returns:
        The MIR module.

    Raises:
        TypeError: If desugaring does not yield a ``Program``.
    """
    # Desugar the AST to HIR
    hir = program.desugar()
    if not isinstance(hir, Program):
        raise TypeError("Expected Program after desugaring")

    module = MIRModule(module_name)
    self.module = module

    # Split the statements into function definitions and everything else,
    # preserving their relative order within each group.
    callable_kinds = (FunctionStatement, UtilityStatement, ActionStatement, InteractionStatement)
    definitions = []
    loose_statements = []
    for node in hir.statements:
        bucket = definitions if isinstance(node, callable_kinds) else loose_statements
        bucket.append(node)

    # Function definitions are lowered before any top-level code.
    for definition in definitions:
        self.lower_function(definition)

    # Top-level statements get an implicit main, unless a function named
    # "__main__" already exists in the module.
    if loose_statements and not module.get_function("__main__"):
        self._create_implicit_main(loose_statements)

    # Mark the main function when one is present.
    if module.get_function("__main__"):
        module.set_main_function("__main__")

    # Apply SSA construction to all functions
    for mir_func in module.functions.values():
        construct_ssa(mir_func)

    # Apply type inference
    TypeInferencer().infer_module_types(module)

    return module
170
+
171
def _create_implicit_main(self, statements: list[Statement]) -> None:
    """Create an implicit main function for top-level statements.

    A ``__main__`` function with a single ``entry`` block is created, the
    given statements are lowered into it, and a bare return is appended
    when the final block is still open. The function is then added to the
    active module (if any) and the per-function context is reset.

    Args:
        statements: The top-level statements to include in main.

    Raises:
        ValueError: If an implicit return is required but ``statements``
            is empty, or the last statement has no source location.
    """
    # Create main function
    main = MIRFunction("__main__", [], MIRType.EMPTY)
    self.current_function = main

    # Create entry block
    entry = BasicBlock("entry")
    main.cfg.add_block(entry)
    main.cfg.set_entry_block(entry)
    self.current_block = entry

    # Start from an empty variable map, as lower_function does. Function
    # definitions are lowered before main and leave their parameters and
    # locals in the map; without this reset a top-level name that collides
    # with one of them would resolve to that function's variable.
    self.variable_map.clear()

    # Lower all top-level statements
    last_stmt = None
    for stmt in statements:
        self.lower_statement(stmt)
        last_stmt = stmt

    # Add implicit return if needed
    if not self.current_block.is_terminated():
        # For implicit returns, we need a source location
        # Use the location from the last statement
        if last_stmt is None:
            raise ValueError("Cannot create implicit return without any statements")
        source_loc = last_stmt.get_source_location()
        if source_loc is None:
            raise ValueError("Last statement missing source location for implicit return")
        return_inst = Return(source_loc)
        return_inst.source_location = source_loc
        if self.current_block is not None:
            self.current_block.add_instruction(return_inst)

    # Add main function to module
    if self.module:
        self.module.add_function(main)

    # Reset context
    self.current_function = None
    self.current_block = None
    self.variable_map = {}
215
+
216
+ def lower_statement(self, stmt: ASTNode) -> None:
217
+ """Lower a statement to MIR.
218
+
219
+ Args:
220
+ stmt: The statement to lower.
221
+ """
222
+ if isinstance(stmt, FunctionStatement | UtilityStatement | ActionStatement | InteractionStatement):
223
+ self.lower_function(stmt)
224
+ elif isinstance(stmt, DefineStatement):
225
+ self._convert_define_statement(stmt)
226
+ elif isinstance(stmt, SetStatement):
227
+ self.lower_set_statement(stmt)
228
+ elif isinstance(stmt, IfStatement):
229
+ self.lower_if_statement(stmt)
230
+ elif isinstance(stmt, ReturnStatement):
231
+ self.lower_return_statement(stmt)
232
+ elif isinstance(stmt, CallStatement):
233
+ self.lower_call_statement(stmt)
234
+ elif isinstance(stmt, SayStatement):
235
+ self.lower_say_statement(stmt)
236
+ elif isinstance(stmt, CollectionMutationStatement):
237
+ self.lower_collection_mutation(stmt)
238
+ elif isinstance(stmt, WhileStatement):
239
+ self.lower_while_statement(stmt)
240
+ elif isinstance(stmt, ForEachStatement):
241
+ # ForEachStatement should be desugared to while in HIR
242
+ # But if it reaches here, desugar and lower
243
+ desugared = stmt.desugar()
244
+ self.lower_statement(desugared)
245
+ elif isinstance(stmt, BlockStatement):
246
+ self.lower_block_statement(stmt)
247
+ elif isinstance(stmt, ExpressionStatement):
248
+ self.lower_expression_statement(stmt)
249
+ elif isinstance(stmt, ErrorStatement):
250
+ self.lower_error_statement(stmt)
251
+ else:
252
+ # Other statements can be handled as expressions
253
+ self.lower_expression(stmt)
254
+
255
def lower_function(
    self,
    func: FunctionStatement | UtilityStatement | ActionStatement | InteractionStatement,
) -> None:
    """Lower one function-like definition to a MIR function.

    Builds parameter variables, creates the MIR function with an ``entry``
    block, lowers the body statements, appends an implicit return when the
    current block is still open, and adds the function to the active
    module (if any). The current function and block are cleared afterwards.

    Args:
        func: The function to lower (any type of function).

    Raises:
        ValueError: If an implicit return is needed but neither the
            function nor its last statement has a source location.
    """
    # ---- Parameters ----------------------------------------------------
    params: list[Variable | ScopedVariable] = []
    for declared in func.inputs:
        # The type stays UNKNOWN unless a default value allows inference;
        # in that case it is left to be inferred later from usage.
        inferred: MIRType | MIRUnionType = MIRType.UNKNOWN
        if not isinstance(declared, Parameter):
            label = str(declared)
        else:
            ident = declared.name
            label = ident.value if isinstance(ident, Identifier) else str(ident)
            if hasattr(declared, "default_value") and declared.default_value:
                inferred = infer_ast_expression_type(declared.default_value, self.type_context)

        # Parameters are always scoped as PARAMETER
        scoped = ScopedVariable(label, VariableScope.PARAMETER, inferred)
        params.append(scoped)
        self.type_context[label] = inferred

        # Track parameter for debugging
        self.debug_builder.track_variable(label, scoped, str(inferred), is_parameter=True)

    # ---- Return type ---------------------------------------------------
    # Utilities return a value; actions and interactions return nothing;
    # a generic FunctionStatement is decided by its visibility attribute.
    if isinstance(func, UtilityStatement):
        return_type = MIRType.UNKNOWN
    elif isinstance(func, (ActionStatement, InteractionStatement)):
        return_type = MIRType.EMPTY
    elif isinstance(func, FunctionStatement) and func.visibility != FunctionVisibility.FUNCTION:
        return_type = MIRType.EMPTY
    else:
        return_type = MIRType.UNKNOWN

    # ---- Function skeleton ---------------------------------------------
    name_node = func.name
    func_name = name_node.value if isinstance(name_node, Identifier) else str(name_node)

    mir_func = MIRFunction(func_name, params, return_type)
    self.current_function = mir_func

    entry = BasicBlock("entry")
    mir_func.cfg.add_block(entry)
    mir_func.cfg.set_entry_block(entry)
    self.current_block = entry

    # The variable map is rebuilt from scratch, seeded with the parameters.
    self.variable_map.clear()
    for scoped in params:
        self.variable_map[scoped.name] = scoped
        mir_func.add_local(scoped)

    # ---- Body ----------------------------------------------------------
    # After the loop, final_stmt holds the last lowered statement (or None
    # when there is no body or it is empty).
    final_stmt = None
    body = func.body
    if body:
        for final_stmt in body.statements:
            self.lower_statement(final_stmt)

    # ---- Implicit return -----------------------------------------------
    open_block = self.current_block
    if open_block and not open_block.is_terminated():
        # Prefer the function's own location; fall back to the location of
        # the last statement.
        source_loc = func.get_source_location()
        if source_loc is None:
            if final_stmt:
                source_loc = final_stmt.get_source_location()
            if source_loc is None:
                raise ValueError(f"Function {func_name} missing source location for implicit return")

        if return_type == MIRType.EMPTY:
            return_inst = Return(source_loc)
        else:
            # Return a default value: load None into a fresh temp first.
            temp = self.current_function.new_temp(return_type)
            open_block.add_instruction(LoadConst(temp, None, source_loc))
            return_inst = Return(source_loc, temp)
        return_inst.source_location = source_loc
        open_block.add_instruction(return_inst)

    # ---- Registration and cleanup --------------------------------------
    if self.module:
        self.module.add_function(mir_func)

    self.current_function = None
    self.current_block = None
360
+
361
+ def lower_set_statement(self, stmt: SetStatement) -> None:
362
+ """Lower a set statement to MIR with enhanced type tracking.
363
+
364
+ Args:
365
+ stmt: The set statement to lower.
366
+ """
367
+ if not self.current_function or not self.current_block:
368
+ return
369
+
370
+ # Get source location from the statement
371
+ source_loc = stmt.get_source_location()
372
+ if source_loc is None:
373
+ raise ValueError("SetStatement missing source location")
374
+
375
+ # Lower the value expression
376
+ if stmt.value is not None:
377
+ # Special handling for BlankLiteral - creates empty collection
378
+ if isinstance(stmt.value, BlankLiteral):
379
+ # We'll handle this after we know the variable's type
380
+ value = None # Placeholder, will be set based on variable type
381
+ else:
382
+ value = self.lower_expression(stmt.value)
383
+ else:
384
+ # This shouldn't happen but handle gracefully
385
+ value = Constant(None, MIRType.ERROR)
386
+
387
+ # Get or create variable
388
+ var_name = stmt.name.value if isinstance(stmt.name, Identifier) else str(stmt.name)
389
+ var: Variable | ScopedVariable
390
+ if var_name not in self.variable_map:
391
+ # Variable wasn't defined - this should be caught by semantic analysis
392
+ # Create with inferred type for error recovery
393
+ var_type = (
394
+ value.type
395
+ if value and hasattr(value, "type")
396
+ else infer_ast_expression_type(stmt.value, self.type_context)
397
+ if stmt.value and not isinstance(stmt.value, BlankLiteral)
398
+ else MIRType.ARRAY # Default to array for BlankLiteral
399
+ if isinstance(stmt.value, BlankLiteral)
400
+ else MIRType.UNKNOWN
401
+ )
402
+
403
+ # Check if we're inside a function (not __main__)
404
+ if self.current_function and self.current_function.name != "__main__":
405
+ # Check if this is a parameter
406
+ is_param = any(p.name == var_name for p in self.current_function.params)
407
+ if is_param:
408
+ # This shouldn't happen - parameters should already be in variable_map
409
+ var = ScopedVariable(var_name, VariableScope.PARAMETER, var_type)
410
+ else:
411
+ # This is a function-local variable
412
+ var = ScopedVariable(var_name, VariableScope.LOCAL, var_type)
413
+ else:
414
+ # This is a global variable (module-level)
415
+ var = ScopedVariable(var_name, VariableScope.GLOBAL, var_type)
416
+
417
+ self.variable_map[var_name] = var
418
+ self.current_function.add_local(var)
419
+ self.type_context[var_name] = var_type
420
+
421
+ # Track variable for debugging
422
+ self.debug_builder.track_variable(var_name, var, str(var_type), is_parameter=False)
423
+
424
+ # Handle BlankLiteral for new variable
425
+ if isinstance(stmt.value, BlankLiteral):
426
+ # Create empty collection based on inferred type
427
+ if var_type == MIRType.DICT:
428
+ # Create empty dictionary
429
+ dict_var = self.current_function.new_temp(MIRType.DICT)
430
+ self._add_instruction(DictCreate(dict_var, source_loc), stmt)
431
+ value = dict_var
432
+ else:
433
+ # Default to empty array
434
+ size = Constant(0, MIRType.INT)
435
+ temp_size = self.current_function.new_temp(MIRType.INT)
436
+ self._add_instruction(LoadConst(temp_size, size, source_loc), stmt)
437
+ array_var = self.current_function.new_temp(MIRType.ARRAY)
438
+ self._add_instruction(ArrayCreate(array_var, temp_size, source_loc), stmt)
439
+ value = array_var
440
+ else:
441
+ var = self.variable_map[var_name]
442
+
443
+ # Handle BlankLiteral based on variable type
444
+ if isinstance(stmt.value, BlankLiteral):
445
+ # Create empty collection based on variable type
446
+ if var.type == MIRType.ARRAY:
447
+ # Create empty array
448
+ size = Constant(0, MIRType.INT)
449
+ temp_size = self.current_function.new_temp(MIRType.INT)
450
+ self._add_instruction(LoadConst(temp_size, size, source_loc), stmt)
451
+ array_var = self.current_function.new_temp(MIRType.ARRAY)
452
+ self._add_instruction(ArrayCreate(array_var, temp_size, source_loc), stmt)
453
+ value = array_var
454
+ elif var.type == MIRType.DICT:
455
+ # Create empty dictionary
456
+ dict_var = self.current_function.new_temp(MIRType.DICT)
457
+ self._add_instruction(DictCreate(dict_var, source_loc), stmt)
458
+ value = dict_var
459
+ else:
460
+ # For other types or unknown types, default to empty array
461
+ size = Constant(0, MIRType.INT)
462
+ temp_size = self.current_function.new_temp(MIRType.INT)
463
+ self._add_instruction(LoadConst(temp_size, size, source_loc), stmt)
464
+ array_var = self.current_function.new_temp(MIRType.ARRAY)
465
+ self._add_instruction(ArrayCreate(array_var, temp_size, source_loc), stmt)
466
+ value = array_var
467
+
468
+ # For union types, track the actual runtime type being assigned
469
+ if var_name in self.union_type_context:
470
+ if value and hasattr(value, "type") and value.type != MIRType.UNKNOWN:
471
+ # This assignment narrows the type for flow-sensitive analysis
472
+ # Store this info for optimization passes
473
+ if hasattr(var, "runtime_type"):
474
+ var.runtime_type = value.type
475
+ else:
476
+ # Update type context if we have better type info
477
+ if value and hasattr(value, "type") and value.type != MIRType.UNKNOWN:
478
+ self.type_context[var_name] = value.type
479
+
480
+ # If the value is a constant, load it into a temporary first
481
+ if value and isinstance(value, Constant):
482
+ # Create a temporary variable for the constant
483
+ temp = self.current_function.new_temp(value.type)
484
+ self._add_instruction(LoadConst(temp, value, source_loc), stmt)
485
+ # Use the temp as the source
486
+ value = temp
487
+
488
+ # Store the value (value should always be set by now)
489
+ if value:
490
+ self._add_instruction(StoreVar(var, value, source_loc), stmt)
491
+
492
def _convert_define_statement(self, stmt: DefineStatement) -> None:
    """Declare the variable named by a DefineStatement in the current MIR function.

    The variable is registered in ``variable_map``, ``type_context``, the
    function's locals and the debug builder. Union-typed declarations are
    stored with an UNKNOWN variable type, and the union itself is recorded
    in ``union_type_context`` and on the variable's ``union_type`` attribute.

    Args:
        stmt: DefineStatement from HIR.

    Raises:
        ValueError: If an initial value must be stored but the statement
            has no source location.
    """
    if not self.current_function:
        return

    if isinstance(stmt.name, Identifier):
        name = stmt.name.value
    else:
        name = str(stmt.name)

    declared_type = ast_type_to_mir_type(stmt.type_spec)

    # Anything declared outside of __main__ is local to its function;
    # declarations in __main__ are module-level globals.
    at_module_level = self.current_function.name == "__main__"
    scope = VariableScope.GLOBAL if at_module_level else VariableScope.LOCAL

    if isinstance(declared_type, MIRUnionType):
        # The variable itself is typed UNKNOWN; the union is kept on the side.
        variable = ScopedVariable(name, scope, MIRType.UNKNOWN)
        self.union_type_context[name] = declared_type
        self.type_context[name] = MIRType.UNKNOWN
        variable.union_type = declared_type
    else:
        variable = ScopedVariable(name, scope, declared_type)
        self.type_context[name] = declared_type

    self.variable_map[name] = variable
    self.current_function.add_local(variable)
    self.debug_builder.track_variable(name, variable, str(declared_type), is_parameter=False)

    # Storing an initial value needs both a value and a block to emit into.
    if not (stmt.initial_value and self.current_block):
        return

    location = stmt.get_source_location()
    if location is None:
        raise ValueError("DefineStatement missing source location")

    initial = self.lower_expression(stmt.initial_value)
    if isinstance(initial, Constant):
        # Constants are loaded into a temporary before being stored.
        holder = self.current_function.new_temp(initial.type)
        self._add_instruction(LoadConst(holder, initial, location), stmt)
        initial = holder

    self._add_instruction(StoreVar(variable, initial, location), stmt)
559
+
560
def lower_if_statement(self, stmt: IfStatement) -> None:
    """Lower an if statement into then / optional else / merge basic blocks.

    After the call, ``self.current_block`` is the merge block.

    Args:
        stmt: The if statement to lower.

    Raises:
        ValueError: If the statement has no source location or no condition.
    """
    if not (self.current_function and self.current_block):
        return

    location = stmt.get_source_location()
    if location is None:
        raise ValueError("IfStatement missing source location")

    if stmt.condition is None:
        raise ValueError("If statement missing condition")
    test = self.lower_expression(stmt.condition)

    if isinstance(test, Constant):
        # A constant condition is loaded into a temporary first.
        test_temp = self.current_function.new_temp(test.type)
        self._add_instruction(LoadConst(test_temp, test, location), stmt)
        test = test_temp

    # Labels are generated in then / else / merge order.
    then_label = self.generate_label("then")
    else_label = None
    if stmt.alternative:
        else_label = self.generate_label("else")
    merge_label = self.generate_label("merge")

    then_block = BasicBlock(then_label)
    merge_block = BasicBlock(merge_label)
    self.current_function.cfg.add_block(then_block)
    self.current_function.cfg.add_block(merge_block)

    # The false edge goes to the else block when there is one, otherwise
    # straight to the merge block.
    if else_label:
        else_block = BasicBlock(else_label)
        self.current_function.cfg.add_block(else_block)
        false_label, false_block = else_label, else_block
    else:
        false_label, false_block = merge_label, merge_block

    self._add_instruction(ConditionalJump(test, then_label, location, false_label), stmt)
    self.current_function.cfg.connect(self.current_block, then_block)
    self.current_function.cfg.connect(self.current_block, false_block)

    def lower_branch(entry_block, body) -> None:
        # Lower one arm and fall through to the merge block unless the arm
        # already ended in a terminator.
        self.current_block = entry_block
        if body:
            for child in body.statements:
                self.lower_statement(child)
        if not self.current_block.is_terminated():
            self._add_instruction(Jump(merge_label, location), stmt)
            self.current_function.cfg.connect(self.current_block, merge_block)

    lower_branch(then_block, stmt.consequence)
    if else_label and stmt.alternative:
        lower_branch(else_block, stmt.alternative)

    self.current_block = merge_block
635
+
636
def lower_while_statement(self, stmt: WhileStatement) -> None:
    """Lower a while statement into header / body / exit basic blocks.

    The condition is evaluated in the header block; the body jumps back to
    the header. After the call, ``self.current_block`` is the exit block.

    Args:
        stmt: The while statement to lower.

    Raises:
        ValueError: If the statement has no condition.
    """
    if not (self.current_function and self.current_block):
        return

    location = stmt.get_source_location()
    if location is None:
        # While statements fall back to a default location instead of failing.
        location = (0, 0)

    header_label, body_label, exit_label = (
        self.generate_label(kind) for kind in ("while_header", "while_body", "while_exit")
    )
    header_block = BasicBlock(header_label)
    body_block = BasicBlock(body_label)
    exit_block = BasicBlock(exit_label)
    for block in (header_block, body_block, exit_block):
        self.current_function.cfg.add_block(block)

    # Enter the loop through its header.
    self._add_instruction(Jump(header_label, location), stmt)
    self.current_function.cfg.connect(self.current_block, header_block)
    self.current_block = header_block

    if stmt.condition is None:
        raise ValueError("While statement missing condition")
    test = self.lower_expression(stmt.condition)

    if isinstance(test, Constant):
        # A constant condition is loaded into a temporary first.
        test_temp = self.current_function.new_temp(test.type)
        self._add_instruction(LoadConst(test_temp, test, location), stmt)
        test = test_temp

    # True -> loop body, false -> loop exit.
    self._add_instruction(ConditionalJump(test, body_label, location, exit_label), stmt)
    self.current_function.cfg.connect(self.current_block, body_block)
    self.current_function.cfg.connect(self.current_block, exit_block)

    self.current_block = body_block
    if stmt.body:
        for child in stmt.body.statements:
            self.lower_statement(child)

    # Close the loop unless the body already ended in a terminator.
    if not self.current_block.is_terminated():
        self._add_instruction(Jump(header_label, location), stmt)
        self.current_function.cfg.connect(self.current_block, header_block)

    self.current_block = exit_block
700
+
701
def lower_return_statement(self, stmt: ReturnStatement) -> None:
    """Lower a return statement to a MIR ``Return`` instruction.

    Args:
        stmt: The return statement to lower.

    Raises:
        ValueError: If the statement has no source location.
        RuntimeError: If a constant return value must be loaded into a
            temporary but there is no current function.
    """
    if not self.current_block:
        return

    location = stmt.get_source_location()
    if location is None:
        raise ValueError("ReturnStatement missing source location")

    # Bare return: no operand.
    if not stmt.return_value:
        self._add_instruction(Return(location), stmt)
        return

    result = self.lower_expression(stmt.return_value)
    if isinstance(result, Constant):
        if self.current_function is None:
            raise RuntimeError("No current function context")
        # Constants are loaded into a temporary before being returned.
        holder = self.current_function.new_temp(result.type)
        self._add_instruction(LoadConst(holder, result, location), stmt)
        result = holder

    self._add_instruction(Return(location, result), stmt)
729
+
730
def lower_call_statement(self, stmt: CallStatement) -> None:
    """Lower a call statement to a MIR ``Call`` whose result is discarded.

    Positional arguments are lowered first, then named arguments in the
    order they appear; named arguments are passed positionally (their
    names are not matched against parameter names here).

    Args:
        stmt: The call statement to lower.

    Raises:
        ValueError: If the statement has no source location.
    """
    if not self.current_block or not self.current_function:
        return

    # Fetched and validated once; every instruction below reuses it.
    source_loc = stmt.get_source_location()
    if source_loc is None:
        raise ValueError("CallStatement missing source location")

    def lower_argument(node):
        # Lower one argument; constants are loaded into a fresh temporary.
        val = self.lower_expression(node)
        if isinstance(val, Constant):
            temp = self.current_function.new_temp(val.type)
            self._add_instruction(LoadConst(temp, val, source_loc), stmt)
            return temp
        return val

    args = []
    arguments = stmt.arguments
    if arguments:
        if isinstance(arguments, Arguments):
            positional = getattr(arguments, "positional", None)
            if positional:
                for arg in positional:
                    args.append(lower_argument(arg))

            # Named arguments are appended positionally for now.
            named = getattr(arguments, "named", None)
            if named:
                for _name, arg in named:
                    args.append(lower_argument(arg))
        else:
            # Single argument not wrapped in Arguments
            args.append(lower_argument(arguments))

    # Resolve the callee name from the expression that names it.
    target = stmt.function_name
    if isinstance(target, StringLiteral):
        func_name = target.value.strip('"').strip("'")
    elif isinstance(target, Identifier):
        func_name = target.value
    else:
        func_name = str(target)

    # Destination is None: the call's result is not stored.
    self._add_instruction(Call(None, FunctionRef(func_name), args, source_loc), stmt)
797
+
798
def lower_say_statement(self, stmt: SayStatement) -> None:
    """Lower a say statement to a MIR ``Print`` instruction.

    Nothing is emitted when the statement has no expression.

    Args:
        stmt: The say statement to lower.

    Raises:
        ValueError: If the statement has no source location.
        RuntimeError: If a constant must be loaded into a temporary but
            there is no current function.
    """
    if not self.current_block:
        return

    location = stmt.get_source_location()
    if location is None:
        raise ValueError("SayStatement missing source location")

    if not stmt.expression:
        return

    printed = self.lower_expression(stmt.expression)
    if isinstance(printed, Constant):
        if self.current_function is None:
            raise RuntimeError("No current function context")
        # Constants are loaded into a temporary before being printed.
        holder = self.current_function.new_temp(printed.type)
        self._add_instruction(LoadConst(holder, printed, location), stmt)
        printed = holder

    self._add_instruction(Print(printed, location), stmt)
823
+
824
def lower_collection_mutation(self, stmt: CollectionMutationStatement) -> None:
    """Lower a collection mutation statement to MIR.

    Handles operations like:
        Arrays (Ordered/Unordered Lists):
        - Add _value_ to list
        - Remove _value_ from list
        - Set the second item of list to _value_
        - Insert _value_ at position _3_ in list
        - Clear list

        Named Lists (Dictionaries):
        - Add "key" to dict with value _value_
        - Remove "key" from dict
        - Update "key" in dict to _value_
        - Clear dict

    Unrecognised operations, and operations missing the value or position
    they need, emit nothing beyond the collection load.

    Args:
        stmt: The collection mutation statement to lower.
    """
    if not self.current_block or not self.current_function:
        return

    source_loc = stmt.get_source_location()
    if source_loc is None:
        source_loc = (1, 1)

    collection = self.lower_expression(stmt.collection)

    # A collection held in a variable is copied into a temp before use.
    if isinstance(collection, Variable):
        temp_collection = self.current_function.new_temp(collection.type)
        self._add_instruction(Copy(temp_collection, collection, source_loc), stmt)
        collection = temp_collection

    # Dictionary operation: keyed position, or a DICT-typed collection.
    is_dict_operation = stmt.position_type == "key" or getattr(collection, "type", None) == MIRType.DICT

    def emit(instruction) -> None:
        # Append one instruction attributed to this statement.
        self._add_instruction(instruction, stmt)

    def in_temp(operand, temp_type=None):
        # Load a Constant into a fresh temp (typed temp_type, or the
        # constant's own type); pass any other value through unchanged.
        if not isinstance(operand, Constant):
            return operand
        temp = self.current_function.new_temp(operand.type if temp_type is None else temp_type)
        emit(LoadConst(temp, operand, source_loc))
        return temp

    def load_int(number):
        # Materialise an integer constant in a fresh INT temp.
        constant = Constant(number, MIRType.INT)
        temp = self.current_function.new_temp(MIRType.INT)
        emit(LoadConst(temp, constant, source_loc))
        return temp

    def minus_one(operand):
        # Emit ``operand - 1`` into a fresh INT temp.
        one = load_int(1)
        result = self.current_function.new_temp(MIRType.INT)
        emit(BinaryOp(result, "-", operand, one, source_loc))
        return result

    def lower_index(position, *, last_is_length=False):
        # Turn a statement position into a temp holding a 0-based index.
        if isinstance(position, int):
            # Integer indices are already 0-based from HIR
            return load_int(position)
        if isinstance(position, str) and position == "last":
            length = self.current_function.new_temp(MIRType.INT)
            emit(ArrayLength(length, collection, source_loc))
            # Insert-at-last targets the end (index == length); set/remove
            # target the final element (length - 1).
            return length if last_is_length else minus_one(length)
        if isinstance(position, Expression):
            # Expression-based indices are 1-based and need converting.
            lowered = self.lower_expression(position)
            if isinstance(lowered, Constant):
                base = in_temp(lowered, MIRType.INT)
            elif isinstance(lowered, Temp):
                base = lowered
            else:
                base = self.current_function.new_temp(MIRType.INT)
                emit(Copy(base, lowered, source_loc))
            return minus_one(base)
        # Any other position falls back to the first element.
        return load_int(0)

    def lower_key(position):
        # Lower a dictionary key; raw str/int positions are wrapped in
        # literal nodes first.
        if isinstance(position, str):
            node = StringLiteral(token=stmt.token, value=position)
        elif isinstance(position, int):
            node = WholeNumberLiteral(token=stmt.token, value=position)
        else:
            node = position
        key = self.lower_expression(node) if node else Constant("", MIRType.STRING)
        # NOTE(review): constant keys always get a STRING-typed temp, even
        # for whole-number keys - kept as in the original; confirm intent.
        return in_temp(key, MIRType.STRING)

    operation = stmt.operation

    if operation == "add":
        if is_dict_operation and stmt.position:
            # Dictionary: Add "key" to dict with value _value_
            from machine_dialect.mir.mir_instructions import DictSet

            key = lower_key(stmt.position)
            if stmt.value:
                value = in_temp(self.lower_expression(stmt.value))
                emit(DictSet(collection, key, value, source_loc))
        elif stmt.value:
            # Array: Add _value_ to list
            value = in_temp(self.lower_expression(stmt.value))
            emit(ArrayAppend(collection, value, source_loc))

    elif operation == "set":
        # Set operation: array[index] = value
        if stmt.value and stmt.position is not None:
            value = in_temp(self.lower_expression(stmt.value))
            index = lower_index(stmt.position)
            emit(ArraySet(collection, index, value, source_loc))

    elif operation == "remove":
        if is_dict_operation:
            # Dictionary: Remove "key" from dict
            from machine_dialect.mir.mir_instructions import DictRemove

            if stmt.value:
                # The key is stored in the value field for remove operations
                key = in_temp(self.lower_expression(stmt.value), MIRType.STRING)
                emit(DictRemove(collection, key, source_loc))
        elif stmt.position is not None:
            # Remove by position
            index = lower_index(stmt.position)
            emit(ArrayRemove(collection, index, source_loc))
        elif stmt.value:
            # Remove by value: find the index, then remove it. No check for
            # a "not found" index is emitted here; the remove is issued
            # unconditionally.
            value = in_temp(self.lower_expression(stmt.value))
            index = self.current_function.new_temp(MIRType.INT)
            emit(ArrayFindIndex(index, collection, value, source_loc))
            emit(ArrayRemove(collection, index, source_loc))

    elif operation == "insert":
        # Insert operation: insert at specific position
        if stmt.value and stmt.position is not None:
            value = in_temp(self.lower_expression(stmt.value))
            index = lower_index(stmt.position, last_is_length=True)
            emit(ArrayInsert(collection, index, value, source_loc))

    elif operation == "update":
        # Update operation: only for dictionaries
        if is_dict_operation and stmt.position and stmt.value:
            from machine_dialect.mir.mir_instructions import DictSet

            key = lower_key(stmt.position)
            value = in_temp(self.lower_expression(stmt.value))
            emit(DictSet(collection, key, value, source_loc))

    elif operation == "clear":
        # is_dict_operation already covers DICT-typed collections. The
        # previous ``A or B if C else False`` parsed as
        # ``(A or B) if C else False`` and could drop the flag.
        if is_dict_operation:
            from machine_dialect.mir.mir_instructions import DictClear

            emit(DictClear(collection, source_loc))
        else:
            emit(ArrayClear(collection, source_loc))
1150
+
1151
def lower_block_statement(self, stmt: BlockStatement) -> None:
    """Lower a block statement, bracketing its statements with Scope markers.

    Args:
        stmt: The block statement to lower.

    Raises:
        ValueError: If the statement has no source location.
    """
    if not self.current_block:
        return

    location = stmt.get_source_location()
    if location is None:
        raise ValueError("BlockStatement missing source location")

    # Open the scope, lower the contents, then close the scope.
    self._add_instruction(Scope(location, is_begin=True), stmt)

    for child in stmt.statements:
        self.lower_statement(child)

    # The closing marker is emitted whenever there is still a current
    # block, even if that block already ended in a terminator.
    if self.current_block:
        self._add_instruction(Scope(location, is_begin=False), stmt)
1176
+
1177
def lower_expression_statement(self, stmt: ExpressionStatement) -> None:
    """Lower an expression statement and discard the value it produces.

    Args:
        stmt: The expression statement to lower.

    Raises:
        ValueError: If the statement has no source location.
    """
    if not self.current_block:
        return

    location = stmt.get_source_location()
    if location is None:
        raise ValueError("ExpressionStatement missing source location")

    if not stmt.expression:
        return

    discarded = self.lower_expression(stmt.expression)
    if discarded is not None:
        # The result is unused, so a Pop is emitted for it.
        self._add_instruction(Pop(discarded, location), stmt)
1197
+
1198
def lower_error_statement(self, stmt: ErrorStatement) -> None:
    """Lower an error statement to an ``Assert`` on a constant False.

    The assert carries the statement's message prefixed with
    ``"Parse error: "``.

    Args:
        stmt: The error statement to lower.

    Raises:
        ValueError: If the statement has no source location.
    """
    if not (self.current_block and self.current_function):
        return

    location = stmt.get_source_location()
    if location is None:
        raise ValueError("ErrorStatement missing source location")

    # Asserting on a constant False carries the parse error into the MIR.
    always_false = Constant(False, MIRType.BOOL)
    message = f"Parse error: {stmt.message}"
    self._add_instruction(Assert(always_false, location, message), stmt)
1217
+
1218
def lower_expression(self, expr: ASTNode) -> MIRValue:
    """Lower an expression to MIR.

    Dispatches on the node type of ``expr`` and appends the instructions
    needed to compute it to the current block. Handled node kinds, in the
    order they are tested: scalar literals, list literals, named-list
    (dictionary) literals, collection access, identifiers, dictionary
    extraction, infix, prefix and conditional expressions, error
    expressions, and any node exposing a ``function_name`` attribute
    (treated as a call).

    Args:
        expr: The expression to lower.

    Returns:
        The MIR value representing the expression result. Scalar literals
        are returned as ``Constant`` values without emitting instructions;
        most other kinds return a freshly allocated temp. A
        ``Constant(None, MIRType.ERROR)`` is returned for unsupported
        collection access types, unknown identifiers, error expressions and
        unrecognised nodes. ``Constant(None)`` is returned when there is no
        current function or block.

    Raises:
        ValueError: If an infix or prefix expression has no right operand,
            if a conditional expression lacks its condition or either
            branch, or if an infix, prefix, conditional or call expression
            has no source location when one is needed.
    """
    if not self.current_function or not self.current_block:
        return Constant(None)

    # Handle literals: returned as bare constants, no instruction is emitted here
    if isinstance(expr, WholeNumberLiteral):
        return Constant(expr.value, MIRType.INT)
    elif isinstance(expr, FloatLiteral):
        return Constant(expr.value, MIRType.FLOAT)
    elif isinstance(expr, StringLiteral):
        return Constant(expr.value, MIRType.STRING)
    elif isinstance(expr, YesNoLiteral):
        return Constant(expr.value, MIRType.BOOL)
    elif isinstance(expr, EmptyLiteral):
        return Constant(None, MIRType.EMPTY)
    elif isinstance(expr, URLLiteral):
        return Constant(expr.value, MIRType.URL)

    # Handle list literals
    elif isinstance(expr, UnorderedListLiteral | OrderedListLiteral):
        # Get source location, falling back to (1, 1) when the node has none
        source_loc = expr.get_source_location()
        if source_loc is None:
            source_loc = (1, 1)

        # Create array with size
        size = Constant(len(expr.elements), MIRType.INT)
        # Load size constant into register for proper constant pool usage
        temp_size = self.current_function.new_temp(MIRType.INT)
        self._add_instruction(LoadConst(temp_size, size, source_loc), expr)

        array_var = self.current_function.new_temp(MIRType.ARRAY)
        self._add_instruction(ArrayCreate(array_var, temp_size, source_loc), expr)

        # Add elements to array, one ArraySet per element at index 0, 1, 2, ...
        for i, element in enumerate(expr.elements):
            elem_value = self.lower_expression(element)

            # Load constant values into registers for proper constant pool usage
            if isinstance(elem_value, Constant):
                temp_elem = self.current_function.new_temp(elem_value.type)
                self._add_instruction(LoadConst(temp_elem, elem_value, source_loc), expr)
                elem_value = temp_elem

            # Create index value
            index = Constant(i, MIRType.INT)
            # Load index constant into register too
            temp_index = self.current_function.new_temp(MIRType.INT)
            self._add_instruction(LoadConst(temp_index, index, source_loc), expr)

            self._add_instruction(ArraySet(array_var, temp_index, elem_value, source_loc), expr)

        return array_var

    elif isinstance(expr, NamedListLiteral):
        # Create a dictionary and populate it with key-value pairs
        source_loc = expr.get_source_location()
        if source_loc is None:
            source_loc = (1, 1)

        # Import DictCreate and DictSet
        from machine_dialect.mir.mir_instructions import DictCreate, DictSet

        # Create an empty dictionary
        dict_var = self.current_function.new_temp(MIRType.DICT)
        self._add_instruction(DictCreate(dict_var, source_loc), expr)

        # Add each key-value pair
        for key, value in expr.entries:
            # Handle key - can be a string or an Identifier expression
            if isinstance(key, str):
                # Direct string key
                key_str = Constant(key, MIRType.STRING)
                key_value = self.current_function.new_temp(MIRType.STRING)
                self._add_instruction(LoadConst(key_value, key_str, source_loc), expr)
            elif isinstance(key, Identifier):
                # Identifier used as key - its name becomes the string key
                # (the identifier is not looked up as a variable)
                key_str = Constant(key.value, MIRType.STRING)
                key_value = self.current_function.new_temp(MIRType.STRING)
                self._add_instruction(LoadConst(key_value, key_str, source_loc), expr)
            else:
                # Other expression types - lower them
                key_val = self.lower_expression(key)
                if isinstance(key_val, Constant):
                    # Load constant into temp
                    # NOTE(review): the temp is typed STRING regardless of the
                    # constant's own type - confirm this is intended.
                    key_value = self.current_function.new_temp(MIRType.STRING)
                    self._add_instruction(LoadConst(key_value, key_val, source_loc), expr)
                else:
                    # Already in temp register
                    key_value = key_val

            # Lower the value expression
            value_val = self.lower_expression(value)
            # Ensure value is in a temp
            if isinstance(value_val, Constant):
                temp_val = self.current_function.new_temp(self._get_mir_type(value_val))
                self._add_instruction(LoadConst(temp_val, value_val, source_loc), expr)
                value_val = temp_val

            # Set the key-value pair in the dictionary
            self._add_instruction(DictSet(dict_var, key_value, value_val, source_loc), expr)

        return dict_var

    # Handle collection access
    elif isinstance(expr, CollectionAccessExpression):
        source_loc = expr.get_source_location()
        if source_loc is None:
            source_loc = (1, 1)

        # Lower the collection
        collection = self.lower_expression(expr.collection)

        # Ensure collection is in a temp register
        # NOTE(review): the temp is typed ARRAY even when the access below
        # turns out to be a dictionary access - confirm this is intended.
        if isinstance(collection, Constant):
            temp_collection = self.current_function.new_temp(MIRType.ARRAY)
            self._add_instruction(LoadConst(temp_collection, collection, source_loc), expr)
            collection = temp_collection

        # Import DictGet for dictionary access
        from machine_dialect.mir.mir_instructions import DictGet

        # Handle index based on access type
        if expr.access_type == "numeric":
            # Numeric index
            if isinstance(expr.accessor, int):
                # Integer indices are already 0-based from HIR
                index = Constant(expr.accessor, MIRType.INT)
                temp_index = self.current_function.new_temp(MIRType.INT)
                self._add_instruction(LoadConst(temp_index, index, source_loc), expr)
            else:
                # Expression-based indices need to subtract 1 (convert from 1-based to 0-based)
                if isinstance(expr.accessor, Expression):
                    index_value = self.lower_expression(expr.accessor)
                    if isinstance(index_value, Constant):
                        temp_expr = self.current_function.new_temp(MIRType.INT)
                        self._add_instruction(LoadConst(temp_expr, index_value, source_loc), expr)
                    elif isinstance(index_value, Temp):
                        temp_expr = index_value
                    else:
                        # Handle other MIRValue types by copying into a fresh INT temp
                        temp_expr = self.current_function.new_temp(MIRType.INT)
                        self._add_instruction(Copy(temp_expr, index_value, source_loc), expr)

                    # Subtract 1 to convert from 1-based to 0-based
                    one = Constant(1, MIRType.INT)
                    temp_one = self.current_function.new_temp(MIRType.INT)
                    self._add_instruction(LoadConst(temp_one, one, source_loc), expr)

                    temp_index = self.current_function.new_temp(MIRType.INT)
                    self._add_instruction(BinaryOp(temp_index, "-", temp_expr, temp_one, source_loc), expr)
                else:
                    # This shouldn't happen, but handle gracefully by
                    # falling back to index 0
                    temp_index = self.current_function.new_temp(MIRType.INT)
                    self._add_instruction(LoadConst(temp_index, Constant(0, MIRType.INT), source_loc), expr)

            # Perform array get; the element type is not known here
            result = self.current_function.new_temp(MIRType.UNKNOWN)
            self._add_instruction(ArrayGet(result, collection, temp_index, source_loc), expr)
            return result

        elif expr.access_type == "ordinal" and expr.accessor == "last":
            # Special case for "last": index = length - 1
            length_temp = self.current_function.new_temp(MIRType.INT)
            self._add_instruction(ArrayLength(length_temp, collection, source_loc), expr)

            # Subtract 1
            one = Constant(1, MIRType.INT)
            temp_one = self.current_function.new_temp(MIRType.INT)
            self._add_instruction(LoadConst(temp_one, one, source_loc), expr)

            temp_index = self.current_function.new_temp(MIRType.INT)
            self._add_instruction(BinaryOp(temp_index, "-", length_temp, temp_one, source_loc), expr)

            # Perform array get
            result = self.current_function.new_temp(MIRType.UNKNOWN)
            self._add_instruction(ArrayGet(result, collection, temp_index, source_loc), expr)
            return result

        elif expr.access_type in ("property", "name"):
            # Dictionary property or name access
            # Get the key as a string or MIRValue
            dict_key: MIRValue
            if isinstance(expr.accessor, str):
                key_const = Constant(expr.accessor, MIRType.STRING)
                temp_key = self.current_function.new_temp(MIRType.STRING)
                self._add_instruction(LoadConst(temp_key, key_const, source_loc), expr)
                dict_key = temp_key
            elif isinstance(expr.accessor, Identifier):
                # The identifier's name is used as the string key
                key_const = Constant(expr.accessor.value, MIRType.STRING)
                temp_key = self.current_function.new_temp(MIRType.STRING)
                self._add_instruction(LoadConst(temp_key, key_const, source_loc), expr)
                dict_key = temp_key
            elif isinstance(expr.accessor, Expression):
                dict_key = self.lower_expression(expr.accessor)
                if isinstance(dict_key, Constant):
                    temp_key = self.current_function.new_temp(MIRType.STRING)
                    self._add_instruction(LoadConst(temp_key, dict_key, source_loc), expr)
                    dict_key = temp_key
                # Otherwise dict_key is already a proper MIRValue (likely Temp)
            else:
                # Fallback - shouldn't normally happen; use str() of the accessor as key
                key_const = Constant(str(expr.accessor), MIRType.STRING)
                temp_key = self.current_function.new_temp(MIRType.STRING)
                self._add_instruction(LoadConst(temp_key, key_const, source_loc), expr)
                dict_key = temp_key

            # Perform dictionary get
            result = self.current_function.new_temp(MIRType.UNKNOWN)
            self._add_instruction(DictGet(result, collection, dict_key, source_loc), expr)
            return result

        else:
            # Other access types - not yet supported
            # (this includes "ordinal" accessors other than "last")
            return Constant(None, MIRType.ERROR)

    # Handle identifier
    elif isinstance(expr, Identifier):
        if expr.value in self.variable_map:
            var = self.variable_map[expr.value]
            # Use type from context if available; this updates the mapped
            # variable's type in place when it was still UNKNOWN
            if expr.value in self.type_context and var.type == MIRType.UNKNOWN:
                var.type = self.type_context[expr.value]
            # Load variable into temp
            temp = self.current_function.new_temp(var.type)
            self._add_instruction(Copy(temp, var, expr.get_source_location() or (1, 1)), expr)
            return temp
        else:
            # Unknown identifier, return error value
            return Constant(None, MIRType.ERROR)

    # Handle dictionary extraction (the names of, the contents of)
    # The class is first matched by name so that the import below only runs
    # for candidate nodes; isinstance then confirms the match.
    elif hasattr(expr, "__class__") and expr.__class__.__name__ == "DictExtraction":
        # Import here to avoid circular dependency
        from machine_dialect.ast.dict_extraction import DictExtraction
        from machine_dialect.mir.mir_instructions import DictKeys, DictValues

        if isinstance(expr, DictExtraction):
            # Get source location
            # NOTE(review): the fallback here is (0, 0) while the other
            # branches of this method use (1, 1) - confirm which is intended.
            source_loc = expr.get_source_location()
            if source_loc is None:
                source_loc = (0, 0)

            # Lower the dictionary expression
            dict_value = self.lower_expression(expr.dictionary)

            # Load into temp if it's a constant
            if isinstance(dict_value, Constant):
                temp_dict = self.current_function.new_temp(MIRType.DICT)
                self._add_instruction(LoadConst(temp_dict, dict_value, source_loc), expr)
                dict_value = temp_dict
            elif not isinstance(dict_value, Temp):
                # Ensure it's a temp register
                temp_dict = self.current_function.new_temp(MIRType.DICT)
                self._add_instruction(Copy(temp_dict, dict_value, source_loc), expr)
                dict_value = temp_dict

            # Create result temp for the extracted array
            result = self.current_function.new_temp(MIRType.ARRAY)

            # Generate appropriate extraction instruction
            if expr.extract_type == "names":
                self._add_instruction(DictKeys(result, dict_value, source_loc), expr)
            else:  # contents (any extract_type other than "names" lands here)
                self._add_instruction(DictValues(result, dict_value, source_loc), expr)

            return result
        # If isinstance fails, control falls through to the default error
        # return at the end of the method.

    # Handle infix expression
    elif isinstance(expr, InfixExpression):
        # Operands are lowered left first, then right
        left = self.lower_expression(expr.left)
        if expr.right is not None:
            right = self.lower_expression(expr.right)
        else:
            raise ValueError("Infix expression missing right operand")

        # Load constants into temporaries if needed
        if isinstance(left, Constant):
            temp_left = self.current_function.new_temp(left.type)
            source_loc = expr.get_source_location()
            if source_loc is None:
                raise ValueError("InfixExpression missing source location")
            self._add_instruction(LoadConst(temp_left, left, source_loc), expr)
            left = temp_left

        if isinstance(right, Constant):
            temp_right = self.current_function.new_temp(right.type)
            source_loc = expr.get_source_location()
            if source_loc is None:
                raise ValueError("InfixExpression missing source location")
            self._add_instruction(LoadConst(temp_right, right, source_loc), expr)
            right = temp_right

        # Map AST operators to MIR operators
        # AST uses ^ for power, but MIR/bytecode use ** for power and ^ for XOR
        mir_operator = expr.operator
        if expr.operator == "^":
            mir_operator = "**"  # In AST, ^ means power; convert to ** for MIR

        # Get result type
        from machine_dialect.mir.mir_types import get_binary_op_result_type

        # Prefer the lowered operand's own type; fall back to inferring it
        # from the AST when the MIR value has no "type" attribute
        left_type = left.type if hasattr(left, "type") else infer_ast_expression_type(expr.left, self.type_context)
        right_type = (
            right.type
            if hasattr(right, "type")
            else infer_ast_expression_type(expr.right, self.type_context)
            if expr.right
            else MIRType.UNKNOWN
        )
        result_type = get_binary_op_result_type(mir_operator, left_type, right_type)

        # Create temp for result
        result = self.current_function.new_temp(result_type)
        source_loc = expr.get_source_location()
        if source_loc is None:
            raise ValueError("InfixExpression missing source location")
        self._add_instruction(BinaryOp(result, mir_operator, left, right, source_loc), expr)
        return result

    # Handle prefix expression
    elif isinstance(expr, PrefixExpression):
        if expr.right is not None:
            operand = self.lower_expression(expr.right)
        else:
            raise ValueError("Prefix expression missing right operand")

        # Load constant into temporary if needed
        if isinstance(operand, Constant):
            temp_operand = self.current_function.new_temp(operand.type)
            source_loc = expr.get_source_location()
            if source_loc is None:
                raise ValueError("PrefixExpression missing source location")
            self._add_instruction(LoadConst(temp_operand, operand, source_loc), expr)
            operand = temp_operand

        # Get result type
        from machine_dialect.mir.mir_types import get_unary_op_result_type

        # Same fallback chain as for infix operands: MIR type, else AST
        # inference, else UNKNOWN
        operand_type = (
            operand.type
            if hasattr(operand, "type")
            else infer_ast_expression_type(expr.right, self.type_context)
            if expr.right
            else MIRType.UNKNOWN
        )
        result_type = get_unary_op_result_type(expr.operator, operand_type)

        # Create temp for result
        result = self.current_function.new_temp(result_type)
        source_loc = expr.get_source_location()
        if source_loc is None:
            raise ValueError("PrefixExpression missing source location")
        self._add_instruction(UnaryOp(result, expr.operator, operand, source_loc), expr)
        return result

    # Handle conditional expression (ternary)
    elif isinstance(expr, ConditionalExpression):
        if expr.condition is None or expr.consequence is None or expr.alternative is None:
            raise ValueError("Conditional expression missing required parts")

        # Lower condition
        condition = self.lower_expression(expr.condition)

        # Lower both branches; both are lowered unconditionally, in this
        # order, before the Select below is emitted
        true_val = self.lower_expression(expr.consequence)
        false_val = self.lower_expression(expr.alternative)

        # Load constants into temporaries if needed
        if isinstance(condition, Constant):
            temp_cond = self.current_function.new_temp(condition.type)
            source_loc = expr.get_source_location()
            if source_loc is None:
                raise ValueError("ConditionalExpression missing source location")
            self._add_instruction(LoadConst(temp_cond, condition, source_loc), expr)
            condition = temp_cond

        if isinstance(true_val, Constant):
            temp_true = self.current_function.new_temp(true_val.type)
            source_loc = expr.get_source_location()
            if source_loc is None:
                raise ValueError("ConditionalExpression missing source location")
            self._add_instruction(LoadConst(temp_true, true_val, source_loc), expr)
            true_val = temp_true

        if isinstance(false_val, Constant):
            temp_false = self.current_function.new_temp(false_val.type)
            source_loc = expr.get_source_location()
            if source_loc is None:
                raise ValueError("ConditionalExpression missing source location")
            self._add_instruction(LoadConst(temp_false, false_val, source_loc), expr)
            false_val = temp_false

        # Get result type (should be the same for both branches);
        # only the true branch's type is consulted
        result_type = true_val.type if hasattr(true_val, "type") else MIRType.UNKNOWN

        # Create temp for result
        result = self.current_function.new_temp(result_type)

        # Use Select instruction for conditional expression
        source_loc = expr.get_source_location()
        if source_loc is None:
            raise ValueError("ConditionalExpression missing source location")
        self._add_instruction(Select(result, condition, true_val, false_val, source_loc), expr)
        return result

    # Handle error expression
    elif isinstance(expr, ErrorExpression):
        # Generate an assert for error expressions with position information
        # ErrorExpression MUST have a token with position info
        error_msg = f"line {expr.token.line}, column {expr.token.position}: Expression error: {expr.message}"
        false_val = Constant(False, MIRType.BOOL)
        # Load constant into temporary
        temp_false = self.current_function.new_temp(false_val.type)
        # The source location is taken from the token, not from get_source_location()
        source_loc = (expr.token.line, expr.token.position)
        self._add_instruction(LoadConst(temp_false, false_val, source_loc), expr)
        self._add_instruction(Assert(temp_false, source_loc, error_msg), expr)
        # Return error value
        return Constant(None, MIRType.ERROR)

    # Handle call expression (not available in AST, using CallStatement instead)
    # This would need to be refactored if we have call expressions
    elif hasattr(expr, "function_name"):  # Check if it's a call-like expression
        # Lower arguments; only positional arguments are lowered here
        args = []
        if hasattr(expr, "arguments"):
            arguments = expr.arguments
            if isinstance(arguments, Arguments):
                if hasattr(arguments, "positional"):
                    for arg in arguments.positional:
                        val = self.lower_expression(arg)
                        # Load constants into temporaries if needed
                        if isinstance(val, Constant):
                            temp = self.current_function.new_temp(val.type)
                            source_loc = expr.get_source_location()
                            if source_loc is None:
                                raise ValueError("Call expression missing source location")
                            self._add_instruction(LoadConst(temp, val, source_loc), expr)
                            val = temp
                        args.append(val)

        # Get function name
        func_name_expr = getattr(expr, "function_name", None)
        if isinstance(func_name_expr, Identifier):
            func_name = func_name_expr.value
        elif isinstance(func_name_expr, StringLiteral):
            # Strip surrounding double quotes, then single quotes, from the literal
            func_name = func_name_expr.value.strip('"').strip("'")
        else:
            func_name = str(func_name_expr) if func_name_expr else "unknown"
        func_ref = FunctionRef(func_name)

        # Create temp for result; the call's return type is not known here
        result = self.current_function.new_temp(MIRType.UNKNOWN)
        source_loc = expr.get_source_location()
        if source_loc is None:
            raise ValueError("Call expression missing source location")
        call_inst = Call(result, func_ref, args, source_loc)
        self._add_instruction(call_inst, expr)
        return result

    # Default: return error value
    return Constant(None, MIRType.ERROR)
1688
+
1689
def _get_mir_type(self, value: MIRValue) -> MIRType:
    """Return the plain ``MIRType`` of a MIR value.

    Args:
        value: The MIR value to inspect.

    Returns:
        ``value.type`` when it is a ``MIRType`` instance. ``MIRType.UNKNOWN``
        is returned when the value has no ``type`` attribute or when the
        type is anything else (for example a union type).
    """
    # Constants are read directly; any other value must expose a "type"
    # attribute to be inspected at all.
    if not isinstance(value, Constant) and not hasattr(value, "type"):
        return MIRType.UNKNOWN

    declared = value.type
    # Anything that is not a plain MIRType (e.g. a MIRUnionType) is
    # reported as UNKNOWN for now.
    return declared if isinstance(declared, MIRType) else MIRType.UNKNOWN
1712
+
1713
def generate_label(self, prefix: str = "L") -> str:
    """Produce the next unique label and advance the label counter.

    Args:
        prefix: Text placed before the underscore and counter value.

    Returns:
        A label of the form ``"<prefix>_<counter>"``.
    """
    index = self.label_counter
    # Advance the counter so the next call yields a different label.
    self.label_counter = index + 1
    return f"{prefix}_{index}"
1725
+
1726
+
1727
def lower_to_mir(program: Program, module_name: str = "__main__") -> MIRModule:
    """Convenience entry point: lower a whole program to a MIR module.

    A fresh ``HIRToMIRLowering`` instance is created for every call.

    Args:
        program: The program to lower.
        module_name: Name for the MIR module.

    Returns:
        The MIR module produced by ``HIRToMIRLowering.lower_program``.
    """
    return HIRToMIRLowering().lower_program(program, module_name)