machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,1996 @@
1
+ """Register-based bytecode generator for the Rust VM.
2
+
3
+ This module generates register-based bytecode from MIR for the new Rust VM.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import struct
9
+ from dataclasses import dataclass, field
10
+ from typing import Any
11
+
12
+ from machine_dialect.codegen.bytecode_module import BytecodeModule, Chunk, ChunkType, ConstantTag
13
+ from machine_dialect.codegen.opcodes import Opcode
14
+
15
+ # Note: ConstantPool is now just a list of (tag, value) tuples
16
+ from machine_dialect.mir.mir_function import MIRFunction
17
+ from machine_dialect.mir.mir_instructions import (
18
+ ArrayAppend,
19
+ ArrayClear,
20
+ ArrayCreate,
21
+ ArrayFindIndex,
22
+ ArrayGet,
23
+ ArrayInsert,
24
+ ArrayLength,
25
+ ArrayRemove,
26
+ ArraySet,
27
+ Assert,
28
+ BinaryOp,
29
+ Call,
30
+ ConditionalJump,
31
+ Copy,
32
+ DictClear,
33
+ DictContains,
34
+ DictCreate,
35
+ DictGet,
36
+ DictKeys,
37
+ DictRemove,
38
+ DictSet,
39
+ DictValues,
40
+ Jump,
41
+ LoadConst,
42
+ LoadVar,
43
+ MIRInstruction,
44
+ Nop,
45
+ Phi,
46
+ Print,
47
+ Return,
48
+ Scope,
49
+ StoreVar,
50
+ UnaryOp,
51
+ )
52
+ from machine_dialect.mir.mir_module import MIRModule
53
+ from machine_dialect.mir.mir_values import Constant, MIRValue, Variable
54
+
55
+
56
@dataclass
class RegisterAllocation:
    """Per-function record of which MIR value is held in which register."""

    # Register number assigned to each MIR value allocated so far.
    value_to_register: dict[MIRValue, int] = field(default_factory=dict)
    # Number handed out to the next newly allocated value.
    next_register: int = 0
    # Register limit: allocation fails once next_register reaches this value.
    max_registers: int = 256
66
+
67
+
68
class RegisterAllocator:
    """Hands out register numbers to the MIR values of a function."""

    def _is_global_variable(self, var: MIRValue, func: MIRFunction) -> bool:
        """Decide whether ``var`` is a global rather than a register value.

        A ``ScopedVariable`` is classified purely by its explicit scope. Any
        other value counts as global only when it is a version-0 ``Variable``
        whose name matches neither a parameter nor a local of ``func``.

        Args:
            var: The MIR value to classify.
            func: The function whose parameters and locals are consulted.

        Returns:
            True if ``var`` is treated as a global variable.
        """
        from machine_dialect.mir.mir_values import ScopedVariable, VariableScope

        # An explicit scope always wins over the name-based heuristic below.
        if isinstance(var, ScopedVariable):
            return var.scope == VariableScope.GLOBAL

        if not (isinstance(var, Variable) and var.version == 0):
            return False

        # Parameters are matched by name, not by object identity.
        if any(param.name == var.name for param in func.params):
            return False

        # Whatever is left is global unless the function declares it as a local.
        return var.name not in func.locals

    def allocate_function(self, func: MIRFunction) -> RegisterAllocation:
        """Build the register allocation for one function.

        Parameters receive registers first, in declaration order. Every block
        is then scanned, and each defined or used value that is not yet
        allocated gets the next register. Globals are skipped, and constants
        are skipped when they appear as uses.

        Args:
            func: MIR function to allocate registers for.

        Returns:
            The populated register allocation.
        """
        allocation = RegisterAllocation()
        assigned = allocation.value_to_register

        for param in func.params:
            self.allocate_register(param, allocation)

        blocks = func.cfg.blocks
        for label in blocks:
            for inst in blocks[label].instructions:
                # Values written by the instruction.
                for defined in inst.get_defs():
                    if defined in assigned or self._is_global_variable(defined, func):
                        continue
                    self.allocate_register(defined, allocation)

                # Values read by the instruction; constants never get a register here.
                for used in inst.get_uses():
                    if used in assigned or isinstance(used, Constant):
                        continue
                    if self._is_global_variable(used, func):
                        continue
                    self.allocate_register(used, allocation)

        return allocation

    def allocate_register(self, value: MIRValue, allocation: RegisterAllocation) -> int:
        """Return the register for ``value``, assigning a new one if needed.

        Args:
            value: Value to allocate a register for.
            allocation: Allocation state to read and update.

        Returns:
            The register number associated with ``value``.

        Raises:
            RuntimeError: If a new register is needed but ``max_registers``
                has already been reached.
        """
        registers = allocation.value_to_register
        if value in registers:
            return registers[value]

        if allocation.next_register >= allocation.max_registers:
            raise RuntimeError(f"Out of registers (max {allocation.max_registers})")

        reg = allocation.next_register
        registers[value] = reg
        allocation.next_register = reg + 1
        return reg
162
+
163
+
164
+ class RegisterBytecodeGenerator:
165
+ """Generate register-based bytecode from MIR."""
166
+
167
def __init__(self, debug: bool = False) -> None:
    """Set up an empty generator.

    Args:
        debug: When true, generation methods print diagnostic output.
    """
    self.debug = debug
    self.allocator = RegisterAllocator()

    # Per-function output state.
    self.current_function: MIRFunction | None = None
    self.allocation: RegisterAllocation | None = None
    self.bytecode: bytearray = bytearray()
    self.constants: list[tuple[ConstantTag, Any]] = []

    # Basic block label -> instruction index (not a byte offset).
    self.block_offsets: dict[str, int] = {}
    # Instruction index -> byte offset.
    self.instruction_offsets: list[int] = []
    # Jumps still to be resolved: (byte_pos, target_label, source_inst_idx).
    self.pending_jumps: list[tuple[int, str, int]] = []

    # Counter used when generating unique labels.
    self.label_counter = 0
187
+
188
@staticmethod
def is_ssa_variable(var: MIRValue) -> bool:
    """Tell whether ``var`` is a ``Variable`` renamed by SSA construction.

    A version greater than 0 marks an SSA-renamed variable; version 0 is
    used for variables that were not renamed (globals, original parameters).

    Args:
        var: The MIR value to check.

    Returns:
        True if ``var`` is a ``Variable`` whose version is greater than 0.
    """
    if not isinstance(var, Variable):
        return False
    return var.version > 0
203
+
204
def is_global_variable(self, var: MIRValue) -> bool:
    """Decide whether ``var`` is a global rather than a register value.

    A ``ScopedVariable`` is classified purely by its explicit scope. Any
    other value counts as global only when it is a version-0 ``Variable``
    whose name matches neither a parameter nor a local of the function
    currently being generated. With no current function, every version-0
    ``Variable`` is treated as global.

    Args:
        var: The MIR value to classify.

    Returns:
        True if ``var`` is treated as a global variable.
    """
    from machine_dialect.mir.mir_values import ScopedVariable, VariableScope

    # An explicit scope always wins over the name-based heuristic below.
    if isinstance(var, ScopedVariable):
        return var.scope == VariableScope.GLOBAL

    if not (isinstance(var, Variable) and var.version == 0):
        return False

    if self.current_function:
        # Parameters are matched by name, not by object identity.
        if any(param.name == var.name for param in self.current_function.params):
            return False
        # Function-local variables are matched by name as well.
        if var.name in self.current_function.locals:
            return False

    return True
236
+
237
def generate(self, mir_module: MIRModule) -> BytecodeModule:
    """Translate a whole MIR module into a bytecode module.

    The ``__main__`` function, when present, is emitted first so its chunk
    precedes every other function's chunk.

    Args:
        mir_module: MIR module to generate bytecode from.

    Returns:
        Bytecode module holding one chunk per function.
    """
    module = BytecodeModule()

    main_func = mir_module.get_function("__main__")
    if main_func:
        # NOTE(review): main is appended to `chunks` directly while the other
        # functions go through `add_chunk` -- confirm the difference is intended.
        module.chunks.append(self.generate_function(main_func))

    for name, func in mir_module.functions.items():
        if name == "__main__":
            continue
        module.add_chunk(self.generate_function(func))

    return module
260
+
261
def generate_function(self, func: MIRFunction) -> Chunk:
    """Compile one MIR function into a bytecode chunk.

    Per-function state is reset, registers are allocated, each block
    returned by the CFG's ``topological_sort()`` is emitted in that order,
    and pending jumps are resolved before the chunk is assembled.

    Args:
        func: MIR function to generate bytecode for.

    Returns:
        Bytecode chunk for ``func``.
    """
    # Start from a clean slate for this function.
    self.current_function = func
    self.bytecode = bytearray()
    self.constants = []
    self.block_offsets = {}  # label -> instruction index
    self.instruction_offsets = []  # instruction index -> byte offset
    self.pending_jumps = []

    self.allocation = self.allocator.allocate_function(func)

    if self.debug:
        registers = self.allocation.value_to_register
        print(f"\nDEBUG Function {func.name}:")
        print(f" Parameters: {[p.name for p in func.params]}")
        for param in func.params:
            location = f"r{registers[param]}" if param in registers else "NOT ALLOCATED!"
            print(f" {param.name} -> {location}")

    for block in func.cfg.topological_sort():
        # A block's offset is the number of instructions emitted before it.
        self.block_offsets[block.label] = len(self.instruction_offsets)
        for inst in block.instructions:
            # Each generate_* method records the VM instructions it emits
            # through track_vm_instruction().
            self.generate_instruction(inst)

    self.resolve_jumps()

    is_main = func.name == "__main__"
    return Chunk(
        name=func.name,
        chunk_type=ChunkType.MAIN if is_main else ChunkType.FUNCTION,
        bytecode=self.bytecode,
        constants=self.constants,
        num_locals=self.allocation.next_register,
        num_params=len(func.params),
    )
316
+
317
def generate_instruction(self, inst: MIRInstruction) -> None:
    """Dispatch a MIR instruction to its dedicated generator method.

    The table is scanned in order and the first matching type wins.
    ``Nop`` and any instruction type not listed produce no bytecode.

    Args:
        inst: MIR instruction to generate bytecode for.
    """
    # (instruction type, generator method name); the order is significant.
    # Names are resolved lazily so only the matching method is looked up.
    dispatch: tuple[tuple[type, str | None], ...] = (
        (LoadConst, "generate_load_const"),
        (Copy, "generate_copy"),
        (LoadVar, "generate_load_var"),
        (StoreVar, "generate_store_var"),
        (BinaryOp, "generate_binary_op"),
        (UnaryOp, "generate_unary_op"),
        (Jump, "generate_jump"),
        (ConditionalJump, "generate_conditional_jump"),
        (Call, "generate_call"),
        (Return, "generate_return"),
        (Phi, "generate_phi"),
        (Assert, "generate_assert"),
        (ArrayCreate, "generate_array_create"),
        (ArrayGet, "generate_array_get"),
        (ArraySet, "generate_array_set"),
        (ArrayLength, "generate_array_length"),
        (ArrayAppend, "generate_array_append"),
        (ArrayRemove, "generate_array_remove"),
        (ArrayInsert, "generate_array_insert"),
        (ArrayClear, "generate_array_clear"),
        (ArrayFindIndex, "generate_array_find_index"),
        (DictCreate, "generate_dict_create"),
        (DictGet, "generate_dict_get"),
        (DictSet, "generate_dict_set"),
        (DictRemove, "generate_dict_remove"),
        (DictContains, "generate_dict_contains"),
        (DictKeys, "generate_dict_keys"),
        (DictValues, "generate_dict_values"),
        (DictClear, "generate_dict_clear"),
        (Scope, "generate_scope"),
        (Print, "generate_print"),
        (Nop, None),  # No operation
    )

    for inst_type, method_name in dispatch:
        if isinstance(inst, inst_type):
            if method_name is not None:
                getattr(self, method_name)(inst)
            return
387
+
388
def generate_load_const(self, inst: LoadConst) -> None:
    """Emit a LoadConstR instruction for ``inst``.

    The instruction is encoded as the opcode, a u8 destination register and
    a u16 constant index.

    Args:
        inst: The MIR ``LoadConst`` instruction to lower.
    """
    dest_reg = self.get_register(inst.dest)

    # Unwrap the payload when the constant exposes ``value``; otherwise
    # use the constant object itself.
    payload = getattr(inst.constant, "value", inst.constant)
    pool_index = self.add_constant(payload)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.LOAD_CONST_R)
    self.emit_u8(dest_reg)
    self.emit_u16(pool_index)
401
+
402
+ def generate_copy(self, inst: Copy) -> None:
403
+ """Generate MoveR or LoadGlobalR instruction based on source type.
404
+
405
+ This method handles both SSA-renamed variables (version > 0) and
406
+ regular variables (version = 0). SSA variables should always be
407
+ allocated to registers during the allocation phase, while regular
408
+ variables may be globals that need to be loaded by name.
409
+ """
410
+ dst = self.get_register(inst.dest)
411
+
412
+ # Debug output
413
+ if self.debug:
414
+ print(f"DEBUG Copy: source={inst.source}, dest={inst.dest}")
415
+ if isinstance(inst.source, Variable):
416
+ print(f" source is Variable, name={inst.source.name}, version={inst.source.version}")
417
+ if self.allocation:
418
+ print(f" in allocation? {inst.source in self.allocation.value_to_register}")
419
+
420
+ # Handle ScopedVariable parameters
421
+ from machine_dialect.mir.mir_values import ScopedVariable, VariableScope
422
+
423
+ if isinstance(inst.source, ScopedVariable) and inst.source.scope == VariableScope.PARAMETER:
424
+ # This is a parameter reference - it might be the same object or a different one
425
+ # First check if the ScopedVariable itself is allocated
426
+ if self.allocation and inst.source in self.allocation.value_to_register:
427
+ src = self.allocation.value_to_register[inst.source]
428
+ self.track_vm_instruction()
429
+ self.emit_opcode(Opcode.MOVE_R)
430
+ self.emit_u8(dst)
431
+ self.emit_u8(src)
432
+ if self.debug:
433
+ print(f" -> Generated MoveR from r{src} (param {inst.source.name} direct) to r{dst}")
434
+ return
435
+ # Otherwise look for the parameter by name in the function
436
+ elif self.current_function:
437
+ for param in self.current_function.params:
438
+ if param.name == inst.source.name:
439
+ if self.allocation and param in self.allocation.value_to_register:
440
+ src = self.allocation.value_to_register[param]
441
+ self.track_vm_instruction()
442
+ self.emit_opcode(Opcode.MOVE_R)
443
+ self.emit_u8(dst)
444
+ self.emit_u8(src)
445
+ if self.debug:
446
+ print(f" -> Generated MoveR from r{src} (param {inst.source.name} by name) to r{dst}")
447
+ return
448
+
449
+ # Check if source is already in a register (local variable, parameter, or SSA variable)
450
+ if self.allocation and inst.source in self.allocation.value_to_register:
451
+ # This is a local variable, parameter, or SSA variable in a register
452
+ src = self.allocation.value_to_register[inst.source]
453
+ self.track_vm_instruction()
454
+ self.emit_opcode(Opcode.MOVE_R)
455
+ self.emit_u8(dst)
456
+ self.emit_u8(src)
457
+ if self.debug:
458
+ print(f" -> Generated MoveR from r{src} to r{dst}")
459
+ elif isinstance(inst.source, Variable):
460
+ # Special handling for parameters - check by name
461
+ if self.current_function:
462
+ for param in self.current_function.params:
463
+ if param.name == inst.source.name and inst.source.version == 0:
464
+ # This is a parameter - find its register
465
+ if self.allocation and param in self.allocation.value_to_register:
466
+ src = self.allocation.value_to_register[param]
467
+ self.track_vm_instruction()
468
+ self.emit_opcode(Opcode.MOVE_R)
469
+ self.emit_u8(dst)
470
+ self.emit_u8(src)
471
+ if self.debug:
472
+ print(f" -> Generated MoveR from r{src} (param {param.name}) to r{dst}")
473
+ return
474
+ else:
475
+ raise RuntimeError(f"Parameter {param.name} not allocated to register")
476
+ # Check if this is an SSA variable that should have been allocated
477
+ if self.is_ssa_variable(inst.source):
478
+ raise RuntimeError(
479
+ f"SSA variable {inst.source} (version {inst.source.version}) not allocated to register"
480
+ )
481
+
482
+ # This is a true global variable that needs to be loaded by name
483
+ name_idx = self.add_string_constant(inst.source.name)
484
+ self.track_vm_instruction()
485
+ self.emit_opcode(Opcode.LOAD_GLOBAL_R)
486
+ self.emit_u8(dst)
487
+ self.emit_u16(name_idx)
488
+ if self.debug:
489
+ print(f" -> Generated LoadGlobalR for {inst.source.name}")
490
+ else:
491
+ # Handle other types (constants, etc.)
492
+ src = self.get_register(inst.source)
493
+ self.track_vm_instruction()
494
+ self.emit_opcode(Opcode.MOVE_R)
495
+ self.emit_u8(dst)
496
+ self.emit_u8(src)
497
+ if self.debug:
498
+ print(f" -> Generated MoveR from r{src} to r{dst}")
499
+
500
def generate_load_var(self, inst: LoadVar) -> None:
    """Emit code that places the value of ``inst.var`` in ``inst.dest``'s register.

    The variable is resolved in this order:

    1. It has an entry in the allocation map: emit ``MOVE_R``.
    2. ``is_ssa_variable`` reports it as SSA but it has no register: error.
    3. Its name matches a parameter of the current function: emit ``MOVE_R``
       from the parameter's register (error if the parameter has none).
    4. Otherwise it is treated as a global and loaded by name with
       ``LOAD_GLOBAL_R``.

    Args:
        inst: The MIR ``LoadVar`` instruction.

    Raises:
        RuntimeError: If an SSA variable or a function parameter has no
            register assigned.
    """
    dst = self.get_register(inst.dest)

    if self.debug:
        print(f"DEBUG LoadVar: var={inst.var}, var.name={inst.var.name}, version={inst.var.version}")
        if self.allocation:
            print(f" in allocation? {inst.var in self.allocation.value_to_register}")
            if inst.var in self.allocation.value_to_register:
                print(f" allocated to register {self.allocation.value_to_register[inst.var]}")
        if self.current_function:
            print(f" function params: {[p.name for p in self.current_function.params]}")
            print(f" is param? {inst.var in self.current_function.params}")

    # Fast path: the variable itself (parameter, local or SSA value) has a register.
    if self.allocation and inst.var in self.allocation.value_to_register:
        src = self.allocation.value_to_register[inst.var]
        self.track_vm_instruction()
        self.emit_opcode(Opcode.MOVE_R)
        self.emit_u8(dst)
        self.emit_u8(src)
        return

    # An SSA variable must never fall through to the global lookup.
    if self.is_ssa_variable(inst.var):
        raise RuntimeError(f"SSA variable {inst.var} (version {inst.var.version}) not allocated to register")

    # The variable object may differ from the parameter object, so match by name.
    # Every name match either returns or raises, so no "is a parameter" flag
    # needs to survive the loop.
    if self.current_function and self.allocation:
        if self.debug:
            print(f" Checking if {inst.var.name} is a parameter...")
            print(f" Allocation keys: {list(self.allocation.value_to_register.keys())}")
        for param in self.current_function.params:
            if self.debug:
                print(f" Comparing {param.name} == {inst.var.name}: {param.name == inst.var.name}")
            if param.name != inst.var.name:
                continue
            if param not in self.allocation.value_to_register:
                if self.debug:
                    print(f" Parameter {inst.var.name} not in allocation!")
                raise RuntimeError(f"Function parameter {inst.var.name} not allocated to register")
            src = self.allocation.value_to_register[param]
            if self.debug:
                print(f" Found parameter {inst.var.name} in register {src}!")
            self.track_vm_instruction()
            self.emit_opcode(Opcode.MOVE_R)
            self.emit_u8(dst)
            self.emit_u8(src)
            return

    # Anything left is loaded from the global scope by name.
    name_idx = self.add_string_constant(inst.var.name)
    self.track_vm_instruction()
    self.emit_opcode(Opcode.LOAD_GLOBAL_R)
    self.emit_u8(dst)
    self.emit_u16(name_idx)
569
+
570
def generate_store_var(self, inst: StoreVar) -> None:
    """Emit the store of ``inst.source`` into the variable ``inst.var``.

    A variable with an entry in the allocation map is written with
    ``MOVE_R``; any other variable is written to the global scope by name
    with ``STORE_GLOBAL_R``.

    Args:
        inst: The MIR ``StoreVar`` instruction.

    Raises:
        RuntimeError: If ``inst.var`` is an SSA variable without a register.
    """
    if self.debug:
        print(f"DEBUG StoreVar: var={inst.var}, source={inst.source}")
    src = self.get_register(inst.source)

    has_register = self.allocation and inst.var in self.allocation.value_to_register
    if has_register:
        # Register-resident destination (SSA or local): plain register move.
        dst = self.allocation.value_to_register[inst.var]
        self.track_vm_instruction()
        self.emit_opcode(Opcode.MOVE_R)
        self.emit_u8(dst)
        self.emit_u8(src)
        if self.debug:
            print(f" -> Generated MoveR from r{src} to r{dst} for {inst.var}")
        return

    if self.is_ssa_variable(inst.var):
        raise RuntimeError(f"SSA variable {inst.var} (version {inst.var.version}) not allocated to register")

    # Global destination: the name goes into the constant pool.
    if hasattr(inst.var, "name"):
        global_name = inst.var.name
    else:
        global_name = str(inst.var)
    name_idx = self.add_string_constant(global_name)
    self.track_vm_instruction()
    self.emit_opcode(Opcode.STORE_GLOBAL_R)
    self.emit_u8(src)
    self.emit_u16(name_idx)
    if self.debug:
        print(f" -> Generated StoreGlobalR for {inst.var}")
604
+
605
def generate_binary_op(self, inst: BinaryOp) -> None:
    """Emit a three-register binary operation for ``inst``.

    Constant operands are first materialised with ``LOAD_CONST_R`` (left
    operand, then right). An operator without a mapped opcode emits no
    operation; a warning is printed only in debug mode.

    Args:
        inst: The MIR ``BinaryOp`` instruction.
    """
    operand_regs = []
    for operand in (inst.left, inst.right):
        reg = self.get_register(operand)
        if isinstance(operand, Constant):
            # get_register only reserves the register; the load is emitted here.
            const_val = operand.value if hasattr(operand, "value") else operand
            const_idx = self.add_constant(const_val)
            self.track_vm_instruction()
            self.emit_opcode(Opcode.LOAD_CONST_R)
            self.emit_u8(reg)
            self.emit_u16(const_idx)
        operand_regs.append(reg)
    left, right = operand_regs

    dst = self.get_register(inst.dest)

    if self.debug:
        print(
            f"DEBUG BinaryOp: op={inst.op}, left={inst.left} "
            f"(type={type(inst.left).__name__}), "
            f"right={inst.right} (type={type(inst.right).__name__})"
        )
        print(f" left register: r{left}, right register: r{right}, dest register: r{dst}")

    # Operator text -> register opcode.
    op_map = {
        "+": Opcode.ADD_R,
        "-": Opcode.SUB_R,
        "*": Opcode.MUL_R,
        "/": Opcode.DIV_R,
        "%": Opcode.MOD_R,
        "and": Opcode.AND_R,
        "or": Opcode.OR_R,
        "==": Opcode.EQ_R,
        "!=": Opcode.NEQ_R,
        "<": Opcode.LT_R,
        ">": Opcode.GT_R,
        "<=": Opcode.LTE_R,
        ">=": Opcode.GTE_R,
    }

    opcode = op_map.get(inst.op)
    if not opcode:
        if self.debug:
            print(f"Warning: Unmapped operator '{inst.op}'")
        return

    self.track_vm_instruction()
    self.emit_opcode(opcode)
    self.emit_u8(dst)
    self.emit_u8(left)
    self.emit_u8(right)
668
+
669
def generate_unary_op(self, inst: UnaryOp) -> None:
    """Emit ``NEG_R`` or ``NOT_R`` for a unary operation.

    Operators other than ``"-"`` and ``"not"`` emit nothing.

    Args:
        inst: The MIR ``UnaryOp`` instruction.
    """
    dst = self.get_register(inst.dest)
    src = self.get_register(inst.operand)

    if inst.op == "-":
        opcode = Opcode.NEG_R
    elif inst.op == "not":
        opcode = Opcode.NOT_R
    else:
        return

    self.track_vm_instruction()
    self.emit_opcode(opcode)
    self.emit_u8(dst)
    self.emit_u8(src)
685
+
686
def generate_jump(self, inst: Jump) -> None:
    """Emit an unconditional ``JUMP_R`` whose offset is patched later.

    Args:
        inst: The MIR ``Jump`` instruction; ``inst.label`` is the target.
    """
    self.track_vm_instruction()
    self.emit_opcode(Opcode.JUMP_R)

    # Remember where the 4-byte offset lives, the target label, and the
    # index of this jump instruction so resolve_jumps can patch it.
    patch_pos = len(self.bytecode)
    jump_inst_idx = len(self.instruction_offsets) - 1
    self.pending_jumps.append((patch_pos, inst.label, jump_inst_idx))

    self.emit_i32(0)  # Placeholder offset
693
+
694
def generate_conditional_jump(self, inst: ConditionalJump) -> None:
    """Emit ``JUMP_IF_R`` to the true label, then ``JUMP_R`` to the false label.

    The second jump is emitted only when ``inst.false_label`` is truthy.
    Both offsets are placeholders recorded in ``pending_jumps``.

    Args:
        inst: The MIR ``ConditionalJump`` instruction.
    """
    cond = self.get_register(inst.condition)

    # Conditional jump to the true target.
    self.track_vm_instruction()
    self.emit_opcode(Opcode.JUMP_IF_R)
    self.emit_u8(cond)
    branch_idx = len(self.instruction_offsets) - 1
    self.pending_jumps.append((len(self.bytecode), inst.true_label, branch_idx))
    self.emit_i32(0)  # Placeholder offset

    if not inst.false_label:
        return

    # Emitted directly after the conditional jump: unconditional jump to
    # the false target, tracked as a separate VM instruction.
    self.track_vm_instruction()
    self.emit_opcode(Opcode.JUMP_R)
    fallthrough_idx = len(self.instruction_offsets) - 1
    self.pending_jumps.append((len(self.bytecode), inst.false_label, fallthrough_idx))
    self.emit_i32(0)  # Placeholder offset
716
+
717
def generate_call(self, inst: Call) -> None:
    """Emit a ``CALL_R`` instruction.

    A callee given as a string or a ``FunctionRef`` is loaded by name into a
    freshly reserved register; any other callee is looked up with
    ``get_register``. Constant arguments are loaded with ``LOAD_CONST_R``
    before the call. When ``inst.dest`` is falsy, register 0 is used as the
    destination operand.

    Args:
        inst: The MIR ``Call`` instruction.

    Raises:
        RuntimeError: If no register is left for the callee name.
    """
    if self.debug:
        print(f"DEBUG Call: func={inst.func}, args={inst.args}, dest={inst.dest}")
    dst = self.get_register(inst.dest) if inst.dest else 0

    from machine_dialect.mir.mir_values import FunctionRef

    callee = inst.func
    if isinstance(callee, (str, FunctionRef)):
        # Named callee: reserve a register and load the name as a constant.
        assert self.allocation is not None
        func_reg = self.allocation.next_register
        if func_reg >= self.allocation.max_registers:
            raise RuntimeError("Out of registers")
        self.allocation.next_register += 1

        if isinstance(callee, str):
            callee_name = callee
            if self.debug:
                print(f" DEBUG: Loading function name '{inst.func}' as constant into r{func_reg}")
        else:
            callee_name = callee.name
            if self.debug:
                print(f" DEBUG: Loading FunctionRef '{inst.func.name}' as constant into r{func_reg}")

        const_idx = self.add_constant(callee_name)
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(func_reg)
        self.emit_u16(const_idx)
        func = func_reg
    else:
        # Callee is a value that is looked up like any other operand.
        if self.debug:
            print(f" DEBUG: Function is already in register: {inst.func}")
        func = self.get_register(callee)

    # Resolve argument registers, loading constants on the way.
    args = []
    for arg in inst.args:
        arg_reg = self.get_register(arg)
        if isinstance(arg, Constant):
            const_val = arg.value if hasattr(arg, "value") else arg
            const_idx = self.add_constant(const_val)
            self.track_vm_instruction()
            self.emit_opcode(Opcode.LOAD_CONST_R)
            self.emit_u8(arg_reg)
            self.emit_u16(const_idx)
        args.append(arg_reg)

    if self.debug:
        print(f" Function register: r{func}, dest register: r{dst}")
        print(f" Argument registers: {[f'r{a}' for a in args]}")

    # Operand layout: callee, destination, argument count, then arguments.
    self.track_vm_instruction()
    self.emit_opcode(Opcode.CALL_R)
    self.emit_u8(func)
    self.emit_u8(dst)
    self.emit_u8(len(args))
    for arg_reg in args:
        self.emit_u8(arg_reg)
792
+
793
def generate_return(self, inst: Return) -> None:
    """Emit a ``RETURN_R`` instruction.

    Three shapes are produced:

    * falsy ``inst.value``: ``RETURN_R 0``;
    * constant value: ``LOAD_CONST_R r0, <const>`` then ``RETURN_R 1, r0``;
    * any other value: ``RETURN_R 1, <register of value>``.

    Args:
        inst: The MIR ``Return`` instruction.
    """
    result = inst.value

    if self.debug:
        print(f"DEBUG Return: value={inst.value}")
        if result:
            print(f" value type: {type(inst.value)}")
            if hasattr(result, "name"):
                print(f" value name: {inst.value.name}")
            if hasattr(result, "version"):
                print(f" value version: {inst.value.version}")
            # Dump the named entries of the allocation map.
            if self.allocation:
                print(f" Allocation map has {len(self.allocation.value_to_register)} entries")
                for val, reg in self.allocation.value_to_register.items():
                    if hasattr(val, "name"):
                        print(f" {val.name} (v{getattr(val, 'version', '?')}) -> r{reg}")

    if not result:
        if self.debug:
            print(" -> Returning with no value")
        self.track_vm_instruction()
        self.emit_opcode(Opcode.RETURN_R)
        self.emit_u8(0)  # No return value
        return

    if isinstance(result, Constant):
        # Constants are returned through register 0.
        const_value = result.value if hasattr(result, "value") else result
        const_idx = self.add_constant(const_value)
        if self.debug:
            print(f" -> Loading constant {const_value} into r0 for return")
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(0)  # Use register 0 for return
        self.emit_u16(const_idx)

        self.track_vm_instruction()
        self.emit_opcode(Opcode.RETURN_R)
        self.emit_u8(1)  # Has return value
        self.emit_u8(0)  # Return from register 0
        return

    # Any other value is returned from the register get_register reports.
    reg = self.get_register(result)
    if self.debug:
        print(f" -> Returning from register r{reg}")
    self.track_vm_instruction()
    self.emit_opcode(Opcode.RETURN_R)
    self.emit_u8(1)  # Has return value
    self.emit_u8(reg)
843
+
844
def generate_phi(self, inst: Phi) -> None:
    """Emit a ``PHI_R`` instruction.

    Operands are the destination register, the source count, and one
    ``(register: u8, block id: u16)`` pair per source. The block id is
    currently always 0.

    Args:
        inst: The MIR ``Phi`` instruction.
    """
    dst = self.get_register(inst.dest)

    # TODO: Map label to block ID (every source is tagged with block 0 for now)
    sources = [
        (self.get_register(value), 0)
        for value, _ in inst.sources  # type: ignore[attr-defined]
    ]

    self.track_vm_instruction()
    self.emit_opcode(Opcode.PHI_R)
    self.emit_u8(dst)
    self.emit_u8(len(sources))
    for src_reg, block_id in sources:
        self.emit_u8(src_reg)
        self.emit_u16(block_id)
861
+
862
def generate_assert(self, inst: Assert) -> None:
    """Emit an ``ASSERT_R`` instruction.

    The message falls back to ``"Assertion failed"`` when ``inst.message``
    is falsy and is stored in the constant pool.

    Args:
        inst: The MIR ``Assert`` instruction.
    """
    cond_reg = self.get_register(inst.condition)
    message = inst.message or "Assertion failed"
    message_idx = self.add_string_constant(message)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.ASSERT_R)
    self.emit_u8(cond_reg)
    self.emit_u8(0)  # AssertType::True
    self.emit_u16(message_idx)
873
+
874
def generate_scope(self, inst: Scope) -> None:
    """Emit ``SCOPE_ENTER_R`` or ``SCOPE_EXIT_R`` followed by the scope id.

    ``SCOPE_ENTER_R`` is used when ``inst.action`` equals ``"enter"``;
    every other action emits ``SCOPE_EXIT_R``.

    Args:
        inst: The MIR ``Scope`` instruction.
    """
    scope_id = inst.scope_id  # type: ignore[attr-defined]
    entering = inst.action == "enter"  # type: ignore[attr-defined]

    self.track_vm_instruction()
    self.emit_opcode(Opcode.SCOPE_ENTER_R if entering else Opcode.SCOPE_EXIT_R)
    self.emit_u16(scope_id)
885
+
886
def generate_print(self, inst: Print) -> None:
    """Emit a ``DEBUG_PRINT`` of ``inst.value``.

    A constant value is loaded into its register with ``LOAD_CONST_R``
    first.

    Args:
        inst: The MIR ``Print`` instruction.
    """
    src = self.get_register(inst.value)

    if isinstance(inst.value, Constant):
        # get_register only reserved the register; emit the load here.
        const_idx = self.add_constant(inst.value.value)
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(src)
        self.emit_u16(const_idx)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DEBUG_PRINT)
    self.emit_u8(src)
906
+
907
def resolve_jumps(self) -> None:
    """Patch the placeholder offsets of all recorded jumps.

    Offsets are counted in VM instructions, not bytes, and are relative to
    the instruction that follows the jump. Jumps whose label is not in
    ``block_offsets`` keep their placeholder value.
    """
    for patch_pos, label, jump_inst_idx in self.pending_jumps:
        if label not in self.block_offsets:
            continue
        target_inst_idx = self.block_offsets[label]
        # After the jump executes, the PC is one past the jump instruction.
        next_inst_idx = jump_inst_idx + 1
        relative = target_inst_idx - next_inst_idx
        struct.pack_into("<i", self.bytecode, patch_pos, relative)
919
+
920
def get_register(self, value: MIRValue) -> int:
    """Return the register number associated with ``value``.

    Constants get a register reserved on first use and remembered in the
    allocation map; no ``LOAD_CONST_R`` is emitted here, so the caller must
    load the constant itself. Other values are looked up in the allocation
    map, falling back to a by-name match against the current function's
    parameters.

    Args:
        value: MIR value.

    Returns:
        Register number. For a non-SSA value that is found nowhere, the
        fixed placeholder register 23 is returned.

    Raises:
        RuntimeError: If no register is left for a constant, or if a
            parameter or SSA variable has no register assigned.
    """
    if isinstance(value, Constant):
        # Reuse the register reserved for this constant earlier, if any.
        if self.allocation and value in self.allocation.value_to_register:
            return self.allocation.value_to_register[value]

        assert self.allocation is not None
        reg = self.allocation.next_register
        if reg >= self.allocation.max_registers:
            raise RuntimeError("Out of registers")
        self.allocation.next_register += 1
        self.allocation.value_to_register[value] = reg

        # Deliberately no LOAD_CONST_R: loading is the caller's job.
        if self.debug:
            print(f" DEBUG: Allocated r{reg} for constant {value.value if hasattr(value, 'value') else value}")
        return reg

    assert self.allocation is not None
    registers = self.allocation.value_to_register
    if value in registers:
        return registers[value]

    # Not allocated directly: the value may be a parameter referenced
    # through a different object, so compare by name.
    if self.current_function and isinstance(value, Variable):
        for param in self.current_function.params:
            if param.name != value.name:
                continue
            if param not in registers:
                raise RuntimeError(f"Parameter {value.name} not allocated to register")
            if self.debug:
                reg = registers[param]
                print(f" DEBUG: Found parameter {value.name} by name -> r{reg}")
            return registers[param]

    # An SSA variable without a register is an allocation error.
    if self.is_ssa_variable(value) and isinstance(value, Variable):
        raise RuntimeError(f"SSA variable {value.name} (version {value.version}) not allocated to register")

    # Known weak spot: an unallocated non-SSA value yields a fixed,
    # arbitrary register rather than an error.
    if self.debug:
        print(f" WARNING: Value {value} not in allocation map, returning r23 (uninitialized!)")
    return 23  # Recognisable placeholder to help track the issue down
977
+
978
def add_constant(self, value: Any) -> int:
    """Add a constant to the pool, reusing an existing equal entry.

    ``None`` is stored with the ``EMPTY`` tag and payload 0. ``bool`` is
    checked before ``int`` because ``bool`` is a subclass of ``int``. Values
    of any other type are stored as their ``str()`` representation.

    Float entries are reused only when they are bit-identical, so ``-0.0``
    is never folded into an existing ``0.0`` entry (the two compare equal
    with ``==`` but differ in sign).

    Args:
        value: Constant value.

    Returns:
        Constant index.
    """
    # Determine constant type and payload
    tag: ConstantTag
    val: Any
    if value is None:
        tag = ConstantTag.EMPTY
        val = 0
    elif isinstance(value, bool):
        tag = ConstantTag.BOOL
        val = value
    elif isinstance(value, int):
        tag = ConstantTag.INT
        val = value
    elif isinstance(value, float):
        tag = ConstantTag.FLOAT
        val = value
    elif isinstance(value, str):
        tag = ConstantTag.STRING
        val = value
    else:
        # Default to string representation
        tag = ConstantTag.STRING
        val = str(value)

    is_float = tag == ConstantTag.FLOAT

    # Check if constant already exists
    for i, (t, v) in enumerate(self.constants):
        if t != tag or v != val:
            continue
        # Equal floats can still differ in sign (0.0 vs -0.0); compare the
        # IEEE-754 encoding so the sign of zero is preserved.
        if is_float and struct.pack("<d", v) != struct.pack("<d", val):
            continue
        return i

    # Add new constant
    idx = len(self.constants)
    self.constants.append((tag, val))
    return idx
1019
+
1020
def add_string_constant(self, value: str) -> int:
    """Return the pool index of a string constant, adding it if missing.

    Args:
        value: String value.

    Returns:
        Constant index.
    """
    existing = next(
        (
            position
            for position, (tag, val) in enumerate(self.constants)
            if tag == ConstantTag.STRING and val == value
        ),
        None,
    )
    if existing is not None:
        return existing

    # Not in the pool yet: append and report the new slot.
    new_index = len(self.constants)
    self.constants.append((ConstantTag.STRING, value))
    return new_index
1038
+
1039
def track_vm_instruction(self) -> None:
    """Record the byte offset at which the next VM instruction starts.

    Call this before emitting each VM instruction; the recorded offsets
    provide the instruction indices that jump resolution relies on.
    """
    start_offset = len(self.bytecode)
    self.instruction_offsets.append(start_offset)
1046
+
1047
def emit_opcode(self, opcode: int) -> None:
    """Append a single opcode byte to the bytecode buffer."""
    self.bytecode.extend((opcode,))
1050
+
1051
def emit_u8(self, value: int) -> None:
    """Append the low 8 bits of ``value`` as one unsigned byte."""
    low_byte = value & 0xFF
    self.bytecode.append(low_byte)
1054
+
1055
def emit_u16(self, value: int) -> None:
    """Append ``value`` as an unsigned 16-bit little-endian integer."""
    encoded = struct.pack("<H", value)
    self.bytecode.extend(encoded)
1058
+
1059
def emit_i32(self, value: int) -> None:
    """Append ``value`` as a signed 32-bit little-endian integer."""
    encoded = struct.pack("<i", value)
    self.bytecode.extend(encoded)
1062
+
1063
def add_label(self, label: str) -> None:
    """Bind a label to the index of the next VM instruction to be emitted.

    Args:
        label: The label to add.
    """
    next_instruction_index = len(self.instruction_offsets)
    self.block_offsets[label] = next_instruction_index
1071
+
1072
def generate_array_create(self, inst: ArrayCreate) -> None:
    """Emit ``NEW_ARRAY_R`` for a MIR ``ArrayCreate``.

    A constant size is loaded into its register with ``LOAD_CONST_R``
    before the array is created.

    Args:
        inst: The MIR ``ArrayCreate`` instruction.
    """
    dst = self.get_register(inst.dest)
    size = self.get_register(inst.size)

    if isinstance(inst.size, Constant):
        # The register is only reserved by get_register; load it here.
        size_value = inst.size.value if hasattr(inst.size, "value") else inst.size
        const_idx = self.add_constant(size_value)
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(size)
        self.emit_u16(const_idx)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.NEW_ARRAY_R)
    self.emit_u8(dst)
    self.emit_u8(size)

    if self.debug:
        print(f" -> Generated NewArrayR: r{dst} = new_array(r{size})")
1095
+
1096
def generate_array_get(self, inst: ArrayGet) -> None:
    """Emit ``ARRAY_GET_R`` for a MIR ``ArrayGet``.

    A constant index is loaded into its register with ``LOAD_CONST_R``
    first. ``get_register`` only reserves a register for a constant and
    leaves the load to the caller, so without this step the index register
    would be read without ever being written by this method.

    Args:
        inst: The MIR ``ArrayGet`` instruction.
    """
    dst = self.get_register(inst.dest)
    array = self.get_register(inst.array)
    index = self.get_register(inst.index)

    if isinstance(inst.index, Constant):
        # Same constant handling as the index operand of generate_array_set.
        const_idx = self.add_constant(inst.index.value if hasattr(inst.index, "value") else inst.index)
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(index)
        self.emit_u16(const_idx)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.ARRAY_GET_R)
    self.emit_u8(dst)
    self.emit_u8(array)
    self.emit_u8(index)

    if self.debug:
        print(f" -> Generated ArrayGetR: r{dst} = r{array}[r{index}]")
1110
+
1111
def generate_array_set(self, inst: ArraySet) -> None:
    """Emit ``ARRAY_SET_R`` for a MIR ``ArraySet``.

    Constant index and value operands are loaded with ``LOAD_CONST_R``
    (index first, then value) before the store is emitted.

    Args:
        inst: The MIR ``ArraySet`` instruction.
    """
    array = self.get_register(inst.array)

    operand_regs = []
    for operand in (inst.index, inst.value):
        reg = self.get_register(operand)
        if isinstance(operand, Constant):
            # Materialise the constant in the register reserved for it.
            const_idx = self.add_constant(operand.value if hasattr(operand, "value") else operand)
            self.track_vm_instruction()
            self.emit_opcode(Opcode.LOAD_CONST_R)
            self.emit_u8(reg)
            self.emit_u16(const_idx)
        operand_regs.append(reg)
    index, value = operand_regs

    self.track_vm_instruction()
    self.emit_opcode(Opcode.ARRAY_SET_R)
    self.emit_u8(array)
    self.emit_u8(index)
    self.emit_u8(value)

    if self.debug:
        print(f" -> Generated ArraySetR: r{array}[r{index}] = r{value}")
1147
+
1148
def generate_array_length(self, inst: ArrayLength) -> None:
    """Emit ``ARRAY_LEN_R`` for a MIR ``ArrayLength``.

    Args:
        inst: The MIR ``ArrayLength`` instruction.
    """
    dst = self.get_register(inst.dest)
    array = self.get_register(inst.array)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.ARRAY_LEN_R)
    for operand_reg in (dst, array):
        self.emit_u8(operand_reg)

    if self.debug:
        print(f" -> Generated ArrayLenR: r{dst} = len(r{array})")
1160
+
1161
def generate_array_append(self, inst: ArrayAppend) -> None:
    """Emit an array append as ``array[len(array)] = value``.

    The sequence is ``ARRAY_LEN_R`` into a scratch register followed by
    ``ARRAY_SET_R`` at that position. A constant value is loaded with
    ``LOAD_CONST_R`` first: ``get_register`` only reserves a register for a
    constant and leaves the load to the caller.

    Args:
        inst: The MIR ``ArrayAppend`` instruction.
    """
    array = self.get_register(inst.array)
    value = self.get_register(inst.value)

    if isinstance(inst.value, Constant):
        # Same constant handling as the value operand of generate_array_set.
        const_idx = self.add_constant(inst.value.value if hasattr(inst.value, "value") else inst.value)
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(value)
        self.emit_u16(const_idx)

    # Scratch register for the current length: the highest register number
    # is hard-coded rather than taken from the allocator.
    length_reg = 255

    self.track_vm_instruction()
    self.emit_opcode(Opcode.ARRAY_LEN_R)
    self.emit_u8(length_reg)
    self.emit_u8(array)

    # Then set array[length] = value
    self.track_vm_instruction()
    self.emit_opcode(Opcode.ARRAY_SET_R)
    self.emit_u8(array)
    self.emit_u8(length_reg)
    self.emit_u8(value)

    if self.debug:
        print(f" -> Generated ArrayAppend: r{array}.append(r{value})")
1184
+
1185
def generate_dict_create(self, inst: DictCreate) -> None:
    """Emit ``DICT_NEW_R`` for a MIR ``DictCreate``.

    Args:
        inst: The MIR ``DictCreate`` instruction.
    """
    target_reg = self.get_register(inst.dest)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_NEW_R)
    self.emit_u8(target_reg)

    if self.debug:
        print(f" -> Generated DictNewR: r{target_reg} = new_dict()")
1195
+
1196
def generate_dict_get(self, inst: DictGet) -> None:
    """Emit ``DICT_GET_R`` for a MIR ``DictGet``.

    Args:
        inst: The MIR ``DictGet`` instruction.
    """
    # NOTE(review): a Constant key only gets a register reserved here; no
    # LOAD_CONST_R is emitted in this method. Confirm the key is always
    # materialised before this instruction.
    dst = self.get_register(inst.dest)
    dict_reg = self.get_register(inst.dict_val)
    key_reg = self.get_register(inst.key)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_GET_R)
    for operand_reg in (dst, dict_reg, key_reg):
        self.emit_u8(operand_reg)

    if self.debug:
        print(f" -> Generated DictGetR: r{dst} = r{dict_reg}[r{key_reg}]")
1210
+
1211
def generate_dict_set(self, inst: DictSet) -> None:
    """Emit ``DICT_SET_R`` for a MIR ``DictSet``.

    Args:
        inst: The MIR ``DictSet`` instruction.
    """
    # NOTE(review): Constant key/value operands only get registers reserved
    # here; no LOAD_CONST_R is emitted in this method. Confirm they are
    # always materialised before this instruction.
    dict_reg = self.get_register(inst.dict_val)
    key_reg = self.get_register(inst.key)
    value_reg = self.get_register(inst.value)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_SET_R)
    for operand_reg in (dict_reg, key_reg, value_reg):
        self.emit_u8(operand_reg)

    if self.debug:
        print(f" -> Generated DictSetR: r{dict_reg}[r{key_reg}] = r{value_reg}")
1225
+
1226
def generate_dict_remove(self, inst: DictRemove) -> None:
    """Emit ``DICT_REMOVE_R`` for a MIR ``DictRemove``.

    Args:
        inst: The MIR ``DictRemove`` instruction.
    """
    dict_reg = self.get_register(inst.dict_val)
    key_reg = self.get_register(inst.key)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_REMOVE_R)
    for operand_reg in (dict_reg, key_reg):
        self.emit_u8(operand_reg)

    if self.debug:
        print(f" -> Generated DictRemoveR: del r{dict_reg}[r{key_reg}]")
1238
+
1239
def generate_dict_contains(self, inst: DictContains) -> None:
    """Emit ``DICT_CONTAINS_R`` for a MIR ``DictContains``.

    Operands are emitted as destination, dictionary, key.

    Args:
        inst: The MIR ``DictContains`` instruction.
    """
    dst = self.get_register(inst.dest)
    dict_reg = self.get_register(inst.dict_val)
    key_reg = self.get_register(inst.key)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_CONTAINS_R)
    for operand_reg in (dst, dict_reg, key_reg):
        self.emit_u8(operand_reg)

    if self.debug:
        print(f" -> Generated DictContainsR: r{dst} = r{key_reg} in r{dict_reg}")
1253
+
1254
def generate_array_remove(self, inst: ArrayRemove) -> None:
    """Emit bytecode that removes one array element by copying.

    The emitted sequence:
    1. reads the length of the source array,
    2. allocates a new array of that length minus one,
    3. walks the source with a read cursor and copies every element whose
       position differs from the removal index to a write cursor in the
       new array,
    4. moves the new array into the source array's register.

    Args:
        inst: ArrayRemove instruction providing the array and index values.
    """
    src_reg = self.get_register(inst.array)
    victim_reg = self.get_register(inst.index)

    # Fixed scratch registers used by this sequence.
    length_reg = 247  # length of the source array
    shrunk_len_reg = 248  # length - 1
    fresh_reg = 249  # newly allocated array
    read_reg = 250  # cursor into the source array
    write_reg = 251  # cursor into the new array
    item_reg = 252  # element being copied
    flag_reg = 253  # comparison result
    one_reg = 254  # constant 1

    def emit(opcode: Opcode, *regs: int) -> None:
        # Track and emit one instruction followed by its u8 register operands.
        self.track_vm_instruction()
        self.emit_opcode(opcode)
        for reg in regs:
            self.emit_u8(reg)

    def emit_load_const(reg: int, pool_slot: int) -> None:
        # LOAD_CONST_R takes a u8 register and a u16 constant index.
        emit(Opcode.LOAD_CONST_R, reg)
        self.emit_u16(pool_slot)

    def emit_jump(opcode: Opcode, target: str, *regs: int) -> None:
        # Record the patch site, then write a placeholder i32 offset.
        emit(opcode, *regs)
        self.pending_jumps.append((len(self.bytecode), target, len(self.instruction_offsets) - 1))
        self.emit_i32(0)

    # length_reg = len(source)
    emit(Opcode.ARRAY_LEN_R, length_reg, src_reg)

    # shrunk_len_reg = length_reg - 1
    one_slot = self.add_constant(1)
    emit_load_const(one_reg, one_slot)
    emit(Opcode.SUB_R, shrunk_len_reg, length_reg, one_reg)

    # fresh_reg = new array of the reduced length
    emit(Opcode.NEW_ARRAY_R, fresh_reg, shrunk_len_reg)

    # Both cursors start at 0.
    zero_slot = self.add_constant(0)
    emit_load_const(read_reg, zero_slot)
    emit_load_const(write_reg, zero_slot)

    # One counter value is shared by every label of this sequence.
    suffix = self.label_counter
    self.label_counter += 1
    loop_label = f"remove_copy_{suffix}"
    skip_label = f"remove_skip_{suffix}"
    element_label = f"remove_element_{suffix}"
    done_label = f"remove_done_{suffix}"

    # --- Loop head: leave once the read cursor reaches the old length ---
    self.add_label(loop_label)
    emit(Opcode.LT_R, flag_reg, read_reg, length_reg)
    emit_jump(Opcode.JUMP_IF_NOT_R, done_label, flag_reg)

    # The element at the removal index is not copied.
    emit(Opcode.EQ_R, flag_reg, read_reg, victim_reg)
    emit_jump(Opcode.JUMP_IF_R, skip_label, flag_reg)

    # --- new[write] = old[read]; write += 1 ---
    # This label is registered but no jump in this sequence targets it.
    self.add_label(element_label)
    emit(Opcode.ARRAY_GET_R, item_reg, src_reg, read_reg)
    emit(Opcode.ARRAY_SET_R, fresh_reg, write_reg, item_reg)
    emit(Opcode.ADD_R, write_reg, write_reg, one_reg)

    # --- Shared tail: read += 1, then back to the loop head ---
    self.add_label(skip_label)
    emit(Opcode.ADD_R, read_reg, read_reg, one_reg)
    emit_jump(Opcode.JUMP_R, loop_label)

    # --- Done: the new array takes over the source register ---
    self.add_label(done_label)
    emit(Opcode.MOVE_R, src_reg, fresh_reg)

    if not self.debug:
        return
    print(f" -> Generated ArrayRemove: r{src_reg}.remove_at(r{victim_reg}) using copy emulation")
1403
+
1404
def generate_array_insert(self, inst: ArrayInsert) -> None:
    """Generate array insert at index using copy emulation.

    Emulates array.insert(index, value) by:
    1. Get original array length
    2. Create new array with length + 1
    3. Copy elements [0:index] to new array
    4. Set new[index] = value
    5. Copy elements [index:] to new[index+1:]
    6. Replace original array with new array

    Args:
        inst: ArrayInsert instruction providing the array, index and value.
    """
    array = self.get_register(inst.array)
    index = self.get_register(inst.index)
    value = self.get_register(inst.value)

    # Fixed scratch registers used by this sequence.
    old_len_reg = 248  # Original length
    new_len_reg = 249  # New length (old + 1)
    new_array_reg = 250  # New array
    i_reg = 251  # Loop counter
    element_reg = 252  # Temporary for element
    cmp_reg = 253  # Comparison result
    const_one_reg = 254  # Constant 1

    def emit(opcode: Opcode, *regs: int) -> None:
        # Track and emit one instruction followed by its u8 register operands.
        self.track_vm_instruction()
        self.emit_opcode(opcode)
        for reg in regs:
            self.emit_u8(reg)

    def emit_load_const(reg: int, const_index: int) -> None:
        # LOAD_CONST_R takes a u8 register and a u16 constant index.
        emit(Opcode.LOAD_CONST_R, reg)
        self.emit_u16(const_index)

    def emit_jump(opcode: Opcode, target: str, *regs: int) -> None:
        # Record the patch site, then write a placeholder i32 offset.
        emit(opcode, *regs)
        self.pending_jumps.append((len(self.bytecode), target, len(self.instruction_offsets) - 1))
        self.emit_i32(0)

    # Get original array length
    emit(Opcode.ARRAY_LEN_R, old_len_reg, array)

    # Calculate new length (old + 1)
    const_one = self.add_constant(1)
    emit_load_const(const_one_reg, const_one)
    emit(Opcode.ADD_R, new_len_reg, old_len_reg, const_one_reg)

    # Create new array with new length
    emit(Opcode.NEW_ARRAY_R, new_array_reg, new_len_reg)

    # Initialize loop counter to 0
    const_zero = self.add_constant(0)
    emit_load_const(i_reg, const_zero)

    # All labels of this sequence share one counter value.
    suffix = self.label_counter
    self.label_counter += 1
    copy_before_label = f"insert_copy_before_{suffix}"
    copy_after_label = f"insert_copy_after_{suffix}"
    copy_rest_label = f"insert_copy_rest_{suffix}"
    insert_done_label = f"insert_done_{suffix}"

    # --- Copy elements before insertion point ---
    self.add_label(copy_before_label)

    # While i < index: new[i] = old[i]; once i >= index, go insert the value.
    emit(Opcode.LT_R, cmp_reg, i_reg, index)
    emit_jump(Opcode.JUMP_IF_NOT_R, copy_after_label, cmp_reg)
    emit(Opcode.ARRAY_GET_R, element_reg, array, i_reg)
    emit(Opcode.ARRAY_SET_R, new_array_reg, i_reg, element_reg)
    emit(Opcode.ADD_R, i_reg, i_reg, const_one_reg)
    emit_jump(Opcode.JUMP_R, copy_before_label)

    # --- Insert the value at index ---
    self.add_label(copy_after_label)
    emit(Opcode.ARRAY_SET_R, new_array_reg, index, value)

    # Reset i to index for copying remaining elements
    emit(Opcode.MOVE_R, i_reg, index)

    # --- Copy elements after insertion point ---
    self.add_label(copy_rest_label)

    # While i < old_len: new[i + 1] = old[i]; once i >= old_len, we're done.
    emit(Opcode.LT_R, cmp_reg, i_reg, old_len_reg)
    emit_jump(Opcode.JUMP_IF_NOT_R, insert_done_label, cmp_reg)

    # element_reg holds the destination index (i + 1). The element itself is
    # read into cmp_reg, which is free here because the loop condition is
    # recomputed by LT_R at the top of every iteration. Only one ARRAY_GET_R
    # is needed: a read into element_reg before this ADD_R would be
    # overwritten without ever being used.
    dest_index_reg = element_reg
    moved_item_reg = cmp_reg
    emit(Opcode.ADD_R, dest_index_reg, i_reg, const_one_reg)
    emit(Opcode.ARRAY_GET_R, moved_item_reg, array, i_reg)
    emit(Opcode.ARRAY_SET_R, new_array_reg, dest_index_reg, moved_item_reg)

    # Increment i and loop
    emit(Opcode.ADD_R, i_reg, i_reg, const_one_reg)
    emit_jump(Opcode.JUMP_R, copy_rest_label)

    # --- Replace original array with new array ---
    self.add_label(insert_done_label)
    emit(Opcode.MOVE_R, array, new_array_reg)

    if self.debug:
        print(f" -> Generated ArrayInsert: r{array}.insert(r{index}, r{value}) using copy emulation")
1596
+
1597
def generate_dict_keys(self, inst: DictKeys) -> None:
    """Emit a DICT_KEYS_R instruction for a MIR DictKeys.

    Args:
        inst: DictKeys instruction; its ``dest`` and ``dict_val`` values
            are resolved to registers and written as the two u8 operands.
    """
    target_reg = self.get_register(inst.dest)
    source_reg = self.get_register(inst.dict_val)

    # Encoding: opcode, destination register, dictionary register.
    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_KEYS_R)
    for operand_reg in (target_reg, source_reg):
        self.emit_u8(operand_reg)

    if not self.debug:
        return
    print(f" -> Generated DictKeysR: r{target_reg} = r{source_reg}.keys()")
1614
+
1615
def generate_dict_values(self, inst: DictValues) -> None:
    """Emit a DICT_VALUES_R instruction for a MIR DictValues.

    Args:
        inst: DictValues instruction; its ``dest`` and ``dict_val`` values
            are resolved to registers and written as the two u8 operands.
    """
    target_reg = self.get_register(inst.dest)
    source_reg = self.get_register(inst.dict_val)

    # Encoding: opcode, destination register, dictionary register.
    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_VALUES_R)
    for operand_reg in (target_reg, source_reg):
        self.emit_u8(operand_reg)

    if not self.debug:
        return
    print(f" -> Generated DictValuesR: r{target_reg} = r{source_reg}.values()")
1633
+
1634
def generate_dict_clear(self, inst: DictClear) -> None:
    """Emit a DICT_CLEAR_R instruction for a MIR DictClear.

    Args:
        inst: DictClear instruction; its ``dict_val`` value is resolved to
            a register and written as the single u8 operand.
    """
    target_reg = self.get_register(inst.dict_val)

    # Encoding: opcode, dictionary register.
    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_CLEAR_R)
    self.emit_u8(target_reg)

    if not self.debug:
        return
    print(f" -> Generated DictClearR: r{target_reg}.clear()")
1649
+
1650
def generate_array_clear(self, inst: ArrayClear) -> None:
    """Emit bytecode that clears an array by replacing it with an empty one.

    Two instructions are emitted: a LOAD_CONST_R that puts the constant 0
    into scratch register 254, and a NEW_ARRAY_R that creates an array of
    that size in the register that held the original array.

    Args:
        inst: ArrayClear instruction providing the array value.
    """
    target_reg = self.get_register(inst.array)
    size_reg = 254  # scratch register holding the constant 0

    # size_reg = 0
    zero_slot = self.add_constant(0)
    self.track_vm_instruction()
    self.emit_opcode(Opcode.LOAD_CONST_R)
    self.emit_u8(size_reg)
    self.emit_u16(zero_slot)

    # target_reg = new array of length size_reg
    self.track_vm_instruction()
    self.emit_opcode(Opcode.NEW_ARRAY_R)
    for operand_reg in (target_reg, size_reg):
        self.emit_u8(operand_reg)

    if not self.debug:
        return
    print(f" -> Generated ArrayClear: r{target_reg}.clear() as new_array(0)")
1675
+
1676
def generate_array_find_index(self, inst: ArrayFindIndex) -> None:
    """Emit bytecode that searches an array for a value with a linear scan.

    The emitted loop compares each element with the target value. On the
    first match the current position is moved into the destination
    register; if the scan reaches the array length without a match, the
    constant -1 is loaded into the destination register instead.

    Args:
        inst: ArrayFindIndex instruction providing dest, array and value.
    """
    result_reg = self.get_register(inst.dest)
    haystack_reg = self.get_register(inst.array)
    needle_reg = self.get_register(inst.value)

    # Fixed scratch registers used by this sequence.
    size_reg = 250  # array length
    pos_reg = 251  # current position
    item_reg = 252  # current element (also reused for the constant 1)
    flag_reg = 253  # comparison result

    def emit(opcode: Opcode, *regs: int) -> None:
        # Track and emit one instruction followed by its u8 register operands.
        self.track_vm_instruction()
        self.emit_opcode(opcode)
        for reg in regs:
            self.emit_u8(reg)

    def emit_load_const(reg: int, pool_slot: int) -> None:
        # LOAD_CONST_R takes a u8 register and a u16 constant index.
        emit(Opcode.LOAD_CONST_R, reg)
        self.emit_u16(pool_slot)

    def emit_jump(opcode: Opcode, target: str, *regs: int) -> None:
        # Record the patch site, then write a placeholder i32 offset.
        emit(opcode, *regs)
        self.pending_jumps.append((len(self.bytecode), target, len(self.instruction_offsets) - 1))
        self.emit_i32(0)

    # One counter value is shared by every label of this loop.
    suffix = self.label_counter
    self.label_counter += 1
    scan_label = f"find_loop_{suffix}"
    missing_label = f"find_end_{suffix}"
    hit_label = f"find_found_{suffix}"
    exit_label = f"find_exit_{suffix}"

    # size_reg = len(array)
    emit(Opcode.ARRAY_LEN_R, size_reg, haystack_reg)

    # pos_reg = 0
    zero_slot = self.add_constant(0)
    emit_load_const(pos_reg, zero_slot)

    # --- Loop head: stop scanning once pos reaches the length ---
    self.add_label(scan_label)
    emit(Opcode.LT_R, flag_reg, pos_reg, size_reg)
    emit_jump(Opcode.JUMP_IF_NOT_R, missing_label, flag_reg)

    # Compare array[pos] with the target value; leave the loop on a match.
    emit(Opcode.ARRAY_GET_R, item_reg, haystack_reg, pos_reg)
    emit(Opcode.EQ_R, flag_reg, item_reg, needle_reg)
    emit_jump(Opcode.JUMP_IF_R, hit_label, flag_reg)

    # pos += 1; item_reg is reloaded with the constant 1 on each pass.
    one_slot = self.add_constant(1)
    emit_load_const(item_reg, one_slot)
    emit(Opcode.ADD_R, pos_reg, pos_reg, item_reg)
    emit_jump(Opcode.JUMP_R, scan_label)

    # --- Match: dest = pos, then skip over the not-found path ---
    self.add_label(hit_label)
    emit(Opcode.MOVE_R, result_reg, pos_reg)
    emit_jump(Opcode.JUMP_R, exit_label)

    # --- No match: dest = -1 ---
    self.add_label(missing_label)
    minus_one_slot = self.add_constant(-1)
    emit_load_const(result_reg, minus_one_slot)

    # --- Common exit ---
    self.add_label(exit_label)

    if not self.debug:
        return
    print(
        f" -> Generated ArrayFindIndex: r{result_reg} = find_index(r{haystack_reg}, r{needle_reg}) using loop emulation"
    )
1802
+
1803
+
1804
class MetadataCollector:
    """Gather metadata from MIR for the Rust VM.

    Only a small amount of metadata is collected:
    - Type information for registers
    - Symbol table for debugging
    - SSA phi node information
    - Basic block boundaries
    """

    def __init__(self, debug_mode: bool = False) -> None:
        """Create a collector.

        Args:
            debug_mode: Whether to collect full debug metadata.
        """
        self.debug_mode = debug_mode

    def collect(self, mir_module: MIRModule, allocation: RegisterAllocation) -> dict[str, Any]:
        """Build the metadata dictionary for a whole MIR module.

        Args:
            mir_module: MIR module whose functions are described.
            allocation: Register allocation passed on to every function.

        Returns:
            Dictionary with ``version``, ``metadata_level`` and one entry
            per function under ``functions``.
        """
        level = "minimal"
        if self.debug_mode:
            level = "full"

        per_function = [self.collect_function_metadata(fn, allocation) for fn in mir_module.functions.values()]
        return {
            "version": 1,
            "metadata_level": level,
            "functions": per_function,
        }

    def collect_function_metadata(self, func: MIRFunction, allocation: RegisterAllocation) -> dict[str, Any]:
        """Build the metadata dictionary for one function.

        Args:
            func: MIR function to describe.
            allocation: Register allocation used to name registers.

        Returns:
            Function metadata dictionary; ``variable_names`` and
            ``source_map`` are only present in debug mode.
        """
        param_types = [str(param.type) for param in func.params]
        # A falsy return type is reported as "empty".
        return_type = str(func.return_type) if func.return_type else "empty"

        described: dict[str, Any] = {
            "name": func.name,
            "signature": {
                "param_types": param_types,
                "return_type": return_type,
            },
            "register_types": self.extract_register_types(func, allocation),
            "basic_blocks": self.extract_basic_blocks(func),
            "phi_nodes": self.extract_phi_nodes(func, allocation),
        }
        if not self.debug_mode:
            return described

        # Debug-only additions.
        described["variable_names"] = self.extract_variable_names(func, allocation)
        described["source_map"] = []  # TODO: Implement source mapping
        return described

    def extract_register_types(self, func: MIRFunction, allocation: RegisterAllocation) -> dict[str, str]:
        """Map each allocated register to the type name of its value.

        Args:
            func: MIR function (not consulted by this method).
            allocation: Register allocation.

        Returns:
            Mapping of ``r<N>`` to ``str(value.type)``, or ``"unknown"``
            when the value has no ``type`` attribute.
        """
        return {
            f"r{reg_num}": str(value.type) if hasattr(value, "type") else "unknown"
            for value, reg_num in allocation.value_to_register.items()
        }

    def extract_basic_blocks(self, func: MIRFunction) -> list[dict[str, Any]]:
        """Describe the basic blocks of a function.

        Offsets are running counts of MIR instructions over the blocks in
        iteration order.

        Args:
            func: MIR function.

        Returns:
            List of dictionaries with ``label``, ``start_offset`` and
            ``end_offset`` for every block.
        """
        layout: list[dict[str, Any]] = []
        cursor = 0
        blocks = func.cfg.blocks

        for key in blocks:
            current = blocks[key]
            stop = cursor + len(current.instructions)
            layout.append(
                {
                    "label": current.label,
                    "start_offset": cursor,
                    "end_offset": stop,
                }
            )
            cursor = stop

        return layout

    def extract_phi_nodes(self, func: MIRFunction, allocation: RegisterAllocation) -> list[dict[str, Any]]:
        """Describe every Phi instruction of a function.

        Values without an allocated register are reported as ``r-1``.

        Args:
            func: MIR function.
            allocation: Register allocation.

        Returns:
            List of dictionaries with the owning block label, the
            destination register and the incoming (register, block) pairs.
        """
        registers = allocation.value_to_register
        collected: list[dict[str, Any]] = []
        blocks = func.cfg.blocks

        for key in blocks:
            owner = blocks[key]
            for inst in owner.instructions:
                if not isinstance(inst, Phi):
                    continue

                target = registers.get(inst.dest, -1)
                # NOTE(review): the type checker is silenced for `sources` here;
                # confirm the attribute exists on Phi.
                incoming = [
                    {
                        "register": f"r{registers.get(src_value, -1)}",
                        "block": src_label,
                    }
                    for src_value, src_label in inst.sources  # type: ignore[attr-defined]
                ]
                collected.append(
                    {
                        "block": owner.label,
                        "register": f"r{target}",
                        "sources": incoming,
                    }
                )

        return collected

    def extract_variable_names(self, func: MIRFunction, allocation: RegisterAllocation) -> dict[str, str]:
        """Map registers that hold a Variable to that variable's name.

        Args:
            func: MIR function (not consulted by this method).
            allocation: Register allocation.

        Returns:
            Mapping of ``r<N>`` to variable name.
        """
        return {
            f"r{reg_num}": value.name
            for value, reg_num in allocation.value_to_register.items()
            if isinstance(value, Variable)
        }
1970
+
1971
+
1972
def generate_bytecode_from_mir(
    mir_module: MIRModule, debug: bool = False
) -> tuple[BytecodeModule, dict[str, Any] | None]:
    """Translate a MIR module into bytecode plus accompanying metadata.

    Main entry point for bytecode generation.

    Args:
        mir_module: MIR module to generate bytecode from.
        debug: Enable debug output for bytecode generation.

    Returns:
        Tuple of (bytecode module, metadata). The metadata is ``None`` when
        the generator has no register allocation after generating.
    """
    generator = RegisterBytecodeGenerator(debug=debug)
    bytecode = generator.generate(mir_module)

    allocation = generator.allocation
    if allocation is None:
        # Nothing to describe without a register allocation.
        return bytecode, None

    # NOTE(review): the collector always runs with debug_mode=False,
    # independent of `debug` — confirm this is intended.
    metadata = MetadataCollector(debug_mode=False).collect(mir_module, allocation)
    return bytecode, metadata