machine-dialect 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,1996 @@
|
|
1
|
+
"""Register-based bytecode generator for the Rust VM.
|
2
|
+
|
3
|
+
This module generates register-based bytecode from MIR for the new Rust VM.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
import struct
|
9
|
+
from dataclasses import dataclass, field
|
10
|
+
from typing import Any
|
11
|
+
|
12
|
+
from machine_dialect.codegen.bytecode_module import BytecodeModule, Chunk, ChunkType, ConstantTag
|
13
|
+
from machine_dialect.codegen.opcodes import Opcode
|
14
|
+
|
15
|
+
# Note: ConstantPool is now just a list of (tag, value) tuples
|
16
|
+
from machine_dialect.mir.mir_function import MIRFunction
|
17
|
+
from machine_dialect.mir.mir_instructions import (
|
18
|
+
ArrayAppend,
|
19
|
+
ArrayClear,
|
20
|
+
ArrayCreate,
|
21
|
+
ArrayFindIndex,
|
22
|
+
ArrayGet,
|
23
|
+
ArrayInsert,
|
24
|
+
ArrayLength,
|
25
|
+
ArrayRemove,
|
26
|
+
ArraySet,
|
27
|
+
Assert,
|
28
|
+
BinaryOp,
|
29
|
+
Call,
|
30
|
+
ConditionalJump,
|
31
|
+
Copy,
|
32
|
+
DictClear,
|
33
|
+
DictContains,
|
34
|
+
DictCreate,
|
35
|
+
DictGet,
|
36
|
+
DictKeys,
|
37
|
+
DictRemove,
|
38
|
+
DictSet,
|
39
|
+
DictValues,
|
40
|
+
Jump,
|
41
|
+
LoadConst,
|
42
|
+
LoadVar,
|
43
|
+
MIRInstruction,
|
44
|
+
Nop,
|
45
|
+
Phi,
|
46
|
+
Print,
|
47
|
+
Return,
|
48
|
+
Scope,
|
49
|
+
StoreVar,
|
50
|
+
UnaryOp,
|
51
|
+
)
|
52
|
+
from machine_dialect.mir.mir_module import MIRModule
|
53
|
+
from machine_dialect.mir.mir_values import Constant, MIRValue, Variable
|
54
|
+
|
55
|
+
|
56
|
+
@dataclass
class RegisterAllocation:
    """Per-function bookkeeping of which MIR value lives in which register."""

    # MIR value -> register number that was assigned to it.
    value_to_register: dict[MIRValue, int] = field(default_factory=dict)
    # Number of the next register that has not been handed out yet.
    next_register: int = 0
    # Cap on the number of registers: allocation fails with RuntimeError
    # once next_register reaches this value.
    max_registers: int = 256
|
66
|
+
|
67
|
+
|
68
|
+
class RegisterAllocator:
    """Hands out register numbers to the MIR values used by a function."""

    def _is_global_variable(self, var: MIRValue, func: MIRFunction) -> bool:
        """Tell whether ``var`` is treated as a global instead of a register value.

        A ``ScopedVariable`` is global exactly when its scope is
        ``VariableScope.GLOBAL``. Any other value is global only when it is a
        version-0 ``Variable`` whose name matches neither a parameter of
        ``func`` nor an entry in ``func.locals``.

        Args:
            var: The MIR value to classify.
            func: The function whose parameters and locals are consulted.

        Returns:
            True if ``var`` is considered a global variable.
        """
        from machine_dialect.mir.mir_values import ScopedVariable, VariableScope

        # An explicit scope always wins over the name-based checks below.
        if isinstance(var, ScopedVariable):
            return var.scope == VariableScope.GLOBAL

        if not (isinstance(var, Variable) and var.version == 0):
            return False

        # Parameters are matched by name, not by object identity.
        if any(param.name == var.name for param in func.params):
            return False

        # Whatever is left is global unless the function lists it as a local.
        return var.name not in func.locals

    def allocate_function(self, func: MIRFunction) -> RegisterAllocation:
        """Build the register allocation for every value of ``func``.

        Parameters receive registers first, in declaration order. Afterwards
        every instruction of every block is visited: values it defines, then
        values it uses, get a register unless they already have one or are
        classified as global. Constants appearing as uses are skipped too.

        Args:
            func: MIR function to allocate registers for.

        Returns:
            The populated register allocation.
        """
        allocation = RegisterAllocation()
        # Alias of the mapping; allocate_register mutates it in place.
        assigned = allocation.value_to_register

        for param in func.params:
            self.allocate_register(param, allocation)

        blocks = func.cfg.blocks
        for label in blocks:
            for inst in blocks[label].instructions:
                # Values defined by the instruction.
                for defined in inst.get_defs():
                    if defined in assigned:
                        continue
                    if self._is_global_variable(defined, func):
                        continue  # globals never get a register
                    self.allocate_register(defined, allocation)

                # Values read by the instruction.
                for used in inst.get_uses():
                    if used in assigned or isinstance(used, Constant):
                        continue
                    if self._is_global_variable(used, func):
                        continue  # globals never get a register
                    self.allocate_register(used, allocation)

        return allocation

    def allocate_register(self, value: MIRValue, allocation: RegisterAllocation) -> int:
        """Return the register of ``value``, assigning a fresh one if needed.

        Args:
            value: Value to allocate a register for.
            allocation: Allocation state to read and update.

        Returns:
            The register number associated with ``value``.

        Raises:
            RuntimeError: If ``value`` has no register yet and
                ``allocation.next_register`` has reached
                ``allocation.max_registers``.
        """
        existing = allocation.value_to_register.get(value)
        if existing is not None:
            return existing

        limit = allocation.max_registers
        reg = allocation.next_register
        if reg >= limit:
            raise RuntimeError(f"Out of registers (max {limit})")

        allocation.value_to_register[value] = reg
        allocation.next_register = reg + 1
        return reg
|
162
|
+
|
163
|
+
|
164
|
+
class RegisterBytecodeGenerator:
|
165
|
+
"""Generate register-based bytecode from MIR."""
|
166
|
+
|
167
|
+
def __init__(self, debug: bool = False) -> None:
    """Set up an empty generator.

    Args:
        debug: When true, generation steps print diagnostic output.
    """
    # Configuration and cross-function state.
    self.debug = debug
    self.label_counter = 0  # counter for generating unique labels
    self.current_function: MIRFunction | None = None

    # Register allocation: the allocator itself and its latest result.
    self.allocator = RegisterAllocator()
    self.allocation: RegisterAllocation | None = None

    # Output buffers of the function currently being generated.
    self.bytecode: bytearray = bytearray()
    self.constants: list[tuple[ConstantTag, Any]] = []

    # Jump bookkeeping.
    # Basic block label -> instruction index (not a byte offset).
    self.block_offsets: dict[str, int] = {}
    # Instruction index -> byte offset.
    self.instruction_offsets: list[int] = []
    # Jumps still to be resolved: (byte_pos, target_label, source_inst_idx).
    self.pending_jumps: list[tuple[int, str, int]] = []
|
187
|
+
|
188
|
+
@staticmethod
def is_ssa_variable(var: MIRValue) -> bool:
    """Report whether ``var`` is a ``Variable`` carrying a version above zero.

    In this file version 0 marks variables that were not renamed by SSA
    construction (globals, original parameters); any higher version is
    taken to mean an SSA-renamed variable.

    Args:
        var: The MIR value to check.

    Returns:
        True if ``var`` is a ``Variable`` with ``version > 0``.
    """
    if not isinstance(var, Variable):
        return False
    return var.version > 0
|
203
|
+
|
204
|
+
def is_global_variable(self, var: MIRValue) -> bool:
    """Tell whether ``var`` is treated as a global variable.

    A ``ScopedVariable`` is global exactly when its scope is
    ``VariableScope.GLOBAL``. Any other value is global only when it is a
    version-0 ``Variable`` that, if a current function is set, is named
    neither like one of its parameters nor like one of its locals.

    Args:
        var: The MIR value to check.

    Returns:
        True if ``var`` is considered a global variable.
    """
    from machine_dialect.mir.mir_values import ScopedVariable, VariableScope

    # An explicit scope always wins over the name-based checks below.
    if isinstance(var, ScopedVariable):
        return var.scope == VariableScope.GLOBAL

    if not (isinstance(var, Variable) and var.version == 0):
        return False

    func = self.current_function
    if not func:
        # Without a function there is nothing to compare names against.
        return True

    # Parameters are matched by name, not by object identity.
    if any(param.name == var.name for param in func.params):
        return False

    # Whatever is left is global unless the function lists it as a local.
    return var.name not in func.locals
|
236
|
+
|
237
|
+
def generate(self, mir_module: MIRModule) -> BytecodeModule:
    """Translate every function of a MIR module into bytecode chunks.

    The ``__main__`` function, when the module has one, is generated first
    and appended directly to ``module.chunks``. All remaining functions are
    then generated in the iteration order of ``mir_module.functions`` and
    registered through ``module.add_chunk``.

    Args:
        mir_module: MIR module to generate bytecode from.

    Returns:
        The bytecode module holding one chunk per generated function.
    """
    module = BytecodeModule()

    # Main function first, if present.
    main_func = mir_module.get_function("__main__")
    if main_func:
        module.chunks.append(self.generate_function(main_func))

    # Then every other function.
    for name, func in mir_module.functions.items():
        if name == "__main__":
            continue
        module.add_chunk(self.generate_function(func))

    return module
|
260
|
+
|
261
|
+
def generate_function(self, func: MIRFunction) -> Chunk:
    """Produce the bytecode chunk for a single MIR function.

    All per-function generator state is reset, registers are allocated,
    the blocks are emitted in the order given by the CFG's topological
    sort, and pending jumps are resolved before the chunk is assembled.

    Args:
        func: MIR function to generate bytecode for.

    Returns:
        The chunk for ``func``; its type is ``ChunkType.MAIN`` when the
        function is named ``__main__`` and ``ChunkType.FUNCTION`` otherwise.
    """
    # Start from a clean slate for this function.
    self.current_function = func
    self.bytecode = bytearray()
    self.constants = []
    self.block_offsets = {}  # block label -> instruction index
    self.instruction_offsets = []  # byte offset of each instruction
    self.pending_jumps = []

    self.allocation = self.allocator.allocate_function(func)

    # Optional dump of the parameter -> register assignment.
    if self.debug:
        registers = self.allocation.value_to_register
        print(f"\nDEBUG Function {func.name}:")
        print(f" Parameters: {[p.name for p in func.params]}")
        for param in func.params:
            location = f"r{registers[param]}" if param in registers else "NOT ALLOCATED!"
            print(f" {param.name} -> {location}")

    for block in func.cfg.topological_sort():
        # A block's offset is counted in instructions emitted so far.
        self.block_offsets[block.label] = len(self.instruction_offsets)
        for inst in block.instructions:
            # Each generate_* method is responsible for tracking the VM
            # instructions it emits via track_vm_instruction().
            self.generate_instruction(inst)

    self.resolve_jumps()

    is_main = func.name == "__main__"
    return Chunk(
        name=func.name,
        chunk_type=ChunkType.MAIN if is_main else ChunkType.FUNCTION,
        bytecode=self.bytecode,
        constants=self.constants,
        num_locals=self.allocation.next_register,
        num_params=len(func.params),
    )
|
316
|
+
|
317
|
+
def generate_instruction(self, inst: MIRInstruction) -> None:
|
318
|
+
"""Generate bytecode for a MIR instruction.
|
319
|
+
|
320
|
+
Args:
|
321
|
+
inst: MIR instruction to generate bytecode for.
|
322
|
+
"""
|
323
|
+
if isinstance(inst, LoadConst):
|
324
|
+
self.generate_load_const(inst)
|
325
|
+
elif isinstance(inst, Copy):
|
326
|
+
self.generate_copy(inst)
|
327
|
+
elif isinstance(inst, LoadVar):
|
328
|
+
self.generate_load_var(inst)
|
329
|
+
elif isinstance(inst, StoreVar):
|
330
|
+
self.generate_store_var(inst)
|
331
|
+
elif isinstance(inst, BinaryOp):
|
332
|
+
self.generate_binary_op(inst)
|
333
|
+
elif isinstance(inst, UnaryOp):
|
334
|
+
self.generate_unary_op(inst)
|
335
|
+
elif isinstance(inst, Jump):
|
336
|
+
self.generate_jump(inst)
|
337
|
+
elif isinstance(inst, ConditionalJump):
|
338
|
+
self.generate_conditional_jump(inst)
|
339
|
+
elif isinstance(inst, Call):
|
340
|
+
self.generate_call(inst)
|
341
|
+
elif isinstance(inst, Return):
|
342
|
+
self.generate_return(inst)
|
343
|
+
elif isinstance(inst, Phi):
|
344
|
+
self.generate_phi(inst)
|
345
|
+
elif isinstance(inst, Assert):
|
346
|
+
self.generate_assert(inst)
|
347
|
+
elif isinstance(inst, ArrayCreate):
|
348
|
+
self.generate_array_create(inst)
|
349
|
+
elif isinstance(inst, ArrayGet):
|
350
|
+
self.generate_array_get(inst)
|
351
|
+
elif isinstance(inst, ArraySet):
|
352
|
+
self.generate_array_set(inst)
|
353
|
+
elif isinstance(inst, ArrayLength):
|
354
|
+
self.generate_array_length(inst)
|
355
|
+
elif isinstance(inst, ArrayAppend):
|
356
|
+
self.generate_array_append(inst)
|
357
|
+
elif isinstance(inst, ArrayRemove):
|
358
|
+
self.generate_array_remove(inst)
|
359
|
+
elif isinstance(inst, ArrayInsert):
|
360
|
+
self.generate_array_insert(inst)
|
361
|
+
elif isinstance(inst, ArrayClear):
|
362
|
+
self.generate_array_clear(inst)
|
363
|
+
elif isinstance(inst, ArrayFindIndex):
|
364
|
+
self.generate_array_find_index(inst)
|
365
|
+
elif isinstance(inst, DictCreate):
|
366
|
+
self.generate_dict_create(inst)
|
367
|
+
elif isinstance(inst, DictGet):
|
368
|
+
self.generate_dict_get(inst)
|
369
|
+
elif isinstance(inst, DictSet):
|
370
|
+
self.generate_dict_set(inst)
|
371
|
+
elif isinstance(inst, DictRemove):
|
372
|
+
self.generate_dict_remove(inst)
|
373
|
+
elif isinstance(inst, DictContains):
|
374
|
+
self.generate_dict_contains(inst)
|
375
|
+
elif isinstance(inst, DictKeys):
|
376
|
+
self.generate_dict_keys(inst)
|
377
|
+
elif isinstance(inst, DictValues):
|
378
|
+
self.generate_dict_values(inst)
|
379
|
+
elif isinstance(inst, DictClear):
|
380
|
+
self.generate_dict_clear(inst)
|
381
|
+
elif isinstance(inst, Scope):
|
382
|
+
self.generate_scope(inst)
|
383
|
+
elif isinstance(inst, Print):
|
384
|
+
self.generate_print(inst)
|
385
|
+
elif isinstance(inst, Nop):
|
386
|
+
pass # No operation
|
387
|
+
|
388
|
+
def generate_load_const(self, inst: LoadConst) -> None:
    """Emit a LOAD_CONST_R instruction for ``inst``.

    The encoding written is the opcode, the destination register as a u8
    and the constant's index as a u16.

    Args:
        inst: The load-constant instruction to translate.
    """
    dst = self.get_register(inst.dest)

    # Unwrap objects exposing ``.value``; anything else is stored as-is.
    constant = inst.constant
    const_idx = self.add_constant(getattr(constant, "value", constant))

    self.track_vm_instruction()
    self.emit_opcode(Opcode.LOAD_CONST_R)
    self.emit_u8(dst)
    self.emit_u16(const_idx)
|
401
|
+
|
402
|
+
def generate_copy(self, inst: Copy) -> None:
    """Generate MoveR, LoadGlobalR or LoadConstR+MoveR for a MIR ``Copy``.

    ``inst.source`` is resolved in this order:

    1. A parameter-scoped ``ScopedVariable`` that is allocated itself, or
       whose name matches an allocated function parameter -> ``MOVE_R``.
    2. Any value present in the allocation map -> ``MOVE_R``.
    3. A ``Variable`` matching a function parameter by name with version 0
       -> ``MOVE_R``; any other non-SSA ``Variable`` is loaded by name with
       ``LOAD_GLOBAL_R``.
    4. Anything else gets its register from ``get_register``. Because
       ``get_register`` never emits the load for a ``Constant``, a constant
       source is materialised with ``LOAD_CONST_R`` before the ``MOVE_R``.

    Raises:
        RuntimeError: If a matching parameter, or an SSA variable, has no
            register in the allocation map.
    """
    dst = self.get_register(inst.dest)

    def emit_move(src: int, detail: str = "") -> None:
        # Single place that emits MOVE_R dst, src plus its debug trace.
        self.track_vm_instruction()
        self.emit_opcode(Opcode.MOVE_R)
        self.emit_u8(dst)
        self.emit_u8(src)
        if self.debug:
            print(f" -> Generated MoveR from r{src}{detail} to r{dst}")

    # Debug output
    if self.debug:
        print(f"DEBUG Copy: source={inst.source}, dest={inst.dest}")
        if isinstance(inst.source, Variable):
            print(f" source is Variable, name={inst.source.name}, version={inst.source.version}")
            if self.allocation:
                print(f" in allocation? {inst.source in self.allocation.value_to_register}")

    # Handle ScopedVariable parameters
    from machine_dialect.mir.mir_values import ScopedVariable, VariableScope

    if isinstance(inst.source, ScopedVariable) and inst.source.scope == VariableScope.PARAMETER:
        # The reference may be the allocated object itself or a different
        # object carrying the same parameter name.
        if self.allocation and inst.source in self.allocation.value_to_register:
            emit_move(
                self.allocation.value_to_register[inst.source],
                f" (param {inst.source.name} direct)",
            )
            return
        elif self.current_function:
            for param in self.current_function.params:
                if param.name != inst.source.name:
                    continue
                if self.allocation and param in self.allocation.value_to_register:
                    emit_move(
                        self.allocation.value_to_register[param],
                        f" (param {inst.source.name} by name)",
                    )
                    return
        # No register found this way: fall through to the generic handling.

    if self.allocation and inst.source in self.allocation.value_to_register:
        # Local variable, parameter, or SSA variable already in a register.
        emit_move(self.allocation.value_to_register[inst.source])
        return

    if isinstance(inst.source, Variable):
        # Parameters carry version 0 and are matched by name.
        if self.current_function:
            for param in self.current_function.params:
                if param.name == inst.source.name and inst.source.version == 0:
                    if self.allocation and param in self.allocation.value_to_register:
                        emit_move(
                            self.allocation.value_to_register[param],
                            f" (param {param.name})",
                        )
                        return
                    raise RuntimeError(f"Parameter {param.name} not allocated to register")

        # An SSA variable must have been given a register by the allocator.
        if self.is_ssa_variable(inst.source):
            raise RuntimeError(
                f"SSA variable {inst.source} (version {inst.source.version}) not allocated to register"
            )

        # Otherwise treat it as a global and load it by name.
        name_idx = self.add_string_constant(inst.source.name)
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_GLOBAL_R)
        self.emit_u8(dst)
        self.emit_u16(name_idx)
        if self.debug:
            print(f" -> Generated LoadGlobalR for {inst.source.name}")
        return

    # Other value kinds (constants, etc.)
    src = self.get_register(inst.source)
    if isinstance(inst.source, Constant):
        # get_register only reserves the register; the load is our job.
        const_val = inst.source.value if hasattr(inst.source, "value") else inst.source
        const_idx = self.add_constant(const_val)
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(src)
        self.emit_u16(const_idx)
    emit_move(src)
|
499
|
+
|
500
|
+
def generate_load_var(self, inst: LoadVar) -> None:
    """Generate MoveR for register-resident variables or LoadGlobalR otherwise.

    Resolution order for ``inst.var``:

    1. Present in the allocation map -> ``MOVE_R`` from its register.
    2. Reported as SSA by ``is_ssa_variable`` but unallocated -> error.
    3. Name matches a parameter of the current function -> ``MOVE_R`` from
       that parameter's register (error if the parameter is unallocated).
    4. Anything else is loaded by name with ``LOAD_GLOBAL_R``.

    Raises:
        RuntimeError: If an SSA variable or a function parameter has no
            register in the allocation map.
    """
    dst = self.get_register(inst.dest)

    # Debug output
    if self.debug:
        print(f"DEBUG LoadVar: var={inst.var}, var.name={inst.var.name}, version={inst.var.version}")
        if self.allocation:
            print(f" in allocation? {inst.var in self.allocation.value_to_register}")
            if inst.var in self.allocation.value_to_register:
                print(f" allocated to register {self.allocation.value_to_register[inst.var]}")
        if self.current_function:
            print(f" function params: {[p.name for p in self.current_function.params]}")
            print(f" is param? {inst.var in self.current_function.params}")

    # Function parameter, local variable, or SSA variable already in a register.
    if self.allocation and inst.var in self.allocation.value_to_register:
        src = self.allocation.value_to_register[inst.var]
        self.track_vm_instruction()
        self.emit_opcode(Opcode.MOVE_R)
        self.emit_u8(dst)
        self.emit_u8(src)
        return

    # An SSA variable must have been given a register by the allocator.
    if self.is_ssa_variable(inst.var):
        raise RuntimeError(f"SSA variable {inst.var} (version {inst.var.version}) not allocated to register")

    # Parameters have version 0 but live in registers; match them by name.
    # Every name match either returns or raises, so no flag is needed to
    # detect a "matched but unhandled" parameter after the loop.
    if self.current_function and self.allocation:
        if self.debug:
            print(f" Checking if {inst.var.name} is a parameter...")
            print(f" Allocation keys: {list(self.allocation.value_to_register.keys())}")
        for param in self.current_function.params:
            if self.debug:
                print(f" Comparing {param.name} == {inst.var.name}: {param.name == inst.var.name}")
            if param.name != inst.var.name:
                continue
            if param not in self.allocation.value_to_register:
                if self.debug:
                    print(f" Parameter {inst.var.name} not in allocation!")
                raise RuntimeError(f"Function parameter {inst.var.name} not allocated to register")
            src = self.allocation.value_to_register[param]
            if self.debug:
                print(f" Found parameter {inst.var.name} in register {src}!")
            self.track_vm_instruction()
            self.emit_opcode(Opcode.MOVE_R)
            self.emit_u8(dst)
            self.emit_u8(src)
            return

    # Not in a register and not a parameter: load it by name as a global.
    name_idx = self.add_string_constant(inst.var.name)
    self.track_vm_instruction()
    self.emit_opcode(Opcode.LOAD_GLOBAL_R)
    self.emit_u8(dst)
    self.emit_u16(name_idx)
|
569
|
+
|
570
|
+
def generate_store_var(self, inst: StoreVar) -> None:
    """Store a value into a variable via MoveR or StoreGlobalR.

    When ``inst.var`` has a register in the allocation map the value is
    copied there with ``MOVE_R``. Otherwise the variable is written by name
    with ``STORE_GLOBAL_R``.

    Raises:
        RuntimeError: If ``inst.var`` is an SSA variable without a register.
    """
    if self.debug:
        print(f"DEBUG StoreVar: var={inst.var}, source={inst.source}")
    src = self.get_register(inst.source)

    # Register-resident destination (SSA or local): plain register move.
    if self.allocation and inst.var in self.allocation.value_to_register:
        dst = self.allocation.value_to_register[inst.var]
        self.track_vm_instruction()
        self.emit_opcode(Opcode.MOVE_R)
        self.emit_u8(dst)
        self.emit_u8(src)
        if self.debug:
            print(f" -> Generated MoveR from r{src} to r{dst} for {inst.var}")
        return

    # An SSA variable must have been given a register by the allocator.
    if self.is_ssa_variable(inst.var):
        raise RuntimeError(f"SSA variable {inst.var} (version {inst.var.version}) not allocated to register")

    # Global store: the name goes into the constant pool.
    if hasattr(inst.var, "name"):
        global_name = inst.var.name
    else:
        global_name = str(inst.var)
    name_idx = self.add_string_constant(global_name)
    self.track_vm_instruction()
    self.emit_opcode(Opcode.STORE_GLOBAL_R)
    self.emit_u8(src)
    self.emit_u16(name_idx)
    if self.debug:
        print(f" -> Generated StoreGlobalR for {inst.var}")
|
604
|
+
|
605
|
+
def generate_binary_op(self, inst: BinaryOp) -> None:
    """Generate the register instruction for a binary operation.

    Constant operands are first materialised with ``LOAD_CONST_R`` (left
    before right), then the destination register is resolved and the
    opcode mapped from ``inst.op`` is emitted as ``op dst, left, right``.

    An operator without a mapping emits nothing; a warning is printed when
    debugging is enabled.
    """

    def operand_register(operand: MIRValue) -> int:
        # Resolve the operand's register, loading it first if it is a constant.
        reg = self.get_register(operand)
        if isinstance(operand, Constant):
            const_val = operand.value if hasattr(operand, "value") else operand
            const_idx = self.add_constant(const_val)
            self.track_vm_instruction()
            self.emit_opcode(Opcode.LOAD_CONST_R)
            self.emit_u8(reg)
            self.emit_u16(const_idx)
        return reg

    left = operand_register(inst.left)
    right = operand_register(inst.right)

    # Get destination register
    dst = self.get_register(inst.dest)

    if self.debug:
        print(
            f"DEBUG BinaryOp: op={inst.op}, left={inst.left} "
            f"(type={type(inst.left).__name__}), "
            f"right={inst.right} (type={type(inst.right).__name__})"
        )
        print(f" left register: r{left}, right register: r{right}, dest register: r{dst}")

    # Map operators to opcodes
    op_map = {
        "+": Opcode.ADD_R,
        "-": Opcode.SUB_R,
        "*": Opcode.MUL_R,
        "/": Opcode.DIV_R,
        "%": Opcode.MOD_R,
        "and": Opcode.AND_R,
        "or": Opcode.OR_R,
        "==": Opcode.EQ_R,
        "!=": Opcode.NEQ_R,
        "<": Opcode.LT_R,
        ">": Opcode.GT_R,
        "<=": Opcode.LTE_R,
        ">=": Opcode.GTE_R,
    }

    # Compare against None explicitly: an opcode whose integer value is 0
    # is falsy and must not be mistaken for a missing mapping.
    opcode = op_map.get(inst.op)
    if opcode is None:
        if self.debug:
            print(f"Warning: Unmapped operator '{inst.op}'")
        return

    self.track_vm_instruction()
    self.emit_opcode(opcode)
    self.emit_u8(dst)
    self.emit_u8(left)
    self.emit_u8(right)
|
668
|
+
|
669
|
+
def generate_unary_op(self, inst: UnaryOp) -> None:
    """Generate NEG_R or NOT_R for a unary operation.

    ``"-"`` maps to ``NEG_R`` and ``"not"`` to ``NOT_R``; any other operator
    emits nothing. Both registers are resolved before the operator is
    inspected.
    """
    dst = self.get_register(inst.dest)
    src = self.get_register(inst.operand)

    if inst.op == "-":
        opcode = Opcode.NEG_R
    elif inst.op == "not":
        opcode = Opcode.NOT_R
    else:
        # Unsupported operator: nothing is emitted.
        return

    self.track_vm_instruction()
    self.emit_opcode(opcode)
    self.emit_u8(dst)
    self.emit_u8(src)
|
685
|
+
|
686
|
+
def generate_jump(self, inst: Jump) -> None:
    """Generate a JUMP_R with a placeholder offset to be patched later.

    A ``(byte position, target label, instruction index)`` triple is queued
    in ``pending_jumps`` so ``resolve_jumps`` can fill in the real offset.
    """
    self.track_vm_instruction()
    self.emit_opcode(Opcode.JUMP_R)

    patch_pos = len(self.bytecode)  # where the 32-bit offset will be written
    jump_inst_idx = len(self.instruction_offsets) - 1  # index of this jump
    self.pending_jumps.append((patch_pos, inst.label, jump_inst_idx))

    self.emit_i32(0)  # Placeholder offset
|
693
|
+
|
694
|
+
def generate_conditional_jump(self, inst: ConditionalJump) -> None:
    """Generate JUMP_IF_R to the true label plus an optional JUMP_R to the false label.

    Both jumps are emitted with a zero placeholder offset and queued in
    ``pending_jumps`` for later patching by ``resolve_jumps``.
    """
    cond = self.get_register(inst.condition)

    # Conditional jump taken when the condition holds.
    self.track_vm_instruction()
    self.emit_opcode(Opcode.JUMP_IF_R)
    self.emit_u8(cond)
    true_entry = (len(self.bytecode), inst.true_label, len(self.instruction_offsets) - 1)
    self.pending_jumps.append(true_entry)
    self.emit_i32(0)  # Placeholder offset

    if not inst.false_label:
        return

    # Separate unconditional jump, reached only when the condition was false.
    self.track_vm_instruction()
    self.emit_opcode(Opcode.JUMP_R)
    false_entry = (len(self.bytecode), inst.false_label, len(self.instruction_offsets) - 1)
    self.pending_jumps.append(false_entry)
    self.emit_i32(0)  # Placeholder offset
|
716
|
+
|
717
|
+
def generate_call(self, inst: Call) -> None:
    """Generate a CALL_R instruction.

    A callee given as a string or ``FunctionRef`` has its name loaded into a
    freshly reserved register with ``LOAD_CONST_R``; any other callee is
    resolved through ``get_register``. Constant arguments are loaded into
    their registers before the call. The emitted operands are: function
    register, destination register (0 when there is no destination),
    argument count, then one register per argument.

    Raises:
        RuntimeError: If no register is left for the callee name.
    """
    if self.debug:
        print(f"DEBUG Call: func={inst.func}, args={inst.args}, dest={inst.dest}")
    dst = self.get_register(inst.dest) if inst.dest else 0

    # The callee can be a string name, a FunctionRef, or a register value.
    from machine_dialect.mir.mir_values import FunctionRef

    if isinstance(inst.func, (str, FunctionRef)):
        # Named callee: reserve the next free register for the name.
        assert self.allocation is not None
        func_reg = self.allocation.next_register
        if func_reg >= self.allocation.max_registers:
            raise RuntimeError("Out of registers")
        self.allocation.next_register += 1

        if isinstance(inst.func, str):
            if self.debug:
                print(f" DEBUG: Loading function name '{inst.func}' as constant into r{func_reg}")
            const_idx = self.add_constant(inst.func)
        else:
            if self.debug:
                print(f" DEBUG: Loading FunctionRef '{inst.func.name}' as constant into r{func_reg}")
            const_idx = self.add_constant(inst.func.name)

        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(func_reg)
        self.emit_u16(const_idx)
        func = func_reg
    else:
        # Already a register value
        if self.debug:
            print(f" DEBUG: Function is already in register: {inst.func}")
        func = self.get_register(inst.func)

    # Resolve argument registers, loading constants on the way.
    args = []
    for arg in inst.args:
        arg_reg = self.get_register(arg)
        if isinstance(arg, Constant):
            const_val = arg.value if hasattr(arg, "value") else arg
            const_idx = self.add_constant(const_val)
            self.track_vm_instruction()
            self.emit_opcode(Opcode.LOAD_CONST_R)
            self.emit_u8(arg_reg)
            self.emit_u16(const_idx)
        args.append(arg_reg)

    if self.debug:
        print(f" Function register: r{func}, dest register: r{dst}")
        print(f" Argument registers: {[f'r{a}' for a in args]}")

    self.track_vm_instruction()
    self.emit_opcode(Opcode.CALL_R)
    self.emit_u8(func)
    self.emit_u8(dst)
    self.emit_u8(len(args))
    for arg_reg in args:
        self.emit_u8(arg_reg)
|
792
|
+
|
793
|
+
def generate_return(self, inst: Return) -> None:
    """Generate a RETURN_R instruction.

    Without a value, ``RETURN_R 0`` is emitted. A constant value is first
    loaded into register 0 and returned from there; any other value is
    returned from the register ``get_register`` resolves for it.
    """
    if self.debug:
        print(f"DEBUG Return: value={inst.value}")
        if inst.value:
            print(f" value type: {type(inst.value)}")
            if hasattr(inst.value, "name"):
                print(f" value name: {inst.value.name}")
            if hasattr(inst.value, "version"):
                print(f" value version: {inst.value.version}")
            # Dump the allocation map to help trace register choices.
            if self.allocation:
                print(f" Allocation map has {len(self.allocation.value_to_register)} entries")
                for val, reg in self.allocation.value_to_register.items():
                    if hasattr(val, "name"):
                        print(f" {val.name} (v{getattr(val, 'version', '?')}) -> r{reg}")

    if not inst.value:
        if self.debug:
            print(" -> Returning with no value")
        self.track_vm_instruction()
        self.emit_opcode(Opcode.RETURN_R)
        self.emit_u8(0)  # No return value
        return

    if isinstance(inst.value, Constant):
        # Constants are materialised in register 0 and returned from it.
        const_value = inst.value.value if hasattr(inst.value, "value") else inst.value
        const_idx = self.add_constant(const_value)
        if self.debug:
            print(f" -> Loading constant {const_value} into r0 for return")
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(0)  # Use register 0 for return
        self.emit_u16(const_idx)
        reg = 0
    else:
        # Value is already in a register
        reg = self.get_register(inst.value)
        if self.debug:
            print(f" -> Returning from register r{reg}")

    self.track_vm_instruction()
    self.emit_opcode(Opcode.RETURN_R)
    self.emit_u8(1)  # Has return value
    self.emit_u8(reg)
|
843
|
+
|
844
|
+
def generate_phi(self, inst: Phi) -> None:
    """Generate a PHI_R instruction.

    Operands are the destination register, the number of sources, and for
    each source its register followed by a 16-bit block id. Labels are not
    mapped yet, so every block id is emitted as 0.
    """
    dst = self.get_register(inst.dest)

    # TODO: Map label to block ID (currently always 0)
    sources = [
        (self.get_register(value), 0)
        for value, _ in inst.sources  # type: ignore[attr-defined]
    ]

    self.track_vm_instruction()
    self.emit_opcode(Opcode.PHI_R)
    self.emit_u8(dst)
    self.emit_u8(len(sources))
    for src, block_id in sources:
        self.emit_u8(src)
        self.emit_u16(block_id)
|
861
|
+
|
862
|
+
def generate_assert(self, inst: Assert) -> None:
    """Generate an ASSERT_R instruction.

    Operands are the condition register, an assert-type byte (always 0) and
    the constant-pool index of the message. A missing or empty message is
    replaced by ``"Assertion failed"``.
    """
    reg = self.get_register(inst.condition)
    msg_idx = self.add_string_constant(inst.message or "Assertion failed")

    self.track_vm_instruction()
    self.emit_opcode(Opcode.ASSERT_R)
    self.emit_u8(reg)
    self.emit_u8(0)  # AssertType::True
    self.emit_u16(msg_idx)
|
873
|
+
|
874
|
+
def generate_scope(self, inst: Scope) -> None:
    """Generate SCOPE_ENTER_R or SCOPE_EXIT_R followed by the 16-bit scope id.

    The action ``"enter"`` selects ``SCOPE_ENTER_R``; every other action
    selects ``SCOPE_EXIT_R``.
    """
    scope_id = inst.scope_id  # type: ignore[attr-defined]
    entering = inst.action == "enter"  # type: ignore[attr-defined]
    opcode = Opcode.SCOPE_ENTER_R if entering else Opcode.SCOPE_EXIT_R

    self.track_vm_instruction()
    self.emit_opcode(opcode)
    self.emit_u16(scope_id)
|
885
|
+
|
886
|
+
def generate_print(self, inst: Print) -> None:
    """Generate a DEBUG_PRINT instruction for the value's register.

    A constant value is first loaded into its register with
    ``LOAD_CONST_R``; other values are printed from the register they
    already occupy.
    """
    src = self.get_register(inst.value)

    if isinstance(inst.value, Constant):
        # get_register only reserved the register; load the constant now.
        const_idx = self.add_constant(inst.value.value)
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(src)
        self.emit_u16(const_idx)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DEBUG_PRINT)
    self.emit_u8(src)
|
906
|
+
|
907
|
+
def resolve_jumps(self) -> None:
    """Patch the placeholder offsets of all pending jumps.

    Offsets are counted in instructions, relative to the instruction that
    follows the jump. Jumps whose label has no entry in ``block_offsets``
    are left untouched (their placeholder stays in the bytecode).
    """
    for patch_pos, label, jump_inst_idx in self.pending_jumps:
        if label not in self.block_offsets:
            continue
        # After the jump executes, the PC is at jump_inst_idx + 1.
        next_inst_idx = jump_inst_idx + 1
        relative = self.block_offsets[label] - next_inst_idx
        struct.pack_into("<i", self.bytecode, patch_pos, relative)
|
919
|
+
|
920
|
+
def get_register(self, value: MIRValue) -> int:
    """Return the register number assigned to a MIR value.

    Constants get a register on first use (taken from
    ``allocation.next_register``) which is remembered for later lookups.
    No ``LOAD_CONST_R`` is emitted here; the caller must load the constant.

    Values missing from the allocation map are matched against the current
    function's parameters by name. If that fails too and the value is not
    an SSA variable, the fixed register number 23 is returned.
    NOTE(review): the original comments mark that fallback as a suspected
    bug kept for diagnosis; it is preserved unchanged here.

    Args:
        value: MIR value.

    Returns:
        Register number.

    Raises:
        RuntimeError: If registers are exhausted, or a parameter or SSA
            variable has no register in the allocation map.
    """
    if isinstance(value, Constant):
        # Reuse the register already reserved for this constant, if any.
        if self.allocation and value in self.allocation.value_to_register:
            return self.allocation.value_to_register[value]

        # Reserve a new register for this constant.
        assert self.allocation is not None
        reg = self.allocation.next_register
        if reg >= self.allocation.max_registers:
            raise RuntimeError("Out of registers")
        self.allocation.next_register += 1
        self.allocation.value_to_register[value] = reg

        # Deliberately no LOAD_CONST_R: loading is the caller's job.
        if self.debug:
            print(f" DEBUG: Allocated r{reg} for constant {value.value if hasattr(value, 'value') else value}")
        return reg

    assert self.allocation is not None
    if value in self.allocation.value_to_register:
        return self.allocation.value_to_register[value]

    # Special case: the value may be a parameter referenced by name.
    if self.current_function and isinstance(value, Variable):
        for param in self.current_function.params:
            if param.name != value.name:
                continue
            if param not in self.allocation.value_to_register:
                raise RuntimeError(f"Parameter {value.name} not allocated to register")
            if self.debug:
                reg = self.allocation.value_to_register[param]
                print(f" DEBUG: Found parameter {value.name} by name -> r{reg}")
            return self.allocation.value_to_register[param]

    # An SSA variable must have been given a register by the allocator.
    if self.is_ssa_variable(value) and isinstance(value, Variable):
        raise RuntimeError(f"SSA variable {value.name} (version {value.version}) not allocated to register")

    # Fallback for unallocated non-SSA values: a recognisable fixed register.
    if self.debug:
        print(f" WARNING: Value {value} not in allocation map, returning r23 (uninitialized!)")
    return 23  # This will help us identify the issue
|
977
|
+
|
978
|
+
def add_constant(self, value: Any) -> int:
    """Add a constant to the pool, reusing an existing equal entry.

    ``None`` is stored with the EMPTY tag and payload 0. ``bool`` is
    checked before ``int`` because ``bool`` is a subclass of ``int``.
    Values of any other unsupported type are stored as their ``str()``
    representation with the STRING tag.

    Floats are deduplicated by value *and* sign, so ``-0.0`` does not
    collapse into an existing ``0.0`` entry (they compare equal with ``==``).

    Args:
        value: Constant value.

    Returns:
        Constant index.
    """
    import math

    # Determine constant type and pool payload
    tag: ConstantTag
    val: Any
    if value is None:
        tag = ConstantTag.EMPTY
        val = 0
    elif isinstance(value, bool):
        tag = ConstantTag.BOOL
        val = value
    elif isinstance(value, int):
        tag = ConstantTag.INT
        val = value
    elif isinstance(value, float):
        tag = ConstantTag.FLOAT
        val = value
    elif isinstance(value, str):
        tag = ConstantTag.STRING
        val = value
    else:
        # Default to string representation
        tag = ConstantTag.STRING
        val = str(value)

    is_float = isinstance(val, float)

    # Reuse an existing entry when one matches.
    for i, (t, v) in enumerate(self.constants):
        if t != tag or v != val:
            continue
        # 0.0 == -0.0 is True; keep the two as distinct constants.
        if is_float and math.copysign(1.0, v) != math.copysign(1.0, val):
            continue
        return i

    # Add new constant
    idx = len(self.constants)
    self.constants.append((tag, val))
    return idx
|
1019
|
+
|
1020
|
+
def add_string_constant(self, value: str) -> int:
    """Add a string constant to the pool, reusing an identical entry.

    Args:
        value: String value.

    Returns:
        Constant index.
    """
    # Index of the first STRING entry with the same text, if there is one.
    existing = next(
        (
            i
            for i, (tag, text) in enumerate(self.constants)
            if tag == ConstantTag.STRING and text == value
        ),
        None,
    )
    if existing is not None:
        return existing

    # Not pooled yet: append and return the new slot.
    self.constants.append((ConstantTag.STRING, value))
    return len(self.constants) - 1
|
1038
|
+
|
1039
|
+
def track_vm_instruction(self) -> None:
    """Record the byte offset at which the next VM instruction starts.

    Call this before emitting each VM instruction; the length of
    ``instruction_offsets`` is what jump resolution uses as the
    instruction index.
    """
    start_offset = len(self.bytecode)
    self.instruction_offsets.append(start_offset)
|
1046
|
+
|
1047
|
+
def emit_opcode(self, opcode: int) -> None:
    """Append a single opcode byte to the bytecode buffer."""
    buffer = self.bytecode
    buffer.append(opcode)
|
1050
|
+
|
1051
|
+
def emit_u8(self, value: int) -> None:
    """Append the low 8 bits of ``value`` as one byte."""
    low_byte = value & 0xFF  # wider values are truncated, not rejected
    self.bytecode.append(low_byte)
|
1054
|
+
|
1055
|
+
def emit_u16(self, value: int) -> None:
    """Append ``value`` as an unsigned 16-bit little-endian integer."""
    encoded = struct.pack("<H", value)
    self.bytecode.extend(encoded)
|
1058
|
+
|
1059
|
+
def emit_i32(self, value: int) -> None:
    """Append ``value`` as a signed 32-bit little-endian integer."""
    encoded = struct.pack("<i", value)
    self.bytecode.extend(encoded)
|
1062
|
+
|
1063
|
+
def add_label(self, label: str) -> None:
    """Bind a label to the index of the next instruction to be emitted.

    Args:
        label: The label to add.
    """
    # The number of instructions tracked so far is the next instruction's index.
    next_inst_idx = len(self.instruction_offsets)
    self.block_offsets[label] = next_inst_idx
|
1071
|
+
|
1072
|
+
def generate_array_create(self, inst: ArrayCreate) -> None:
    """Generate NEW_ARRAY_R from a MIR ArrayCreate.

    A constant size is loaded into its register with ``LOAD_CONST_R``
    before the array is created.
    """
    dst = self.get_register(inst.dest)
    size = self.get_register(inst.size)

    if isinstance(inst.size, Constant):
        # get_register only reserved the register; load the size now.
        size_value = inst.size.value if hasattr(inst.size, "value") else inst.size
        const_idx = self.add_constant(size_value)
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(size)
        self.emit_u16(const_idx)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.NEW_ARRAY_R)
    self.emit_u8(dst)
    self.emit_u8(size)

    if self.debug:
        print(f" -> Generated NewArrayR: r{dst} = new_array(r{size})")
|
1095
|
+
|
1096
|
+
def generate_array_get(self, inst: ArrayGet) -> None:
    """Generate ARRAY_GET_R from a MIR ArrayGet.

    A constant index is loaded into its register with ``LOAD_CONST_R``
    first, matching ``generate_array_set``: ``get_register`` only reserves
    a register for a constant and leaves the load to the caller.
    """
    dst = self.get_register(inst.dest)
    array = self.get_register(inst.array)
    index = self.get_register(inst.index)

    if isinstance(inst.index, Constant):
        # Without this load the index register would hold no defined value.
        const_idx = self.add_constant(inst.index.value if hasattr(inst.index, "value") else inst.index)
        self.track_vm_instruction()
        self.emit_opcode(Opcode.LOAD_CONST_R)
        self.emit_u8(index)
        self.emit_u16(const_idx)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.ARRAY_GET_R)
    self.emit_u8(dst)
    self.emit_u8(array)
    self.emit_u8(index)

    if self.debug:
        print(f" -> Generated ArrayGetR: r{dst} = r{array}[r{index}]")
|
1110
|
+
|
1111
|
+
def generate_array_set(self, inst: ArraySet) -> None:
    """Generate ARRAY_SET_R from a MIR ArraySet.

    Constant index and value operands are loaded into their registers with
    ``LOAD_CONST_R`` (index first, then value) before the store.
    """

    def operand_register(operand: MIRValue) -> int:
        # Resolve the operand's register, loading it first if it is a constant.
        reg = self.get_register(operand)
        if isinstance(operand, Constant):
            const_idx = self.add_constant(operand.value if hasattr(operand, "value") else operand)
            self.track_vm_instruction()
            self.emit_opcode(Opcode.LOAD_CONST_R)
            self.emit_u8(reg)
            self.emit_u16(const_idx)
        return reg

    array = self.get_register(inst.array)
    index = operand_register(inst.index)
    value = operand_register(inst.value)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.ARRAY_SET_R)
    self.emit_u8(array)
    self.emit_u8(index)
    self.emit_u8(value)

    if self.debug:
        print(f" -> Generated ArraySetR: r{array}[r{index}] = r{value}")
|
1147
|
+
|
1148
|
+
def generate_array_length(self, inst: ArrayLength) -> None:
    """Generate ARRAY_LEN_R from a MIR ArrayLength (operands: dst, array)."""
    dst = self.get_register(inst.dest)
    array = self.get_register(inst.array)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.ARRAY_LEN_R)
    for operand in (dst, array):
        self.emit_u8(operand)

    if self.debug:
        print(f" -> Generated ArrayLenR: r{dst} = len(r{array})")
|
1160
|
+
|
1161
|
+
def generate_array_append(self, inst: ArrayAppend) -> None:
    """Emit an append as ARRAY_LEN_R followed by ARRAY_SET_R at that length.

    Args:
        inst: MIR ArrayAppend carrying the target ``array`` and the ``value``
            to store.
    """
    array_reg = self.get_register(inst.array)
    value_reg = self.get_register(inst.value)

    # Register 255 is hard-coded as the scratch slot that holds the length.
    scratch_len = 255

    # Two instructions: read the current length, then store the value at
    # the index held in the scratch register.
    emission_plan = (
        (Opcode.ARRAY_LEN_R, (scratch_len, array_reg)),
        (Opcode.ARRAY_SET_R, (array_reg, scratch_len, value_reg)),
    )
    for opcode, operands in emission_plan:
        self.track_vm_instruction()
        self.emit_opcode(opcode)
        for operand in operands:
            self.emit_u8(operand)

    if self.debug:
        print(f" -> Generated ArrayAppend: r{array_reg}.append(r{value_reg})")
|
1184
|
+
|
1185
|
+
def generate_dict_create(self, inst: DictCreate) -> None:
    """Emit one DICT_NEW_R instruction for a MIR DictCreate.

    Args:
        inst: MIR instruction whose ``dest`` register receives the new dict.
    """
    target_reg = self.get_register(inst.dest)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_NEW_R)
    self.emit_u8(target_reg)

    if not self.debug:
        return
    print(f" -> Generated DictNewR: r{target_reg} = new_dict()")
|
1195
|
+
|
1196
|
+
def generate_dict_get(self, inst: DictGet) -> None:
    """Emit one DICT_GET_R instruction for a MIR DictGet.

    Args:
        inst: MIR instruction providing ``dest``, ``dict_val`` and ``key``.
    """
    # Registers are resolved in operand order: destination, dict, key.
    target_reg, mapping_reg, lookup_reg = (
        self.get_register(inst.dest),
        self.get_register(inst.dict_val),
        self.get_register(inst.key),
    )

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_GET_R)
    for operand in (target_reg, mapping_reg, lookup_reg):
        self.emit_u8(operand)

    if self.debug:
        print(f" -> Generated DictGetR: r{target_reg} = r{mapping_reg}[r{lookup_reg}]")
|
1210
|
+
|
1211
|
+
def generate_dict_set(self, inst: DictSet) -> None:
    """Emit one DICT_SET_R instruction for a MIR DictSet.

    Args:
        inst: MIR instruction providing ``dict_val``, ``key`` and ``value``.
    """
    # Registers are resolved in operand order: dict, key, value.
    mapping_reg, lookup_reg, stored_reg = (
        self.get_register(inst.dict_val),
        self.get_register(inst.key),
        self.get_register(inst.value),
    )

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_SET_R)
    for operand in (mapping_reg, lookup_reg, stored_reg):
        self.emit_u8(operand)

    if self.debug:
        print(f" -> Generated DictSetR: r{mapping_reg}[r{lookup_reg}] = r{stored_reg}")
|
1225
|
+
|
1226
|
+
def generate_dict_remove(self, inst: DictRemove) -> None:
    """Emit one DICT_REMOVE_R instruction for a MIR DictRemove.

    Args:
        inst: MIR instruction providing ``dict_val`` and the ``key`` to drop.
    """
    mapping_reg = self.get_register(inst.dict_val)
    lookup_reg = self.get_register(inst.key)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_REMOVE_R)
    # Operand layout: dict register, then key register.
    for operand in (mapping_reg, lookup_reg):
        self.emit_u8(operand)

    if not self.debug:
        return
    print(f" -> Generated DictRemoveR: del r{mapping_reg}[r{lookup_reg}]")
|
1238
|
+
|
1239
|
+
def generate_dict_contains(self, inst: DictContains) -> None:
    """Emit one DICT_CONTAINS_R instruction for a MIR DictContains.

    Args:
        inst: MIR instruction providing ``dest``, ``dict_val`` and ``key``.
    """
    # Registers are resolved in operand order: destination, dict, key.
    target_reg, mapping_reg, lookup_reg = (
        self.get_register(inst.dest),
        self.get_register(inst.dict_val),
        self.get_register(inst.key),
    )

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_CONTAINS_R)
    for operand in (target_reg, mapping_reg, lookup_reg):
        self.emit_u8(operand)

    if self.debug:
        print(f" -> Generated DictContainsR: r{target_reg} = r{lookup_reg} in r{mapping_reg}")
|
1253
|
+
|
1254
|
+
def generate_array_remove(self, inst: ArrayRemove) -> None:
    """Emit bytecode that removes one element by rebuilding the array.

    The emitted sequence:
        1. Reads the length of the source array.
        2. Creates a new array sized ``length - 1``.
        3. Walks the source with counter ``i``; every element whose position
           differs from the removal index is written to the new array at
           position ``j``.
        4. Moves the new array into the original array register.

    Args:
        inst: MIR ArrayRemove providing ``array`` and ``index``.
    """
    array = self.get_register(inst.array)
    index = self.get_register(inst.index)

    def emit_regs(opcode: Opcode, *registers: int) -> None:
        """Track and emit one opcode followed by its u8 register operands."""
        self.track_vm_instruction()
        self.emit_opcode(opcode)
        for register in registers:
            self.emit_u8(register)

    def emit_load_const(register: int, constant_index: int) -> None:
        """Emit LOAD_CONST_R with a u16 constant-pool index."""
        emit_regs(Opcode.LOAD_CONST_R, register)
        self.emit_u16(constant_index)

    def emit_jump(opcode: Opcode, target_label: str, *condition: int) -> None:
        """Emit a jump whose i32 offset is recorded for later patching."""
        emit_regs(opcode, *condition)
        self.pending_jumps.append((len(self.bytecode), target_label, len(self.instruction_offsets) - 1))
        self.emit_i32(0)  # Placeholder offset

    # Fixed scratch registers 247..254, in this order: original length,
    # new length, new array, source counter i, destination counter j,
    # element temporary, comparison result, constant 1.
    (len_before, len_after, rebuilt, src_pos, dst_pos, item, flag, one) = range(247, 255)

    # --- Setup: lengths, new array, counters ---
    emit_regs(Opcode.ARRAY_LEN_R, len_before, array)

    one_const = self.add_constant(1)
    emit_load_const(one, one_const)
    emit_regs(Opcode.SUB_R, len_after, len_before, one)

    emit_regs(Opcode.NEW_ARRAY_R, rebuilt, len_after)

    zero_const = self.add_constant(0)
    emit_load_const(src_pos, zero_const)
    emit_load_const(dst_pos, zero_const)

    # Labels share one counter value so they are unique per emitted removal.
    label_id = self.label_counter
    loop_label = f"remove_copy_{label_id}"
    skip_label = f"remove_skip_{label_id}"
    element_label = f"remove_element_{label_id}"
    done_label = f"remove_done_{label_id}"
    self.label_counter += 1

    # --- Loop head: leave the loop once i < old_len is false ---
    self.add_label(loop_label)
    emit_regs(Opcode.LT_R, flag, src_pos, len_before)
    emit_jump(Opcode.JUMP_IF_NOT_R, done_label, flag)

    # When i equals the removal index, bypass the copy.
    emit_regs(Opcode.EQ_R, flag, src_pos, index)
    emit_jump(Opcode.JUMP_IF_R, skip_label, flag)

    # --- Copy old[i] into new[j], then advance j ---
    self.add_label(element_label)
    emit_regs(Opcode.ARRAY_GET_R, item, array, src_pos)
    emit_regs(Opcode.ARRAY_SET_R, rebuilt, dst_pos, item)
    emit_regs(Opcode.ADD_R, dst_pos, dst_pos, one)

    # --- Both paths land here: advance i and loop again ---
    self.add_label(skip_label)
    emit_regs(Opcode.ADD_R, src_pos, src_pos, one)
    emit_jump(Opcode.JUMP_R, loop_label)

    # --- Finish: the rebuilt array replaces the original register ---
    self.add_label(done_label)
    emit_regs(Opcode.MOVE_R, array, rebuilt)

    if self.debug:
        print(f" -> Generated ArrayRemove: r{array}.remove_at(r{index}) using copy emulation")
|
1403
|
+
|
1404
|
+
def generate_array_insert(self, inst: ArrayInsert) -> None:
    """Emit bytecode that inserts one element by rebuilding the array.

    The emitted sequence:
        1. Reads the length of the source array.
        2. Creates a new array sized ``length + 1``.
        3. Copies ``old[i]`` to ``new[i]`` while ``i < index``.
        4. Stores the value at ``new[index]``.
        5. Copies ``old[i]`` to ``new[i + 1]`` for the remaining elements.
        6. Moves the new array into the original array register.

    Args:
        inst: MIR ArrayInsert providing ``array``, ``index`` and ``value``.
    """
    array = self.get_register(inst.array)
    index = self.get_register(inst.index)
    value = self.get_register(inst.value)

    def emit_regs(opcode: Opcode, *registers: int) -> None:
        """Track and emit one opcode followed by its u8 register operands."""
        self.track_vm_instruction()
        self.emit_opcode(opcode)
        for register in registers:
            self.emit_u8(register)

    def emit_load_const(register: int, constant_index: int) -> None:
        """Emit LOAD_CONST_R with a u16 constant-pool index."""
        emit_regs(Opcode.LOAD_CONST_R, register)
        self.emit_u16(constant_index)

    def emit_jump(opcode: Opcode, target_label: str, *condition: int) -> None:
        """Emit a jump whose i32 offset is recorded for later patching."""
        emit_regs(opcode, *condition)
        self.pending_jumps.append((len(self.bytecode), target_label, len(self.instruction_offsets) - 1))
        self.emit_i32(0)  # Placeholder offset

    # Fixed scratch registers 248..254, in this order: original length,
    # new length, new array, loop counter, element temporary, comparison
    # result, constant 1.
    (len_before, len_after, rebuilt, pos, item, flag, one) = range(248, 255)

    # --- Setup: lengths, new array, counter ---
    emit_regs(Opcode.ARRAY_LEN_R, len_before, array)

    one_const = self.add_constant(1)
    emit_load_const(one, one_const)
    emit_regs(Opcode.ADD_R, len_after, len_before, one)

    emit_regs(Opcode.NEW_ARRAY_R, rebuilt, len_after)

    zero_const = self.add_constant(0)
    emit_load_const(pos, zero_const)

    # Labels share one counter value so they are unique per emitted insert.
    label_id = self.label_counter
    head_label = f"insert_copy_before_{label_id}"
    place_label = f"insert_copy_after_{label_id}"
    done_label = f"insert_done_{label_id}"
    self.label_counter += 1

    # --- First loop: copy the prefix while i < index ---
    self.add_label(head_label)
    emit_regs(Opcode.LT_R, flag, pos, index)
    # Once i < index is false, fall out to the insertion step.
    emit_jump(Opcode.JUMP_IF_NOT_R, place_label, flag)

    emit_regs(Opcode.ARRAY_GET_R, item, array, pos)
    emit_regs(Opcode.ARRAY_SET_R, rebuilt, pos, item)
    emit_regs(Opcode.ADD_R, pos, pos, one)
    emit_jump(Opcode.JUMP_R, head_label)

    # --- Insertion: new[index] = value, then restart i at index ---
    self.add_label(place_label)
    emit_regs(Opcode.ARRAY_SET_R, rebuilt, index, value)
    emit_regs(Opcode.MOVE_R, pos, index)

    # --- Second loop: copy the tail, shifted by one ---
    # The counter was already bumped above, hence the "- 1".
    tail_label = f"insert_copy_rest_{self.label_counter - 1}"
    self.add_label(tail_label)

    emit_regs(Opcode.LT_R, flag, pos, len_before)
    # Once i < old_len is false, copying is finished.
    emit_jump(Opcode.JUMP_IF_NOT_R, done_label, flag)

    # NOTE(review): this load into the element register is overwritten by
    # the ADD_R right below, so it looks redundant; kept to leave the
    # emitted bytecode unchanged.
    emit_regs(Opcode.ARRAY_GET_R, item, array, pos)

    # The element register is reused to hold the destination index i + 1.
    emit_regs(Opcode.ADD_R, item, pos, one)

    # The comparison register is reused to hold old[i].
    emit_regs(Opcode.ARRAY_GET_R, flag, array, pos)

    # new[i + 1] = old[i]
    emit_regs(Opcode.ARRAY_SET_R, rebuilt, item, flag)

    emit_regs(Opcode.ADD_R, pos, pos, one)
    emit_jump(Opcode.JUMP_R, tail_label)

    # --- Finish: the rebuilt array replaces the original register ---
    self.add_label(done_label)
    emit_regs(Opcode.MOVE_R, array, rebuilt)

    if self.debug:
        print(f" -> Generated ArrayInsert: r{array}.insert(r{index}, r{value}) using copy emulation")
|
1596
|
+
|
1597
|
+
def generate_dict_keys(self, inst: DictKeys) -> None:
    """Emit one DICT_KEYS_R instruction.

    Args:
        inst: DictKeys instruction providing ``dest`` and ``dict_val``.
    """
    target_reg = self.get_register(inst.dest)
    mapping_reg = self.get_register(inst.dict_val)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_KEYS_R)
    # Operand layout: destination register, then dict register.
    for operand in (target_reg, mapping_reg):
        self.emit_u8(operand)

    if not self.debug:
        return
    print(f" -> Generated DictKeysR: r{target_reg} = r{mapping_reg}.keys()")
|
1614
|
+
|
1615
|
+
def generate_dict_values(self, inst: DictValues) -> None:
    """Emit one DICT_VALUES_R instruction.

    Args:
        inst: DictValues instruction providing ``dest`` and ``dict_val``.
    """
    target_reg = self.get_register(inst.dest)
    mapping_reg = self.get_register(inst.dict_val)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_VALUES_R)
    # Operand layout: destination register, then dict register.
    for operand in (target_reg, mapping_reg):
        self.emit_u8(operand)

    if not self.debug:
        return
    print(f" -> Generated DictValuesR: r{target_reg} = r{mapping_reg}.values()")
|
1633
|
+
|
1634
|
+
def generate_dict_clear(self, inst: DictClear) -> None:
    """Emit one DICT_CLEAR_R instruction.

    Args:
        inst: DictClear instruction providing the ``dict_val`` to clear.
    """
    mapping_reg = self.get_register(inst.dict_val)

    self.track_vm_instruction()
    self.emit_opcode(Opcode.DICT_CLEAR_R)
    self.emit_u8(mapping_reg)

    if not self.debug:
        return
    print(f" -> Generated DictClearR: r{mapping_reg}.clear()")
|
1649
|
+
|
1650
|
+
def generate_array_clear(self, inst: ArrayClear) -> None:
    """Emit an array clear as "create a new array of size 0".

    The array register is overwritten by a NEW_ARRAY_R whose size operand
    is a scratch register loaded with the constant 0.

    Args:
        inst: ArrayClear instruction providing the ``array`` to reset.
    """
    array_reg = self.get_register(inst.array)

    # Register 254 is hard-coded as the scratch slot for the constant 0.
    size_reg = 254
    zero_const = self.add_constant(0)

    # Load 0 into the scratch register.
    self.track_vm_instruction()
    self.emit_opcode(Opcode.LOAD_CONST_R)
    self.emit_u8(size_reg)
    self.emit_u16(zero_const)

    # Overwrite the array register with a new array of that size.
    self.track_vm_instruction()
    self.emit_opcode(Opcode.NEW_ARRAY_R)
    for operand in (array_reg, size_reg):
        self.emit_u8(operand)

    if not self.debug:
        return
    print(f" -> Generated ArrayClear: r{array_reg}.clear() as new_array(0)")
|
1675
|
+
|
1676
|
+
def generate_array_find_index(self, inst: ArrayFindIndex) -> None:
    """Emit a linear search that yields the index of a value in an array.

    The emitted loop reads the array length, starts an index at 0 and, for
    each position, loads the element and compares it with the target value.
    On a match the index is moved into the destination register; if the
    loop runs off the end, the destination is loaded with the constant -1.

    Args:
        inst: MIR ArrayFindIndex providing ``dest``, ``array`` and ``value``.
    """
    dest = self.get_register(inst.dest)
    array = self.get_register(inst.array)
    value = self.get_register(inst.value)

    def emit_regs(opcode: Opcode, *registers: int) -> None:
        """Track and emit one opcode followed by its u8 register operands."""
        self.track_vm_instruction()
        self.emit_opcode(opcode)
        for register in registers:
            self.emit_u8(register)

    def emit_load_const(register: int, constant_index: int) -> None:
        """Emit LOAD_CONST_R with a u16 constant-pool index."""
        emit_regs(Opcode.LOAD_CONST_R, register)
        self.emit_u16(constant_index)

    def emit_jump(opcode: Opcode, target_label: str, *condition: int) -> None:
        """Emit a jump whose i32 offset is recorded for later patching."""
        emit_regs(opcode, *condition)
        self.pending_jumps.append((len(self.bytecode), target_label, len(self.instruction_offsets) - 1))
        self.emit_i32(0)  # Placeholder offset

    # Fixed scratch registers 250..253, in this order: array length,
    # current index, current element, comparison result.
    (length, cursor, item, flag) = range(250, 254)

    # Labels share one counter value so they are unique per emitted search.
    label_id = self.label_counter
    loop_label = f"find_loop_{label_id}"
    miss_label = f"find_end_{label_id}"
    hit_label = f"find_found_{label_id}"
    self.label_counter += 1

    # --- Setup: length and index = 0 ---
    emit_regs(Opcode.ARRAY_LEN_R, length, array)

    zero_const = self.add_constant(0)
    emit_load_const(cursor, zero_const)

    # --- Loop head: once index < length is false, nothing matched ---
    self.add_label(loop_label)
    emit_regs(Opcode.LT_R, flag, cursor, length)
    emit_jump(Opcode.JUMP_IF_NOT_R, miss_label, flag)

    # Load the current element and compare it with the target.
    emit_regs(Opcode.ARRAY_GET_R, item, array, cursor)
    emit_regs(Opcode.EQ_R, flag, item, value)
    emit_jump(Opcode.JUMP_IF_R, hit_label, flag)

    # Advance the index; the element register is reused for the constant 1.
    one_const = self.add_constant(1)
    emit_load_const(item, one_const)
    emit_regs(Opcode.ADD_R, cursor, cursor, item)
    emit_jump(Opcode.JUMP_R, loop_label)

    # --- Match: copy the index into dest and skip the "not found" path ---
    self.add_label(hit_label)
    emit_regs(Opcode.MOVE_R, dest, cursor)

    # The counter was already bumped above, hence the "- 1".
    exit_label = f"find_exit_{self.label_counter - 1}"
    emit_jump(Opcode.JUMP_R, exit_label)

    # --- No match: dest = -1 ---
    self.add_label(miss_label)
    minus_one_const = self.add_constant(-1)
    emit_load_const(dest, minus_one_const)

    # Both paths continue from here.
    self.add_label(exit_label)

    if self.debug:
        print(f" -> Generated ArrayFindIndex: r{dest} = find_index(r{array}, r{value}) using loop emulation")
|
1802
|
+
|
1803
|
+
|
1804
|
+
class MetadataCollector:
    """Gather per-function metadata from a MIR module for the Rust VM.

    The collected data covers:
    - register type names
    - basic block boundaries
    - SSA phi node sources
    - variable names (debug mode only)
    """

    def __init__(self, debug_mode: bool = False) -> None:
        """Create a collector.

        Args:
            debug_mode: When true, function metadata also carries variable
                names and a (currently empty) source map.
        """
        self.debug_mode = debug_mode

    def collect(self, mir_module: MIRModule, allocation: RegisterAllocation) -> dict[str, Any]:
        """Build the metadata dictionary for a whole module.

        Args:
            mir_module: MIR module whose functions are described.
            allocation: Register allocation passed through to every function.

        Returns:
            Dict with ``version``, ``metadata_level`` and a ``functions`` list.
        """
        level = "full" if self.debug_mode else "minimal"
        return {
            "version": 1,
            "metadata_level": level,
            "functions": [
                self.collect_function_metadata(function, allocation)
                for _name, function in mir_module.functions.items()
            ],
        }

    def collect_function_metadata(self, func: MIRFunction, allocation: RegisterAllocation) -> dict[str, Any]:
        """Build the metadata dictionary for one function.

        Args:
            func: MIR function to describe.
            allocation: Register allocation used to name registers.

        Returns:
            Dict with name, signature, register types, basic blocks and phi
            nodes; in debug mode also ``variable_names`` and ``source_map``.
        """
        signature = {
            "param_types": [str(param.type) for param in func.params],
            # A falsy return type is reported as "empty".
            "return_type": str(func.return_type) if func.return_type else "empty",
        }
        described: dict[str, Any] = {
            "name": func.name,
            "signature": signature,
            "register_types": self.extract_register_types(func, allocation),
            "basic_blocks": self.extract_basic_blocks(func),
            "phi_nodes": self.extract_phi_nodes(func, allocation),
        }

        if not self.debug_mode:
            return described

        # Debug-only extras.
        described["variable_names"] = self.extract_variable_names(func, allocation)
        described["source_map"] = []  # TODO: Implement source mapping
        return described

    def extract_register_types(self, func: MIRFunction, allocation: RegisterAllocation) -> dict[str, str]:
        """Map each allocated register to the type name of its value.

        Args:
            func: MIR function (not consulted by this method).
            allocation: Register allocation supplying ``value_to_register``.

        Returns:
            Mapping of ``"r<N>"`` to ``str(value.type)``, or ``"unknown"``
            when the value has no ``type`` attribute.
        """
        return {
            f"r{reg_num}": str(value.type) if hasattr(value, "type") else "unknown"
            for value, reg_num in allocation.value_to_register.items()
        }

    def extract_basic_blocks(self, func: MIRFunction) -> list[dict[str, Any]]:
        """Describe every basic block with cumulative instruction offsets.

        Args:
            func: MIR function whose ``cfg.blocks`` are walked in order.

        Returns:
            One dict per block with ``label``, ``start_offset`` and
            ``end_offset``; offsets count MIR instructions.
        """
        described: list[dict[str, Any]] = []
        cursor = 0

        for block_key in func.cfg.blocks:
            current = func.cfg.blocks[block_key]
            entry = {
                "label": current.label,
                "start_offset": cursor,
                "end_offset": cursor + len(current.instructions),
            }
            described.append(entry)
            # The next block starts where this one ends.
            cursor = entry["end_offset"]

        return described

    def extract_phi_nodes(self, func: MIRFunction, allocation: RegisterAllocation) -> list[dict[str, Any]]:
        """List every Phi instruction together with its source registers.

        Args:
            func: MIR function whose blocks are scanned.
            allocation: Register allocation used to resolve registers.

        Returns:
            One dict per phi with ``block``, ``register`` and ``sources``;
            values missing from the allocation are reported as ``r-1``.
        """
        register_of = allocation.value_to_register
        collected: list[dict[str, Any]] = []

        for block_key in func.cfg.blocks:
            current = func.cfg.blocks[block_key]
            for inst in current.instructions:
                if not isinstance(inst, Phi):
                    continue

                target_reg = register_of.get(inst.dest, -1)
                incoming = [
                    {"register": f"r{register_of.get(value, -1)}", "block": label}
                    for value, label in inst.sources  # type: ignore[attr-defined]
                ]
                collected.append(
                    {
                        "block": current.label,
                        "register": f"r{target_reg}",
                        "sources": incoming,
                    }
                )

        return collected

    def extract_variable_names(self, func: MIRFunction, allocation: RegisterAllocation) -> dict[str, str]:
        """Map registers that hold a Variable to that variable's name.

        Args:
            func: MIR function (not consulted by this method).
            allocation: Register allocation supplying ``value_to_register``.

        Returns:
            Mapping of ``"r<N>"`` to the variable name.
        """
        return {
            f"r{reg_num}": value.name
            for value, reg_num in allocation.value_to_register.items()
            if isinstance(value, Variable)
        }
|
1970
|
+
|
1971
|
+
|
1972
|
+
def generate_bytecode_from_mir(
    mir_module: MIRModule, debug: bool = False
) -> tuple[BytecodeModule, dict[str, Any] | None]:
    """Compile a MIR module to bytecode and collect its metadata.

    This is the main entry point for bytecode generation.

    Args:
        mir_module: MIR module to generate bytecode from.
        debug: Enable debug output in the bytecode generator. It does not
            affect metadata, which is always collected with
            ``debug_mode=False``.

    Returns:
        Tuple of (bytecode module, metadata). Metadata is ``None`` when the
        generator exposes no register allocation after generation.
    """
    generator = RegisterBytecodeGenerator(debug=debug)
    bytecode = generator.generate(mir_module)

    # Without a register allocation there is nothing to describe.
    allocation = generator.allocation
    if allocation is None:
        return bytecode, None

    collector = MetadataCollector(debug_mode=False)
    return bytecode, collector.collect(mir_module, allocation)
|