machine-dialect 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,621 @@
|
|
1
|
+
"""Proper bytecode serializer for the Rust VM with constant pool remapping.
|
2
|
+
|
3
|
+
This serializer correctly handles individual instruction parsing and remaps
|
4
|
+
constant indices when merging multiple chunks into a single module.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from __future__ import annotations
|
8
|
+
|
9
|
+
import struct
|
10
|
+
from dataclasses import dataclass, field
|
11
|
+
from io import BytesIO
|
12
|
+
from typing import Any, BinaryIO
|
13
|
+
|
14
|
+
from machine_dialect.codegen.bytecode_module import BytecodeModule, ConstantTag
|
15
|
+
from machine_dialect.codegen.opcodes import Opcode
|
16
|
+
|
17
|
+
# =============================================================================
|
18
|
+
# Deduplication and Mapping Support
|
19
|
+
# =============================================================================
|
20
|
+
|
21
|
+
|
22
|
+
@dataclass
class DeduplicationStats:
    """Counters describing what constant-pool deduplication achieved."""

    # Total number of constants seen across all chunks before merging.
    original_count: int
    # Number of entries left in the merged (global) constant pool.
    deduped_count: int
    # Sum of the estimated encoded sizes of every constant that was reused
    # instead of being stored again.
    bytes_saved: int
    # Deduplication key -> indices of the chunks in which a repeated
    # occurrence of that constant was found.
    duplicate_chains: dict[tuple[Any, ...], list[int]] = field(default_factory=dict)
|
30
|
+
|
31
|
+
|
32
|
+
@dataclass
class ConstantMapping:
    """Result of merging per-chunk constant pools into one global pool."""

    # One dict per chunk, in chunk order: local constant index -> index of
    # the same constant in ``global_constants``.
    chunk_mappings: list[dict[int, int]]

    # The merged constant pool as (tag, value) pairs; duplicates appear once.
    global_constants: list[tuple[ConstantTag, Any]]

    # Deduplication counters, kept for debugging and reporting.
    stats: DeduplicationStats
|
44
|
+
|
45
|
+
|
46
|
+
# =============================================================================
|
47
|
+
# Error Handling
|
48
|
+
# =============================================================================
|
49
|
+
|
50
|
+
|
51
|
+
class RemappingError(Exception):
    """Common ancestor of all errors raised while remapping constant indices."""
|
55
|
+
|
56
|
+
|
57
|
+
class InvalidBytecodeError(RemappingError):
    """Signals bytecode that cannot be decoded (e.g. a truncated instruction)."""

    def __init__(self, message: str, offset: int | None = None, chunk_idx: int | None = None):
        """Build the error text and remember where the problem was found.

        Args:
            message: Description of the problem.
            offset: Byte offset of the offending instruction, if known;
                rendered in hexadecimal.
            chunk_idx: Index of the chunk being processed, if known.
        """
        # Location details are appended only when they were supplied.
        location = ""
        if offset is not None:
            location += f" at offset {offset:#x}"
        if chunk_idx is not None:
            location += f" in chunk {chunk_idx}"

        super().__init__(f"{message}{location}")
        self.offset = offset
        self.chunk_idx = chunk_idx
|
68
|
+
|
69
|
+
|
70
|
+
class ConstantIndexError(RemappingError):
    """Signals a constant index that lies outside the permitted range."""

    def __init__(self, idx: int, max_idx: int, chunk_idx: int | None = None, offset: int | None = None):
        """Build the error text and keep the details as attributes.

        Args:
            idx: The offending constant index.
            max_idx: Largest index that would have been accepted.
            chunk_idx: Index of the chunk being processed, if known.
            offset: Byte offset of the instruction, if known; rendered in
                hexadecimal.
        """
        # Optional context is appended in a fixed order: chunk, then offset.
        context = ""
        if chunk_idx is not None:
            context += f" in chunk {chunk_idx}"
        if offset is not None:
            context += f" at instruction offset {offset:#x}"

        super().__init__(f"Invalid constant index {idx} (max: {max_idx}){context}")
        self.index = idx
        self.max_index = max_idx
        self.chunk_idx = chunk_idx
        self.offset = offset
|
83
|
+
|
84
|
+
|
85
|
+
# =============================================================================
|
86
|
+
# Instruction Format Definitions
|
87
|
+
# =============================================================================
|
88
|
+
|
89
|
+
|
90
|
+
@dataclass
class InstructionFormat:
    """Static layout description of one bytecode instruction."""

    # Numeric opcode byte.
    opcode: int
    # Mnemonic, used in diagnostics.
    name: str
    # Full encoded length in bytes, opcode included; -1 marks an instruction
    # whose length depends on its operands.
    size: int
    # True when one operand is an index into the constant pool.
    has_const_operand: bool
    # ``struct`` format characters for the operands (no byte-order prefix).
    operand_format: str
|
99
|
+
|
100
|
+
|
101
|
+
# Instruction format definitions
|
102
|
+
# Keyed by opcode byte. Each entry is declared once and the key is taken from
# the entry itself, so the key and the entry's ``opcode`` field cannot drift.
# Opcodes absent from this table are treated as unknown by the decoder.
INSTRUCTION_FORMATS = {
    fmt.opcode: fmt
    for fmt in (
        # Loads, moves and globals
        InstructionFormat(0x00, "LOAD_CONST_R", 4, True, "BH"),
        InstructionFormat(0x01, "MOVE_R", 3, False, "BB"),
        InstructionFormat(0x02, "LOAD_GLOBAL_R", 4, True, "BH"),
        InstructionFormat(0x03, "STORE_GLOBAL_R", 4, True, "BH"),
        # Arithmetic
        InstructionFormat(0x07, "ADD_R", 4, False, "BBB"),
        InstructionFormat(0x08, "SUB_R", 4, False, "BBB"),
        InstructionFormat(0x09, "MUL_R", 4, False, "BBB"),
        InstructionFormat(0x0A, "DIV_R", 4, False, "BBB"),
        InstructionFormat(0x0B, "MOD_R", 4, False, "BBB"),
        InstructionFormat(0x0C, "NEG_R", 3, False, "BB"),
        # Logical
        InstructionFormat(0x0D, "NOT_R", 3, False, "BB"),
        InstructionFormat(0x0E, "AND_R", 4, False, "BBB"),
        InstructionFormat(0x0F, "OR_R", 4, False, "BBB"),
        # Comparisons
        InstructionFormat(0x10, "EQ_R", 4, False, "BBB"),
        InstructionFormat(0x11, "NEQ_R", 4, False, "BBB"),
        InstructionFormat(0x12, "LT_R", 4, False, "BBB"),
        InstructionFormat(0x13, "GT_R", 4, False, "BBB"),
        InstructionFormat(0x14, "LTE_R", 4, False, "BBB"),
        InstructionFormat(0x15, "GTE_R", 4, False, "BBB"),
        # Control flow
        InstructionFormat(0x16, "JUMP_R", 5, False, "i"),
        InstructionFormat(0x17, "JUMP_IF_R", 6, False, "Bi"),
        InstructionFormat(0x18, "JUMP_IF_NOT_R", 6, False, "Bi"),
        InstructionFormat(0x19, "CALL_R", -1, False, ""),  # Variable size
        InstructionFormat(0x1A, "RETURN_R", -1, False, ""),  # Variable size
        # MIR support
        InstructionFormat(0x1B, "PHI_R", -1, False, ""),  # Variable size
        InstructionFormat(0x1C, "ASSERT_R", 5, True, "BBH"),  # reg + assert_type + msg_idx
        InstructionFormat(0x1D, "SCOPE_ENTER_R", 3, False, "H"),
        InstructionFormat(0x1E, "SCOPE_EXIT_R", 3, False, "H"),
        # String operations
        InstructionFormat(0x1F, "CONCAT_STR_R", 4, False, "BBB"),
        InstructionFormat(0x20, "STR_LEN_R", 3, False, "BB"),
        # Arrays
        InstructionFormat(0x21, "NEW_ARRAY_R", 3, False, "BB"),
        InstructionFormat(0x22, "ARRAY_GET_R", 4, False, "BBB"),
        InstructionFormat(0x23, "ARRAY_SET_R", 4, False, "BBB"),
        InstructionFormat(0x24, "ARRAY_LEN_R", 3, False, "BB"),
        # Debug
        InstructionFormat(0x25, "DEBUG_PRINT", 2, False, "B"),
        InstructionFormat(0x26, "BREAKPOINT", 1, False, ""),
    )
}
|
148
|
+
|
149
|
+
|
150
|
+
# =============================================================================
|
151
|
+
# Helper Functions
|
152
|
+
# =============================================================================
|
153
|
+
|
154
|
+
|
155
|
+
def make_hashable(value: Any) -> Any:
    """Convert *value* into a hashable stand-in usable as a deduplication key.

    Scalars (``None``, ``bool``, ``int``, ``float``, ``str``, ``bytes``) are
    returned unchanged. Lists become tuples of converted items. Dicts become
    a tuple of ``(key, converted_value)`` pairs in a deterministic order.
    Anything else is represented by ``str(value)``.

    Dict items are sorted naturally when the keys can be ordered. When they
    cannot (for example a mix of ``int`` and ``str`` keys), the items are
    ordered by ``(type name, repr)`` of the key instead of raising
    ``TypeError``, so the result does not depend on insertion order.

    Note:
        Values that compare equal in Python (``1`` and ``True``, ``0.0`` and
        ``-0.0``) produce equal keys. Callers that must tell them apart need
        an extra discriminator such as a type tag.

    Args:
        value: The constant value to convert.

    Returns:
        A hashable object derived from *value*.
    """
    # ``bool`` is a subclass of ``int``, so it is covered by the tuple below.
    if value is None or isinstance(value, (int, float, str, bytes)):
        return value

    if isinstance(value, list):
        return tuple(make_hashable(item) for item in value)

    if isinstance(value, dict):
        pairs = [(key, make_hashable(item)) for key, item in value.items()]
        try:
            pairs.sort()
        except TypeError:
            # Keys of unrelated types cannot be compared with ``<``; fall back
            # to an ordering that is defined for every object.
            pairs.sort(key=lambda pair: (type(pair[0]).__name__, repr(pair[0])))
        return tuple(pairs)

    # Fallback: use the string representation.
    return str(value)
|
168
|
+
|
169
|
+
|
170
|
+
def build_constant_mapping(module: BytecodeModule) -> ConstantMapping:
    """Merge every chunk's constants into one pool and record the index mapping.

    Chunks are visited in order. A constant is considered a repeat when its
    ``(tag, make_hashable(value))`` key has been seen before, in any chunk
    including the current one; repeats reuse the existing global index.

    Args:
        module: Module whose ``chunks`` each expose a ``constants`` sequence
            of ``(tag, value)`` pairs.

    Returns:
        A ``ConstantMapping`` holding one local->global dict per chunk, the
        merged pool, and deduplication statistics.
    """
    pool: list[tuple[ConstantTag, Any]] = []
    per_chunk: list[dict[int, int]] = []
    # (tag, hashable value) -> index in ``pool``
    index_of: dict[tuple[Any, ...], int] = {}
    # (tag, hashable value) -> chunks where a repeat was encountered
    repeats: dict[tuple[Any, ...], list[int]] = {}
    seen_total = 0
    saved = 0

    for chunk_idx, chunk in enumerate(module.chunks):
        translation: dict[int, int] = {}

        for local_idx, (tag, value) in enumerate(chunk.constants):
            seen_total += 1
            key = (tag, make_hashable(value))

            global_idx = index_of.get(key)
            if global_idx is None:
                # First sighting: append to the pool and remember its slot.
                global_idx = len(pool)
                pool.append((tag, value))
                index_of[key] = global_idx
            else:
                # Repeat: count the bytes not written and note the chunk.
                saved += estimate_constant_size(tag, value)
                repeats.setdefault(key, []).append(chunk_idx)

            translation[local_idx] = global_idx

        per_chunk.append(translation)

    summary = DeduplicationStats(
        original_count=seen_total,
        deduped_count=len(pool),
        bytes_saved=saved,
        duplicate_chains=repeats,
    )
    return ConstantMapping(chunk_mappings=per_chunk, global_constants=pool, stats=summary)
|
218
|
+
|
219
|
+
|
220
|
+
def estimate_constant_size(tag: ConstantTag, value: Any) -> int:
    """Return the estimated encoded size, in bytes, of one constant.

    Every estimate includes one byte for the tag. Tags not listed below are
    estimated at the tag byte alone.

    Args:
        tag: The constant's type tag.
        value: The constant's value; only inspected for ``STRING``, where it
            must support ``encode("utf-8")``.

    Returns:
        The estimated size in bytes.
    """
    if tag in (ConstantTag.INT, ConstantTag.FLOAT):
        return 9  # 1 (tag) + 8 (i64 / f64)
    if tag == ConstantTag.STRING:
        payload = value.encode("utf-8")
        return 5 + len(payload)  # 1 (tag) + 4 (len) + data
    if tag == ConstantTag.BOOL:
        return 2  # 1 (tag) + 1 (bool)
    # EMPTY, and any other tag: 1 (tag)
    return 1
|
233
|
+
|
234
|
+
|
235
|
+
def get_instruction_size(opcode: int, bytecode: bytes, offset: int) -> int:
    """Return the encoded length of the instruction that starts at *offset*.

    Fixed-size instructions take their length from ``INSTRUCTION_FORMATS``.
    Variable-size instructions read a count or flag byte from *bytecode*.

    Args:
        opcode: The opcode byte found at *offset*.
        bytecode: The chunk's complete bytecode.
        offset: Position of the opcode byte within *bytecode*.

    Returns:
        The instruction length in bytes. ``1`` is returned for an opcode that
        is not in the table, and also when the byte needed to size a
        variable-length instruction lies beyond the end of *bytecode*.
    """
    layout = INSTRUCTION_FORMATS.get(opcode)
    if layout is None:
        return 1  # Unknown opcode, skip single byte

    if layout.size > 0:
        return layout.size

    # NOTE(review): when the sizing byte is missing the result is 1, so a
    # truncated variable-size instruction is reported as a 1-byte one rather
    # than flagged; confirm that callers expect this.
    total = len(bytecode)

    if opcode == Opcode.CALL_R:
        # Format: opcode + func + dst + num_args + args...
        return (4 + bytecode[offset + 3]) if offset + 3 < total else 1

    if opcode == Opcode.PHI_R:
        # Format: opcode + dst + num_sources + (src + block_id) * num_sources
        # Each source is reg(1) + block_id(2).
        return (3 + bytecode[offset + 2] * 3) if offset + 2 < total else 1

    if opcode == Opcode.RETURN_R:
        # Format: opcode + has_value + [src]
        if offset + 1 >= total:
            return 1
        return 3 if bytecode[offset + 1] else 2

    return 1
|
268
|
+
|
269
|
+
|
270
|
+
# =============================================================================
|
271
|
+
# Bytecode Remapper
|
272
|
+
# =============================================================================
|
273
|
+
|
274
|
+
|
275
|
+
class BytecodeRemapper:
    """Rewrite constant-pool operands so they index the merged global pool.

    The remapper walks a chunk's bytecode one instruction at a time, copies
    instructions that carry no constant operand unchanged, and re-encodes the
    index operand of those that do, using the chunk's local->global mapping.
    """

    # Constant indices are encoded as unsigned 16-bit operands ("<H").
    _MAX_OPERAND_INDEX = 0xFFFF

    def __init__(self, mapping: ConstantMapping):
        """Keep the mapping that all subsequent remapping calls consult."""
        self.mapping = mapping

    def remap_chunk(self, chunk_index: int, bytecode: bytes) -> bytes:
        """Return *bytecode* with every constant index translated.

        Args:
            chunk_index: Position of the chunk in ``mapping.chunk_mappings``.
            bytecode: The chunk's raw bytecode.

        Returns:
            The remapped bytecode. The input object is returned as-is when
            there is no mapping for *chunk_index* or the mapping is empty.

        Raises:
            InvalidBytecodeError: If an instruction extends past the end of
                *bytecode*.
            ConstantIndexError: If a translated index is rejected by
                ``remap_instruction``.
        """
        if chunk_index >= len(self.mapping.chunk_mappings):
            # No remapping needed (e.g., chunk has no constants)
            return bytecode

        chunk_map = self.mapping.chunk_mappings[chunk_index]
        if not chunk_map:
            # Empty mapping, no constants to remap
            return bytecode

        result = bytearray()
        end = len(bytecode)
        offset = 0

        while offset < end:
            opcode = bytecode[offset]
            inst_size = get_instruction_size(opcode, bytecode, offset)

            # Refuse to read an instruction that runs off the end.
            if offset + inst_size > end:
                raise InvalidBytecodeError(
                    f"Truncated instruction (opcode {opcode:#x}, expected {inst_size} bytes)",
                    offset=offset,
                    chunk_idx=chunk_index,
                )

            inst_bytes = bytecode[offset : offset + inst_size]

            fmt = INSTRUCTION_FORMATS.get(opcode)
            if fmt and fmt.has_const_operand:
                # Keep the opcode byte, re-encode only the operands.
                result.append(opcode)
                result.extend(
                    self.remap_instruction(opcode, inst_bytes[1:], chunk_map, chunk_idx=chunk_index, offset=offset)
                )
            else:
                # Copy instruction as-is
                result.extend(inst_bytes)

            offset += inst_size

        return bytes(result)

    def remap_instruction(
        self,
        opcode: int,
        operands: bytes,
        chunk_map: dict[int, int],
        chunk_idx: int | None = None,
        offset: int | None = None,
    ) -> bytes:
        """Return the operand bytes of one instruction with its index remapped.

        An index that has no entry in *chunk_map* is kept unchanged.

        Args:
            opcode: Opcode of the instruction.
            operands: The instruction's bytes after the opcode byte.
            chunk_map: Local->global index mapping of the current chunk.
            chunk_idx: Chunk index, used only for error messages.
            offset: Instruction offset, used only for error messages.

        Returns:
            The re-encoded operand bytes, or *operands* itself for opcodes
            that do not reference constants.

        Raises:
            InvalidBytecodeError: If *operands* is shorter than the opcode's
                operand layout.
            ConstantIndexError: If the ASSERT_R message index is outside the
                global pool, or if any remapped index does not fit the
                unsigned 16-bit operand.
        """
        if opcode == Opcode.LOAD_CONST_R:
            # Format: dst_reg(u8) + const_idx(u16)
            if len(operands) < 3:
                raise InvalidBytecodeError("LOAD_CONST_R operands too short", offset=offset, chunk_idx=chunk_idx)
            dst_reg = operands[0]
            old_idx = struct.unpack("<H", operands[1:3])[0]
            new_idx = chunk_map.get(old_idx, old_idx)

            return bytes([dst_reg]) + self._pack_index(new_idx, chunk_idx, offset)

        if opcode in (Opcode.LOAD_GLOBAL_R, Opcode.STORE_GLOBAL_R):
            # Format: reg(u8) + name_idx(u16)
            # Name index might reference string constants
            if len(operands) < 3:
                raise InvalidBytecodeError(
                    f"{INSTRUCTION_FORMATS[opcode].name} operands too short", offset=offset, chunk_idx=chunk_idx
                )
            reg = operands[0]
            old_idx = struct.unpack("<H", operands[1:3])[0]
            new_idx = chunk_map.get(old_idx, old_idx)

            return bytes([reg]) + self._pack_index(new_idx, chunk_idx, offset)

        if opcode == Opcode.ASSERT_R:
            # Format: cond_reg(u8) + assert_type(u8) + msg_idx(u16)
            # Message index references string constant for assertion message
            if len(operands) < 4:
                raise InvalidBytecodeError("ASSERT_R operands too short", offset=offset, chunk_idx=chunk_idx)
            cond_reg = operands[0]
            assert_type = operands[1]
            old_idx = struct.unpack("<H", operands[2:4])[0]
            new_idx = chunk_map.get(old_idx, old_idx)

            # The message must exist in the merged pool.
            pool_size = len(self.mapping.global_constants)
            if new_idx >= pool_size:
                raise ConstantIndexError(new_idx, pool_size - 1, chunk_idx, offset)

            return bytes([cond_reg, assert_type]) + self._pack_index(new_idx, chunk_idx, offset)

        # Other opcodes don't reference constants
        return operands

    def _pack_index(self, index: int, chunk_idx: int | None, offset: int | None) -> bytes:
        """Encode *index* as a little-endian u16, rejecting unencodable values.

        ``struct.pack("<H", ...)`` raises a bare ``struct.error`` for values
        outside 0..65535. Checking first lets the failure surface as a
        ``ConstantIndexError`` that carries the chunk and offset.
        """
        if not 0 <= index <= self._MAX_OPERAND_INDEX:
            raise ConstantIndexError(index, self._MAX_OPERAND_INDEX, chunk_idx, offset)
        return struct.pack("<H", index)
|
381
|
+
|
382
|
+
|
383
|
+
def generate_remapping_report(module: BytecodeModule, mapping: ConstantMapping) -> str:
    """Render a plain-text summary of a constant-pool remapping for debugging.

    The report has three parts: deduplication statistics taken from
    ``mapping.stats``, the duplicate chains (when there are any), and the
    local -> global index table of every chunk.

    Args:
        module: Module whose chunks supply the names shown in the per-chunk part.
        mapping: Constant mapping providing ``stats``, ``chunk_mappings`` and
            ``global_constants``.

    Returns:
        The report lines joined with newline characters.
    """
    stats = mapping.stats
    lines = [
        "=== Constant Pool Remapping Report ===\n",
        f"Original constants: {stats.original_count}",
        f"After deduplication: {stats.deduped_count}",
        f"Bytes saved: {stats.bytes_saved}",
    ]

    # Percentage of constants removed; undefined when the pool was empty.
    if stats.original_count > 0:
        removed = stats.original_count - stats.deduped_count
        percent = 100 * removed / stats.original_count
        lines.append(f"Reduction: {percent:.1f}%\n")
    else:
        lines.append("Reduction: N/A (no constants)\n")

    # Duplicate chains: one line per (tag, value) key.
    if stats.duplicate_chains:
        lines.append("Duplicate constants found:")
        lines.extend(
            f" {tag}: {val} appears in chunks: {chunks}"
            for (tag, val), chunks in stats.duplicate_chains.items()
        )
        lines.append("")

    # Per-chunk index tables.
    lines.append("Per-chunk remapping:")
    for chunk_idx, chunk_map in enumerate(mapping.chunk_mappings):
        # Mappings without a corresponding chunk in the module are not reported.
        if chunk_idx >= len(module.chunks):
            continue
        lines.append(f"\nChunk {chunk_idx} ({module.chunks[chunk_idx].name}):")
        for local, global_idx in sorted(chunk_map.items()):
            # Entries pointing past the global pool are left out of the report.
            if global_idx >= len(mapping.global_constants):
                continue
            tag, val = mapping.global_constants[global_idx]
            lines.append(f" [{local}] -> [{global_idx}]: {tag.name}: {val}")

    return "\n".join(lines)
|
419
|
+
|
420
|
+
|
421
|
+
# =============================================================================
|
422
|
+
# Main Serializer
|
423
|
+
# =============================================================================
|
424
|
+
|
425
|
+
|
426
|
+
class VMBytecodeSerializer:
    """Serializes bytecode modules for the Rust VM with constant remapping.

    Layout produced by ``write_to_stream`` (all integers little-endian):

    * header: magic ``MDBC``, u32 version, u32 flags, four u32 section offsets
    * name section: u32 length + UTF-8 module name
    * constant section: u32 count, then one tag byte + payload per constant
    * function table: u32 count, then (u32 name length, name, u32 instruction
      index) per function
    * instruction section: u32 instruction count + raw remapped bytecode
    """

    @staticmethod
    def serialize(module: BytecodeModule, debug: bool = False) -> bytes:
        """Serialize a bytecode module to bytes.

        Args:
            module: BytecodeModule to serialize.
            debug: If True, print remapping report.

        Returns:
            Serialized bytecode.
        """
        buffer = BytesIO()
        VMBytecodeSerializer.write_to_stream(module, buffer, debug=debug)
        return buffer.getvalue()

    @staticmethod
    def _encode_constant(tag, value) -> bytes:
        """Return the on-disk record (tag byte + payload) for one constant."""
        record = struct.pack("<B", tag)
        if tag == ConstantTag.INT:
            return record + struct.pack("<q", value)  # i64
        if tag == ConstantTag.FLOAT:
            return record + struct.pack("<d", value)  # f64
        if tag == ConstantTag.STRING:
            str_bytes = value.encode("utf-8")
            return record + struct.pack("<I", len(str_bytes)) + str_bytes  # length + data
        if tag == ConstantTag.BOOL:
            return record + struct.pack("<B", 1 if value else 0)  # u8
        # EMPTY has no data
        return record

    @staticmethod
    def write_to_stream(module: BytecodeModule, stream: BinaryIO, debug: bool = False) -> None:
        """Write bytecode module to a stream with constant index remapping.

        Every section is remapped, encoded and measured before the first byte
        is written, so a remapping or packing failure does not leave a
        partially written stream behind.

        Args:
            module: BytecodeModule to serialize.
            stream: Binary stream to write to.
            debug: If True, print remapping report.

        Raises:
            RemappingError: If remapping a chunk's bytecode fails; the message
                names the offending chunk.
        """
        # Step 1: Build constant mapping with deduplication
        mapping = build_constant_mapping(module)

        # Print debug report if requested
        if debug:
            print(generate_remapping_report(module, mapping))

        # Step 2: Initialize remapper
        remapper = BytecodeRemapper(mapping)

        # Step 3: Process chunks with remapping. While concatenating, record the
        # instruction index at which each chunk starts; the function table needs
        # instruction indices, and counting per chunk avoids re-scanning the
        # whole prefix once per function.
        # NOTE(review): assumes every remapped chunk consists of whole
        # instructions, so per-chunk counts add up to the count of the
        # concatenated stream - confirm against BytecodeRemapper.remap_chunk.
        all_bytecode = bytearray()
        chunk_inst_offsets: dict[int, int] = {}
        instruction_count = 0

        for i, chunk in enumerate(module.chunks):
            chunk_inst_offsets[i] = instruction_count
            try:
                remapped = remapper.remap_chunk(i, bytes(chunk.bytecode))
            except RemappingError as e:
                # Add module context to error
                raise RemappingError(f"Failed to remap chunk '{chunk.name}': {e}") from e
            all_bytecode.extend(remapped)
            instruction_count += VMBytecodeSerializer.count_instructions(bytes(remapped))

        # Encode every section body once; sizes are derived from the encoded
        # bytes so the offsets in the header cannot drift from what is written.
        name_bytes = module.name.encode("utf-8")
        constant_records = [
            VMBytecodeSerializer._encode_constant(tag, value) for tag, value in mapping.global_constants
        ]
        # Function entries: (encoded name, instruction index of the chunk).
        # A chunk index with no matching chunk falls back to instruction 0.
        function_entries = [
            (func_name.encode("utf-8"), chunk_inst_offsets.get(chunk_idx, 0))
            for func_name, chunk_idx in module.function_table.items()
        ]

        # Calculate section sizes
        header_size = 28  # 4 (magic) + 4 (version) + 4 (flags) + 16 (4 offsets)
        name_section_size = 4 + len(name_bytes)
        const_section_size = 4 + sum(len(record) for record in constant_records)  # count + records
        # count + (name length + name + offset) per function
        func_section_size = 4 + sum(4 + len(entry_name) + 4 for entry_name, _ in function_entries)

        # Calculate offsets
        name_offset = header_size
        const_offset = name_offset + name_section_size
        func_offset = const_offset + const_section_size
        inst_section_offset = func_offset + func_section_size

        # Write header
        stream.write(b"MDBC")  # Magic
        stream.write(struct.pack("<I", 1))  # Version
        stream.write(struct.pack("<I", 0x0001))  # Flags (little-endian)
        stream.write(struct.pack("<I", name_offset))
        stream.write(struct.pack("<I", const_offset))
        stream.write(struct.pack("<I", func_offset))
        stream.write(struct.pack("<I", inst_section_offset))

        # Write module name
        stream.write(struct.pack("<I", len(name_bytes)))
        stream.write(name_bytes)

        # Write constants (now deduplicated and remapped)
        stream.write(struct.pack("<I", len(constant_records)))
        for record in constant_records:
            stream.write(record)

        # Write function table (chunk indices already converted to instruction indices)
        stream.write(struct.pack("<I", len(function_entries)))
        for entry_name, first_instruction in function_entries:
            stream.write(struct.pack("<I", len(entry_name)))
            stream.write(entry_name)
            stream.write(struct.pack("<I", first_instruction))

        # Write instructions
        # The Rust loader expects the number of instructions, not bytes
        stream.write(struct.pack("<I", instruction_count))
        stream.write(all_bytecode)

    @staticmethod
    def count_instructions(bytecode: bytes) -> int:
        """Count the number of instructions in bytecode.

        Args:
            bytecode: Raw bytecode bytes.

        Returns:
            Number of instructions.
        """
        count = 0
        i = 0

        while i < len(bytecode):
            # Step over one instruction; its size comes from get_instruction_size.
            i += get_instruction_size(bytecode[i], bytecode, i)
            count += 1

        return count

    @staticmethod
    def parse_instructions(bytecode: bytes, const_base: int = 0) -> list[bytes]:
        """Parse bytecode into individual instructions.

        DEPRECATED: This method is kept for backward compatibility but
        the new remapping approach is used in write_to_stream.

        Args:
            bytecode: Raw bytecode bytes.
            const_base: Base offset for constant indices.

        Returns:
            List of individual instruction bytes.
        """
        instructions = []
        i = 0

        while i < len(bytecode):
            opcode = bytecode[i]
            inst_size = get_instruction_size(opcode, bytecode, i)
            inst = bytecode[i : i + inst_size]

            # Legacy remapping for LOAD_CONST_R only: the u16 constant index
            # sits at byte 2 of the instruction and is shifted by const_base.
            if const_base > 0 and opcode == Opcode.LOAD_CONST_R:
                patched = bytearray(inst)
                (old_idx,) = struct.unpack_from("<H", patched, 2)
                struct.pack_into("<H", patched, 2, old_idx + const_base)
                inst = bytes(patched)

            instructions.append(inst)
            i += inst_size

        return instructions
|
@@ -0,0 +1,18 @@
|
|
1
|
+
"""Compiler module for Machine Dialect™.
|
2
|
+
|
3
|
+
This module provides the main compilation infrastructure for Machine Dialect™,
|
4
|
+
organizing the compilation process into clear phases and providing a unified
|
5
|
+
interface for compilation.
|
6
|
+
"""
|
7
|
+
|
8
|
+
from machine_dialect.compiler.compiler import Compiler
|
9
|
+
from machine_dialect.compiler.config import CompilerConfig
|
10
|
+
from machine_dialect.compiler.context import CompilationContext
|
11
|
+
from machine_dialect.compiler.pipeline import CompilationPipeline
|
12
|
+
|
13
|
+
__all__ = [
|
14
|
+
"CompilationContext",
|
15
|
+
"CompilationPipeline",
|
16
|
+
"Compiler",
|
17
|
+
"CompilerConfig",
|
18
|
+
]
|