machine-dialect 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,89 @@
|
|
1
|
+
"""Bytecode module representation.
|
2
|
+
|
3
|
+
This module defines the bytecode module structure for the Rust VM.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
from dataclasses import dataclass, field
|
9
|
+
from enum import IntEnum
|
10
|
+
from typing import Any
|
11
|
+
|
12
|
+
|
13
|
+
class ChunkType(IntEnum):
    """Discriminates the two kinds of bytecode chunk a module can hold."""

    # The chunk holding the module's main code.
    MAIN = 0
    # A chunk holding the body of a function.
    FUNCTION = 1
|
18
|
+
|
19
|
+
|
20
|
+
class ConstantTag(IntEnum):
    """Numeric tag identifying the type of a constant-pool entry.

    Members are declared in the same order as before (EMPTY first), so
    iterating over the enum yields them in that order.
    """

    EMPTY = 5
    INT = 1
    FLOAT = 2
    STRING = 3
    BOOL = 4
|
28
|
+
|
29
|
+
|
30
|
+
@dataclass
class Chunk:
    """One unit of bytecode: either the main code or a single function.

    Attributes:
        name: Name of the chunk.
        chunk_type: Whether this is the main chunk or a function chunk.
        bytecode: Raw bytes of the chunk's code.
        constants: Constant pool as ``(tag, value)`` pairs.
        num_locals: Number of locals recorded for the chunk.
        num_params: Number of parameters recorded for the chunk.
    """

    name: str
    chunk_type: ChunkType
    bytecode: bytearray
    constants: list[tuple[ConstantTag, Any]]
    num_locals: int
    num_params: int
|
40
|
+
|
41
|
+
|
42
|
+
@dataclass
class BytecodeModule:
    """Container for all chunks that make up one compiled module.

    Attributes:
        name: Module name; defaults to ``"__main__"``.
        chunks: Chunks in the order they were added.
        function_table: Maps the name of each FUNCTION chunk to its index
            in ``chunks``.
        global_names: Distinct global names; a name's list position is its
            index.
        metadata: Additional key/value data (not interpreted by this class).
    """

    name: str = "__main__"
    chunks: list[Chunk] = field(default_factory=list)
    function_table: dict[str, int] = field(default_factory=dict)
    global_names: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def add_chunk(self, chunk: Chunk) -> int:
        """Append ``chunk`` to the module.

        Function chunks are additionally registered in ``function_table``
        under their name.

        Args:
            chunk: Chunk to add.

        Returns:
            Position of the chunk inside ``chunks``.
        """
        self.chunks.append(chunk)
        position = len(self.chunks) - 1
        if chunk.chunk_type == ChunkType.FUNCTION:
            # The table stores the chunk's index in ``chunks``.
            self.function_table[chunk.name] = position
        return position

    def add_global(self, name: str) -> int:
        """Register a global name, reusing the slot of an existing one.

        Args:
            name: Global name to add.

        Returns:
            Index of ``name`` in ``global_names``.
        """
        try:
            return self.global_names.index(name)
        except ValueError:
            # First time this name is seen: it takes the next free slot.
            self.global_names.append(name)
            return len(self.global_names) - 1

    def serialize(self) -> bytes:
        """Serialize the module with ``VMBytecodeSerializer``.

        Returns:
            Serialized bytecode.
        """
        # Imported here rather than at module level, as in the original.
        from machine_dialect.codegen.vm_serializer import VMBytecodeSerializer

        return VMBytecodeSerializer.serialize(self)
|
@@ -0,0 +1,300 @@
|
|
1
|
+
"""Bytecode serializer for the Rust VM.
|
2
|
+
|
3
|
+
This module serializes bytecode in the format expected by the Rust VM loader.
|
4
|
+
"""
|
5
|
+
|
6
|
+
from __future__ import annotations
|
7
|
+
|
8
|
+
import struct
|
9
|
+
from io import BytesIO
|
10
|
+
from pathlib import Path
|
11
|
+
from typing import Any, BinaryIO
|
12
|
+
|
13
|
+
# Magic number written at the very start of every bytecode file
MAGIC_NUMBER = b"MDBC"

# Current bytecode format version
BYTECODE_VERSION = 1

# Header flags
FLAG_LITTLE_ENDIAN = 0x0001


class BytecodeWriter:
    """Writes bytecode in the format expected by the Rust VM.

    Layout produced by :meth:`write_to_stream` (all integers little-endian):

    1. Header: 4-byte magic, u32 version, u32 flags and four u32 section
       offsets (name, constants, functions, instructions).
    2. Module name: u32 byte length + UTF-8 bytes.
    3. Constant pool: u32 count, then one tagged entry per constant.
    4. Function table: u32 count, then (length-prefixed name, u32 offset).
    5. Instructions: u32 count, then the raw instruction bytes.
    6. Global names: u32 count + length-prefixed names. This section is
       written only when at least one global name is registered.
    """

    # Magic (4 bytes) followed by version, flags and the four section offsets.
    _HEADER_FORMAT = "<4s6I"

    def __init__(self, module: Any = None) -> None:
        """Initialize the bytecode writer.

        Args:
            module: Optional module with bytecode data to write. It must
                expose ``constants`` and ``instructions``; ``function_table``,
                ``global_names`` and ``name`` are optional. The writer keeps
                references to the module's containers (no copies are made).
        """
        # Not read by any method of this class; kept so that existing code
        # accessing ``writer.buffer`` keeps working.
        self.buffer = BytesIO()

        # Initialize attributes with type hints
        self.constants: list[tuple[int, Any]]
        self.instructions: list[bytes]
        self.functions: dict[str, int]
        self.global_names: list[str]
        self.module_name: str

        # Compare against None explicitly: a module object that happens to be
        # falsy (e.g. defines ``__len__`` and is empty) must not be ignored.
        if module is not None:
            self.constants = module.constants
            self.instructions = module.instructions
            self.functions = getattr(module, "function_table", {})
            self.global_names = getattr(module, "global_names", [])
            self.module_name = getattr(module, "name", "__main__")
        else:
            self.constants = []  # (tag, value) pairs
            self.instructions = []
            self.functions = {}  # name -> instruction offset
            self.global_names = []
            self.module_name = "__main__"

    def set_module_name(self, name: str) -> None:
        """Set the module name."""
        self.module_name = name

    def add_constant(self, tag: int, value: Any) -> int:
        """Add a constant to the constant pool.

        Args:
            tag: Type tag (1=int, 2=float, 3=string, 4=bool, 5=empty)
            value: The constant value

        Returns:
            Index of the constant in the pool
        """
        idx = len(self.constants)
        self.constants.append((tag, value))
        return idx

    def add_int_constant(self, value: int) -> int:
        """Add an integer constant."""
        return self.add_constant(0x01, value)

    def add_float_constant(self, value: float) -> int:
        """Add a float constant."""
        return self.add_constant(0x02, value)

    def add_string_constant(self, value: str) -> int:
        """Add a string constant."""
        return self.add_constant(0x03, value)

    def add_bool_constant(self, value: bool) -> int:
        """Add a boolean constant."""
        return self.add_constant(0x04, value)

    def add_empty_constant(self) -> int:
        """Add an empty/none constant."""
        return self.add_constant(0x05, None)

    def add_global_name(self, name: str) -> int:
        """Add a global name and return its index.

        A name that is already registered keeps its existing index.
        """
        if name not in self.global_names:
            self.global_names.append(name)
        return self.global_names.index(name)

    def add_instruction(self, instruction: bytes) -> None:
        """Add a raw instruction."""
        self.instructions.append(instruction)

    def _emit(self, fmt: str, *fields: int) -> None:
        """Pack ``fields`` with ``fmt`` and append the result as an instruction."""
        self.add_instruction(struct.pack(fmt, *fields))

    def emit_load_const(self, dst: int, const_idx: int) -> None:
        """Emit LoadConstR instruction (opcode 0, u8 dst, u16 const_idx)."""
        self._emit("<BBH", 0, dst, const_idx)

    def emit_move(self, dst: int, src: int) -> None:
        """Emit MoveR instruction (opcode 1, u8 dst, u8 src)."""
        self._emit("<BBB", 1, dst, src)

    def emit_load_global(self, dst: int, name_idx: int) -> None:
        """Emit LoadGlobalR instruction (opcode 2, u8 dst, u16 name_idx)."""
        self._emit("<BBH", 2, dst, name_idx)

    def emit_store_global(self, src: int, name_idx: int) -> None:
        """Emit StoreGlobalR instruction (opcode 3, u8 src, u16 name_idx)."""
        self._emit("<BBH", 3, src, name_idx)

    def emit_add(self, dst: int, left: int, right: int) -> None:
        """Emit AddR instruction (opcode 7)."""
        self._emit("<BBBB", 7, dst, left, right)

    def emit_sub(self, dst: int, left: int, right: int) -> None:
        """Emit SubR instruction (opcode 8)."""
        self._emit("<BBBB", 8, dst, left, right)

    def emit_mul(self, dst: int, left: int, right: int) -> None:
        """Emit MulR instruction (opcode 9)."""
        self._emit("<BBBB", 9, dst, left, right)

    def emit_div(self, dst: int, left: int, right: int) -> None:
        """Emit DivR instruction (opcode 10)."""
        self._emit("<BBBB", 10, dst, left, right)

    def emit_jump(self, offset: int) -> None:
        """Emit JumpR instruction (opcode 22, signed 32-bit offset)."""
        self._emit("<Bi", 22, offset)

    def emit_jump_if(self, cond: int, offset: int) -> None:
        """Emit JumpIfR instruction (opcode 23, u8 cond, signed 32-bit offset)."""
        self._emit("<BBi", 23, cond, offset)

    def emit_return(self, src: int | None = None) -> None:
        """Emit ReturnR instruction (opcode 26).

        Args:
            src: Register holding the return value, or None to return nothing.
                The encoding is a has_value byte followed by ``src`` only
                when a value is returned.
        """
        if src is not None:
            self._emit("<BBB", 26, 1, src)  # has_value=1, src
        else:
            self._emit("<BB", 26, 0)  # has_value=0

    def emit_debug_print(self, src: int) -> None:
        """Emit DebugPrint instruction (opcode 37, u8 src)."""
        self._emit("<BB", 37, src)

    def write(self) -> bytes:
        """Write the bytecode to bytes.

        Returns:
            The serialized bytecode as bytes.
        """
        buffer = BytesIO()
        self.write_to_stream(buffer)
        return buffer.getvalue()

    def write_to_file(self, path: Path) -> None:
        """Write the bytecode to a file.

        Args:
            path: Path to write the bytecode file. The suffix is set to
                ``.mdbc``; an existing suffix on ``path`` is replaced.
        """
        bytecode_path = path.with_suffix(".mdbc")
        with open(bytecode_path, "wb") as f:
            self.write_to_stream(f)

    @staticmethod
    def _pack_str(text: str) -> bytes:
        """Return ``text`` as UTF-8 bytes prefixed with their u32 length."""
        raw = text.encode("utf-8")
        return struct.pack("<I", len(raw)) + raw

    def _encode_constants(self) -> bytes:
        """Encode the constant pool: u32 count, then one tagged entry each.

        Raises:
            ValueError: If a constant carries a tag this writer cannot encode.
        """
        parts = [struct.pack("<I", len(self.constants))]
        for tag, value in self.constants:
            parts.append(struct.pack("<B", tag))
            if tag == 0x01:  # Int
                parts.append(struct.pack("<q", value))
            elif tag == 0x02:  # Float
                parts.append(struct.pack("<d", value))
            elif tag == 0x03:  # String
                parts.append(self._pack_str(value))
            elif tag == 0x04:  # Bool
                parts.append(struct.pack("<B", 1 if value else 0))
            elif tag != 0x05:  # Empty carries no payload
                # Writing a bare unknown tag would silently corrupt the pool.
                raise ValueError(f"Unknown constant tag: {tag!r}")
        return b"".join(parts)

    def _encode_functions(self) -> bytes:
        """Encode the function table: u32 count, then (name, u32 offset)."""
        parts = [struct.pack("<I", len(self.functions))]
        for name, offset in self.functions.items():
            parts.append(self._pack_str(name))
            parts.append(struct.pack("<I", offset))
        return b"".join(parts)

    def _encode_instructions(self) -> bytes:
        """Encode the instruction section: u32 count, then the raw bytes."""
        return struct.pack("<I", len(self.instructions)) + b"".join(self.instructions)

    def _encode_global_names(self) -> bytes:
        """Encode the global-name table, or ``b""`` when there are no globals."""
        if not self.global_names:
            return b""
        count = struct.pack("<I", len(self.global_names))
        return count + b"".join(self._pack_str(name) for name in self.global_names)

    def write_to_stream(self, stream: BinaryIO) -> None:
        """Write the bytecode to a binary stream.

        Every section is encoded before the first byte is written, so an
        encoding error does not leave a partially written stream.

        Args:
            stream: Binary stream to write to

        Raises:
            ValueError: If a constant carries an unknown tag.
        """
        name_data = self._pack_str(self.module_name)
        const_data = self._encode_constants()
        func_data = self._encode_functions()
        inst_data = self._encode_instructions()
        globals_data = self._encode_global_names()

        # Sections follow the header back to back, so each offset is the
        # previous offset plus the previous section's length.
        name_offset = struct.calcsize(self._HEADER_FORMAT)  # 28 bytes
        const_offset = name_offset + len(name_data)
        func_offset = const_offset + len(const_data)
        inst_offset = func_offset + len(func_data)

        header = struct.pack(
            self._HEADER_FORMAT,
            MAGIC_NUMBER,
            BYTECODE_VERSION,
            FLAG_LITTLE_ENDIAN,
            name_offset,
            const_offset,
            func_offset,
            inst_offset,
        )

        for section in (header, name_data, const_data, func_data, inst_data):
            stream.write(section)

        # The global-name table has no header offset; it simply trails the
        # instruction section and is omitted entirely when empty.
        if globals_data:
            stream.write(globals_data)
|
264
|
+
|
265
|
+
|
266
|
+
def serialize_bytecode_module(
    module_name: str,
    constants: list[tuple[int, Any]],
    instructions: list[bytes],
    functions: dict[str, int] | None = None,
    global_names: list[str] | None = None,
    output_path: Path | None = None,
) -> bytes:
    """Build a ``BytecodeWriter`` from the given parts and serialize it.

    Args:
        module_name: Name of the module
        constants: (tag, value) pairs forming the constant pool
        instructions: Encoded instructions, one ``bytes`` object each
        functions: Function table (name -> offset); empty when omitted
        global_names: Global variable names; empty when omitted
        output_path: When given, the bytecode is also written to this path
            through ``BytecodeWriter.write_to_file``

    Returns:
        The serialized bytecode as bytes
    """
    writer = BytecodeWriter()
    writer.set_module_name(module_name)

    # The writer uses the caller's containers directly (no copies).
    writer.constants = constants
    writer.instructions = instructions
    writer.functions = functions if functions else {}
    writer.global_names = global_names if global_names else []

    if output_path:
        writer.write_to_file(output_path)

    # The bytes are returned whether or not a file was written.
    return writer.write()
|
@@ -0,0 +1,101 @@
|
|
1
|
+
"""Opcode definitions for the Rust VM.
|
2
|
+
|
3
|
+
This module defines the opcodes that match the Rust VM implementation.
|
4
|
+
These must stay in sync with machine_dialect_vm/src/instructions/decoder.rs
|
5
|
+
"""
|
6
|
+
|
7
|
+
from __future__ import annotations
|
8
|
+
|
9
|
+
from enum import IntEnum
|
10
|
+
|
11
|
+
|
12
|
+
class Opcode(IntEnum):
    """VM instruction opcodes.

    Each member's value is the numeric opcode of one instruction. The
    trailing comment on every member gives the instruction's name and
    operand fields as declared on the Rust side; per this module's
    docstring the values must stay in sync with
    machine_dialect_vm/src/instructions/decoder.rs, so never renumber or
    reorder members without changing the decoder as well.
    """

    # Basic Operations (0-3)
    LOAD_CONST_R = 0  # LoadConstR { dst: u8, const_idx: u16 }
    MOVE_R = 1  # MoveR { dst: u8, src: u8 }
    LOAD_GLOBAL_R = 2  # LoadGlobalR { dst: u8, name_idx: u16 }
    STORE_GLOBAL_R = 3  # StoreGlobalR { src: u8, name_idx: u16 }

    # Type Operations (4-6)
    DEFINE_R = 4  # DefineR { dst: u8, type_id: u16 }
    CHECK_TYPE_R = 5  # CheckTypeR { dst: u8, src: u8, type_id: u16 }
    CAST_R = 6  # CastR { dst: u8, src: u8, to_type: u16 }

    # Arithmetic (7-12)
    ADD_R = 7  # AddR { dst: u8, left: u8, right: u8 }
    SUB_R = 8  # SubR { dst: u8, left: u8, right: u8 }
    MUL_R = 9  # MulR { dst: u8, left: u8, right: u8 }
    DIV_R = 10  # DivR { dst: u8, left: u8, right: u8 }
    MOD_R = 11  # ModR { dst: u8, left: u8, right: u8 }
    NEG_R = 12  # NegR { dst: u8, src: u8 }

    # Logical Operations (13-15)
    NOT_R = 13  # NotR { dst: u8, src: u8 }
    AND_R = 14  # AndR { dst: u8, left: u8, right: u8 }
    OR_R = 15  # OrR { dst: u8, left: u8, right: u8 }

    # Comparisons (16-21)
    EQ_R = 16  # EqR { dst: u8, left: u8, right: u8 }
    NEQ_R = 17  # NeqR { dst: u8, left: u8, right: u8 }
    LT_R = 18  # LtR { dst: u8, left: u8, right: u8 }
    GT_R = 19  # GtR { dst: u8, left: u8, right: u8 }
    LTE_R = 20  # LteR { dst: u8, left: u8, right: u8 }
    GTE_R = 21  # GteR { dst: u8, left: u8, right: u8 }

    # Control Flow (22-26)
    JUMP_R = 22  # JumpR { offset: i32 }
    JUMP_IF_R = 23  # JumpIfR { cond: u8, offset: i32 }
    JUMP_IF_NOT_R = 24  # JumpIfNotR { cond: u8, offset: i32 }
    CALL_R = 25  # CallR { func: u8, args: Vec<u8>, dst: u8 }
    RETURN_R = 26  # ReturnR { src: Option<u8> }

    # MIR Support (27-30)
    PHI_R = 27  # PhiR { dst: u8, sources: Vec<(u8, u16)> }
    ASSERT_R = 28  # AssertR { reg: u8, msg_idx: u16 }
    SCOPE_ENTER_R = 29  # ScopeEnterR { scope_id: u16 }
    SCOPE_EXIT_R = 30  # ScopeExitR { scope_id: u16 }

    # String Operations (31-32)
    CONCAT_STR_R = 31  # ConcatStrR { dst: u8, left: u8, right: u8 }
    STR_LEN_R = 32  # StrLenR { dst: u8, str: u8 }

    # Arrays (33-36)
    NEW_ARRAY_R = 33  # NewArrayR { dst: u8, size: u8 }
    ARRAY_GET_R = 34  # ArrayGetR { dst: u8, array: u8, index: u8 }
    ARRAY_SET_R = 35  # ArraySetR { array: u8, index: u8, value: u8 }
    ARRAY_LEN_R = 36  # ArrayLenR { dst: u8, array: u8 }

    # Debug (37-40)
    DEBUG_PRINT = 37  # DebugPrint { src: u8 }
    BREAKPOINT = 38  # BreakPoint
    HALT = 39  # Halt execution
    NOP = 40  # No operation

    # Dictionaries (41-49) - numbering matches the VM implementation
    DICT_NEW_R = 41  # DictNewR { dst: u8 }
    DICT_GET_R = 42  # DictGetR { dst: u8, dict: u8, key: u8 }
    DICT_SET_R = 43  # DictSetR { dict: u8, key: u8, value: u8 }
    DICT_REMOVE_R = 44  # DictRemoveR { dict: u8, key: u8 }
    DICT_CONTAINS_R = 45  # DictContainsR { dst: u8, dict: u8, key: u8 }
    DICT_KEYS_R = 46  # DictKeysR { dst: u8, dict: u8 }
    DICT_VALUES_R = 47  # DictValuesR { dst: u8, dict: u8 }
    DICT_CLEAR_R = 48  # DictClearR { dict: u8 }
    DICT_LEN_R = 49  # DictLenR { dst: u8, dict: u8 }
|
86
|
+
|
87
|
+
|
88
|
+
# Type IDs for type operations
|
89
|
+
class TypeId(IntEnum):
    """Numeric identifiers for value types, used by the type operations."""

    # Identifiers are numbered consecutively from zero.
    EMPTY = 0
    BOOL = 1
    INT = 2
    FLOAT = 3
    STRING = 4
    FUNCTION = 5
    URL = 6
    ARRAY = 7
    DICT = 8

    # Set apart from the consecutive range at the largest one-byte value.
    UNKNOWN = 255
|