machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,89 @@
1
+ """Bytecode module representation.
2
+
3
+ This module defines the bytecode module structure for the Rust VM.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass, field
9
+ from enum import IntEnum
10
+ from typing import Any
11
+
12
+
13
class ChunkType(IntEnum):
    """Discriminates a module's main chunk from a function chunk."""

    # Integer values are explicit so they stay stable if members are added.
    MAIN = 0
    FUNCTION = 1
18
+
19
+
20
class ConstantTag(IntEnum):
    """One-byte type tags identifying the kind of a constant-pool entry."""

    # Declaration order (EMPTY first) is kept as-is: it determines the
    # iteration order of the enum, independent of the numeric values.
    EMPTY = 0x05
    INT = 0x01
    FLOAT = 0x02
    STRING = 0x03
    BOOL = 0x04
28
+
29
+
30
@dataclass
class Chunk:
    """One unit of bytecode: either the main chunk or a single function."""

    # Identifier of the chunk; BytecodeModule.add_chunk uses it as the
    # function-table key for FUNCTION chunks.
    name: str
    # Whether this is the main chunk or a function chunk.
    chunk_type: ChunkType
    # Mutable byte buffer holding the chunk's bytecode.
    bytecode: bytearray
    # Constant pool as (tag, value) pairs.
    constants: list[tuple[ConstantTag, Any]]
    # Presumably the number of local slots the chunk needs - confirm with the VM.
    num_locals: int
    # Presumably the number of parameters the chunk takes - confirm with the VM.
    num_params: int
40
+
41
+
42
@dataclass
class BytecodeModule:
    """Container for the chunks, function table and globals of one module."""

    name: str = "__main__"
    chunks: list[Chunk] = field(default_factory=list)
    # Maps a function chunk's name to its position in ``chunks``.
    function_table: dict[str, int] = field(default_factory=dict)
    # Unique global names; a name's list position is its index.
    global_names: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def add_chunk(self, chunk: Chunk) -> int:
        """Append a chunk to the module.

        Function chunks are additionally registered in ``function_table``
        under their name; a later chunk with the same name replaces the
        earlier table entry.

        Args:
            chunk: Chunk to add.

        Returns:
            Position of the chunk within ``chunks``.
        """
        position = len(self.chunks)
        self.chunks.append(chunk)
        if chunk.chunk_type != ChunkType.FUNCTION:
            return position
        # The table stores the chunk's index in ``chunks``.
        self.function_table[chunk.name] = position
        return position

    def add_global(self, name: str) -> int:
        """Register a global name, reusing the existing slot if present.

        Args:
            name: Global name to add.

        Returns:
            Index of the name within ``global_names``.
        """
        try:
            return self.global_names.index(name)
        except ValueError:
            # First time this name is seen: it takes the next free slot.
            self.global_names.append(name)
            return len(self.global_names) - 1

    def serialize(self) -> bytes:
        """Serialize the module by delegating to ``VMBytecodeSerializer``.

        Returns:
            Serialized bytecode.
        """
        # Imported at call time rather than at module import time.
        from machine_dialect.codegen.vm_serializer import VMBytecodeSerializer

        return VMBytecodeSerializer.serialize(self)
@@ -0,0 +1,300 @@
1
+ """Bytecode serializer for the Rust VM.
2
+
3
+ This module serializes bytecode in the format expected by the Rust VM loader.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import struct
9
+ from io import BytesIO
10
+ from pathlib import Path
11
+ from typing import Any, BinaryIO
12
+
13
# Four-byte signature written at the very start of a serialized module.
MAGIC_NUMBER = b"MDBC"

# Format version stored in the header right after the signature.
BYTECODE_VERSION = 1

# Header flag bits.
FLAG_LITTLE_ENDIAN = 0x0001
21
+
22
+
23
class BytecodeWriter:
    """Assemble and serialize a bytecode module for the Rust VM loader.

    Constants, pre-encoded instructions, a function table and global names
    are accumulated on the instance. ``write``, ``write_to_file`` and
    ``write_to_stream`` then emit them as one little-endian binary image.
    """

    def __init__(self, module: Any = None) -> None:
        """Initialize the bytecode writer.

        Args:
            module: Optional object to take data from. It must expose
                ``constants`` and ``instructions``; ``function_table``,
                ``global_names`` and ``name`` are read when present. The
                containers are used as-is (not copied), so later ``add_*``
                calls mutate the module's own lists.
        """
        # Not used by any method of this class; kept so code that reads the
        # attribute keeps working.
        self.buffer = BytesIO()

        # Declared up front so both branches below share one set of types.
        self.constants: list[tuple[int, Any]]
        self.instructions: list[bytes]
        self.functions: dict[str, int]
        self.global_names: list[str]
        self.module_name: str

        # Compare against None rather than relying on truthiness, so a
        # module object that happens to be falsy is not silently ignored.
        if module is not None:
            self.constants = module.constants
            self.instructions = module.instructions
            self.functions = getattr(module, "function_table", {})
            self.global_names = getattr(module, "global_names", [])
            self.module_name = getattr(module, "name", "__main__")
        else:
            self.constants = []  # (tag, value) pairs
            self.instructions = []
            self.functions = {}  # name -> instruction offset
            self.global_names = []
            self.module_name = "__main__"

    def set_module_name(self, name: str) -> None:
        """Set the module name."""
        self.module_name = name

    def add_constant(self, tag: int, value: Any) -> int:
        """Add a constant to the constant pool.

        The tag is not validated here; an unsupported tag is rejected when
        the module is serialized.

        Args:
            tag: Type tag (1=int, 2=float, 3=string, 4=bool, 5=empty)
            value: The constant value

        Returns:
            Index of the constant in the pool
        """
        idx = len(self.constants)
        self.constants.append((tag, value))
        return idx

    def add_int_constant(self, value: int) -> int:
        """Add an integer constant."""
        return self.add_constant(0x01, value)

    def add_float_constant(self, value: float) -> int:
        """Add a float constant."""
        return self.add_constant(0x02, value)

    def add_string_constant(self, value: str) -> int:
        """Add a string constant."""
        return self.add_constant(0x03, value)

    def add_bool_constant(self, value: bool) -> int:
        """Add a boolean constant."""
        return self.add_constant(0x04, value)

    def add_empty_constant(self) -> int:
        """Add an empty/none constant."""
        return self.add_constant(0x05, None)

    def add_global_name(self, name: str) -> int:
        """Add a global name and return its index.

        A name that is already registered keeps its existing index.
        """
        if name not in self.global_names:
            self.global_names.append(name)
        return self.global_names.index(name)

    def add_instruction(self, instruction: bytes) -> None:
        """Add a raw, already encoded instruction."""
        self.instructions.append(instruction)

    # Each emit_* helper packs one instruction as: u8 opcode followed by its
    # operands, little-endian and without padding. ``struct.pack`` raises
    # ``struct.error`` when an operand does not fit its field.

    def emit_load_const(self, dst: int, const_idx: int) -> None:
        """Emit LoadConstR (opcode 0): u8 dst, u16 const_idx."""
        self.add_instruction(struct.pack("<BBH", 0, dst, const_idx))

    def emit_move(self, dst: int, src: int) -> None:
        """Emit MoveR (opcode 1): u8 dst, u8 src."""
        self.add_instruction(struct.pack("<BBB", 1, dst, src))

    def emit_load_global(self, dst: int, name_idx: int) -> None:
        """Emit LoadGlobalR (opcode 2): u8 dst, u16 name_idx."""
        self.add_instruction(struct.pack("<BBH", 2, dst, name_idx))

    def emit_store_global(self, src: int, name_idx: int) -> None:
        """Emit StoreGlobalR (opcode 3): u8 src, u16 name_idx."""
        self.add_instruction(struct.pack("<BBH", 3, src, name_idx))

    def emit_add(self, dst: int, left: int, right: int) -> None:
        """Emit AddR (opcode 7): u8 dst, u8 left, u8 right."""
        self.add_instruction(struct.pack("<BBBB", 7, dst, left, right))

    def emit_sub(self, dst: int, left: int, right: int) -> None:
        """Emit SubR (opcode 8): u8 dst, u8 left, u8 right."""
        self.add_instruction(struct.pack("<BBBB", 8, dst, left, right))

    def emit_mul(self, dst: int, left: int, right: int) -> None:
        """Emit MulR (opcode 9): u8 dst, u8 left, u8 right."""
        self.add_instruction(struct.pack("<BBBB", 9, dst, left, right))

    def emit_div(self, dst: int, left: int, right: int) -> None:
        """Emit DivR (opcode 10): u8 dst, u8 left, u8 right."""
        self.add_instruction(struct.pack("<BBBB", 10, dst, left, right))

    def emit_jump(self, offset: int) -> None:
        """Emit JumpR (opcode 22): i32 offset."""
        self.add_instruction(struct.pack("<Bi", 22, offset))

    def emit_jump_if(self, cond: int, offset: int) -> None:
        """Emit JumpIfR (opcode 23): u8 cond, i32 offset."""
        self.add_instruction(struct.pack("<BBi", 23, cond, offset))

    def emit_return(self, src: int | None = None) -> None:
        """Emit ReturnR (opcode 26).

        Args:
            src: Register holding the return value, or None to return
                without a value. The instruction carries a has_value byte
                (1 or 0) and, only when it is 1, the source register.
        """
        if src is not None:
            inst = struct.pack("<BBB", 26, 1, src)  # has_value=1, src
        else:
            inst = struct.pack("<BB", 26, 0)  # has_value=0
        self.add_instruction(inst)

    def emit_debug_print(self, src: int) -> None:
        """Emit DebugPrint (opcode 37): u8 src."""
        self.add_instruction(struct.pack("<BB", 37, src))

    def write(self) -> bytes:
        """Write the bytecode to bytes.

        Returns:
            The serialized bytecode as bytes.

        Raises:
            ValueError: If the constant pool contains an unsupported tag.
        """
        buffer = BytesIO()
        self.write_to_stream(buffer)
        return buffer.getvalue()

    def write_to_file(self, path: Path) -> None:
        """Write the bytecode to a file.

        Args:
            path: Path to write the bytecode file (without extension). The
                file is written to ``path.with_suffix(".mdbc")``, so an
                existing suffix on ``path`` is replaced.

        Raises:
            ValueError: If the constant pool contains an unsupported tag.
        """
        bytecode_path = path.with_suffix(".mdbc")
        with open(bytecode_path, "wb") as f:
            self.write_to_stream(f)

    def write_to_stream(self, stream: BinaryIO) -> None:
        """Write the bytecode to a binary stream.

        Layout (all integers little-endian):

        * header, 28 bytes: magic, u32 version, u32 flags, then four u32
          absolute offsets (name, constants, function table, instructions)
        * module name: u32 byte length + UTF-8 bytes
        * constants: u32 count, then per entry a u8 tag and its payload
        * function table: u32 count, then per entry a length-prefixed name
          and a u32 offset
        * instructions: u32 count, then the raw instruction bytes
        * global names, only when there are any: u32 count, then
          length-prefixed names (this section has no offset in the header)

        Args:
            stream: Binary stream to write to

        Raises:
            ValueError: If the constant pool contains an unsupported tag.
        """
        # Encode every section before touching the stream: the header needs
        # the section sizes, and malformed data then fails before any
        # partial output has been written.
        name_section = self._encode_str(self.module_name)
        const_section = self._encode_constants()
        func_section = self._encode_functions()
        inst_section = self._encode_instructions()
        globals_section = self._encode_global_names()

        header_size = 28  # 4 (magic) + 4 (version) + 4 (flags) + 16 (4 offsets)
        name_offset = header_size
        const_offset = name_offset + len(name_section)
        func_offset = const_offset + len(const_section)
        inst_offset = func_offset + len(func_section)

        stream.write(MAGIC_NUMBER)
        stream.write(
            struct.pack(
                "<6I",
                BYTECODE_VERSION,
                FLAG_LITTLE_ENDIAN,
                name_offset,
                const_offset,
                func_offset,
                inst_offset,
            )
        )

        stream.write(name_section)
        stream.write(const_section)
        stream.write(func_section)
        stream.write(inst_section)

        # Empty when there are no global names, in which case nothing is
        # appended after the instructions.
        if globals_section:
            stream.write(globals_section)

    @staticmethod
    def _encode_str(text: str) -> bytes:
        """Encode text as a u32 byte length followed by its UTF-8 bytes."""
        raw = text.encode("utf-8")
        return struct.pack("<I", len(raw)) + raw

    def _encode_constants(self) -> bytes:
        """Encode the constant pool: u32 count, then tag + payload each."""
        parts = [struct.pack("<I", len(self.constants))]
        for tag, value in self.constants:
            if tag == 0x01:  # Int: signed 64-bit
                payload = struct.pack("<q", value)
            elif tag == 0x02:  # Float: 64-bit double
                payload = struct.pack("<d", value)
            elif tag == 0x03:  # String: length-prefixed UTF-8
                payload = self._encode_str(value)
            elif tag == 0x04:  # Bool: single 0/1 byte
                payload = struct.pack("<B", 1 if value else 0)
            elif tag == 0x05:  # Empty: tag only, no data
                payload = b""
            else:
                # A bare tag byte with no payload would leave the rest of
                # the pool undecodable, so refuse instead of emitting it.
                raise ValueError(f"Unsupported constant tag: {tag!r}")
            parts.append(struct.pack("<B", tag))
            parts.append(payload)
        return b"".join(parts)

    def _encode_functions(self) -> bytes:
        """Encode the function table: u32 count, then name + u32 offset."""
        parts = [struct.pack("<I", len(self.functions))]
        for func_name, offset in self.functions.items():
            parts.append(self._encode_str(func_name))
            parts.append(struct.pack("<I", offset))
        return b"".join(parts)

    def _encode_instructions(self) -> bytes:
        """Encode the instruction section: u32 count, then the raw bytes."""
        return struct.pack("<I", len(self.instructions)) + b"".join(self.instructions)

    def _encode_global_names(self) -> bytes:
        """Encode the global names, or return b"" when there are none."""
        if not self.global_names:
            return b""
        parts = [struct.pack("<I", len(self.global_names))]
        parts.extend(self._encode_str(global_name) for global_name in self.global_names)
        return b"".join(parts)
264
+
265
+
266
def serialize_bytecode_module(
    module_name: str,
    constants: list[tuple[int, Any]],
    instructions: list[bytes],
    functions: dict[str, int] | None = None,
    global_names: list[str] | None = None,
    output_path: Path | None = None,
) -> bytes:
    """Build a ``BytecodeWriter`` from the given parts and serialize it.

    Args:
        module_name: Name of the module
        constants: List of (tag, value) pairs for the constant pool
        instructions: List of instruction bytes
        functions: Optional function table (name -> offset)
        global_names: Optional list of global variable names
        output_path: Optional path to write the bytecode file; the writer
            stores it with the ``.mdbc`` suffix

    Returns:
        The serialized bytecode as bytes
    """
    writer = BytecodeWriter()
    writer.module_name = module_name
    # The caller's containers are handed to the writer as-is, not copied.
    writer.constants = constants
    writer.instructions = instructions
    writer.functions = functions if functions else {}
    writer.global_names = global_names if global_names else []

    if output_path:
        writer.write_to_file(output_path)

    # The bytes are returned whether or not a file was written.
    return writer.write()
@@ -0,0 +1,101 @@
1
+ """Opcode definitions for the Rust VM.
2
+
3
+ This module defines the opcodes that match the Rust VM implementation.
4
+ These must stay in sync with machine_dialect_vm/src/instructions/decoder.rs
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from enum import IntEnum
10
+
11
+
12
class Opcode(IntEnum):
    """Numeric opcodes of the VM's register-based instructions.

    The trailing comment on each member lists the instruction's operands
    and their widths.
    """

    # --- Basic operations: 0-3 ---
    LOAD_CONST_R = 0  # dst: u8, const_idx: u16
    MOVE_R = 1  # dst: u8, src: u8
    LOAD_GLOBAL_R = 2  # dst: u8, name_idx: u16
    STORE_GLOBAL_R = 3  # src: u8, name_idx: u16

    # --- Type operations: 4-6 ---
    DEFINE_R = 4  # dst: u8, type_id: u16
    CHECK_TYPE_R = 5  # dst: u8, src: u8, type_id: u16
    CAST_R = 6  # dst: u8, src: u8, to_type: u16

    # --- Arithmetic: 7-12 ---
    ADD_R = 7  # dst: u8, left: u8, right: u8
    SUB_R = 8  # dst: u8, left: u8, right: u8
    MUL_R = 9  # dst: u8, left: u8, right: u8
    DIV_R = 10  # dst: u8, left: u8, right: u8
    MOD_R = 11  # dst: u8, left: u8, right: u8
    NEG_R = 12  # dst: u8, src: u8

    # --- Logical operations: 13-15 ---
    NOT_R = 13  # dst: u8, src: u8
    AND_R = 14  # dst: u8, left: u8, right: u8
    OR_R = 15  # dst: u8, left: u8, right: u8

    # --- Comparisons: 16-21 ---
    EQ_R = 16  # dst: u8, left: u8, right: u8
    NEQ_R = 17  # dst: u8, left: u8, right: u8
    LT_R = 18  # dst: u8, left: u8, right: u8
    GT_R = 19  # dst: u8, left: u8, right: u8
    LTE_R = 20  # dst: u8, left: u8, right: u8
    GTE_R = 21  # dst: u8, left: u8, right: u8

    # --- Control flow: 22-26 ---
    JUMP_R = 22  # offset: i32
    JUMP_IF_R = 23  # cond: u8, offset: i32
    JUMP_IF_NOT_R = 24  # cond: u8, offset: i32
    CALL_R = 25  # func: u8, args: Vec<u8>, dst: u8
    RETURN_R = 26  # src: Option<u8>

    # --- MIR support: 27-30 ---
    PHI_R = 27  # dst: u8, sources: Vec<(u8, u16)>
    ASSERT_R = 28  # reg: u8, msg_idx: u16
    SCOPE_ENTER_R = 29  # scope_id: u16
    SCOPE_EXIT_R = 30  # scope_id: u16

    # --- String operations: 31-32 ---
    CONCAT_STR_R = 31  # dst: u8, left: u8, right: u8
    STR_LEN_R = 32  # dst: u8, str: u8

    # --- Arrays: 33-36 ---
    NEW_ARRAY_R = 33  # dst: u8, size: u8
    ARRAY_GET_R = 34  # dst: u8, array: u8, index: u8
    ARRAY_SET_R = 35  # array: u8, index: u8, value: u8
    ARRAY_LEN_R = 36  # dst: u8, array: u8

    # --- Debug: 37-40 ---
    DEBUG_PRINT = 37  # src: u8
    BREAKPOINT = 38  # no operands
    HALT = 39  # halt execution
    NOP = 40  # no operation

    # --- Dictionaries: 41-49 ---
    DICT_NEW_R = 41  # dst: u8
    DICT_GET_R = 42  # dst: u8, dict: u8, key: u8
    DICT_SET_R = 43  # dict: u8, key: u8, value: u8
    DICT_REMOVE_R = 44  # dict: u8, key: u8
    DICT_CONTAINS_R = 45  # dst: u8, dict: u8, key: u8
    DICT_KEYS_R = 46  # dst: u8, dict: u8
    DICT_VALUES_R = 47  # dst: u8, dict: u8
    DICT_CLEAR_R = 48  # dict: u8
    DICT_LEN_R = 49  # dst: u8, dict: u8
86
+
87
+
88
+ # Type IDs for type operations
89
class TypeId(IntEnum):
    """Numeric identifiers of value types used by the type instructions.

    These ids are a separate numbering from the constant-pool tags: for
    example BOOL is 0x01 here while ConstantTag.BOOL is 0x04.
    """

    EMPTY = 0x00
    BOOL = 0x01
    INT = 0x02
    FLOAT = 0x03
    STRING = 0x04
    FUNCTION = 0x05
    URL = 0x06
    ARRAY = 0x07
    DICT = 0x08
    # The highest one-byte value, set apart from the contiguous ids above.
    UNKNOWN = 0xFF