machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,225 @@
1
+ """MIR Module Representation.
2
+
3
+ This module defines the MIRModule class that represents a complete
4
+ compilation unit in the MIR.
5
+ """
6
+
7
+ from typing import Any
8
+
9
+ from .mir_function import MIRFunction
10
+ from .mir_types import MIRType, MIRUnionType
11
+ from .mir_values import Constant, Variable
12
+
13
+
14
class ConstantPool:
    """Deduplicating pool of the constants used in a module.

    Constants are kept in insertion order. Adding a constant whose value,
    value class and MIR type match an existing entry returns the index of
    that entry instead of storing a duplicate.
    """

    def __init__(self) -> None:
        """Initialize an empty constant pool."""
        self.constants: list[Constant] = []
        # Maps (Python class of the value, value, MIR type) -> pool index.
        self._value_to_index: dict[tuple[type, Any, MIRType | MIRUnionType], int] = {}

    def add(self, constant: Constant) -> int:
        """Add a constant to the pool, reusing an equal existing entry.

        Args:
            constant: The constant to add.

        Returns:
            The index of the constant in the pool.
        """
        # The Python class of the value is part of the key because Python
        # treats 1, 1.0 and True as equal (with equal hashes). Without it,
        # those distinct literals would share a single slot whenever they
        # carry the same MIR type, and the later one would be lost.
        key = (type(constant.value), constant.value, constant.type)
        existing = self._value_to_index.get(key)
        if existing is not None:
            return existing

        index = len(self.constants)
        self.constants.append(constant)
        self._value_to_index[key] = index
        return index

    def get(self, index: int) -> Constant | None:
        """Get a constant by index.

        Args:
            index: The index.

        Returns:
            The constant, or None if the index is negative or past the end.
        """
        if 0 <= index < len(self.constants):
            return self.constants[index]
        return None

    def size(self) -> int:
        """Get the number of constants in the pool.

        Returns:
            The size of the pool.
        """
        return len(self.constants)

    def __str__(self) -> str:
        """Return a header line followed by one line per pooled constant."""
        lines = ["Constants:"]
        lines.extend(f" [{i}] {const}" for i, const in enumerate(self.constants))
        return "\n".join(lines)
67
+
68
+
69
class MIRModule:
    """A compilation unit in MIR form.

    Holds the module's functions and global variables (both keyed by name),
    its constant pool, and the name of the designated main function, if any.
    """

    def __init__(self, name: str) -> None:
        """Create an empty module.

        Args:
            name: Module name.
        """
        self.name = name
        self.functions: dict[str, MIRFunction] = {}
        self.globals: dict[str, Variable] = {}
        self.constants = ConstantPool()
        self.main_function: str | None = None

    def add_function(self, func: MIRFunction) -> None:
        """Register a function under its own name, replacing any previous one.

        Args:
            func: The function to add.
        """
        self.functions[func.name] = func

    def get_function(self, name: str) -> MIRFunction | None:
        """Look up a function by name.

        Args:
            name: Function name.

        Returns:
            The function or None if not found.
        """
        return self.functions.get(name)

    def add_global(self, var: Variable) -> None:
        """Register a global variable under its own name.

        Args:
            var: The global variable.
        """
        self.globals[var.name] = var

    def get_global(self, name: str) -> Variable | None:
        """Look up a global variable by name.

        Args:
            name: Variable name.

        Returns:
            The variable or None if not found.
        """
        return self.globals.get(name)

    def set_main_function(self, name: str) -> None:
        """Record which function is the module's main function.

        The name is stored as given; it is not checked against the
        registered functions here (``validate`` reports a missing one).

        Args:
            name: The name of the main function.
        """
        self.main_function = name

    def get_main_function(self) -> MIRFunction | None:
        """Resolve the main function.

        Returns:
            The main function, or None if no main function name is set or
            the name is not registered.
        """
        main_name = self.main_function
        return None if main_name is None else self.functions.get(main_name)

    def validate(self) -> list[str]:
        """Check the module for structural problems.

        Reports a main function name that is not registered, functions
        without an entry block, non-entry blocks that contain instructions
        but no terminator, and jumps whose target label is not a block of
        the same function.

        Returns:
            List of validation errors (empty if valid).
        """
        problems = []

        main_name = self.main_function
        if main_name and main_name not in self.functions:
            problems.append(f"Main function '{main_name}' not found")

        for func_name, function in self.functions.items():
            if not function.cfg.entry_block:
                problems.append(f"Function '{func_name}' has no entry block")

            # Unterminated blocks: the entry block is exempt, and so are
            # blocks that hold no instructions at all.
            for label, block in function.cfg.blocks.items():
                if (
                    not block.is_terminated()
                    and block != function.cfg.entry_block
                    and block.instructions
                ):
                    problems.append(f"Block '{label}' in function '{func_name}' is not terminated")

            # Every jump target must name a block of this function.
            for block in function.cfg.blocks.values():
                terminator = block.get_terminator()
                if not terminator:
                    continue

                # Imported lazily, only once a terminator is actually found.
                from .mir_instructions import ConditionalJump, Jump

                if isinstance(terminator, Jump):
                    targets = [terminator.label]
                elif isinstance(terminator, ConditionalJump):
                    targets = [terminator.true_label]
                    if terminator.false_label:
                        targets.append(terminator.false_label)
                else:
                    targets = []

                for target in targets:
                    if target not in function.cfg.blocks:
                        problems.append(f"Jump to undefined label '{target}' in function '{func_name}'")

        return problems

    def to_string(self, include_constants: bool = True, include_globals: bool = True) -> str:
        """Render the module as text.

        Functions are emitted in name-sorted order.

        Args:
            include_constants: Whether to include the constant pool.
            include_globals: Whether to include global variables.

        Returns:
            String representation of the module.
        """
        out = [f"module {self.name} {{"]

        if include_constants and self.constants.size() > 0:
            out.append(f" {self.constants}")

        if include_globals and self.globals:
            out.append(" globals:")
            out.extend(f" {name}: {var.type}" for name, var in self.globals.items())

        out.append(" functions:")
        for name in sorted(self.functions):
            rendered = self.functions[name].to_string()
            out.extend(f" {line}" for line in rendered.split("\n"))

        out.append("}")
        return "\n".join(out)

    def __str__(self) -> str:
        """Return the full textual rendering of the module."""
        return self.to_string()

    def __repr__(self) -> str:
        """Return a short summary with the function and global counts."""
        return f"MIRModule({self.name}, functions={len(self.functions)}, globals={len(self.globals)})"
@@ -0,0 +1,480 @@
1
+ """MIR printer and dumper for debugging.
2
+
3
+ This module provides utilities to print and dump MIR in human-readable formats,
4
+ including textual representation and GraphViz DOT format for visualization.
5
+ """
6
+
7
+ import io
8
+ from typing import TextIO
9
+
10
+ from machine_dialect.mir.basic_block import CFG, BasicBlock
11
+ from machine_dialect.mir.mir_function import MIRFunction
12
+ from machine_dialect.mir.mir_instructions import (
13
+ Assert,
14
+ BinaryOp,
15
+ Call,
16
+ ConditionalJump,
17
+ Copy,
18
+ GetAttr,
19
+ Jump,
20
+ Label,
21
+ LoadConst,
22
+ LoadVar,
23
+ MIRInstruction,
24
+ Nop,
25
+ Phi,
26
+ Print,
27
+ Return,
28
+ Scope,
29
+ Select,
30
+ SetAttr,
31
+ StoreVar,
32
+ UnaryOp,
33
+ )
34
+ from machine_dialect.mir.mir_module import MIRModule
35
+ from machine_dialect.mir.mir_types import MIRType, MIRUnionType
36
+ from machine_dialect.mir.mir_values import Constant, FunctionRef, MIRValue, Temp, Variable
37
+
38
+
39
def format_type(mir_type: MIRType | MIRUnionType) -> str:
    """Render a MIR type as display text.

    Args:
        mir_type: The type to format.

    Returns:
        ``Union[...]`` listing the member names for a union type, the
        type's name for a plain MIR type, and ``str(mir_type)`` otherwise.
    """
    if isinstance(mir_type, MIRUnionType):
        member_names = ", ".join(member.name for member in mir_type.types)
        return f"Union[{member_names}]"
    if isinstance(mir_type, MIRType):
        return mir_type.name
    return str(mir_type)
53
+
54
+
55
class MIRPrinter:
    """Writes MIR modules and functions as indented, human-readable text."""

    def __init__(self, output: TextIO | None = None) -> None:
        """Set up the printer.

        Args:
            output: Output stream (defaults to an internal string buffer).
        """
        self.output = output if output else io.StringIO()
        self.indent_level = 0
        self.indent_str = " "

    def print_module(self, module: MIRModule) -> str:
        """Print a MIR module: header, globals, functions, main designation.

        Args:
            module: The module to print.

        Returns:
            The accumulated contents of the output when it is an
            ``io.StringIO``; an empty string for any other stream.
        """
        self._write(f"Module: {module.name}")
        self._write("")

        if hasattr(module, "globals") and module.globals:
            self._write("Globals:")
            self._indent()
            for global_name, global_value in module.globals.items():
                self._write(f"{global_name}: {self._format_value(global_value)}")
            self._dedent()
            self._write("")

        for function in module.functions.values():
            self.print_function(function)
            self._write("")

        if module.main_function:
            self._write(f"Main: {module.main_function}")

        return self._buffered_text()

    def print_function(self, func: MIRFunction) -> str:
        """Print a MIR function: signature, locals, temporaries, blocks.

        The entry block, when present, is printed before all other blocks.

        Args:
            func: The function to print.

        Returns:
            The accumulated contents of the output when it is an
            ``io.StringIO`` (this includes anything written earlier to the
            same buffer); an empty string for any other stream.
        """
        signature_params = ", ".join(self._describe_typed(param) for param in func.params)
        self._write(f"Function {func.name}({signature_params}) -> {format_type(func.return_type)} {{")
        self._indent()

        if func.locals:
            self._write("Locals:")
            self._indent()
            for local_var in func.locals.values():
                self._write(self._describe_typed(local_var))
            self._dedent()
            self._write("")

        if func.temporaries:
            self._write("Temporaries:")
            self._indent()
            for temporary in func.temporaries:
                self._write(self._describe_typed(temporary))
            self._dedent()
            self._write("")

        self._write("Blocks:")
        self._indent()

        entry = func.cfg.entry_block
        if entry:
            self._print_block(entry, func.cfg)

        for candidate in func.cfg.blocks.values():
            if candidate != func.cfg.entry_block:
                self._print_block(candidate, func.cfg)

        self._dedent()
        self._dedent()
        self._write("}")

        return self._buffered_text()

    def _buffered_text(self) -> str:
        """Return the buffer contents for a StringIO output, else ""."""
        if isinstance(self.output, io.StringIO):
            return self.output.getvalue()
        return ""

    def _describe_typed(self, item) -> str:
        """Format an item as ``name: type`` when it has both attributes.

        Falls back to ``str(item)`` when either attribute is missing.
        """
        if hasattr(item, "name") and hasattr(item, "type"):
            return f"{item.name}: {format_type(item.type)}"
        return str(item)

    def _print_block(self, block: BasicBlock, cfg: CFG) -> None:
        """Print one basic block.

        Emits the label (with predecessor labels, if any), then phi nodes,
        then instructions, then a successors comment, then a blank line.

        Args:
            block: The block to print.
            cfg: The containing CFG (not consulted by this method).
        """
        header = f"{block.label}:"
        pred_labels = [pred.label for pred in block.predecessors]
        if pred_labels:
            header += f" (preds: {', '.join(pred_labels)})"
        self._write(header)

        self._indent()

        # Phi nodes come before the ordinary instructions.
        for group in (block.phi_nodes, block.instructions):
            for instruction in group:
                self._write(self._format_instruction(instruction))

        succ_labels = [succ.label for succ in block.successors]
        if succ_labels:
            self._write(f"// successors: {', '.join(succ_labels)}")

        self._dedent()
        self._write("")

    def _format_instruction(self, inst: MIRInstruction) -> str:
        """Render a single instruction as text.

        Instruction kinds are tested in a fixed order; anything that matches
        none of them is rendered with ``str``.

        Args:
            inst: The instruction to format.

        Returns:
            String representation of the instruction.
        """
        fmt = self._format_value

        if isinstance(inst, BinaryOp):
            dest = fmt(inst.dest)
            left = fmt(inst.left)
            right = fmt(inst.right)
            return f"{dest} = {left} {inst.op} {right}"
        if isinstance(inst, UnaryOp):
            return f"{fmt(inst.dest)} = {inst.op} {fmt(inst.operand)}"
        if isinstance(inst, Copy):
            return f"{fmt(inst.dest)} = {fmt(inst.source)}"
        if isinstance(inst, LoadConst):
            return f"{fmt(inst.dest)} = const {fmt(inst.constant)}"
        if isinstance(inst, LoadVar):
            return f"{fmt(inst.dest)} = load {fmt(inst.var)}"
        if isinstance(inst, StoreVar):
            return f"store {fmt(inst.var)}, {fmt(inst.source)}"
        if isinstance(inst, Call):
            arg_list = ", ".join(fmt(arg) for arg in inst.args)
            if inst.dest:
                return f"{fmt(inst.dest)} = call {inst.func.name}({arg_list})"
            return f"call {inst.func.name}({arg_list})"
        if isinstance(inst, Return):
            return f"return {fmt(inst.value)}" if inst.value else "return"
        if isinstance(inst, Jump):
            return f"goto {inst.label}"
        if isinstance(inst, ConditionalJump):
            branch = f"if {fmt(inst.condition)} goto {inst.true_label}"
            if inst.false_label:
                branch += f" else {inst.false_label}"
            return branch
        if isinstance(inst, Phi):
            incoming = ", ".join(f"{fmt(val)}:{label}" for val, label in inst.incoming)
            return f"{fmt(inst.dest)} = φ({incoming})"
        if isinstance(inst, Select):
            dest = fmt(inst.dest)
            cond = fmt(inst.condition)
            when_true = fmt(inst.true_val)
            when_false = fmt(inst.false_val)
            return f"{dest} = select {cond}, {when_true}, {when_false}"
        if isinstance(inst, Print):
            return f"print {fmt(inst.value)}"
        if isinstance(inst, Assert):
            if inst.message:
                return f'assert {fmt(inst.condition)}, "{inst.message}"'
            return f"assert {fmt(inst.condition)}"
        if isinstance(inst, Scope):
            return "begin_scope" if inst.is_begin else "end_scope"
        if isinstance(inst, GetAttr):
            return f"{fmt(inst.dest)} = {fmt(inst.obj)}.{inst.attr}"
        if isinstance(inst, SetAttr):
            return f"{fmt(inst.obj)}.{inst.attr} = {fmt(inst.value)}"
        if isinstance(inst, Label):
            return f"{inst.name}:"
        if isinstance(inst, Nop):
            return "nop"
        return str(inst)

    def _format_value(self, value: MIRValue) -> str:
        """Render a MIR value as text.

        Variables are prefixed with ``%``, temporaries with ``#`` and
        function references with ``@``. Constants print as ``null``, a
        double-quoted string, ``true``/``false``, or ``str`` of the value.

        Args:
            value: The value to format.

        Returns:
            String representation of the value.
        """
        if isinstance(value, Variable):
            return f"%{value.name}"
        if isinstance(value, Temp):
            temp_label = value.name if hasattr(value, "name") else str(value)
            return f"#{temp_label}"
        if isinstance(value, Constant):
            raw = value.value
            if raw is None:
                return "null"
            if isinstance(raw, str):
                return f'"{raw}"'
            if isinstance(raw, bool):
                return "true" if raw else "false"
            return str(raw)
        if isinstance(value, FunctionRef):
            return f"@{value.name}"
        return str(value)

    def _write(self, text: str) -> None:
        """Write one line at the current indentation.

        Empty text produces a bare newline with no indentation.

        Args:
            text: Text to write.
        """
        if text:
            indentation = self.indent_str * self.indent_level
            self.output.write(f"{indentation}{text}")
        self.output.write("\n")

    def _indent(self) -> None:
        """Go one indentation level deeper."""
        self.indent_level += 1

    def _dedent(self) -> None:
        """Go one indentation level back, never below zero."""
        if self.indent_level > 0:
            self.indent_level -= 1
        else:
            self.indent_level = 0
310
+
311
+
312
class MIRDotExporter:
    """Exports MIR CFG to GraphViz DOT format for visualization.

    Node IDs come from a single counter. ``export_function`` restarts the
    numbering for each standalone graph, while ``export_module`` keeps one
    numbering across all functions, because DOT node IDs are global to the
    whole graph and must not repeat between clusters.

    Names and instruction text placed inside quoted DOT strings have their
    backslashes and double quotes escaped.
    """

    def __init__(self) -> None:
        """Initialize the DOT exporter."""
        self.node_counter = 0
        self.node_ids: dict[BasicBlock, str] = {}

    def export_module(self, module: MIRModule) -> str:
        """Export all functions in a module to DOT format.

        Each function is emitted as its own ``subgraph cluster_<name>``.

        Args:
            module: The module to export.

        Returns:
            DOT format string with all functions.
        """
        # One numbering for the whole module: restarting per function would
        # give every cluster a "node0" and merge unrelated blocks.
        self._reset_nodes()

        lines = ["digraph MIR {", " rankdir=TB;", " node [shape=box];", ""]

        for func_name, func in module.functions.items():
            lines.append(f" subgraph {self._cluster_id(func_name)} {{")
            lines.append(f' label="{self._escape(func_name)}";')
            # Blank separator lines are dropped inside a cluster.
            lines.extend(" " + line for line in self._function_body(func) if line.strip())
            lines.append(" }")
            lines.append("")

        lines.append("}")
        return "\n".join(lines)

    def export_function(self, func: MIRFunction) -> str:
        """Export a function's CFG to DOT format.

        Node numbering restarts at ``node0`` on every call.

        Args:
            func: The function to export.

        Returns:
            DOT format string.
        """
        self._reset_nodes()

        lines = [
            f'digraph "{self._escape(func.name)}" {{',
            " rankdir=TB;",
            " node [shape=box, style=rounded];",
            "",
        ]
        lines.extend(self._function_body(func))
        lines.append("}")
        return "\n".join(lines)

    def _reset_nodes(self) -> None:
        """Forget all previously assigned node IDs."""
        self.node_counter = 0
        self.node_ids.clear()

    def _function_body(self, func: MIRFunction) -> list[str]:
        """Build the node and edge statements for one function.

        Args:
            func: The function whose CFG is rendered.

        Returns:
            Node statements, one blank separator line, then edge statements.
        """
        lines = []
        entry = func.cfg.entry_block

        # Add nodes
        for block in func.cfg.blocks.values():
            node_id = self._get_node_id(block)
            label = self._format_block_label(block)
            color = "lightgreen" if block == entry else "lightblue"
            lines.append(f' {node_id} [label="{label}", fillcolor={color}, style="rounded,filled"];')

        lines.append("")

        # Add edges
        for block in func.cfg.blocks.values():
            src_id = self._get_node_id(block)
            last_inst = block.instructions[-1] if block.instructions else None
            for succ in block.successors:
                dst_id = self._get_node_id(succ)

                # Label the edge only when the block ends in a conditional
                # jump that names this successor as a target.
                edge_label = ""
                if last_inst is not None and isinstance(last_inst, ConditionalJump):
                    if succ.label == last_inst.true_label:
                        edge_label = "true"
                    elif succ.label == last_inst.false_label:
                        edge_label = "false"

                if edge_label:
                    lines.append(f' {src_id} -> {dst_id} [label="{edge_label}"];')
                else:
                    lines.append(f" {src_id} -> {dst_id};")

        return lines

    @staticmethod
    def _escape(text: object) -> str:
        """Escape text for use inside a double-quoted DOT string.

        Args:
            text: Raw text (converted with ``str()``).

        Returns:
            Text with backslashes and double quotes escaped.
        """
        # Backslashes first, so the ones added for quotes are not doubled.
        return str(text).replace("\\", "\\\\").replace('"', '\\"')

    @staticmethod
    def _cluster_id(func_name: object) -> str:
        """Build the subgraph ID for a function's cluster.

        Plain ASCII identifier-like names keep the bare ``cluster_<name>``
        form; any other name is emitted as a quoted, escaped DOT ID.

        Args:
            func_name: Name of the function.

        Returns:
            The subgraph ID, quoted when necessary.
        """
        name = str(func_name)
        ident = f"cluster_{name}"
        if name.isascii() and all(ch.isalnum() or ch == "_" for ch in name):
            return ident
        return '"' + MIRDotExporter._escape(ident) + '"'

    def _get_node_id(self, block: BasicBlock) -> str:
        """Get or create a node ID for a block.

        Args:
            block: The block.

        Returns:
            Node ID string.
        """
        if block not in self.node_ids:
            self.node_ids[block] = f"node{self.node_counter}"
            self.node_counter += 1
        return self.node_ids[block]

    def _format_block_label(self, block: BasicBlock) -> str:
        """Format a block's label for DOT.

        The label holds the block's name followed by at most five
        instructions, each ended with ``\\l`` (left-justified line break).
        When more instructions exist, a summary line gives the count omitted.

        Args:
            block: The block.

        Returns:
            Formatted label string, already escaped for a quoted DOT string.
        """
        max_inst = 5
        lines = [f"{self._escape(block.label)}:"]

        # Add first few instructions
        lines.extend(self._escape(inst) for inst in block.instructions[:max_inst])

        if len(block.instructions) > max_inst:
            lines.append(f"... ({len(block.instructions) - max_inst} more)")

        return "\\l".join(lines) + "\\l"
440
+
441
+
442
def dump_mir_module(module: MIRModule, output: TextIO | None = None) -> str:
    """Render a whole MIR module as text.

    Args:
        module: The module to render.
        output: Optional stream passed to the ``MIRPrinter``.

    Returns:
        The result of ``MIRPrinter.print_module`` for the module.
    """
    return MIRPrinter(output).print_module(module)
454
+
455
+
456
def dump_mir_function(func: MIRFunction, output: TextIO | None = None) -> str:
    """Render a single MIR function as text.

    Args:
        func: The function to render.
        output: Optional stream passed to the ``MIRPrinter``.

    Returns:
        The result of ``MIRPrinter.print_function`` for the function.
    """
    return MIRPrinter(output).print_function(func)
468
+
469
+
470
def export_cfg_dot(func: MIRFunction) -> str:
    """Render a function's control-flow graph as GraphViz DOT text.

    Args:
        func: The function whose CFG is exported.

    Returns:
        The result of ``MIRDotExporter.export_function`` on a fresh exporter.
    """
    return MIRDotExporter().export_function(func)