machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,445 @@
1
+ """Generic dataflow analysis framework for MIR.
2
+
3
+ This module provides a generic framework for implementing dataflow analyses
4
+ on the MIR, replacing ad-hoc analysis implementations with a uniform approach.
5
+ """
6
+
7
+ from abc import ABC, abstractmethod
8
+ from dataclasses import dataclass, field
9
+ from enum import Enum
10
+ from typing import Any, Generic, Protocol, TypeVar, runtime_checkable
11
+
12
+ from machine_dialect.mir.basic_block import BasicBlock
13
+ from machine_dialect.mir.mir_function import MIRFunction
14
+ from machine_dialect.mir.mir_instructions import MIRInstruction
15
+ from machine_dialect.mir.mir_types import MIRType
16
+ from machine_dialect.mir.mir_values import MIRValue
17
+
18
+
19
@runtime_checkable
class Comparable(Protocol):
    """Structural type for objects that expose the rich-comparison methods.

    Because the protocol is ``runtime_checkable``, ``isinstance`` checks
    against it only verify that the listed methods are present.
    """

    def __lt__(self, other: Any) -> bool:
        """Return whether ``self`` is strictly less than ``other``."""

    def __le__(self, other: Any) -> bool:
        """Return whether ``self`` is less than or equal to ``other``."""

    def __gt__(self, other: Any) -> bool:
        """Return whether ``self`` is strictly greater than ``other``."""

    def __ge__(self, other: Any) -> bool:
        """Return whether ``self`` is greater than or equal to ``other``."""

    def __eq__(self, other: Any) -> bool:
        """Return whether ``self`` equals ``other``."""
32
+
33
+
34
# General-purpose type variable.
# NOTE(review): declared without a bound, so by itself it does not restrict
# to ordered types - confirm whether ``bound=Comparable`` was intended.
T = TypeVar("T")
# Unconstrained type variable; DataFlowAnalysis is generic over it and uses
# it as the type of the per-block analysis state.
U = TypeVar("U")

# Range is intentionally non-generic for now: its bounds, stride and modulo
# are plain ints or floats.
NumericValue = int | float
41
+
42
+
43
class Direction(Enum):
    """Which way a dataflow analysis walks the control-flow graph."""

    # States flow from a block's predecessors into the block.
    FORWARD = "forward"
    # States flow from a block's successors into the block.
    BACKWARD = "backward"
48
+
49
+
50
+ @dataclass
51
+ class Range:
52
+ """Value range with support for strided and modular arithmetic.
53
+
54
+ This replaces the simple tuple-based ranges with a rich representation
55
+ that can express more complex constraints.
56
+
57
+ Attributes:
58
+ min: Minimum value (None for unbounded).
59
+ max: Maximum value (None for unbounded).
60
+ stride: Step size for values (e.g., all even numbers).
61
+ modulo: Modular constraint (e.g., x % 4 == 0).
62
+ """
63
+
64
+ min: NumericValue | None = None
65
+ max: NumericValue | None = None
66
+ stride: NumericValue | None = None
67
+ modulo: NumericValue | None = None
68
+
69
+ def is_constant(self) -> bool:
70
+ """Check if this range represents a single constant value."""
71
+ return self.min is not None and self.min == self.max
72
+
73
+ def contains(self, value: NumericValue) -> bool:
74
+ """Check if a value is within this range.
75
+
76
+ Args:
77
+ value: The value to check.
78
+
79
+ Returns:
80
+ True if the value is in the range.
81
+ """
82
+ if self.min is not None and value < self.min:
83
+ return False
84
+ if self.max is not None and value > self.max:
85
+ return False
86
+ if self.stride is not None and self.min is not None:
87
+ diff = value - self.min
88
+ if diff % self.stride != 0:
89
+ return False
90
+ if self.modulo is not None:
91
+ if value % self.modulo != 0:
92
+ return False
93
+ return True
94
+
95
+ def intersect(self, other: "Range") -> "Range":
96
+ """Compute intersection of two ranges.
97
+
98
+ Args:
99
+ other: The other range.
100
+
101
+ Returns:
102
+ The intersection range.
103
+ """
104
+ new_min = self.min
105
+ if other.min is not None:
106
+ new_min = other.min if new_min is None else max(new_min, other.min)
107
+
108
+ new_max = self.max
109
+ if other.max is not None:
110
+ new_max = other.max if new_max is None else min(new_max, other.max)
111
+
112
+ # Handle stride - use GCD for intersection
113
+ new_stride = self.stride
114
+ if other.stride is not None:
115
+ if new_stride is None:
116
+ new_stride = other.stride
117
+ else:
118
+ # Simplified - in reality would need GCD
119
+ new_stride = max(new_stride, other.stride)
120
+
121
+ # Handle modulo - use LCM for intersection
122
+ new_modulo = self.modulo
123
+ if other.modulo is not None:
124
+ if new_modulo is None:
125
+ new_modulo = other.modulo
126
+ else:
127
+ # Simplified - in reality would need LCM
128
+ new_modulo = max(new_modulo, other.modulo)
129
+
130
+ return Range(new_min, new_max, new_stride, new_modulo)
131
+
132
+ def union(self, other: "Range") -> "Range":
133
+ """Compute union of two ranges.
134
+
135
+ Args:
136
+ other: The other range.
137
+
138
+ Returns:
139
+ The union range.
140
+ """
141
+ new_min = self.min
142
+ if other.min is not None:
143
+ new_min = other.min if new_min is None else min(new_min, other.min)
144
+
145
+ new_max = self.max
146
+ if other.max is not None:
147
+ new_max = other.max if new_max is None else max(new_max, other.max)
148
+
149
+ # Union loses stride and modulo constraints unless they match
150
+ new_stride = self.stride if self.stride == other.stride else None
151
+ new_modulo = self.modulo if self.modulo == other.modulo else None
152
+
153
+ return Range(new_min, new_max, new_stride, new_modulo)
154
+
155
+
156
@dataclass
class TypeContext:
    """Type information for a value, with optional per-block refinements.

    Used in place of a bare type dictionary so that a value can carry more
    than just its base type.

    Attributes:
        base_type: The base MIR type.
        range: Value range for numeric types.
        nullable: Whether the value can be null/empty.
        refinements: Per-block type refinements.
        provenance: Source of type information.
    """

    base_type: MIRType
    range: Range | None = None
    nullable: bool = True
    refinements: dict[BasicBlock, MIRType] = field(default_factory=dict)
    provenance: str | None = None

    def refine_for_block(self, block: BasicBlock, refined_type: MIRType) -> None:
        """Record that the value has a narrower type inside ``block``.

        Any refinement previously stored for the same block is replaced.

        Args:
            block: The block where the refinement applies.
            refined_type: The refined type in that block.
        """
        self.refinements.update({block: refined_type})

    def get_type_for_block(self, block: BasicBlock) -> MIRType:
        """Look up the type of the value as seen from ``block``.

        Args:
            block: The block to query.

        Returns:
            The refinement stored for that block, or ``base_type`` when the
            block has none.
        """
        if block in self.refinements:
            return self.refinements[block]
        return self.base_type
196
+
197
+
198
class DataFlowAnalysis(Generic[U], ABC):
    """Base class for iterative dataflow analyses over a MIR function.

    Subclasses supply the lattice (``initial_state`` and ``meet``) and the
    per-instruction ``transfer`` function; ``analyze`` drives them to a
    fixpoint over the function's control-flow graph.
    """

    def __init__(self, direction: Direction = Direction.FORWARD) -> None:
        """Initialize the dataflow analysis.

        Args:
            direction: Direction of analysis (forward or backward).
        """
        self.direction = direction
        # Per-block result: the state obtained after running ``transfer``
        # over every instruction of the block.
        self.state: dict[BasicBlock, U] = {}
        self.entry_state: U | None = None
        # Not assigned by ``analyze``; it stays None unless set elsewhere.
        self.exit_state: U | None = None

    @abstractmethod
    def initial_state(self) -> U:
        """Get the initial state for the analysis.

        Returns:
            The state every block starts with, also used as the input of
            blocks that have no neighbours to merge from.
        """
        pass

    @abstractmethod
    def transfer(self, inst: MIRInstruction, state: U) -> U:
        """Transfer function for an instruction.

        Args:
            inst: The instruction to process.
            state: The input state.

        Returns:
            The output state after the instruction.
        """
        pass

    @abstractmethod
    def meet(self, states: list[U]) -> U:
        """Meet operation for joining states.

        Args:
            states: States to join.

        Returns:
            The joined state.
        """
        pass

    def analyze(self, function: MIRFunction) -> dict[BasicBlock, U]:
        """Run the dataflow analysis on a function.

        Blocks are visited in CFG dictionary order (reversed for backward
        analyses) until a full sweep changes nothing, or until 100 sweeps
        have run.

        Args:
            function: The function to analyze.

        Returns:
            Mapping from blocks to their computed states.
        """
        # Seed every block with a fresh initial state.
        for seeded in function.cfg.blocks.values():
            self.state[seeded] = self.initial_state()

        # The entry block gets its own initial state, remembered separately.
        if function.cfg.entry_block:
            self.entry_state = self.initial_state()
            self.state[function.cfg.entry_block] = self.entry_state

        max_iterations = 100  # Hard cap so a non-converging analysis still stops
        for _ in range(max_iterations):
            changed = False

            worklist = list(function.cfg.blocks.values())
            if self.direction == Direction.BACKWARD:
                worklist.reverse()

            for block in worklist:
                previous = self.state[block]

                # Merge the states flowing in from the neighbouring blocks.
                if self.direction == Direction.FORWARD:
                    neighbours = block.predecessors
                else:
                    neighbours = block.successors
                incoming = [self.state[neighbour] for neighbour in neighbours]
                running = self.meet(incoming) if incoming else self.initial_state()

                # Push the merged state through the block's instructions.
                ordered = block.instructions
                if self.direction == Direction.BACKWARD:
                    ordered = list(reversed(ordered))
                for inst in ordered:
                    running = self.transfer(inst, running)

                if running != previous:
                    self.state[block] = running
                    changed = True

            if not changed:
                break

        return self.state
314
+
315
+
316
class TypePropagation(DataFlowAnalysis[dict[MIRValue, TypeContext]]):
    """Dataflow analysis that tracks a ``TypeContext`` per MIR value.

    Intended as the replacement for the ad-hoc type propagation in
    TypeSpecificOptimization.
    """

    def initial_state(self) -> dict[MIRValue, TypeContext]:
        """Return the empty type state."""
        return {}

    def transfer(self, inst: MIRInstruction, state: dict[MIRValue, TypeContext]) -> dict[MIRValue, TypeContext]:
        """Transfer function for type propagation.

        Placeholder logic: every value defined by the instruction is mapped
        to a fresh context of type ``MIRType.UNKNOWN``.

        Args:
            inst: The instruction to process.
            state: The input type state (left unmodified).

        Returns:
            The output type state.
        """
        updated = state.copy()
        updated.update((defined, TypeContext(MIRType.UNKNOWN)) for defined in inst.get_defs())
        return updated

    def meet(self, states: list[dict[MIRValue, TypeContext]]) -> dict[MIRValue, TypeContext]:
        """Meet operation for type states.

        When a value appears in several states, the context from the
        earliest state in the list is kept; no least upper bound is
        computed yet.

        Args:
            states: Type states to join.

        Returns:
            The joined type state.
        """
        if not states:
            return {}

        first, *others = states
        merged = first.copy()
        for other in others:
            for value, ctx in other.items():
                # Keep whatever is already recorded; only add new values.
                merged.setdefault(value, ctx)

        return merged
370
+
371
+
372
class RangeAnalysis(DataFlowAnalysis[dict[MIRValue, Range]]):
    """Dataflow analysis that tracks a numeric ``Range`` per MIR value.

    Intended as the replacement for the ad-hoc range tracking in
    TypeSpecificOptimization.
    """

    def initial_state(self) -> dict[MIRValue, Range]:
        """Return the empty range state."""
        return {}

    def transfer(self, inst: MIRInstruction, state: dict[MIRValue, Range]) -> dict[MIRValue, Range]:
        """Transfer function for range analysis.

        Only two instruction shapes are handled so far: integer constants
        and ``+`` on two operands whose ranges are both known.

        Args:
            inst: The instruction to process.
            state: The input range state (left unmodified).

        Returns:
            The output range state.
        """
        updated = state.copy()

        from machine_dialect.mir.mir_instructions import BinaryOp, LoadConst

        if isinstance(inst, LoadConst):
            # An integer constant is a single-point range.
            literal = inst.constant.value
            if isinstance(literal, int):
                updated[inst.dest] = Range(literal, literal)
            return updated

        if not isinstance(inst, BinaryOp):
            return updated

        lhs = state.get(inst.left)
        rhs = state.get(inst.right)
        if not (lhs and rhs and inst.op == "+"):
            return updated

        # Bounds add component-wise; an unbounded side stays unbounded.
        low = None if lhs.min is None or rhs.min is None else lhs.min + rhs.min
        high = None if lhs.max is None or rhs.max is None else lhs.max + rhs.max
        updated[inst.dest] = Range(low, high)

        return updated

    def meet(self, states: list[dict[MIRValue, Range]]) -> dict[MIRValue, Range]:
        """Meet operation for range states.

        Values present in several states get the union of their ranges;
        values present in only one state are carried over unchanged.

        Args:
            states: Range states to join.

        Returns:
            The joined range state.
        """
        if not states:
            return {}

        first, *others = states
        merged = first.copy()
        for other in others:
            for value, incoming in other.items():
                merged[value] = merged[value].union(incoming) if value in merged else incoming

        return merged
@@ -0,0 +1,208 @@
1
+ """Debug information tracking for MIR compilation.
2
+
3
+ This module tracks source locations and variable information for debugging.
4
+ """
5
+
6
+ from dataclasses import dataclass
7
+ from typing import Any
8
+
9
+ from machine_dialect.mir.mir_instructions import MIRInstruction
10
+ from machine_dialect.mir.mir_values import Variable
11
+
12
+
13
@dataclass
class SourceLocation:
    """A position inside a source file.

    Attributes:
        file: Source file name.
        line: Line number (1-based).
        column: Column number (1-based).
    """

    file: str
    line: int
    column: int

    def __str__(self) -> str:
        """Render the location as ``file:line:column``."""
        return "{}:{}:{}".format(self.file, self.line, self.column)
30
+
31
+
32
@dataclass
class DebugVariable:
    """Debugger-facing description of a single variable.

    Attributes:
        name: Original variable name.
        type_name: Type name as string.
        scope_level: Scope nesting level; defaults to 0.
        is_parameter: Whether this is a function parameter; defaults to False.
    """

    # Required fields.
    name: str
    type_name: str

    # Optional fields.
    scope_level: int = 0
    is_parameter: bool = False
47
+
48
+
49
@dataclass
class LineMapping:
    """Associates one bytecode offset with one source line.

    Attributes:
        bytecode_offset: Offset in bytecode.
        source_line: Source line number.
    """

    bytecode_offset: int
    source_line: int
60
+
61
+
62
class DebugInfo:
    """Tracks debug information during compilation.

    Attributes:
        instruction_locations: Source location recorded for each MIR instruction.
        variable_info: Debug record for each MIR variable.
        line_mappings: Bytecode-offset-to-source-line entries, in insertion order.
        current_file: Name of the source file being compiled.
        symbols: Debug records keyed by original variable name. A later
            variable with the same name replaces the earlier entry.
    """

    def __init__(self) -> None:
        """Initialize debug information tracker."""
        # Map MIR instructions to source locations
        self.instruction_locations: dict[MIRInstruction, SourceLocation] = {}

        # Map variables to debug info
        self.variable_info: dict[Variable, DebugVariable] = {}

        # Line number mappings for bytecode
        self.line_mappings: list[LineMapping] = []

        # Current source file being compiled
        self.current_file: str = "<unknown>"

        # Symbol table for debugging
        self.symbols: dict[str, DebugVariable] = {}

    def set_instruction_location(self, inst: MIRInstruction, location: SourceLocation) -> None:
        """Set source location for an instruction.

        Args:
            inst: The MIR instruction.
            location: The source location.
        """
        self.instruction_locations[inst] = location

    def get_instruction_location(self, inst: MIRInstruction) -> SourceLocation | None:
        """Get source location for an instruction.

        Args:
            inst: The MIR instruction.

        Returns:
            The source location, or None if none was recorded.
        """
        return self.instruction_locations.get(inst)

    def add_variable(self, var: Variable, debug_var: DebugVariable) -> None:
        """Add debug information for a variable.

        The record is stored both under the MIR variable and under
        ``debug_var.name`` in the symbol table.

        Args:
            var: The MIR variable.
            debug_var: The debug information.
        """
        self.variable_info[var] = debug_var
        self.symbols[debug_var.name] = debug_var

    def add_line_mapping(self, mapping: LineMapping) -> None:
        """Add a line number mapping.

        Mappings are appended as given; no ordering is enforced here.

        Args:
            mapping: The line mapping to add.
        """
        self.line_mappings.append(mapping)

    def get_line_for_offset(self, offset: int) -> int | None:
        """Get source line for bytecode offset.

        Returns the line of the mapping with the largest bytecode offset
        that does not exceed ``offset``. The lookup does not require
        ``line_mappings`` to be sorted, since ``add_line_mapping`` only
        appends. When several mappings share that offset, the one added
        last wins.

        Args:
            offset: Bytecode offset.

        Returns:
            Source line number, or None if no mapping starts at or before
            ``offset``.
        """
        best: LineMapping | None = None
        for mapping in self.line_mappings:
            if mapping.bytecode_offset > offset:
                # Starts after the queried offset; cannot be the answer,
                # but later entries may still qualify, so keep scanning.
                continue
            # ">=" so that a later entry with an equal offset replaces an
            # earlier one.
            if best is None or mapping.bytecode_offset >= best.bytecode_offset:
                best = mapping
        return None if best is None else best.source_line

    def generate_source_map(self) -> dict[str, Any]:
        """Generate a source map for debugging.

        Returns:
            A dict with keys ``"version"`` (always 1), ``"file"`` (the
            current file name), ``"mappings"`` (line mappings sorted by
            bytecode offset) and ``"symbols"`` (per-name type, scope level
            and parameter flag).
        """
        return {
            "version": 1,
            "file": self.current_file,
            "mappings": [
                {"bytecode_offset": m.bytecode_offset, "source_line": m.source_line}
                for m in sorted(self.line_mappings, key=lambda x: x.bytecode_offset)
            ],
            "symbols": {
                name: {"type": var.type_name, "scope_level": var.scope_level, "is_parameter": var.is_parameter}
                for name, var in self.symbols.items()
            },
        }
156
+
157
+
158
class DebugInfoBuilder:
    """Incrementally assembles a ``DebugInfo`` while code is being lowered."""

    def __init__(self) -> None:
        """Start with an empty ``DebugInfo``, line 1 and scope level 0."""
        self.scope_level = 0
        self.current_line = 1
        self.debug_info = DebugInfo()

    def enter_scope(self) -> None:
        """Go one scope level deeper."""
        self.scope_level += 1

    def exit_scope(self) -> None:
        """Leave the current scope; does nothing when the level is not positive."""
        if self.scope_level <= 0:
            return
        self.scope_level -= 1

    def track_variable(self, name: str, var: Variable, type_name: str, is_parameter: bool = False) -> None:
        """Record debug information for a variable at the current scope level.

        Args:
            name: Original variable name.
            var: The MIR variable.
            type_name: Type name as string.
            is_parameter: Whether this is a parameter.
        """
        self.debug_info.add_variable(
            var,
            DebugVariable(
                name=name,
                type_name=type_name,
                scope_level=self.scope_level,
                is_parameter=is_parameter,
            ),
        )

    def track_instruction(self, inst: MIRInstruction, line: int, column: int = 1) -> None:
        """Record where an instruction came from and remember its line.

        The location's file is the ``current_file`` of the debug info
        being built.

        Args:
            inst: The MIR instruction.
            line: Source line number.
            column: Source column number.
        """
        info = self.debug_info
        info.set_instruction_location(
            inst,
            SourceLocation(file=info.current_file, line=line, column=column),
        )
        self.current_line = line

    def get_debug_info(self) -> DebugInfo:
        """Return the debug information built so far.

        Returns:
            The debug information.
        """
        return self.debug_info