machine-dialect 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268) hide show
  1. machine_dialect/__main__.py +667 -0
  2. machine_dialect/agent/__init__.py +5 -0
  3. machine_dialect/agent/agent.py +360 -0
  4. machine_dialect/ast/__init__.py +95 -0
  5. machine_dialect/ast/ast_node.py +35 -0
  6. machine_dialect/ast/call_expression.py +82 -0
  7. machine_dialect/ast/dict_extraction.py +60 -0
  8. machine_dialect/ast/expressions.py +439 -0
  9. machine_dialect/ast/literals.py +309 -0
  10. machine_dialect/ast/program.py +35 -0
  11. machine_dialect/ast/statements.py +1433 -0
  12. machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
  13. machine_dialect/ast/tests/test_boolean_literal.py +29 -0
  14. machine_dialect/ast/tests/test_collection_hir.py +138 -0
  15. machine_dialect/ast/tests/test_define_statement.py +142 -0
  16. machine_dialect/ast/tests/test_desugar.py +541 -0
  17. machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
  18. machine_dialect/cfg/__init__.py +6 -0
  19. machine_dialect/cfg/config.py +156 -0
  20. machine_dialect/cfg/examples.py +221 -0
  21. machine_dialect/cfg/generate_with_ai.py +187 -0
  22. machine_dialect/cfg/openai_generation.py +200 -0
  23. machine_dialect/cfg/parser.py +94 -0
  24. machine_dialect/cfg/tests/__init__.py +1 -0
  25. machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
  26. machine_dialect/cfg/tests/test_config.py +188 -0
  27. machine_dialect/cfg/tests/test_examples.py +391 -0
  28. machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
  29. machine_dialect/cfg/tests/test_openai_generation.py +256 -0
  30. machine_dialect/codegen/__init__.py +5 -0
  31. machine_dialect/codegen/bytecode_module.py +89 -0
  32. machine_dialect/codegen/bytecode_serializer.py +300 -0
  33. machine_dialect/codegen/opcodes.py +101 -0
  34. machine_dialect/codegen/register_codegen.py +1996 -0
  35. machine_dialect/codegen/symtab.py +208 -0
  36. machine_dialect/codegen/tests/__init__.py +1 -0
  37. machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
  38. machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
  39. machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
  40. machine_dialect/codegen/tests/test_symtab.py +418 -0
  41. machine_dialect/codegen/vm_serializer.py +621 -0
  42. machine_dialect/compiler/__init__.py +18 -0
  43. machine_dialect/compiler/compiler.py +197 -0
  44. machine_dialect/compiler/config.py +149 -0
  45. machine_dialect/compiler/context.py +149 -0
  46. machine_dialect/compiler/phases/__init__.py +19 -0
  47. machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
  48. machine_dialect/compiler/phases/codegen.py +40 -0
  49. machine_dialect/compiler/phases/hir_generation.py +39 -0
  50. machine_dialect/compiler/phases/mir_generation.py +86 -0
  51. machine_dialect/compiler/phases/optimization.py +110 -0
  52. machine_dialect/compiler/phases/parsing.py +39 -0
  53. machine_dialect/compiler/pipeline.py +143 -0
  54. machine_dialect/compiler/tests/__init__.py +1 -0
  55. machine_dialect/compiler/tests/test_compiler.py +568 -0
  56. machine_dialect/compiler/vm_runner.py +173 -0
  57. machine_dialect/errors/__init__.py +32 -0
  58. machine_dialect/errors/exceptions.py +369 -0
  59. machine_dialect/errors/messages.py +82 -0
  60. machine_dialect/errors/tests/__init__.py +0 -0
  61. machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
  62. machine_dialect/errors/tests/test_name_errors.py +118 -0
  63. machine_dialect/helpers/__init__.py +0 -0
  64. machine_dialect/helpers/stopwords.py +225 -0
  65. machine_dialect/helpers/validators.py +30 -0
  66. machine_dialect/lexer/__init__.py +9 -0
  67. machine_dialect/lexer/constants.py +23 -0
  68. machine_dialect/lexer/lexer.py +907 -0
  69. machine_dialect/lexer/tests/__init__.py +0 -0
  70. machine_dialect/lexer/tests/helpers.py +86 -0
  71. machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
  72. machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
  73. machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
  74. machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
  75. machine_dialect/lexer/tests/test_comments.py +200 -0
  76. machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
  77. machine_dialect/lexer/tests/test_lexer_position.py +113 -0
  78. machine_dialect/lexer/tests/test_list_tokens.py +282 -0
  79. machine_dialect/lexer/tests/test_stopwords.py +80 -0
  80. machine_dialect/lexer/tests/test_strict_equality.py +129 -0
  81. machine_dialect/lexer/tests/test_token.py +41 -0
  82. machine_dialect/lexer/tests/test_tokenization.py +294 -0
  83. machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
  84. machine_dialect/lexer/tests/test_url_literals.py +169 -0
  85. machine_dialect/lexer/tokens.py +487 -0
  86. machine_dialect/linter/__init__.py +10 -0
  87. machine_dialect/linter/__main__.py +144 -0
  88. machine_dialect/linter/linter.py +154 -0
  89. machine_dialect/linter/rules/__init__.py +8 -0
  90. machine_dialect/linter/rules/base.py +112 -0
  91. machine_dialect/linter/rules/statement_termination.py +99 -0
  92. machine_dialect/linter/tests/__init__.py +1 -0
  93. machine_dialect/linter/tests/mdrules/__init__.py +0 -0
  94. machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
  95. machine_dialect/linter/tests/test_linter.py +81 -0
  96. machine_dialect/linter/tests/test_rules.py +110 -0
  97. machine_dialect/linter/tests/test_violations.py +71 -0
  98. machine_dialect/linter/violations.py +51 -0
  99. machine_dialect/mir/__init__.py +69 -0
  100. machine_dialect/mir/analyses/__init__.py +20 -0
  101. machine_dialect/mir/analyses/alias_analysis.py +315 -0
  102. machine_dialect/mir/analyses/dominance_analysis.py +49 -0
  103. machine_dialect/mir/analyses/escape_analysis.py +286 -0
  104. machine_dialect/mir/analyses/loop_analysis.py +272 -0
  105. machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
  106. machine_dialect/mir/analyses/type_analysis.py +448 -0
  107. machine_dialect/mir/analyses/use_def_chains.py +232 -0
  108. machine_dialect/mir/basic_block.py +385 -0
  109. machine_dialect/mir/dataflow.py +445 -0
  110. machine_dialect/mir/debug_info.py +208 -0
  111. machine_dialect/mir/hir_to_mir.py +1738 -0
  112. machine_dialect/mir/mir_dumper.py +366 -0
  113. machine_dialect/mir/mir_function.py +167 -0
  114. machine_dialect/mir/mir_instructions.py +1877 -0
  115. machine_dialect/mir/mir_interpreter.py +556 -0
  116. machine_dialect/mir/mir_module.py +225 -0
  117. machine_dialect/mir/mir_printer.py +480 -0
  118. machine_dialect/mir/mir_transformer.py +410 -0
  119. machine_dialect/mir/mir_types.py +367 -0
  120. machine_dialect/mir/mir_validation.py +455 -0
  121. machine_dialect/mir/mir_values.py +268 -0
  122. machine_dialect/mir/optimization_config.py +233 -0
  123. machine_dialect/mir/optimization_pass.py +251 -0
  124. machine_dialect/mir/optimization_pipeline.py +355 -0
  125. machine_dialect/mir/optimizations/__init__.py +84 -0
  126. machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
  127. machine_dialect/mir/optimizations/branch_prediction.py +372 -0
  128. machine_dialect/mir/optimizations/constant_propagation.py +634 -0
  129. machine_dialect/mir/optimizations/cse.py +398 -0
  130. machine_dialect/mir/optimizations/dce.py +288 -0
  131. machine_dialect/mir/optimizations/inlining.py +551 -0
  132. machine_dialect/mir/optimizations/jump_threading.py +487 -0
  133. machine_dialect/mir/optimizations/licm.py +405 -0
  134. machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
  135. machine_dialect/mir/optimizations/strength_reduction.py +422 -0
  136. machine_dialect/mir/optimizations/tail_call.py +207 -0
  137. machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
  138. machine_dialect/mir/optimizations/type_narrowing.py +397 -0
  139. machine_dialect/mir/optimizations/type_specialization.py +447 -0
  140. machine_dialect/mir/optimizations/type_specific.py +906 -0
  141. machine_dialect/mir/optimize_mir.py +89 -0
  142. machine_dialect/mir/pass_manager.py +391 -0
  143. machine_dialect/mir/profiling/__init__.py +26 -0
  144. machine_dialect/mir/profiling/profile_collector.py +318 -0
  145. machine_dialect/mir/profiling/profile_data.py +372 -0
  146. machine_dialect/mir/profiling/profile_reader.py +272 -0
  147. machine_dialect/mir/profiling/profile_writer.py +226 -0
  148. machine_dialect/mir/register_allocation.py +302 -0
  149. machine_dialect/mir/reporting/__init__.py +17 -0
  150. machine_dialect/mir/reporting/optimization_reporter.py +314 -0
  151. machine_dialect/mir/reporting/report_formatter.py +289 -0
  152. machine_dialect/mir/ssa_construction.py +342 -0
  153. machine_dialect/mir/tests/__init__.py +1 -0
  154. machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
  155. machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
  156. machine_dialect/mir/tests/test_algebraic_division.py +126 -0
  157. machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
  158. machine_dialect/mir/tests/test_basic_block.py +425 -0
  159. machine_dialect/mir/tests/test_branch_prediction.py +459 -0
  160. machine_dialect/mir/tests/test_call_lowering.py +168 -0
  161. machine_dialect/mir/tests/test_collection_lowering.py +604 -0
  162. machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
  163. machine_dialect/mir/tests/test_custom_passes.py +166 -0
  164. machine_dialect/mir/tests/test_debug_info.py +285 -0
  165. machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
  166. machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
  167. machine_dialect/mir/tests/test_double_negation.py +231 -0
  168. machine_dialect/mir/tests/test_escape_analysis.py +233 -0
  169. machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
  170. machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
  171. machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
  172. machine_dialect/mir/tests/test_inlining.py +435 -0
  173. machine_dialect/mir/tests/test_licm.py +472 -0
  174. machine_dialect/mir/tests/test_mir_dumper.py +313 -0
  175. machine_dialect/mir/tests/test_mir_instructions.py +445 -0
  176. machine_dialect/mir/tests/test_mir_module.py +860 -0
  177. machine_dialect/mir/tests/test_mir_printer.py +387 -0
  178. machine_dialect/mir/tests/test_mir_types.py +123 -0
  179. machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
  180. machine_dialect/mir/tests/test_mir_validation.py +378 -0
  181. machine_dialect/mir/tests/test_mir_values.py +168 -0
  182. machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
  183. machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
  184. machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
  185. machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
  186. machine_dialect/mir/tests/test_pass_manager.py +294 -0
  187. machine_dialect/mir/tests/test_pass_registration.py +64 -0
  188. machine_dialect/mir/tests/test_profiling.py +356 -0
  189. machine_dialect/mir/tests/test_register_allocation.py +307 -0
  190. machine_dialect/mir/tests/test_report_formatters.py +372 -0
  191. machine_dialect/mir/tests/test_ssa_construction.py +433 -0
  192. machine_dialect/mir/tests/test_tail_call.py +236 -0
  193. machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
  194. machine_dialect/mir/tests/test_type_narrowing.py +277 -0
  195. machine_dialect/mir/tests/test_type_specialization.py +421 -0
  196. machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
  197. machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
  198. machine_dialect/mir/type_inference.py +368 -0
  199. machine_dialect/parser/__init__.py +12 -0
  200. machine_dialect/parser/enums.py +45 -0
  201. machine_dialect/parser/parser.py +3655 -0
  202. machine_dialect/parser/protocols.py +11 -0
  203. machine_dialect/parser/symbol_table.py +169 -0
  204. machine_dialect/parser/tests/__init__.py +0 -0
  205. machine_dialect/parser/tests/helper_functions.py +193 -0
  206. machine_dialect/parser/tests/test_action_statements.py +334 -0
  207. machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
  208. machine_dialect/parser/tests/test_call_statements.py +154 -0
  209. machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
  210. machine_dialect/parser/tests/test_collection_mutations.py +264 -0
  211. machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
  212. machine_dialect/parser/tests/test_define_integration.py +468 -0
  213. machine_dialect/parser/tests/test_define_statements.py +311 -0
  214. machine_dialect/parser/tests/test_dict_extraction.py +115 -0
  215. machine_dialect/parser/tests/test_empty_literal.py +155 -0
  216. machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
  217. machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
  218. machine_dialect/parser/tests/test_if_empty_block.py +61 -0
  219. machine_dialect/parser/tests/test_if_statements.py +299 -0
  220. machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
  221. machine_dialect/parser/tests/test_infix_expressions.py +680 -0
  222. machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
  223. machine_dialect/parser/tests/test_interaction_statements.py +269 -0
  224. machine_dialect/parser/tests/test_list_literals.py +277 -0
  225. machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
  226. machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
  227. machine_dialect/parser/tests/test_parse_errors.py +114 -0
  228. machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
  229. machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
  230. machine_dialect/parser/tests/test_program.py +13 -0
  231. machine_dialect/parser/tests/test_return_statements.py +89 -0
  232. machine_dialect/parser/tests/test_set_statements.py +152 -0
  233. machine_dialect/parser/tests/test_strict_equality.py +258 -0
  234. machine_dialect/parser/tests/test_symbol_table.py +217 -0
  235. machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
  236. machine_dialect/parser/tests/test_utility_statements.py +423 -0
  237. machine_dialect/parser/token_buffer.py +159 -0
  238. machine_dialect/repl/__init__.py +3 -0
  239. machine_dialect/repl/repl.py +426 -0
  240. machine_dialect/repl/tests/__init__.py +0 -0
  241. machine_dialect/repl/tests/test_repl.py +606 -0
  242. machine_dialect/semantic/__init__.py +12 -0
  243. machine_dialect/semantic/analyzer.py +906 -0
  244. machine_dialect/semantic/error_messages.py +189 -0
  245. machine_dialect/semantic/tests/__init__.py +1 -0
  246. machine_dialect/semantic/tests/test_analyzer.py +364 -0
  247. machine_dialect/semantic/tests/test_error_messages.py +104 -0
  248. machine_dialect/tests/edge_cases/__init__.py +10 -0
  249. machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
  250. machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
  251. machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
  252. machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
  253. machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
  254. machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
  255. machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
  256. machine_dialect/tests/integration/test_list_compilation.py +395 -0
  257. machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
  258. machine_dialect/type_checking/__init__.py +21 -0
  259. machine_dialect/type_checking/tests/__init__.py +1 -0
  260. machine_dialect/type_checking/tests/test_type_system.py +230 -0
  261. machine_dialect/type_checking/type_system.py +270 -0
  262. machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
  263. machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
  264. machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
  265. machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
  266. machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
  267. machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
  268. machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,487 @@
1
+ """Advanced jump threading optimizations for bytecode.
2
+
3
+ This module implements sophisticated jump threading optimizations that
4
+ follow chains of jumps and eliminate redundant control flow.
5
+ """
6
+ # mypy: ignore-errors
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+ from typing import Any
12
+
13
+ from machine_dialect.codegen.bytecode_module import Chunk
14
+ from machine_dialect.codegen.opcodes import Opcode
15
+ from machine_dialect.mir.mir_module import MIRModule
16
+ from machine_dialect.mir.optimization_pass import ModulePass, PassInfo, PassType, PreservationLevel
17
+
18
+
19
@dataclass
class BasicBlock:
    """A contiguous run of bytecode treated as one node of the control flow graph.

    Attributes:
        start: Bytecode index of the first byte belonging to the block.
        end: Bytecode index one past the last byte of the block.
        successors: Indices of the blocks that control can reach from here.
        predecessors: Indices of the blocks that can transfer control here.
        is_dead: True once the block has been marked unreachable.
    """

    # Half-open byte range [start, end) inside the chunk's bytecode.
    start: int
    end: int
    # Edges are stored as indices into the optimizer's block list.
    successors: list[int]
    predecessors: list[int]
    # Only the dead-block elimination step flips this flag.
    is_dead: bool = False
36
+
37
+
38
class JumpThreadingOptimizer:
    """Performs jump threading on register-based bytecode.

    The optimizer decodes a chunk's bytecode, builds a control flow graph of
    basic blocks, and retargets jumps that land on unconditional jumps so
    that they go straight to the end of the chain.

    Attributes:
        blocks: Basic blocks found by the most recent CFG build.
        block_map: Maps every bytecode index to the index of its block.
        jump_targets: Maps the index of each decoded jump instruction to its
            absolute target index.
        stats: Counters of applied optimizations. They are only initialized
            in ``__init__`` and therefore accumulate across ``optimize`` calls.
    """

    def __init__(self) -> None:
        """Initialize the optimizer with an empty CFG and zeroed statistics."""
        self.blocks: list[BasicBlock] = []
        self.block_map: dict[int, int] = {}  # bytecode index -> block index
        self.jump_targets: dict[int, int] = {}  # jump instruction -> target
        # Start index of the last instruction of each block, parallel to
        # ``self.blocks``. Needed because instructions have different sizes,
        # so the terminator cannot be located from ``block.end`` alone.
        self._block_last_pc: list[int] = []
        self.stats = {
            "jumps_threaded": 0,
            "blocks_eliminated": 0,
            "jumps_simplified": 0,
            "blocks_merged": 0,
        }

    def optimize(self, chunk: Chunk) -> Chunk:
        """Optimize jumps in a bytecode chunk.

        Args:
            chunk: The chunk to optimize. It is not modified.

        Returns:
            A new chunk with the optimized bytecode; all other fields are
            taken over from ``chunk``.
        """
        bytecode = list(chunk.bytecode)

        # Build control flow graph
        self._build_cfg(bytecode)

        # Apply optimizations
        bytecode = self._thread_jumps(bytecode)
        # TODO: Fix dead block elimination for register-based bytecode
        #       (_eliminate_dead_blocks is intentionally not run yet).
        # TODO: Fix block merging for register-based bytecode
        #       (_merge_blocks is intentionally not run yet).
        bytecode = self._simplify_conditional_jumps(bytecode, chunk.constants)

        # Create optimized chunk
        return Chunk(
            name=chunk.name,
            chunk_type=chunk.chunk_type,
            bytecode=bytearray(bytecode),
            constants=chunk.constants,
            num_locals=chunk.num_locals,
            num_params=chunk.num_params,
        )

    def _instruction_length(self, bytecode: list[int], pc: int) -> int:
        """Return the encoded size of the instruction starting at ``pc``.

        Unlike ``_get_instruction_size`` this looks at the operand bytes, so
        ``RETURN_R`` is sized by its flag byte (2 bytes when the byte after
        the opcode is 0, otherwise 3).

        Args:
            bytecode: The bytecode being decoded.
            pc: Index of the instruction's opcode byte.

        Returns:
            Size in bytes.
        """
        opcode = bytecode[pc]
        if opcode == Opcode.RETURN_R:
            no_value = pc + 1 < len(bytecode) and bytecode[pc + 1] == 0
            return 2 if no_value else 3
        return self._get_instruction_size(opcode)

    def _jump_operand_span(self, bytecode: list[int], pc: int) -> tuple[int, int] | None:
        """Locate the i32 offset operand of a jump instruction.

        Args:
            bytecode: The bytecode being decoded.
            pc: Index of the instruction's opcode byte.

        Returns:
            ``(operand_start, next_pc)`` when ``pc`` holds a jump whose bytes
            are all present, where the offset occupies
            ``bytecode[operand_start:next_pc]`` and is relative to
            ``next_pc``. ``None`` for non-jumps and truncated jumps.
        """
        opcode = bytecode[pc]
        if opcode == Opcode.JUMP_R:
            next_pc = pc + 5  # 1 opcode + 4 bytes (i32 offset)
        elif opcode in (Opcode.JUMP_IF_R, Opcode.JUMP_IF_NOT_R):
            next_pc = pc + 6  # 1 opcode + 1 cond + 4 bytes (i32 offset)
        else:
            return None
        if next_pc > len(bytecode):
            return None
        return next_pc - 4, next_pc

    def _link(self, src: int, dst: int) -> None:
        """Record a CFG edge from block ``src`` to block ``dst`` once."""
        if dst not in self.blocks[src].successors:
            self.blocks[src].successors.append(dst)
        if src not in self.blocks[dst].predecessors:
            self.blocks[dst].predecessors.append(src)

    def _build_cfg(self, bytecode: list[int]) -> None:
        """Build control flow graph from bytecode.

        Populates ``self.blocks``, ``self.block_map`` and
        ``self.jump_targets`` from scratch.

        Args:
            bytecode: The bytecode to analyze.
        """
        import struct

        self.blocks = []
        self.block_map = {}
        self.jump_targets = {}
        self._block_last_pc = []

        size = len(bytecode)

        # First pass: identify jump targets and block boundaries
        leaders: set[int] = set()
        pc = 0
        while pc < size:
            span = self._jump_operand_span(bytecode, pc)
            if span is not None:
                operand_start, next_pc = span
                offset = struct.unpack("<i", bytes(bytecode[operand_start:next_pc]))[0]
                target = next_pc + offset  # offsets are relative to the next instruction
                leaders.add(target)
                self.jump_targets[pc] = target
            pc += self._instruction_length(bytecode, pc)

        # Second pass: create basic blocks, remembering where the last
        # instruction of every block starts.
        block_start = 0
        prev_pc = 0
        pc = 0
        while pc < size:
            opcode = bytecode[pc]

            # A jump target starts a new block
            if pc in leaders and pc != block_start:
                self.blocks.append(BasicBlock(block_start, pc, [], []))
                self._block_last_pc.append(prev_pc)
                block_start = pc

            is_terminator = opcode in (
                Opcode.JUMP_R,
                Opcode.JUMP_IF_R,
                Opcode.JUMP_IF_NOT_R,
                Opcode.RETURN_R,
            )

            prev_pc = pc
            # Same sizing as the first pass so both passes stay aligned.
            pc += self._instruction_length(bytecode, pc)

            if is_terminator and pc < size:
                self.blocks.append(BasicBlock(block_start, pc, [], []))
                self._block_last_pc.append(prev_pc)
                block_start = pc

        # Add final block if needed
        if block_start < size:
            self.blocks.append(BasicBlock(block_start, size, [], []))
            self._block_last_pc.append(prev_pc)

        # Build block map
        for idx, block in enumerate(self.blocks):
            for index in range(block.start, block.end):
                self.block_map[index] = idx

        # Connect blocks (build successor/predecessor relationships)
        for idx, last_pc in enumerate(self._block_last_pc):
            target = self.jump_targets.get(last_pc)
            if target is not None and target in self.block_map:
                self._link(idx, self.block_map[target])

            # Everything except an unconditional jump or a return can fall
            # through into the next block.
            last_opcode = bytecode[last_pc]
            if last_opcode not in (Opcode.JUMP_R, Opcode.RETURN_R) and idx + 1 < len(self.blocks):
                self._link(idx, idx + 1)

    def _thread_jumps(self, bytecode: list[int]) -> list[int]:
        """Thread jumps through chains of unconditional jumps.

        Repeats until a full scan makes no change.

        Args:
            bytecode: The bytecode to optimize. It is not modified.

        Returns:
            Optimized bytecode of the same length.
        """
        import struct

        result = bytecode.copy()
        changed = True

        while changed:
            changed = False
            pc = 0
            while pc < len(result):
                span = self._jump_operand_span(result, pc)
                if span is not None:
                    operand_start, next_pc = span
                    offset = struct.unpack("<i", bytes(result[operand_start:next_pc]))[0]
                    target = next_pc + offset

                    # Follow chain of jumps
                    final_target = self._follow_jump_chain(result, target)

                    if final_target != target:
                        # Rewrite the operand in place; the instruction size
                        # is unchanged so no other offsets need fixing.
                        result[operand_start:next_pc] = struct.pack("<i", final_target - next_pc)
                        self.stats["jumps_threaded"] += 1
                        changed = True

                pc += self._instruction_length(result, pc)

        return result

    def _follow_jump_chain(self, bytecode: list[int], target: int, max_depth: int = 10) -> int:
        """Follow a chain of unconditional jumps to find final target.

        Args:
            bytecode: The bytecode.
            target: Initial jump target.
            max_depth: Maximum chain depth to follow.

        Returns:
            Final jump target. ``target`` itself is returned when it is out
            of range or does not hold a complete unconditional jump.
        """
        import struct

        visited: set[int] = set()
        current = target
        depth = 0

        # ``0 <= current`` matters: a negative index would otherwise wrap
        # around and read bytes from the end of the list.
        while depth < max_depth and current not in visited and 0 <= current < len(bytecode):
            visited.add(current)

            # Stop unless the target is a complete unconditional jump
            if current + 4 >= len(bytecode) or bytecode[current] != Opcode.JUMP_R:
                break

            target_offset = struct.unpack("<i", bytes(bytecode[current + 1 : current + 5]))[0]

            # A jump to the very next instruction is a no-op: skip over it
            if target_offset == 0:
                return current + 5

            current = current + 5 + target_offset
            depth += 1

        return current

    def _eliminate_dead_blocks(self, bytecode: list[int]) -> list[int]:
        """Eliminate unreachable blocks.

        Blocks not reachable from block 0 are overwritten with NOPs and
        flagged ``is_dead``.

        Args:
            bytecode: The bytecode. It is not modified.

        Returns:
            Bytecode with dead blocks replaced by NOPs.
        """
        # Mark reachable blocks (starting from block 0)
        reachable: set[int] = set()
        worklist = [0]

        while worklist:
            block_idx = worklist.pop()
            if block_idx in reachable or block_idx >= len(self.blocks):
                continue

            reachable.add(block_idx)
            worklist.extend(self.blocks[block_idx].successors)

        # Replace unreachable blocks with NOPs
        result = bytecode.copy()
        for idx, block in enumerate(self.blocks):
            if idx in reachable:
                continue
            for i in range(block.start, block.end):
                result[i] = Opcode.NOP
            self.stats["blocks_eliminated"] += 1
            block.is_dead = True

        return result

    def _merge_blocks(self, bytecode: list[int]) -> list[int]:
        """Merge blocks that can be combined.

        A block is merged into its successor when that successor is the only
        one, is adjacent, has no other predecessor, and the block ends with
        an unconditional jump to the successor's start. The jump is replaced
        with NOPs.

        Args:
            bytecode: The bytecode. It is not modified.

        Returns:
            Optimized bytecode.
        """
        result = bytecode.copy()

        for block, last_pc in zip(self.blocks, self._block_last_pc):
            if block.is_dead or len(block.successors) != 1:
                continue

            succ_idx = block.successors[0]
            if succ_idx >= len(self.blocks):
                continue

            succ_block = self.blocks[succ_idx]
            if succ_block.is_dead or len(succ_block.predecessors) != 1:
                continue
            if block.end != succ_block.start:
                continue

            # Only touch bytes that are known to be a JUMP_R instruction;
            # checking ``block.end - 5`` blindly could hit an operand byte.
            ends_with_jump = (
                last_pc == block.end - 5
                and result[last_pc] == Opcode.JUMP_R
                and self.jump_targets.get(last_pc) == succ_block.start
            )
            if ends_with_jump:
                for i in range(last_pc, block.end):
                    result[i] = Opcode.NOP
                self.stats["blocks_merged"] += 1

        return result

    def _simplify_conditional_jumps(self, bytecode: list[int], constants: list[Any]) -> list[int]:
        """Simplify conditional jumps with constant conditions.

        Currently a placeholder that performs no transformation.

        Args:
            bytecode: The bytecode.
            constants: Constant pool (unused for now).

        Returns:
            The same ``bytecode`` object, unchanged.
        """
        # TODO: Implement the bytecode optimization
        # This optimization would require more complex analysis with register-based bytecode
        # since we need to track which register contains constants
        return bytecode

    def _get_instruction_size(self, opcode: int) -> int:
        """Get the size of an instruction including operands.

        The size is derived from the opcode alone, so variable-length
        instructions report their minimum size.

        Args:
            opcode: The opcode.

        Returns:
            Size in bytes.
        """
        # Control flow with offsets
        if opcode == Opcode.JUMP_R:
            return 5  # 1 opcode + 4 bytes (i32 offset)
        if opcode in (Opcode.JUMP_IF_R, Opcode.JUMP_IF_NOT_R):
            return 6  # 1 opcode + 1 cond + 4 bytes (i32 offset)

        # Instructions with register + u16 operand
        if opcode in (
            Opcode.LOAD_CONST_R,
            Opcode.LOAD_GLOBAL_R,
            Opcode.STORE_GLOBAL_R,
            Opcode.DEFINE_R,
        ):
            return 4  # 1 opcode + 1 register + 2 bytes (u16)

        # Instructions with 3 registers
        if opcode in (
            Opcode.ADD_R,
            Opcode.SUB_R,
            Opcode.MUL_R,
            Opcode.DIV_R,
            Opcode.MOD_R,
            Opcode.AND_R,
            Opcode.OR_R,
            Opcode.EQ_R,
            Opcode.NEQ_R,
            Opcode.LT_R,
            Opcode.GT_R,
            Opcode.LTE_R,
            Opcode.GTE_R,
            Opcode.CONCAT_STR_R,
            Opcode.ARRAY_GET_R,
            Opcode.ARRAY_SET_R,
        ):
            return 4  # 1 opcode + 3 registers

        # Instructions with 2 registers
        if opcode in (
            Opcode.MOVE_R,
            Opcode.NEG_R,
            Opcode.NOT_R,
            Opcode.STR_LEN_R,
            Opcode.NEW_ARRAY_R,
            Opcode.ARRAY_LEN_R,
        ):
            return 3  # 1 opcode + 2 registers

        # RETURN_R special case
        if opcode == Opcode.RETURN_R:
            return 2  # minimum size (can be 2 or 3 depending on has_value)

        # CALL_R special case
        # NOTE(review): this is the minimum size; if arguments are encoded
        # inline after argc the decoders will desynchronize - confirm
        # against the VM's CALL_R encoding.
        if opcode == Opcode.CALL_R:
            return 4  # 1 opcode + func + dst + argc (minimum)

        # Simple opcodes with no operands
        if opcode in (Opcode.NOP, Opcode.BREAKPOINT):
            return 1

        # Default for unknown/simple opcodes
        return 1

    def get_stats(self) -> dict[str, int]:
        """Get optimization statistics.

        Returns:
            The optimizer's live statistics dictionary (not a copy).
        """
        return self.stats
437
+
438
+
439
class JumpThreadingPass(ModulePass):
    """Adapter that exposes the jump threading optimizer through the MIR pass interface."""

    def __init__(self) -> None:
        """Set up the base pass and create the wrapped optimizer."""
        super().__init__()
        self.optimizer = JumpThreadingOptimizer()

    def get_info(self) -> PassInfo:
        """Describe this pass to the pass manager.

        Returns:
            Pass information: an optimization pass named ``jump-threading``
            with no requirements that preserves no analyses.
        """
        info = PassInfo(
            name="jump-threading",
            description="Thread jumps and eliminate redundant control flow",
            pass_type=PassType.OPTIMIZATION,
            requires=[],
            preserves=PreservationLevel.NONE,
        )
        return info

    def run_on_module(self, module: MIRModule) -> bool:
        """Run jump threading on a module.

        The real work happens on bytecode, after MIR has been lowered, so at
        the MIR level this method does nothing. It exists so the pass can be
        registered with the pass manager.

        Args:
            module: The module to optimize (left untouched).

        Returns:
            Always False, because the MIR module is never modified.
        """
        return False

    def finalize(self) -> None:
        """Finalize the pass; there is nothing to clean up."""
        return None

    def get_statistics(self) -> dict[str, int]:
        """Get optimization statistics.

        Returns:
            A copy of the wrapped optimizer's statistics dictionary.
        """
        return dict(self.optimizer.get_stats())