machine-dialect 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- machine_dialect/__main__.py +667 -0
- machine_dialect/agent/__init__.py +5 -0
- machine_dialect/agent/agent.py +360 -0
- machine_dialect/ast/__init__.py +95 -0
- machine_dialect/ast/ast_node.py +35 -0
- machine_dialect/ast/call_expression.py +82 -0
- machine_dialect/ast/dict_extraction.py +60 -0
- machine_dialect/ast/expressions.py +439 -0
- machine_dialect/ast/literals.py +309 -0
- machine_dialect/ast/program.py +35 -0
- machine_dialect/ast/statements.py +1433 -0
- machine_dialect/ast/tests/test_ast_string_representation.py +62 -0
- machine_dialect/ast/tests/test_boolean_literal.py +29 -0
- machine_dialect/ast/tests/test_collection_hir.py +138 -0
- machine_dialect/ast/tests/test_define_statement.py +142 -0
- machine_dialect/ast/tests/test_desugar.py +541 -0
- machine_dialect/ast/tests/test_foreach_desugar.py +245 -0
- machine_dialect/cfg/__init__.py +6 -0
- machine_dialect/cfg/config.py +156 -0
- machine_dialect/cfg/examples.py +221 -0
- machine_dialect/cfg/generate_with_ai.py +187 -0
- machine_dialect/cfg/openai_generation.py +200 -0
- machine_dialect/cfg/parser.py +94 -0
- machine_dialect/cfg/tests/__init__.py +1 -0
- machine_dialect/cfg/tests/test_cfg_parser.py +252 -0
- machine_dialect/cfg/tests/test_config.py +188 -0
- machine_dialect/cfg/tests/test_examples.py +391 -0
- machine_dialect/cfg/tests/test_generate_with_ai.py +354 -0
- machine_dialect/cfg/tests/test_openai_generation.py +256 -0
- machine_dialect/codegen/__init__.py +5 -0
- machine_dialect/codegen/bytecode_module.py +89 -0
- machine_dialect/codegen/bytecode_serializer.py +300 -0
- machine_dialect/codegen/opcodes.py +101 -0
- machine_dialect/codegen/register_codegen.py +1996 -0
- machine_dialect/codegen/symtab.py +208 -0
- machine_dialect/codegen/tests/__init__.py +1 -0
- machine_dialect/codegen/tests/test_array_operations_codegen.py +295 -0
- machine_dialect/codegen/tests/test_bytecode_serializer.py +185 -0
- machine_dialect/codegen/tests/test_register_codegen_ssa.py +324 -0
- machine_dialect/codegen/tests/test_symtab.py +418 -0
- machine_dialect/codegen/vm_serializer.py +621 -0
- machine_dialect/compiler/__init__.py +18 -0
- machine_dialect/compiler/compiler.py +197 -0
- machine_dialect/compiler/config.py +149 -0
- machine_dialect/compiler/context.py +149 -0
- machine_dialect/compiler/phases/__init__.py +19 -0
- machine_dialect/compiler/phases/bytecode_optimization.py +90 -0
- machine_dialect/compiler/phases/codegen.py +40 -0
- machine_dialect/compiler/phases/hir_generation.py +39 -0
- machine_dialect/compiler/phases/mir_generation.py +86 -0
- machine_dialect/compiler/phases/optimization.py +110 -0
- machine_dialect/compiler/phases/parsing.py +39 -0
- machine_dialect/compiler/pipeline.py +143 -0
- machine_dialect/compiler/tests/__init__.py +1 -0
- machine_dialect/compiler/tests/test_compiler.py +568 -0
- machine_dialect/compiler/vm_runner.py +173 -0
- machine_dialect/errors/__init__.py +32 -0
- machine_dialect/errors/exceptions.py +369 -0
- machine_dialect/errors/messages.py +82 -0
- machine_dialect/errors/tests/__init__.py +0 -0
- machine_dialect/errors/tests/test_expected_token_errors.py +188 -0
- machine_dialect/errors/tests/test_name_errors.py +118 -0
- machine_dialect/helpers/__init__.py +0 -0
- machine_dialect/helpers/stopwords.py +225 -0
- machine_dialect/helpers/validators.py +30 -0
- machine_dialect/lexer/__init__.py +9 -0
- machine_dialect/lexer/constants.py +23 -0
- machine_dialect/lexer/lexer.py +907 -0
- machine_dialect/lexer/tests/__init__.py +0 -0
- machine_dialect/lexer/tests/helpers.py +86 -0
- machine_dialect/lexer/tests/test_apostrophe_identifiers.py +122 -0
- machine_dialect/lexer/tests/test_backtick_identifiers.py +140 -0
- machine_dialect/lexer/tests/test_boolean_literals.py +108 -0
- machine_dialect/lexer/tests/test_case_insensitive_keywords.py +188 -0
- machine_dialect/lexer/tests/test_comments.py +200 -0
- machine_dialect/lexer/tests/test_double_asterisk_keywords.py +127 -0
- machine_dialect/lexer/tests/test_lexer_position.py +113 -0
- machine_dialect/lexer/tests/test_list_tokens.py +282 -0
- machine_dialect/lexer/tests/test_stopwords.py +80 -0
- machine_dialect/lexer/tests/test_strict_equality.py +129 -0
- machine_dialect/lexer/tests/test_token.py +41 -0
- machine_dialect/lexer/tests/test_tokenization.py +294 -0
- machine_dialect/lexer/tests/test_underscore_literals.py +343 -0
- machine_dialect/lexer/tests/test_url_literals.py +169 -0
- machine_dialect/lexer/tokens.py +487 -0
- machine_dialect/linter/__init__.py +10 -0
- machine_dialect/linter/__main__.py +144 -0
- machine_dialect/linter/linter.py +154 -0
- machine_dialect/linter/rules/__init__.py +8 -0
- machine_dialect/linter/rules/base.py +112 -0
- machine_dialect/linter/rules/statement_termination.py +99 -0
- machine_dialect/linter/tests/__init__.py +1 -0
- machine_dialect/linter/tests/mdrules/__init__.py +0 -0
- machine_dialect/linter/tests/mdrules/test_md101_statement_termination.py +181 -0
- machine_dialect/linter/tests/test_linter.py +81 -0
- machine_dialect/linter/tests/test_rules.py +110 -0
- machine_dialect/linter/tests/test_violations.py +71 -0
- machine_dialect/linter/violations.py +51 -0
- machine_dialect/mir/__init__.py +69 -0
- machine_dialect/mir/analyses/__init__.py +20 -0
- machine_dialect/mir/analyses/alias_analysis.py +315 -0
- machine_dialect/mir/analyses/dominance_analysis.py +49 -0
- machine_dialect/mir/analyses/escape_analysis.py +286 -0
- machine_dialect/mir/analyses/loop_analysis.py +272 -0
- machine_dialect/mir/analyses/tests/test_type_analysis.py +736 -0
- machine_dialect/mir/analyses/type_analysis.py +448 -0
- machine_dialect/mir/analyses/use_def_chains.py +232 -0
- machine_dialect/mir/basic_block.py +385 -0
- machine_dialect/mir/dataflow.py +445 -0
- machine_dialect/mir/debug_info.py +208 -0
- machine_dialect/mir/hir_to_mir.py +1738 -0
- machine_dialect/mir/mir_dumper.py +366 -0
- machine_dialect/mir/mir_function.py +167 -0
- machine_dialect/mir/mir_instructions.py +1877 -0
- machine_dialect/mir/mir_interpreter.py +556 -0
- machine_dialect/mir/mir_module.py +225 -0
- machine_dialect/mir/mir_printer.py +480 -0
- machine_dialect/mir/mir_transformer.py +410 -0
- machine_dialect/mir/mir_types.py +367 -0
- machine_dialect/mir/mir_validation.py +455 -0
- machine_dialect/mir/mir_values.py +268 -0
- machine_dialect/mir/optimization_config.py +233 -0
- machine_dialect/mir/optimization_pass.py +251 -0
- machine_dialect/mir/optimization_pipeline.py +355 -0
- machine_dialect/mir/optimizations/__init__.py +84 -0
- machine_dialect/mir/optimizations/algebraic_simplification.py +733 -0
- machine_dialect/mir/optimizations/branch_prediction.py +372 -0
- machine_dialect/mir/optimizations/constant_propagation.py +634 -0
- machine_dialect/mir/optimizations/cse.py +398 -0
- machine_dialect/mir/optimizations/dce.py +288 -0
- machine_dialect/mir/optimizations/inlining.py +551 -0
- machine_dialect/mir/optimizations/jump_threading.py +487 -0
- machine_dialect/mir/optimizations/licm.py +405 -0
- machine_dialect/mir/optimizations/loop_unrolling.py +366 -0
- machine_dialect/mir/optimizations/strength_reduction.py +422 -0
- machine_dialect/mir/optimizations/tail_call.py +207 -0
- machine_dialect/mir/optimizations/tests/test_loop_unrolling.py +483 -0
- machine_dialect/mir/optimizations/type_narrowing.py +397 -0
- machine_dialect/mir/optimizations/type_specialization.py +447 -0
- machine_dialect/mir/optimizations/type_specific.py +906 -0
- machine_dialect/mir/optimize_mir.py +89 -0
- machine_dialect/mir/pass_manager.py +391 -0
- machine_dialect/mir/profiling/__init__.py +26 -0
- machine_dialect/mir/profiling/profile_collector.py +318 -0
- machine_dialect/mir/profiling/profile_data.py +372 -0
- machine_dialect/mir/profiling/profile_reader.py +272 -0
- machine_dialect/mir/profiling/profile_writer.py +226 -0
- machine_dialect/mir/register_allocation.py +302 -0
- machine_dialect/mir/reporting/__init__.py +17 -0
- machine_dialect/mir/reporting/optimization_reporter.py +314 -0
- machine_dialect/mir/reporting/report_formatter.py +289 -0
- machine_dialect/mir/ssa_construction.py +342 -0
- machine_dialect/mir/tests/__init__.py +1 -0
- machine_dialect/mir/tests/test_algebraic_associativity.py +204 -0
- machine_dialect/mir/tests/test_algebraic_complex_patterns.py +221 -0
- machine_dialect/mir/tests/test_algebraic_division.py +126 -0
- machine_dialect/mir/tests/test_algebraic_simplification.py +863 -0
- machine_dialect/mir/tests/test_basic_block.py +425 -0
- machine_dialect/mir/tests/test_branch_prediction.py +459 -0
- machine_dialect/mir/tests/test_call_lowering.py +168 -0
- machine_dialect/mir/tests/test_collection_lowering.py +604 -0
- machine_dialect/mir/tests/test_cross_block_constant_propagation.py +255 -0
- machine_dialect/mir/tests/test_custom_passes.py +166 -0
- machine_dialect/mir/tests/test_debug_info.py +285 -0
- machine_dialect/mir/tests/test_dict_extraction_lowering.py +192 -0
- machine_dialect/mir/tests/test_dictionary_lowering.py +299 -0
- machine_dialect/mir/tests/test_double_negation.py +231 -0
- machine_dialect/mir/tests/test_escape_analysis.py +233 -0
- machine_dialect/mir/tests/test_hir_to_mir.py +465 -0
- machine_dialect/mir/tests/test_hir_to_mir_complete.py +389 -0
- machine_dialect/mir/tests/test_hir_to_mir_simple.py +130 -0
- machine_dialect/mir/tests/test_inlining.py +435 -0
- machine_dialect/mir/tests/test_licm.py +472 -0
- machine_dialect/mir/tests/test_mir_dumper.py +313 -0
- machine_dialect/mir/tests/test_mir_instructions.py +445 -0
- machine_dialect/mir/tests/test_mir_module.py +860 -0
- machine_dialect/mir/tests/test_mir_printer.py +387 -0
- machine_dialect/mir/tests/test_mir_types.py +123 -0
- machine_dialect/mir/tests/test_mir_types_enhanced.py +132 -0
- machine_dialect/mir/tests/test_mir_validation.py +378 -0
- machine_dialect/mir/tests/test_mir_values.py +168 -0
- machine_dialect/mir/tests/test_one_based_indexing.py +202 -0
- machine_dialect/mir/tests/test_optimization_helpers.py +60 -0
- machine_dialect/mir/tests/test_optimization_pipeline.py +554 -0
- machine_dialect/mir/tests/test_optimization_reporter.py +318 -0
- machine_dialect/mir/tests/test_pass_manager.py +294 -0
- machine_dialect/mir/tests/test_pass_registration.py +64 -0
- machine_dialect/mir/tests/test_profiling.py +356 -0
- machine_dialect/mir/tests/test_register_allocation.py +307 -0
- machine_dialect/mir/tests/test_report_formatters.py +372 -0
- machine_dialect/mir/tests/test_ssa_construction.py +433 -0
- machine_dialect/mir/tests/test_tail_call.py +236 -0
- machine_dialect/mir/tests/test_type_annotated_instructions.py +192 -0
- machine_dialect/mir/tests/test_type_narrowing.py +277 -0
- machine_dialect/mir/tests/test_type_specialization.py +421 -0
- machine_dialect/mir/tests/test_type_specific_optimization.py +545 -0
- machine_dialect/mir/tests/test_type_specific_optimization_advanced.py +382 -0
- machine_dialect/mir/type_inference.py +368 -0
- machine_dialect/parser/__init__.py +12 -0
- machine_dialect/parser/enums.py +45 -0
- machine_dialect/parser/parser.py +3655 -0
- machine_dialect/parser/protocols.py +11 -0
- machine_dialect/parser/symbol_table.py +169 -0
- machine_dialect/parser/tests/__init__.py +0 -0
- machine_dialect/parser/tests/helper_functions.py +193 -0
- machine_dialect/parser/tests/test_action_statements.py +334 -0
- machine_dialect/parser/tests/test_boolean_literal_expressions.py +152 -0
- machine_dialect/parser/tests/test_call_statements.py +154 -0
- machine_dialect/parser/tests/test_call_statements_errors.py +187 -0
- machine_dialect/parser/tests/test_collection_mutations.py +264 -0
- machine_dialect/parser/tests/test_conditional_expressions.py +343 -0
- machine_dialect/parser/tests/test_define_integration.py +468 -0
- machine_dialect/parser/tests/test_define_statements.py +311 -0
- machine_dialect/parser/tests/test_dict_extraction.py +115 -0
- machine_dialect/parser/tests/test_empty_literal.py +155 -0
- machine_dialect/parser/tests/test_float_literal_expressions.py +163 -0
- machine_dialect/parser/tests/test_identifier_expressions.py +57 -0
- machine_dialect/parser/tests/test_if_empty_block.py +61 -0
- machine_dialect/parser/tests/test_if_statements.py +299 -0
- machine_dialect/parser/tests/test_illegal_tokens.py +86 -0
- machine_dialect/parser/tests/test_infix_expressions.py +680 -0
- machine_dialect/parser/tests/test_integer_literal_expressions.py +137 -0
- machine_dialect/parser/tests/test_interaction_statements.py +269 -0
- machine_dialect/parser/tests/test_list_literals.py +277 -0
- machine_dialect/parser/tests/test_no_none_in_ast.py +94 -0
- machine_dialect/parser/tests/test_panic_mode_recovery.py +171 -0
- machine_dialect/parser/tests/test_parse_errors.py +114 -0
- machine_dialect/parser/tests/test_possessive_syntax.py +182 -0
- machine_dialect/parser/tests/test_prefix_expressions.py +415 -0
- machine_dialect/parser/tests/test_program.py +13 -0
- machine_dialect/parser/tests/test_return_statements.py +89 -0
- machine_dialect/parser/tests/test_set_statements.py +152 -0
- machine_dialect/parser/tests/test_strict_equality.py +258 -0
- machine_dialect/parser/tests/test_symbol_table.py +217 -0
- machine_dialect/parser/tests/test_url_literal_expressions.py +209 -0
- machine_dialect/parser/tests/test_utility_statements.py +423 -0
- machine_dialect/parser/token_buffer.py +159 -0
- machine_dialect/repl/__init__.py +3 -0
- machine_dialect/repl/repl.py +426 -0
- machine_dialect/repl/tests/__init__.py +0 -0
- machine_dialect/repl/tests/test_repl.py +606 -0
- machine_dialect/semantic/__init__.py +12 -0
- machine_dialect/semantic/analyzer.py +906 -0
- machine_dialect/semantic/error_messages.py +189 -0
- machine_dialect/semantic/tests/__init__.py +1 -0
- machine_dialect/semantic/tests/test_analyzer.py +364 -0
- machine_dialect/semantic/tests/test_error_messages.py +104 -0
- machine_dialect/tests/edge_cases/__init__.py +10 -0
- machine_dialect/tests/edge_cases/test_boundary_access.py +256 -0
- machine_dialect/tests/edge_cases/test_empty_collections.py +166 -0
- machine_dialect/tests/edge_cases/test_invalid_operations.py +243 -0
- machine_dialect/tests/edge_cases/test_named_list_edge_cases.py +295 -0
- machine_dialect/tests/edge_cases/test_nested_structures.py +313 -0
- machine_dialect/tests/edge_cases/test_type_mixing.py +277 -0
- machine_dialect/tests/integration/test_array_operations_emulation.py +248 -0
- machine_dialect/tests/integration/test_list_compilation.py +395 -0
- machine_dialect/tests/integration/test_lists_and_dictionaries.py +322 -0
- machine_dialect/type_checking/__init__.py +21 -0
- machine_dialect/type_checking/tests/__init__.py +1 -0
- machine_dialect/type_checking/tests/test_type_system.py +230 -0
- machine_dialect/type_checking/type_system.py +270 -0
- machine_dialect-0.1.0a1.dist-info/METADATA +128 -0
- machine_dialect-0.1.0a1.dist-info/RECORD +268 -0
- machine_dialect-0.1.0a1.dist-info/WHEEL +5 -0
- machine_dialect-0.1.0a1.dist-info/entry_points.txt +3 -0
- machine_dialect-0.1.0a1.dist-info/licenses/LICENSE +201 -0
- machine_dialect-0.1.0a1.dist-info/top_level.txt +2 -0
- machine_dialect_vm/__init__.pyi +15 -0
@@ -0,0 +1,487 @@
|
|
1
|
+
"""Advanced jump threading optimizations for bytecode.
|
2
|
+
|
3
|
+
This module implements sophisticated jump threading optimizations that
|
4
|
+
follow chains of jumps and eliminate redundant control flow.
|
5
|
+
"""
|
6
|
+
# mypy: ignore-errors
|
7
|
+
|
8
|
+
from __future__ import annotations
|
9
|
+
|
10
|
+
from dataclasses import dataclass
|
11
|
+
from typing import Any
|
12
|
+
|
13
|
+
from machine_dialect.codegen.bytecode_module import Chunk
|
14
|
+
from machine_dialect.codegen.opcodes import Opcode
|
15
|
+
from machine_dialect.mir.mir_module import MIRModule
|
16
|
+
from machine_dialect.mir.optimization_pass import ModulePass, PassInfo, PassType, PreservationLevel
|
17
|
+
|
18
|
+
|
19
|
+
@dataclass
class BasicBlock:
    """A contiguous run of bytecode treated as one node of the control flow graph.

    Attributes:
        start: Bytecode index of the first byte belonging to the block.
        end: Bytecode index one past the last byte of the block (exclusive).
        successors: Indices of the blocks that control may transfer to.
        predecessors: Indices of the blocks that control may arrive from.
        is_dead: Set to True once the block has been found unreachable.
    """

    # Half-open byte range [start, end) covered by this block.
    start: int
    end: int
    # Graph edges, stored as indices into the optimizer's block list.
    successors: list[int]
    predecessors: list[int]
    # Blocks start out live; dead block elimination flips this flag.
    is_dead: bool = False
|
36
|
+
|
37
|
+
|
38
|
+
class JumpThreadingOptimizer:
    """Performs advanced jump threading optimizations.

    The optimizer works on register-based bytecode held as a list of byte
    values. Jump instructions carry a little-endian signed 32-bit offset that
    is relative to the address of the instruction following the jump.
    """

    def __init__(self) -> None:
        """Initialize the optimizer with an empty CFG and zeroed statistics."""
        self.blocks: list[BasicBlock] = []
        self.block_map: dict[int, int] = {}  # bytecode index -> block index
        self.jump_targets: dict[int, int] = {}  # jump instruction -> target
        self.stats: dict[str, int] = {
            "jumps_threaded": 0,
            "blocks_eliminated": 0,
            "jumps_simplified": 0,
            "blocks_merged": 0,
        }

    def optimize(self, chunk: Chunk) -> Chunk:
        """Optimize jumps in a bytecode chunk.

        The input chunk is not modified; a new chunk is built around the
        rewritten bytecode. Statistics accumulate across calls.

        Args:
            chunk: The chunk to optimize.

        Returns:
            Optimized chunk.
        """
        bytecode = list(chunk.bytecode)

        # Build control flow graph
        self._build_cfg(bytecode)

        # Apply optimizations
        bytecode = self._thread_jumps(bytecode)
        # TODO: Fix dead block elimination for register-based bytecode
        # bytecode = self._eliminate_dead_blocks(bytecode)
        # TODO: Fix block merging for register-based bytecode
        # bytecode = self._merge_blocks(bytecode)
        # NOTE: the CFG above is built before jump threading rewrites the
        # offsets, so it has to be rebuilt before either pass is re-enabled.
        bytecode = self._simplify_conditional_jumps(bytecode, chunk.constants)

        # Create optimized chunk
        return Chunk(
            name=chunk.name,
            chunk_type=chunk.chunk_type,
            bytecode=bytearray(bytecode),
            constants=chunk.constants,
            num_locals=chunk.num_locals,
            num_params=chunk.num_params,
        )

    @staticmethod
    def _read_i32(bytecode: list[int], at: int) -> int:
        """Decode the little-endian signed 32-bit integer stored at ``at``.

        Callers must make sure that ``at + 4 <= len(bytecode)``.
        """
        return int.from_bytes(bytes(bytecode[at : at + 4]), "little", signed=True)

    @staticmethod
    def _jump_operand_index(opcode: int) -> int | None:
        """Return where the i32 offset sits inside a jump instruction.

        Args:
            opcode: The opcode to inspect.

        Returns:
            Position of the offset relative to the opcode byte (1 for
            ``JUMP_R``, 2 for the conditional jumps, which carry a condition
            register first), or None when the opcode is not a jump.
        """
        if opcode == Opcode.JUMP_R:
            return 1
        if opcode in (Opcode.JUMP_IF_R, Opcode.JUMP_IF_NOT_R):
            return 2
        return None

    def _instruction_length(self, bytecode: list[int], pc: int) -> int:
        """Get the size of the instruction that starts at ``pc``.

        Unlike ``_get_instruction_size`` this looks at the operand bytes, so
        every scan decodes ``RETURN_R`` the same way: 2 bytes when its
        has-value byte is 0, otherwise 3.

        Args:
            bytecode: The bytecode.
            pc: Index of the instruction's opcode byte.

        Returns:
            Size in bytes.
        """
        opcode = bytecode[pc]
        if opcode == Opcode.RETURN_R:
            returns_nothing = pc + 1 < len(bytecode) and bytecode[pc + 1] == 0
            return 2 if returns_nothing else 3
        return self._get_instruction_size(opcode)

    def _link_blocks(self, source: int, destination: int) -> None:
        """Record a CFG edge between two block indices, ignoring duplicates."""
        successors = self.blocks[source].successors
        if destination not in successors:
            successors.append(destination)
        predecessors = self.blocks[destination].predecessors
        if source not in predecessors:
            predecessors.append(source)

    def _build_cfg(self, bytecode: list[int]) -> None:
        """Build control flow graph from bytecode.

        Populates ``self.blocks``, ``self.block_map`` and
        ``self.jump_targets`` from scratch.

        Args:
            bytecode: The bytecode to analyze.
        """
        self.blocks = []
        self.block_map = {}
        self.jump_targets = {}

        size = len(bytecode)

        # First pass: identify jump targets and block boundaries
        leaders: set[int] = set()
        pc = 0
        while pc < size:
            operand_rel = self._jump_operand_index(bytecode[pc])
            if operand_rel is not None:
                next_pc = pc + operand_rel + 4
                # Skip a jump whose offset bytes are cut off by the end of the code.
                if next_pc <= size:
                    target = next_pc + self._read_i32(bytecode, pc + operand_rel)
                    leaders.add(target)
                    self.jump_targets[pc] = target
            pc += self._instruction_length(bytecode, pc)

        # Second pass: create basic blocks, remembering where the final
        # instruction of each block starts so the edges can be derived from
        # the real terminator rather than from a guessed instruction width.
        terminators = (
            Opcode.JUMP_R,
            Opcode.JUMP_IF_R,
            Opcode.JUMP_IF_NOT_R,
            Opcode.RETURN_R,
        )
        last_pcs: list[int] = []
        block_start = 0
        prev_pc = 0
        pc = 0
        while pc < size:
            # A jump target starts a new block.
            if pc in leaders and pc != block_start:
                self.blocks.append(BasicBlock(block_start, pc, [], []))
                last_pcs.append(prev_pc)
                block_start = pc

            opcode = bytecode[pc]
            prev_pc = pc
            pc += self._instruction_length(bytecode, pc)

            # A jump or return ends the current block.
            if opcode in terminators and pc < size:
                self.blocks.append(BasicBlock(block_start, pc, [], []))
                last_pcs.append(prev_pc)
                block_start = pc

        # Add final block if needed
        if block_start < size:
            self.blocks.append(BasicBlock(block_start, size, [], []))
            last_pcs.append(prev_pc)

        # Build block map
        for idx, block in enumerate(self.blocks):
            for covered_pc in range(block.start, block.end):
                self.block_map[covered_pc] = idx

        # Connect blocks (build successor/predecessor relationships)
        block_count = len(self.blocks)
        for idx, last_pc in enumerate(last_pcs):
            target = self.jump_targets.get(last_pc)
            if target is not None and target in self.block_map:
                self._link_blocks(idx, self.block_map[target])

            # Everything except an unconditional jump or a return can fall
            # through into the next block.
            falls_through = bytecode[last_pc] not in (Opcode.JUMP_R, Opcode.RETURN_R)
            if falls_through and idx + 1 < block_count:
                self._link_blocks(idx, idx + 1)

    def _thread_jumps(self, bytecode: list[int]) -> list[int]:
        """Thread jumps through chains of unconditional jumps.

        Every jump whose target is itself an unconditional jump is retargeted
        to the end of that chain. The scan repeats until a full pass makes no
        change.

        Args:
            bytecode: The bytecode to optimize.

        Returns:
            Optimized bytecode.
        """
        import struct

        result = bytecode.copy()
        size = len(result)
        changed = True

        while changed:
            changed = False
            pc = 0
            while pc < size:
                operand_rel = self._jump_operand_index(result[pc])
                if operand_rel is None:
                    pc += self._instruction_length(result, pc)
                    continue

                operand_at = pc + operand_rel
                next_pc = operand_at + 4
                # Leave a jump alone when its offset bytes are truncated.
                if next_pc <= size:
                    target = next_pc + self._read_i32(result, operand_at)

                    # Follow chain of jumps
                    final_target = self._follow_jump_chain(result, target)

                    if final_target != target:
                        # Update jump target with new offset
                        result[operand_at:next_pc] = struct.pack("<i", final_target - next_pc)
                        self.stats["jumps_threaded"] += 1
                        changed = True

                pc = next_pc

        return result

    def _follow_jump_chain(self, bytecode: list[int], target: int, max_depth: int = 10) -> int:
        """Follow a chain of unconditional jumps to find final target.

        Args:
            bytecode: The bytecode.
            target: Initial jump target.
            max_depth: Maximum chain depth to follow.

        Returns:
            Final jump target. A target outside the bytecode (including a
            negative one) is returned unchanged.
        """
        size = len(bytecode)
        visited: set[int] = set()
        current = target

        for _ in range(max_depth):
            # Stop on a cycle or when the address is not a valid index; a
            # negative address must not wrap around to the end of the list.
            if current in visited or not 0 <= current < size:
                break
            visited.add(current)

            # Stop unless the target is a complete unconditional jump.
            if current + 4 >= size or bytecode[current] != Opcode.JUMP_R:
                break

            target_offset = self._read_i32(bytecode, current + 1)

            # A jump to the next instruction can simply be skipped over.
            if target_offset == 0:
                return current + 5

            current = current + 5 + target_offset

        return current

    def _eliminate_dead_blocks(self, bytecode: list[int]) -> list[int]:
        """Eliminate unreachable blocks.

        Relies on the CFG produced by ``_build_cfg`` and marks every block it
        removes as dead.

        Args:
            bytecode: The bytecode.

        Returns:
            Bytecode with dead blocks replaced by NOPs.
        """
        # Mark reachable blocks (starting from block 0)
        reachable: set[int] = set()
        worklist = [0]

        while worklist:
            block_idx = worklist.pop()
            if block_idx in reachable or block_idx >= len(self.blocks):
                continue

            reachable.add(block_idx)
            worklist.extend(self.blocks[block_idx].successors)

        # Replace unreachable blocks with NOPs
        result = bytecode.copy()
        for idx, block in enumerate(self.blocks):
            if idx in reachable:
                continue
            result[block.start : block.end] = [Opcode.NOP] * (block.end - block.start)
            self.stats["blocks_eliminated"] += 1
            block.is_dead = True

        return result

    def _merge_blocks(self, bytecode: list[int]) -> list[int]:
        """Merge blocks that can be combined.

        A block is merged into the block that follows it when that block is
        its only successor, has no other predecessor, and the first block
        ends in an unconditional jump to it. The jump is overwritten with
        NOPs. Relies on the CFG produced by ``_build_cfg``.

        Args:
            bytecode: The bytecode.

        Returns:
            Optimized bytecode.
        """
        result = bytecode.copy()

        for block in self.blocks:
            if block.is_dead or len(block.successors) != 1:
                continue

            succ_idx = block.successors[0]
            if succ_idx >= len(self.blocks):
                continue

            succ_block = self.blocks[succ_idx]
            if succ_block.is_dead or len(succ_block.predecessors) != 1:
                continue
            if block.end != succ_block.start:
                continue

            # The block must really end with a decoded 5-byte JUMP_R; checking
            # jump_targets rules out an operand byte that merely looks like one.
            jump_pc = block.end - 5
            if jump_pc < block.start or jump_pc not in self.jump_targets:
                continue
            if result[jump_pc] != Opcode.JUMP_R:
                continue
            # Only a jump to the start of the next block is redundant.
            if block.end + self._read_i32(result, jump_pc + 1) != succ_block.start:
                continue

            # Remove the jump
            result[jump_pc : block.end] = [Opcode.NOP] * 5
            self.stats["blocks_merged"] += 1

        return result

    def _simplify_conditional_jumps(self, bytecode: list[int], constants: list[Any]) -> list[int]:
        """Simplify conditional jumps with constant conditions.

        Currently a placeholder that performs no rewriting.

        Args:
            bytecode: The bytecode.
            constants: Constant pool.

        Returns:
            The bytecode, unchanged.
        """
        # TODO: Implement the bytecode optimization
        # For now, just return the bytecode as-is
        # This optimization would require more complex analysis with register-based bytecode
        # since we need to track which register contains constants
        return bytecode

    def _get_instruction_size(self, opcode: int) -> int:
        """Get the size of an instruction including operands.

        The size is derived from the opcode alone, so for ``RETURN_R`` and
        ``CALL_R`` it is only the minimum size.

        Args:
            opcode: The opcode.

        Returns:
            Size in bytes.
        """
        # Control flow with offsets
        if opcode == Opcode.JUMP_R:
            return 5  # 1 opcode + 4 bytes (i32 offset)
        if opcode in (Opcode.JUMP_IF_R, Opcode.JUMP_IF_NOT_R):
            return 6  # 1 opcode + 1 cond + 4 bytes (i32 offset)

        # Instructions with register + u16 operand
        if opcode in (
            Opcode.LOAD_CONST_R,
            Opcode.LOAD_GLOBAL_R,
            Opcode.STORE_GLOBAL_R,
            Opcode.DEFINE_R,
        ):
            return 4  # 1 opcode + 1 register + 2 bytes (u16)

        # Instructions with 3 registers
        if opcode in (
            Opcode.ADD_R,
            Opcode.SUB_R,
            Opcode.MUL_R,
            Opcode.DIV_R,
            Opcode.MOD_R,
            Opcode.AND_R,
            Opcode.OR_R,
            Opcode.EQ_R,
            Opcode.NEQ_R,
            Opcode.LT_R,
            Opcode.GT_R,
            Opcode.LTE_R,
            Opcode.GTE_R,
            Opcode.CONCAT_STR_R,
            Opcode.ARRAY_GET_R,
            Opcode.ARRAY_SET_R,
        ):
            return 4  # 1 opcode + 3 registers

        # Instructions with 2 registers
        if opcode in (
            Opcode.MOVE_R,
            Opcode.NEG_R,
            Opcode.NOT_R,
            Opcode.STR_LEN_R,
            Opcode.NEW_ARRAY_R,
            Opcode.ARRAY_LEN_R,
        ):
            return 3  # 1 opcode + 2 registers

        # RETURN_R special case
        if opcode == Opcode.RETURN_R:
            return 2  # minimum size (can be 2 or 3 depending on has_value)

        # CALL_R special case
        # NOTE(review): "minimum" suggests argument bytes may follow the argc
        # byte; if so, scans desynchronise after a call with arguments. Confirm
        # against the bytecode generator.
        if opcode == Opcode.CALL_R:
            return 4  # 1 opcode + func + dst + argc (minimum)

        # Simple opcodes with no operands
        if opcode in (Opcode.NOP, Opcode.BREAKPOINT):
            return 1

        # Default for unknown/simple opcodes
        return 1

    def get_stats(self) -> dict[str, int]:
        """Get optimization statistics.

        Returns:
            Dictionary of statistics. This is the optimizer's own dictionary,
            not a copy.
        """
        return self.stats
|
437
|
+
|
438
|
+
|
439
|
+
class JumpThreadingPass(ModulePass):
    """Adapter that exposes the jump threading optimizer through the MIR pass interface."""

    def __init__(self) -> None:
        """Create the pass together with its own optimizer instance."""
        super().__init__()
        self.optimizer = JumpThreadingOptimizer()

    def get_info(self) -> PassInfo:
        """Describe this pass.

        Returns:
            Pass information.
        """
        info = PassInfo(
            name="jump-threading",
            description="Thread jumps and eliminate redundant control flow",
            pass_type=PassType.OPTIMIZATION,
            requires=[],
            preserves=PreservationLevel.NONE,
        )
        return info

    def run_on_module(self, module: MIRModule) -> bool:
        """Run jump threading on a module.

        Note: The optimization itself works on bytecode rather than MIR, so
        it would typically run after MIR->bytecode generation. This method
        leaves the module untouched.

        Args:
            module: The module to optimize.

        Returns:
            False as this is a bytecode-level optimization.
        """
        # Nothing to do at the MIR level; the method exists only so the pass
        # manager can treat this pass like any other.
        return False

    def finalize(self) -> None:
        """Finalize the pass; there is nothing to clean up."""
        return None

    def get_statistics(self) -> dict[str, int]:
        """Report the statistics gathered by the wrapped optimizer.

        Returns:
            Dictionary of statistics (a copy, safe for the caller to modify).
        """
        return dict(self.optimizer.get_stats())
|