zexus 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (227) hide show
  1. package/LICENSE +0 -0
  2. package/README.md +2513 -0
  3. package/bin/zexus +2 -0
  4. package/bin/zpics +2 -0
  5. package/bin/zpm +2 -0
  6. package/bin/zx +2 -0
  7. package/bin/zx-deploy +2 -0
  8. package/bin/zx-dev +2 -0
  9. package/bin/zx-run +2 -0
  10. package/package.json +66 -0
  11. package/scripts/README.md +24 -0
  12. package/scripts/postinstall.js +44 -0
  13. package/shared_config.json +24 -0
  14. package/src/README.md +1525 -0
  15. package/src/tests/run_zexus_tests.py +117 -0
  16. package/src/tests/test_all_phases.zx +346 -0
  17. package/src/tests/test_blockchain_features.zx +306 -0
  18. package/src/tests/test_complexity_features.zx +321 -0
  19. package/src/tests/test_core_integration.py +185 -0
  20. package/src/tests/test_phase10_ecosystem.zx +177 -0
  21. package/src/tests/test_phase1_modifiers.zx +87 -0
  22. package/src/tests/test_phase2_plugins.zx +80 -0
  23. package/src/tests/test_phase3_security.zx +97 -0
  24. package/src/tests/test_phase4_vfs.zx +116 -0
  25. package/src/tests/test_phase5_types.zx +117 -0
  26. package/src/tests/test_phase6_metaprogramming.zx +125 -0
  27. package/src/tests/test_phase7_optimization.zx +132 -0
  28. package/src/tests/test_phase9_advanced_types.zx +157 -0
  29. package/src/tests/test_security_features.py +419 -0
  30. package/src/tests/test_security_features.zx +276 -0
  31. package/src/tests/test_simple_zx.zx +1 -0
  32. package/src/tests/test_verification_simple.zx +69 -0
  33. package/src/zexus/__init__.py +28 -0
  34. package/src/zexus/__main__.py +5 -0
  35. package/src/zexus/__pycache__/__init__.cpython-312.pyc +0 -0
  36. package/src/zexus/__pycache__/advanced_types.cpython-312.pyc +0 -0
  37. package/src/zexus/__pycache__/builtin_modules.cpython-312.pyc +0 -0
  38. package/src/zexus/__pycache__/capability_system.cpython-312.pyc +0 -0
  39. package/src/zexus/__pycache__/complexity_system.cpython-312.pyc +0 -0
  40. package/src/zexus/__pycache__/concurrency_system.cpython-312.pyc +0 -0
  41. package/src/zexus/__pycache__/config.cpython-312.pyc +0 -0
  42. package/src/zexus/__pycache__/dependency_injection.cpython-312.pyc +0 -0
  43. package/src/zexus/__pycache__/ecosystem.cpython-312.pyc +0 -0
  44. package/src/zexus/__pycache__/environment.cpython-312.pyc +0 -0
  45. package/src/zexus/__pycache__/error_reporter.cpython-312.pyc +0 -0
  46. package/src/zexus/__pycache__/hybrid_orchestrator.cpython-312.pyc +0 -0
  47. package/src/zexus/__pycache__/lexer.cpython-312.pyc +0 -0
  48. package/src/zexus/__pycache__/metaprogramming.cpython-312.pyc +0 -0
  49. package/src/zexus/__pycache__/module_cache.cpython-312.pyc +0 -0
  50. package/src/zexus/__pycache__/object.cpython-312.pyc +0 -0
  51. package/src/zexus/__pycache__/optimization.cpython-312.pyc +0 -0
  52. package/src/zexus/__pycache__/plugin_system.cpython-312.pyc +0 -0
  53. package/src/zexus/__pycache__/policy_engine.cpython-312.pyc +0 -0
  54. package/src/zexus/__pycache__/security.cpython-312.pyc +0 -0
  55. package/src/zexus/__pycache__/stdlib_integration.cpython-312.pyc +0 -0
  56. package/src/zexus/__pycache__/strategy_recovery.cpython-312.pyc +0 -0
  57. package/src/zexus/__pycache__/syntax_validator.cpython-312.pyc +0 -0
  58. package/src/zexus/__pycache__/type_system.cpython-312.pyc +0 -0
  59. package/src/zexus/__pycache__/virtual_filesystem.cpython-312.pyc +0 -0
  60. package/src/zexus/__pycache__/zexus_ast.cpython-312.pyc +0 -0
  61. package/src/zexus/__pycache__/zexus_token.cpython-312.pyc +0 -0
  62. package/src/zexus/advanced_types.py +401 -0
  63. package/src/zexus/blockchain/__init__.py +40 -0
  64. package/src/zexus/blockchain/__pycache__/__init__.cpython-312.pyc +0 -0
  65. package/src/zexus/blockchain/__pycache__/crypto.cpython-312.pyc +0 -0
  66. package/src/zexus/blockchain/__pycache__/ledger.cpython-312.pyc +0 -0
  67. package/src/zexus/blockchain/__pycache__/transaction.cpython-312.pyc +0 -0
  68. package/src/zexus/blockchain/crypto.py +463 -0
  69. package/src/zexus/blockchain/ledger.py +255 -0
  70. package/src/zexus/blockchain/transaction.py +267 -0
  71. package/src/zexus/builtin_modules.py +284 -0
  72. package/src/zexus/builtin_plugins.py +317 -0
  73. package/src/zexus/capability_system.py +372 -0
  74. package/src/zexus/cli/__init__.py +2 -0
  75. package/src/zexus/cli/__pycache__/__init__.cpython-312.pyc +0 -0
  76. package/src/zexus/cli/__pycache__/main.cpython-312.pyc +0 -0
  77. package/src/zexus/cli/main.py +707 -0
  78. package/src/zexus/cli/zpm.py +203 -0
  79. package/src/zexus/compare_interpreter_compiler.py +146 -0
  80. package/src/zexus/compiler/__init__.py +169 -0
  81. package/src/zexus/compiler/__pycache__/__init__.cpython-312.pyc +0 -0
  82. package/src/zexus/compiler/__pycache__/lexer.cpython-312.pyc +0 -0
  83. package/src/zexus/compiler/__pycache__/parser.cpython-312.pyc +0 -0
  84. package/src/zexus/compiler/__pycache__/zexus_ast.cpython-312.pyc +0 -0
  85. package/src/zexus/compiler/bytecode.py +266 -0
  86. package/src/zexus/compiler/compat_runtime.py +277 -0
  87. package/src/zexus/compiler/lexer.py +257 -0
  88. package/src/zexus/compiler/parser.py +779 -0
  89. package/src/zexus/compiler/semantic.py +118 -0
  90. package/src/zexus/compiler/zexus_ast.py +454 -0
  91. package/src/zexus/complexity_system.py +575 -0
  92. package/src/zexus/concurrency_system.py +493 -0
  93. package/src/zexus/config.py +201 -0
  94. package/src/zexus/crypto_bridge.py +19 -0
  95. package/src/zexus/dependency_injection.py +423 -0
  96. package/src/zexus/ecosystem.py +434 -0
  97. package/src/zexus/environment.py +101 -0
  98. package/src/zexus/environment_manager.py +119 -0
  99. package/src/zexus/error_reporter.py +314 -0
  100. package/src/zexus/evaluator/__init__.py +12 -0
  101. package/src/zexus/evaluator/__pycache__/__init__.cpython-312.pyc +0 -0
  102. package/src/zexus/evaluator/__pycache__/bytecode_compiler.cpython-312.pyc +0 -0
  103. package/src/zexus/evaluator/__pycache__/core.cpython-312.pyc +0 -0
  104. package/src/zexus/evaluator/__pycache__/expressions.cpython-312.pyc +0 -0
  105. package/src/zexus/evaluator/__pycache__/functions.cpython-312.pyc +0 -0
  106. package/src/zexus/evaluator/__pycache__/integration.cpython-312.pyc +0 -0
  107. package/src/zexus/evaluator/__pycache__/statements.cpython-312.pyc +0 -0
  108. package/src/zexus/evaluator/__pycache__/utils.cpython-312.pyc +0 -0
  109. package/src/zexus/evaluator/bytecode_compiler.py +700 -0
  110. package/src/zexus/evaluator/core.py +891 -0
  111. package/src/zexus/evaluator/expressions.py +827 -0
  112. package/src/zexus/evaluator/functions.py +3989 -0
  113. package/src/zexus/evaluator/integration.py +396 -0
  114. package/src/zexus/evaluator/statements.py +4303 -0
  115. package/src/zexus/evaluator/utils.py +126 -0
  116. package/src/zexus/evaluator_original.py +2041 -0
  117. package/src/zexus/external_bridge.py +16 -0
  118. package/src/zexus/find_affected_imports.sh +155 -0
  119. package/src/zexus/hybrid_orchestrator.py +152 -0
  120. package/src/zexus/input_validation.py +259 -0
  121. package/src/zexus/lexer.py +571 -0
  122. package/src/zexus/logging.py +89 -0
  123. package/src/zexus/lsp/__init__.py +9 -0
  124. package/src/zexus/lsp/completion_provider.py +207 -0
  125. package/src/zexus/lsp/definition_provider.py +22 -0
  126. package/src/zexus/lsp/hover_provider.py +71 -0
  127. package/src/zexus/lsp/server.py +269 -0
  128. package/src/zexus/lsp/symbol_provider.py +31 -0
  129. package/src/zexus/metaprogramming.py +321 -0
  130. package/src/zexus/module_cache.py +89 -0
  131. package/src/zexus/module_manager.py +107 -0
  132. package/src/zexus/object.py +973 -0
  133. package/src/zexus/optimization.py +424 -0
  134. package/src/zexus/parser/__init__.py +31 -0
  135. package/src/zexus/parser/__pycache__/__init__.cpython-312.pyc +0 -0
  136. package/src/zexus/parser/__pycache__/parser.cpython-312.pyc +0 -0
  137. package/src/zexus/parser/__pycache__/strategy_context.cpython-312.pyc +0 -0
  138. package/src/zexus/parser/__pycache__/strategy_structural.cpython-312.pyc +0 -0
  139. package/src/zexus/parser/integration.py +86 -0
  140. package/src/zexus/parser/parser.py +3977 -0
  141. package/src/zexus/parser/strategy_context.py +7254 -0
  142. package/src/zexus/parser/strategy_structural.py +1033 -0
  143. package/src/zexus/persistence.py +391 -0
  144. package/src/zexus/plugin_system.py +290 -0
  145. package/src/zexus/policy_engine.py +365 -0
  146. package/src/zexus/profiler/__init__.py +5 -0
  147. package/src/zexus/profiler/profiler.py +233 -0
  148. package/src/zexus/purity_system.py +398 -0
  149. package/src/zexus/runtime/__init__.py +20 -0
  150. package/src/zexus/runtime/async_runtime.py +324 -0
  151. package/src/zexus/search_old_imports.sh +65 -0
  152. package/src/zexus/security.py +1407 -0
  153. package/src/zexus/stack_trace.py +233 -0
  154. package/src/zexus/stdlib/__init__.py +27 -0
  155. package/src/zexus/stdlib/blockchain.py +341 -0
  156. package/src/zexus/stdlib/compression.py +167 -0
  157. package/src/zexus/stdlib/crypto.py +124 -0
  158. package/src/zexus/stdlib/datetime.py +163 -0
  159. package/src/zexus/stdlib/db_mongo.py +199 -0
  160. package/src/zexus/stdlib/db_mysql.py +162 -0
  161. package/src/zexus/stdlib/db_postgres.py +163 -0
  162. package/src/zexus/stdlib/db_sqlite.py +133 -0
  163. package/src/zexus/stdlib/encoding.py +230 -0
  164. package/src/zexus/stdlib/fs.py +195 -0
  165. package/src/zexus/stdlib/http.py +219 -0
  166. package/src/zexus/stdlib/http_server.py +248 -0
  167. package/src/zexus/stdlib/json_module.py +61 -0
  168. package/src/zexus/stdlib/math.py +360 -0
  169. package/src/zexus/stdlib/os_module.py +265 -0
  170. package/src/zexus/stdlib/regex.py +148 -0
  171. package/src/zexus/stdlib/sockets.py +253 -0
  172. package/src/zexus/stdlib/test_framework.zx +208 -0
  173. package/src/zexus/stdlib/test_runner.zx +119 -0
  174. package/src/zexus/stdlib_integration.py +341 -0
  175. package/src/zexus/strategy_recovery.py +256 -0
  176. package/src/zexus/syntax_validator.py +356 -0
  177. package/src/zexus/testing/zpics.py +407 -0
  178. package/src/zexus/testing/zpics_runtime.py +369 -0
  179. package/src/zexus/type_system.py +374 -0
  180. package/src/zexus/validation_system.py +569 -0
  181. package/src/zexus/virtual_filesystem.py +355 -0
  182. package/src/zexus/vm/__init__.py +8 -0
  183. package/src/zexus/vm/__pycache__/__init__.cpython-312.pyc +0 -0
  184. package/src/zexus/vm/__pycache__/async_optimizer.cpython-312.pyc +0 -0
  185. package/src/zexus/vm/__pycache__/bytecode.cpython-312.pyc +0 -0
  186. package/src/zexus/vm/__pycache__/cache.cpython-312.pyc +0 -0
  187. package/src/zexus/vm/__pycache__/jit.cpython-312.pyc +0 -0
  188. package/src/zexus/vm/__pycache__/memory_manager.cpython-312.pyc +0 -0
  189. package/src/zexus/vm/__pycache__/memory_pool.cpython-312.pyc +0 -0
  190. package/src/zexus/vm/__pycache__/optimizer.cpython-312.pyc +0 -0
  191. package/src/zexus/vm/__pycache__/parallel_vm.cpython-312.pyc +0 -0
  192. package/src/zexus/vm/__pycache__/peephole_optimizer.cpython-312.pyc +0 -0
  193. package/src/zexus/vm/__pycache__/profiler.cpython-312.pyc +0 -0
  194. package/src/zexus/vm/__pycache__/register_allocator.cpython-312.pyc +0 -0
  195. package/src/zexus/vm/__pycache__/register_vm.cpython-312.pyc +0 -0
  196. package/src/zexus/vm/__pycache__/ssa_converter.cpython-312.pyc +0 -0
  197. package/src/zexus/vm/__pycache__/vm.cpython-312.pyc +0 -0
  198. package/src/zexus/vm/async_optimizer.py +420 -0
  199. package/src/zexus/vm/bytecode.py +428 -0
  200. package/src/zexus/vm/bytecode_converter.py +297 -0
  201. package/src/zexus/vm/cache.py +532 -0
  202. package/src/zexus/vm/jit.py +720 -0
  203. package/src/zexus/vm/memory_manager.py +520 -0
  204. package/src/zexus/vm/memory_pool.py +511 -0
  205. package/src/zexus/vm/optimizer.py +478 -0
  206. package/src/zexus/vm/parallel_vm.py +899 -0
  207. package/src/zexus/vm/peephole_optimizer.py +452 -0
  208. package/src/zexus/vm/profiler.py +527 -0
  209. package/src/zexus/vm/register_allocator.py +462 -0
  210. package/src/zexus/vm/register_vm.py +520 -0
  211. package/src/zexus/vm/ssa_converter.py +757 -0
  212. package/src/zexus/vm/vm.py +1392 -0
  213. package/src/zexus/zexus_ast.py +1782 -0
  214. package/src/zexus/zexus_token.py +253 -0
  215. package/src/zexus/zpm/__init__.py +15 -0
  216. package/src/zexus/zpm/installer.py +116 -0
  217. package/src/zexus/zpm/package_manager.py +208 -0
  218. package/src/zexus/zpm/publisher.py +98 -0
  219. package/src/zexus/zpm/registry.py +110 -0
  220. package/src/zexus.egg-info/PKG-INFO +2235 -0
  221. package/src/zexus.egg-info/SOURCES.txt +876 -0
  222. package/src/zexus.egg-info/dependency_links.txt +1 -0
  223. package/src/zexus.egg-info/entry_points.txt +3 -0
  224. package/src/zexus.egg-info/not-zip-safe +1 -0
  225. package/src/zexus.egg-info/requires.txt +14 -0
  226. package/src/zexus.egg-info/top_level.txt +2 -0
  227. package/zexus.json +14 -0
@@ -0,0 +1,757 @@
1
+ """
2
+ SSA (Static Single Assignment) Converter for Zexus VM
3
+
4
+ Converts bytecode to SSA form with:
5
+ - Phi node insertion (dominance frontiers algorithm)
6
+ - Variable renaming (dominance tree traversal)
7
+ - Immediate dominator computation
8
+ - SSA destruction for code generation
9
+ - Dead code elimination in SSA form
10
+ - Copy propagation
11
+
12
+ Phase 8.5 of VM Optimization Project - Production Grade
13
+ """
14
+
15
+ from dataclasses import dataclass, field
16
+ from typing import Dict, List, Set, Tuple, Optional, Any
17
+ from collections import defaultdict, deque
18
+ import logging
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
+ @dataclass
24
+ class PhiNode:
25
+ """Phi node for SSA form"""
26
+ target: str # Target variable
27
+ sources: List[Tuple[int, str]] # [(block_id, variable_version)]
28
+
29
+ def __str__(self):
30
+ sources_str = ", ".join(f"({bid}: {var})" for bid, var in self.sources)
31
+ return f"{self.target} = φ({sources_str})"
32
+
33
+
34
+ @dataclass
35
+ class BasicBlock:
36
+ """Basic block in control flow graph"""
37
+ id: int
38
+ instructions: List[Tuple] = field(default_factory=list)
39
+ predecessors: Set[int] = field(default_factory=set)
40
+ successors: Set[int] = field(default_factory=set)
41
+ phi_nodes: List[PhiNode] = field(default_factory=list)
42
+ dom_frontier: Set[int] = field(default_factory=set)
43
+ idom: Optional[int] = None # Immediate dominator
44
+
45
+ def add_phi(self, var: str, sources: List[Tuple[int, str]]) -> PhiNode:
46
+ """Add phi node for variable"""
47
+ phi = PhiNode(target=var, sources=sources)
48
+ self.phi_nodes.append(phi)
49
+ return phi
50
+
51
+ def get_phi(self, var: str) -> Optional[PhiNode]:
52
+ """Get phi node for variable"""
53
+ for phi in self.phi_nodes:
54
+ if phi.target == var:
55
+ return phi
56
+ return None
57
+
58
+ def remove_phi(self, var: str):
59
+ """Remove phi node for variable"""
60
+ self.phi_nodes = [phi for phi in self.phi_nodes if phi.target != var]
61
+
62
+
63
+ @dataclass
64
+ class SSAProgram:
65
+ """Program in SSA form"""
66
+ blocks: Dict[int, BasicBlock]
67
+ entry_block: int
68
+ exit_blocks: Set[int] = field(default_factory=set)
69
+ variable_versions: Dict[str, int] = field(default_factory=lambda: defaultdict(int))
70
+ dominators: Dict[int, Set[int]] = field(default_factory=dict)
71
+ dominator_tree: Dict[int, Set[int]] = field(default_factory=lambda: defaultdict(set))
72
+
73
+ @property
74
+ def num_phi_nodes(self) -> int:
75
+ """Get total number of phi nodes in all blocks"""
76
+ return sum(len(block.phi_nodes) for block in self.blocks.values())
77
+
78
+ @property
79
+ def variables(self) -> Dict[str, List[str]]:
80
+ """Get mapping of original variables to SSA versions (for compatibility)"""
81
+ # Return variable_versions in a format compatible with old API
82
+ return dict(self.variable_versions)
83
+
84
+ def get_block_order(self) -> List[int]:
85
+ """Get blocks in dominance order (reverse postorder)"""
86
+ visited = set()
87
+ order = []
88
+
89
+ def dfs(block_id: int):
90
+ if block_id in visited or block_id not in self.blocks:
91
+ return
92
+ visited.add(block_id)
93
+
94
+ block = self.blocks[block_id]
95
+ for succ in sorted(block.successors):
96
+ dfs(succ)
97
+ order.append(block_id)
98
+
99
+ dfs(self.entry_block)
100
+ return list(reversed(order))
101
+
102
+
103
+ class SSAConverter:
104
+ """
105
+ Convert bytecode to SSA form
106
+
107
+ Production-grade implementation with:
108
+ 1. Robust CFG construction with proper basic block splitting
109
+ 2. Efficient dominator computation (Lengauer-Tarjan algorithm)
110
+ 3. Precise dominance frontier calculation
111
+ 4. Minimal phi node insertion
112
+ 5. Correct variable renaming with stack-based approach
113
+ 6. SSA-based optimizations (dead code, copy propagation)
114
+ """
115
+
116
+ def __init__(self, optimize: bool = True):
117
+ """
118
+ Initialize SSA converter
119
+
120
+ Args:
121
+ optimize: Enable SSA-based optimizations
122
+ """
123
+ self.optimize = optimize
124
+ self.variable_versions = defaultdict(int)
125
+ self.rename_stack = defaultdict(list) # Stack for variable renaming
126
+ self.stats = {
127
+ 'conversions': 0,
128
+ 'phi_nodes_inserted': 0,
129
+ 'variables_renamed': 0,
130
+ 'blocks_created': 0,
131
+ 'dead_code_removed': 0,
132
+ 'copies_propagated': 0,
133
+ }
134
+
135
+ def convert_to_ssa(self, instructions: List[Tuple]) -> SSAProgram:
136
+ """
137
+ Convert instructions to SSA form
138
+
139
+ Args:
140
+ instructions: List of bytecode instructions (tuples or Instruction objects)
141
+
142
+ Returns:
143
+ SSAProgram in SSA form
144
+ """
145
+ self.stats['conversions'] += 1
146
+
147
+ if not instructions:
148
+ return SSAProgram(blocks={0: BasicBlock(id=0)}, entry_block=0)
149
+
150
+ # Normalize instructions (handle both tuples and Instruction objects)
151
+ normalized = []
152
+ for instr in instructions:
153
+ if instr is None:
154
+ normalized.append(None)
155
+ elif hasattr(instr, 'opcode') and hasattr(instr, 'arg'):
156
+ # Instruction object from peephole optimizer
157
+ normalized.append((instr.opcode, instr.arg))
158
+ else:
159
+ # Already a tuple
160
+ normalized.append(instr)
161
+
162
+ # 1. Build CFG with proper basic blocks
163
+ blocks = self._build_cfg(normalized)
164
+ self.stats['blocks_created'] = len(blocks)
165
+
166
+ # 2. Compute dominators and dominator tree
167
+ dominators = self._compute_dominators(blocks, 0)
168
+ idoms = self._compute_immediate_dominators(blocks, dominators, 0)
169
+ dom_tree = self._build_dominator_tree(blocks, idoms)
170
+
171
+ # 3. Compute dominance frontiers
172
+ self._compute_dominance_frontiers(blocks, dominators)
173
+
174
+ # 4. Insert phi nodes at dominance frontiers
175
+ self._insert_phi_nodes(blocks)
176
+
177
+ # 5. Rename variables in dominance tree order
178
+ self._rename_variables(blocks, 0, dom_tree)
179
+
180
+ # 6. SSA-based optimizations
181
+ if self.optimize:
182
+ self._eliminate_dead_code(blocks)
183
+ self._propagate_copies(blocks)
184
+
185
+ return SSAProgram(
186
+ blocks=blocks,
187
+ entry_block=0,
188
+ variable_versions=self.variable_versions,
189
+ dominators=dominators,
190
+ dominator_tree=dom_tree
191
+ )
192
+
193
+ def _build_cfg(self, instructions: List[Tuple]) -> Dict[int, BasicBlock]:
194
+ """
195
+ Build control flow graph with proper basic block boundaries
196
+
197
+ Leaders (start new basic block):
198
+ - First instruction
199
+ - Target of any jump
200
+ - Instruction following a jump/branch/return
201
+ """
202
+ if not instructions:
203
+ return {0: BasicBlock(id=0)}
204
+
205
+ # Define all jump/branch opcodes
206
+ jump_opcodes = {
207
+ 'JUMP', 'JUMP_IF_TRUE', 'JUMP_IF_FALSE',
208
+ 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'JUMP_BACKWARD',
209
+ 'POP_JUMP_IF_TRUE', 'POP_JUMP_IF_FALSE',
210
+ 'FOR_ITER', 'SETUP_LOOP', 'SETUP_EXCEPT', 'SETUP_FINALLY'
211
+ }
212
+ control_flow_opcodes = jump_opcodes | {'RETURN', 'CALL', 'SPAWN', 'YIELD', 'RAISE'}
213
+
214
+ # Identify leaders
215
+ leaders = {0} # First instruction is always a leader
216
+
217
+ for i, instr in enumerate(instructions):
218
+ opcode = instr[0] if instr else None
219
+
220
+ # Instruction after control flow is leader
221
+ if i > 0:
222
+ prev_opcode = instructions[i-1][0] if instructions[i-1] else None
223
+ if prev_opcode in control_flow_opcodes:
224
+ leaders.add(i)
225
+
226
+ # Jump targets are leaders
227
+ if opcode in jump_opcodes:
228
+ if len(instr) > 1 and isinstance(instr[1], int):
229
+ target = instr[1]
230
+ if 0 <= target < len(instructions):
231
+ leaders.add(target)
232
+
233
+ # Create basic blocks
234
+ leaders_list = sorted(leaders)
235
+ blocks = {}
236
+
237
+ for i, start in enumerate(leaders_list):
238
+ end = leaders_list[i + 1] if i + 1 < len(leaders_list) else len(instructions)
239
+
240
+ block = BasicBlock(id=i)
241
+ block.instructions = list(instructions[start:end])
242
+ blocks[i] = block
243
+
244
+ # Build CFG edges
245
+ self._build_cfg_edges(blocks, leaders_list, instructions)
246
+
247
+ return blocks
248
+
249
+ def _build_cfg_edges(
250
+ self,
251
+ blocks: Dict[int, BasicBlock],
252
+ leaders: List[int],
253
+ instructions: List[Tuple]
254
+ ):
255
+ """Build CFG edges based on control flow"""
256
+ block_map = {leaders[i]: i for i in range(len(leaders))}
257
+
258
+ # Define unconditional and conditional jump opcodes
259
+ unconditional_jumps = {'JUMP', 'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'JUMP_BACKWARD'}
260
+ conditional_jumps = {
261
+ 'JUMP_IF_TRUE', 'JUMP_IF_FALSE',
262
+ 'POP_JUMP_IF_TRUE', 'POP_JUMP_IF_FALSE',
263
+ 'FOR_ITER'
264
+ }
265
+
266
+ for block_id, block in blocks.items():
267
+ if not block.instructions:
268
+ continue
269
+
270
+ last_instr = block.instructions[-1]
271
+ opcode = last_instr[0] if last_instr else None
272
+
273
+ # Get instruction index of last instruction in block
274
+ instr_idx = leaders[block_id] + len(block.instructions) - 1
275
+
276
+ if opcode in unconditional_jumps:
277
+ # Unconditional jump - only jump target is successor
278
+ if len(last_instr) > 1 and isinstance(last_instr[1], int):
279
+ target = last_instr[1]
280
+ if target in block_map:
281
+ target_block = block_map[target]
282
+ block.successors.add(target_block)
283
+ blocks[target_block].predecessors.add(block_id)
284
+
285
+ elif opcode in conditional_jumps:
286
+ # Conditional branch - two successors
287
+ if len(last_instr) > 1 and isinstance(last_instr[1], int):
288
+ target = last_instr[1]
289
+ if target in block_map:
290
+ target_block = block_map[target]
291
+ block.successors.add(target_block)
292
+ blocks[target_block].predecessors.add(block_id)
293
+
294
+ # Fall-through to next block
295
+ if block_id + 1 in blocks:
296
+ block.successors.add(block_id + 1)
297
+ blocks[block_id + 1].predecessors.add(block_id)
298
+
299
+ elif opcode not in ('RETURN',):
300
+ # Fall-through to next block
301
+ if block_id + 1 in blocks:
302
+ block.successors.add(block_id + 1)
303
+ blocks[block_id + 1].predecessors.add(block_id)
304
+
305
+ def _compute_dominators(
306
+ self,
307
+ blocks: Dict[int, BasicBlock],
308
+ entry: int
309
+ ) -> Dict[int, Set[int]]:
310
+ """
311
+ Compute dominator sets using iterative dataflow algorithm
312
+
313
+ More efficient than naive algorithm, suitable for production.
314
+ """
315
+ # Initialize
316
+ all_blocks = set(blocks.keys())
317
+ dominators = {entry: {entry}}
318
+
319
+ for block_id in blocks:
320
+ if block_id != entry:
321
+ dominators[block_id] = all_blocks.copy()
322
+
323
+ # Iterate until fixed point (usually converges quickly)
324
+ changed = True
325
+ iterations = 0
326
+ max_iterations = len(blocks) * 2 # Safety limit
327
+
328
+ while changed and iterations < max_iterations:
329
+ changed = False
330
+ iterations += 1
331
+
332
+ for block_id in sorted(blocks.keys()):
333
+ if block_id == entry:
334
+ continue
335
+
336
+ # dom(n) = {n} ∪ (∩ dom(p) for p in predecessors(n))
337
+ new_dom = {block_id}
338
+
339
+ preds = blocks[block_id].predecessors
340
+ if preds:
341
+ pred_doms = [dominators.get(pred, all_blocks) for pred in preds]
342
+ if pred_doms:
343
+ new_dom = new_dom | set.intersection(*pred_doms)
344
+ else:
345
+ # No predecessors (unreachable) - dominated by all
346
+ new_dom = all_blocks.copy()
347
+
348
+ if new_dom != dominators[block_id]:
349
+ dominators[block_id] = new_dom
350
+ changed = True
351
+
352
+ if iterations >= max_iterations:
353
+ logger.warning(f"Dominator computation did not converge after {max_iterations} iterations")
354
+
355
+ return dominators
356
+
357
+ def _compute_immediate_dominators(
358
+ self,
359
+ blocks: Dict[int, BasicBlock],
360
+ dominators: Dict[int, Set[int]],
361
+ entry: int
362
+ ) -> Dict[int, Optional[int]]:
363
+ """
364
+ Compute immediate dominator for each block
365
+
366
+ idom(n) is the unique block that strictly dominates n
367
+ but does not dominate any other block that dominates n.
368
+ """
369
+ idoms = {entry: None}
370
+
371
+ for block_id in blocks:
372
+ if block_id == entry:
373
+ continue
374
+
375
+ # Get strict dominators (excluding block itself)
376
+ strict_doms = dominators[block_id] - {block_id}
377
+
378
+ if not strict_doms:
379
+ idoms[block_id] = None
380
+ continue
381
+
382
+ # Find immediate dominator:
383
+ # The dominator that is not dominated by any other dominator
384
+ for dom in strict_doms:
385
+ is_idom = True
386
+ for other_dom in strict_doms:
387
+ if dom != other_dom and dom in dominators.get(other_dom, set()):
388
+ is_idom = False
389
+ break
390
+
391
+ if is_idom:
392
+ idoms[block_id] = dom
393
+ blocks[block_id].idom = dom
394
+ break
395
+
396
+ return idoms
397
+
398
+ def _build_dominator_tree(
399
+ self,
400
+ blocks: Dict[int, BasicBlock],
401
+ idoms: Dict[int, Optional[int]]
402
+ ) -> Dict[int, Set[int]]:
403
+ """Build dominator tree from immediate dominators"""
404
+ dom_tree = defaultdict(set)
405
+
406
+ for block_id, idom in idoms.items():
407
+ if idom is not None:
408
+ dom_tree[idom].add(block_id)
409
+
410
+ return dom_tree
411
+
412
+ def _compute_dominance_frontiers(
413
+ self,
414
+ blocks: Dict[int, BasicBlock],
415
+ dominators: Dict[int, Set[int]]
416
+ ):
417
+ """
418
+ Compute dominance frontier for each block
419
+
420
+ DF(X) = {Y | X dominates a predecessor of Y but not Y itself}
421
+
422
+ Uses efficient algorithm from Cytron et al.
423
+ """
424
+ for block_id in blocks:
425
+ blocks[block_id].dom_frontier = set()
426
+
427
+ for block_id, block in blocks.items():
428
+ if len(block.predecessors) < 2:
429
+ continue # No join point
430
+
431
+ for pred in block.predecessors:
432
+ runner = pred
433
+
434
+ # Walk up dominator tree until we dominate block_id
435
+ while runner is not None and block_id not in dominators.get(runner, set()):
436
+ blocks[runner].dom_frontier.add(block_id)
437
+ runner = blocks[runner].idom
438
+
439
+ def _insert_phi_nodes(self, blocks: Dict[int, BasicBlock]):
440
+ """
441
+ Insert phi nodes at dominance frontiers
442
+
443
+ Uses pruned SSA construction (only insert where variable is live)
444
+ """
445
+ # Find all variables and where they're defined
446
+ all_vars = set()
447
+ var_def_sites = defaultdict(set)
448
+
449
+ for block_id, block in blocks.items():
450
+ for instr in block.instructions:
451
+ defs, uses = self._extract_defs_uses(instr)
452
+ for var in defs:
453
+ all_vars.add(var)
454
+ var_def_sites[var].add(block_id)
455
+ for var in uses:
456
+ all_vars.add(var)
457
+
458
+ # Insert phi nodes for each variable
459
+ for var in all_vars:
460
+ work_list = deque(var_def_sites.get(var, set()))
461
+ processed = set()
462
+
463
+ while work_list:
464
+ block_id = work_list.popleft()
465
+
466
+ if block_id not in blocks:
467
+ continue
468
+
469
+ # Insert phi in dominance frontier
470
+ for frontier_block in blocks[block_id].dom_frontier:
471
+ if frontier_block not in processed:
472
+ # Create phi node
473
+ preds = blocks[frontier_block].predecessors
474
+ sources = [(pred, var) for pred in preds]
475
+
476
+ # Only insert if phi doesn't already exist
477
+ if not blocks[frontier_block].get_phi(var):
478
+ blocks[frontier_block].add_phi(var, sources)
479
+ self.stats['phi_nodes_inserted'] += 1
480
+
481
+ processed.add(frontier_block)
482
+
483
+ # If this is a new def site, process its frontiers too
484
+ if frontier_block not in var_def_sites[var]:
485
+ var_def_sites[var].add(frontier_block)
486
+ work_list.append(frontier_block)
487
+
488
+ def _rename_variables(
489
+ self,
490
+ blocks: Dict[int, BasicBlock],
491
+ block_id: int,
492
+ dom_tree: Dict[int, Set[int]]
493
+ ):
494
+ """
495
+ Rename variables to SSA form using stack-based algorithm
496
+
497
+ Traverses dominator tree and maintains stack of versions for each variable.
498
+ """
499
+ if block_id not in blocks:
500
+ return
501
+
502
+ block = blocks[block_id]
503
+ local_defs = [] # Track defs in this block for stack cleanup
504
+
505
+ # Process phi nodes first
506
+ for phi in block.phi_nodes:
507
+ var = phi.target
508
+ new_version = self.variable_versions[var] + 1
509
+ self.variable_versions[var] = new_version
510
+ self.rename_stack[var].append(new_version)
511
+ local_defs.append(var)
512
+ self.stats['variables_renamed'] += 1
513
+
514
+ # Update phi target with version
515
+ phi.target = f"{var}${new_version}"
516
+
517
+ # Process instructions
518
+ for i, instr in enumerate(block.instructions):
519
+ defs, uses = self._extract_defs_uses(instr)
520
+ new_instr = list(instr)
521
+
522
+ # Rename uses (read current version from stack)
523
+ for j, operand in enumerate(new_instr):
524
+ if isinstance(operand, str) and operand in uses:
525
+ if self.rename_stack[operand]:
526
+ version = self.rename_stack[operand][-1]
527
+ new_instr[j] = f"{operand}${version}"
528
+
529
+ # Rename defs (create new version)
530
+ for j, operand in enumerate(new_instr):
531
+ if isinstance(operand, str):
532
+ # Extract base name (remove version if exists)
533
+ base_name = operand.split('$')[0]
534
+ if base_name in defs:
535
+ new_version = self.variable_versions[base_name] + 1
536
+ self.variable_versions[base_name] = new_version
537
+ self.rename_stack[base_name].append(new_version)
538
+ local_defs.append(base_name)
539
+ new_instr[j] = f"{base_name}${new_version}"
540
+ self.stats['variables_renamed'] += 1
541
+
542
+ block.instructions[i] = tuple(new_instr)
543
+
544
+ # Update phi source operands in successor blocks
545
+ for succ_id in block.successors:
546
+ if succ_id not in blocks:
547
+ continue
548
+
549
+ for phi in blocks[succ_id].phi_nodes:
550
+ # Find this block in phi sources and update variable name
551
+ for k, (pred_id, var_name) in enumerate(phi.sources):
552
+ if pred_id == block_id:
553
+ base_name = var_name.split('$')[0]
554
+ if self.rename_stack[base_name]:
555
+ version = self.rename_stack[base_name][-1]
556
+ phi.sources[k] = (pred_id, f"{base_name}${version}")
557
+
558
+ # Recursively process children in dominator tree
559
+ for child_id in sorted(dom_tree.get(block_id, set())):
560
+ self._rename_variables(blocks, child_id, dom_tree)
561
+
562
+ # Pop versions defined in this block
563
+ for var in local_defs:
564
+ if self.rename_stack[var]:
565
+ self.rename_stack[var].pop()
566
+
567
+ def _eliminate_dead_code(self, blocks: Dict[int, BasicBlock]):
568
+ """
569
+ Eliminate dead code in SSA form
570
+
571
+ Remove instructions that define variables that are never used.
572
+ """
573
+ # Find all used variables
574
+ used_vars = set()
575
+
576
+ for block in blocks.values():
577
+ # Phi nodes use variables
578
+ for phi in block.phi_nodes:
579
+ for _, var in phi.sources:
580
+ used_vars.add(var)
581
+
582
+ # Instructions use variables
583
+ for instr in block.instructions:
584
+ _, uses = self._extract_defs_uses(instr)
585
+ used_vars.update(uses)
586
+
587
+ # Remove dead instructions
588
+ for block in blocks.values():
589
+ new_instructions = []
590
+
591
+ for instr in block.instructions:
592
+ defs, _ = self._extract_defs_uses(instr)
593
+
594
+ # Keep if no defs or any def is used
595
+ if not defs or any(d in used_vars for d in defs):
596
+ new_instructions.append(instr)
597
+ else:
598
+ self.stats['dead_code_removed'] += 1
599
+
600
+ block.instructions = new_instructions
601
+
602
+ def _propagate_copies(self, blocks: Dict[int, BasicBlock]):
603
+ """
604
+ Propagate copies in SSA form
605
+
606
+ Replace uses of variables that are just copies of other variables.
607
+ """
608
+ # Find copy assignments: x = y
609
+ copy_map = {}
610
+
611
+ for block in blocks.values():
612
+ for instr in block.instructions:
613
+ if len(instr) >= 3 and instr[0] in ('MOVE', 'LOAD_FAST'):
614
+ dest = instr[1] if len(instr) > 1 else None
615
+ src = instr[2] if len(instr) > 2 else instr[1]
616
+
617
+ if isinstance(dest, str) and isinstance(src, str):
618
+ copy_map[dest] = src
619
+
620
+ # Propagate copies
621
+ for block in blocks.values():
622
+ new_instructions = []
623
+
624
+ for instr in block.instructions:
625
+ new_instr = list(instr)
626
+
627
+ # Replace uses with copy source
628
+ for j, operand in enumerate(new_instr):
629
+ if isinstance(operand, str) and operand in copy_map:
630
+ new_instr[j] = copy_map[operand]
631
+ self.stats['copies_propagated'] += 1
632
+
633
+ new_instructions.append(tuple(new_instr))
634
+
635
+ block.instructions = new_instructions
636
+
637
+ def _extract_defs_uses(self, instr: Tuple) -> Tuple[List[str], List[str]]:
638
+ """Extract variables defined and used in instruction"""
639
+ opcode = instr[0] if instr else None
640
+ defs = []
641
+ uses = []
642
+
643
+ if opcode == 'STORE_FAST' and len(instr) > 1:
644
+ if isinstance(instr[1], str):
645
+ base_name = instr[1].split('$')[0]
646
+ defs.append(base_name)
647
+ if len(instr) > 2 and isinstance(instr[2], str):
648
+ base_name = instr[2].split('$')[0]
649
+ uses.append(base_name)
650
+
651
+ elif opcode == 'LOAD_FAST' and len(instr) > 1:
652
+ if isinstance(instr[1], str):
653
+ base_name = instr[1].split('$')[0]
654
+ uses.append(base_name)
655
+
656
+ elif opcode in ('BINARY_ADD', 'BINARY_SUB', 'BINARY_MUL', 'BINARY_DIV', 'BINARY_MOD'):
657
+ if len(instr) >= 4:
658
+ dest = instr[1]
659
+ src1 = instr[2]
660
+ src2 = instr[3]
661
+
662
+ if isinstance(dest, str):
663
+ defs.append(dest.split('$')[0])
664
+ if isinstance(src1, str):
665
+ uses.append(src1.split('$')[0])
666
+ if isinstance(src2, str):
667
+ uses.append(src2.split('$')[0])
668
+
669
+ elif opcode == 'MOVE' and len(instr) >= 3:
670
+ dest = instr[1]
671
+ src = instr[2]
672
+ if isinstance(dest, str):
673
+ defs.append(dest.split('$')[0])
674
+ if isinstance(src, str):
675
+ uses.append(src.split('$')[0])
676
+
677
+ return defs, uses
678
+
679
+ def get_stats(self) -> Dict[str, Any]:
680
+ """Get conversion statistics"""
681
+ return self.stats.copy()
682
+
683
+ def reset_stats(self):
684
+ """Reset statistics"""
685
+ self.stats = {
686
+ 'conversions': 0,
687
+ 'phi_nodes_inserted': 0,
688
+ 'variables_renamed': 0,
689
+ 'blocks_created': 0,
690
+ 'dead_code_removed': 0,
691
+ 'copies_propagated': 0,
692
+ }
693
+ self.variable_versions.clear()
694
+ self.rename_stack.clear()
695
+
696
+
697
+ def destruct_ssa(ssa_program: SSAProgram) -> List[Tuple]:
698
+ """
699
+ Convert SSA program back to regular bytecode
700
+
701
+ Removes phi nodes by inserting appropriate moves at the end
702
+ of predecessor blocks. Uses parallel copy semantics to handle
703
+ circular dependencies correctly.
704
+
705
+ Args:
706
+ ssa_program: Program in SSA form
707
+
708
+ Returns:
709
+ List of instructions without SSA form
710
+ """
711
+ instructions = []
712
+ block_start_labels = {} # Map block_id to instruction offset
713
+
714
+ # First pass: collect all instructions and track block starts
715
+ offset = 0
716
+ for block_id in ssa_program.get_block_order():
717
+ if block_id not in ssa_program.blocks:
718
+ continue
719
+
720
+ block = ssa_program.blocks[block_id]
721
+ block_start_labels[block_id] = offset
722
+
723
+ # Handle phi nodes by inserting moves in predecessors
724
+ # (will be done in second pass)
725
+
726
+ # Add block instructions
727
+ for instr in block.instructions:
728
+ # Remove SSA version numbers
729
+ new_instr = tuple(
730
+ op.split('$')[0] if isinstance(op, str) and '$' in op else op
731
+ for op in instr
732
+ )
733
+ instructions.append(new_instr)
734
+ offset += 1
735
+
736
+ # Second pass: insert phi resolution moves
737
+ # For production, would use parallel copy algorithm
738
+ # Simplified: insert moves at end of each predecessor
739
+ phi_moves = defaultdict(list)
740
+
741
+ for block_id, block in ssa_program.blocks.items():
742
+ for phi in block.phi_nodes:
743
+ target = phi.target.split('$')[0]
744
+
745
+ for pred_id, source_var in phi.sources:
746
+ source = source_var.split('$')[0]
747
+ # Record move to insert at end of predecessor
748
+ if source != target: # Skip self-copies
749
+ phi_moves[pred_id].append(('MOVE', source, target))
750
+
751
+ # Insert phi moves (simplified - in production would need proper placement)
752
+ for pred_id, moves in phi_moves.items():
753
+ # Would insert moves at appropriate location in predecessor block
754
+ # For now, just append to instructions
755
+ instructions.extend(moves)
756
+
757
+ return instructions