zexus 1.6.8 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/README.md +12 -5
  2. package/package.json +1 -1
  3. package/src/__init__.py +7 -0
  4. package/src/zexus/__init__.py +1 -1
  5. package/src/zexus/__pycache__/__init__.cpython-312.pyc +0 -0
  6. package/src/zexus/__pycache__/capability_system.cpython-312.pyc +0 -0
  7. package/src/zexus/__pycache__/debug_sanitizer.cpython-312.pyc +0 -0
  8. package/src/zexus/__pycache__/environment.cpython-312.pyc +0 -0
  9. package/src/zexus/__pycache__/error_reporter.cpython-312.pyc +0 -0
  10. package/src/zexus/__pycache__/input_validation.cpython-312.pyc +0 -0
  11. package/src/zexus/__pycache__/lexer.cpython-312.pyc +0 -0
  12. package/src/zexus/__pycache__/module_cache.cpython-312.pyc +0 -0
  13. package/src/zexus/__pycache__/module_manager.cpython-312.pyc +0 -0
  14. package/src/zexus/__pycache__/object.cpython-312.pyc +0 -0
  15. package/src/zexus/__pycache__/security.cpython-312.pyc +0 -0
  16. package/src/zexus/__pycache__/security_enforcement.cpython-312.pyc +0 -0
  17. package/src/zexus/__pycache__/syntax_validator.cpython-312.pyc +0 -0
  18. package/src/zexus/__pycache__/zexus_ast.cpython-312.pyc +0 -0
  19. package/src/zexus/__pycache__/zexus_token.cpython-312.pyc +0 -0
  20. package/src/zexus/access_control_system/__pycache__/__init__.cpython-312.pyc +0 -0
  21. package/src/zexus/access_control_system/__pycache__/access_control.cpython-312.pyc +0 -0
  22. package/src/zexus/advanced_types.py +17 -2
  23. package/src/zexus/blockchain/__init__.py +411 -0
  24. package/src/zexus/blockchain/accelerator.py +1160 -0
  25. package/src/zexus/blockchain/chain.py +660 -0
  26. package/src/zexus/blockchain/consensus.py +821 -0
  27. package/src/zexus/blockchain/contract_vm.py +1019 -0
  28. package/src/zexus/blockchain/crypto.py +79 -14
  29. package/src/zexus/blockchain/events.py +526 -0
  30. package/src/zexus/blockchain/loadtest.py +721 -0
  31. package/src/zexus/blockchain/monitoring.py +350 -0
  32. package/src/zexus/blockchain/mpt.py +716 -0
  33. package/src/zexus/blockchain/multichain.py +951 -0
  34. package/src/zexus/blockchain/multiprocess_executor.py +338 -0
  35. package/src/zexus/blockchain/network.py +886 -0
  36. package/src/zexus/blockchain/node.py +666 -0
  37. package/src/zexus/blockchain/rpc.py +1203 -0
  38. package/src/zexus/blockchain/rust_bridge.py +421 -0
  39. package/src/zexus/blockchain/storage.py +423 -0
  40. package/src/zexus/blockchain/tokens.py +750 -0
  41. package/src/zexus/blockchain/upgradeable.py +1004 -0
  42. package/src/zexus/blockchain/verification.py +1602 -0
  43. package/src/zexus/blockchain/wallet.py +621 -0
  44. package/src/zexus/capability_system.py +184 -9
  45. package/src/zexus/cli/__pycache__/main.cpython-312.pyc +0 -0
  46. package/src/zexus/cli/main.py +383 -34
  47. package/src/zexus/cli/zpm.py +1 -1
  48. package/src/zexus/compiler/__pycache__/bytecode.cpython-312.pyc +0 -0
  49. package/src/zexus/compiler/__pycache__/lexer.cpython-312.pyc +0 -0
  50. package/src/zexus/compiler/__pycache__/parser.cpython-312.pyc +0 -0
  51. package/src/zexus/compiler/__pycache__/semantic.cpython-312.pyc +0 -0
  52. package/src/zexus/compiler/__pycache__/zexus_ast.cpython-312.pyc +0 -0
  53. package/src/zexus/compiler/bytecode.py +124 -7
  54. package/src/zexus/compiler/compat_runtime.py +6 -2
  55. package/src/zexus/compiler/lexer.py +16 -5
  56. package/src/zexus/compiler/parser.py +108 -7
  57. package/src/zexus/compiler/semantic.py +18 -19
  58. package/src/zexus/compiler/zexus_ast.py +26 -1
  59. package/src/zexus/concurrency_system.py +79 -0
  60. package/src/zexus/config.py +54 -0
  61. package/src/zexus/crypto_bridge.py +244 -8
  62. package/src/zexus/dap/__init__.py +10 -0
  63. package/src/zexus/dap/__main__.py +4 -0
  64. package/src/zexus/dap/dap_server.py +391 -0
  65. package/src/zexus/dap/debug_engine.py +298 -0
  66. package/src/zexus/environment.py +112 -9
  67. package/src/zexus/evaluator/__pycache__/bytecode_compiler.cpython-312.pyc +0 -0
  68. package/src/zexus/evaluator/__pycache__/core.cpython-312.pyc +0 -0
  69. package/src/zexus/evaluator/__pycache__/expressions.cpython-312.pyc +0 -0
  70. package/src/zexus/evaluator/__pycache__/functions.cpython-312.pyc +0 -0
  71. package/src/zexus/evaluator/__pycache__/resource_limiter.cpython-312.pyc +0 -0
  72. package/src/zexus/evaluator/__pycache__/statements.cpython-312.pyc +0 -0
  73. package/src/zexus/evaluator/__pycache__/unified_execution.cpython-312.pyc +0 -0
  74. package/src/zexus/evaluator/__pycache__/utils.cpython-312.pyc +0 -0
  75. package/src/zexus/evaluator/bytecode_compiler.py +457 -37
  76. package/src/zexus/evaluator/core.py +644 -50
  77. package/src/zexus/evaluator/expressions.py +358 -62
  78. package/src/zexus/evaluator/functions.py +458 -20
  79. package/src/zexus/evaluator/resource_limiter.py +4 -4
  80. package/src/zexus/evaluator/statements.py +774 -122
  81. package/src/zexus/evaluator/unified_execution.py +573 -72
  82. package/src/zexus/evaluator/utils.py +14 -2
  83. package/src/zexus/evaluator_original.py +1 -1
  84. package/src/zexus/event_loop.py +186 -0
  85. package/src/zexus/lexer.py +742 -458
  86. package/src/zexus/lsp/__init__.py +1 -1
  87. package/src/zexus/lsp/definition_provider.py +163 -9
  88. package/src/zexus/lsp/server.py +22 -8
  89. package/src/zexus/lsp/symbol_provider.py +182 -9
  90. package/src/zexus/module_cache.py +239 -9
  91. package/src/zexus/module_manager.py +129 -1
  92. package/src/zexus/object.py +76 -6
  93. package/src/zexus/parser/__pycache__/parser.cpython-312.pyc +0 -0
  94. package/src/zexus/parser/__pycache__/strategy_context.cpython-312.pyc +0 -0
  95. package/src/zexus/parser/__pycache__/strategy_structural.cpython-312.pyc +0 -0
  96. package/src/zexus/parser/parser.py +1349 -408
  97. package/src/zexus/parser/strategy_context.py +755 -58
  98. package/src/zexus/parser/strategy_structural.py +121 -21
  99. package/src/zexus/persistence.py +15 -1
  100. package/src/zexus/renderer/__init__.py +61 -0
  101. package/src/zexus/renderer/__pycache__/__init__.cpython-312.pyc +0 -0
  102. package/src/zexus/renderer/__pycache__/backend.cpython-312.pyc +0 -0
  103. package/src/zexus/renderer/__pycache__/canvas.cpython-312.pyc +0 -0
  104. package/src/zexus/renderer/__pycache__/color_system.cpython-312.pyc +0 -0
  105. package/src/zexus/renderer/__pycache__/layout.cpython-312.pyc +0 -0
  106. package/src/zexus/renderer/__pycache__/main_renderer.cpython-312.pyc +0 -0
  107. package/src/zexus/renderer/__pycache__/painter.cpython-312.pyc +0 -0
  108. package/src/zexus/renderer/backend.py +261 -0
  109. package/src/zexus/renderer/canvas.py +78 -0
  110. package/src/zexus/renderer/color_system.py +201 -0
  111. package/src/zexus/renderer/graphics.py +31 -0
  112. package/src/zexus/renderer/layout.py +222 -0
  113. package/src/zexus/renderer/main_renderer.py +66 -0
  114. package/src/zexus/renderer/painter.py +30 -0
  115. package/src/zexus/renderer/tk_backend.py +208 -0
  116. package/src/zexus/renderer/web_backend.py +260 -0
  117. package/src/zexus/runtime/__init__.py +10 -2
  118. package/src/zexus/runtime/__pycache__/__init__.cpython-312.pyc +0 -0
  119. package/src/zexus/runtime/__pycache__/async_runtime.cpython-312.pyc +0 -0
  120. package/src/zexus/runtime/__pycache__/load_manager.cpython-312.pyc +0 -0
  121. package/src/zexus/runtime/file_flags.py +137 -0
  122. package/src/zexus/runtime/load_manager.py +368 -0
  123. package/src/zexus/safety/__pycache__/__init__.cpython-312.pyc +0 -0
  124. package/src/zexus/safety/__pycache__/memory_safety.cpython-312.pyc +0 -0
  125. package/src/zexus/security.py +424 -34
  126. package/src/zexus/stdlib/fs.py +23 -18
  127. package/src/zexus/stdlib/http.py +289 -186
  128. package/src/zexus/stdlib/sockets.py +207 -163
  129. package/src/zexus/stdlib/websockets.py +282 -0
  130. package/src/zexus/stdlib_integration.py +369 -2
  131. package/src/zexus/strategy_recovery.py +6 -3
  132. package/src/zexus/type_checker.py +423 -0
  133. package/src/zexus/virtual_filesystem.py +189 -2
  134. package/src/zexus/vm/__init__.py +113 -3
  135. package/src/zexus/vm/__pycache__/async_optimizer.cpython-312.pyc +0 -0
  136. package/src/zexus/vm/__pycache__/bytecode.cpython-312.pyc +0 -0
  137. package/src/zexus/vm/__pycache__/bytecode_converter.cpython-312.pyc +0 -0
  138. package/src/zexus/vm/__pycache__/cache.cpython-312.pyc +0 -0
  139. package/src/zexus/vm/__pycache__/compiler.cpython-312.pyc +0 -0
  140. package/src/zexus/vm/__pycache__/gas_metering.cpython-312.pyc +0 -0
  141. package/src/zexus/vm/__pycache__/jit.cpython-312.pyc +0 -0
  142. package/src/zexus/vm/__pycache__/parallel_vm.cpython-312.pyc +0 -0
  143. package/src/zexus/vm/__pycache__/vm.cpython-312.pyc +0 -0
  144. package/src/zexus/vm/async_optimizer.py +80 -6
  145. package/src/zexus/vm/binary_bytecode.py +659 -0
  146. package/src/zexus/vm/bytecode.py +59 -11
  147. package/src/zexus/vm/bytecode_converter.py +26 -12
  148. package/src/zexus/vm/cabi.c +1985 -0
  149. package/src/zexus/vm/cabi.cpython-312-x86_64-linux-gnu.so +0 -0
  150. package/src/zexus/vm/cabi.h +127 -0
  151. package/src/zexus/vm/cache.py +561 -17
  152. package/src/zexus/vm/compiler.py +818 -51
  153. package/src/zexus/vm/fastops.c +15743 -0
  154. package/src/zexus/vm/fastops.cpython-312-x86_64-linux-gnu.so +0 -0
  155. package/src/zexus/vm/fastops.pyx +288 -0
  156. package/src/zexus/vm/gas_metering.py +50 -9
  157. package/src/zexus/vm/jit.py +364 -20
  158. package/src/zexus/vm/native_jit_backend.py +1816 -0
  159. package/src/zexus/vm/native_runtime.cpp +1388 -0
  160. package/src/zexus/vm/native_runtime.cpython-312-x86_64-linux-gnu.so +0 -0
  161. package/src/zexus/vm/optimizer.py +161 -11
  162. package/src/zexus/vm/parallel_vm.py +140 -45
  163. package/src/zexus/vm/peephole_optimizer.py +82 -4
  164. package/src/zexus/vm/profiler.py +38 -18
  165. package/src/zexus/vm/register_allocator.py +16 -5
  166. package/src/zexus/vm/register_vm.py +8 -5
  167. package/src/zexus/vm/vm.py +3581 -531
  168. package/src/zexus/vm/wasm_compiler.py +658 -0
  169. package/src/zexus/zexus_ast.py +137 -11
  170. package/src/zexus/zexus_token.py +16 -5
  171. package/src/zexus/zpm/installer.py +55 -15
  172. package/src/zexus/zpm/package_manager.py +1 -1
  173. package/src/zexus/zpm/registry.py +257 -28
  174. package/src/zexus.egg-info/PKG-INFO +16 -6
  175. package/src/zexus.egg-info/SOURCES.txt +129 -17
  176. package/src/zexus.egg-info/entry_points.txt +1 -0
  177. package/src/zexus.egg-info/requires.txt +4 -0
@@ -6,10 +6,13 @@ the same code multiple times. Features include:
6
6
  - LRU (Least Recently Used) eviction policy
7
7
  - Cache statistics tracking
8
8
  - AST-based cache keys
9
+ - File-based cache keys (path + mtime for cross-run persistence)
10
+ - Pattern recognition cache (reuse bytecode for similar AST shapes)
9
11
  - Optional persistent disk cache
10
12
  - Memory-efficient storage
11
13
 
12
14
  Part of Phase 4: Bytecode Caching Enhancement
15
+ Enhanced: File-based persistent caching for faster repeat runs
13
16
  """
14
17
 
15
18
  import hashlib
@@ -17,12 +20,35 @@ import json
17
20
  import pickle
18
21
  import time
19
22
  from collections import OrderedDict
20
- from dataclasses import dataclass, field
23
+ from dataclasses import dataclass
21
24
  from pathlib import Path
22
- from typing import Any, Dict, Optional, Tuple
25
+ from typing import Any, Dict, List, Optional
23
26
 
24
27
  from .bytecode import Bytecode
25
28
 
29
+ # Binary bytecode helpers (Phase 1)
30
+ try:
31
+ from .binary_bytecode import (
32
+ serialize as _zxc_serialize,
33
+ deserialize as _zxc_deserialize,
34
+ serialize_multi as _zxc_serialize_multi,
35
+ deserialize_multi as _zxc_deserialize_multi,
36
+ )
37
+ _ZXC_AVAILABLE = True
38
+ except Exception:
39
+ _ZXC_AVAILABLE = False
40
+
41
+ CACHE_VERSION = 2
42
+
43
+
44
@dataclass
class FileMetadata:
    """Snapshot of a source file used to validate file-based cache entries."""

    # Path of the source file, stored as a string.
    file_path: str
    # Modification time of the file at the moment the snapshot was taken.
    mtime: float
    # File size at the moment the snapshot was taken.
    size: int
    # Hash of the file content, kept as an extra validity check next to mtime.
    content_hash: str
51
+
26
52
 
27
53
  @dataclass
28
54
  class CacheStats:
@@ -33,6 +59,9 @@ class CacheStats:
33
59
  memory_bytes: int = 0
34
60
  total_entries: int = 0
35
61
  hit_rate: float = 0.0
62
+ file_hits: int = 0 # File-based cache hits
63
+ file_misses: int = 0 # File-based cache misses
64
+ pattern_hits: int = 0 # Pattern cache hits
36
65
 
37
66
  def update_hit_rate(self):
38
67
  """Update hit rate percentage"""
@@ -47,7 +76,10 @@ class CacheStats:
47
76
  'evictions': self.evictions,
48
77
  'memory_bytes': self.memory_bytes,
49
78
  'total_entries': self.total_entries,
50
- 'hit_rate': round(self.hit_rate, 2)
79
+ 'hit_rate': round(self.hit_rate, 2),
80
+ 'file_hits': self.file_hits,
81
+ 'file_misses': self.file_misses,
82
+ 'pattern_hits': self.pattern_hits
51
83
  }
52
84
 
53
85
 
@@ -117,6 +149,17 @@ class BytecodeCache:
117
149
  # LRU cache using OrderedDict (insertion order preserved)
118
150
  self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
119
151
 
152
+ # File-based cache: maps (file_path, mtime) -> list of bytecode entries
153
+ # This persists across interpreter runs when persistent=True
154
+ self._file_cache: Dict[str, Dict] = {} # file_path -> {mtime, content_hash, bytecodes: []}
155
+
156
+ # Pattern cache: maps AST structure hash -> bytecode
157
+ # Allows reusing bytecode for similar code patterns across files
158
+ self._pattern_cache: OrderedDict[str, CacheEntry] = OrderedDict()
159
+ self._max_patterns = 500 # Max pattern cache entries
160
+ self._pattern_memory_bytes = 0
161
+ self._max_pattern_memory_bytes = max(1, self.max_memory_bytes // 4)
162
+
120
163
  # Statistics
121
164
  self.stats = CacheStats()
122
165
 
@@ -125,6 +168,8 @@ class BytecodeCache:
125
168
  if persistent:
126
169
  self.cache_dir = Path(cache_dir) if cache_dir else Path.home() / '.zexus' / 'cache'
127
170
  self.cache_dir.mkdir(parents=True, exist_ok=True)
171
+ # Load file cache index from disk
172
+ self._load_file_cache_index()
128
173
  if self.debug:
129
174
  print(f"📦 Cache: Persistent cache enabled at {self.cache_dir}")
130
175
 
@@ -144,7 +189,10 @@ class BytecodeCache:
144
189
  try:
145
190
  # Convert AST to hashable representation
146
191
  ast_repr = self._ast_to_dict(ast_node)
147
- ast_json = json.dumps(ast_repr, sort_keys=True)
192
+ ast_json = json.dumps(
193
+ {"__cache_version__": CACHE_VERSION, "ast": ast_repr},
194
+ sort_keys=True,
195
+ )
148
196
  return hashlib.md5(ast_json.encode()).hexdigest()
149
197
  except Exception as e:
150
198
  # Fallback to string representation
@@ -398,6 +446,9 @@ class BytecodeCache:
398
446
  def clear(self):
399
447
  """Clear entire cache"""
400
448
  self._cache.clear()
449
+ self._file_cache.clear()
450
+ self._pattern_cache.clear()
451
+ self._pattern_memory_bytes = 0
401
452
  self.stats = CacheStats()
402
453
 
403
454
  if self.debug:
@@ -407,6 +458,12 @@ class BytecodeCache:
407
458
  if self.persistent and self.cache_dir:
408
459
  for cache_file in self.cache_dir.glob('*.cache'):
409
460
  cache_file.unlink()
461
+ for cache_file in self.cache_dir.glob('*.zxc'):
462
+ cache_file.unlink()
463
+ # Clear file cache index
464
+ index_file = self.cache_dir / 'file_index.cache'
465
+ if index_file.exists():
466
+ index_file.unlink()
410
467
 
411
468
  def get_stats(self) -> Dict[str, Any]:
412
469
  """
@@ -416,7 +473,15 @@ class BytecodeCache:
416
473
  Dictionary with cache statistics
417
474
  """
418
475
  self.stats.update_hit_rate()
419
- return self.stats.to_dict()
476
+ base = self.stats.to_dict()
477
+ base['pattern_memory_bytes'] = self._pattern_memory_bytes
478
+ return base
479
+
480
+ def _evict_pattern_lru(self) -> None:
481
+ if not self._pattern_cache:
482
+ return
483
+ _, entry = self._pattern_cache.popitem(last=False)
484
+ self._pattern_memory_bytes = max(0, self._pattern_memory_bytes - entry.size_bytes)
420
485
 
421
486
  def reset_stats(self):
422
487
  """Reset statistics (keeps cache entries)"""
@@ -428,14 +493,20 @@ class BytecodeCache:
428
493
  # ==================== Persistent Cache Methods ====================
429
494
 
430
495
  def _save_to_disk(self, key: str, bytecode: Bytecode):
431
- """Save bytecode to disk cache"""
496
+ """Save bytecode to disk cache (binary .zxc format when available)."""
432
497
  if not self.cache_dir:
433
498
  return
434
499
 
435
500
  try:
436
- cache_file = self.cache_dir / f"{key}.cache"
437
- with open(cache_file, 'wb') as f:
438
- pickle.dump(bytecode, f, protocol=pickle.HIGHEST_PROTOCOL)
501
+ if _ZXC_AVAILABLE:
502
+ cache_file = self.cache_dir / f"{key}.zxc"
503
+ data = _zxc_serialize(bytecode)
504
+ with open(cache_file, 'wb') as f:
505
+ f.write(data)
506
+ else:
507
+ cache_file = self.cache_dir / f"{key}.cache"
508
+ with open(cache_file, 'wb') as f:
509
+ pickle.dump(bytecode, f, protocol=pickle.HIGHEST_PROTOCOL)
439
510
 
440
511
  if self.debug:
441
512
  print(f"💾 Cache: Saved to disk {key[:8]}...")
@@ -444,19 +515,29 @@ class BytecodeCache:
444
515
  print(f"⚠️ Cache: Failed to save to disk: {e}")
445
516
 
446
517
  def _load_from_disk(self, key: str) -> Optional[Bytecode]:
447
- """Load bytecode from disk cache"""
518
+ """Load bytecode from disk cache (.zxc first, then legacy .cache)."""
448
519
  if not self.cache_dir:
449
520
  return None
450
521
 
451
522
  try:
523
+ # Try .zxc binary format first
524
+ if _ZXC_AVAILABLE:
525
+ zxc_file = self.cache_dir / f"{key}.zxc"
526
+ if zxc_file.exists():
527
+ with open(zxc_file, 'rb') as f:
528
+ data = f.read()
529
+ bytecode = _zxc_deserialize(data)
530
+ if self.debug:
531
+ print(f"💾 Cache: Loaded .zxc from disk {key[:8]}...")
532
+ return bytecode
533
+
534
+ # Fallback to legacy pickle format
452
535
  cache_file = self.cache_dir / f"{key}.cache"
453
536
  if cache_file.exists():
454
537
  with open(cache_file, 'rb') as f:
455
538
  bytecode = pickle.load(f)
456
-
457
539
  if self.debug:
458
- print(f"💾 Cache: Loaded from disk {key[:8]}...")
459
-
540
+ print(f"💾 Cache: Loaded .cache from disk {key[:8]}...")
460
541
  return bytecode
461
542
  except Exception as e:
462
543
  if self.debug:
@@ -465,18 +546,476 @@ class BytecodeCache:
465
546
  return None
466
547
 
467
548
  def _delete_from_disk(self, key: str):
468
- """Delete cache entry from disk"""
549
+ """Delete cache entry from disk (both .zxc and legacy .cache)."""
469
550
  if not self.cache_dir:
470
551
  return
471
552
 
472
553
  try:
473
- cache_file = self.cache_dir / f"{key}.cache"
474
- if cache_file.exists():
475
- cache_file.unlink()
554
+ for suffix in ('.zxc', '.cache'):
555
+ cache_file = self.cache_dir / f"{key}{suffix}"
556
+ if cache_file.exists():
557
+ cache_file.unlink()
476
558
  except Exception as e:
477
559
  if self.debug:
478
560
  print(f"⚠️ Cache: Failed to delete from disk: {e}")
479
561
 
562
+ # ==================== File-Based Cache Methods ====================
563
+ # These methods enable faster repeat runs by caching based on file path + mtime
564
+
565
def _get_file_metadata(self, file_path: str) -> Optional[FileMetadata]:
    """Build a FileMetadata snapshot for ``file_path``.

    Args:
        file_path: Path to the source file.

    Returns:
        A FileMetadata holding the resolved path, the stat mtime and size,
        and the MD5 hex digest of the file text (read as UTF-8).  None when
        the file does not exist or anything fails while reading it.
    """
    try:
        source = Path(file_path)
        if not source.exists():
            return None

        info = source.stat()
        # The digest is computed over the decoded text re-encoded to bytes,
        # not over the raw file bytes.
        text = source.read_text(encoding='utf-8')
        digest = hashlib.md5(text.encode()).hexdigest()

        return FileMetadata(
            file_path=str(source.resolve()),
            mtime=info.st_mtime,
            size=info.st_size,
            content_hash=digest,
        )
    except Exception as e:
        if self.debug:
            print(f"⚠️ Cache: Failed to get file metadata: {e}")
        return None
587
+
588
+ def _file_cache_key(self, file_path: str) -> str:
589
+ """Generate cache key from file path"""
590
+ # Normalize path and create stable key
591
+ normalized = str(Path(file_path).resolve())
592
+ return hashlib.md5(normalized.encode()).hexdigest()
593
+
594
def get_by_file(self, file_path: str) -> Optional[List[Bytecode]]:
    """
    Get all cached bytecode for a source file.

    The in-memory file cache is consulted first; an entry is accepted only
    when its mtime, content hash and cache version all match the file's
    current metadata.  A non-matching entry is dropped.  When the cache is
    persistent, the disk cache is tried next.

    Args:
        file_path: Path to the source file

    Returns:
        List of cached bytecode objects (possibly empty), or None if the
        file cannot be read or no valid cache entry exists.
    """
    metadata = self._get_file_metadata(file_path)
    if not metadata:
        self.stats.file_misses += 1
        return None

    file_key = self._file_cache_key(file_path)

    # Check memory cache first
    if file_key in self._file_cache:
        cached = self._file_cache[file_key]
        is_fresh = (
            cached.get('mtime') == metadata.mtime
            and cached.get('content_hash') == metadata.content_hash
            and cached.get('version') == CACHE_VERSION
        )
        if is_fresh:
            self.stats.file_hits += 1
            if self.debug:
                print(f"✅ FileCache: HIT {file_path} ({len(cached.get('bytecodes', []))} entries)")
            return cached.get('bytecodes', [])

        # File changed (or entry written by another cache version): drop it.
        if self.debug:
            print(f"🔄 FileCache: STALE {file_path} (file modified)")
        del self._file_cache[file_key]

    # Check disk cache if persistent
    if self.persistent:
        loaded = self._load_file_bytecode(file_key, metadata)
        # Compare against None rather than truthiness: a validated but empty
        # bytecode list is still a hit, exactly as on the in-memory path.
        if loaded is not None:
            self.stats.file_hits += 1
            return loaded

    self.stats.file_misses += 1
    if self.debug:
        print(f"❌ FileCache: MISS {file_path}")
    return None
642
+
643
def put_by_file(self, file_path: str, bytecodes: List[Bytecode]):
    """
    Record the compiled bytecode list for a source file.

    The entry is stored in the in-memory file cache together with the
    file's current metadata and the cache version.  When the cache is
    persistent, the bytecode and the index are written to disk as well.
    Nothing is stored if the file's metadata cannot be obtained.

    Args:
        file_path: Path to the source file
        bytecodes: List of compiled bytecode objects
    """
    metadata = self._get_file_metadata(file_path)
    if not metadata:
        return

    file_key = self._file_cache_key(file_path)

    entry = dict(
        file_path=metadata.file_path,
        mtime=metadata.mtime,
        size=metadata.size,
        content_hash=metadata.content_hash,
        version=CACHE_VERSION,
        bytecodes=bytecodes,
        cached_at=time.time(),
    )
    self._file_cache[file_key] = entry

    if self.debug:
        print(f"💾 FileCache: PUT {file_path} ({len(bytecodes)} entries)")

    if not self.persistent:
        return

    # Persist the bytecode first, then refresh the on-disk index.
    self._save_file_bytecode(file_key, metadata, bytecodes)
    self._save_file_cache_index()
675
+
676
def invalidate_file(self, file_path: str):
    """Forget any cached bytecode for ``file_path``.

    Removes the in-memory entry if there is one and, for a persistent
    cache, deletes the on-disk bytecode and rewrites the index.
    """
    file_key = self._file_cache_key(file_path)

    try:
        del self._file_cache[file_key]
    except KeyError:
        pass
    else:
        if self.debug:
            print(f"🗑️ FileCache: Invalidated {file_path}")

    if not self.persistent:
        return

    # Disk cleanup happens even when nothing was held in memory.
    self._delete_file_bytecode(file_key)
    self._save_file_cache_index()
689
+
690
def _save_file_bytecode(self, file_key: str, metadata: FileMetadata, bytecodes: List[Bytecode]):
    """Write a file's bytecode list to the cache directory.

    With the binary helpers available the output is ``file_<key>.zxc``:
    a 4-byte little-endian length, the JSON-encoded metadata of that
    length, then the serialized multi-bytecode container.  Otherwise a
    pickled dict is written to ``file_<key>.cache``.  Failures are
    swallowed and only reported in debug mode.

    Args:
        file_key: Cache key of the source file.
        metadata: Current metadata of the source file.
        bytecodes: Compiled bytecode objects to persist.
    """
    if not self.cache_dir:
        return

    try:
        meta_dict = dict(
            file_path=metadata.file_path,
            mtime=metadata.mtime,
            size=metadata.size,
            content_hash=metadata.content_hash,
            version=CACHE_VERSION,
            cached_at=time.time(),
        )

        if not _ZXC_AVAILABLE:
            # Legacy pickle envelope.
            target = self.cache_dir / f"file_{file_key}.cache"
            payload = {'metadata': meta_dict, 'bytecodes': bytecodes, 'cached_at': time.time()}
            with open(target, 'wb') as out:
                pickle.dump(payload, out, protocol=pickle.HIGHEST_PROTOCOL)
        else:
            # Serialize everything before opening the file so a
            # serialization error does not leave an empty file behind.
            header = json.dumps(meta_dict).encode('utf-8')
            body = _zxc_serialize_multi(bytecodes)
            target = self.cache_dir / f"file_{file_key}.zxc"
            with open(target, 'wb') as out:
                import struct as _st
                out.write(_st.pack('<I', len(header)))
                out.write(header)
                out.write(body)

        if self.debug:
            print(f"💾 FileCache: Saved to disk {file_key[:8]}...")
    except Exception as e:
        if self.debug:
            print(f"⚠️ FileCache: Failed to save: {e}")
726
+
727
def _load_file_bytecode(self, file_key: str, current_metadata: FileMetadata) -> Optional[List[Bytecode]]:
    """Load a file's bytecode list from disk and validate it.

    The binary ``file_<key>.zxc`` container is tried first (when the binary
    helpers imported successfully), then the legacy pickled
    ``file_<key>.cache``.  The stored mtime, content hash and cache version
    must all match the current source file; otherwise the file that was
    read is deleted as stale.  On success the entry is also placed in the
    in-memory file cache.

    Args:
        file_key: Cache key of the source file.
        current_metadata: Metadata of the source file as it is now.

    Returns:
        The cached bytecode list, or None when nothing valid is on disk or
        reading fails.
    """
    if not self.cache_dir:
        return None

    cached_meta = None
    bytecodes = None
    cached_at = time.time()
    cache_file = None

    try:
        # Try .zxc binary format first
        if _ZXC_AVAILABLE:
            zxc_file = self.cache_dir / f"file_{file_key}.zxc"
            if zxc_file.exists():
                import struct as _st
                with open(zxc_file, 'rb') as f:
                    raw = f.read()
                # Layout: 4-byte little-endian metadata length, JSON
                # metadata, then the multi-bytecode container.
                meta_len = _st.unpack_from('<I', raw, 0)[0]
                meta_json = raw[4:4 + meta_len]
                cached_meta = json.loads(meta_json)
                cached_at = cached_meta.get('cached_at', time.time())
                bytecodes = _zxc_deserialize_multi(raw[4 + meta_len:])
                cache_file = zxc_file

        # Fallback to legacy pickle
        if bytecodes is None:
            pkl_file = self.cache_dir / f"file_{file_key}.cache"
            if pkl_file.exists():
                # NOTE(security): pickle.load can execute arbitrary code
                # from the file; the cache directory must be trusted.
                with open(pkl_file, 'rb') as f:
                    data = pickle.load(f)
                cached_meta = data.get('metadata', {})
                bytecodes = data.get('bytecodes', [])
                cached_at = data.get('cached_at', time.time())
                cache_file = pkl_file

        if cached_meta is None or bytecodes is None:
            return None

        is_valid = (
            cached_meta.get('mtime') == current_metadata.mtime
            and cached_meta.get('content_hash') == current_metadata.content_hash
            and cached_meta.get('version') == CACHE_VERSION
        )

        if is_valid:
            self._file_cache[file_key] = {
                'file_path': current_metadata.file_path,
                'mtime': current_metadata.mtime,
                'size': current_metadata.size,
                'content_hash': current_metadata.content_hash,
                # get_by_file requires a matching version on in-memory
                # entries; without it this entry would be treated as stale
                # and re-read from disk on every lookup.
                'version': CACHE_VERSION,
                'bytecodes': bytecodes,
                'cached_at': cached_at,
            }
            if self.debug:
                print(f"💾 FileCache: Loaded from disk {file_key[:8]}... ({len(bytecodes)} entries)")
            return bytecodes

        # Stale: remove the file the data came from.
        if cache_file:
            cache_file.unlink()
        if self.debug:
            print(f"🔄 FileCache: Removed stale disk cache {file_key[:8]}...")
    except Exception as e:
        if self.debug:
            print(f"⚠️ FileCache: Failed to load: {e}")

    return None
792
+
793
+ def _delete_file_bytecode(self, file_key: str):
794
+ """Delete file bytecode from disk (both .zxc and legacy .cache)."""
795
+ if not self.cache_dir:
796
+ return
797
+
798
+ try:
799
+ for suffix in ('.zxc', '.cache'):
800
+ cache_file = self.cache_dir / f"file_{file_key}{suffix}"
801
+ if cache_file.exists():
802
+ cache_file.unlink()
803
+ except Exception as e:
804
+ if self.debug:
805
+ print(f"⚠️ FileCache: Failed to delete: {e}")
806
+
807
+ def _load_file_cache_index(self):
808
+ """Load file cache index from disk on startup"""
809
+ if not self.cache_dir:
810
+ return
811
+
812
+ try:
813
+ index_file = self.cache_dir / 'file_index.cache'
814
+ if index_file.exists():
815
+ with open(index_file, 'rb') as f:
816
+ index = pickle.load(f)
817
+ if self.debug:
818
+ print(f"📂 FileCache: Loaded index with {len(index)} files")
819
+ except Exception as e:
820
+ if self.debug:
821
+ print(f"⚠️ FileCache: Failed to load index: {e}")
822
+
823
+ def _save_file_cache_index(self):
824
+ """Save file cache index to disk"""
825
+ if not self.cache_dir:
826
+ return
827
+
828
+ try:
829
+ index_file = self.cache_dir / 'file_index.cache'
830
+ # Just save file keys and metadata (not bytecode)
831
+ index = {
832
+ key: {
833
+ 'file_path': data.get('file_path'),
834
+ 'mtime': data.get('mtime'),
835
+ 'content_hash': data.get('content_hash'),
836
+ 'cached_at': data.get('cached_at')
837
+ }
838
+ for key, data in self._file_cache.items()
839
+ }
840
+ with open(index_file, 'wb') as f:
841
+ pickle.dump(index, f, protocol=pickle.HIGHEST_PROTOCOL)
842
+ except Exception as e:
843
+ if self.debug:
844
+ print(f"⚠️ FileCache: Failed to save index: {e}")
845
+
846
+ # ==================== Pattern Cache Methods ====================
847
+ # These methods enable reusing bytecode for similar code patterns
848
+
849
+ def _get_pattern_hash(self, ast_node: Any) -> str:
850
+ """
851
+ Generate a structural hash for AST pattern matching
852
+
853
+ Unlike _hash_ast which includes literal values, this creates
854
+ a hash of just the AST structure (node types, not values).
855
+ This allows matching similar patterns like:
856
+ for i in range(100) <-> for j in range(500)
857
+ """
858
+ try:
859
+ pattern = self._ast_to_pattern(ast_node)
860
+ return hashlib.md5(json.dumps(pattern, sort_keys=True).encode()).hexdigest()
861
+ except Exception:
862
+ return ""
863
+
864
+ def _ast_to_pattern(self, node: Any, depth: int = 0, max_depth: int = 30) -> Any:
865
+ """
866
+ Convert AST to structural pattern (ignoring literal values)
867
+
868
+ This extracts just the shape of the code, not specific values.
869
+ """
870
+ if depth > max_depth:
871
+ return {'_type': 'MAX_DEPTH'}
872
+
873
+ if node is None:
874
+ return None
875
+
876
+ # For primitives, just record the type not the value
877
+ if isinstance(node, (int, float)):
878
+ return {'_type': 'number'}
879
+ if isinstance(node, str):
880
+ return {'_type': 'string'}
881
+ if isinstance(node, bool):
882
+ return {'_type': 'bool'}
883
+
884
+ if isinstance(node, (list, tuple)):
885
+ return [self._ast_to_pattern(item, depth + 1, max_depth) for item in node]
886
+
887
+ if isinstance(node, dict):
888
+ return {k: self._ast_to_pattern(v, depth + 1, max_depth) for k, v in node.items()}
889
+
890
+ if hasattr(node, '__dict__'):
891
+ result = {'_type': type(node).__name__}
892
+ for key, value in node.__dict__.items():
893
+ if not key.startswith('_'):
894
+ # Skip 'name' and 'value' fields as they vary
895
+ if key in ('name', 'value', 'literal', 'identifier'):
896
+ result[key] = {'_type': type(value).__name__ if value else 'none'}
897
+ else:
898
+ result[key] = self._ast_to_pattern(value, depth + 1, max_depth)
899
+ return result
900
+
901
+ return {'_type': type(node).__name__}
902
+
903
def get_by_pattern(self, ast_node: Any) -> Optional[Bytecode]:
    """
    Look up cached bytecode by the structural pattern of an AST node.

    A hit refreshes the entry's access bookkeeping, moves it to the
    most-recently-used end of the pattern cache and bumps the pattern-hit
    counter.
    NOTE(review): the pattern ignores literal values, so the returned
    bytecode may have been compiled from code with different literals —
    confirm callers account for that.

    Args:
        ast_node: AST node to match

    Returns:
        Matching bytecode or None
    """
    pattern_hash = self._get_pattern_hash(ast_node)
    if not pattern_hash:
        return None

    try:
        entry = self._pattern_cache[pattern_hash]
    except KeyError:
        return None

    entry.update_access()
    self._pattern_cache.move_to_end(pattern_hash)
    self.stats.pattern_hits += 1

    if self.debug:
        print(f"✅ PatternCache: HIT {pattern_hash[:8]}...")

    return entry.bytecode
932
+
933
def put_by_pattern(self, ast_node: Any, bytecode: Bytecode):
    """
    Store bytecode under the structural pattern of an AST node.

    Older entries are evicted until both the entry-count limit and the
    pattern memory budget leave room for the new entry.  Re-storing an
    existing pattern replaces the old entry and its memory accounting.
    Nothing is stored when no pattern hash can be computed.

    Args:
        ast_node: AST node (pattern source)
        bytecode: Compiled bytecode
    """
    pattern_hash = self._get_pattern_hash(ast_node)
    if not pattern_hash:
        return

    # Replacing an existing pattern: release its bytes first, otherwise the
    # memory counter would count the same slot twice.
    previous = self._pattern_cache.pop(pattern_hash, None)
    if previous is not None:
        self._pattern_memory_bytes = max(0, self._pattern_memory_bytes - previous.size_bytes)

    # Evict if at capacity.  The emptiness guard keeps the loop finite even
    # for a non-positive entry limit.
    while self._pattern_cache and len(self._pattern_cache) >= self._max_patterns:
        self._evict_pattern_lru()

    # Evict if over memory budget.
    size = self._estimate_size(bytecode)
    while self._pattern_cache and (self._pattern_memory_bytes + size) > self._max_pattern_memory_bytes:
        self._evict_pattern_lru()

    entry = CacheEntry(
        bytecode=bytecode,
        timestamp=time.time(),
        access_count=1,
        size_bytes=size
    )

    self._pattern_cache[pattern_hash] = entry
    self._pattern_memory_bytes += size

    if self.debug:
        print(f"💾 PatternCache: PUT {pattern_hash[:8]}...")
964
+
965
def is_file_cached(self, file_path: str) -> bool:
    """
    Check if a file has valid cached bytecode.

    Uses the same validity rule as lookups: mtime, content hash and cache
    version must all match.  The in-memory cache is checked first; for a
    persistent cache the on-disk metadata is inspected next, preferring
    the binary ``.zxc`` file over the legacy pickled ``.cache`` file.
    Nothing is loaded into memory or deleted by this check.

    Args:
        file_path: Path to source file

    Returns:
        True if valid cache exists
    """
    metadata = self._get_file_metadata(file_path)
    if not metadata:
        return False

    file_key = self._file_cache_key(file_path)

    def _matches(meta: Dict[str, Any]) -> bool:
        return (meta.get('mtime') == metadata.mtime and
                meta.get('content_hash') == metadata.content_hash and
                meta.get('version') == CACHE_VERSION)

    # Check memory cache
    cached = self._file_cache.get(file_key)
    if cached is not None and _matches(cached):
        return True

    if not (self.persistent and self.cache_dir):
        return False

    # Check binary disk cache: only the length-prefixed JSON header is
    # read, the bytecode payload is left untouched.
    if _ZXC_AVAILABLE:
        zxc_file = self.cache_dir / f"file_{file_key}.zxc"
        if zxc_file.exists():
            try:
                import struct as _st
                with open(zxc_file, 'rb') as f:
                    meta_len = _st.unpack('<I', f.read(4))[0]
                    cached_meta = json.loads(f.read(meta_len))
                return _matches(cached_meta)
            except Exception:
                # An unreadable binary cache counts as "not cached".
                return False

    # Check legacy pickle disk cache
    cache_file = self.cache_dir / f"file_{file_key}.cache"
    if cache_file.exists():
        try:
            # NOTE(security): pickle.load can execute arbitrary code from
            # the file; the cache directory must be trusted.
            with open(cache_file, 'rb') as f:
                data = pickle.load(f)
            return _matches(data.get('metadata', {}))
        except Exception:
            pass

    return False
1002
+
1003
def get_file_cache_info(self, file_path: str) -> Optional[Dict[str, Any]]:
    """Describe the in-memory cache entry for ``file_path``.

    Returns:
        A dict with the stored path, mtime, content hash, number of cached
        bytecode objects and caching time, or None when the file has no
        in-memory entry.  The disk cache is not consulted.
    """
    entry = self._file_cache.get(self._file_cache_key(file_path))
    if entry is None:
        return None

    info = {field: entry.get(field) for field in ('file_path', 'mtime', 'content_hash')}
    info['bytecode_count'] = len(entry.get('bytecodes', []))
    info['cached_at'] = entry.get('cached_at')
    return info
1018
+
480
1019
  # ==================== Utility Methods ====================
481
1020
 
482
1021
  def size(self) -> int:
@@ -528,5 +1067,10 @@ class BytecodeCache:
528
1067
  def __repr__(self) -> str:
529
1068
  """String representation"""
530
1069
  return (f"BytecodeCache(size={len(self._cache)}/{self.max_size}, "
1070
+ f"files={len(self._file_cache)}, patterns={len(self._pattern_cache)}, "
531
1071
  f"memory={self.memory_usage_mb():.2f}MB, "
532
1072
  f"hit_rate={self.stats.hit_rate:.1f}%)")
1073
+
1074
+ def __bool__(self) -> bool:
1075
+ """Ensure the cache instance is truthy even when empty."""
1076
+ return True