ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,40 @@
1
+ """Caching framework for ML4T Diagnostic computations.
2
+
3
+ Provides intelligent caching for expensive statistical computations
4
+ with support for both memory and disk caching.
5
+
6
+ Examples:
7
+ >>> from ml4t.diagnostic.caching import Cache, CacheConfig, cached
8
+ >>>
9
+ >>> # Create cache
10
+ >>> cache = Cache(CacheConfig(enabled=True, ttl_seconds=3600))
11
+ >>>
12
+ >>> # Use cache decorator
13
+ >>> @cached(cache=cache)
14
+ ... def expensive_computation(data, config):
15
+ ...     return compute_stats(data)
16
+ >>>
17
+ >>> # Or use cache directly
18
+ >>> key = cache.generate_key(data=data, config=config)
19
+ >>> result = cache.get(key)
20
+ >>> if result is None:
21
+ ... result = expensive_computation(data, config)
22
+ ... cache.set(key, result)
23
+ """
24
+
25
+ from ml4t.diagnostic.caching.cache import Cache, CacheBackend, CacheConfig, CacheKey
26
+ from ml4t.diagnostic.caching.decorators import cache_key, cached
27
+ from ml4t.diagnostic.caching.smart_cache import SmartCache
28
+
29
+ __all__ = [
30
+ # Core cache
31
+ "Cache",
32
+ "CacheConfig",
33
+ "CacheKey",
34
+ "CacheBackend",
35
+ # Smart cache for multi-signal analysis
36
+ "SmartCache",
37
+ # Decorators
38
+ "cached",
39
+ "cache_key",
40
+ ]
@@ -0,0 +1,331 @@
1
+ """Core caching implementation with pluggable backends."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ import pickle
8
+ from collections import OrderedDict
9
+ from datetime import UTC, datetime
10
+ from enum import Enum
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ from pydantic import BaseModel, Field
15
+
16
+
17
class CacheBackend(str, Enum):
    """Identifies where a cache keeps its entries.

    Each member is also a ``str``, so it compares equal to its plain
    string value (for example ``CacheBackend.MEMORY == "memory"``).
    """

    # Entries are held in the process's memory.
    MEMORY = "memory"
    # Entries are written to files on disk.
    DISK = "disk"
    # No storage backend is selected.
    DISABLED = "disabled"
23
+
24
+
25
class CacheConfig(BaseModel):
    """Settings that control how a cache stores and expires entries.

    Attributes:
        enabled: Master switch for caching.
        backend: Which storage backend holds the entries.
        ttl_seconds: Lifetime of an entry in seconds; ``None`` means
            entries never expire.
        max_memory_items: Upper bound on the number of entries kept by
            the memory backend (least recently used entries go first).
        disk_path: Directory used by the disk backend.
        compression: Whether disk entries should be compressed.
    """

    enabled: bool = True
    backend: CacheBackend = CacheBackend.MEMORY
    ttl_seconds: int | None = Field(3600, description="Cache TTL in seconds")
    max_memory_items: int = Field(100, description="Max memory cache size")
    # A fresh Path is built per instance; the default is a relative path.
    disk_path: Path = Field(
        description="Disk cache directory",
        default_factory=lambda: Path(".qeval_cache"),
    )
    # NOTE(review): the Cache class in this module never reads this flag;
    # confirm where (or whether) compression is actually applied.
    compression: bool = Field(False, description="Compress disk cache")
46
+
47
+
48
class CacheKey:
    """Cache key with content-based hashing.

    A key is a hex digest plus the name of the algorithm that produced it.
    Equality and hashing are based on the digest only, so keys can be used
    directly as dictionary keys.

    Examples:
        >>> key = CacheKey.generate(data="abc", config={"alpha": 0.05})
        >>> str(key).startswith("sha256:")
        True
    """

    def __init__(self, hash_value: str, algorithm: str = "sha256"):
        """Initialize cache key.

        Args:
            hash_value: Hash digest as hex string
            algorithm: Hash algorithm used
        """
        self.hash_value = hash_value
        self.algorithm = algorithm

    @classmethod
    def generate(cls, **kwargs: Any) -> "CacheKey":
        """Generate cache key from arbitrary keyword arguments.

        The keyword arguments are serialized to a canonical JSON string
        (see ``_to_stable_repr``) and hashed with SHA-256, so the order in
        which the keywords are passed does not affect the key.

        Args:
            **kwargs: Data to hash. Values that JSON cannot encode are
                converted with ``str()`` (sets are converted to a sorted
                list first).

        Returns:
            CacheKey instance

        Examples:
            >>> a = CacheKey.generate(data="abc", config={"alpha": 0.05})
            >>> b = CacheKey.generate(config={"alpha": 0.05}, data="abc")
            >>> a == b
            True
        """
        stable_repr = cls._to_stable_repr(kwargs)
        digest = hashlib.sha256(stable_repr.encode("utf-8")).hexdigest()
        return cls(hash_value=digest, algorithm="sha256")

    @staticmethod
    def _json_fallback(value: Any) -> Any:
        """Encode a value that ``json`` cannot serialize natively.

        Sets have no defined iteration order, so ``str(a_set)`` is not a
        stable representation; they are emitted as a list sorted by
        ``repr`` (which also works for mixed element types). Everything
        else falls back to ``str(value)``.
        """
        if isinstance(value, (set, frozenset)):
            return sorted(value, key=repr)
        return str(value)

    @staticmethod
    def _to_stable_repr(obj: Any) -> str:
        """Convert object to stable string representation.

        Args:
            obj: Object to convert

        Returns:
            JSON text in which dictionary keys are sorted at every nesting
            level. A top-level dict is emitted as a sorted list of
            ``[key, value]`` pairs.
        """
        if isinstance(obj, dict):
            obj = sorted(obj.items())
        # sort_keys is applied on every path so dicts nested inside lists or
        # tuples are canonicalized as well.
        return json.dumps(obj, sort_keys=True, default=CacheKey._json_fallback)

    def __str__(self) -> str:
        """Return ``"<algorithm>:<digest>"``."""
        return f"{self.algorithm}:{self.hash_value}"

    def __repr__(self) -> str:
        """Return a short developer representation (digest truncated to 12 chars)."""
        return f"CacheKey({self.algorithm}:{self.hash_value[:12]}...)"

    def __eq__(self, other: object) -> bool:
        """Compare by digest; defer to the other operand for foreign types."""
        if not isinstance(other, CacheKey):
            # Let Python try the reflected comparison; the final result of
            # ``key == something_else`` is still False.
            return NotImplemented
        return self.hash_value == other.hash_value

    def __hash__(self) -> int:
        """Hash of the digest, consistent with ``__eq__``."""
        return hash(self.hash_value)
126
+
127
+
128
+ class CacheEntry:
129
+ """Cache entry with metadata."""
130
+
131
+ def __init__(self, value: Any, created_at: datetime, ttl_seconds: int | None = None):
132
+ """Initialize cache entry.
133
+
134
+ Args:
135
+ value: Cached value
136
+ created_at: Creation timestamp
137
+ ttl_seconds: Time-to-live in seconds (None = no expiration)
138
+ """
139
+ self.value = value
140
+ self.created_at = created_at
141
+ self.ttl_seconds = ttl_seconds
142
+
143
+ def is_expired(self) -> bool:
144
+ """Check if entry has expired.
145
+
146
+ Returns:
147
+ True if expired, False otherwise
148
+ """
149
+ if self.ttl_seconds is None:
150
+ return False
151
+
152
+ now = datetime.now(UTC)
153
+ age = (now - self.created_at).total_seconds()
154
+ return age > self.ttl_seconds
155
+
156
+
157
class Cache:
    """Multi-backend cache for expensive computations.

    Supports memory and disk backends with automatic expiration and LRU
    eviction (memory backend only).

    Notes:
        * ``get`` returns ``None`` both for a miss and for a stored ``None``
          value; the two cases cannot be told apart.
        * ``invalidate`` and ``clear`` act on the configured backend even
          when ``config.enabled`` is false.
        * The disk backend stores entries with ``pickle``. Unpickling can
          execute arbitrary code, so ``disk_path`` must only ever contain
          files written by trusted code.

    Examples:
        >>> cache = Cache(CacheConfig(enabled=True, backend=CacheBackend.MEMORY))
        >>>
        >>> # Generate key
        >>> key = cache.generate_key(data=data_hash, config=config)
        >>>
        >>> # Get/set
        >>> result = cache.get(key)
        >>> if result is None:
        ...     result = expensive_computation()
        ...     cache.set(key, result)
    """

    def __init__(self, config: CacheConfig):
        """Initialize cache.

        Args:
            config: Cache configuration
        """
        self.config = config
        self._memory_cache: OrderedDict[CacheKey, CacheEntry] = OrderedDict()

        # Create disk cache directory if needed
        if config.backend == CacheBackend.DISK and config.enabled:
            config.disk_path.mkdir(parents=True, exist_ok=True)

    def generate_key(self, **kwargs: Any) -> CacheKey:
        """Generate cache key from data and configuration.

        Args:
            **kwargs: Data to hash (keyword arguments only)

        Returns:
            Cache key

        Examples:
            >>> key = cache.generate_key(data=data_hash, config={"alpha": 0.05})
        """
        return CacheKey.generate(**kwargs)

    def get(self, key: CacheKey) -> Any | None:
        """Get value from cache.

        Args:
            key: Cache key

        Returns:
            Cached value, or None if caching is disabled or the entry is
            missing or expired
        """
        if not self.config.enabled:
            return None

        backend = self.config.backend
        if backend == CacheBackend.MEMORY:
            return self._get_memory(key)
        if backend == CacheBackend.DISK:
            return self._get_disk(key)
        return None

    def set(self, key: CacheKey, value: Any) -> None:
        """Store value in cache (no-op when caching is disabled).

        Args:
            key: Cache key
            value: Value to cache
        """
        if not self.config.enabled:
            return

        backend = self.config.backend
        if backend == CacheBackend.MEMORY:
            self._set_memory(key, value)
        elif backend == CacheBackend.DISK:
            self._set_disk(key, value)

    def invalidate(self, key: CacheKey) -> None:
        """Invalidate specific cache entry.

        Args:
            key: Cache key to invalidate
        """
        backend = self.config.backend
        if backend == CacheBackend.MEMORY:
            self._memory_cache.pop(key, None)
        elif backend == CacheBackend.DISK:
            # missing_ok avoids the check-then-delete race of exists()+unlink().
            self._get_disk_path(key).unlink(missing_ok=True)

    def clear(self) -> None:
        """Clear all cache entries.

        For the disk backend this removes every ``*.pkl`` file directly
        inside ``config.disk_path``.
        """
        backend = self.config.backend
        if backend == CacheBackend.MEMORY:
            self._memory_cache.clear()
        elif backend == CacheBackend.DISK and self.config.disk_path.exists():
            for cache_file in self.config.disk_path.glob("*.pkl"):
                cache_file.unlink(missing_ok=True)

    def _get_memory(self, key: CacheKey) -> Any | None:
        """Get from memory cache, dropping expired entries and updating LRU order."""
        entry = self._memory_cache.get(key)
        if entry is None:
            return None

        if entry.is_expired():
            del self._memory_cache[key]
            return None

        # Most recently used entries live at the end of the OrderedDict.
        self._memory_cache.move_to_end(key)
        return entry.value

    def _set_memory(self, key: CacheKey, value: Any) -> None:
        """Set in memory cache with LRU eviction.

        A non-positive ``max_memory_items`` means nothing can be stored, so
        the call is a no-op instead of trying to evict from an empty cache.
        """
        capacity = self.config.max_memory_items
        if capacity <= 0:
            return

        # Drop any previous entry for this key first: overwriting must not
        # evict an unrelated entry, and re-inserting places the key at the
        # most-recently-used end.
        self._memory_cache.pop(key, None)

        while len(self._memory_cache) >= capacity:
            # Remove oldest (first) item
            self._memory_cache.popitem(last=False)

        self._memory_cache[key] = CacheEntry(
            value=value,
            created_at=datetime.now(UTC),
            ttl_seconds=self.config.ttl_seconds,
        )

    def _get_disk(self, key: CacheKey) -> Any | None:
        """Get from disk cache, deleting unreadable or expired files."""
        cache_file = self._get_disk_path(key)

        try:
            # SECURITY: pickle.load can execute arbitrary code. This is only
            # safe while disk_path holds files written by this cache.
            with open(cache_file, "rb") as f:
                entry = pickle.load(f)
            expired = entry.is_expired()
            value = entry.value
        except FileNotFoundError:
            return None
        except Exception:
            # Unreadable or foreign file: unpickling can raise almost any
            # exception type, so treat all of them as a corrupted entry.
            cache_file.unlink(missing_ok=True)
            return None

        if expired:
            cache_file.unlink(missing_ok=True)
            return None
        return value

    def _set_disk(self, key: CacheKey, value: Any) -> None:
        """Set in disk cache (best effort; failures never propagate)."""
        cache_file = self._get_disk_path(key)

        entry = CacheEntry(
            value=value,
            created_at=datetime.now(UTC),
            ttl_seconds=self.config.ttl_seconds,
        )

        # Serialize before touching the file so an unpicklable value cannot
        # leave a truncated cache file behind.
        try:
            payload = pickle.dumps(entry)
        except Exception:
            # Value cannot be pickled - caching is optional, so skip it.
            return

        try:
            cache_file.write_bytes(payload)
        except (OSError, ValueError):
            # Failed to cache - not critical, but do not keep a partial file.
            try:
                cache_file.unlink(missing_ok=True)
            except (OSError, ValueError):
                pass

    def _get_disk_path(self, key: CacheKey) -> Path:
        """Get disk path for cache key (``<disk_path>/<hash_value>.pkl``)."""
        return self.config.disk_path / f"{key.hash_value}.pkl"
@@ -0,0 +1,131 @@
1
+ """Decorators for automatic caching of function results."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import functools
6
+ import hashlib
7
+ from collections.abc import Callable
8
+ from typing import Any, TypeVar, cast
9
+
10
+ from ml4t.diagnostic.caching.cache import Cache, CacheConfig, CacheKey
11
+
12
+ # Type variable for generic function
13
+ F = TypeVar("F", bound=Callable[..., Any])
14
+
15
+
16
# Built-in value types whose repr() fully and deterministically describes them.
_SCALAR_TYPES = (str, bytes, int, float, complex, type(None))


def _arg_token(value: Any) -> str:
    """Return a text token identifying one argument value.

    Scalars are encoded as ``<type name>:<repr>`` rather than through
    ``hash()``: built-in hashes collide for distinct values (for example
    ``hash(-1) == hash(-2)`` and ``hash(1) == hash(1.0) == hash(True)``) and
    string hashes differ between interpreter runs. Tuples are encoded
    element by element for the same reason. Other hashable objects still
    use their hash; unhashable objects fall back to ``repr()``.
    """
    if isinstance(value, _SCALAR_TYPES):
        return f"{type(value).__name__}:{value!r}"
    if isinstance(value, tuple):
        inner = ",".join(_arg_token(item) for item in value)
        return f"{type(value).__name__}({inner})"
    try:
        return f"{type(value).__name__}#{hash(value)}"
    except TypeError:
        # Not hashable - use repr
        return repr(value)


def cache_key(*args: Any, **kwargs: Any) -> str:
    """Generate cache key from function arguments.

    Positional arguments contribute in call order; keyword arguments are
    sorted by name, so their order in the call does not matter.

    Args:
        *args: Positional arguments
        **kwargs: Keyword arguments

    Returns:
        SHA-256 hex digest of the combined argument tokens
    """
    key_parts = [_arg_token(arg) for arg in args]
    key_parts.extend(f"{name}={_arg_token(value)}" for name, value in sorted(kwargs.items()))

    # Hash combined key
    combined = "|".join(key_parts)
    return hashlib.sha256(combined.encode("utf-8")).hexdigest()
56
+
57
+
58
def cached(
    cache: Cache | None = None,
    config: CacheConfig | None = None,
    key_func: Callable[..., str] | None = None,
) -> Callable[[F], F]:
    """Decorator for automatic function result caching.

    When no ``key_func`` is given, the key is derived from the decorated
    function's module and qualified name plus the call arguments, so several
    functions can share one ``Cache`` without reading each other's results.
    A custom ``key_func`` is used verbatim and must itself produce distinct
    keys if the cache is shared.

    A result of ``None`` is indistinguishable from a cache miss, so a
    function that returns ``None`` is re-executed on every call.

    The returned wrapper exposes ``cache`` (the ``Cache`` in use),
    ``cache_clear()``, ``cache_invalidate(key)`` and
    ``cache_key_for(*args, **kwargs)``, which returns the ``CacheKey`` the
    wrapper would use for that call.

    Args:
        cache: Cache instance to use (creates default if None)
        config: Cache configuration (used if cache is None)
        key_func: Custom key generation function; called with the same
            arguments as the decorated function

    Returns:
        Decorated function

    Examples:
        >>> # Use default cache
        >>> @cached()
        ... def compute_stats(data, alpha=0.05):
        ...     return expensive_computation(data, alpha)
        >>>
        >>> # Use custom cache
        >>> cache = Cache(CacheConfig(ttl_seconds=7200))
        >>> @cached(cache=cache)
        ... def compute_metrics(data):
        ...     return expensive_metrics(data)
        >>>
        >>> # Use custom key function
        >>> def my_key_func(data, config):
        ...     return f"{data.shape}_{config['alpha']}"
        >>>
        >>> @cached(key_func=my_key_func)
        ... def analyze(data, config):
        ...     return analysis(data, config)
    """

    def decorator(func: F) -> F:
        # Create cache if needed - use local variable for type safety
        cache_instance: Cache
        if cache is not None:
            cache_instance = cache
        else:
            cache_instance = Cache(config if config is not None else CacheConfig())

        # Namespaces default keys per function. getattr fallbacks cover
        # callables that lack these attributes.
        func_module = getattr(func, "__module__", "")
        func_name = getattr(func, "__qualname__", repr(func))
        func_id = f"{func_module}.{func_name}"

        def make_key(*args: Any, **kwargs: Any) -> CacheKey:
            """Build the cache key used for a call with these arguments."""
            if key_func is not None:
                key_str = key_func(*args, **kwargs)
            else:
                key_str = cache_key(func_id, *args, **kwargs)
            return CacheKey(hash_value=key_str, algorithm="sha256")

        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            # Skip cache if disabled
            if not cache_instance.config.enabled:
                return func(*args, **kwargs)

            key = make_key(*args, **kwargs)

            # Check cache (None means "miss"; see the docstring note)
            result = cache_instance.get(key)
            if result is not None:
                return result

            # Compute and cache
            result = func(*args, **kwargs)
            cache_instance.set(key, result)

            return result

        # Add cache control methods using setattr (function attributes)
        # Use setattr to bypass static type checking for dynamic attributes
        setattr(wrapper, "cache", cache_instance)  # noqa: B010
        setattr(wrapper, "cache_clear", cache_instance.clear)  # noqa: B010
        setattr(wrapper, "cache_invalidate", cache_instance.invalidate)  # noqa: B010
        setattr(wrapper, "cache_key_for", make_key)  # noqa: B010

        return cast(F, wrapper)

    return decorator