ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Caching framework for ML4T Diagnostic computations.
|
|
2
|
+
|
|
3
|
+
Provides intelligent caching for expensive statistical computations
|
|
4
|
+
with support for both memory and disk caching.
|
|
5
|
+
|
|
6
|
+
Examples:
|
|
7
|
+
>>> from ml4t.diagnostic.caching import Cache, CacheConfig
|
|
8
|
+
>>>
|
|
9
|
+
>>> # Create cache
|
|
10
|
+
>>> cache = Cache(CacheConfig(enabled=True, ttl_seconds=3600))
|
|
11
|
+
>>>
|
|
12
|
+
>>> # Use cache decorator
|
|
13
|
+
>>> @cached(cache=cache)  # ``cached`` is exported by ml4t.diagnostic.caching
|
|
14
|
+
>>> def expensive_computation(data, config):
|
|
15
|
+
... return compute_stats(data)
|
|
16
|
+
>>>
|
|
17
|
+
>>> # Or use cache directly
|
|
18
|
+
>>> key = cache.generate_key(data=data, config=config)
|
|
19
|
+
>>> result = cache.get(key)
|
|
20
|
+
>>> if result is None:
|
|
21
|
+
... result = expensive_computation(data, config)
|
|
22
|
+
... cache.set(key, result)
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from ml4t.diagnostic.caching.cache import Cache, CacheBackend, CacheConfig, CacheKey
|
|
26
|
+
from ml4t.diagnostic.caching.decorators import cache_key, cached
|
|
27
|
+
from ml4t.diagnostic.caching.smart_cache import SmartCache
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
# Core cache
|
|
31
|
+
"Cache",
|
|
32
|
+
"CacheConfig",
|
|
33
|
+
"CacheKey",
|
|
34
|
+
"CacheBackend",
|
|
35
|
+
# Smart cache for multi-signal analysis
|
|
36
|
+
"SmartCache",
|
|
37
|
+
# Decorators
|
|
38
|
+
"cached",
|
|
39
|
+
"cache_key",
|
|
40
|
+
]
|
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
"""Core caching implementation with pluggable backends."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
import pickle
|
|
8
|
+
from collections import OrderedDict
|
|
9
|
+
from datetime import UTC, datetime
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from pydantic import BaseModel, Field
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class CacheBackend(str, Enum):
    """Enumerates where a cache keeps its entries.

    The class mixes in ``str``, so every member also compares equal to its
    lowercase string value (``CacheBackend.DISK == "disk"``).
    """

    # Entries are kept in an in-process mapping.
    MEMORY = "memory"
    # Entries are written as files under the configured directory.
    DISK = "disk"
    # Nothing is stored; lookups always miss.
    DISABLED = "disabled"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class CacheConfig(BaseModel):
    """Settings that control how a cache stores and expires its entries.

    Attributes:
        enabled: Master switch for caching.
        backend: Storage backend that holds the entries.
        ttl_seconds: Entry lifetime in seconds; ``None`` disables expiration.
        max_memory_items: Capacity of the in-memory store (LRU eviction).
        disk_path: Directory used by the disk backend.
        compression: Flag requesting compressed disk entries.
            NOTE(review): no code visible alongside this model reads this
            flag - confirm whether compression is implemented elsewhere.
    """

    enabled: bool = True
    backend: CacheBackend = CacheBackend.MEMORY
    ttl_seconds: int | None = Field(
        default=3600,
        description="Cache TTL in seconds",
    )
    max_memory_items: int = Field(
        default=100,
        description="Max memory cache size",
    )
    # A factory is used so every config instance gets its own Path object.
    disk_path: Path = Field(
        default_factory=lambda: Path(".qeval_cache"),
        description="Disk cache directory",
    )
    compression: bool = Field(
        default=False,
        description="Compress disk cache",
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class CacheKey:
|
|
49
|
+
"""Cache key with content-based hashing.
|
|
50
|
+
|
|
51
|
+
Generates stable cache keys from arbitrary data and configuration.
|
|
52
|
+
|
|
53
|
+
Examples:
|
|
54
|
+
>>> key = CacheKey.generate(data=df, config={"alpha": 0.05})
|
|
55
|
+
>>> key_str = str(key) # "sha256:abc123..."
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
def __init__(self, hash_value: str, algorithm: str = "sha256"):
|
|
59
|
+
"""Initialize cache key.
|
|
60
|
+
|
|
61
|
+
Args:
|
|
62
|
+
hash_value: Hash digest as hex string
|
|
63
|
+
algorithm: Hash algorithm used
|
|
64
|
+
"""
|
|
65
|
+
self.hash_value = hash_value
|
|
66
|
+
self.algorithm = algorithm
|
|
67
|
+
|
|
68
|
+
@classmethod
|
|
69
|
+
def generate(cls, **kwargs: Any) -> CacheKey:
|
|
70
|
+
"""Generate cache key from arbitrary keyword arguments.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
**kwargs: Data to hash (must be JSON-serializable or have __hash__)
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
CacheKey instance
|
|
77
|
+
|
|
78
|
+
Examples:
|
|
79
|
+
>>> key = CacheKey.generate(data=data_hash, config={"alpha": 0.05})
|
|
80
|
+
"""
|
|
81
|
+
# Convert to stable JSON representation
|
|
82
|
+
stable_repr = cls._to_stable_repr(kwargs)
|
|
83
|
+
|
|
84
|
+
# Hash it
|
|
85
|
+
hasher = hashlib.sha256()
|
|
86
|
+
hasher.update(stable_repr.encode("utf-8"))
|
|
87
|
+
|
|
88
|
+
return cls(hash_value=hasher.hexdigest(), algorithm="sha256")
|
|
89
|
+
|
|
90
|
+
@staticmethod
|
|
91
|
+
def _to_stable_repr(obj: Any) -> str:
|
|
92
|
+
"""Convert object to stable string representation.
|
|
93
|
+
|
|
94
|
+
Args:
|
|
95
|
+
obj: Object to convert
|
|
96
|
+
|
|
97
|
+
Returns:
|
|
98
|
+
Stable string representation
|
|
99
|
+
"""
|
|
100
|
+
if isinstance(obj, dict):
|
|
101
|
+
# Sort keys for stability
|
|
102
|
+
items = sorted(obj.items())
|
|
103
|
+
return json.dumps(items, sort_keys=True, default=str)
|
|
104
|
+
elif isinstance(obj, list | tuple):
|
|
105
|
+
return json.dumps(obj, default=str)
|
|
106
|
+
else:
|
|
107
|
+
return json.dumps(obj, default=str)
|
|
108
|
+
|
|
109
|
+
def __str__(self) -> str:
|
|
110
|
+
"""String representation."""
|
|
111
|
+
return f"{self.algorithm}:{self.hash_value}"
|
|
112
|
+
|
|
113
|
+
def __repr__(self) -> str:
|
|
114
|
+
"""Developer representation."""
|
|
115
|
+
return f"CacheKey({self.algorithm}:{self.hash_value[:12]}...)"
|
|
116
|
+
|
|
117
|
+
def __eq__(self, other: object) -> bool:
|
|
118
|
+
"""Equality comparison."""
|
|
119
|
+
if not isinstance(other, CacheKey):
|
|
120
|
+
return False
|
|
121
|
+
return self.hash_value == other.hash_value
|
|
122
|
+
|
|
123
|
+
def __hash__(self) -> int:
|
|
124
|
+
"""Hash for use as dict key."""
|
|
125
|
+
return hash(self.hash_value)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class CacheEntry:
|
|
129
|
+
"""Cache entry with metadata."""
|
|
130
|
+
|
|
131
|
+
def __init__(self, value: Any, created_at: datetime, ttl_seconds: int | None = None):
|
|
132
|
+
"""Initialize cache entry.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
value: Cached value
|
|
136
|
+
created_at: Creation timestamp
|
|
137
|
+
ttl_seconds: Time-to-live in seconds (None = no expiration)
|
|
138
|
+
"""
|
|
139
|
+
self.value = value
|
|
140
|
+
self.created_at = created_at
|
|
141
|
+
self.ttl_seconds = ttl_seconds
|
|
142
|
+
|
|
143
|
+
def is_expired(self) -> bool:
|
|
144
|
+
"""Check if entry has expired.
|
|
145
|
+
|
|
146
|
+
Returns:
|
|
147
|
+
True if expired, False otherwise
|
|
148
|
+
"""
|
|
149
|
+
if self.ttl_seconds is None:
|
|
150
|
+
return False
|
|
151
|
+
|
|
152
|
+
now = datetime.now(UTC)
|
|
153
|
+
age = (now - self.created_at).total_seconds()
|
|
154
|
+
return age > self.ttl_seconds
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
class Cache:
    """Multi-backend cache for expensive computations.

    Supports memory and disk backends with automatic expiration; the memory
    backend additionally evicts the least recently used entry when full.

    ``get`` returns ``None`` for a miss, so a stored ``None`` value cannot be
    told apart from a missing entry.

    Examples:
        >>> cache = Cache(CacheConfig(enabled=True, backend=CacheBackend.MEMORY))
        >>>
        >>> # Generate key
        >>> key = cache.generate_key(data=data_hash, config=config)
        >>>
        >>> # Get/set
        >>> result = cache.get(key)
        >>> if result is None:
        ...     result = expensive_computation()
        ...     cache.set(key, result)
    """

    def __init__(self, config: CacheConfig):
        """Initialize cache.

        Args:
            config: Cache configuration
        """
        self.config = config
        self._memory_cache: OrderedDict[CacheKey, CacheEntry] = OrderedDict()

        # Create disk cache directory if needed
        if config.backend == CacheBackend.DISK and config.enabled:
            config.disk_path.mkdir(parents=True, exist_ok=True)

    def generate_key(self, **kwargs: Any) -> CacheKey:
        """Generate cache key from data and configuration.

        Args:
            **kwargs: Data to hash

        Returns:
            Cache key

        Examples:
            >>> key = cache.generate_key(data=data_hash, config={"alpha": 0.05})
        """
        return CacheKey.generate(**kwargs)

    def get(self, key: CacheKey) -> Any | None:
        """Get value from cache.

        Args:
            key: Cache key

        Returns:
            Cached value or None if not found/expired
        """
        if not self.config.enabled:
            return None

        if self.config.backend == CacheBackend.MEMORY:
            return self._get_memory(key)
        if self.config.backend == CacheBackend.DISK:
            return self._get_disk(key)
        return None

    def set(self, key: CacheKey, value: Any) -> None:
        """Store value in cache.

        Args:
            key: Cache key
            value: Value to cache
        """
        if not self.config.enabled:
            return

        if self.config.backend == CacheBackend.MEMORY:
            self._set_memory(key, value)
        elif self.config.backend == CacheBackend.DISK:
            self._set_disk(key, value)

    def invalidate(self, key: CacheKey) -> None:
        """Invalidate specific cache entry.

        Args:
            key: Cache key to invalidate
        """
        if self.config.backend == CacheBackend.MEMORY:
            self._memory_cache.pop(key, None)
        elif self.config.backend == CacheBackend.DISK:
            # missing_ok avoids the race between an existence check and unlink.
            self._get_disk_path(key).unlink(missing_ok=True)

    def clear(self) -> None:
        """Clear all cache entries."""
        if self.config.backend == CacheBackend.MEMORY:
            self._memory_cache.clear()
        elif self.config.backend == CacheBackend.DISK and self.config.disk_path.exists():
            for cache_file in self.config.disk_path.glob("*.pkl"):
                # Another process may have removed the file since the glob.
                cache_file.unlink(missing_ok=True)

    def _get_memory(self, key: CacheKey) -> Any | None:
        """Get from memory cache with LRU update."""
        entry = self._memory_cache.get(key)
        if entry is None:
            return None

        if entry.is_expired():
            self._memory_cache.pop(key, None)
            return None

        # Mark as most recently used.
        self._memory_cache.move_to_end(key)
        return entry.value

    def _set_memory(self, key: CacheKey, value: Any) -> None:
        """Set in memory cache with LRU eviction."""
        store = self._memory_cache
        capacity = self.config.max_memory_items

        # Drop any previous entry first: an overwrite must not evict an
        # unrelated entry, and re-inserting places the key at the
        # most-recently-used end.
        store.pop(key, None)

        if capacity <= 0:
            # A non-positive capacity means nothing can be held; previously
            # this raised KeyError from popitem() on an empty mapping.
            return

        while len(store) >= capacity:
            # Remove oldest (first) item
            store.popitem(last=False)

        store[key] = CacheEntry(
            value=value,
            created_at=datetime.now(UTC),
            ttl_seconds=self.config.ttl_seconds,
        )

    def _get_disk(self, key: CacheKey) -> Any | None:
        """Get from disk cache.

        NOTE(security): entries are read with ``pickle.load``, which can
        execute arbitrary code; the cache directory must only be writable by
        trusted users.
        """
        cache_file = self._get_disk_path(key)

        try:
            with open(cache_file, "rb") as f:
                entry = pickle.load(f)

            if entry.is_expired():
                cache_file.unlink(missing_ok=True)
                return None

            return entry.value
        except FileNotFoundError:
            # Plain cache miss.
            return None
        except Exception:
            # Corrupted or unreadable cache file - remove it
            cache_file.unlink(missing_ok=True)
            return None

    def _set_disk(self, key: CacheKey, value: Any) -> None:
        """Set in disk cache (best effort: failures are not raised)."""
        cache_file = self._get_disk_path(key)

        entry = CacheEntry(
            value=value,
            created_at=datetime.now(UTC),
            ttl_seconds=self.config.ttl_seconds,
        )

        try:
            # The directory may be missing if it was removed, or if caching
            # was enabled on the config after this object was constructed.
            cache_file.parent.mkdir(parents=True, exist_ok=True)
            with open(cache_file, "wb") as f:
                pickle.dump(entry, f)
        except Exception:
            # Failed to cache - not critical. Remove any truncated file so a
            # later read does not have to discover and delete it.
            try:
                cache_file.unlink(missing_ok=True)
            except OSError:
                # Cleanup is best effort too; the reader discards bad files.
                pass

    def _get_disk_path(self, key: CacheKey) -> Path:
        """Get disk path for cache key.

        Digests made only of letters, digits, ``-`` and ``_`` are used as the
        file name unchanged. Any other key string (for example one returned by
        a custom key function) is replaced by its SHA-256 digest so that it
        cannot contain path separators or escape the cache directory.
        """
        name = key.hash_value
        if not name or not all(ch.isalnum() or ch in "-_" for ch in name):
            name = hashlib.sha256(name.encode("utf-8")).hexdigest()
        return self.config.disk_path / f"{name}.pkl"
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""Decorators for automatic caching of function results."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import functools
|
|
6
|
+
import hashlib
|
|
7
|
+
from collections.abc import Callable
|
|
8
|
+
from typing import Any, TypeVar, cast
|
|
9
|
+
|
|
10
|
+
from ml4t.diagnostic.caching.cache import Cache, CacheConfig, CacheKey
|
|
11
|
+
|
|
12
|
+
# Type variable for generic function
|
|
13
|
+
F = TypeVar("F", bound=Callable[..., Any])
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _stable_token(value: Any) -> str:
    """Render one argument as a deterministic, type-tagged string."""
    tag = type(value).__qualname__

    # Scalars: repr() is exact and does not vary between processes, unlike
    # hash(), which is randomised for str/bytes and collides for values such
    # as -1/-2 or 1/1.0/True.
    if value is None or isinstance(value, (bool, int, float, complex, str, bytes)):
        return f"{tag}:{value!r}"

    # Ordered containers: tokenise element by element, keeping the order.
    if isinstance(value, (list, tuple)):
        inner = ",".join(_stable_token(item) for item in value)
        return f"{tag}:[{inner}]"

    # Unordered containers: sort the element tokens so iteration order is
    # irrelevant.
    if isinstance(value, (set, frozenset)):
        inner = ",".join(sorted(_stable_token(item) for item in value))
        return f"{tag}:{{{inner}}}"

    # Plain dicts only: mapping subclasses may treat order as significant.
    if type(value) is dict:
        inner = ",".join(
            sorted(f"{_stable_token(k)}={_stable_token(v)}" for k, v in value.items())
        )
        return f"{tag}:{{{inner}}}"

    # Anything else: use the object's own hash, or repr() when unhashable.
    try:
        return f"{tag}:hash:{hash(value)}"
    except TypeError:
        return f"{tag}:{value!r}"


def cache_key(*args: Any, **kwargs: Any) -> str:
    """Generate cache key from function arguments.

    Scalars and the built-in containers are encoded by type and value, so the
    key is the same in every process and numerically equal values of different
    types (``1``, ``1.0``, ``True``) get different keys. Other objects
    contribute ``hash(obj)``, or ``repr(obj)`` when they are unhashable.

    Args:
        *args: Positional arguments
        **kwargs: Keyword arguments

    Returns:
        Cache key string (SHA-256 hex digest)
    """
    key_parts = [_stable_token(arg) for arg in args]
    # Keyword order must not influence the key.
    key_parts.extend(f"{name}={_stable_token(kwargs[name])}" for name in sorted(kwargs))

    combined = "|".join(key_parts)
    return hashlib.sha256(combined.encode("utf-8")).hexdigest()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def cached(
    cache: Cache | None = None,
    config: CacheConfig | None = None,
    key_func: Callable[..., str] | None = None,
) -> Callable[[F], F]:
    """Decorator for automatic function result caching.

    With the default key, the decorated function's module and qualified name
    are part of every key, so several functions can share one cache (or one
    disk directory) without returning each other's results. A custom
    ``key_func`` is used verbatim and must provide its own namespacing.

    A result of ``None`` is indistinguishable from a cache miss, so functions
    returning ``None`` are re-executed on every call.

    The wrapper exposes:

    * ``cache`` - the underlying cache instance
    * ``cache_clear()`` - clears the whole underlying cache, including
      entries stored by other functions that share it
    * ``cache_invalidate(key)`` - removes one entry
    * ``cache_key_for(*args, **kwargs)`` - the key the wrapper uses for a call

    Args:
        cache: Cache instance to use (creates default if None)
        config: Cache configuration (used if cache is None)
        key_func: Custom key generation function

    Returns:
        Decorated function

    Examples:
        >>> # Use default cache
        >>> @cached()
        ... def compute_stats(data, alpha=0.05):
        ...     return expensive_computation(data, alpha)
        >>>
        >>> # Use custom cache
        >>> cache = Cache(CacheConfig(ttl_seconds=7200))
        >>> @cached(cache=cache)
        ... def compute_metrics(data):
        ...     return expensive_metrics(data)
        >>>
        >>> # Use custom key function
        >>> def my_key_func(data, config):
        ...     return f"{data.shape}_{config['alpha']}"
        >>>
        >>> @cached(key_func=my_key_func)
        ... def analyze(data, config):
        ...     return analysis(data, config)
    """

    def decorator(func: F) -> F:
        # Build a private cache only when the caller did not supply one.
        if cache is not None:
            cache_instance: Cache = cache
        else:
            cache_instance = Cache(config if config is not None else CacheConfig())

        # Identifies the function inside a shared cache.
        namespace = (
            getattr(func, "__module__", ""),
            getattr(func, "__qualname__", repr(func)),
        )

        def make_key(*args: Any, **kwargs: Any) -> CacheKey:
            """Return the cache key used for a call with these arguments."""
            if key_func is not None:
                key_str = key_func(*args, **kwargs)
            else:
                key_str = cache_key(*namespace, *args, **kwargs)
            return CacheKey(hash_value=key_str, algorithm="sha256")

        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            # Skip cache if disabled
            if not cache_instance.config.enabled:
                return func(*args, **kwargs)

            key = make_key(*args, **kwargs)

            # Check cache
            result = cache_instance.get(key)
            if result is not None:
                return result

            # Compute and cache
            result = func(*args, **kwargs)
            cache_instance.set(key, result)
            return result

        # Add cache control methods using setattr (function attributes)
        # Use setattr to bypass static type checking for dynamic attributes
        setattr(wrapper, "cache", cache_instance)  # noqa: B010
        setattr(wrapper, "cache_clear", cache_instance.clear)  # noqa: B010
        setattr(wrapper, "cache_invalidate", cache_instance.invalidate)  # noqa: B010
        setattr(wrapper, "cache_key_for", make_key)  # noqa: B010

        return cast(F, wrapper)

    return decorator
|