ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""Template matching for hypothesis generation.
|
|
2
|
+
|
|
3
|
+
Loads templates from YAML and matches them against error pattern features.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import fnmatch
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from importlib import resources
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
import yaml
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class Template:
    """Declarative rule describing one kind of hypothesis.

    Attributes:
        name: Unique identifier of the template.
        description: Human-readable summary of the template.
        feature_patterns: Glob patterns that are compared against feature names.
        conditions: Matching conditions (``direction``, ``significance``).
        hypothesis_template: Text template containing a ``{feature}`` placeholder.
        actions: Suggested remediation steps.
        confidence_base: Base confidence score (0-1) before any adjustment.
    """

    # Field order is part of the positional constructor signature; keep it.
    name: str
    description: str
    feature_patterns: list[str]
    conditions: dict[str, str]
    hypothesis_template: str
    actions: list[str]
    confidence_base: float
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def load_templates(library: str = "comprehensive") -> list[Template]:
    """Load one template library from the bundled ``templates.yaml``.

    The YAML text is read through ``importlib.resources`` first; if that is
    not possible, the ``templates.yaml`` file next to this module is read
    directly.

    Args:
        library: Top-level key of the library to load
            ('comprehensive' or 'minimal').

    Returns:
        List of Template objects, in the order they appear in the YAML file.

    Raises:
        ValueError: If ``library`` is not a key of the YAML document
            (including the case of an empty document).
        FileNotFoundError: If templates.yaml is not found.
        KeyError: If a template entry lacks one of the required fields.
    """
    # Try to load from package resources first.
    try:
        package_files = resources.files("ml4t.diagnostic.evaluation.trade_shap.hypotheses")
        # Explicit UTF-8: do not depend on the platform's default encoding.
        yaml_content = package_files.joinpath("templates.yaml").read_text(encoding="utf-8")
    except (TypeError, FileNotFoundError, ModuleNotFoundError):
        # Fall back to direct file path (e.g. the package is not importable
        # under its installed name).
        template_path = Path(__file__).parent / "templates.yaml"
        if not template_path.exists():
            raise FileNotFoundError(f"Template file not found: {template_path}") from None
        yaml_content = template_path.read_text(encoding="utf-8")

    # safe_load returns None for an empty document; treat that as "no
    # libraries" so the caller gets the documented ValueError below.
    data = yaml.safe_load(yaml_content) or {}

    if library not in data:
        raise ValueError(f"Unknown template library: '{library}'. Available: {list(data.keys())}")

    return [
        Template(
            name=item["name"],
            description=item["description"],
            feature_patterns=item["feature_patterns"],
            conditions=item["conditions"],
            hypothesis_template=item["hypothesis_template"],
            actions=item["actions"],
            confidence_base=item["confidence_base"],
        )
        for item in data[library]
    ]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass
class MatchResult:
    """Outcome of matching pattern features against one template.

    Attributes:
        template: The template that matched.
        confidence: Confidence score after adjustment.
        matched_features: Feature dicts whose names matched the template.
        primary_feature: Feature dict used for hypothesis generation, if any.
    """

    # Field order is part of the positional constructor signature; keep it.
    template: Template
    confidence: float
    matched_features: list[dict[str, Any]]
    primary_feature: dict[str, Any] | None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class TemplateMatcher:
    """Matches error patterns against hypothesis templates.

    Attributes:
        templates: List of templates to match against

    Example:
        >>> matcher = TemplateMatcher(load_templates('comprehensive'))
        >>> result = matcher.match(pattern_features)
        >>> if result:
        ...     print(result.template.hypothesis_template)
    """

    def __init__(self, templates: list[Template]) -> None:
        """Initialize matcher with templates.

        Args:
            templates: List of templates to match against
        """
        self.templates = templates

    def match(
        self,
        pattern_features: list[dict[str, Any]],
    ) -> MatchResult | None:
        """Find best matching template for pattern features.

        Args:
            pattern_features: List of feature dicts with:
                - name: Feature name
                - mean_shap: Mean SHAP value
                - p_value_t: T-test p-value
                - p_value_mw: Mann-Whitney p-value
                - is_significant: Whether feature is significant

        Returns:
            The match with the highest confidence above 0.0 (the first such
            template wins on ties), or None if no template qualifies.
        """
        best_match: MatchResult | None = None
        best_confidence = 0.0

        for template in self.templates:
            result = self._match_template(template, pattern_features)
            # Strict ">" keeps the earliest template when confidences tie.
            if result is not None and result.confidence > best_confidence:
                best_match = result
                best_confidence = result.confidence

        return best_match

    @staticmethod
    def _min_p_value(feature: dict[str, Any]) -> float:
        """Return the smaller usable p-value of a feature.

        ``None`` and NaN entries are ignored, because the builtin ``min``
        gives order-dependent results with NaN and raises on ``None``.
        If neither p-value is usable, 1.0 (no evidence) is returned.
        """
        usable = []
        for key in ("p_value_t", "p_value_mw"):
            value = feature[key]
            # NaN is the only value that is not equal to itself.
            if value is not None and value == value:
                usable.append(value)
        return min(usable) if usable else 1.0

    def _match_template(
        self,
        template: Template,
        pattern_features: list[dict[str, Any]],
    ) -> MatchResult | None:
        """Check if pattern matches a specific template.

        A template matches when at least one feature name matches one of its
        glob patterns (case-insensitively) and its ``significance`` and
        ``direction`` conditions hold.

        Args:
            template: Template to test.
            pattern_features: Feature dicts as described in :meth:`match`.

        Returns:
            MatchResult if match successful, None otherwise.
        """
        # Feature names are plain identifiers, not file paths, so compare
        # with fnmatchcase on lower-cased text instead of fnmatch.fnmatch,
        # which applies OS-specific path normalisation.
        patterns = [pattern.lower() for pattern in template.feature_patterns]

        matched_features = []
        for feat in pattern_features:
            feat_name = feat["name"].lower()
            if any(fnmatch.fnmatchcase(feat_name, pattern) for pattern in patterns):
                matched_features.append(feat)

        # No matches
        if not matched_features:
            return None

        conditions = template.conditions
        sig_features = [f for f in matched_features if f["is_significant"]]

        # Check significance requirement
        if conditions.get("significance") == "required" and not sig_features:
            return None

        # Primary feature for the direction check: the most significant
        # feature (lowest p-value), else the first matched feature.
        if sig_features:
            primary_feature = min(sig_features, key=self._min_p_value)
        else:
            primary_feature = matched_features[0]

        # Check direction condition; unknown directions (and "any") impose
        # no constraint.
        direction = conditions.get("direction", "any")
        shap_val = primary_feature["mean_shap"]

        if direction in ("high", "positive"):
            rejected = shap_val <= 0
        elif direction in ("low", "negative"):
            rejected = shap_val >= 0
        elif direction == "extreme":
            rejected = abs(shap_val) < 0.1
        elif direction == "moderate":
            magnitude = abs(shap_val)
            rejected = magnitude < 0.05 or magnitude > 0.3
        else:
            rejected = False

        if rejected:
            return None

        # Calculate confidence
        confidence = self._calculate_confidence(
            template, matched_features, sig_features, pattern_features
        )

        return MatchResult(
            template=template,
            confidence=confidence,
            matched_features=matched_features,
            primary_feature=primary_feature,
        )

    def _calculate_confidence(
        self,
        template: Template,
        matched_features: list[dict[str, Any]],
        sig_features: list[dict[str, Any]],
        all_features: list[dict[str, Any]],
    ) -> float:
        """Calculate adjusted confidence for a match.

        Starts from ``template.confidence_base`` and adds three boosts:
        the share of matched features (at most 0.1), the share of
        significant matched features (at most 0.1), and a bonus when the
        mean best p-value of ``sig_features`` is below 0.05.

        Returns:
            The adjusted confidence, capped at 1.0.
        """
        # max(..., 1) guards against division by zero on an empty list.
        n_all = max(len(all_features), 1)

        # Boost for matching multiple features
        match_ratio = len(matched_features) / n_all
        match_boost = min(0.1, match_ratio * 0.15)

        # Boost for significant features
        n_significant = sum(1 for f in matched_features if f["is_significant"])
        significance_boost = min(0.1, n_significant / n_all * 0.2)

        # Boost for strong statistical significance
        if sig_features:
            avg_p = sum(self._min_p_value(f) for f in sig_features) / len(sig_features)
            p_value_boost = max(0.0, (0.05 - avg_p) * 2.0)
        else:
            p_value_boost = 0.0

        confidence = template.confidence_base + match_boost + significance_boost + p_value_boost
        return min(1.0, confidence)
|