ml4t-diagnostic 0.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ml4t/diagnostic/AGENT.md +25 -0
- ml4t/diagnostic/__init__.py +166 -0
- ml4t/diagnostic/backends/__init__.py +10 -0
- ml4t/diagnostic/backends/adapter.py +192 -0
- ml4t/diagnostic/backends/polars_backend.py +899 -0
- ml4t/diagnostic/caching/__init__.py +40 -0
- ml4t/diagnostic/caching/cache.py +331 -0
- ml4t/diagnostic/caching/decorators.py +131 -0
- ml4t/diagnostic/caching/smart_cache.py +339 -0
- ml4t/diagnostic/config/AGENT.md +24 -0
- ml4t/diagnostic/config/README.md +267 -0
- ml4t/diagnostic/config/__init__.py +219 -0
- ml4t/diagnostic/config/barrier_config.py +277 -0
- ml4t/diagnostic/config/base.py +301 -0
- ml4t/diagnostic/config/event_config.py +148 -0
- ml4t/diagnostic/config/feature_config.py +404 -0
- ml4t/diagnostic/config/multi_signal_config.py +55 -0
- ml4t/diagnostic/config/portfolio_config.py +215 -0
- ml4t/diagnostic/config/report_config.py +391 -0
- ml4t/diagnostic/config/sharpe_config.py +202 -0
- ml4t/diagnostic/config/signal_config.py +206 -0
- ml4t/diagnostic/config/trade_analysis_config.py +310 -0
- ml4t/diagnostic/config/validation.py +279 -0
- ml4t/diagnostic/core/__init__.py +29 -0
- ml4t/diagnostic/core/numba_utils.py +315 -0
- ml4t/diagnostic/core/purging.py +372 -0
- ml4t/diagnostic/core/sampling.py +471 -0
- ml4t/diagnostic/errors/__init__.py +205 -0
- ml4t/diagnostic/evaluation/AGENT.md +26 -0
- ml4t/diagnostic/evaluation/__init__.py +437 -0
- ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
- ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
- ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
- ml4t/diagnostic/evaluation/dashboard.py +715 -0
- ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
- ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
- ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
- ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
- ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
- ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
- ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
- ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
- ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
- ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
- ml4t/diagnostic/evaluation/event_analysis.py +647 -0
- ml4t/diagnostic/evaluation/excursion.py +390 -0
- ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
- ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
- ml4t/diagnostic/evaluation/framework.py +935 -0
- ml4t/diagnostic/evaluation/metric_registry.py +255 -0
- ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
- ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
- ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
- ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
- ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
- ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
- ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
- ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
- ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
- ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
- ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
- ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
- ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
- ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
- ml4t/diagnostic/evaluation/multi_signal.py +550 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
- ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
- ml4t/diagnostic/evaluation/report_generation.py +824 -0
- ml4t/diagnostic/evaluation/signal_selector.py +452 -0
- ml4t/diagnostic/evaluation/stat_registry.py +139 -0
- ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
- ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
- ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
- ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
- ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
- ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
- ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
- ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
- ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
- ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
- ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
- ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
- ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
- ml4t/diagnostic/evaluation/stats/moments.py +164 -0
- ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
- ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
- ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
- ml4t/diagnostic/evaluation/themes.py +330 -0
- ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
- ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
- ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
- ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
- ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
- ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
- ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
- ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
- ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
- ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
- ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
- ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
- ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
- ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
- ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
- ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
- ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
- ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
- ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
- ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
- ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
- ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
- ml4t/diagnostic/evaluation/validated_cv.py +535 -0
- ml4t/diagnostic/evaluation/visualization.py +1050 -0
- ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
- ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
- ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
- ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
- ml4t/diagnostic/integration/__init__.py +48 -0
- ml4t/diagnostic/integration/backtest_contract.py +671 -0
- ml4t/diagnostic/integration/data_contract.py +316 -0
- ml4t/diagnostic/integration/engineer_contract.py +226 -0
- ml4t/diagnostic/logging/__init__.py +77 -0
- ml4t/diagnostic/logging/logger.py +245 -0
- ml4t/diagnostic/logging/performance.py +234 -0
- ml4t/diagnostic/logging/progress.py +234 -0
- ml4t/diagnostic/logging/wandb.py +412 -0
- ml4t/diagnostic/metrics/__init__.py +9 -0
- ml4t/diagnostic/metrics/percentiles.py +128 -0
- ml4t/diagnostic/py.typed +1 -0
- ml4t/diagnostic/reporting/__init__.py +43 -0
- ml4t/diagnostic/reporting/base.py +130 -0
- ml4t/diagnostic/reporting/html_renderer.py +275 -0
- ml4t/diagnostic/reporting/json_renderer.py +51 -0
- ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
- ml4t/diagnostic/results/AGENT.md +24 -0
- ml4t/diagnostic/results/__init__.py +105 -0
- ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
- ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
- ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
- ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
- ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
- ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
- ml4t/diagnostic/results/barrier_results/validation.py +38 -0
- ml4t/diagnostic/results/base.py +177 -0
- ml4t/diagnostic/results/event_results.py +349 -0
- ml4t/diagnostic/results/feature_results.py +787 -0
- ml4t/diagnostic/results/multi_signal_results.py +431 -0
- ml4t/diagnostic/results/portfolio_results.py +281 -0
- ml4t/diagnostic/results/sharpe_results.py +448 -0
- ml4t/diagnostic/results/signal_results/__init__.py +74 -0
- ml4t/diagnostic/results/signal_results/ic.py +581 -0
- ml4t/diagnostic/results/signal_results/irtc.py +110 -0
- ml4t/diagnostic/results/signal_results/quantile.py +392 -0
- ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
- ml4t/diagnostic/results/signal_results/turnover.py +213 -0
- ml4t/diagnostic/results/signal_results/validation.py +147 -0
- ml4t/diagnostic/signal/AGENT.md +17 -0
- ml4t/diagnostic/signal/__init__.py +69 -0
- ml4t/diagnostic/signal/_report.py +152 -0
- ml4t/diagnostic/signal/_utils.py +261 -0
- ml4t/diagnostic/signal/core.py +275 -0
- ml4t/diagnostic/signal/quantile.py +148 -0
- ml4t/diagnostic/signal/result.py +214 -0
- ml4t/diagnostic/signal/signal_ic.py +129 -0
- ml4t/diagnostic/signal/turnover.py +182 -0
- ml4t/diagnostic/splitters/AGENT.md +19 -0
- ml4t/diagnostic/splitters/__init__.py +36 -0
- ml4t/diagnostic/splitters/base.py +501 -0
- ml4t/diagnostic/splitters/calendar.py +421 -0
- ml4t/diagnostic/splitters/calendar_config.py +91 -0
- ml4t/diagnostic/splitters/combinatorial.py +1064 -0
- ml4t/diagnostic/splitters/config.py +322 -0
- ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
- ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
- ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
- ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
- ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
- ml4t/diagnostic/splitters/group_isolation.py +329 -0
- ml4t/diagnostic/splitters/persistence.py +316 -0
- ml4t/diagnostic/splitters/utils.py +207 -0
- ml4t/diagnostic/splitters/walk_forward.py +757 -0
- ml4t/diagnostic/utils/__init__.py +42 -0
- ml4t/diagnostic/utils/config.py +542 -0
- ml4t/diagnostic/utils/dependencies.py +318 -0
- ml4t/diagnostic/utils/sessions.py +127 -0
- ml4t/diagnostic/validation/__init__.py +54 -0
- ml4t/diagnostic/validation/dataframe.py +274 -0
- ml4t/diagnostic/validation/returns.py +280 -0
- ml4t/diagnostic/validation/timeseries.py +299 -0
- ml4t/diagnostic/visualization/AGENT.md +19 -0
- ml4t/diagnostic/visualization/__init__.py +223 -0
- ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
- ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
- ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
- ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
- ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
- ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
- ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
- ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
- ml4t/diagnostic/visualization/barrier_plots.py +782 -0
- ml4t/diagnostic/visualization/core.py +1060 -0
- ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
- ml4t/diagnostic/visualization/dashboards/base.py +582 -0
- ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
- ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
- ml4t/diagnostic/visualization/dashboards.py +43 -0
- ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
- ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
- ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
- ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
- ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
- ml4t/diagnostic/visualization/feature_plots.py +888 -0
- ml4t/diagnostic/visualization/interaction_plots.py +618 -0
- ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
- ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
- ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
- ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
- ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
- ml4t/diagnostic/visualization/report_generation.py +1343 -0
- ml4t/diagnostic/visualization/signal/__init__.py +103 -0
- ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
- ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
- ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
- ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
- ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
- ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
- ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
- ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
- ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
- ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
- ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
- ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
"""Price path excursion analysis for TP/SL parameter selection.
|
|
2
|
+
|
|
3
|
+
This module provides tools to analyze price movement distributions over various
|
|
4
|
+
horizons, helping traders set take-profit and stop-loss levels based on
|
|
5
|
+
empirical price behavior.
|
|
6
|
+
|
|
7
|
+
**Key Distinction from Trade MFE/MAE**:
|
|
8
|
+
|
|
9
|
+
- **Trade MFE/MAE** (backtest library): Tracks best/worst unrealized return
|
|
10
|
+
during actual trades. Used for exit efficiency analysis.
|
|
11
|
+
|
|
12
|
+
- **Price Excursion Analysis** (this module): Analyzes potential price movements
|
|
13
|
+
over horizons BEFORE trading. Used for parameter selection (TP/SL levels).
|
|
14
|
+
|
|
15
|
+
Example workflow:
|
|
16
|
+
>>> # 1. Analyze historical price movements
|
|
17
|
+
>>> result = analyze_excursions(prices, horizons=[30, 60, 120])
|
|
18
|
+
>>>
|
|
19
|
+
>>> # 2. See distribution of movements
|
|
20
|
+
>>> print(result.percentiles)
|
|
21
|
+
>>>
|
|
22
|
+
>>> # 3. Choose TP/SL based on percentiles
|
|
23
|
+
>>> # e.g., 75th percentile MFE at 60 bars = 2.5% → use 2% take-profit
|
|
24
|
+
>>> tp_level = result.get_percentile(horizon=60, percentile=75, side="mfe")
|
|
25
|
+
>>>
|
|
26
|
+
>>> # 4. Use these informed parameters in triple barrier labeling
|
|
27
|
+
>>> from ml4t.engineer.labeling import triple_barrier_labels
|
|
28
|
+
>>> labels = triple_barrier_labels(prices, upper_barrier=tp_level, ...)
|
|
29
|
+
|
|
30
|
+
Warning:
|
|
31
|
+
⚠️ FORWARD-LOOKING ANALYSIS
|
|
32
|
+
This computes future price movements for parameter selection.
|
|
33
|
+
DO NOT use excursion values as ML features (data leakage).
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
from __future__ import annotations
|
|
37
|
+
|
|
38
|
+
from dataclasses import dataclass, field
|
|
39
|
+
from typing import TYPE_CHECKING, Literal
|
|
40
|
+
|
|
41
|
+
import numpy as np
|
|
42
|
+
import polars as pl
|
|
43
|
+
|
|
44
|
+
if TYPE_CHECKING:
|
|
45
|
+
import pandas as pd
|
|
46
|
+
from numpy.typing import NDArray
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class ExcursionStats:
    """Statistics for excursions at a single horizon."""

    # Horizon (in bars) these statistics describe.
    horizon: int
    # Count of valid samples the statistics were computed from
    # (rows with NaN excursions are excluded upstream).
    n_samples: int

    # MFE (Maximum Favorable Excursion) stats
    mfe_mean: float
    mfe_std: float
    mfe_median: float
    mfe_skewness: float

    # MAE (Maximum Adverse Excursion) stats
    mae_mean: float
    mae_std: float
    mae_median: float
    mae_skewness: float

    # Percentiles (stored as dicts)
    # Maps percentile level (e.g. 75) -> excursion value at that level.
    # Keys are whatever levels the caller requested from analyze_excursions.
    mfe_percentiles: dict[float, float] = field(default_factory=dict)
    mae_percentiles: dict[float, float] = field(default_factory=dict)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
class ExcursionAnalysisResult:
    """Result container for price excursion analysis.

    Attributes:
        horizons: List of horizons analyzed
        n_samples: Number of valid samples used
        return_type: Type of returns computed ('pct', 'log', 'abs')
        statistics: Per-horizon statistics
        percentile_matrix: DataFrame with horizons × percentiles
        excursions: Raw excursion values (optional, can be large)
        rolling_stats: Rolling statistics over time (optional)
    """

    horizons: list[int]
    n_samples: int
    return_type: str
    statistics: dict[int, ExcursionStats]
    percentile_matrix: pl.DataFrame
    excursions: pl.DataFrame | None = None
    rolling_stats: pl.DataFrame | None = None

    def get_percentile(self, horizon: int, percentile: float, side: Literal["mfe", "mae"]) -> float:
        """Look up one precomputed percentile value.

        Args:
            horizon: The horizon to query
            percentile: Percentile (0-100)
            side: 'mfe' for favorable or 'mae' for adverse

        Returns:
            The percentile value

        Raises:
            ValueError: If the horizon or percentile was not part of the
                analysis.

        Example:
            >>> result.get_percentile(horizon=60, percentile=75, side="mfe")
            0.025  # 75th percentile MFE at 60 bars is 2.5%
        """
        if horizon not in self.statistics:
            raise ValueError(f"Horizon {horizon} not in analysis. Available: {self.horizons}")

        stat = self.statistics[horizon]
        table = stat.mfe_percentiles if side == "mfe" else stat.mae_percentiles

        # EAFP: a missing level surfaces as a ValueError listing what exists.
        try:
            return table[percentile]
        except KeyError:
            raise ValueError(
                f"Percentile {percentile} not computed. Available: {list(table.keys())}"
            ) from None

    def summary(self) -> str:
        """Generate a text summary of the analysis."""
        out: list[str] = []
        out.append("Price Excursion Analysis Summary")
        out.append("=" * 40)
        out.append(f"Samples: {self.n_samples:,}")
        out.append(f"Return type: {self.return_type}")
        out.append(f"Horizons: {self.horizons}")
        out.append("")

        out.append("MFE (Maximum Favorable Excursion):")
        for h in self.horizons:
            stat = self.statistics[h]
            # Fall back to the stored median when 50 wasn't a requested level.
            p50 = stat.mfe_percentiles.get(50, stat.mfe_median)
            p90 = stat.mfe_percentiles.get(90, 0)
            out.append(f"  {h:3d} bars: median={p50:+.2%}, 90th={p90:+.2%}")

        out.append("")
        out.append("MAE (Maximum Adverse Excursion):")
        for h in self.horizons:
            stat = self.statistics[h]
            p50 = stat.mae_percentiles.get(50, stat.mae_median)
            p10 = stat.mae_percentiles.get(10, 0)
            out.append(f"  {h:3d} bars: median={p50:+.2%}, 10th={p10:+.2%}")

        return "\n".join(out)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def compute_excursions(
    prices: pl.Series | pd.Series | NDArray,
    horizons: list[int],
    return_type: Literal["pct", "log", "abs"] = "pct",
) -> pl.DataFrame:
    """Compute MFE/MAE for each horizon.

    For each bar t and horizon h:
    - MFE[t,h] = max(prices[t:t+h]) / prices[t] - 1  (for pct)
    - MAE[t,h] = min(prices[t:t+h]) / prices[t] - 1  (for pct)

    All horizons share the same number of rows (n - max_horizon) so the
    columns stay aligned. Rows whose window contains a NaN or non-positive
    price are left as NaN.

    Args:
        prices: Price series (close prices typically)
        horizons: Non-empty list of horizons to compute (e.g., [15, 30, 60])
        return_type: How to compute returns:
            - 'pct': Percentage returns (default)
            - 'log': Log returns
            - 'abs': Absolute price changes

    Returns:
        DataFrame with columns: mfe_{h}, mae_{h} for each horizon h

    Raises:
        ValueError: If `horizons` is empty, `return_type` is unknown, or the
            price series is shorter than max(horizons) + 1.

    Example:
        >>> prices = pl.Series([100, 102, 98, 105, 103, 101])
        >>> result = compute_excursions(prices, horizons=[2, 3])
        >>> result.columns
        ['mfe_2', 'mae_2', 'mfe_3', 'mae_3']
    """
    # Fail early with clear messages instead of an opaque max()/loop error.
    if not horizons:
        raise ValueError("horizons must be a non-empty list of positive integers")
    if return_type not in ("pct", "log", "abs"):
        raise ValueError(f"Unknown return_type: {return_type}")

    # Convert to numpy for computation
    if isinstance(prices, pl.Series):
        price_array = prices.to_numpy()
    elif isinstance(prices, np.ndarray):
        price_array = prices
    elif hasattr(prices, "to_numpy"):  # pandas Series
        price_array = prices.to_numpy()
    else:
        price_array = np.asarray(prices)

    price_array = price_array.astype(np.float64)
    n = len(price_array)
    max_horizon = max(horizons)

    # Validate
    if n < max_horizon + 1:
        raise ValueError(
            f"Price series too short ({n}) for max horizon ({max(horizons)}). Need at least {max(horizons) + 1} prices."
        )

    # Every horizon uses the same row count so the DataFrame columns align.
    n_rows = n - max_horizon
    results: dict[str, NDArray] = {}

    for h in horizons:
        # windows[i] == price_array[i : i + h + 1] (entry price included),
        # as a zero-copy view -- avoids the O(n*h) Python loop per horizon.
        windows = np.lib.stride_tricks.sliding_window_view(price_array, h + 1)[:n_rows]

        # A row is valid only when every price in the window (including the
        # entry) is finite and strictly positive; NaN > 0 is False, so the
        # single comparison covers both conditions for the NaN case too.
        valid = np.all(windows > 0, axis=1) & ~np.any(np.isnan(windows), axis=1)

        mfe = np.full(n_rows, np.nan)
        mae = np.full(n_rows, np.nan)

        entry = windows[valid, 0]
        max_price = windows[valid].max(axis=1)
        min_price = windows[valid].min(axis=1)

        if return_type == "pct":
            mfe[valid] = (max_price - entry) / entry
            mae[valid] = (min_price - entry) / entry
        elif return_type == "log":
            mfe[valid] = np.log(max_price / entry)
            mae[valid] = np.log(min_price / entry)
        else:  # "abs" (validated above)
            mfe[valid] = max_price - entry
            mae[valid] = min_price - entry

        results[f"mfe_{h}"] = mfe
        results[f"mae_{h}"] = mae

    return pl.DataFrame(results)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def analyze_excursions(
    prices: pl.Series | pd.Series | NDArray,
    horizons: list[int] | None = None,
    return_type: Literal["pct", "log", "abs"] = "pct",
    percentiles: list[float] | None = None,
    keep_raw: bool = False,
    rolling_window: int | None = None,
) -> ExcursionAnalysisResult:
    """Analyze price excursions with statistics and percentiles.

    This is the main entry point for price excursion analysis. It computes
    MFE/MAE distributions and provides statistics useful for setting
    take-profit and stop-loss levels.

    Args:
        prices: Price series (close prices typically)
        horizons: List of horizons to analyze. Default: [15, 30, 60]
        return_type: How to compute returns ('pct', 'log', 'abs')
        percentiles: Percentiles to compute. Default: [10, 25, 50, 75, 90]
        keep_raw: If True, include raw excursion values in result
        rolling_window: If provided, compute rolling statistics over this window

    Returns:
        ExcursionAnalysisResult with statistics and percentiles

    Example:
        >>> import polars as pl
        >>> prices = pl.Series(np.random.randn(1000).cumsum() + 100)
        >>> result = analyze_excursions(prices, horizons=[30, 60, 120])
        >>>
        >>> # View summary
        >>> print(result.summary())
        >>>
        >>> # Get specific percentile for parameter selection
        >>> tp_level = result.get_percentile(horizon=60, percentile=75, side="mfe")
        >>> sl_level = result.get_percentile(horizon=60, percentile=25, side="mae")
        >>> print(f"Suggested TP: {tp_level:.2%}, SL: {sl_level:.2%}")
    """
    # Hoisted out of the per-horizon loop: one import, not one per iteration.
    from scipy.stats import skew

    # Defaults
    if horizons is None:
        horizons = [15, 30, 60]
    if percentiles is None:
        percentiles = [10, 25, 50, 75, 90]

    # Sort horizons
    horizons = sorted(horizons)

    # Compute raw excursions
    excursions = compute_excursions(prices, horizons, return_type)
    n_samples = len(excursions)

    # Compute statistics per horizon
    statistics: dict[int, ExcursionStats] = {}
    percentile_rows: list[dict] = []

    for h in horizons:
        # BUG FIX: compute_excursions marks invalid rows with NaN, and polars'
        # drop_nulls() does NOT remove NaN (NaN != null in polars). Filtering
        # with np.isnan removes both NaN and nulls (nulls convert to NaN in
        # to_numpy for float columns); otherwise a single invalid price would
        # turn every statistic and percentile into NaN.
        mfe_values = excursions[f"mfe_{h}"].to_numpy()
        mfe_values = mfe_values[~np.isnan(mfe_values)]
        mae_values = excursions[f"mae_{h}"].to_numpy()
        mae_values = mae_values[~np.isnan(mae_values)]

        # Skip if no valid data
        if len(mfe_values) == 0:
            continue

        # Compute percentiles
        mfe_pcts = {p: float(np.percentile(mfe_values, p)) for p in percentiles}
        mae_pcts = {p: float(np.percentile(mae_values, p)) for p in percentiles}

        # Compute statistics (skewness needs > 2 samples to be meaningful)
        stats = ExcursionStats(
            horizon=h,
            n_samples=len(mfe_values),
            mfe_mean=float(np.mean(mfe_values)),
            mfe_std=float(np.std(mfe_values)),
            mfe_median=float(np.median(mfe_values)),
            mfe_skewness=float(skew(mfe_values)) if len(mfe_values) > 2 else 0.0,
            mae_mean=float(np.mean(mae_values)),
            mae_std=float(np.std(mae_values)),
            mae_median=float(np.median(mae_values)),
            mae_skewness=float(skew(mae_values)) if len(mae_values) > 2 else 0.0,
            mfe_percentiles=mfe_pcts,
            mae_percentiles=mae_pcts,
        )
        statistics[h] = stats

        # Build percentile matrix rows (one per side, long format)
        row = {"horizon": h, "side": "mfe"}
        row.update({f"p{int(p)}": v for p, v in mfe_pcts.items()})
        percentile_rows.append(row)

        row = {"horizon": h, "side": "mae"}
        row.update({f"p{int(p)}": v for p, v in mae_pcts.items()})
        percentile_rows.append(row)

    percentile_matrix = pl.DataFrame(percentile_rows)

    # Compute rolling stats if requested
    rolling_stats = None
    if rolling_window is not None:
        rolling_stats = _compute_rolling_excursion_stats(excursions, horizons, rolling_window)

    return ExcursionAnalysisResult(
        horizons=horizons,
        n_samples=n_samples,
        return_type=return_type,
        statistics=statistics,
        percentile_matrix=percentile_matrix,
        excursions=excursions if keep_raw else None,
        rolling_stats=rolling_stats,
    )
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _compute_rolling_excursion_stats(
    excursions: pl.DataFrame, horizons: list[int], window: int
) -> pl.DataFrame:
    """Compute rolling median/std of MFE and MAE for every horizon.

    Tracking how the excursion distributions drift over time helps surface
    regime changes.
    """
    per_horizon: list[pl.DataFrame] = []

    for h in horizons:
        exprs = []
        # Same rolling stats for the favorable and adverse sides.
        for side in ("mfe", "mae"):
            col = f"{side}_{h}"
            exprs.append(pl.col(col).rolling_median(window).alias(f"{col}_median"))
            exprs.append(pl.col(col).rolling_std(window).alias(f"{col}_std"))
        per_horizon.append(excursions.select(exprs))

    # Stitch all horizons side by side; empty input yields an empty frame.
    if not per_horizon:
        return pl.DataFrame()
    return pl.concat(per_horizon, how="horizontal")
|