ml4t-diagnostic 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242)
  1. ml4t/diagnostic/AGENT.md +25 -0
  2. ml4t/diagnostic/__init__.py +166 -0
  3. ml4t/diagnostic/backends/__init__.py +10 -0
  4. ml4t/diagnostic/backends/adapter.py +192 -0
  5. ml4t/diagnostic/backends/polars_backend.py +899 -0
  6. ml4t/diagnostic/caching/__init__.py +40 -0
  7. ml4t/diagnostic/caching/cache.py +331 -0
  8. ml4t/diagnostic/caching/decorators.py +131 -0
  9. ml4t/diagnostic/caching/smart_cache.py +339 -0
  10. ml4t/diagnostic/config/AGENT.md +24 -0
  11. ml4t/diagnostic/config/README.md +267 -0
  12. ml4t/diagnostic/config/__init__.py +219 -0
  13. ml4t/diagnostic/config/barrier_config.py +277 -0
  14. ml4t/diagnostic/config/base.py +301 -0
  15. ml4t/diagnostic/config/event_config.py +148 -0
  16. ml4t/diagnostic/config/feature_config.py +404 -0
  17. ml4t/diagnostic/config/multi_signal_config.py +55 -0
  18. ml4t/diagnostic/config/portfolio_config.py +215 -0
  19. ml4t/diagnostic/config/report_config.py +391 -0
  20. ml4t/diagnostic/config/sharpe_config.py +202 -0
  21. ml4t/diagnostic/config/signal_config.py +206 -0
  22. ml4t/diagnostic/config/trade_analysis_config.py +310 -0
  23. ml4t/diagnostic/config/validation.py +279 -0
  24. ml4t/diagnostic/core/__init__.py +29 -0
  25. ml4t/diagnostic/core/numba_utils.py +315 -0
  26. ml4t/diagnostic/core/purging.py +372 -0
  27. ml4t/diagnostic/core/sampling.py +471 -0
  28. ml4t/diagnostic/errors/__init__.py +205 -0
  29. ml4t/diagnostic/evaluation/AGENT.md +26 -0
  30. ml4t/diagnostic/evaluation/__init__.py +437 -0
  31. ml4t/diagnostic/evaluation/autocorrelation.py +531 -0
  32. ml4t/diagnostic/evaluation/barrier_analysis.py +1050 -0
  33. ml4t/diagnostic/evaluation/binary_metrics.py +910 -0
  34. ml4t/diagnostic/evaluation/dashboard.py +715 -0
  35. ml4t/diagnostic/evaluation/diagnostic_plots.py +1037 -0
  36. ml4t/diagnostic/evaluation/distribution/__init__.py +499 -0
  37. ml4t/diagnostic/evaluation/distribution/moments.py +299 -0
  38. ml4t/diagnostic/evaluation/distribution/tails.py +777 -0
  39. ml4t/diagnostic/evaluation/distribution/tests.py +470 -0
  40. ml4t/diagnostic/evaluation/drift/__init__.py +139 -0
  41. ml4t/diagnostic/evaluation/drift/analysis.py +432 -0
  42. ml4t/diagnostic/evaluation/drift/domain_classifier.py +517 -0
  43. ml4t/diagnostic/evaluation/drift/population_stability_index.py +310 -0
  44. ml4t/diagnostic/evaluation/drift/wasserstein.py +388 -0
  45. ml4t/diagnostic/evaluation/event_analysis.py +647 -0
  46. ml4t/diagnostic/evaluation/excursion.py +390 -0
  47. ml4t/diagnostic/evaluation/feature_diagnostics.py +873 -0
  48. ml4t/diagnostic/evaluation/feature_outcome.py +666 -0
  49. ml4t/diagnostic/evaluation/framework.py +935 -0
  50. ml4t/diagnostic/evaluation/metric_registry.py +255 -0
  51. ml4t/diagnostic/evaluation/metrics/AGENT.md +23 -0
  52. ml4t/diagnostic/evaluation/metrics/__init__.py +133 -0
  53. ml4t/diagnostic/evaluation/metrics/basic.py +160 -0
  54. ml4t/diagnostic/evaluation/metrics/conditional_ic.py +469 -0
  55. ml4t/diagnostic/evaluation/metrics/feature_outcome.py +475 -0
  56. ml4t/diagnostic/evaluation/metrics/ic_statistics.py +446 -0
  57. ml4t/diagnostic/evaluation/metrics/importance_analysis.py +338 -0
  58. ml4t/diagnostic/evaluation/metrics/importance_classical.py +375 -0
  59. ml4t/diagnostic/evaluation/metrics/importance_mda.py +371 -0
  60. ml4t/diagnostic/evaluation/metrics/importance_shap.py +715 -0
  61. ml4t/diagnostic/evaluation/metrics/information_coefficient.py +527 -0
  62. ml4t/diagnostic/evaluation/metrics/interactions.py +772 -0
  63. ml4t/diagnostic/evaluation/metrics/monotonicity.py +226 -0
  64. ml4t/diagnostic/evaluation/metrics/risk_adjusted.py +324 -0
  65. ml4t/diagnostic/evaluation/multi_signal.py +550 -0
  66. ml4t/diagnostic/evaluation/portfolio_analysis/__init__.py +83 -0
  67. ml4t/diagnostic/evaluation/portfolio_analysis/analysis.py +734 -0
  68. ml4t/diagnostic/evaluation/portfolio_analysis/metrics.py +589 -0
  69. ml4t/diagnostic/evaluation/portfolio_analysis/results.py +334 -0
  70. ml4t/diagnostic/evaluation/report_generation.py +824 -0
  71. ml4t/diagnostic/evaluation/signal_selector.py +452 -0
  72. ml4t/diagnostic/evaluation/stat_registry.py +139 -0
  73. ml4t/diagnostic/evaluation/stationarity/__init__.py +97 -0
  74. ml4t/diagnostic/evaluation/stationarity/analysis.py +518 -0
  75. ml4t/diagnostic/evaluation/stationarity/augmented_dickey_fuller.py +296 -0
  76. ml4t/diagnostic/evaluation/stationarity/kpss_test.py +308 -0
  77. ml4t/diagnostic/evaluation/stationarity/phillips_perron.py +365 -0
  78. ml4t/diagnostic/evaluation/stats/AGENT.md +43 -0
  79. ml4t/diagnostic/evaluation/stats/__init__.py +191 -0
  80. ml4t/diagnostic/evaluation/stats/backtest_overfitting.py +219 -0
  81. ml4t/diagnostic/evaluation/stats/bootstrap.py +228 -0
  82. ml4t/diagnostic/evaluation/stats/deflated_sharpe_ratio.py +591 -0
  83. ml4t/diagnostic/evaluation/stats/false_discovery_rate.py +295 -0
  84. ml4t/diagnostic/evaluation/stats/hac_standard_errors.py +108 -0
  85. ml4t/diagnostic/evaluation/stats/minimum_track_record.py +408 -0
  86. ml4t/diagnostic/evaluation/stats/moments.py +164 -0
  87. ml4t/diagnostic/evaluation/stats/rademacher_adjustment.py +436 -0
  88. ml4t/diagnostic/evaluation/stats/reality_check.py +155 -0
  89. ml4t/diagnostic/evaluation/stats/sharpe_inference.py +219 -0
  90. ml4t/diagnostic/evaluation/themes.py +330 -0
  91. ml4t/diagnostic/evaluation/threshold_analysis.py +957 -0
  92. ml4t/diagnostic/evaluation/trade_analysis.py +1136 -0
  93. ml4t/diagnostic/evaluation/trade_dashboard/__init__.py +32 -0
  94. ml4t/diagnostic/evaluation/trade_dashboard/app.py +315 -0
  95. ml4t/diagnostic/evaluation/trade_dashboard/export/__init__.py +18 -0
  96. ml4t/diagnostic/evaluation/trade_dashboard/export/csv.py +82 -0
  97. ml4t/diagnostic/evaluation/trade_dashboard/export/html.py +276 -0
  98. ml4t/diagnostic/evaluation/trade_dashboard/io.py +166 -0
  99. ml4t/diagnostic/evaluation/trade_dashboard/normalize.py +304 -0
  100. ml4t/diagnostic/evaluation/trade_dashboard/stats.py +386 -0
  101. ml4t/diagnostic/evaluation/trade_dashboard/style.py +79 -0
  102. ml4t/diagnostic/evaluation/trade_dashboard/tabs/__init__.py +21 -0
  103. ml4t/diagnostic/evaluation/trade_dashboard/tabs/patterns.py +354 -0
  104. ml4t/diagnostic/evaluation/trade_dashboard/tabs/shap_analysis.py +280 -0
  105. ml4t/diagnostic/evaluation/trade_dashboard/tabs/stat_validation.py +186 -0
  106. ml4t/diagnostic/evaluation/trade_dashboard/tabs/worst_trades.py +236 -0
  107. ml4t/diagnostic/evaluation/trade_dashboard/types.py +129 -0
  108. ml4t/diagnostic/evaluation/trade_shap/__init__.py +102 -0
  109. ml4t/diagnostic/evaluation/trade_shap/alignment.py +188 -0
  110. ml4t/diagnostic/evaluation/trade_shap/characterize.py +413 -0
  111. ml4t/diagnostic/evaluation/trade_shap/cluster.py +302 -0
  112. ml4t/diagnostic/evaluation/trade_shap/explain.py +208 -0
  113. ml4t/diagnostic/evaluation/trade_shap/hypotheses/__init__.py +23 -0
  114. ml4t/diagnostic/evaluation/trade_shap/hypotheses/generator.py +290 -0
  115. ml4t/diagnostic/evaluation/trade_shap/hypotheses/matcher.py +251 -0
  116. ml4t/diagnostic/evaluation/trade_shap/hypotheses/templates.yaml +467 -0
  117. ml4t/diagnostic/evaluation/trade_shap/models.py +386 -0
  118. ml4t/diagnostic/evaluation/trade_shap/normalize.py +116 -0
  119. ml4t/diagnostic/evaluation/trade_shap/pipeline.py +263 -0
  120. ml4t/diagnostic/evaluation/trade_shap_dashboard.py +283 -0
  121. ml4t/diagnostic/evaluation/trade_shap_diagnostics.py +588 -0
  122. ml4t/diagnostic/evaluation/validated_cv.py +535 -0
  123. ml4t/diagnostic/evaluation/visualization.py +1050 -0
  124. ml4t/diagnostic/evaluation/volatility/__init__.py +45 -0
  125. ml4t/diagnostic/evaluation/volatility/analysis.py +351 -0
  126. ml4t/diagnostic/evaluation/volatility/arch.py +258 -0
  127. ml4t/diagnostic/evaluation/volatility/garch.py +460 -0
  128. ml4t/diagnostic/integration/__init__.py +48 -0
  129. ml4t/diagnostic/integration/backtest_contract.py +671 -0
  130. ml4t/diagnostic/integration/data_contract.py +316 -0
  131. ml4t/diagnostic/integration/engineer_contract.py +226 -0
  132. ml4t/diagnostic/logging/__init__.py +77 -0
  133. ml4t/diagnostic/logging/logger.py +245 -0
  134. ml4t/diagnostic/logging/performance.py +234 -0
  135. ml4t/diagnostic/logging/progress.py +234 -0
  136. ml4t/diagnostic/logging/wandb.py +412 -0
  137. ml4t/diagnostic/metrics/__init__.py +9 -0
  138. ml4t/diagnostic/metrics/percentiles.py +128 -0
  139. ml4t/diagnostic/py.typed +1 -0
  140. ml4t/diagnostic/reporting/__init__.py +43 -0
  141. ml4t/diagnostic/reporting/base.py +130 -0
  142. ml4t/diagnostic/reporting/html_renderer.py +275 -0
  143. ml4t/diagnostic/reporting/json_renderer.py +51 -0
  144. ml4t/diagnostic/reporting/markdown_renderer.py +117 -0
  145. ml4t/diagnostic/results/AGENT.md +24 -0
  146. ml4t/diagnostic/results/__init__.py +105 -0
  147. ml4t/diagnostic/results/barrier_results/__init__.py +36 -0
  148. ml4t/diagnostic/results/barrier_results/hit_rate.py +304 -0
  149. ml4t/diagnostic/results/barrier_results/precision_recall.py +266 -0
  150. ml4t/diagnostic/results/barrier_results/profit_factor.py +297 -0
  151. ml4t/diagnostic/results/barrier_results/tearsheet.py +397 -0
  152. ml4t/diagnostic/results/barrier_results/time_to_target.py +305 -0
  153. ml4t/diagnostic/results/barrier_results/validation.py +38 -0
  154. ml4t/diagnostic/results/base.py +177 -0
  155. ml4t/diagnostic/results/event_results.py +349 -0
  156. ml4t/diagnostic/results/feature_results.py +787 -0
  157. ml4t/diagnostic/results/multi_signal_results.py +431 -0
  158. ml4t/diagnostic/results/portfolio_results.py +281 -0
  159. ml4t/diagnostic/results/sharpe_results.py +448 -0
  160. ml4t/diagnostic/results/signal_results/__init__.py +74 -0
  161. ml4t/diagnostic/results/signal_results/ic.py +581 -0
  162. ml4t/diagnostic/results/signal_results/irtc.py +110 -0
  163. ml4t/diagnostic/results/signal_results/quantile.py +392 -0
  164. ml4t/diagnostic/results/signal_results/tearsheet.py +456 -0
  165. ml4t/diagnostic/results/signal_results/turnover.py +213 -0
  166. ml4t/diagnostic/results/signal_results/validation.py +147 -0
  167. ml4t/diagnostic/signal/AGENT.md +17 -0
  168. ml4t/diagnostic/signal/__init__.py +69 -0
  169. ml4t/diagnostic/signal/_report.py +152 -0
  170. ml4t/diagnostic/signal/_utils.py +261 -0
  171. ml4t/diagnostic/signal/core.py +275 -0
  172. ml4t/diagnostic/signal/quantile.py +148 -0
  173. ml4t/diagnostic/signal/result.py +214 -0
  174. ml4t/diagnostic/signal/signal_ic.py +129 -0
  175. ml4t/diagnostic/signal/turnover.py +182 -0
  176. ml4t/diagnostic/splitters/AGENT.md +19 -0
  177. ml4t/diagnostic/splitters/__init__.py +36 -0
  178. ml4t/diagnostic/splitters/base.py +501 -0
  179. ml4t/diagnostic/splitters/calendar.py +421 -0
  180. ml4t/diagnostic/splitters/calendar_config.py +91 -0
  181. ml4t/diagnostic/splitters/combinatorial.py +1064 -0
  182. ml4t/diagnostic/splitters/config.py +322 -0
  183. ml4t/diagnostic/splitters/cpcv/__init__.py +57 -0
  184. ml4t/diagnostic/splitters/cpcv/combinations.py +119 -0
  185. ml4t/diagnostic/splitters/cpcv/partitioning.py +263 -0
  186. ml4t/diagnostic/splitters/cpcv/purge_engine.py +379 -0
  187. ml4t/diagnostic/splitters/cpcv/windows.py +190 -0
  188. ml4t/diagnostic/splitters/group_isolation.py +329 -0
  189. ml4t/diagnostic/splitters/persistence.py +316 -0
  190. ml4t/diagnostic/splitters/utils.py +207 -0
  191. ml4t/diagnostic/splitters/walk_forward.py +757 -0
  192. ml4t/diagnostic/utils/__init__.py +42 -0
  193. ml4t/diagnostic/utils/config.py +542 -0
  194. ml4t/diagnostic/utils/dependencies.py +318 -0
  195. ml4t/diagnostic/utils/sessions.py +127 -0
  196. ml4t/diagnostic/validation/__init__.py +54 -0
  197. ml4t/diagnostic/validation/dataframe.py +274 -0
  198. ml4t/diagnostic/validation/returns.py +280 -0
  199. ml4t/diagnostic/validation/timeseries.py +299 -0
  200. ml4t/diagnostic/visualization/AGENT.md +19 -0
  201. ml4t/diagnostic/visualization/__init__.py +223 -0
  202. ml4t/diagnostic/visualization/backtest/__init__.py +98 -0
  203. ml4t/diagnostic/visualization/backtest/cost_attribution.py +762 -0
  204. ml4t/diagnostic/visualization/backtest/executive_summary.py +895 -0
  205. ml4t/diagnostic/visualization/backtest/interactive_controls.py +673 -0
  206. ml4t/diagnostic/visualization/backtest/statistical_validity.py +874 -0
  207. ml4t/diagnostic/visualization/backtest/tearsheet.py +565 -0
  208. ml4t/diagnostic/visualization/backtest/template_system.py +373 -0
  209. ml4t/diagnostic/visualization/backtest/trade_plots.py +1172 -0
  210. ml4t/diagnostic/visualization/barrier_plots.py +782 -0
  211. ml4t/diagnostic/visualization/core.py +1060 -0
  212. ml4t/diagnostic/visualization/dashboards/__init__.py +36 -0
  213. ml4t/diagnostic/visualization/dashboards/base.py +582 -0
  214. ml4t/diagnostic/visualization/dashboards/importance.py +801 -0
  215. ml4t/diagnostic/visualization/dashboards/interaction.py +263 -0
  216. ml4t/diagnostic/visualization/dashboards.py +43 -0
  217. ml4t/diagnostic/visualization/data_extraction/__init__.py +48 -0
  218. ml4t/diagnostic/visualization/data_extraction/importance.py +649 -0
  219. ml4t/diagnostic/visualization/data_extraction/interaction.py +504 -0
  220. ml4t/diagnostic/visualization/data_extraction/types.py +113 -0
  221. ml4t/diagnostic/visualization/data_extraction/validation.py +66 -0
  222. ml4t/diagnostic/visualization/feature_plots.py +888 -0
  223. ml4t/diagnostic/visualization/interaction_plots.py +618 -0
  224. ml4t/diagnostic/visualization/portfolio/__init__.py +41 -0
  225. ml4t/diagnostic/visualization/portfolio/dashboard.py +514 -0
  226. ml4t/diagnostic/visualization/portfolio/drawdown_plots.py +341 -0
  227. ml4t/diagnostic/visualization/portfolio/returns_plots.py +487 -0
  228. ml4t/diagnostic/visualization/portfolio/risk_plots.py +301 -0
  229. ml4t/diagnostic/visualization/report_generation.py +1343 -0
  230. ml4t/diagnostic/visualization/signal/__init__.py +103 -0
  231. ml4t/diagnostic/visualization/signal/dashboard.py +911 -0
  232. ml4t/diagnostic/visualization/signal/event_plots.py +514 -0
  233. ml4t/diagnostic/visualization/signal/ic_plots.py +635 -0
  234. ml4t/diagnostic/visualization/signal/multi_signal_dashboard.py +974 -0
  235. ml4t/diagnostic/visualization/signal/multi_signal_plots.py +603 -0
  236. ml4t/diagnostic/visualization/signal/quantile_plots.py +625 -0
  237. ml4t/diagnostic/visualization/signal/turnover_plots.py +400 -0
  238. ml4t/diagnostic/visualization/trade_shap/__init__.py +90 -0
  239. ml4t_diagnostic-0.1.0a1.dist-info/METADATA +1044 -0
  240. ml4t_diagnostic-0.1.0a1.dist-info/RECORD +242 -0
  241. ml4t_diagnostic-0.1.0a1.dist-info/WHEEL +4 -0
  242. ml4t_diagnostic-0.1.0a1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,390 @@
1
+ """Price path excursion analysis for TP/SL parameter selection.
2
+
3
+ This module provides tools to analyze price movement distributions over various
4
+ horizons, helping traders set take-profit and stop-loss levels based on
5
+ empirical price behavior.
6
+
7
+ **Key Distinction from Trade MFE/MAE**:
8
+
9
+ - **Trade MFE/MAE** (backtest library): Tracks best/worst unrealized return
10
+ during actual trades. Used for exit efficiency analysis.
11
+
12
+ - **Price Excursion Analysis** (this module): Analyzes potential price movements
13
+ over horizons BEFORE trading. Used for parameter selection (TP/SL levels).
14
+
15
+ Example workflow:
16
+ >>> # 1. Analyze historical price movements
17
+ >>> result = analyze_excursions(prices, horizons=[30, 60, 120])
18
+ >>>
19
+ >>> # 2. See distribution of movements
20
+ >>> print(result.percentiles)
21
+ >>>
22
+ >>> # 3. Choose TP/SL based on percentiles
23
+ >>> # e.g., 75th percentile MFE at 60 bars = 2.5% → use 2% take-profit
24
+ >>> tp_level = result.get_percentile(horizon=60, percentile=75, side="mfe")
25
+ >>>
26
+ >>> # 4. Use these informed parameters in triple barrier labeling
27
+ >>> from ml4t.engineer.labeling import triple_barrier_labels
28
+ >>> labels = triple_barrier_labels(prices, upper_barrier=tp_level, ...)
29
+
30
+ Warning:
31
+ ⚠️ FORWARD-LOOKING ANALYSIS
32
+ This computes future price movements for parameter selection.
33
+ DO NOT use excursion values as ML features (data leakage).
34
+ """
35
+
36
+ from __future__ import annotations
37
+
38
+ from dataclasses import dataclass, field
39
+ from typing import TYPE_CHECKING, Literal
40
+
41
+ import numpy as np
42
+ import polars as pl
43
+
44
+ if TYPE_CHECKING:
45
+ import pandas as pd
46
+ from numpy.typing import NDArray
47
+
48
+
49
@dataclass
class ExcursionStats:
    """Statistics for excursions at a single horizon.

    Summary statistics (mean/std/median/skewness) plus selected percentiles
    for both MFE (maximum favorable excursion) and MAE (maximum adverse
    excursion), computed over one fixed forward-looking horizon.
    """

    horizon: int  # horizon length in bars that these statistics describe
    n_samples: int  # count of valid samples (windows without NaN/non-positive prices)

    # MFE (Maximum Favorable Excursion) stats. When produced by
    # compute_excursions() the window includes the entry bar, so MFE >= 0.
    mfe_mean: float
    mfe_std: float
    mfe_median: float
    mfe_skewness: float

    # MAE (Maximum Adverse Excursion) stats. For the same reason, MAE <= 0.
    mae_mean: float
    mae_std: float
    mae_median: float
    mae_skewness: float

    # Percentiles keyed by percentile level (e.g. 75.0 -> value at the 75th
    # percentile). Populated by analyze_excursions(); empty by default.
    mfe_percentiles: dict[float, float] = field(default_factory=dict)
    mae_percentiles: dict[float, float] = field(default_factory=dict)
71
+
72
+
73
@dataclass
class ExcursionAnalysisResult:
    """Result container for price excursion analysis.

    Attributes:
        horizons: List of horizons analyzed
        n_samples: Number of valid samples used
        return_type: Type of returns computed ('pct', 'log', 'abs')
        statistics: Per-horizon statistics. A horizon from ``horizons`` may
            be absent here when it produced no valid samples, so entries
            must be looked up defensively.
        percentile_matrix: DataFrame with horizons × percentiles
        excursions: Raw excursion values (optional, can be large)
        rolling_stats: Rolling statistics over time (optional)
    """

    horizons: list[int]
    n_samples: int
    return_type: str
    statistics: dict[int, ExcursionStats]
    percentile_matrix: pl.DataFrame
    excursions: pl.DataFrame | None = None
    rolling_stats: pl.DataFrame | None = None

    def get_percentile(self, horizon: int, percentile: float, side: Literal["mfe", "mae"]) -> float:
        """Get a specific percentile value.

        Args:
            horizon: The horizon to query
            percentile: Percentile (0-100)
            side: 'mfe' for favorable or 'mae' for adverse

        Returns:
            The percentile value

        Raises:
            ValueError: If the horizon or percentile was not computed.

        Example:
            >>> result.get_percentile(horizon=60, percentile=75, side="mfe")
            0.025  # 75th percentile MFE at 60 bars is 2.5%
        """
        if horizon not in self.statistics:
            raise ValueError(f"Horizon {horizon} not in analysis. Available: {self.horizons}")

        stats = self.statistics[horizon]
        percentiles = stats.mfe_percentiles if side == "mfe" else stats.mae_percentiles

        if percentile not in percentiles:
            raise ValueError(
                f"Percentile {percentile} not computed. Available: {list(percentiles.keys())}"
            )

        return percentiles[percentile]

    def summary(self) -> str:
        """Generate a text summary of the analysis."""
        lines = [
            "Price Excursion Analysis Summary",
            "=" * 40,
            f"Samples: {self.n_samples:,}",
            f"Return type: {self.return_type}",
            f"Horizons: {self.horizons}",
            "",
            "MFE (Maximum Favorable Excursion):",
        ]

        for h in self.horizons:
            # A horizon can be missing from `statistics` when it had no valid
            # samples; skip it rather than raising KeyError (bug fix).
            stats = self.statistics.get(h)
            if stats is None:
                continue
            p50 = stats.mfe_percentiles.get(50, stats.mfe_median)
            p90 = stats.mfe_percentiles.get(90, 0)
            lines.append(f"  {h:3d} bars: median={p50:+.2%}, 90th={p90:+.2%}")

        lines.append("")
        lines.append("MAE (Maximum Adverse Excursion):")

        for h in self.horizons:
            stats = self.statistics.get(h)
            if stats is None:
                continue
            p50 = stats.mae_percentiles.get(50, stats.mae_median)
            p10 = stats.mae_percentiles.get(10, 0)
            lines.append(f"  {h:3d} bars: median={p50:+.2%}, 10th={p10:+.2%}")

        return "\n".join(lines)
151
+
152
+
153
def compute_excursions(
    prices: pl.Series | pd.Series | NDArray,
    horizons: list[int],
    return_type: Literal["pct", "log", "abs"] = "pct",
) -> pl.DataFrame:
    """Compute MFE/MAE for each horizon.

    For each bar t and horizon h (the window includes the entry bar, so
    MFE >= 0 and MAE <= 0):
    - MFE[t,h] = max(prices[t:t+h+1]) / prices[t] - 1 (for pct)
    - MAE[t,h] = min(prices[t:t+h+1]) / prices[t] - 1 (for pct)

    Only the first ``len(prices) - max(horizons)`` bars produce rows, so every
    horizon column has the same length. Windows containing NaN or non-positive
    prices yield NaN.

    Args:
        prices: Price series (close prices typically)
        horizons: List of horizons to compute (e.g., [15, 30, 60]).
            Must be non-empty and strictly positive.
        return_type: How to compute returns:
            - 'pct': Percentage returns (default)
            - 'log': Log returns
            - 'abs': Absolute price changes

    Returns:
        DataFrame with columns: mfe_{h}, mae_{h} for each horizon h

    Raises:
        ValueError: On unknown ``return_type``, empty or non-positive
            ``horizons``, or a price series shorter than ``max(horizons) + 1``.

    Example:
        >>> prices = pl.Series([100, 102, 98, 105, 103, 101])
        >>> result = compute_excursions(prices, horizons=[2, 3])
        >>> result.columns
        ['mfe_2', 'mae_2', 'mfe_3', 'mae_3']
    """
    # Fail fast on bad arguments. Previously an unknown return_type was only
    # detected inside the loop (and never if every window was invalid), and an
    # empty horizons list raised an opaque max() error.
    if return_type not in ("pct", "log", "abs"):
        raise ValueError(f"Unknown return_type: {return_type}")
    if not horizons:
        raise ValueError("horizons must be a non-empty list")
    if any(h <= 0 for h in horizons):
        raise ValueError(f"All horizons must be positive, got: {horizons}")

    # Convert to numpy for computation (accepts polars/pandas Series, ndarray,
    # or any array-like).
    if isinstance(prices, np.ndarray):
        price_array = prices
    elif isinstance(prices, pl.Series):
        price_array = prices.to_numpy()
    elif hasattr(prices, "to_numpy"):  # pandas Series
        price_array = prices.to_numpy()
    else:
        price_array = np.asarray(prices)

    price_array = price_array.astype(np.float64)
    n = len(price_array)

    max_horizon = max(horizons)
    if n < max_horizon + 1:
        raise ValueError(
            f"Price series too short ({n}) for max horizon ({max_horizon}). Need at least {max_horizon + 1} prices."
        )

    # All horizons are truncated to the same number of rows so columns align.
    n_out = n - max_horizon
    results: dict[str, NDArray] = {}

    for h in horizons:
        # Vectorized replacement for the per-bar Python loop: a zero-copy view
        # of all (h+1)-wide windows, truncated to n_out rows.
        windows = np.lib.stride_tricks.sliding_window_view(price_array, h + 1)[:n_out]

        # A window is valid only if every price in it (entry bar included,
        # windows[:, 0]) is non-NaN and positive.
        valid = ~(np.isnan(windows).any(axis=1) | (windows <= 0).any(axis=1))

        entry = windows[:, 0]
        wmax = windows.max(axis=1)
        wmin = windows.min(axis=1)

        mfe = np.full(n_out, np.nan)
        mae = np.full(n_out, np.nan)

        if return_type == "pct":
            mfe[valid] = (wmax[valid] - entry[valid]) / entry[valid]
            mae[valid] = (wmin[valid] - entry[valid]) / entry[valid]
        elif return_type == "log":
            mfe[valid] = np.log(wmax[valid] / entry[valid])
            mae[valid] = np.log(wmin[valid] / entry[valid])
        else:  # "abs" — validated above
            mfe[valid] = wmax[valid] - entry[valid]
            mae[valid] = wmin[valid] - entry[valid]

        results[f"mfe_{h}"] = mfe
        results[f"mae_{h}"] = mae

    return pl.DataFrame(results)
244
+
245
+
246
def analyze_excursions(
    prices: pl.Series | pd.Series | NDArray,
    horizons: list[int] | None = None,
    return_type: Literal["pct", "log", "abs"] = "pct",
    percentiles: list[float] | None = None,
    keep_raw: bool = False,
    rolling_window: int | None = None,
) -> ExcursionAnalysisResult:
    """Analyze price excursions with statistics and percentiles.

    This is the main entry point for price excursion analysis. It computes
    MFE/MAE distributions and provides statistics useful for setting
    take-profit and stop-loss levels.

    Args:
        prices: Price series (close prices typically)
        horizons: List of horizons to analyze. Default: [15, 30, 60].
            Sorted and de-duplicated before use.
        return_type: How to compute returns ('pct', 'log', 'abs')
        percentiles: Percentiles to compute. Default: [10, 25, 50, 75, 90]
        keep_raw: If True, include raw excursion values in result
        rolling_window: If provided, compute rolling statistics over this window

    Returns:
        ExcursionAnalysisResult with statistics and percentiles. Horizons
        with no valid samples are omitted from ``result.statistics``.

    Example:
        >>> import polars as pl
        >>> prices = pl.Series(np.random.randn(1000).cumsum() + 100)
        >>> result = analyze_excursions(prices, horizons=[30, 60, 120])
        >>>
        >>> # View summary
        >>> print(result.summary())
        >>>
        >>> # Get specific percentile for parameter selection
        >>> tp_level = result.get_percentile(horizon=60, percentile=75, side="mfe")
        >>> sl_level = result.get_percentile(horizon=60, percentile=25, side="mae")
        >>> print(f"Suggested TP: {tp_level:.2%}, SL: {sl_level:.2%}")
    """
    # Lazy scipy import, hoisted to the top of the function (it was previously
    # re-executed on every iteration of the per-horizon loop).
    from scipy.stats import skew

    # Defaults
    if horizons is None:
        horizons = [15, 30, 60]
    if percentiles is None:
        percentiles = [10, 25, 50, 75, 90]

    # Sort and de-duplicate: duplicate horizons would otherwise emit duplicate
    # rows in the percentile matrix.
    horizons = sorted(set(horizons))

    # Compute raw excursions (validates horizons/return_type and raises
    # ValueError on bad input).
    excursions = compute_excursions(prices, horizons, return_type)
    n_samples = len(excursions)

    # Per-horizon statistics plus rows for the long-format percentile matrix.
    statistics: dict[int, ExcursionStats] = {}
    percentile_rows: list[dict] = []

    for h in horizons:
        mfe_values = excursions[f"mfe_{h}"].drop_nulls().to_numpy()
        mae_values = excursions[f"mae_{h}"].drop_nulls().to_numpy()

        # Skip horizons where every window was invalid (NaN or non-positive
        # prices); such horizons simply have no entry in `statistics`.
        if len(mfe_values) == 0:
            continue

        mfe_pcts = {p: float(np.percentile(mfe_values, p)) for p in percentiles}
        mae_pcts = {p: float(np.percentile(mae_values, p)) for p in percentiles}

        # Skewness needs at least 3 observations to be meaningful.
        statistics[h] = ExcursionStats(
            horizon=h,
            n_samples=len(mfe_values),
            mfe_mean=float(np.mean(mfe_values)),
            mfe_std=float(np.std(mfe_values)),
            mfe_median=float(np.median(mfe_values)),
            mfe_skewness=float(skew(mfe_values)) if len(mfe_values) > 2 else 0.0,
            mae_mean=float(np.mean(mae_values)),
            mae_std=float(np.std(mae_values)),
            mae_median=float(np.median(mae_values)),
            mae_skewness=float(skew(mae_values)) if len(mae_values) > 2 else 0.0,
            mfe_percentiles=mfe_pcts,
            mae_percentiles=mae_pcts,
        )

        # One matrix row per (horizon, side) pair, columns p10/p25/...
        for side, pcts in (("mfe", mfe_pcts), ("mae", mae_pcts)):
            row: dict = {"horizon": h, "side": side}
            row.update({f"p{int(p)}": v for p, v in pcts.items()})
            percentile_rows.append(row)

    percentile_matrix = pl.DataFrame(percentile_rows)

    # Compute rolling stats if requested
    rolling_stats = None
    if rolling_window is not None:
        rolling_stats = _compute_rolling_excursion_stats(excursions, horizons, rolling_window)

    return ExcursionAnalysisResult(
        horizons=horizons,
        n_samples=n_samples,
        return_type=return_type,
        statistics=statistics,
        percentile_matrix=percentile_matrix,
        excursions=excursions if keep_raw else None,
        rolling_stats=rolling_stats,
    )
360
+
361
+
362
def _compute_rolling_excursion_stats(
    excursions: pl.DataFrame, horizons: list[int], window: int
) -> pl.DataFrame:
    """Compute rolling median/std of MFE and MAE per horizon.

    Tracking how the excursion distribution evolves over time helps surface
    regime changes. Returns one column per (horizon, side, statistic), e.g.
    ``mfe_30_median``; empty DataFrame when ``horizons`` is empty.
    """
    per_horizon: list[pl.DataFrame] = []

    for horizon in horizons:
        fav = f"mfe_{horizon}"
        adv = f"mae_{horizon}"

        # Rolling median and std for both sides of this horizon.
        stats_frame = excursions.select(
            [
                pl.col(fav).rolling_median(window).alias(f"{fav}_median"),
                pl.col(fav).rolling_std(window).alias(f"{fav}_std"),
                pl.col(adv).rolling_median(window).alias(f"{adv}_median"),
                pl.col(adv).rolling_std(window).alias(f"{adv}_std"),
            ]
        )
        per_horizon.append(stats_frame)

    if not per_horizon:
        return pl.DataFrame()
    # Stitch all horizons side by side (rows are already aligned).
    return pl.concat(per_horizon, how="horizontal")