aponyx 0.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aponyx/__init__.py +14 -0
- aponyx/backtest/__init__.py +31 -0
- aponyx/backtest/adapters.py +77 -0
- aponyx/backtest/config.py +84 -0
- aponyx/backtest/engine.py +560 -0
- aponyx/backtest/protocols.py +101 -0
- aponyx/backtest/registry.py +334 -0
- aponyx/backtest/strategy_catalog.json +50 -0
- aponyx/cli/__init__.py +5 -0
- aponyx/cli/commands/__init__.py +8 -0
- aponyx/cli/commands/clean.py +349 -0
- aponyx/cli/commands/list.py +302 -0
- aponyx/cli/commands/report.py +167 -0
- aponyx/cli/commands/run.py +377 -0
- aponyx/cli/main.py +125 -0
- aponyx/config/__init__.py +82 -0
- aponyx/data/__init__.py +99 -0
- aponyx/data/bloomberg_config.py +306 -0
- aponyx/data/bloomberg_instruments.json +26 -0
- aponyx/data/bloomberg_securities.json +42 -0
- aponyx/data/cache.py +294 -0
- aponyx/data/fetch.py +659 -0
- aponyx/data/fetch_registry.py +135 -0
- aponyx/data/loaders.py +205 -0
- aponyx/data/providers/__init__.py +13 -0
- aponyx/data/providers/bloomberg.py +383 -0
- aponyx/data/providers/file.py +111 -0
- aponyx/data/registry.py +500 -0
- aponyx/data/requirements.py +96 -0
- aponyx/data/sample_data.py +415 -0
- aponyx/data/schemas.py +60 -0
- aponyx/data/sources.py +171 -0
- aponyx/data/synthetic_params.json +46 -0
- aponyx/data/transforms.py +336 -0
- aponyx/data/validation.py +308 -0
- aponyx/docs/__init__.py +24 -0
- aponyx/docs/adding_data_providers.md +682 -0
- aponyx/docs/cdx_knowledge_base.md +455 -0
- aponyx/docs/cdx_overlay_strategy.md +135 -0
- aponyx/docs/cli_guide.md +607 -0
- aponyx/docs/governance_design.md +551 -0
- aponyx/docs/logging_design.md +251 -0
- aponyx/docs/performance_evaluation_design.md +265 -0
- aponyx/docs/python_guidelines.md +786 -0
- aponyx/docs/signal_registry_usage.md +369 -0
- aponyx/docs/signal_suitability_design.md +558 -0
- aponyx/docs/visualization_design.md +277 -0
- aponyx/evaluation/__init__.py +11 -0
- aponyx/evaluation/performance/__init__.py +24 -0
- aponyx/evaluation/performance/adapters.py +109 -0
- aponyx/evaluation/performance/analyzer.py +384 -0
- aponyx/evaluation/performance/config.py +320 -0
- aponyx/evaluation/performance/decomposition.py +304 -0
- aponyx/evaluation/performance/metrics.py +761 -0
- aponyx/evaluation/performance/registry.py +327 -0
- aponyx/evaluation/performance/report.py +541 -0
- aponyx/evaluation/suitability/__init__.py +67 -0
- aponyx/evaluation/suitability/config.py +143 -0
- aponyx/evaluation/suitability/evaluator.py +389 -0
- aponyx/evaluation/suitability/registry.py +328 -0
- aponyx/evaluation/suitability/report.py +398 -0
- aponyx/evaluation/suitability/scoring.py +367 -0
- aponyx/evaluation/suitability/tests.py +303 -0
- aponyx/examples/01_generate_synthetic_data.py +53 -0
- aponyx/examples/02_fetch_data_file.py +82 -0
- aponyx/examples/03_fetch_data_bloomberg.py +104 -0
- aponyx/examples/04_compute_signal.py +164 -0
- aponyx/examples/05_evaluate_suitability.py +224 -0
- aponyx/examples/06_run_backtest.py +242 -0
- aponyx/examples/07_analyze_performance.py +214 -0
- aponyx/examples/08_visualize_results.py +272 -0
- aponyx/main.py +7 -0
- aponyx/models/__init__.py +45 -0
- aponyx/models/config.py +83 -0
- aponyx/models/indicator_transformation.json +52 -0
- aponyx/models/indicators.py +292 -0
- aponyx/models/metadata.py +447 -0
- aponyx/models/orchestrator.py +213 -0
- aponyx/models/registry.py +860 -0
- aponyx/models/score_transformation.json +42 -0
- aponyx/models/signal_catalog.json +29 -0
- aponyx/models/signal_composer.py +513 -0
- aponyx/models/signal_transformation.json +29 -0
- aponyx/persistence/__init__.py +16 -0
- aponyx/persistence/json_io.py +132 -0
- aponyx/persistence/parquet_io.py +378 -0
- aponyx/py.typed +0 -0
- aponyx/reporting/__init__.py +10 -0
- aponyx/reporting/generator.py +517 -0
- aponyx/visualization/__init__.py +20 -0
- aponyx/visualization/app.py +37 -0
- aponyx/visualization/plots.py +309 -0
- aponyx/visualization/visualizer.py +242 -0
- aponyx/workflows/__init__.py +18 -0
- aponyx/workflows/concrete_steps.py +720 -0
- aponyx/workflows/config.py +122 -0
- aponyx/workflows/engine.py +279 -0
- aponyx/workflows/registry.py +116 -0
- aponyx/workflows/steps.py +180 -0
- aponyx-0.1.18.dist-info/METADATA +552 -0
- aponyx-0.1.18.dist-info/RECORD +104 -0
- aponyx-0.1.18.dist-info/WHEEL +4 -0
- aponyx-0.1.18.dist-info/entry_points.txt +2 -0
- aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0

aponyx/evaluation/performance/analyzer.py

@@ -0,0 +1,384 @@
"""
Core performance analyzer for backtest results.

Orchestrates comprehensive performance evaluation including extended metrics,
subperiod stability analysis, return attribution, and interpretive summaries.
"""

import logging
from datetime import datetime
from typing import Any

import pandas as pd

from aponyx import __version__
from aponyx.backtest import BacktestResult

from .config import PerformanceConfig, PerformanceMetrics, PerformanceResult
from .decomposition import compute_attribution
from .metrics import compute_all_metrics

logger = logging.getLogger(__name__)


def _split_into_subperiods(
    df: pd.DataFrame,
    n_subperiods: int,
) -> list[pd.DataFrame]:
    """
    Split DataFrame into n equal subperiods.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with DatetimeIndex to split.
    n_subperiods : int
        Number of equal subperiods.

    Returns
    -------
    list[pd.DataFrame]
        List of n DataFrame subperiods.

    Notes
    -----
    Uses integer division to ensure equal sizes.
    Last subperiod may be slightly larger if length not divisible.
    """
    total_len = len(df)
    period_len = total_len // n_subperiods

    subperiods = []
    for i in range(n_subperiods):
        start_idx = i * period_len
        if i == n_subperiods - 1:
            # Last period gets remainder
            end_idx = total_len
        else:
            end_idx = (i + 1) * period_len

        subperiods.append(df.iloc[start_idx:end_idx])

    return subperiods
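
A standalone sketch of the split behaviour described in the Notes above (the helper is module-private, so its slicing logic is inlined here):

import pandas as pd

# 10 rows into 3 subperiods: period_len = 10 // 3 = 3, slices [0:3], [3:6], [6:10]
df = pd.DataFrame(
    {"net_pnl": range(10)},
    index=pd.date_range("2024-01-01", periods=10),
)
n = 3
period_len = len(df) // n
parts = [
    df.iloc[i * period_len : len(df) if i == n - 1 else (i + 1) * period_len]
    for i in range(n)
]
print([len(p) for p in parts])  # [3, 3, 4] -- the last part absorbs the remainder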


def _compute_subperiod_metrics(
    pnl_df: pd.DataFrame,
    positions_df: pd.DataFrame,
    n_subperiods: int,
    rolling_window: int = 63,
) -> dict[str, Any]:
    """
    Compute comprehensive metrics for each subperiod using compute_all_metrics.

    Parameters
    ----------
    pnl_df : pd.DataFrame
        P&L DataFrame with 'net_pnl' and 'cumulative_pnl' columns.
    positions_df : pd.DataFrame
        Position DataFrame with 'position' and 'days_held' columns.
    n_subperiods : int
        Number of subperiods for analysis.
    rolling_window : int
        Rolling window for metrics computation. Default: 63.

    Returns
    -------
    dict[str, Any]
        Subperiod analysis with keys:
        - 'periods': List of PerformanceMetrics objects per subperiod
        - 'subperiod_returns': List of total returns per period
        - 'subperiod_sharpes': List of Sharpe ratios per period
        - 'positive_periods': Count of profitable periods
        - 'consistency_rate': Proportion of profitable periods

    Notes
    -----
    Uses compute_all_metrics to compute all 21 metrics per subperiod.
    Stores full PerformanceMetrics dataclass objects in the 'periods' list.
    """
    logger.debug("Computing subperiod metrics: n_subperiods=%d", n_subperiods)

    pnl_subperiods = _split_into_subperiods(pnl_df, n_subperiods)
    pos_subperiods = _split_into_subperiods(positions_df, n_subperiods)

    periods_metrics = []
    subperiod_returns = []
    subperiod_sharpes = []

    for i, (sub_pnl, sub_pos) in enumerate(zip(pnl_subperiods, pos_subperiods)):
        # Compute all metrics for this subperiod
        metrics = compute_all_metrics(sub_pnl, sub_pos, rolling_window)
        periods_metrics.append(metrics)

        # Extract key values for summary stats
        subperiod_returns.append(metrics.total_return)
        subperiod_sharpes.append(metrics.sharpe_ratio)

        logger.debug(
            "Subperiod %d: return=%.2f, sharpe=%.2f, trades=%d",
            i + 1,
            metrics.total_return,
            metrics.sharpe_ratio,
            metrics.n_trades,
        )

    positive_periods = sum(1 for r in subperiod_returns if r > 0)
    consistency_rate = positive_periods / n_subperiods

    return {
        "periods": periods_metrics,
        "subperiod_returns": subperiod_returns,
        "subperiod_sharpes": subperiod_sharpes,
        "positive_periods": positive_periods,
        "consistency_rate": consistency_rate,
    }


def _compute_stability_score(subperiod_analysis: dict[str, Any]) -> float:
    """
    Compute overall stability score from subperiod analysis.

    Score combines consistency rate and Sharpe stability.

    Parameters
    ----------
    subperiod_analysis : dict[str, Any]
        Subperiod metrics from _compute_subperiod_metrics.

    Returns
    -------
    float
        Stability score (0-1 scale).

    Notes
    -----
    Weights: 60% consistency rate, 40% Sharpe stability.
    Sharpe stability measured as proportion of positive Sharpe periods.
    """
    consistency_rate = subperiod_analysis["consistency_rate"]
    sharpes = subperiod_analysis["subperiod_sharpes"]

    # Sharpe stability: proportion with positive Sharpe
    positive_sharpes = sum(1 for s in sharpes if s > 0)
    sharpe_stability = positive_sharpes / len(sharpes) if sharpes else 0.0

    # Combined score
    stability_score = 0.6 * consistency_rate + 0.4 * sharpe_stability

    logger.debug(
        "Stability score: %.3f (consistency=%.1f%%, sharpe_stability=%.1f%%)",
        stability_score,
        consistency_rate * 100,
        sharpe_stability * 100,
    )

    return stability_score
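
A quick worked example of the 60/40 weighting above: with 4 subperiods, of which 3 post a positive return and 2 a positive Sharpe, the score lands at 0.65.

consistency_rate = 3 / 4   # 0.75: proportion of profitable subperiods
sharpe_stability = 2 / 4   # 0.50: proportion of positive-Sharpe subperiods
stability_score = 0.6 * consistency_rate + 0.4 * sharpe_stability
print(stability_score)  # 0.65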


def _generate_summary(
    metrics: PerformanceMetrics,
    subperiod_analysis: dict[str, Any],
    attribution: dict[str, dict[str, float]],
    stability_score: float,
) -> str:
    """
    Generate interpretive summary of performance evaluation.

    Parameters
    ----------
    metrics : PerformanceMetrics
        Comprehensive performance metrics (basic + extended).
    subperiod_analysis : dict[str, Any]
        Subperiod stability results.
    attribution : dict[str, dict[str, float]]
        Return attribution breakdown.
    stability_score : float
        Overall stability score.

    Returns
    -------
    str
        Multi-line interpretive summary text.
    """
    # Key metrics (access dataclass fields)
    profit_factor = metrics.profit_factor
    tail_ratio = metrics.tail_ratio
    consistency = metrics.consistency_score
    positive_periods = subperiod_analysis["positive_periods"]
    n_periods = len(subperiod_analysis["subperiod_returns"])

    # Attribution insights
    long_pct = attribution["direction"]["long_pct"]

    summary_lines = []

    # Overall assessment
    if stability_score >= 0.7:
        assessment = "Strong and stable performance"
    elif stability_score >= 0.5:
        assessment = "Moderate performance with acceptable stability"
    else:
        assessment = "Inconsistent performance requiring review"

    summary_lines.append(
        f"Overall: {assessment} (stability score: {stability_score:.2f})"
    )

    # Profitability
    if profit_factor > 1.5:
        summary_lines.append(
            f"Profitability: Strong (profit factor {profit_factor:.2f})"
        )
    elif profit_factor > 1.0:
        summary_lines.append(
            f"Profitability: Positive (profit factor {profit_factor:.2f})"
        )
    else:
        summary_lines.append(f"Profitability: Weak (profit factor {profit_factor:.2f})")

    # Risk characteristics
    if tail_ratio > 1.2:
        summary_lines.append(
            f"Risk profile: Favorable asymmetry (tail ratio {tail_ratio:.2f})"
        )
    elif tail_ratio > 0.8:
        summary_lines.append(f"Risk profile: Balanced (tail ratio {tail_ratio:.2f})")
    else:
        summary_lines.append(
            f"Risk profile: Negative skew (tail ratio {tail_ratio:.2f})"
        )

    # Temporal stability
    summary_lines.append(
        f"Temporal consistency: {positive_periods}/{n_periods} profitable periods "
        f"({consistency:.1%} positive windows)"
    )

    # Directional bias
    if abs(long_pct) > 0.7:
        direction = "long" if long_pct > 0 else "short"
        summary_lines.append(
            f"Strong {direction} directional bias ({abs(long_pct):.1%})"
        )
    else:
        summary_lines.append(f"Balanced directional exposure (long: {long_pct:.1%})")

    return "\n".join(summary_lines)


def analyze_backtest_performance(
    backtest_result: BacktestResult,
    config: PerformanceConfig | None = None,
) -> PerformanceResult:
    """
    Perform comprehensive performance evaluation of backtest results.

    Orchestrates computation of extended metrics, subperiod stability analysis,
    return attribution, and interpretive summary.

    Parameters
    ----------
    backtest_result : BacktestResult
        Backtest output containing positions, P&L, and metadata.
    config : PerformanceConfig | None
        Evaluation configuration. If None, uses defaults.

    Returns
    -------
    PerformanceResult
        Structured evaluation results with metrics, attribution, and summary.

    Raises
    ------
    ValueError
        If backtest result has insufficient data or invalid structure.

    Notes
    -----
    Requires backtest_result.pnl to have DatetimeIndex and columns:
    'net_pnl', 'cumulative_pnl'.

    Requires backtest_result.positions to have columns:
    'signal', 'position'.

    Examples
    --------
    >>> result = run_backtest(signal, cdx_df, config)
    >>> performance = analyze_backtest_performance(result)
    >>> print(performance.summary)
    >>> print(f"Stability: {performance.stability_score:.2f}")
    """
    if config is None:
        config = PerformanceConfig()

    logger.info("Analyzing backtest performance: config=%s", config)

    # Validate input
    pnl_df = backtest_result.pnl
    positions_df = backtest_result.positions

    if len(pnl_df) < config.min_obs:
        raise ValueError(
            f"Insufficient observations: {len(pnl_df)} < {config.min_obs} (min_obs)"
        )

    if not isinstance(pnl_df.index, pd.DatetimeIndex):
        raise ValueError("pnl_df must have DatetimeIndex")

    required_pnl_cols = {"net_pnl", "cumulative_pnl"}
    if not required_pnl_cols.issubset(pnl_df.columns):
        raise ValueError(f"pnl_df missing required columns: {required_pnl_cols}")

    required_pos_cols = {"signal", "position"}
    if not required_pos_cols.issubset(positions_df.columns):
        raise ValueError(f"positions_df missing required columns: {required_pos_cols}")

    # Compute all performance metrics (basic + extended)
    metrics = compute_all_metrics(pnl_df, positions_df, config.rolling_window)

    # Subperiod stability analysis
    subperiod_analysis = _compute_subperiod_metrics(
        pnl_df, positions_df, config.n_subperiods, config.rolling_window
    )

    # Return attribution
    attribution = compute_attribution(
        pnl_df, positions_df, n_quantiles=config.attribution_quantiles
    )

    # Overall stability score
    stability_score = _compute_stability_score(subperiod_analysis)

    # Generate interpretive summary
    summary = _generate_summary(
        metrics, subperiod_analysis, attribution, stability_score
    )

    # Build result
    timestamp = datetime.now().isoformat()

    metadata = {
        "evaluator_version": __version__,
        "signal_id": backtest_result.metadata.get("signal_id", "unknown"),
        "strategy_id": backtest_result.metadata.get("strategy_id", "unknown"),
        "backtest_config": backtest_result.metadata.get("config", {}),
    }

    result = PerformanceResult(
        metrics=metrics,
        subperiod_analysis=subperiod_analysis,
        attribution=attribution,
        stability_score=stability_score,
        summary=summary,
        timestamp=timestamp,
        config=config,
        metadata=metadata,
    )

    logger.info(
        "Performance evaluation complete: stability=%.2f, profit_factor=%.2f",
        stability_score,
        metrics.profit_factor,
    )

    return result
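
A usage sketch for the entry point, following the docstring example. It assumes `run_backtest` is exported from `aponyx.backtest` alongside `BacktestResult`, and `signal`, `cdx_df`, and `backtest_config` are placeholders for outputs of the earlier pipeline steps (see aponyx/examples/04_compute_signal.py and 06_run_backtest.py):

from aponyx.backtest import run_backtest  # assumed export, per the docstring example
from aponyx.evaluation.performance.analyzer import analyze_backtest_performance
from aponyx.evaluation.performance.config import PerformanceConfig

# signal, cdx_df, backtest_config are placeholders from earlier pipeline steps.
result = run_backtest(signal, cdx_df, backtest_config)
performance = analyze_backtest_performance(result, PerformanceConfig(n_subperiods=6))
print(performance.summary)
print(f"Stability: {performance.stability_score:.2f}")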
aponyx/evaluation/performance/config.py

@@ -0,0 +1,320 @@
"""
Configuration for backtest performance evaluation.

Defines immutable configuration parameters for performance analysis
including subperiod analysis, rolling metrics, and reporting options.
"""

from dataclasses import dataclass, field
from typing import Any

import pandas as pd


@dataclass(frozen=True)
class PerformanceConfig:
    """
    Configuration for backtest performance evaluation.

    This immutable dataclass defines all parameters controlling the performance
    evaluation process, including minimum observations, subperiod stability checks,
    rolling metric windows, and reporting preferences.

    Parameters
    ----------
    min_obs : int
        Minimum number of observations required for reliable analysis.
        Must be at least 100. Default: 252 (one trading year).
    n_subperiods : int
        Number of equal subperiods for stability analysis.
        Must be at least 2. Default: 4 (quarterly).
    risk_free_rate : float
        Annual risk-free rate for Sharpe/Sortino calculations.
        Must be non-negative. Default: 0.0.
    rolling_window : int
        Window length (days) for rolling metric calculations.
        Must be at least 20. Default: 63 (3 months).
    report_format : str
        Output format for performance reports.
        Must be 'markdown', 'json', or 'html'. Default: 'markdown'.
    attribution_quantiles : int
        Number of signal quantile buckets for attribution analysis.
        Must be at least 2. Default: 3 (terciles: low/mid/high).
    starting_capital : float
        Reference starting capital for the evaluation. Default: 100000.0.
    benchmark : pd.Series | None
        Optional benchmark return series; when provided, benchmark-relative
        metrics (alpha, beta, information ratio, R-squared) are populated.
        Default: None.

    Raises
    ------
    ValueError
        If any validation constraint is violated.

    Examples
    --------
    >>> config = PerformanceConfig()  # Use defaults
    >>> config = PerformanceConfig(n_subperiods=6, rolling_window=126)
    >>> config = PerformanceConfig(
    ...     min_obs=500,
    ...     risk_free_rate=0.02,
    ...     attribution_quantiles=5,
    ... )
    """

    min_obs: int = 252
    n_subperiods: int = 4
    risk_free_rate: float = 0.0
    rolling_window: int = 63
    report_format: str = "markdown"
    attribution_quantiles: int = 3
    starting_capital: float = 100000.0
    benchmark: pd.Series | None = None

    def __post_init__(self) -> None:
        """
        Validate configuration parameters.

        Checks that observation counts are sufficient, subperiod and window
        settings are valid, risk-free rate is non-negative, and report format
        is supported.

        Raises
        ------
        ValueError
            If any validation constraint is violated.
        """
        # Validate minimum observations
        if self.min_obs < 100:
            raise ValueError(
                f"min_obs must be at least 100 for reliable analysis, got {self.min_obs}"
            )

        # Validate subperiods
        if self.n_subperiods < 2:
            raise ValueError(
                f"n_subperiods must be at least 2 for stability analysis, got {self.n_subperiods}"
            )

        # Validate risk-free rate
        if self.risk_free_rate < 0:
            raise ValueError(
                f"risk_free_rate must be non-negative, got {self.risk_free_rate}"
            )

        # Validate rolling window
        if self.rolling_window < 20:
            raise ValueError(
                f"rolling_window must be at least 20 days, got {self.rolling_window}"
            )

        # Validate report format
        valid_formats = {"markdown", "json", "html"}
        if self.report_format not in valid_formats:
            raise ValueError(
                f"report_format must be one of {valid_formats}, got '{self.report_format}'"
            )

        # Validate attribution quantiles
        if self.attribution_quantiles < 2:
            raise ValueError(
                f"attribution_quantiles must be at least 2, got {self.attribution_quantiles}"
            )
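
Because these checks run in __post_init__, an invalid setting fails at construction time rather than partway through an analysis:

from aponyx.evaluation.performance.config import PerformanceConfig

try:
    PerformanceConfig(rolling_window=5)
except ValueError as exc:
    print(exc)  # rolling_window must be at least 20 days, got 5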


@dataclass
class PerformanceMetrics:
    """
    Comprehensive performance metrics for backtest evaluation.

    Contains all performance statistics organized by category: returns,
    risk-adjusted metrics, trade-level statistics, and stability measures.
    Combines basic backtest statistics with extended risk analysis.

    Attributes
    ----------
    total_return : float
        Total P&L over backtest period.
    annualized_return : float
        Total return annualized to yearly basis (assumes 252 trading days).
    sharpe_ratio : float
        Annualized Sharpe ratio using daily P&L volatility.
    sortino_ratio : float
        Annualized Sortino ratio using downside deviation only.
    calmar_ratio : float
        Annualized return divided by absolute max drawdown.
    max_drawdown : float
        Maximum peak-to-trough decline in cumulative P&L.
    annualized_volatility : float
        Annualized standard deviation of daily returns.
    n_trades : int
        Total number of round-trip trades.
    hit_rate : float
        Proportion of profitable trades (0.0 to 1.0).
    avg_win : float
        Average P&L of winning trades.
    avg_loss : float
        Average P&L of losing trades (negative value).
    win_loss_ratio : float
        Absolute value of avg_win / avg_loss.
    avg_holding_days : float
        Average days per trade.
    rolling_sharpe_mean : float
        Average rolling Sharpe ratio over rolling window.
    rolling_sharpe_std : float
        Volatility of rolling Sharpe ratio.
    max_dd_recovery_days : float
        Days to recover from maximum drawdown (np.inf if not recovered).
    avg_recovery_days : float
        Average recovery time across all drawdown periods.
    n_drawdowns : int
        Number of distinct drawdown periods.
    tail_ratio : float
        Ratio of 95th percentile gain to 5th percentile loss.
    profit_factor : float
        Ratio of gross profits to gross losses.
    consistency_score : float
        Proportion of positive 21-day rolling windows (0-1 scale).

    Notes
    -----
    All ratios use risk-free rate = 0 for simplicity.
    Metrics are based on daily P&L, not mark-to-market equity curve.
    This structure consolidates basic and extended metrics into a single
    comprehensive result for unified access.
    """

    # Returns
    total_return: float
    annualized_return: float

    # Risk-adjusted metrics
    sharpe_ratio: float
    sortino_ratio: float
    calmar_ratio: float
    max_drawdown: float
    annualized_volatility: float

    # Trade statistics
    n_trades: int
    hit_rate: float
    avg_win: float
    avg_loss: float
    win_loss_ratio: float
    avg_holding_days: float

    # Stability metrics
    rolling_sharpe_mean: float
    rolling_sharpe_std: float
    max_dd_recovery_days: float
    avg_recovery_days: float
    n_drawdowns: int
    tail_ratio: float
    profit_factor: float
    consistency_score: float

    # Benchmark metrics (optional, populated when benchmark provided)
    alpha: float | None = None
    beta: float | None = None
    information_ratio: float | None = None
    r_squared: float | None = None

    def to_dict(self) -> dict[str, float | int | None]:
        """
        Convert metrics to dictionary for JSON serialization.

        Returns
        -------
        dict[str, float | int | None]
            All metrics as key-value pairs.

        Notes
        -----
        Uses dataclass asdict for automatic field extraction.
        """
        from dataclasses import asdict

        return asdict(self)
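
For reference, the 21 required fields here line up with the "21 metrics" mentioned in the analyzer's subperiod notes. A sketch with purely illustrative numbers shows the benchmark fields defaulting to None in the serialized output:

from aponyx.evaluation.performance.config import PerformanceMetrics

m = PerformanceMetrics(
    total_return=12500.0, annualized_return=0.05,
    sharpe_ratio=1.1, sortino_ratio=1.6, calmar_ratio=0.8,
    max_drawdown=-6200.0, annualized_volatility=0.11,
    n_trades=42, hit_rate=0.55, avg_win=610.0, avg_loss=-480.0,
    win_loss_ratio=1.27, avg_holding_days=9.5,
    rolling_sharpe_mean=1.0, rolling_sharpe_std=0.4,
    max_dd_recovery_days=37.0, avg_recovery_days=14.0, n_drawdowns=5,
    tail_ratio=1.15, profit_factor=1.4, consistency_score=0.62,
)
print(m.to_dict()["alpha"])  # None -- benchmark metrics stay unset without a benchmark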


@dataclass
class PerformanceResult:
    """
    Container for performance evaluation results.

    This dataclass stores all outputs from a backtest performance analysis,
    including comprehensive metrics, subperiod stability assessment, attribution
    breakdown, and interpretive summary.

    Attributes
    ----------
    metrics : PerformanceMetrics
        Comprehensive performance metrics including basic backtest statistics
        (Sharpe, max drawdown, hit rate, trades) and extended stability analysis
        (rolling Sharpe, recovery time, tail ratios, consistency).
    subperiod_analysis : dict[str, Any]
        Stability assessment across temporal subperiods.
        Contains list of PerformanceMetrics per period under 'periods' key,
        plus consistency scores and summary statistics.
    attribution : dict[str, dict[str, float]]
        Return attribution by various dimensions.
        Includes breakdown by trade direction, signal quantile,
        and win/loss decomposition.
    stability_score : float
        Overall stability metric (0-1 scale) measuring consistency
        of performance across subperiods.
    summary : str
        Interpretive text summarizing key findings and recommendations.
    timestamp : str
        ISO 8601 timestamp of evaluation execution.
    config : PerformanceConfig
        Configuration used for this evaluation.
    metadata : dict[str, Any]
        Additional context including signal_id, strategy_id, and
        evaluator version.

    Notes
    -----
    This structure is designed for easy serialization to JSON and
    integration with visualization and reporting layers.
    """

    metrics: PerformanceMetrics
    subperiod_analysis: dict[str, Any]
    attribution: dict[str, dict[str, float]]
    stability_score: float
    summary: str
    timestamp: str
    config: PerformanceConfig
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """
        Convert result to dictionary for JSON serialization.

        Returns
        -------
        dict[str, Any]
            Complete result as nested dictionary with all fields.

        Notes
        -----
        Config is converted to dict using dataclass asdict functionality.
        Metrics are serialized using PerformanceMetrics.to_dict() method.
        Subperiod periods list is converted from PerformanceMetrics objects to dicts.
        """
        from dataclasses import asdict

        # Serialize subperiod analysis, converting PerformanceMetrics to dicts
        subperiod_dict = self.subperiod_analysis.copy()
        if "periods" in subperiod_dict and isinstance(subperiod_dict["periods"], list):
            subperiod_dict["periods"] = [
                asdict(p) if hasattr(p, "__dataclass_fields__") else p
                for p in subperiod_dict["periods"]
            ]

        return {
            "metrics": asdict(self.metrics),
            "subperiod_analysis": subperiod_dict,
            "attribution": self.attribution,
            "stability_score": self.stability_score,
            "summary": self.summary,
            "timestamp": self.timestamp,
            "config": asdict(self.config),
            "metadata": self.metadata,
        }
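
Since to_dict flattens every nested dataclass (including the per-subperiod PerformanceMetrics under 'periods'), the result can be written straight to JSON. One assumption in this sketch: the config keeps its default benchmark=None, since a pd.Series inside the config dict would not be JSON-serializable. The performance variable is the PerformanceResult from the earlier usage sketch.

import json

payload = performance.to_dict()  # nested dataclasses become plain dicts
with open("performance.json", "w", encoding="utf-8") as fh:
    json.dump(payload, fh, indent=2, default=str)  # default=str guards stray non-JSON values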