aponyx-0.1.18-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. aponyx/__init__.py +14 -0
  2. aponyx/backtest/__init__.py +31 -0
  3. aponyx/backtest/adapters.py +77 -0
  4. aponyx/backtest/config.py +84 -0
  5. aponyx/backtest/engine.py +560 -0
  6. aponyx/backtest/protocols.py +101 -0
  7. aponyx/backtest/registry.py +334 -0
  8. aponyx/backtest/strategy_catalog.json +50 -0
  9. aponyx/cli/__init__.py +5 -0
  10. aponyx/cli/commands/__init__.py +8 -0
  11. aponyx/cli/commands/clean.py +349 -0
  12. aponyx/cli/commands/list.py +302 -0
  13. aponyx/cli/commands/report.py +167 -0
  14. aponyx/cli/commands/run.py +377 -0
  15. aponyx/cli/main.py +125 -0
  16. aponyx/config/__init__.py +82 -0
  17. aponyx/data/__init__.py +99 -0
  18. aponyx/data/bloomberg_config.py +306 -0
  19. aponyx/data/bloomberg_instruments.json +26 -0
  20. aponyx/data/bloomberg_securities.json +42 -0
  21. aponyx/data/cache.py +294 -0
  22. aponyx/data/fetch.py +659 -0
  23. aponyx/data/fetch_registry.py +135 -0
  24. aponyx/data/loaders.py +205 -0
  25. aponyx/data/providers/__init__.py +13 -0
  26. aponyx/data/providers/bloomberg.py +383 -0
  27. aponyx/data/providers/file.py +111 -0
  28. aponyx/data/registry.py +500 -0
  29. aponyx/data/requirements.py +96 -0
  30. aponyx/data/sample_data.py +415 -0
  31. aponyx/data/schemas.py +60 -0
  32. aponyx/data/sources.py +171 -0
  33. aponyx/data/synthetic_params.json +46 -0
  34. aponyx/data/transforms.py +336 -0
  35. aponyx/data/validation.py +308 -0
  36. aponyx/docs/__init__.py +24 -0
  37. aponyx/docs/adding_data_providers.md +682 -0
  38. aponyx/docs/cdx_knowledge_base.md +455 -0
  39. aponyx/docs/cdx_overlay_strategy.md +135 -0
  40. aponyx/docs/cli_guide.md +607 -0
  41. aponyx/docs/governance_design.md +551 -0
  42. aponyx/docs/logging_design.md +251 -0
  43. aponyx/docs/performance_evaluation_design.md +265 -0
  44. aponyx/docs/python_guidelines.md +786 -0
  45. aponyx/docs/signal_registry_usage.md +369 -0
  46. aponyx/docs/signal_suitability_design.md +558 -0
  47. aponyx/docs/visualization_design.md +277 -0
  48. aponyx/evaluation/__init__.py +11 -0
  49. aponyx/evaluation/performance/__init__.py +24 -0
  50. aponyx/evaluation/performance/adapters.py +109 -0
  51. aponyx/evaluation/performance/analyzer.py +384 -0
  52. aponyx/evaluation/performance/config.py +320 -0
  53. aponyx/evaluation/performance/decomposition.py +304 -0
  54. aponyx/evaluation/performance/metrics.py +761 -0
  55. aponyx/evaluation/performance/registry.py +327 -0
  56. aponyx/evaluation/performance/report.py +541 -0
  57. aponyx/evaluation/suitability/__init__.py +67 -0
  58. aponyx/evaluation/suitability/config.py +143 -0
  59. aponyx/evaluation/suitability/evaluator.py +389 -0
  60. aponyx/evaluation/suitability/registry.py +328 -0
  61. aponyx/evaluation/suitability/report.py +398 -0
  62. aponyx/evaluation/suitability/scoring.py +367 -0
  63. aponyx/evaluation/suitability/tests.py +303 -0
  64. aponyx/examples/01_generate_synthetic_data.py +53 -0
  65. aponyx/examples/02_fetch_data_file.py +82 -0
  66. aponyx/examples/03_fetch_data_bloomberg.py +104 -0
  67. aponyx/examples/04_compute_signal.py +164 -0
  68. aponyx/examples/05_evaluate_suitability.py +224 -0
  69. aponyx/examples/06_run_backtest.py +242 -0
  70. aponyx/examples/07_analyze_performance.py +214 -0
  71. aponyx/examples/08_visualize_results.py +272 -0
  72. aponyx/main.py +7 -0
  73. aponyx/models/__init__.py +45 -0
  74. aponyx/models/config.py +83 -0
  75. aponyx/models/indicator_transformation.json +52 -0
  76. aponyx/models/indicators.py +292 -0
  77. aponyx/models/metadata.py +447 -0
  78. aponyx/models/orchestrator.py +213 -0
  79. aponyx/models/registry.py +860 -0
  80. aponyx/models/score_transformation.json +42 -0
  81. aponyx/models/signal_catalog.json +29 -0
  82. aponyx/models/signal_composer.py +513 -0
  83. aponyx/models/signal_transformation.json +29 -0
  84. aponyx/persistence/__init__.py +16 -0
  85. aponyx/persistence/json_io.py +132 -0
  86. aponyx/persistence/parquet_io.py +378 -0
  87. aponyx/py.typed +0 -0
  88. aponyx/reporting/__init__.py +10 -0
  89. aponyx/reporting/generator.py +517 -0
  90. aponyx/visualization/__init__.py +20 -0
  91. aponyx/visualization/app.py +37 -0
  92. aponyx/visualization/plots.py +309 -0
  93. aponyx/visualization/visualizer.py +242 -0
  94. aponyx/workflows/__init__.py +18 -0
  95. aponyx/workflows/concrete_steps.py +720 -0
  96. aponyx/workflows/config.py +122 -0
  97. aponyx/workflows/engine.py +279 -0
  98. aponyx/workflows/registry.py +116 -0
  99. aponyx/workflows/steps.py +180 -0
  100. aponyx-0.1.18.dist-info/METADATA +552 -0
  101. aponyx-0.1.18.dist-info/RECORD +104 -0
  102. aponyx-0.1.18.dist-info/WHEEL +4 -0
  103. aponyx-0.1.18.dist-info/entry_points.txt +2 -0
  104. aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
aponyx/evaluation/performance/analyzer.py
@@ -0,0 +1,384 @@
+ """
+ Core performance analyzer for backtest results.
+
+ Orchestrates comprehensive performance evaluation including extended metrics,
+ subperiod stability analysis, return attribution, and interpretive summaries.
+ """
+
+ import logging
+ from datetime import datetime
+ from typing import Any
+
+ import pandas as pd
+
+ from aponyx import __version__
+ from aponyx.backtest import BacktestResult
+
+ from .config import PerformanceConfig, PerformanceMetrics, PerformanceResult
+ from .decomposition import compute_attribution
+ from .metrics import compute_all_metrics
+
+ logger = logging.getLogger(__name__)
+
+
+ def _split_into_subperiods(
+     df: pd.DataFrame,
+     n_subperiods: int,
+ ) -> list[pd.DataFrame]:
+     """
+     Split DataFrame into n equal subperiods.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         DataFrame with DatetimeIndex to split.
+     n_subperiods : int
+         Number of equal subperiods.
+
+     Returns
+     -------
+     list[pd.DataFrame]
+         List of n DataFrame subperiods.
+
+     Notes
+     -----
+     Uses integer division to ensure equal sizes.
+     The last subperiod may be slightly larger if the length is not evenly divisible.
+     """
+     total_len = len(df)
+     period_len = total_len // n_subperiods
+
+     subperiods = []
+     for i in range(n_subperiods):
+         start_idx = i * period_len
+         if i == n_subperiods - 1:
+             # Last period gets remainder
+             end_idx = total_len
+         else:
+             end_idx = (i + 1) * period_len
+
+         subperiods.append(df.iloc[start_idx:end_idx])
+
+     return subperiods
+
+
+ def _compute_subperiod_metrics(
+     pnl_df: pd.DataFrame,
+     positions_df: pd.DataFrame,
+     n_subperiods: int,
+     rolling_window: int = 63,
+ ) -> dict[str, Any]:
+     """
+     Compute comprehensive metrics for each subperiod using compute_all_metrics.
+
+     Parameters
+     ----------
+     pnl_df : pd.DataFrame
+         P&L DataFrame with 'net_pnl' and 'cumulative_pnl' columns.
+     positions_df : pd.DataFrame
+         Position DataFrame with 'position' and 'days_held' columns.
+     n_subperiods : int
+         Number of subperiods for analysis.
+     rolling_window : int
+         Rolling window for metrics computation. Default: 63.
+
+     Returns
+     -------
+     dict[str, Any]
+         Subperiod analysis with keys:
+         - 'periods': List of PerformanceMetrics objects per subperiod
+         - 'subperiod_returns': List of total returns per period
+         - 'subperiod_sharpes': List of Sharpe ratios per period
+         - 'positive_periods': Count of profitable periods
+         - 'consistency_rate': Proportion of profitable periods
+
+     Notes
+     -----
+     Uses compute_all_metrics to get all 21 metrics per subperiod.
+     Stores full PerformanceMetrics dataclass objects in the 'periods' list.
+     """
+     logger.debug("Computing subperiod metrics: n_subperiods=%d", n_subperiods)
+
+     pnl_subperiods = _split_into_subperiods(pnl_df, n_subperiods)
+     pos_subperiods = _split_into_subperiods(positions_df, n_subperiods)
+
+     periods_metrics = []
+     subperiod_returns = []
+     subperiod_sharpes = []
+
+     for i, (sub_pnl, sub_pos) in enumerate(zip(pnl_subperiods, pos_subperiods)):
+         # Compute all metrics for this subperiod
+         metrics = compute_all_metrics(sub_pnl, sub_pos, rolling_window)
+         periods_metrics.append(metrics)
+
+         # Extract key values for summary stats
+         subperiod_returns.append(metrics.total_return)
+         subperiod_sharpes.append(metrics.sharpe_ratio)
+
+         logger.debug(
+             "Subperiod %d: return=%.2f, sharpe=%.2f, trades=%d",
+             i + 1,
+             metrics.total_return,
+             metrics.sharpe_ratio,
+             metrics.n_trades,
+         )
+
+     positive_periods = sum(1 for r in subperiod_returns if r > 0)
+     consistency_rate = positive_periods / n_subperiods
+
+     return {
+         "periods": periods_metrics,
+         "subperiod_returns": subperiod_returns,
+         "subperiod_sharpes": subperiod_sharpes,
+         "positive_periods": positive_periods,
+         "consistency_rate": consistency_rate,
+     }
+
+
+ def _compute_stability_score(subperiod_analysis: dict[str, Any]) -> float:
+     """
+     Compute overall stability score from subperiod analysis.
+
+     Score combines consistency rate and Sharpe stability.
+
+     Parameters
+     ----------
+     subperiod_analysis : dict[str, Any]
+         Subperiod metrics from _compute_subperiod_metrics.
+
+     Returns
+     -------
+     float
+         Stability score (0-1 scale).
+
+     Notes
+     -----
+     Weights: 60% consistency rate, 40% Sharpe stability.
+     Sharpe stability is measured as the proportion of positive-Sharpe periods.
+     """
+     consistency_rate = subperiod_analysis["consistency_rate"]
+     sharpes = subperiod_analysis["subperiod_sharpes"]
+
+     # Sharpe stability: proportion with positive Sharpe
+     positive_sharpes = sum(1 for s in sharpes if s > 0)
+     sharpe_stability = positive_sharpes / len(sharpes) if sharpes else 0.0
+
+     # Combined score
+     stability_score = 0.6 * consistency_rate + 0.4 * sharpe_stability
+
+     logger.debug(
+         "Stability score: %.3f (consistency=%.1f%%, sharpe_stability=%.1f%%)",
+         stability_score,
+         consistency_rate * 100,
+         sharpe_stability * 100,
+     )
+
+     return stability_score
+
+
+ def _generate_summary(
+     metrics: PerformanceMetrics,
+     subperiod_analysis: dict[str, Any],
+     attribution: dict[str, dict[str, float]],
+     stability_score: float,
+ ) -> str:
+     """
+     Generate interpretive summary of performance evaluation.
+
+     Parameters
+     ----------
+     metrics : PerformanceMetrics
+         Comprehensive performance metrics (basic + extended).
+     subperiod_analysis : dict[str, Any]
+         Subperiod stability results.
+     attribution : dict[str, dict[str, float]]
+         Return attribution breakdown.
+     stability_score : float
+         Overall stability score.
+
+     Returns
+     -------
+     str
+         Multi-line interpretive summary text.
+     """
+     # Key metrics (access dataclass fields)
+     profit_factor = metrics.profit_factor
+     tail_ratio = metrics.tail_ratio
+     consistency = metrics.consistency_score
+     positive_periods = subperiod_analysis["positive_periods"]
+     n_periods = len(subperiod_analysis["subperiod_returns"])
+
+     # Attribution insights
+     long_pct = attribution["direction"]["long_pct"]
+
+     summary_lines = []
+
+     # Overall assessment
+     if stability_score >= 0.7:
+         assessment = "Strong and stable performance"
+     elif stability_score >= 0.5:
+         assessment = "Moderate performance with acceptable stability"
+     else:
+         assessment = "Inconsistent performance requiring review"
+
+     summary_lines.append(
+         f"Overall: {assessment} (stability score: {stability_score:.2f})"
+     )
+
+     # Profitability
+     if profit_factor > 1.5:
+         summary_lines.append(
+             f"Profitability: Strong (profit factor {profit_factor:.2f})"
+         )
+     elif profit_factor > 1.0:
+         summary_lines.append(
+             f"Profitability: Positive (profit factor {profit_factor:.2f})"
+         )
+     else:
+         summary_lines.append(f"Profitability: Weak (profit factor {profit_factor:.2f})")
+
+     # Risk characteristics
+     if tail_ratio > 1.2:
+         summary_lines.append(
+             f"Risk profile: Favorable asymmetry (tail ratio {tail_ratio:.2f})"
+         )
+     elif tail_ratio > 0.8:
+         summary_lines.append(f"Risk profile: Balanced (tail ratio {tail_ratio:.2f})")
+     else:
+         summary_lines.append(
+             f"Risk profile: Negative skew (tail ratio {tail_ratio:.2f})"
+         )
+
+     # Temporal stability
+     summary_lines.append(
+         f"Temporal consistency: {positive_periods}/{n_periods} profitable periods ({consistency:.1%} positive windows)"
+     )
+
+     # Directional bias
+     if abs(long_pct) > 0.7:
+         direction = "long" if long_pct > 0 else "short"
+         summary_lines.append(
+             f"Strong {direction} directional bias ({abs(long_pct):.1%})"
+         )
+     else:
+         summary_lines.append(f"Balanced directional exposure (long: {long_pct:.1%})")
+
+     return "\n".join(summary_lines)
+
+
+ def analyze_backtest_performance(
+     backtest_result: BacktestResult,
+     config: PerformanceConfig | None = None,
+ ) -> PerformanceResult:
+     """
+     Perform comprehensive performance evaluation of backtest results.
+
+     Orchestrates computation of extended metrics, subperiod stability analysis,
+     return attribution, and interpretive summary.
+
+     Parameters
+     ----------
+     backtest_result : BacktestResult
+         Backtest output containing positions, P&L, and metadata.
+     config : PerformanceConfig | None
+         Evaluation configuration. If None, uses defaults.
+
+     Returns
+     -------
+     PerformanceResult
+         Structured evaluation results with metrics, attribution, and summary.
+
+     Raises
+     ------
+     ValueError
+         If backtest result has insufficient data or invalid structure.
+
+     Notes
+     -----
+     Requires backtest_result.pnl to have DatetimeIndex and columns:
+     'net_pnl', 'cumulative_pnl'.
+
+     Requires backtest_result.positions to have columns:
+     'signal', 'position'.
+
+     Examples
+     --------
+     >>> result = run_backtest(signal, cdx_df, config)
+     >>> performance = analyze_backtest_performance(result)
+     >>> print(performance.summary)
+     >>> print(f"Stability: {performance.stability_score:.2f}")
+     """
+     if config is None:
+         config = PerformanceConfig()
+
+     logger.info("Analyzing backtest performance: config=%s", config)
+
+     # Validate input
+     pnl_df = backtest_result.pnl
+     positions_df = backtest_result.positions
+
+     if len(pnl_df) < config.min_obs:
+         raise ValueError(
+             f"Insufficient observations: {len(pnl_df)} < {config.min_obs} (min_obs)"
+         )
+
+     if not isinstance(pnl_df.index, pd.DatetimeIndex):
+         raise ValueError("pnl_df must have DatetimeIndex")
+
+     required_pnl_cols = {"net_pnl", "cumulative_pnl"}
+     if not required_pnl_cols.issubset(pnl_df.columns):
+         raise ValueError(f"pnl_df missing required columns: {required_pnl_cols}")
+
+     required_pos_cols = {"signal", "position"}
+     if not required_pos_cols.issubset(positions_df.columns):
+         raise ValueError(f"positions_df missing required columns: {required_pos_cols}")
+
+     # Compute all performance metrics (basic + extended)
+     metrics = compute_all_metrics(pnl_df, positions_df, config.rolling_window)
+
+     # Subperiod stability analysis
+     subperiod_analysis = _compute_subperiod_metrics(
+         pnl_df, positions_df, config.n_subperiods, config.rolling_window
+     )
+
+     # Return attribution
+     attribution = compute_attribution(
+         pnl_df, positions_df, n_quantiles=config.attribution_quantiles
+     )
+
+     # Overall stability score
+     stability_score = _compute_stability_score(subperiod_analysis)
+
+     # Generate interpretive summary
+     summary = _generate_summary(
+         metrics, subperiod_analysis, attribution, stability_score
+     )
+
+     # Build result
+     timestamp = datetime.now().isoformat()
+
+     metadata = {
+         "evaluator_version": __version__,
+         "signal_id": backtest_result.metadata.get("signal_id", "unknown"),
+         "strategy_id": backtest_result.metadata.get("strategy_id", "unknown"),
+         "backtest_config": backtest_result.metadata.get("config", {}),
+     }
+
+     result = PerformanceResult(
+         metrics=metrics,
+         subperiod_analysis=subperiod_analysis,
+         attribution=attribution,
+         stability_score=stability_score,
+         summary=summary,
+         timestamp=timestamp,
+         config=config,
+         metadata=metadata,
+     )
+
+     logger.info(
+         "Performance evaluation complete: stability=%.2f, profit_factor=%.2f",
+         stability_score,
+         metrics.profit_factor,
+     )
+
+     return result
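
For orientation, here is a minimal driver sketch for the analyzer above. It is not part of the package: FakeBacktestResult is a hypothetical stand-in exposing the three attributes the function reads (pnl, positions, metadata), and the synthetic columns mirror the ones the validation block checks ('net_pnl', 'cumulative_pnl', 'signal', 'position') plus 'days_held', which the subperiod helper's docstring mentions. Whether it runs end to end also depends on compute_all_metrics and compute_attribution accepting these frames.

    from dataclasses import dataclass, field
    from typing import Any

    import numpy as np
    import pandas as pd

    from aponyx.evaluation.performance.analyzer import analyze_backtest_performance
    from aponyx.evaluation.performance.config import PerformanceConfig


    @dataclass
    class FakeBacktestResult:  # hypothetical stand-in, not an aponyx class
        pnl: pd.DataFrame
        positions: pd.DataFrame
        metadata: dict[str, Any] = field(default_factory=dict)


    rng = np.random.default_rng(0)
    idx = pd.bdate_range("2023-01-02", periods=300)  # >= default min_obs of 252

    net_pnl = pd.Series(rng.normal(10.0, 50.0, len(idx)), index=idx)
    pnl = pd.DataFrame({"net_pnl": net_pnl, "cumulative_pnl": net_pnl.cumsum()})

    positions = pd.DataFrame(
        {
            "signal": rng.normal(0.0, 1.0, len(idx)),
            "position": rng.choice([-1, 0, 1], size=len(idx)),
            "days_held": rng.integers(1, 10, size=len(idx)),
        },
        index=idx,
    )

    result = analyze_backtest_performance(
        FakeBacktestResult(pnl, positions, {"signal_id": "demo_signal"}),
        PerformanceConfig(min_obs=252, n_subperiods=4),
    )
    print(result.summary)
    print(f"Stability: {result.stability_score:.2f}")

With four subperiods, the stability score printed here is the documented 60/40 blend: 0.6 times the share of profitable subperiods plus 0.4 times the share of positive-Sharpe subperiods.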
aponyx/evaluation/performance/config.py
@@ -0,0 +1,320 @@
+ """
+ Configuration for backtest performance evaluation.
+
+ Defines immutable configuration parameters for performance analysis
+ including subperiod analysis, rolling metrics, and reporting options.
+ """
+
+ from dataclasses import dataclass, field
+ from typing import Any
+
+ import pandas as pd
+
+
+ @dataclass(frozen=True)
+ class PerformanceConfig:
+     """
+     Configuration for backtest performance evaluation.
+
+     This immutable dataclass defines all parameters controlling the performance
+     evaluation process, including minimum observations, subperiod stability checks,
+     rolling metric windows, and reporting preferences.
+
+     Parameters
+     ----------
+     min_obs : int
+         Minimum number of observations required for reliable analysis.
+         Must be at least 100. Default: 252 (one trading year).
+     n_subperiods : int
+         Number of equal subperiods for stability analysis.
+         Must be at least 2. Default: 4 (quarterly).
+     risk_free_rate : float
+         Annual risk-free rate for Sharpe/Sortino calculations.
+         Must be non-negative. Default: 0.0.
+     rolling_window : int
+         Window length (days) for rolling metric calculations.
+         Must be at least 20. Default: 63 (3 months).
+     report_format : str
+         Output format for performance reports.
+         Must be 'markdown', 'json', or 'html'. Default: 'markdown'.
+     attribution_quantiles : int
+         Number of signal quantile buckets for attribution analysis.
+         Must be at least 2. Default: 3 (terciles: low/mid/high).
+     starting_capital : float
+         Starting capital assumed for the backtest P&L. Default: 100000.0.
+     benchmark : pd.Series | None
+         Optional benchmark series for benchmark-relative metrics
+         (alpha, beta, information ratio, R-squared). Default: None.
+
+     Raises
+     ------
+     ValueError
+         If any validation constraint is violated.
+
+     Examples
+     --------
+     >>> config = PerformanceConfig()  # Use defaults
+     >>> config = PerformanceConfig(n_subperiods=6, rolling_window=126)
+     >>> config = PerformanceConfig(
+     ...     min_obs=500,
+     ...     risk_free_rate=0.02,
+     ...     attribution_quantiles=5,
+     ... )
+     """
+
+     min_obs: int = 252
+     n_subperiods: int = 4
+     risk_free_rate: float = 0.0
+     rolling_window: int = 63
+     report_format: str = "markdown"
+     attribution_quantiles: int = 3
+     starting_capital: float = 100000.0
+     benchmark: pd.Series | None = None
+
+     def __post_init__(self) -> None:
+         """
+         Validate configuration parameters.
+
+         Checks that observation counts are sufficient, subperiod and window
+         settings are valid, risk-free rate is non-negative, and report format
+         is supported.
+
+         Raises
+         ------
+         ValueError
+             If any validation constraint is violated.
+         """
+         # Validate minimum observations
+         if self.min_obs < 100:
+             raise ValueError(
+                 f"min_obs must be at least 100 for reliable analysis, got {self.min_obs}"
+             )
+
+         # Validate subperiods
+         if self.n_subperiods < 2:
+             raise ValueError(
+                 f"n_subperiods must be at least 2 for stability analysis, got {self.n_subperiods}"
+             )
+
+         # Validate risk-free rate
+         if self.risk_free_rate < 0:
+             raise ValueError(
+                 f"risk_free_rate must be non-negative, got {self.risk_free_rate}"
+             )
+
+         # Validate rolling window
+         if self.rolling_window < 20:
+             raise ValueError(
+                 f"rolling_window must be at least 20 days, got {self.rolling_window}"
+             )
+
+         # Validate report format
+         valid_formats = {"markdown", "json", "html"}
+         if self.report_format not in valid_formats:
+             raise ValueError(
+                 f"report_format must be one of {valid_formats}, got '{self.report_format}'"
+             )
+
+         # Validate attribution quantiles
+         if self.attribution_quantiles < 2:
+             raise ValueError(
+                 f"attribution_quantiles must be at least 2, got {self.attribution_quantiles}"
+             )
+
+
+ @dataclass
+ class PerformanceMetrics:
+     """
+     Comprehensive performance metrics for backtest evaluation.
+
+     Contains all performance statistics organized by category: returns,
+     risk-adjusted metrics, trade-level statistics, and stability measures.
+     Combines basic backtest statistics with extended risk analysis.
+
+     Attributes
+     ----------
+     total_return : float
+         Total P&L over backtest period.
+     annualized_return : float
+         Total return annualized to yearly basis (assumes 252 trading days).
+     sharpe_ratio : float
+         Annualized Sharpe ratio using daily P&L volatility.
+     sortino_ratio : float
+         Annualized Sortino ratio using downside deviation only.
+     calmar_ratio : float
+         Annualized return divided by absolute max drawdown.
+     max_drawdown : float
+         Maximum peak-to-trough decline in cumulative P&L.
+     annualized_volatility : float
+         Annualized standard deviation of daily returns.
+     n_trades : int
+         Total number of round-trip trades.
+     hit_rate : float
+         Proportion of profitable trades (0.0 to 1.0).
+     avg_win : float
+         Average P&L of winning trades.
+     avg_loss : float
+         Average P&L of losing trades (negative value).
+     win_loss_ratio : float
+         Absolute value of avg_win / avg_loss.
+     avg_holding_days : float
+         Average days per trade.
+     rolling_sharpe_mean : float
+         Average rolling Sharpe ratio over rolling window.
+     rolling_sharpe_std : float
+         Volatility of rolling Sharpe ratio.
+     max_dd_recovery_days : float
+         Days to recover from maximum drawdown (np.inf if not recovered).
+     avg_recovery_days : float
+         Average recovery time across all drawdown periods.
+     n_drawdowns : int
+         Number of distinct drawdown periods.
+     tail_ratio : float
+         Ratio of 95th percentile gain to 5th percentile loss.
+     profit_factor : float
+         Ratio of gross profits to gross losses.
+     consistency_score : float
+         Proportion of positive 21-day rolling windows (0-1 scale).
+     alpha : float | None
+         Benchmark-relative alpha; populated only when a benchmark is provided.
+     beta : float | None
+         Beta to the benchmark; populated only when a benchmark is provided.
+     information_ratio : float | None
+         Information ratio versus the benchmark; populated only when a benchmark is provided.
+     r_squared : float | None
+         R-squared of the benchmark regression; populated only when a benchmark is provided.
+
+     Notes
+     -----
+     All ratios use risk-free rate = 0 for simplicity.
+     Metrics are based on daily P&L, not mark-to-market equity curve.
+     This structure consolidates basic and extended metrics into a single
+     comprehensive result for unified access.
+     """
+
+     # Returns
+     total_return: float
+     annualized_return: float
+
+     # Risk-adjusted metrics
+     sharpe_ratio: float
+     sortino_ratio: float
+     calmar_ratio: float
+     max_drawdown: float
+     annualized_volatility: float
+
+     # Trade statistics
+     n_trades: int
+     hit_rate: float
+     avg_win: float
+     avg_loss: float
+     win_loss_ratio: float
+     avg_holding_days: float
+
+     # Stability metrics
+     rolling_sharpe_mean: float
+     rolling_sharpe_std: float
+     max_dd_recovery_days: float
+     avg_recovery_days: float
+     n_drawdowns: int
+     tail_ratio: float
+     profit_factor: float
+     consistency_score: float
+
+     # Benchmark metrics (optional, populated when benchmark provided)
+     alpha: float | None = None
+     beta: float | None = None
+     information_ratio: float | None = None
+     r_squared: float | None = None
+
+     def to_dict(self) -> dict[str, float | int | None]:
+         """
+         Convert metrics to dictionary for JSON serialization.
+
+         Returns
+         -------
+         dict[str, float | int | None]
+             All metrics as key-value pairs; benchmark metrics are None
+             when no benchmark was provided.
+
+         Notes
+         -----
+         Uses dataclass asdict for automatic field extraction.
+         """
+         from dataclasses import asdict
+
+         return asdict(self)
+
+
+ @dataclass
+ class PerformanceResult:
+     """
+     Container for performance evaluation results.
+
+     This dataclass stores all outputs from a backtest performance analysis,
+     including comprehensive metrics, subperiod stability assessment, attribution
+     breakdown, and interpretive summary.
+
+     Attributes
+     ----------
+     metrics : PerformanceMetrics
+         Comprehensive performance metrics including basic backtest statistics
+         (Sharpe, max drawdown, hit rate, trades) and extended stability analysis
+         (rolling Sharpe, recovery time, tail ratios, consistency).
+     subperiod_analysis : dict[str, Any]
+         Stability assessment across temporal subperiods.
+         Contains list of PerformanceMetrics per period under 'periods' key,
+         plus consistency scores and summary statistics.
+     attribution : dict[str, dict[str, float]]
+         Return attribution by various dimensions.
+         Includes breakdown by trade direction, signal quantile,
+         and win/loss decomposition.
+     stability_score : float
+         Overall stability metric (0-1 scale) measuring consistency
+         of performance across subperiods.
+     summary : str
+         Interpretive text summarizing key findings and recommendations.
+     timestamp : str
+         ISO 8601 timestamp of evaluation execution.
+     config : PerformanceConfig
+         Configuration used for this evaluation.
+     metadata : dict[str, Any]
+         Additional context including signal_id, strategy_id, and
+         evaluator version.
+
+     Notes
+     -----
+     This structure is designed for easy serialization to JSON and
+     integration with visualization and reporting layers.
+     """
+
+     metrics: PerformanceMetrics
+     subperiod_analysis: dict[str, Any]
+     attribution: dict[str, dict[str, float]]
+     stability_score: float
+     summary: str
+     timestamp: str
+     config: PerformanceConfig
+     metadata: dict[str, Any] = field(default_factory=dict)
+
+     def to_dict(self) -> dict[str, Any]:
+         """
+         Convert result to dictionary for JSON serialization.
+
+         Returns
+         -------
+         dict[str, Any]
+             Complete result as nested dictionary with all fields.
+
+         Notes
+         -----
+         Config is converted to dict using dataclass asdict functionality.
+         Metrics are serialized with dataclasses.asdict, which is equivalent
+         to PerformanceMetrics.to_dict().
+         Subperiod periods list is converted from PerformanceMetrics objects to dicts.
+         """
+         from dataclasses import asdict
+
+         # Serialize subperiod analysis, converting PerformanceMetrics to dicts
+         subperiod_dict = self.subperiod_analysis.copy()
+         if "periods" in subperiod_dict and isinstance(subperiod_dict["periods"], list):
+             subperiod_dict["periods"] = [
+                 asdict(p) if hasattr(p, "__dataclass_fields__") else p
+                 for p in subperiod_dict["periods"]
+             ]
+
+         return {
+             "metrics": asdict(self.metrics),
+             "subperiod_analysis": subperiod_dict,
+             "attribution": self.attribution,
+             "stability_score": self.stability_score,
+             "summary": self.summary,
+             "timestamp": self.timestamp,
+             "config": asdict(self.config),
+             "metadata": self.metadata,
+         }