aponyx 0.1.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aponyx/__init__.py +14 -0
- aponyx/backtest/__init__.py +31 -0
- aponyx/backtest/adapters.py +77 -0
- aponyx/backtest/config.py +84 -0
- aponyx/backtest/engine.py +560 -0
- aponyx/backtest/protocols.py +101 -0
- aponyx/backtest/registry.py +334 -0
- aponyx/backtest/strategy_catalog.json +50 -0
- aponyx/cli/__init__.py +5 -0
- aponyx/cli/commands/__init__.py +8 -0
- aponyx/cli/commands/clean.py +349 -0
- aponyx/cli/commands/list.py +302 -0
- aponyx/cli/commands/report.py +167 -0
- aponyx/cli/commands/run.py +377 -0
- aponyx/cli/main.py +125 -0
- aponyx/config/__init__.py +82 -0
- aponyx/data/__init__.py +99 -0
- aponyx/data/bloomberg_config.py +306 -0
- aponyx/data/bloomberg_instruments.json +26 -0
- aponyx/data/bloomberg_securities.json +42 -0
- aponyx/data/cache.py +294 -0
- aponyx/data/fetch.py +659 -0
- aponyx/data/fetch_registry.py +135 -0
- aponyx/data/loaders.py +205 -0
- aponyx/data/providers/__init__.py +13 -0
- aponyx/data/providers/bloomberg.py +383 -0
- aponyx/data/providers/file.py +111 -0
- aponyx/data/registry.py +500 -0
- aponyx/data/requirements.py +96 -0
- aponyx/data/sample_data.py +415 -0
- aponyx/data/schemas.py +60 -0
- aponyx/data/sources.py +171 -0
- aponyx/data/synthetic_params.json +46 -0
- aponyx/data/transforms.py +336 -0
- aponyx/data/validation.py +308 -0
- aponyx/docs/__init__.py +24 -0
- aponyx/docs/adding_data_providers.md +682 -0
- aponyx/docs/cdx_knowledge_base.md +455 -0
- aponyx/docs/cdx_overlay_strategy.md +135 -0
- aponyx/docs/cli_guide.md +607 -0
- aponyx/docs/governance_design.md +551 -0
- aponyx/docs/logging_design.md +251 -0
- aponyx/docs/performance_evaluation_design.md +265 -0
- aponyx/docs/python_guidelines.md +786 -0
- aponyx/docs/signal_registry_usage.md +369 -0
- aponyx/docs/signal_suitability_design.md +558 -0
- aponyx/docs/visualization_design.md +277 -0
- aponyx/evaluation/__init__.py +11 -0
- aponyx/evaluation/performance/__init__.py +24 -0
- aponyx/evaluation/performance/adapters.py +109 -0
- aponyx/evaluation/performance/analyzer.py +384 -0
- aponyx/evaluation/performance/config.py +320 -0
- aponyx/evaluation/performance/decomposition.py +304 -0
- aponyx/evaluation/performance/metrics.py +761 -0
- aponyx/evaluation/performance/registry.py +327 -0
- aponyx/evaluation/performance/report.py +541 -0
- aponyx/evaluation/suitability/__init__.py +67 -0
- aponyx/evaluation/suitability/config.py +143 -0
- aponyx/evaluation/suitability/evaluator.py +389 -0
- aponyx/evaluation/suitability/registry.py +328 -0
- aponyx/evaluation/suitability/report.py +398 -0
- aponyx/evaluation/suitability/scoring.py +367 -0
- aponyx/evaluation/suitability/tests.py +303 -0
- aponyx/examples/01_generate_synthetic_data.py +53 -0
- aponyx/examples/02_fetch_data_file.py +82 -0
- aponyx/examples/03_fetch_data_bloomberg.py +104 -0
- aponyx/examples/04_compute_signal.py +164 -0
- aponyx/examples/05_evaluate_suitability.py +224 -0
- aponyx/examples/06_run_backtest.py +242 -0
- aponyx/examples/07_analyze_performance.py +214 -0
- aponyx/examples/08_visualize_results.py +272 -0
- aponyx/main.py +7 -0
- aponyx/models/__init__.py +45 -0
- aponyx/models/config.py +83 -0
- aponyx/models/indicator_transformation.json +52 -0
- aponyx/models/indicators.py +292 -0
- aponyx/models/metadata.py +447 -0
- aponyx/models/orchestrator.py +213 -0
- aponyx/models/registry.py +860 -0
- aponyx/models/score_transformation.json +42 -0
- aponyx/models/signal_catalog.json +29 -0
- aponyx/models/signal_composer.py +513 -0
- aponyx/models/signal_transformation.json +29 -0
- aponyx/persistence/__init__.py +16 -0
- aponyx/persistence/json_io.py +132 -0
- aponyx/persistence/parquet_io.py +378 -0
- aponyx/py.typed +0 -0
- aponyx/reporting/__init__.py +10 -0
- aponyx/reporting/generator.py +517 -0
- aponyx/visualization/__init__.py +20 -0
- aponyx/visualization/app.py +37 -0
- aponyx/visualization/plots.py +309 -0
- aponyx/visualization/visualizer.py +242 -0
- aponyx/workflows/__init__.py +18 -0
- aponyx/workflows/concrete_steps.py +720 -0
- aponyx/workflows/config.py +122 -0
- aponyx/workflows/engine.py +279 -0
- aponyx/workflows/registry.py +116 -0
- aponyx/workflows/steps.py +180 -0
- aponyx-0.1.18.dist-info/METADATA +552 -0
- aponyx-0.1.18.dist-info/RECORD +104 -0
- aponyx-0.1.18.dist-info/WHEEL +4 -0
- aponyx-0.1.18.dist-info/entry_points.txt +2 -0
- aponyx-0.1.18.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,761 @@
"""
Extended risk and stability metrics for performance evaluation.

Provides advanced metrics beyond standard backtest statistics, including
rolling performance diagnostics, drawdown recovery analysis, tail risk,
and consistency measures. Consolidates all performance metrics (basic + extended)
into a unified computation function.

Uses quantstats library for standard metric calculations.
"""

import logging
import warnings

import numpy as np
import pandas as pd
import quantstats as qs  # type: ignore[import-untyped]

from .config import PerformanceMetrics

logger = logging.getLogger(__name__)


def convert_pnl_to_returns(
    pnl_df: pd.DataFrame,
    starting_capital: float = 100000.0,
) -> pd.Series:
    """
    Convert cumulative P&L to percentage returns for quantstats compatibility.

    Transforms dollar P&L into equity curve returns by treating cumulative P&L
    as portfolio gains/losses relative to starting capital.

    Parameters
    ----------
    pnl_df : pd.DataFrame
        P&L DataFrame with 'cumulative_pnl' column and DatetimeIndex.
    starting_capital : float
        Initial capital for percentage calculation. Default: 100,000.

    Returns
    -------
    pd.Series
        Daily percentage returns with same index as pnl_df.

    Notes
    -----
    This conversion assumes constant notional (no capital additions/withdrawals)
    and may not reflect true equity dynamics for leveraged strategies or
    strategies with variable position sizing.

    The equity curve is calculated as: starting_capital + cumulative_pnl.
    Returns are computed as percentage changes in the equity curve.

    Examples
    --------
    >>> returns = convert_pnl_to_returns(result.pnl, starting_capital=100000)
    >>> print(f"First return: {returns.iloc[1]:.4%}")
    """
    equity_curve = starting_capital + pnl_df["cumulative_pnl"]
    returns = equity_curve.pct_change().fillna(0.0)
    returns.name = "returns"

    logger.debug(
        "Converted P&L to returns: capital=$%.0f, observations=%d",
        starting_capital,
        len(returns),
    )

    return returns

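# A minimal worked example of the conversion above on synthetic data (the
# figures here are illustrative only, not taken from the package):
#
# >>> idx = pd.date_range("2024-01-01", periods=4, freq="B")
# >>> pnl = pd.DataFrame({"cumulative_pnl": [0.0, 500.0, 250.0, 1000.0]}, index=idx)
# >>> convert_pnl_to_returns(pnl, starting_capital=100_000.0).round(5).tolist()
# [0.0, 0.005, -0.00249, 0.00748]

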
def compute_all_metrics(
    pnl_df: pd.DataFrame,
    positions_df: pd.DataFrame,
    rolling_window: int = 63,
    starting_capital: float = 100000.0,
    benchmark: pd.Series | None = None,
) -> PerformanceMetrics:
    """
    Compute all performance metrics (basic + extended) from backtest results.

    Consolidates computation of 21 comprehensive metrics including returns,
    risk-adjusted ratios, trade statistics, and stability measures. Standard
    metrics are computed with the quantstats library; trade-level and
    recovery metrics use custom implementations.

    Parameters
    ----------
    pnl_df : pd.DataFrame
        Daily P&L data with 'net_pnl' and 'cumulative_pnl' columns.
    positions_df : pd.DataFrame
        Daily position data with 'position' and 'days_held' columns.
    rolling_window : int
        Window length for rolling metrics. Default: 63 days (3 months).
    starting_capital : float
        Initial capital for returns conversion. Default: 100,000.
    benchmark : pd.Series | None
        Benchmark returns series for relative performance metrics.
        Must be daily percentage returns (not prices). Default: None.

    Returns
    -------
    PerformanceMetrics
        Complete set of performance statistics organized by category.
        Includes 21 base metrics plus 4 benchmark metrics when benchmark provided.

    Notes
    -----
    Calculations assume:
    - 252 trading days per year for annualization
    - No risk-free rate (excess returns = total returns)
    - Daily P&L represents actual trading results

    15 metrics are computed using quantstats for consistency with industry
    standards. Trade-level metrics (n_trades, avg_holding_days) and recovery
    metrics remain custom implementations as quantstats does not support these.

    Benchmark must be provided as a returns series (not prices). Quantstats
    handles date alignment automatically.

    Examples
    --------
    >>> from aponyx.evaluation.performance import compute_all_metrics
    >>> metrics = compute_all_metrics(result.pnl, result.positions)
    >>> print(f"Sharpe: {metrics.sharpe_ratio:.2f}, Trades: {metrics.n_trades}")

    >>> # With benchmark comparison
    >>> benchmark_returns = pd.Series(...)  # Daily returns
    >>> metrics = compute_all_metrics(
    ...     result.pnl, result.positions, benchmark=benchmark_returns
    ... )
    >>> print(f"Alpha: {metrics.alpha:.4f}, Beta: {metrics.beta:.2f}")
    """
    from aponyx.evaluation.performance.config import PerformanceMetrics

    logger.debug(
        "Computing all performance metrics: rolling_window=%d",
        rolling_window,
    )

    # ==================== Shared Intermediates ====================
    daily_pnl = pnl_df["net_pnl"]
    cum_pnl = pnl_df["cumulative_pnl"]

    # Convert P&L to returns for quantstats
    returns = convert_pnl_to_returns(pnl_df, starting_capital)

    # ==================== Quantstats Metrics ====================
    logger.debug("Computing metrics using quantstats")

    # Suppress quantstats RuntimeWarnings for edge cases (zero std, zero max_dd).
    # These occur when returns have zero variance or no drawdowns, which is valid data.
    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message="invalid value encountered in scalar divide",
            category=RuntimeWarning,
        )

        # Return metrics
        total_return = float(qs.stats.comp(returns))
        annualized_return = float(qs.stats.cagr(returns, periods=252))

        # Risk-adjusted metrics
        sharpe_ratio = float(qs.stats.sharpe(returns, periods=252))
        sortino_ratio = float(qs.stats.sortino(returns, periods=252))
        calmar_ratio = float(qs.stats.calmar(returns, periods=252))
        max_drawdown = float(qs.stats.max_drawdown(returns))
        annualized_vol = float(qs.stats.volatility(returns, periods=252))

        # Tail and profitability metrics
        tail_ratio = float(qs.stats.tail_ratio(returns))
        profit_factor = float(qs.stats.profit_factor(returns))

        # Rolling Sharpe statistics
        rolling_sharpe = qs.stats.rolling_sharpe(returns, rolling_period=rolling_window)
        rolling_sharpe_mean = float(rolling_sharpe.mean())
        rolling_sharpe_std = float(rolling_sharpe.std())

        # Drawdown count
        dd_series = qs.stats.to_drawdown_series(returns)
        dd_details = qs.stats.drawdown_details(dd_series)
        n_drawdowns_qs = len(dd_details)

    # Benchmark metrics (if provided)
    alpha = None
    beta = None
    information_ratio = None
    r_squared = None

    if benchmark is not None:
        try:
            # Suppress quantstats RuntimeWarnings for edge cases
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore",
                    message="invalid value encountered in scalar divide",
                    category=RuntimeWarning,
                )

                # Compute benchmark metrics using quantstats. greeks returns a
                # Series keyed by name ('beta' first, then 'alpha'), so index
                # by label rather than position to avoid swapping the two.
                greeks = qs.stats.greeks(returns, benchmark, periods=252)
                alpha = float(greeks["alpha"]) if "alpha" in greeks.index else None
                beta = float(greeks["beta"]) if "beta" in greeks.index else None

                # Information ratio
                information_ratio = float(
                    qs.stats.information_ratio(returns, benchmark)
                )

                # R-squared (correlation with benchmark)
                r_squared = float(qs.stats.r_squared(returns, benchmark))

            logger.debug(
                "Computed benchmark metrics: alpha=%.4f, beta=%.2f, IR=%.2f, R²=%.2f",
                alpha or 0,
                beta or 0,
                information_ratio or 0,
                r_squared or 0,
            )
        except Exception as e:
            logger.warning("Failed to compute benchmark metrics: %s", e)

    # ==================== Trade Statistics (Custom) ====================
    # These require position tracking, which quantstats does not support.
    prev_position = positions_df["position"].shift(1).fillna(0)
    position_entries = (prev_position == 0) & (positions_df["position"] != 0)
    n_trades = position_entries.sum()

    # Compute P&L per trade
    position_changes = (positions_df["position"] != prev_position).astype(int)
    trade_id = position_changes.cumsum()
    active_trades = positions_df[positions_df["position"] != 0].copy()

    if len(active_trades) > 0:
        active_trades["trade_id"] = trade_id[positions_df["position"] != 0]
        trade_pnls = (
            pnl_df.loc[active_trades.index]
            .groupby(active_trades["trade_id"])["net_pnl"]
            .sum()
        )

        trade_pnls_array = trade_pnls.values
        winning_trades = trade_pnls_array[trade_pnls_array > 0]
        losing_trades = trade_pnls_array[trade_pnls_array < 0]

        hit_rate = (
            len(winning_trades) / len(trade_pnls_array)
            if len(trade_pnls_array) > 0
            else 0.0
        )
        avg_win = winning_trades.mean() if len(winning_trades) > 0 else 0.0
        avg_loss = losing_trades.mean() if len(losing_trades) > 0 else 0.0

        if avg_loss < 0:
            win_loss_ratio = abs(avg_win / avg_loss)
        else:
            win_loss_ratio = 0.0
    else:
        hit_rate = 0.0
        avg_win = 0.0
        avg_loss = 0.0
        win_loss_ratio = 0.0

    # Holding period statistics
    holding_periods = positions_df[positions_df["position"] != 0]["days_held"]
    avg_holding_days = holding_periods.mean() if len(holding_periods) > 0 else 0.0

    # ==================== Recovery Metrics (Custom) ====================
    # Quantstats does not provide recovery-time analysis, so recompute the
    # drawdown series for recovery analysis.
    running_max = cum_pnl.expanding().max()
    drawdown = cum_pnl - running_max

    recovery_stats = _compute_drawdown_recovery_optimized(
        cum_pnl, running_max, drawdown
    )

    # ==================== Consistency Score (Always Custom) ====================
    consistency_score = compute_consistency_score(daily_pnl, window=21)

    # ==================== Assemble Result ====================
    logger.debug(
        "Computed %d metrics: sharpe=%.2f, trades=%d, profit_factor=%.2f",
        21 + (4 if benchmark is not None else 0),
        sharpe_ratio,
        n_trades,
        profit_factor,
    )

    return PerformanceMetrics(
        # Returns
        total_return=total_return,
        annualized_return=annualized_return,
        # Risk-adjusted
        sharpe_ratio=sharpe_ratio,
        sortino_ratio=sortino_ratio,
        calmar_ratio=calmar_ratio,
        max_drawdown=max_drawdown,
        annualized_volatility=annualized_vol,
        # Trade stats (always custom)
        n_trades=int(n_trades),
        hit_rate=hit_rate,
        avg_win=avg_win,
        avg_loss=avg_loss,
        win_loss_ratio=win_loss_ratio,
        avg_holding_days=avg_holding_days,
        # Stability
        rolling_sharpe_mean=rolling_sharpe_mean,
        rolling_sharpe_std=rolling_sharpe_std,
        max_dd_recovery_days=recovery_stats["max_dd_recovery_days"],
        avg_recovery_days=recovery_stats["avg_recovery_days"],
        n_drawdowns=int(n_drawdowns_qs),
        tail_ratio=tail_ratio,
        profit_factor=profit_factor,
        consistency_score=consistency_score,
        # Benchmark metrics (optional)
        alpha=alpha,
        beta=beta,
        information_ratio=information_ratio,
        r_squared=r_squared,
    )

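# A usage sketch for the consolidated computation, assuming `result` is a
# backtest result exposing the frames described in the docstring above:
#
# >>> metrics = compute_all_metrics(
# ...     result.pnl,        # 'net_pnl' and 'cumulative_pnl' columns
# ...     result.positions,  # 'position' and 'days_held' columns
# ...     rolling_window=63,
# ...     starting_capital=100_000.0,
# ... )
# >>> print(f"Sharpe: {metrics.sharpe_ratio:.2f}, trades: {metrics.n_trades}")

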
def _compute_drawdown_recovery_optimized(
    cumulative_pnl: pd.Series,
    running_max: pd.Series,
    drawdown: pd.Series,
) -> dict[str, float]:
    """
    Compute drawdown recovery using pre-computed intermediates.

    Optimized version that accepts pre-computed running_max and drawdown
    to avoid redundant calculation when called from compute_all_metrics.

    Parameters
    ----------
    cumulative_pnl : pd.Series
        Cumulative P&L time series.
    running_max : pd.Series
        Expanding maximum of cumulative P&L.
    drawdown : pd.Series
        Drawdown series (cumulative_pnl - running_max).

    Returns
    -------
    dict[str, float]
        Recovery statistics (max_dd_recovery_days, avg_recovery_days, n_drawdowns).
    """
    logger.debug("Computing drawdown recovery from pre-computed intermediates")

    # Find maximum drawdown
    max_dd_idx = drawdown.idxmin()

    # Find when max drawdown started
    peaks_before = running_max[:max_dd_idx]
    if len(peaks_before) > 0:
        max_dd_start = peaks_before[peaks_before == running_max[max_dd_idx]].index[-1]
    else:
        max_dd_start = cumulative_pnl.index[0]

    # Find recovery point
    peak_level = running_max[max_dd_idx]
    recovery_mask = (cumulative_pnl.index > max_dd_idx) & (cumulative_pnl >= peak_level)

    if recovery_mask.any():
        recovery_idx = cumulative_pnl[recovery_mask].index[0]
        max_dd_recovery_days = (recovery_idx - max_dd_start).days
    else:
        max_dd_recovery_days = np.inf

    # Count all drawdown periods
    in_drawdown = drawdown < 0
    drawdown_starts = (~in_drawdown.shift(1, fill_value=False)) & in_drawdown
    n_drawdowns = drawdown_starts.sum()

    # Compute average recovery time
    recovery_times = []
    current_dd_start = None

    for idx in cumulative_pnl.index:
        if drawdown[idx] < 0 and current_dd_start is None:
            current_dd_start = idx
        elif drawdown[idx] == 0 and current_dd_start is not None:
            recovery_days = (idx - current_dd_start).days
            recovery_times.append(recovery_days)
            current_dd_start = None

    avg_recovery_days = np.mean(recovery_times) if recovery_times else 0.0

    return {
        "max_dd_recovery_days": max_dd_recovery_days,
        "avg_recovery_days": avg_recovery_days,
        "n_drawdowns": int(n_drawdowns),
    }

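# A small numeric trace of the recovery logic above (illustrative values):
# cumulative P&L [0, 100, 40, 60, 120] over five days gives the drawdown
# series [0, 0, -60, -40, 0], i.e. one drawdown period, with the 100 peak
# regained two days after the trough:
#
# >>> idx = pd.date_range("2024-01-01", periods=5, freq="D")
# >>> cum = pd.Series([0.0, 100.0, 40.0, 60.0, 120.0], index=idx)
# >>> rm = cum.expanding().max()
# >>> stats = _compute_drawdown_recovery_optimized(cum, rm, cum - rm)
# >>> (stats["max_dd_recovery_days"], stats["n_drawdowns"])
# (2, 1)

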
def compute_rolling_sharpe(
    pnl_series: pd.Series,
    window: int = 63,
) -> pd.Series:
    """
    Compute rolling Sharpe ratio over specified window.

    Parameters
    ----------
    pnl_series : pd.Series
        Daily P&L time series with DatetimeIndex.
    window : int
        Rolling window length in days. Default: 63 (3 months).

    Returns
    -------
    pd.Series
        Rolling annualized Sharpe ratio.

    Notes
    -----
    Assumes 252 trading days per year for annualization.
    Uses zero risk-free rate for simplicity.
    The first (window - 1) values, NaN during the rolling warm-up, are
    filled with 0.0.

    Examples
    --------
    >>> rolling_sharpe = compute_rolling_sharpe(pnl_df['net_pnl'], window=63)
    >>> print(f"Latest 3M Sharpe: {rolling_sharpe.iloc[-1]:.2f}")
    """
    logger.debug("Computing rolling Sharpe: window=%d days", window)

    rolling_mean = pnl_series.rolling(window).mean()
    rolling_std = pnl_series.rolling(window).std()

    # Annualize; NaN values (warm-up period, 0/0 windows) become 0.0
    rolling_sharpe = rolling_mean / rolling_std * np.sqrt(252)
    rolling_sharpe = rolling_sharpe.fillna(0.0)

    valid_count = (rolling_mean.notna() & rolling_std.notna()).sum()
    logger.debug("Rolling Sharpe computed: %d valid observations", valid_count)

    return rolling_sharpe

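# The annualization above is mean/std scaled by sqrt(252), applied to dollar
# P&L rather than returns. A hand-checkable sketch with illustrative values
# (the first two entries are warm-up fills):
#
# >>> pnl = pd.Series([50.0, -50.0, 150.0, -30.0, 80.0, 100.0])
# >>> compute_rolling_sharpe(pnl, window=3).round(2).tolist()
# [0.0, 0.0, 7.94, 3.36, 11.66, 11.34]

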
def compute_drawdown_recovery_time(cumulative_pnl: pd.Series) -> dict[str, float]:
    """
    Compute drawdown recovery statistics.

    Calculates time required to recover from maximum drawdown and
    average recovery time across all drawdown periods.

    Parameters
    ----------
    cumulative_pnl : pd.Series
        Cumulative P&L time series with DatetimeIndex.

    Returns
    -------
    dict[str, float]
        Dictionary with keys:
        - 'max_dd_recovery_days': Days to recover from max drawdown (np.inf if not recovered)
        - 'avg_recovery_days': Average recovery time across all drawdowns
        - 'n_drawdowns': Number of distinct drawdown periods

    Notes
    -----
    A drawdown period starts when equity falls below the previous peak
    and ends when equity reaches a new peak.

    Examples
    --------
    >>> recovery = compute_drawdown_recovery_time(pnl_df['cumulative_pnl'])
    >>> print(f"Max DD recovery: {recovery['max_dd_recovery_days']:.0f} days")
    """
    logger.debug("Computing drawdown recovery metrics")

    running_max = cumulative_pnl.expanding().max()
    drawdown = cumulative_pnl - running_max

    # Find maximum drawdown
    max_dd_idx = drawdown.idxmin()

    # Find when max drawdown started (last peak before max DD)
    peaks_before = running_max[:max_dd_idx]
    if len(peaks_before) > 0:
        max_dd_start = peaks_before[peaks_before == running_max[max_dd_idx]].index[-1]
    else:
        max_dd_start = cumulative_pnl.index[0]

    # Find recovery point (when equity reaches peak level again)
    peak_level = running_max[max_dd_idx]
    recovery_mask = (cumulative_pnl.index > max_dd_idx) & (cumulative_pnl >= peak_level)

    if recovery_mask.any():
        recovery_idx = cumulative_pnl[recovery_mask].index[0]
        max_dd_recovery_days = (recovery_idx - max_dd_start).days
    else:
        max_dd_recovery_days = np.inf

    # Count all drawdown periods
    in_drawdown = drawdown < 0
    drawdown_starts = (~in_drawdown.shift(1, fill_value=False)) & in_drawdown
    n_drawdowns = drawdown_starts.sum()

    # Compute average recovery time for all recovered drawdowns
    recovery_times = []
    current_dd_start = None

    for idx in cumulative_pnl.index:
        if drawdown[idx] < 0 and current_dd_start is None:
            # Start of new drawdown
            current_dd_start = idx
        elif drawdown[idx] == 0 and current_dd_start is not None:
            # Recovery from drawdown
            recovery_days = (idx - current_dd_start).days
            recovery_times.append(recovery_days)
            current_dd_start = None

    avg_recovery_days = np.mean(recovery_times) if recovery_times else 0.0

    logger.debug(
        "Drawdown recovery: max_dd_recovery=%.0f days, n_drawdowns=%d",
        max_dd_recovery_days if max_dd_recovery_days != np.inf else -1,
        n_drawdowns,
    )

    return {
        "max_dd_recovery_days": max_dd_recovery_days,
        "avg_recovery_days": avg_recovery_days,
        "n_drawdowns": int(n_drawdowns),
    }

def compute_tail_ratio(pnl_series: pd.Series, percentile: float = 95.0) -> float:
    """
    Compute tail ratio as a measure of upside vs downside tail risk.

    Ratio of absolute values of right tail (gains) to left tail (losses).
    Values > 1 indicate favorable asymmetry (larger wins than losses).

    Parameters
    ----------
    pnl_series : pd.Series
        Daily P&L time series.
    percentile : float
        Percentile for tail definition. Default: 95.0 (top/bottom 5%).

    Returns
    -------
    float
        Tail ratio (right_tail / abs(left_tail)).
        Returns 0.0 with fewer than 20 observations, or when the left tail
        is non-negative (ratio undefined).

    Notes
    -----
    Tail ratio complements traditional skewness by focusing on
    extreme outcomes rather than the entire distribution.

    Examples
    --------
    >>> tail_ratio = compute_tail_ratio(pnl_df['net_pnl'])
    >>> print(f"Tail ratio: {tail_ratio:.2f}")  # > 1 is favorable
    """
    logger.debug("Computing tail ratio: percentile=%.1f", percentile)

    if len(pnl_series) < 20:
        logger.warning(
            "Insufficient data for tail ratio: %d observations", len(pnl_series)
        )
        return 0.0

    right_tail = np.percentile(pnl_series, percentile)
    left_tail = np.percentile(pnl_series, 100 - percentile)

    if left_tail < 0:
        tail_ratio = abs(right_tail / left_tail)
    else:
        tail_ratio = 0.0

    logger.debug(
        "Tail ratio: %.3f (right=%.2f, left=%.2f)", tail_ratio, right_tail, left_tail
    )

    return tail_ratio

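# Arithmetic sketch for the ratio above: if the 95th percentile of daily P&L
# is +200 and the 5th percentile is -100, the tail ratio is |200 / -100| = 2.0,
# i.e. typical extreme gains are twice the size of typical extreme losses.

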
def compute_profit_factor(pnl_series: pd.Series) -> float:
    """
    Compute profit factor as ratio of gross profits to gross losses.

    Parameters
    ----------
    pnl_series : pd.Series
        Daily P&L time series.

    Returns
    -------
    float
        Profit factor (sum of gains / abs(sum of losses)).
        Returns np.inf if there are gains but no losses, and 0.0 if the
        series has neither gains nor losses.

    Notes
    -----
    Profit factor > 1 indicates a profitable strategy.
    Differs from win/loss ratio by using sums, not averages.

    Examples
    --------
    >>> pf = compute_profit_factor(pnl_df['net_pnl'])
    >>> print(f"Profit factor: {pf:.2f}")  # > 1 is profitable
    """
    logger.debug("Computing profit factor")

    gross_profit = pnl_series[pnl_series > 0].sum()
    gross_loss = abs(pnl_series[pnl_series < 0].sum())

    if gross_loss > 0:
        profit_factor = gross_profit / gross_loss
    else:
        profit_factor = 0.0 if gross_profit == 0 else np.inf

    logger.debug(
        "Profit factor: %.3f (profit=%.2f, loss=%.2f)",
        profit_factor,
        gross_profit,
        gross_loss,
    )

    return profit_factor

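# Arithmetic sketch: daily P&L [100, -50, 200, -25] has gross profit 300 and
# gross loss 75, so the profit factor is 300 / 75 = 4.0:
#
# >>> float(compute_profit_factor(pd.Series([100.0, -50.0, 200.0, -25.0])))
# 4.0

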
def compute_consistency_score(pnl_series: pd.Series, window: int = 21) -> float:
    """
    Compute consistency score as proportion of positive rolling windows.

    Measures how consistently the strategy generates positive returns
    over rolling periods.

    Parameters
    ----------
    pnl_series : pd.Series
        Daily P&L time series.
    window : int
        Rolling window length in days. Default: 21 (1 month).

    Returns
    -------
    float
        Consistency score (0-1 scale).
        Proportion of rolling windows with positive cumulative P&L.

    Notes
    -----
    Higher scores indicate more consistent performance.
    Complements traditional Sharpe by focusing on win frequency
    rather than risk-adjusted returns.

    Examples
    --------
    >>> consistency = compute_consistency_score(pnl_df['net_pnl'], window=21)
    >>> print(f"Consistency: {consistency:.1%}")  # Higher is better
    """
    logger.debug("Computing consistency score: window=%d days", window)

    rolling_sum = pnl_series.rolling(window).sum()
    positive_windows = (rolling_sum > 0).sum()
    total_windows = rolling_sum.notna().sum()

    if total_windows > 0:
        consistency = positive_windows / total_windows
    else:
        consistency = 0.0

    logger.debug(
        "Consistency score: %.3f (%d/%d positive windows)",
        consistency,
        positive_windows,
        total_windows,
    )

    return consistency

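# Arithmetic sketch: with window=2, daily P&L [10, -5, -20, 30] gives rolling
# sums [NaN, 5, -25, 10]; two of the three valid windows are positive, so the
# score is 2/3:
#
# >>> score = compute_consistency_score(pd.Series([10.0, -5.0, -20.0, 30.0]), window=2)
# >>> round(float(score), 3)
# 0.667

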
def compute_extended_metrics(
    pnl_df: pd.DataFrame,
    rolling_window: int = 63,
) -> dict[str, float]:
    """
    Compute all extended risk and stability metrics.

    Orchestrates computation of rolling Sharpe, drawdown recovery,
    tail ratios, profit factor, and consistency metrics.

    Parameters
    ----------
    pnl_df : pd.DataFrame
        P&L DataFrame with 'net_pnl' and 'cumulative_pnl' columns.
    rolling_window : int
        Window length for rolling metrics. Default: 63 days.

    Returns
    -------
    dict[str, float]
        Dictionary with all extended metrics:
        - rolling_sharpe_mean: Average rolling Sharpe
        - rolling_sharpe_std: Volatility of rolling Sharpe
        - max_dd_recovery_days: Recovery time from max drawdown
        - avg_recovery_days: Average recovery across all drawdowns
        - n_drawdowns: Count of drawdown periods
        - tail_ratio: Upside/downside tail ratio
        - profit_factor: Gross profits / gross losses
        - consistency_score: Proportion of positive rolling windows

    Notes
    -----
    This function provides a comprehensive risk profile beyond
    standard backtest metrics. All metrics are computed from
    daily P&L, not the equity curve.

    Examples
    --------
    >>> extended = compute_extended_metrics(result.pnl, rolling_window=63)
    >>> print(f"Avg rolling Sharpe: {extended['rolling_sharpe_mean']:.2f}")
    """
    logger.info("Computing extended risk metrics: window=%d days", rolling_window)

    # Rolling Sharpe statistics
    rolling_sharpe = compute_rolling_sharpe(pnl_df["net_pnl"], window=rolling_window)
    rolling_sharpe_mean = rolling_sharpe.mean()
    rolling_sharpe_std = rolling_sharpe.std()

    # Drawdown recovery
    recovery_stats = compute_drawdown_recovery_time(pnl_df["cumulative_pnl"])

    # Tail risk
    tail_ratio = compute_tail_ratio(pnl_df["net_pnl"])

    # Profitability metrics
    profit_factor = compute_profit_factor(pnl_df["net_pnl"])

    # Consistency
    consistency_score = compute_consistency_score(pnl_df["net_pnl"], window=21)

    metrics = {
        "rolling_sharpe_mean": rolling_sharpe_mean,
        "rolling_sharpe_std": rolling_sharpe_std,
        "max_dd_recovery_days": recovery_stats["max_dd_recovery_days"],
        "avg_recovery_days": recovery_stats["avg_recovery_days"],
        "n_drawdowns": recovery_stats["n_drawdowns"],
        "tail_ratio": tail_ratio,
        "profit_factor": profit_factor,
        "consistency_score": consistency_score,
    }

    logger.info(
        "Extended metrics computed: profit_factor=%.2f, tail_ratio=%.2f, consistency=%.1f%%",
        profit_factor,
        tail_ratio,
        consistency_score * 100,
    )

    return metrics