sigma-terminal 2.0.1__py3-none-any.whl → 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sigma/__init__.py +182 -6
- sigma/__main__.py +2 -2
- sigma/analytics/__init__.py +636 -0
- sigma/app.py +563 -898
- sigma/backtest.py +372 -0
- sigma/charts.py +407 -0
- sigma/cli.py +434 -0
- sigma/comparison.py +611 -0
- sigma/config.py +195 -0
- sigma/core/__init__.py +4 -17
- sigma/core/engine.py +493 -0
- sigma/core/intent.py +595 -0
- sigma/core/models.py +516 -125
- sigma/data/__init__.py +681 -0
- sigma/data/models.py +130 -0
- sigma/llm.py +401 -0
- sigma/monitoring.py +666 -0
- sigma/portfolio.py +697 -0
- sigma/reporting.py +658 -0
- sigma/robustness.py +675 -0
- sigma/setup.py +305 -402
- sigma/strategy.py +753 -0
- sigma/tools/backtest.py +23 -5
- sigma/tools.py +617 -0
- sigma/visualization.py +766 -0
- sigma_terminal-3.2.0.dist-info/METADATA +298 -0
- sigma_terminal-3.2.0.dist-info/RECORD +30 -0
- sigma_terminal-3.2.0.dist-info/entry_points.txt +6 -0
- sigma_terminal-3.2.0.dist-info/licenses/LICENSE +25 -0
- sigma/core/agent.py +0 -205
- sigma/core/config.py +0 -119
- sigma/core/llm.py +0 -794
- sigma/tools/__init__.py +0 -5
- sigma/tools/charts.py +0 -400
- sigma/tools/financial.py +0 -1457
- sigma/ui/__init__.py +0 -1
- sigma_terminal-2.0.1.dist-info/METADATA +0 -222
- sigma_terminal-2.0.1.dist-info/RECORD +0 -19
- sigma_terminal-2.0.1.dist-info/entry_points.txt +0 -2
- sigma_terminal-2.0.1.dist-info/licenses/LICENSE +0 -42
- {sigma_terminal-2.0.1.dist-info → sigma_terminal-3.2.0.dist-info}/WHEEL +0 -0
sigma/robustness.py
ADDED
@@ -0,0 +1,675 @@
"""Robustness engine - Stress tests, overfitting detection, explainability."""

import numpy as np
import pandas as pd
from typing import Any, Dict, List, Optional, Tuple
from enum import Enum
from pydantic import BaseModel, Field

from scipy import stats


# ============================================================================
# DATA MODELS
# ============================================================================

class StressScenario(BaseModel):
    """Stress test scenario definition."""

    name: str
    description: str
    shocks: Dict[str, float]  # Asset/factor -> shock magnitude
    correlation_adjustment: Optional[float] = None  # Increase correlations
    duration_days: Optional[int] = None


class RobustnessResult(BaseModel):
    """Robustness test result."""

    test_name: str
    passed: bool
    score: float  # 0-1
    details: Dict[str, Any]
    recommendations: List[str]


# ============================================================================
# STRESS TESTER
# ============================================================================

class StressTester:
    """
    Run stress tests on portfolios and strategies.
    Tests include historical scenarios, hypothetical scenarios, and factor shocks.
    """

    # Historical stress scenarios
    HISTORICAL_SCENARIOS = {
        "2008_financial_crisis": StressScenario(
            name="2008 Financial Crisis",
            description="Global financial crisis peak",
            shocks={"equity": -0.50, "credit": -0.30, "rates": -0.20, "volatility": 2.5},
            correlation_adjustment=0.3,
            duration_days=252,
        ),
        "2020_covid_crash": StressScenario(
            name="2020 COVID Crash",
            description="COVID-19 market crash",
            shocks={"equity": -0.34, "credit": -0.15, "oil": -0.70, "volatility": 3.0},
            correlation_adjustment=0.4,
            duration_days=30,
        ),
        "2022_rate_shock": StressScenario(
            name="2022 Rate Shock",
            description="Fed rate hiking cycle",
            shocks={"equity": -0.25, "bonds": -0.15, "tech": -0.35, "rates": 0.30},
            duration_days=252,
        ),
        "flash_crash": StressScenario(
            name="Flash Crash",
            description="Sudden market dislocation",
            shocks={"equity": -0.10, "volatility": 2.0},
            duration_days=1,
        ),
        "stagflation": StressScenario(
            name="Stagflation",
            description="High inflation + low growth",
            shocks={"equity": -0.20, "bonds": -0.10, "commodities": 0.30, "rates": 0.15},
            duration_days=504,
        ),
    }

    def run_stress_test(
        self,
        returns: pd.DataFrame,
        weights: Dict[str, float],
        scenario: StressScenario,
    ) -> Dict[str, Any]:
        """Run a stress test on a portfolio."""

        # Map assets to factors (simplified)
        factor_mapping = {
            "equity": ["SPY", "QQQ", "IWM", "VTI"],
            "bonds": ["TLT", "BND", "AGG", "IEF"],
            "tech": ["QQQ", "XLK"],
            "credit": ["HYG", "LQD"],
            "commodities": ["GLD", "USO", "DBC"],
        }

        # Calculate portfolio impact
        portfolio_shock = 0
        asset_impacts = {}

        for asset, weight in weights.items():
            # Find relevant factor shock
            shock = 0
            for factor, factor_assets in factor_mapping.items():
                if any(fa.lower() in asset.lower() for fa in factor_assets):
                    shock = scenario.shocks.get(factor, 0)
                    break

            # Default to equity if no match
            if shock == 0 and "equity" in scenario.shocks:
                shock = scenario.shocks["equity"] * 0.8  # Assume 80% correlation

            impact = weight * shock
            portfolio_shock += impact
            asset_impacts[asset] = {"shock": shock, "impact": impact}

        # Adjust for correlation increase during stress
        if scenario.correlation_adjustment:
            # Higher correlation = worse diversification = more severe impact
            portfolio_shock *= (1 + scenario.correlation_adjustment)

        return {
            "scenario": scenario.name,
            "description": scenario.description,
            "portfolio_impact": portfolio_shock,
            "asset_impacts": asset_impacts,
            "duration_days": scenario.duration_days,
            "survival": portfolio_shock > -0.50,  # Survive if < 50% loss
        }

    def run_all_scenarios(
        self,
        returns: pd.DataFrame,
        weights: Dict[str, float],
    ) -> Dict[str, Dict[str, Any]]:
        """Run all historical stress scenarios."""

        results = {}

        for scenario_id, scenario in self.HISTORICAL_SCENARIOS.items():
            results[scenario_id] = self.run_stress_test(returns, weights, scenario)

        return results

    def run_custom_shock(
        self,
        returns: pd.DataFrame,
        weights: Dict[str, float],
        shocks: Dict[str, float],
    ) -> Dict[str, Any]:
        """Run a custom shock scenario."""

        scenario = StressScenario(
            name="Custom Shock",
            description="User-defined scenario",
            shocks=shocks,
        )

        return self.run_stress_test(returns, weights, scenario)

    def monte_carlo_stress(
        self,
        returns: pd.DataFrame,
        weights: Dict[str, float],
        n_simulations: int = 1000,
        stress_multiplier: float = 2.0,
    ) -> Dict[str, Any]:
        """Monte Carlo stress test with fat tails."""

        # Calculate portfolio returns
        portfolio_returns = (returns * pd.Series(weights)).sum(axis=1)

        # Parameters
        mu = portfolio_returns.mean()
        sigma = portfolio_returns.std()

        # Generate stressed returns (using Student's t for fat tails)
        stressed_returns = stats.t.rvs(
            df=3,  # Heavy tails
            loc=mu,
            scale=sigma * stress_multiplier,
            size=(n_simulations, 252)
        )

        # Calculate outcomes
        final_values = np.prod(1 + stressed_returns, axis=1)
        max_drawdowns = []

        for sim in stressed_returns:
            cumulative = np.cumprod(1 + sim)
            running_max = np.maximum.accumulate(cumulative)
            drawdown = (cumulative - running_max) / running_max
            max_drawdowns.append(drawdown.min())

        return {
            "median_return": np.median(final_values) - 1,
            "worst_5_pct": np.percentile(final_values, 5) - 1,
            "worst_1_pct": np.percentile(final_values, 1) - 1,
            "prob_positive": (final_values > 1).mean(),
            "prob_50pct_loss": (final_values < 0.5).mean(),
            "median_max_drawdown": np.median(max_drawdowns),
            "worst_max_drawdown": np.min(max_drawdowns),
        }
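
For orientation, a minimal usage sketch of the new StressTester (illustrative only, not shipped in the wheel); it assumes the module is importable as sigma.robustness and that the returns DataFrame columns match the weight keys:

# Illustrative sketch, not part of sigma/robustness.py; import path assumed
import numpy as np
import pandas as pd
from sigma.robustness import StressTester

rng = np.random.default_rng(0)
dates = pd.bdate_range("2022-01-03", periods=252)
returns = pd.DataFrame(
    rng.normal(0.0004, 0.01, size=(252, 3)),
    index=dates,
    columns=["SPY", "TLT", "GLD"],
)
weights = {"SPY": 0.6, "TLT": 0.3, "GLD": 0.1}

tester = StressTester()
for name, result in tester.run_all_scenarios(returns, weights).items():
    print(name, f"{result['portfolio_impact']:+.1%}", "survives" if result["survival"] else "fails")

mc = tester.monte_carlo_stress(returns, weights, n_simulations=500)
print("worst 5% one-year outcome:", f"{mc['worst_5_pct']:+.1%}")

Note that run_stress_test maps holdings to factors by substring-matching a small ETF list, so tickers outside that list fall back to a scaled equity shock.
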

# ============================================================================
# OVERFITTING DETECTOR
# ============================================================================

class OverfittingDetector:
    """
    Detect signs of overfitting in strategies.
    Uses multiple techniques including:
    - Out-of-sample testing
    - Walk-forward analysis
    - Combinatorial purged cross-validation
    - Deflated Sharpe Ratio
    """

    def check_overfitting(
        self,
        in_sample_sharpe: float,
        out_sample_sharpe: float,
        n_parameters: int,
        n_trades: int,
        strategy_trials: int = 1,
    ) -> RobustnessResult:
        """Comprehensive overfitting check."""

        checks = []
        score = 1.0

        # 1. In-sample vs Out-of-sample degradation
        if in_sample_sharpe > 0:
            degradation = (in_sample_sharpe - out_sample_sharpe) / in_sample_sharpe
        else:
            degradation = 0

        if degradation > 0.5:
            checks.append(f"High performance degradation: {degradation:.0%}")
            score -= 0.3
        elif degradation > 0.3:
            checks.append(f"Moderate performance degradation: {degradation:.0%}")
            score -= 0.15

        # 2. Parameter to trades ratio
        if n_parameters > 0:
            ratio = n_trades / n_parameters
            if ratio < 10:
                checks.append(f"Low trades per parameter: {ratio:.1f}")
                score -= 0.25
            elif ratio < 20:
                checks.append(f"Marginal trades per parameter: {ratio:.1f}")
                score -= 0.1

        # 3. Deflated Sharpe Ratio (Bailey & López de Prado)
        deflated_sharpe = self._deflated_sharpe_ratio(
            in_sample_sharpe, n_trades, strategy_trials
        )

        if deflated_sharpe < 0:
            checks.append(f"Negative deflated Sharpe: {deflated_sharpe:.2f}")
            score -= 0.3
        elif deflated_sharpe < in_sample_sharpe * 0.5:
            checks.append(f"Low deflated Sharpe: {deflated_sharpe:.2f}")
            score -= 0.15

        # 4. Suspiciously high Sharpe
        if in_sample_sharpe > 3:
            checks.append(f"Unusually high Sharpe ratio: {in_sample_sharpe:.2f}")
            score -= 0.2

        passed = score >= 0.5

        recommendations = []
        if degradation > 0.3:
            recommendations.append("Reduce model complexity")
            recommendations.append("Use regularization")
        if n_parameters > 0 and n_trades / n_parameters < 20:
            recommendations.append("Reduce number of parameters")
            recommendations.append("Collect more data")
        if in_sample_sharpe > 3:
            recommendations.append("Verify data quality")
            recommendations.append("Check for look-ahead bias")

        return RobustnessResult(
            test_name="Overfitting Detection",
            passed=passed,
            score=max(0, score),
            details={
                "in_sample_sharpe": in_sample_sharpe,
                "out_sample_sharpe": out_sample_sharpe,
                "degradation": degradation,
                "deflated_sharpe": deflated_sharpe,
                "trades_per_parameter": n_trades / n_parameters if n_parameters > 0 else float('inf'),
                "checks": checks,
            },
            recommendations=recommendations,
        )

    def _deflated_sharpe_ratio(
        self,
        sharpe: float,
        n_observations: int,
        n_trials: int,
    ) -> float:
        """
        Calculate Deflated Sharpe Ratio.
        Adjusts for multiple testing bias.
        """

        if n_trials <= 1 or n_observations <= 1:
            return sharpe

        # Expected maximum Sharpe from random strategies
        euler_gamma = 0.5772156649
        expected_max = (1 - euler_gamma) * stats.norm.ppf(1 - 1/n_trials) + \
                       euler_gamma * stats.norm.ppf(1 - 1/(n_trials * np.e))

        # Adjusted for observations
        expected_max *= np.sqrt(252 / n_observations)

        # Deflated Sharpe
        deflated = sharpe - expected_max

        return deflated

    def walk_forward_test(
        self,
        returns: pd.Series,
        signal_func,  # Function that generates signals
        train_period: int = 252,
        test_period: int = 63,
    ) -> Dict[str, Any]:
        """Run walk-forward analysis."""

        results = []

        i = train_period
        while i + test_period <= len(returns):
            # Train period
            train_returns = returns.iloc[i-train_period:i]

            # Generate signal on train data
            signal = signal_func(train_returns)

            # Test period
            test_returns = returns.iloc[i:i+test_period]

            # Calculate test performance
            strategy_returns = test_returns * signal
            sharpe = strategy_returns.mean() / strategy_returns.std() * np.sqrt(252) if strategy_returns.std() > 0 else 0

            results.append({
                "period_start": returns.index[i],
                "period_end": returns.index[min(i+test_period-1, len(returns)-1)],
                "sharpe": sharpe,
                "return": (1 + strategy_returns).prod() - 1,
            })

            i += test_period

        # Analyze consistency
        sharpes = [r["sharpe"] for r in results]

        return {
            "periods": results,
            "mean_sharpe": np.mean(sharpes),
            "std_sharpe": np.std(sharpes),
            "pct_positive": sum(1 for s in sharpes if s > 0) / len(sharpes),
            "worst_period": min(sharpes),
            "best_period": max(sharpes),
        }
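
A sketch of how the overfitting checks might be driven (the figures and the toy signal rule are invented; the import path is assumed):

# Illustrative sketch, not part of sigma/robustness.py; import path assumed
import numpy as np
import pandas as pd
from sigma.robustness import OverfittingDetector

detector = OverfittingDetector()

report = detector.check_overfitting(
    in_sample_sharpe=2.1,
    out_sample_sharpe=0.9,   # ~57% degradation -> flagged
    n_parameters=6,
    n_trades=90,             # 15 trades per parameter -> marginal
    strategy_trials=50,      # 50 configurations tried before picking this one
)
print(report.passed, round(report.score, 2), report.recommendations)

def momentum_signal(train_returns: pd.Series) -> int:
    """Toy rule: long if the training window drifted up, otherwise flat."""
    return 1 if train_returns.mean() > 0 else 0

rng = np.random.default_rng(1)
daily_returns = pd.Series(
    rng.normal(0.0003, 0.012, size=756),
    index=pd.bdate_range("2020-01-02", periods=756),
)
wf = detector.walk_forward_test(daily_returns, momentum_signal)
print(f"mean OOS Sharpe {wf['mean_sharpe']:.2f}, positive periods {wf['pct_positive']:.0%}")

The deflated figure used in check_overfitting subtracts the expected maximum Sharpe of strategy_trials random strategies (after Bailey and López de Prado), so a large trial count can push it negative even for a respectable in-sample number.
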

# ============================================================================
# EXPLAINABILITY ENGINE
# ============================================================================

class ExplainabilityEngine:
    """
    Make strategy and model decisions explainable.
    """

    def explain_trade(
        self,
        signal: float,
        features: Dict[str, float],
        thresholds: Dict[str, float],
    ) -> Dict[str, Any]:
        """Explain why a trade signal was generated."""

        reasons = []

        for feature, value in features.items():
            threshold = thresholds.get(feature)
            if threshold is None:
                continue

            if value > threshold:
                reasons.append({
                    "feature": feature,
                    "value": value,
                    "threshold": threshold,
                    "direction": "above",
                    "contribution": "bullish",
                })
            elif value < -threshold:
                reasons.append({
                    "feature": feature,
                    "value": value,
                    "threshold": -threshold,
                    "direction": "below",
                    "contribution": "bearish",
                })

        # Determine primary driver
        if reasons:
            primary = max(reasons, key=lambda x: abs(x["value"]))
        else:
            primary = None

        return {
            "signal": signal,
            "direction": "long" if signal > 0 else "short" if signal < 0 else "neutral",
            "reasons": reasons,
            "primary_driver": primary,
            "confidence": min(abs(signal), 1.0),
        }

    def explain_performance(
        self,
        returns: pd.Series,
        benchmark_returns: pd.Series = None,
    ) -> Dict[str, Any]:
        """Explain performance attribution."""

        # Calculate various return components
        total_return = (1 + returns).prod() - 1

        # Contribution from positive vs negative days
        positive_contrib = returns[returns > 0].sum()
        negative_contrib = returns[returns < 0].sum()

        # Best and worst periods
        monthly = returns.resample('M').apply(lambda x: (1 + x).prod() - 1)
        best_month = monthly.idxmax()
        worst_month = monthly.idxmin()

        # Win rate analysis
        win_rate = (returns > 0).mean()
        avg_win = returns[returns > 0].mean() if win_rate > 0 else 0
        avg_loss = returns[returns < 0].mean() if win_rate < 1 else 0

        explanation = {
            "total_return": total_return,
            "positive_contribution": positive_contrib,
            "negative_contribution": negative_contrib,
            "best_month": {"date": str(best_month), "return": monthly[best_month]},
            "worst_month": {"date": str(worst_month), "return": monthly[worst_month]},
            "win_rate": win_rate,
            "average_win": avg_win,
            "average_loss": avg_loss,
            "profit_factor": abs(positive_contrib / negative_contrib) if negative_contrib != 0 else float('inf'),
        }

        # Alpha decomposition if benchmark provided
        if benchmark_returns is not None:
            aligned = pd.concat([returns, benchmark_returns], axis=1).dropna()
            aligned.columns = ["strategy", "benchmark"]

            # Beta and alpha
            cov = np.cov(aligned["strategy"], aligned["benchmark"])
            beta = cov[0, 1] / cov[1, 1] if cov[1, 1] != 0 else 1

            benchmark_contrib = beta * (1 + aligned["benchmark"]).prod() - 1
            alpha_contrib = total_return - benchmark_contrib

            explanation["beta"] = beta
            explanation["benchmark_contribution"] = benchmark_contrib
            explanation["alpha_contribution"] = alpha_contrib

        return explanation

    def counterfactual_analysis(
        self,
        returns: pd.Series,
        signal: pd.Series,
        alternative_signal: pd.Series,
    ) -> Dict[str, Any]:
        """What-if analysis with alternative signals."""

        # Actual performance
        actual_returns = returns * signal
        actual_total = (1 + actual_returns).prod() - 1
        actual_sharpe = actual_returns.mean() / actual_returns.std() * np.sqrt(252) if actual_returns.std() > 0 else 0

        # Alternative performance
        alt_returns = returns * alternative_signal
        alt_total = (1 + alt_returns).prod() - 1
        alt_sharpe = alt_returns.mean() / alt_returns.std() * np.sqrt(252) if alt_returns.std() > 0 else 0

        # Difference analysis
        diff_returns = alt_returns - actual_returns

        return {
            "actual": {
                "total_return": actual_total,
                "sharpe": actual_sharpe,
            },
            "alternative": {
                "total_return": alt_total,
                "sharpe": alt_sharpe,
            },
            "difference": {
                "return_diff": alt_total - actual_total,
                "sharpe_diff": alt_sharpe - actual_sharpe,
                "better_alternative": alt_sharpe > actual_sharpe,
            },
            "attribution": {
                "positive_changes": (diff_returns > 0).sum(),
                "negative_changes": (diff_returns < 0).sum(),
                "total_impact": diff_returns.sum(),
            },
        }
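
A sketch of the explainability calls (feature names, thresholds, and return series are made up; the import path is assumed):

# Illustrative sketch, not part of sigma/robustness.py; import path assumed
import numpy as np
import pandas as pd
from sigma.robustness import ExplainabilityEngine

engine = ExplainabilityEngine()

# Why did we get a long signal?
trade = engine.explain_trade(
    signal=0.7,
    features={"momentum_z": 1.8, "value_z": -0.4, "sentiment_z": 2.3},
    thresholds={"momentum_z": 1.0, "value_z": 1.0, "sentiment_z": 2.0},
)
print(trade["direction"], trade["primary_driver"])  # sentiment_z has the largest absolute value

# Where did the year's performance come from, relative to a benchmark?
rng = np.random.default_rng(2)
dates = pd.bdate_range("2023-01-02", periods=252)
strategy = pd.Series(rng.normal(0.0006, 0.010, size=252), index=dates)
benchmark = pd.Series(rng.normal(0.0004, 0.009, size=252), index=dates)
perf = engine.explain_performance(strategy, benchmark)
print(f"win rate {perf['win_rate']:.0%}, beta {perf['beta']:.2f}, alpha contribution {perf['alpha_contribution']:+.1%}")

# What if we had simply stayed fully invested instead of following the signal?
signal = np.sign(strategy.shift(1)).fillna(0.0)
what_if = engine.counterfactual_analysis(strategy, signal, pd.Series(1.0, index=dates))
print(what_if["difference"])
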

# ============================================================================
# SAMPLE SIZE VALIDATOR
# ============================================================================

class SampleSizeValidator:
    """Validate statistical significance of results."""

    @staticmethod
    def minimum_trades(
        target_sharpe: float = 1.0,
        significance: float = 0.05,
        power: float = 0.80,
    ) -> int:
        """Calculate minimum trades needed for statistical significance."""

        # Using standard power analysis for Sharpe ratio
        # n = ((z_alpha + z_beta) / sharpe)^2

        z_alpha = stats.norm.ppf(1 - significance / 2)
        z_beta = stats.norm.ppf(power)

        # Adjusted for daily returns (Sharpe is annualized)
        daily_sharpe = target_sharpe / np.sqrt(252)

        n = ((z_alpha + z_beta) / daily_sharpe) ** 2

        return int(np.ceil(n))

    @staticmethod
    def sharpe_confidence_interval(
        sharpe: float,
        n_observations: int,
        confidence: float = 0.95,
    ) -> Tuple[float, float]:
        """Calculate confidence interval for Sharpe ratio."""

        # Standard error of Sharpe ratio
        se = np.sqrt((1 + 0.5 * sharpe**2) / n_observations)

        z = stats.norm.ppf(1 - (1 - confidence) / 2)

        lower = sharpe - z * se
        upper = sharpe + z * se

        return lower, upper

    @staticmethod
    def is_significant(
        sharpe: float,
        n_observations: int,
        significance: float = 0.05,
    ) -> Dict[str, Any]:
        """Test if Sharpe ratio is statistically significant."""

        # Standard error
        se = np.sqrt((1 + 0.5 * sharpe**2) / n_observations)

        # t-statistic (testing H0: Sharpe = 0)
        t_stat = sharpe / se

        # p-value (two-tailed)
        p_value = 2 * (1 - stats.t.cdf(abs(t_stat), df=n_observations - 1))

        return {
            "sharpe": sharpe,
            "standard_error": se,
            "t_statistic": t_stat,
            "p_value": p_value,
            "is_significant": p_value < significance,
            "significance_level": significance,
        }
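
The validator's methods are static, so they can be called without any market data; a short sketch (import path assumed):

# Illustrative sketch, not part of sigma/robustness.py; import path assumed
from sigma.robustness import SampleSizeValidator

# Observations needed to establish an annualized Sharpe of 1.0
# at the 5% significance level with 80% power (roughly 2,000 daily returns).
print(SampleSizeValidator.minimum_trades(target_sharpe=1.0))

# Significance test and confidence interval for an observed Sharpe of 1.2
# measured over 252 observations.
print(SampleSizeValidator.is_significant(sharpe=1.2, n_observations=252))
print(SampleSizeValidator.sharpe_confidence_interval(sharpe=1.2, n_observations=252))
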

# ============================================================================
# BIAS DETECTOR
# ============================================================================

class BiasDetector:
    """Detect common biases in backtests."""

    @staticmethod
    def check_lookahead_bias(
        signal_dates: pd.DatetimeIndex,
        data_dates: pd.DatetimeIndex,
    ) -> Dict[str, Any]:
        """Check for look-ahead bias in signals."""

        violations = []

        for signal_date in signal_dates:
            # Check if signal uses future data
            future_data = data_dates[data_dates > signal_date]
            if len(future_data) > 0:
                # This is expected for most data
                pass

        # Look for signals that precede data
        for i, signal_date in enumerate(signal_dates):
            if i > 0:
                prev_signal = signal_dates[i-1]
                data_between = data_dates[(data_dates > prev_signal) & (data_dates <= signal_date)]
                if len(data_between) == 0:
                    violations.append({
                        "signal_date": signal_date,
                        "issue": "Signal generated without new data",
                    })

        return {
            "violations": violations,
            "violation_count": len(violations),
            "passed": len(violations) == 0,
        }

    @staticmethod
    def check_survivorship_bias(
        universe_dates: Dict[str, Tuple[str, str]],  # symbol -> (start, end)
        backtest_start: str,
    ) -> Dict[str, Any]:
        """Check for survivorship bias in universe."""

        # Count symbols that existed at backtest start
        survivors = 0
        non_survivors = 0

        backtest_start_dt = pd.Timestamp(backtest_start)

        for symbol, (start, end) in universe_dates.items():
            start_dt = pd.Timestamp(start)
            end_dt = pd.Timestamp(end) if end else pd.Timestamp.now()

            if start_dt <= backtest_start_dt:
                if end_dt >= pd.Timestamp.now() - pd.Timedelta(days=30):
                    survivors += 1
                else:
                    non_survivors += 1

        total = survivors + non_survivors
        survivor_pct = survivors / total if total > 0 else 0

        return {
            "survivors": survivors,
            "non_survivors": non_survivors,
            "survivor_percentage": survivor_pct,
            "potential_bias": survivor_pct > 0.9,
            "recommendation": "Include delisted securities" if survivor_pct > 0.9 else "Universe appears balanced",
        }
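
Finally, a sketch of the survivorship check; the tickers and listing dates below are only illustrative, and the import path is assumed:

# Illustrative sketch, not part of sigma/robustness.py; import path assumed
from sigma.robustness import BiasDetector

universe = {
    "AAPL": ("1980-12-12", None),           # still listed
    "LEHMQ": ("1994-05-31", "2008-09-17"),  # delisted
    "ENRNQ": ("1990-01-02", "2001-11-28"),  # delisted
}
report = BiasDetector.check_survivorship_bias(universe, backtest_start="2000-01-03")
print(f"{report['survivor_percentage']:.0%} survivors:", report["recommendation"])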