sigma-terminal 2.0.1__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sigma/robustness.py ADDED
@@ -0,0 +1,675 @@
1
+ """Robustness engine - Stress tests, overfitting detection, explainability."""
2
+
3
+ import numpy as np
4
+ import pandas as pd
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+ from enum import Enum
7
+ from pydantic import BaseModel, Field
8
+
9
+ from scipy import stats
10
+
11
+
12
+ # ============================================================================
13
+ # DATA MODELS
14
+ # ============================================================================
15
+
16
class StressScenario(BaseModel):
    """Stress test scenario definition.

    Describes a market shock to apply to a portfolio: per-factor shock
    magnitudes, an optional crisis correlation increase, and an assumed
    scenario duration.
    """

    # Human-readable scenario name (e.g. "2008 Financial Crisis").
    name: str
    # One-line description of the scenario.
    description: str
    # Asset/factor -> shock magnitude. Equity/bond entries are fractional
    # moves (e.g. -0.50 = -50%); "volatility" entries appear to be
    # multipliers rather than returns — confirm against consumers.
    shocks: Dict[str, float]
    # Optional increase applied to cross-asset correlations during stress.
    correlation_adjustment: Optional[float] = None
    # Assumed scenario duration in trading days.
    duration_days: Optional[int] = None
24
+
25
+
26
class RobustnessResult(BaseModel):
    """Robustness test result.

    Aggregated outcome of a single robustness check (e.g. overfitting
    detection): a pass flag, a 0-1 score, diagnostic details, and
    remediation advice.
    """

    # Name of the test that produced this result.
    test_name: str
    # Whether the test passed overall.
    passed: bool
    # Score in [0, 1]; higher means more robust.
    score: float
    # Test-specific diagnostic details.
    details: Dict[str, Any]
    # Human-readable remediation suggestions.
    recommendations: List[str]
34
+
35
+
36
+ # ============================================================================
37
+ # STRESS TESTER
38
+ # ============================================================================
39
+
40
class StressTester:
    """
    Run stress tests on portfolios and strategies.

    Tests include historical scenarios, user-defined shock scenarios, and a
    Monte Carlo simulation with fat-tailed (Student's t) return draws.
    """

    # Historical stress scenarios keyed by a short identifier. Shocks are
    # fractional moves per factor; "volatility" entries are multipliers.
    HISTORICAL_SCENARIOS = {
        "2008_financial_crisis": StressScenario(
            name="2008 Financial Crisis",
            description="Global financial crisis peak",
            shocks={"equity": -0.50, "credit": -0.30, "rates": -0.20, "volatility": 2.5},
            correlation_adjustment=0.3,
            duration_days=252,
        ),
        "2020_covid_crash": StressScenario(
            name="2020 COVID Crash",
            description="COVID-19 market crash",
            shocks={"equity": -0.34, "credit": -0.15, "oil": -0.70, "volatility": 3.0},
            correlation_adjustment=0.4,
            duration_days=30,
        ),
        "2022_rate_shock": StressScenario(
            name="2022 Rate Shock",
            description="Fed rate hiking cycle",
            shocks={"equity": -0.25, "bonds": -0.15, "tech": -0.35, "rates": 0.30},
            duration_days=252,
        ),
        "flash_crash": StressScenario(
            name="Flash Crash",
            description="Sudden market dislocation",
            shocks={"equity": -0.10, "volatility": 2.0},
            duration_days=1,
        ),
        "stagflation": StressScenario(
            name="Stagflation",
            description="High inflation + low growth",
            shocks={"equity": -0.20, "bonds": -0.10, "commodities": 0.30, "rates": 0.15},
            duration_days=504,
        ),
    }

    def run_stress_test(
        self,
        returns: pd.DataFrame,
        weights: Dict[str, float],
        scenario: StressScenario,
    ) -> Dict[str, Any]:
        """Apply a scenario's factor shocks to a portfolio.

        Args:
            returns: Historical asset returns (currently unused here; kept
                for API compatibility and future factor estimation).
            weights: Asset symbol -> portfolio weight.
            scenario: The stress scenario to apply.

        Returns:
            Dict with the aggregate portfolio impact, per-asset impacts, the
            scenario duration, and a simple survival flag (< 50% loss).
        """

        # Map factors to representative tickers (simplified heuristic match).
        factor_mapping = {
            "equity": ["SPY", "QQQ", "IWM", "VTI"],
            "bonds": ["TLT", "BND", "AGG", "IEF"],
            "tech": ["QQQ", "XLK"],
            "credit": ["HYG", "LQD"],
            "commodities": ["GLD", "USO", "DBC"],
        }

        portfolio_shock = 0.0
        asset_impacts: Dict[str, Dict[str, float]] = {}

        for asset, weight in weights.items():
            # None means "no factor shock resolved yet". Using None (rather
            # than 0) as the sentinel keeps an explicit 0.0 shock in the
            # scenario from being mistaken for "no match" (bug fix).
            shock: Optional[float] = None
            for factor, factor_assets in factor_mapping.items():
                if any(fa.lower() in asset.lower() for fa in factor_assets):
                    shock = scenario.shocks.get(factor)
                    break

            # Unmatched assets default to 80% of the equity shock
            # (assumed ~80% correlation with the broad equity market).
            if shock is None:
                shock = scenario.shocks.get("equity", 0) * 0.8

            impact = weight * shock
            portfolio_shock += impact
            asset_impacts[asset] = {"shock": shock, "impact": impact}

        # Correlations rise in a crisis, eroding diversification benefits,
        # so scale the aggregate impact up accordingly.
        if scenario.correlation_adjustment:
            portfolio_shock *= (1 + scenario.correlation_adjustment)

        return {
            "scenario": scenario.name,
            "description": scenario.description,
            "portfolio_impact": portfolio_shock,
            "asset_impacts": asset_impacts,
            "duration_days": scenario.duration_days,
            "survival": portfolio_shock > -0.50,  # Survive if < 50% loss
        }

    def run_all_scenarios(
        self,
        returns: pd.DataFrame,
        weights: Dict[str, float],
    ) -> Dict[str, Dict[str, Any]]:
        """Run every historical stress scenario and collect the results."""

        return {
            scenario_id: self.run_stress_test(returns, weights, scenario)
            for scenario_id, scenario in self.HISTORICAL_SCENARIOS.items()
        }

    def run_custom_shock(
        self,
        returns: pd.DataFrame,
        weights: Dict[str, float],
        shocks: Dict[str, float],
    ) -> Dict[str, Any]:
        """Run a user-defined shock scenario (factor -> shock magnitude)."""

        scenario = StressScenario(
            name="Custom Shock",
            description="User-defined scenario",
            shocks=shocks,
        )

        return self.run_stress_test(returns, weights, scenario)

    def monte_carlo_stress(
        self,
        returns: pd.DataFrame,
        weights: Dict[str, float],
        n_simulations: int = 1000,
        stress_multiplier: float = 2.0,
        random_state: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Monte Carlo stress test with fat tails.

        Simulates one year (252 days) of portfolio returns per path from a
        Student's t distribution (df=3) centered on the historical mean with
        an inflated standard deviation.

        Args:
            returns: Historical asset returns, one column per asset.
            weights: Asset symbol -> portfolio weight.
            n_simulations: Number of simulated paths.
            stress_multiplier: Volatility inflation factor.
            random_state: Optional seed for reproducible draws
                (backward-compatible addition; default keeps old behavior).
        """

        # Historical portfolio return series implied by the weights.
        portfolio_returns = (returns * pd.Series(weights)).sum(axis=1)

        mu = portfolio_returns.mean()
        sigma = portfolio_returns.std()

        # Student's t with df=3 produces the heavy tails seen in crises.
        stressed_returns = stats.t.rvs(
            df=3,
            loc=mu,
            scale=sigma * stress_multiplier,
            size=(n_simulations, 252),
            random_state=random_state,
        )

        # Terminal wealth multiple of each simulated path.
        final_values = np.prod(1 + stressed_returns, axis=1)

        # Maximum drawdown per path.
        max_drawdowns = []
        for sim in stressed_returns:
            cumulative = np.cumprod(1 + sim)
            running_max = np.maximum.accumulate(cumulative)
            drawdown = (cumulative - running_max) / running_max
            max_drawdowns.append(drawdown.min())

        return {
            "median_return": np.median(final_values) - 1,
            "worst_5_pct": np.percentile(final_values, 5) - 1,
            "worst_1_pct": np.percentile(final_values, 1) - 1,
            "prob_positive": (final_values > 1).mean(),
            "prob_50pct_loss": (final_values < 0.5).mean(),
            "median_max_drawdown": np.median(max_drawdowns),
            "worst_max_drawdown": np.min(max_drawdowns),
        }
206
+
207
+
208
+ # ============================================================================
209
+ # OVERFITTING DETECTOR
210
+ # ============================================================================
211
+
212
+ class OverfittingDetector:
213
+ """
214
+ Detect signs of overfitting in strategies.
215
+ Uses multiple techniques including:
216
+ - Out-of-sample testing
217
+ - Walk-forward analysis
218
+ - Combinatorial purged cross-validation
219
+ - Deflated Sharpe Ratio
220
+ """
221
+
222
+ def check_overfitting(
223
+ self,
224
+ in_sample_sharpe: float,
225
+ out_sample_sharpe: float,
226
+ n_parameters: int,
227
+ n_trades: int,
228
+ strategy_trials: int = 1,
229
+ ) -> RobustnessResult:
230
+ """Comprehensive overfitting check."""
231
+
232
+ checks = []
233
+ score = 1.0
234
+
235
+ # 1. In-sample vs Out-of-sample degradation
236
+ if in_sample_sharpe > 0:
237
+ degradation = (in_sample_sharpe - out_sample_sharpe) / in_sample_sharpe
238
+ else:
239
+ degradation = 0
240
+
241
+ if degradation > 0.5:
242
+ checks.append(f"High performance degradation: {degradation:.0%}")
243
+ score -= 0.3
244
+ elif degradation > 0.3:
245
+ checks.append(f"Moderate performance degradation: {degradation:.0%}")
246
+ score -= 0.15
247
+
248
+ # 2. Parameter to trades ratio
249
+ if n_parameters > 0:
250
+ ratio = n_trades / n_parameters
251
+ if ratio < 10:
252
+ checks.append(f"Low trades per parameter: {ratio:.1f}")
253
+ score -= 0.25
254
+ elif ratio < 20:
255
+ checks.append(f"Marginal trades per parameter: {ratio:.1f}")
256
+ score -= 0.1
257
+
258
+ # 3. Deflated Sharpe Ratio (Bailey & López de Prado)
259
+ deflated_sharpe = self._deflated_sharpe_ratio(
260
+ in_sample_sharpe, n_trades, strategy_trials
261
+ )
262
+
263
+ if deflated_sharpe < 0:
264
+ checks.append(f"Negative deflated Sharpe: {deflated_sharpe:.2f}")
265
+ score -= 0.3
266
+ elif deflated_sharpe < in_sample_sharpe * 0.5:
267
+ checks.append(f"Low deflated Sharpe: {deflated_sharpe:.2f}")
268
+ score -= 0.15
269
+
270
+ # 4. Suspiciously high Sharpe
271
+ if in_sample_sharpe > 3:
272
+ checks.append(f"Unusually high Sharpe ratio: {in_sample_sharpe:.2f}")
273
+ score -= 0.2
274
+
275
+ passed = score >= 0.5
276
+
277
+ recommendations = []
278
+ if degradation > 0.3:
279
+ recommendations.append("Reduce model complexity")
280
+ recommendations.append("Use regularization")
281
+ if n_parameters > 0 and n_trades / n_parameters < 20:
282
+ recommendations.append("Reduce number of parameters")
283
+ recommendations.append("Collect more data")
284
+ if in_sample_sharpe > 3:
285
+ recommendations.append("Verify data quality")
286
+ recommendations.append("Check for look-ahead bias")
287
+
288
+ return RobustnessResult(
289
+ test_name="Overfitting Detection",
290
+ passed=passed,
291
+ score=max(0, score),
292
+ details={
293
+ "in_sample_sharpe": in_sample_sharpe,
294
+ "out_sample_sharpe": out_sample_sharpe,
295
+ "degradation": degradation,
296
+ "deflated_sharpe": deflated_sharpe,
297
+ "trades_per_parameter": n_trades / n_parameters if n_parameters > 0 else float('inf'),
298
+ "checks": checks,
299
+ },
300
+ recommendations=recommendations,
301
+ )
302
+
303
+ def _deflated_sharpe_ratio(
304
+ self,
305
+ sharpe: float,
306
+ n_observations: int,
307
+ n_trials: int,
308
+ ) -> float:
309
+ """
310
+ Calculate Deflated Sharpe Ratio.
311
+ Adjusts for multiple testing bias.
312
+ """
313
+
314
+ if n_trials <= 1 or n_observations <= 1:
315
+ return sharpe
316
+
317
+ # Expected maximum Sharpe from random strategies
318
+ euler_gamma = 0.5772156649
319
+ expected_max = (1 - euler_gamma) * stats.norm.ppf(1 - 1/n_trials) + \
320
+ euler_gamma * stats.norm.ppf(1 - 1/(n_trials * np.e))
321
+
322
+ # Adjusted for observations
323
+ expected_max *= np.sqrt(252 / n_observations)
324
+
325
+ # Deflated Sharpe
326
+ deflated = sharpe - expected_max
327
+
328
+ return deflated
329
+
330
+ def walk_forward_test(
331
+ self,
332
+ returns: pd.Series,
333
+ signal_func, # Function that generates signals
334
+ train_period: int = 252,
335
+ test_period: int = 63,
336
+ ) -> Dict[str, Any]:
337
+ """Run walk-forward analysis."""
338
+
339
+ results = []
340
+
341
+ i = train_period
342
+ while i + test_period <= len(returns):
343
+ # Train period
344
+ train_returns = returns.iloc[i-train_period:i]
345
+
346
+ # Generate signal on train data
347
+ signal = signal_func(train_returns)
348
+
349
+ # Test period
350
+ test_returns = returns.iloc[i:i+test_period]
351
+
352
+ # Calculate test performance
353
+ strategy_returns = test_returns * signal
354
+ sharpe = strategy_returns.mean() / strategy_returns.std() * np.sqrt(252) if strategy_returns.std() > 0 else 0
355
+
356
+ results.append({
357
+ "period_start": returns.index[i],
358
+ "period_end": returns.index[min(i+test_period-1, len(returns)-1)],
359
+ "sharpe": sharpe,
360
+ "return": (1 + strategy_returns).prod() - 1,
361
+ })
362
+
363
+ i += test_period
364
+
365
+ # Analyze consistency
366
+ sharpes = [r["sharpe"] for r in results]
367
+
368
+ return {
369
+ "periods": results,
370
+ "mean_sharpe": np.mean(sharpes),
371
+ "std_sharpe": np.std(sharpes),
372
+ "pct_positive": sum(1 for s in sharpes if s > 0) / len(sharpes),
373
+ "worst_period": min(sharpes),
374
+ "best_period": max(sharpes),
375
+ }
376
+
377
+
378
+ # ============================================================================
379
+ # EXPLAINABILITY ENGINE
380
+ # ============================================================================
381
+
382
class ExplainabilityEngine:
    """
    Make strategy and model decisions explainable.

    Provides trade-signal explanations, performance attribution, and
    counterfactual (what-if) signal analysis.
    """

    def explain_trade(
        self,
        signal: float,
        features: Dict[str, float],
        thresholds: Dict[str, float],
    ) -> Dict[str, Any]:
        """Explain why a trade signal was generated.

        Args:
            signal: Signal strength; the sign gives direction.
            features: Feature name -> observed value.
            thresholds: Feature name -> positive trigger threshold; values
                above +threshold read bullish, below -threshold bearish.
        """

        reasons = []

        for feature, value in features.items():
            threshold = thresholds.get(feature)
            if threshold is None:
                # No threshold configured for this feature; skip it.
                continue

            if value > threshold:
                reasons.append({
                    "feature": feature,
                    "value": value,
                    "threshold": threshold,
                    "direction": "above",
                    "contribution": "bullish",
                })
            elif value < -threshold:
                reasons.append({
                    "feature": feature,
                    "value": value,
                    "threshold": -threshold,
                    "direction": "below",
                    "contribution": "bearish",
                })

        # The primary driver is the triggered feature with the largest
        # absolute value.
        if reasons:
            primary = max(reasons, key=lambda x: abs(x["value"]))
        else:
            primary = None

        return {
            "signal": signal,
            "direction": "long" if signal > 0 else "short" if signal < 0 else "neutral",
            "reasons": reasons,
            "primary_driver": primary,
            "confidence": min(abs(signal), 1.0),
        }

    def explain_performance(
        self,
        returns: pd.Series,
        benchmark_returns: Optional[pd.Series] = None,
    ) -> Dict[str, Any]:
        """Explain performance attribution.

        Args:
            returns: Strategy return series (needs a DatetimeIndex for the
                monthly breakdown).
            benchmark_returns: Optional benchmark series; when provided, a
                beta/alpha decomposition is appended.
        """

        total_return = (1 + returns).prod() - 1

        # Contribution from positive vs negative days.
        positive_contrib = returns[returns > 0].sum()
        negative_contrib = returns[returns < 0].sum()

        # Best and worst calendar months.
        monthly = returns.resample('M').apply(lambda x: (1 + x).prod() - 1)
        best_month = monthly.idxmax()
        worst_month = monthly.idxmin()

        # Win-rate analysis.
        win_rate = (returns > 0).mean()
        avg_win = returns[returns > 0].mean() if win_rate > 0 else 0
        avg_loss = returns[returns < 0].mean() if win_rate < 1 else 0

        explanation = {
            "total_return": total_return,
            "positive_contribution": positive_contrib,
            "negative_contribution": negative_contrib,
            "best_month": {"date": str(best_month), "return": monthly[best_month]},
            "worst_month": {"date": str(worst_month), "return": monthly[worst_month]},
            "win_rate": win_rate,
            "average_win": avg_win,
            "average_loss": avg_loss,
            "profit_factor": abs(positive_contrib / negative_contrib) if negative_contrib != 0 else float('inf'),
        }

        # Alpha decomposition if a benchmark is provided.
        if benchmark_returns is not None:
            aligned = pd.concat([returns, benchmark_returns], axis=1).dropna()
            aligned.columns = ["strategy", "benchmark"]

            # Beta of the strategy against the benchmark.
            cov = np.cov(aligned["strategy"], aligned["benchmark"])
            beta = cov[0, 1] / cov[1, 1] if cov[1, 1] != 0 else 1

            # Bug fix: compute the benchmark's cumulative return BEFORE
            # scaling by beta. Previously the "- 1" bound tighter than the
            # multiplication (beta * prod - 1), distorting the attribution
            # whenever beta != 1.
            benchmark_contrib = beta * ((1 + aligned["benchmark"]).prod() - 1)
            alpha_contrib = total_return - benchmark_contrib

            explanation["beta"] = beta
            explanation["benchmark_contribution"] = benchmark_contrib
            explanation["alpha_contribution"] = alpha_contrib

        return explanation

    def counterfactual_analysis(
        self,
        returns: pd.Series,
        signal: pd.Series,
        alternative_signal: pd.Series,
    ) -> Dict[str, Any]:
        """What-if analysis comparing the actual signal to an alternative.

        Args:
            returns: Underlying asset return series.
            signal: The signal actually traded.
            alternative_signal: The hypothetical signal to compare against.
        """

        # Actual performance.
        actual_returns = returns * signal
        actual_total = (1 + actual_returns).prod() - 1
        actual_sharpe = actual_returns.mean() / actual_returns.std() * np.sqrt(252) if actual_returns.std() > 0 else 0

        # Alternative performance.
        alt_returns = returns * alternative_signal
        alt_total = (1 + alt_returns).prod() - 1
        alt_sharpe = alt_returns.mean() / alt_returns.std() * np.sqrt(252) if alt_returns.std() > 0 else 0

        # Per-period difference between the two signal choices.
        diff_returns = alt_returns - actual_returns

        return {
            "actual": {
                "total_return": actual_total,
                "sharpe": actual_sharpe,
            },
            "alternative": {
                "total_return": alt_total,
                "sharpe": alt_sharpe,
            },
            "difference": {
                "return_diff": alt_total - actual_total,
                "sharpe_diff": alt_sharpe - actual_sharpe,
                "better_alternative": alt_sharpe > actual_sharpe,
            },
            "attribution": {
                "positive_changes": (diff_returns > 0).sum(),
                "negative_changes": (diff_returns < 0).sum(),
                "total_impact": diff_returns.sum(),
            },
        }
528
+
529
+
530
+ # ============================================================================
531
+ # SAMPLE SIZE VALIDATOR
532
+ # ============================================================================
533
+
534
class SampleSizeValidator:
    """Validate statistical significance of results.

    All methods use the asymptotic standard error of the Sharpe ratio,
    sqrt((1 + SR^2 / 2) / n).
    """

    @staticmethod
    def minimum_trades(
        target_sharpe: float = 1.0,
        significance: float = 0.05,
        power: float = 0.80,
    ) -> int:
        """Calculate minimum trades needed for statistical significance.

        Standard power analysis: n = ((z_alpha + z_beta) / sharpe)^2, with
        the target Sharpe de-annualized assuming 252 trading days.
        """

        critical_z = stats.norm.ppf(1 - significance / 2) + stats.norm.ppf(power)
        daily_sharpe = target_sharpe / np.sqrt(252)
        required = (critical_z / daily_sharpe) ** 2
        return int(np.ceil(required))

    @staticmethod
    def sharpe_confidence_interval(
        sharpe: float,
        n_observations: int,
        confidence: float = 0.95,
    ) -> Tuple[float, float]:
        """Calculate confidence interval for Sharpe ratio.

        Two-sided interval based on the normal quantile for the requested
        confidence level.
        """

        std_err = np.sqrt((1 + 0.5 * sharpe ** 2) / n_observations)
        half_width = stats.norm.ppf(1 - (1 - confidence) / 2) * std_err
        return sharpe - half_width, sharpe + half_width

    @staticmethod
    def is_significant(
        sharpe: float,
        n_observations: int,
        significance: float = 0.05,
    ) -> Dict[str, Any]:
        """Test if Sharpe ratio is statistically significant.

        Two-tailed t-test of H0: Sharpe == 0.
        """

        std_err = np.sqrt((1 + 0.5 * sharpe ** 2) / n_observations)
        t_stat = sharpe / std_err
        p_value = 2 * (1 - stats.t.cdf(abs(t_stat), df=n_observations - 1))

        return {
            "sharpe": sharpe,
            "standard_error": std_err,
            "t_statistic": t_stat,
            "p_value": p_value,
            "is_significant": p_value < significance,
            "significance_level": significance,
        }
601
+
602
+
603
+ # ============================================================================
604
+ # BIAS DETECTOR
605
+ # ============================================================================
606
+
607
class BiasDetector:
    """Detect common biases in backtests."""

    @staticmethod
    def check_lookahead_bias(
        signal_dates: pd.DatetimeIndex,
        data_dates: pd.DatetimeIndex,
    ) -> Dict[str, Any]:
        """Check for look-ahead bias in signals.

        Flags any signal emitted without new data having arrived since the
        previous signal — a hint the signal may rely on information that was
        not yet available. (A previous no-op scan over future data was
        removed; it iterated all signal dates and did nothing.)

        Args:
            signal_dates: Timestamps at which signals were generated.
            data_dates: Timestamps at which data became available.
        """

        violations = []

        for i, signal_date in enumerate(signal_dates):
            if i > 0:
                prev_signal = signal_dates[i-1]
                # Data that arrived strictly after the previous signal and
                # at or before the current one.
                data_between = data_dates[(data_dates > prev_signal) & (data_dates <= signal_date)]
                if len(data_between) == 0:
                    violations.append({
                        "signal_date": signal_date,
                        "issue": "Signal generated without new data",
                    })

        return {
            "violations": violations,
            "violation_count": len(violations),
            "passed": len(violations) == 0,
        }

    @staticmethod
    def check_survivorship_bias(
        universe_dates: Dict[str, Tuple[str, str]],  # symbol -> (start, end)
        backtest_start: str,
    ) -> Dict[str, Any]:
        """Check for survivorship bias in the backtest universe.

        A symbol counts as a "survivor" when it existed at the backtest start
        and is still active (listed within the last 30 days). A universe made
        almost entirely of survivors (> 90%) suggests delisted securities
        were dropped.
        """

        survivors = 0
        non_survivors = 0

        backtest_start_dt = pd.Timestamp(backtest_start)
        # Snapshot "now" once so the open-ended default and the recency
        # cutoff use the same instant (previously sampled twice).
        now = pd.Timestamp.now()

        for symbol, (start, end) in universe_dates.items():
            start_dt = pd.Timestamp(start)
            # A falsy end date means the symbol is still listed.
            end_dt = pd.Timestamp(end) if end else now

            if start_dt <= backtest_start_dt:
                if end_dt >= now - pd.Timedelta(days=30):
                    survivors += 1
                else:
                    non_survivors += 1

        total = survivors + non_survivors
        survivor_pct = survivors / total if total > 0 else 0

        return {
            "survivors": survivors,
            "non_survivors": non_survivors,
            "survivor_percentage": survivor_pct,
            "potential_bias": survivor_pct > 0.9,
            "recommendation": "Include delisted securities" if survivor_pct > 0.9 else "Universe appears balanced",
        }