sigma-terminal 2.0.2__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sigma/comparison.py ADDED
@@ -0,0 +1,611 @@
1
+ """Comparison engine - Compare anything finance."""
2
+
3
+ import asyncio
4
+ from datetime import date, timedelta
5
+ from typing import Any, Dict, List, Optional, Tuple, Union
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+
10
+ from .core.models import (
11
+ AssetClass,
12
+ PerformanceMetrics,
13
+ ComparisonResult,
14
+ detect_asset_class,
15
+ )
16
+ from .analytics import (
17
+ PerformanceAnalytics,
18
+ FactorAnalyzer,
19
+ CorrelationAnalyzer,
20
+ SeasonalityAnalyzer,
21
+ )
22
+
23
+
24
class ComparisonEngine:
    """
    Compare anything finance - stocks, ETFs, portfolios with full metrics.
    Translates vague prompts into measurable criteria.

    The public entry point is :meth:`compare`; the private ``_compare_*`` /
    ``_analyze_*`` helpers each build one section of the result dictionary.
    """

    # Annual risk-free rate passed to the performance calculation (~5%).
    DEFAULT_RISK_FREE_RATE = 0.05

    def __init__(self, risk_free_rate: float = DEFAULT_RISK_FREE_RATE):
        """
        Create the analyzers used by the comparison helpers.

        Args:
            risk_free_rate: Rate forwarded to
                ``PerformanceAnalytics.calculate_metrics`` when computing
                performance metrics. Defaults to 0.05.
        """
        self.risk_free_rate = risk_free_rate
        self.performance = PerformanceAnalytics()
        self.factor = FactorAnalyzer()
        self.correlation = CorrelationAnalyzer()
        self.seasonality = SeasonalityAnalyzer()

    @staticmethod
    def _is_missing(value: Any) -> bool:
        """Return True when *value* is None or NaN (non-numeric values are not missing)."""
        if value is None:
            return True
        try:
            return bool(np.isnan(value))
        except TypeError:
            # Not a numeric scalar (e.g. a string); treat it as present.
            return False

    async def compare(
        self,
        assets: List[str],
        returns_data: Dict[str, pd.Series],
        benchmark: str = "SPY",
        benchmark_returns: Optional[pd.Series] = None,
        fundamentals: Optional[Dict[str, Dict]] = None,
        etf_data: Optional[Dict[str, Dict]] = None,
        criteria: Optional[Dict[str, float]] = None,
    ) -> Dict[str, Any]:
        """
        Comprehensive multi-asset comparison.

        Args:
            assets: List of asset symbols to compare
            returns_data: Dictionary of symbol -> returns series
            benchmark: Benchmark symbol (only echoed back in the result)
            benchmark_returns: Benchmark returns series
            fundamentals: Optional fundamental data for equities
            etf_data: Optional ETF-specific data (holdings, fees, etc.)
            criteria: Optional weighted criteria for scoring

        Returns:
            Dictionary with the keys ``assets``, ``benchmark``,
            ``comparison_date``, ``performance``, ``risk``, ``behavior``,
            ``correlations``, ``scoring``, ``tradeoffs`` and
            ``recommendation``; ``fundamentals`` and ``etf_analysis`` are
            present only when the corresponding input is provided.
        """
        results: Dict[str, Any] = {
            "assets": assets,
            "benchmark": benchmark,
            "comparison_date": date.today().isoformat(),
        }

        # 1. PERFORMANCE COMPARISON
        performance_comparison = await self._compare_performance(
            assets, returns_data, benchmark_returns
        )
        results["performance"] = performance_comparison

        # 2. RISK COMPARISON
        risk_comparison = await self._compare_risk(
            assets, returns_data, benchmark_returns
        )
        results["risk"] = risk_comparison

        # 3. BEHAVIOR COMPARISON
        behavior_comparison = await self._compare_behavior(assets, returns_data)
        results["behavior"] = behavior_comparison

        # 4. CORRELATION ANALYSIS
        results["correlations"] = await self._analyze_correlations(
            assets, returns_data
        )

        # 5. FUNDAMENTAL COMPARISON (if equities)
        if fundamentals:
            results["fundamentals"] = self._compare_fundamentals(assets, fundamentals)

        # 6. ETF-SPECIFIC COMPARISON (if ETFs)
        if etf_data:
            results["etf_analysis"] = self._compare_etfs(assets, etf_data)

        # 7. SCORING AND RANKING
        scoring = self._score_and_rank(
            assets,
            performance_comparison,
            risk_comparison,
            behavior_comparison,
            criteria,
        )
        results["scoring"] = scoring

        # 8. TRADEOFF ANALYSIS
        results["tradeoffs"] = self._analyze_tradeoffs(
            assets, performance_comparison, risk_comparison
        )

        # 9. RECOMMENDATION
        results["recommendation"] = self._generate_recommendation(
            assets, scoring, criteria
        )

        return results

    async def _compare_performance(
        self,
        assets: List[str],
        returns_data: Dict[str, pd.Series],
        benchmark_returns: Optional[pd.Series],
    ) -> Dict[str, Dict]:
        """
        Compare performance metrics across assets.

        Assets without an entry in ``returns_data`` are skipped. Metrics that
        the analytics result does not provide default to 0 (``beta`` to 1).
        """
        comparison = {}

        for asset in assets:
            if asset not in returns_data:
                continue

            metrics = self.performance.calculate_metrics(
                returns_data[asset],
                benchmark_returns,
                risk_free_rate=self.risk_free_rate,
            )

            comparison[asset] = {
                "total_return": metrics.get("total_return", 0),
                "cagr": metrics.get("cagr", 0),
                "volatility": metrics.get("volatility", 0),
                "sharpe_ratio": metrics.get("sharpe_ratio", 0),
                "sortino_ratio": metrics.get("sortino_ratio", 0),
                "max_drawdown": metrics.get("max_drawdown", 0),
                "calmar_ratio": metrics.get("calmar_ratio", 0),
                "alpha": metrics.get("alpha", 0),
                "beta": metrics.get("beta", 1),
                "r_squared": metrics.get("r_squared", 0),
            }

        return comparison

    async def _compare_risk(
        self,
        assets: List[str],
        returns_data: Dict[str, pd.Series],
        benchmark_returns: Optional[pd.Series],
    ) -> Dict[str, Dict]:
        """
        Compare risk metrics across assets.

        Skewness, kurtosis and tail ratio are added only for assets with more
        than 100 non-null observations.
        """
        comparison = {}

        for asset in assets:
            if asset not in returns_data:
                continue

            returns = returns_data[asset]
            # NOTE(review): no risk_free_rate is passed here, so the analytics
            # default applies - confirm that none of the metrics read below
            # depend on it.
            metrics = self.performance.calculate_metrics(returns, benchmark_returns)

            comparison[asset] = {
                "volatility": metrics.get("volatility", 0),
                "downside_deviation": metrics.get("downside_deviation", 0),
                "max_drawdown": metrics.get("max_drawdown", 0),
                "max_dd_duration": metrics.get("max_dd_duration", 0),
                "var_95": metrics.get("var_95", 0),
                "cvar_95": metrics.get("cvar_95", 0),
                "beta": metrics.get("beta", 1),
            }

            # Tail risk metrics
            returns_arr = returns.dropna()
            if len(returns_arr) > 100:
                # Skewness and kurtosis
                comparison[asset]["skewness"] = returns_arr.skew()
                comparison[asset]["kurtosis"] = returns_arr.kurtosis()

                # Tail ratio: 95th percentile gain over |5th percentile loss|
                upper_tail = returns_arr.quantile(0.95)
                lower_tail = abs(returns_arr.quantile(0.05))
                comparison[asset]["tail_ratio"] = (
                    upper_tail / lower_tail if lower_tail != 0 else 1
                )

        return comparison

    async def _compare_behavior(
        self,
        assets: List[str],
        returns_data: Dict[str, pd.Series],
    ) -> Dict[str, Dict]:
        """
        Compare behavioral characteristics.

        For each asset with data this produces ``momentum_score``,
        ``mean_reversion_score``, ``trend_persistence``,
        ``seasonality_strength`` and ``win_rate``. Scores that need more
        history than is available are reported as 0.
        """
        comparison = {}

        for asset in assets:
            if asset not in returns_data:
                continue

            returns = returns_data[asset]
            has_long_history = len(returns) > 252

            # Momentum score: compounded return of the last 63 observations
            # scaled by the volatility of the preceding history.
            momentum_score = 0
            if has_long_history:
                recent_return = (1 + returns.iloc[-63:]).prod() - 1
                historical_vol = returns.iloc[:-63].std()
                if historical_vol > 0:
                    momentum_score = recent_return / (historical_vol * np.sqrt(63))

            # Mean reversion score (autocorrelation)
            if len(returns) > 21:
                # Negative autocorr = mean reverting
                mean_reversion_score = -returns.autocorr(lag=1)
            else:
                mean_reversion_score = 0

            # Trend persistence: lag-1 autocorrelation of monthly returns
            trend_persistence = 0
            if has_long_history:
                # NOTE: the 'M' offset alias is deprecated in pandas >= 2.2 in
                # favour of 'ME'; kept so older pandas versions keep working.
                monthly_returns = returns.resample('M').apply(
                    lambda x: (1 + x).prod() - 1
                )
                if len(monthly_returns) > 2:
                    trend_persistence = monthly_returns.autocorr(lag=1)

            # Seasonality strength: dispersion of the per-month mean returns
            seasonality = self.seasonality.monthly_seasonality(returns)
            if seasonality:
                returns_by_month = [s["mean_return"] for s in seasonality.values()]
                seasonality_strength = np.std(returns_by_month) if returns_by_month else 0
            else:
                seasonality_strength = 0

            comparison[asset] = {
                "momentum_score": momentum_score,
                "mean_reversion_score": mean_reversion_score,
                "trend_persistence": trend_persistence,
                "seasonality_strength": seasonality_strength,
                "win_rate": (returns > 0).mean(),
            }

        return comparison

    async def _analyze_correlations(
        self,
        assets: List[str],
        returns_data: Dict[str, pd.Series],
    ) -> Dict[str, Any]:
        """
        Analyze correlations between assets.

        Returns ``{"error": ...}`` when fewer than 30 fully populated rows
        remain after aligning the series; otherwise the correlation matrix,
        the mean of its upper triangle and the asset clusters.
        """
        # Build returns DataFrame from the assets that actually have data
        df = pd.DataFrame({a: returns_data[a] for a in assets if a in returns_data})
        df = df.dropna()

        if len(df) < 30:
            return {"error": "Insufficient data for correlation analysis"}

        # Correlation matrix
        corr_matrix = self.correlation.correlation_matrix(df)

        # Average pairwise correlation (strict upper triangle only)
        mask = np.triu(np.ones_like(corr_matrix, dtype=bool), k=1)
        avg_correlation = corr_matrix.where(mask).stack().mean()

        # Cluster on the assets present in df rather than on `assets`, so
        # symbols without return data neither trigger clustering nor show up
        # as phantom clusters.
        available = list(df.columns)
        if len(available) >= 3:
            n_clusters = min(3, len(available))
            _, clusters = self.correlation.clustered_correlation(df, n_clusters)
        else:
            clusters = [[a] for a in available]

        return {
            "correlation_matrix": corr_matrix.to_dict(),
            "average_correlation": avg_correlation,
            "clusters": clusters,
        }

    def _compare_fundamentals(
        self,
        assets: List[str],
        fundamentals: Dict[str, Dict],
    ) -> Dict[str, Dict]:
        """
        Compare fundamental metrics for equities.

        Each field is read with ``dict.get`` and is None when the provider
        data lacks it. ``pe_ratio`` falls back to ``forwardPE`` when
        ``trailingPE`` is absent.
        """
        comparison = {}

        for asset in assets:
            if asset not in fundamentals:
                continue

            data = fundamentals[asset]

            comparison[asset] = {
                # Valuation
                "pe_ratio": data.get("trailingPE", data.get("forwardPE")),
                "pb_ratio": data.get("priceToBook"),
                "ps_ratio": data.get("priceToSalesTrailing12Months"),
                "ev_ebitda": data.get("enterpriseToEbitda"),

                # Growth
                "revenue_growth": data.get("revenueGrowth"),
                "earnings_growth": data.get("earningsGrowth"),

                # Profitability
                "profit_margin": data.get("profitMargins"),
                "operating_margin": data.get("operatingMargins"),
                "roe": data.get("returnOnEquity"),
                "roa": data.get("returnOnAssets"),

                # Financial health
                "debt_to_equity": data.get("debtToEquity"),
                "current_ratio": data.get("currentRatio"),
                "quick_ratio": data.get("quickRatio"),

                # Dividends
                "dividend_yield": data.get("dividendYield"),
                "payout_ratio": data.get("payoutRatio"),

                # Size
                "market_cap": data.get("marketCap"),
            }

        return comparison

    def _compare_etfs(
        self,
        assets: List[str],
        etf_data: Dict[str, Dict],
    ) -> Dict[str, Any]:
        """
        Compare ETF-specific metrics.

        Besides one entry per asset, the result carries a
        ``holdings_overlap`` entry when at least two assets are given. Overlap
        is the shared holdings count divided by the smaller holdings count.
        """
        comparison: Dict[str, Any] = {}

        for asset in assets:
            if asset not in etf_data:
                continue

            data = etf_data[asset]

            comparison[asset] = {
                "expense_ratio": data.get("expense_ratio"),
                "aum": data.get("aum"),
                "holdings_count": data.get("holdings_count"),
                "top_10_weight": data.get("top_10_weight"),
                "tracking_error": data.get("tracking_error"),
                "premium_discount": data.get("premium_discount"),
            }

        # Holdings overlap analysis
        if len(assets) >= 2:
            overlaps = {}
            for i, asset1 in enumerate(assets):
                for asset2 in assets[i + 1:]:
                    if asset1 not in etf_data or asset2 not in etf_data:
                        continue
                    holdings1 = set(etf_data[asset1].get("holdings", []))
                    holdings2 = set(etf_data[asset2].get("holdings", []))
                    if holdings1 and holdings2:
                        shared = len(holdings1 & holdings2)
                        overlaps[f"{asset1}_vs_{asset2}"] = shared / min(
                            len(holdings1), len(holdings2)
                        )

            # Kept inside the guard: `overlaps` only exists on this path.
            comparison["holdings_overlap"] = overlaps

        return comparison

    def _score_and_rank(
        self,
        assets: List[str],
        performance: Dict[str, Dict],
        risk: Dict[str, Dict],
        behavior: Dict[str, Dict],
        criteria: Optional[Dict[str, float]] = None,
    ) -> Dict[str, Any]:
        """
        Score and rank assets based on criteria.

        Each asset's score is the sum of ``value * weight`` over the criteria.
        A metric is looked up in ``performance`` first, then ``risk``, then
        ``behavior``. Missing, None and NaN values count as 0 so that one
        undefined metric cannot turn the total into NaN and make the ranking
        order unreliable.

        Returns:
            Dict with ``scores`` (per asset total and details), ``ranking``
            (symbols, best first) and ``criteria_used``.
        """
        # Default criteria weights
        default_criteria = {
            "sharpe_ratio": 0.25,
            "cagr": 0.20,
            "max_drawdown": -0.20,  # Negative because lower is better
            "volatility": -0.15,  # Negative because lower is better
            "sortino_ratio": 0.10,
            "momentum_score": 0.10,
        }

        weights = criteria or default_criteria

        # Calculate scores
        scores = {}
        for asset in assets:
            if asset not in performance:
                continue

            score = 0
            score_details = {}

            for metric, weight in weights.items():
                value = 0
                for source in (performance, risk, behavior):
                    asset_metrics = source.get(asset, {})
                    if metric in asset_metrics:
                        value = asset_metrics[metric]
                        break

                # Handle None / NaN values
                if self._is_missing(value):
                    value = 0

                contribution = value * weight
                score += contribution
                score_details[metric] = {
                    "value": value,
                    "weight": weight,
                    "contribution": contribution,
                }

            scores[asset] = {
                "total_score": score,
                "details": score_details,
            }

        # Rank assets (highest total score first; sort is stable on ties)
        ranking = sorted(
            scores,
            key=lambda name: scores[name]["total_score"],
            reverse=True,
        )

        return {
            "scores": scores,
            "ranking": ranking,
            "criteria_used": weights,
        }

    def _analyze_tradeoffs(
        self,
        assets: List[str],
        performance: Dict[str, Dict],
        risk: Dict[str, Dict],
    ) -> List[str]:
        """
        Analyze tradeoffs between assets.

        For each of sharpe_ratio, cagr, max_drawdown and volatility, emits one
        sentence contrasting the best and worst asset. Assets whose value for
        a metric is None or NaN are left out of that metric's comparison,
        because such values cannot be ordered or formatted.
        """
        tradeoffs: List[str] = []

        if len(assets) < 2:
            return tradeoffs

        # (metric key, human description, higher is better?)
        metrics_to_compare = [
            ("sharpe_ratio", "risk-adjusted returns", True),
            ("cagr", "returns", True),
            ("max_drawdown", "drawdown protection", False),
            ("volatility", "stability", False),
        ]

        for metric, description, higher_better in metrics_to_compare:
            values = {}
            for asset in assets:
                for source in (performance, risk):
                    asset_metrics = source.get(asset, {})
                    if metric in asset_metrics:
                        candidate = asset_metrics[metric]
                        if not self._is_missing(candidate):
                            values[asset] = candidate
                        break

            if len(values) < 2:
                continue

            sorted_assets = sorted(
                values.items(), key=lambda x: x[1], reverse=higher_better
            )
            best = sorted_assets[0]
            worst = sorted_assets[-1]

            if best[0] != worst[0]:
                tradeoffs.append(
                    f"{best[0]} offers better {description} ({metric}: {best[1]:.2f}) "
                    f"vs {worst[0]} ({worst[1]:.2f})"
                )

        return tradeoffs

    def _generate_recommendation(
        self,
        assets: List[str],
        scoring: Dict[str, Any],
        criteria: Optional[Dict[str, float]],
    ) -> Dict[str, Any]:
        """
        Generate a recommendation based on comparison.

        Confidence is derived from the relative margin between the two best
        scores: above 20% is "high", above 5% is "moderate", otherwise "low".
        With a single ranked asset the confidence is "n/a".

        Returns:
            Dict with ``top_pick``, ``confidence``, ``explanation``,
            ``full_ranking``, ``key_strengths`` and ``key_weaknesses``; or a
            dict with only ``recommendation`` when nothing was ranked.
        """
        ranking = scoring.get("ranking", [])
        scores = scoring.get("scores", {})

        if not ranking:
            return {"recommendation": "Insufficient data for recommendation"}

        top_pick = ranking[0]
        top_score = scores.get(top_pick, {}).get("total_score", 0)

        # Check if there's a clear winner
        if len(ranking) >= 2:
            second_score = scores.get(ranking[1], {}).get("total_score", 0)

            if second_score != 0:
                margin = (top_score - second_score) / abs(second_score)
            elif top_score > 0:
                # Runner-up scored exactly 0: the relative margin is unbounded.
                margin = float('inf')
            else:
                # Both scores are 0 - an exact tie, not an infinite lead.
                margin = 0.0

            if margin > 0.2:
                confidence = "high"
                if margin == float('inf'):
                    # Avoid rendering "inf%" in user-facing text.
                    explanation = f"{top_pick} is clearly the best option."
                else:
                    explanation = f"{top_pick} is clearly the best option with a {margin:.0%} advantage."
            elif margin > 0.05:
                confidence = "moderate"
                explanation = f"{top_pick} edges out {ranking[1]}, but the difference is modest."
            else:
                confidence = "low"
                explanation = f"{top_pick} and {ranking[1]} are very close. Consider other factors."
        else:
            confidence = "n/a"
            explanation = "Only one asset to evaluate."

        top_details = scores.get(top_pick, {}).get("details", {})

        return {
            "top_pick": top_pick,
            "confidence": confidence,
            "explanation": explanation,
            "full_ranking": ranking,
            "key_strengths": self._identify_strengths(top_pick, top_details),
            "key_weaknesses": self._identify_weaknesses(top_pick, top_details),
        }

    def _identify_strengths(self, asset: str, details: Dict) -> List[str]:
        """
        Identify key strengths of an asset.

        Returns up to three metrics with a positive score contribution,
        largest contribution first. ``asset`` is accepted for interface
        compatibility and is not used.
        """
        positive = [
            (metric, info)
            for metric, info in details.items()
            if info.get("contribution", 0) > 0
        ]
        positive.sort(key=lambda item: item[1].get("contribution", 0), reverse=True)
        return [
            f"Strong {metric}: {info.get('value', 0):.2f}"
            for metric, info in positive[:3]  # Top 3
        ]

    def _identify_weaknesses(self, asset: str, details: Dict) -> List[str]:
        """
        Identify key weaknesses of an asset.

        Returns up to three metrics with a negative score contribution, most
        negative first. ``asset`` is accepted for interface compatibility and
        is not used.
        """
        negative = [
            (metric, info)
            for metric, info in details.items()
            if info.get("contribution", 0) < 0
        ]
        negative.sort(key=lambda item: item[1].get("contribution", 0))
        return [
            f"Weak {metric}: {info.get('value', 0):.2f}"
            for metric, info in negative[:3]  # Top 3
        ]
557
+
558
+
559
+ # ============================================================================
560
+ # MACRO SENSITIVITY ANALYSIS
561
+ # ============================================================================
562
+
563
class MacroSensitivityAnalyzer:
    """Measure how strongly an asset's returns move with macro factor returns."""

    # Factor name -> ticker symbol used as a tradable stand-in for that factor.
    MACRO_PROXIES = {
        "rates": "TLT",  # Long-term treasuries (inverse proxy for rates)
        "inflation": "TIP",  # TIPS
        "oil": "USO",  # Oil
        "usd": "UUP",  # US Dollar
        "gold": "GLD",  # Gold
        "credit": "HYG",  # High yield (credit risk)
        "vix": "VIXY",  # Volatility
    }

    @staticmethod
    async def analyze_macro_sensitivity(
        asset_returns: pd.Series,
        macro_returns: Dict[str, pd.Series],
    ) -> Dict[str, Dict[str, float]]:
        """
        Compute beta, correlation and R-squared of an asset against each factor.

        Args:
            asset_returns: Return series of the asset under study.
            macro_returns: Mapping of factor name -> factor return series.

        Returns:
            Mapping of factor name -> ``{"beta", "correlation", "r_squared"}``.
            Factors with fewer than 30 overlapping non-null observations are
            omitted from the result.
        """
        sensitivities = {}

        for name, series in macro_returns.items():
            # Keep only the dates on which both series have a value.
            paired = pd.concat([asset_returns, series], axis=1).dropna()
            if len(paired) >= 30:
                y = paired.iloc[:, 0]
                x = paired.iloc[:, 1]

                # Beta is cov(asset, factor) / var(factor); 0 for a flat factor.
                cov_matrix = np.cov(y, x)
                factor_variance = cov_matrix[1, 1]
                if factor_variance != 0:
                    sensitivity = cov_matrix[0, 1] / factor_variance
                else:
                    sensitivity = 0

                rho = y.corr(x)

                sensitivities[name] = {
                    "beta": sensitivity,
                    "correlation": rho,
                    "r_squared": rho ** 2,
                }

        return sensitivities