sigma-terminal 2.0.2__py3-none-any.whl → 3.2.0__py3-none-any.whl
This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- sigma/__init__.py +182 -6
- sigma/__main__.py +2 -2
- sigma/analytics/__init__.py +636 -0
- sigma/app.py +563 -898
- sigma/backtest.py +372 -0
- sigma/charts.py +407 -0
- sigma/cli.py +434 -0
- sigma/comparison.py +611 -0
- sigma/config.py +195 -0
- sigma/core/__init__.py +4 -17
- sigma/core/engine.py +493 -0
- sigma/core/intent.py +595 -0
- sigma/core/models.py +516 -125
- sigma/data/__init__.py +681 -0
- sigma/data/models.py +130 -0
- sigma/llm.py +401 -0
- sigma/monitoring.py +666 -0
- sigma/portfolio.py +697 -0
- sigma/reporting.py +658 -0
- sigma/robustness.py +675 -0
- sigma/setup.py +305 -402
- sigma/strategy.py +753 -0
- sigma/tools/backtest.py +23 -5
- sigma/tools.py +617 -0
- sigma/visualization.py +766 -0
- sigma_terminal-3.2.0.dist-info/METADATA +298 -0
- sigma_terminal-3.2.0.dist-info/RECORD +30 -0
- sigma_terminal-3.2.0.dist-info/entry_points.txt +6 -0
- sigma_terminal-3.2.0.dist-info/licenses/LICENSE +25 -0
- sigma/core/agent.py +0 -205
- sigma/core/config.py +0 -119
- sigma/core/llm.py +0 -794
- sigma/tools/__init__.py +0 -5
- sigma/tools/charts.py +0 -400
- sigma/tools/financial.py +0 -1457
- sigma/ui/__init__.py +0 -1
- sigma_terminal-2.0.2.dist-info/METADATA +0 -222
- sigma_terminal-2.0.2.dist-info/RECORD +0 -19
- sigma_terminal-2.0.2.dist-info/entry_points.txt +0 -2
- sigma_terminal-2.0.2.dist-info/licenses/LICENSE +0 -42
- {sigma_terminal-2.0.2.dist-info → sigma_terminal-3.2.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,636 @@
"""Advanced analytics module for Sigma."""

import asyncio
from datetime import datetime, date, timedelta
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd
from scipy import stats
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.spatial.distance import squareform


# ============================================================================
# PERFORMANCE ANALYTICS
# ============================================================================

class PerformanceAnalytics:
    """Comprehensive performance metrics calculation."""

    @staticmethod
    def calculate_metrics(
        returns: pd.Series,
        benchmark_returns: Optional[pd.Series] = None,
        risk_free_rate: float = 0.0,
        periods_per_year: int = 252,
    ) -> Dict[str, float]:
        """Calculate comprehensive performance metrics."""

        # Clean returns
        returns = returns.dropna()
        n = len(returns)

        if n < 2:
            return {}

        # Basic metrics
        total_return = (1 + returns).prod() - 1
        cagr = (1 + total_return) ** (periods_per_year / n) - 1
        volatility = returns.std() * np.sqrt(periods_per_year)

        # Downside metrics
        negative_returns = returns[returns < 0]
        downside_deviation = negative_returns.std() * np.sqrt(periods_per_year) if len(negative_returns) > 0 else 0

        # Drawdown
        cumulative = (1 + returns).cumprod()
        running_max = cumulative.expanding().max()
        drawdown = (cumulative - running_max) / running_max
        max_drawdown = drawdown.min()

        # Drawdown duration
        in_drawdown = drawdown < 0
        dd_groups = (in_drawdown != in_drawdown.shift()).cumsum()
        dd_durations = in_drawdown.groupby(dd_groups).sum()
        max_dd_duration = dd_durations.max() if len(dd_durations) > 0 else 0

        # Risk-adjusted metrics
        excess_return = cagr - risk_free_rate
        sharpe = excess_return / volatility if volatility > 0 else 0
        sortino = excess_return / downside_deviation if downside_deviation > 0 else 0
        calmar = cagr / abs(max_drawdown) if max_drawdown != 0 else 0

        # VaR and CVaR
        var_95 = returns.quantile(0.05)
        cvar_95 = returns[returns <= var_95].mean() if len(returns[returns <= var_95]) > 0 else var_95

        # Win rate
        win_rate = (returns > 0).mean()

        # Profit factor
        gains = returns[returns > 0].sum()
        losses = abs(returns[returns < 0].sum())
        profit_factor = gains / losses if losses > 0 else float('inf')

        metrics = {
            "total_return": total_return,
            "cagr": cagr,
            "volatility": volatility,
            "downside_deviation": downside_deviation,
            "max_drawdown": max_drawdown,
            "max_dd_duration": int(max_dd_duration),
            "sharpe_ratio": sharpe,
            "sortino_ratio": sortino,
            "calmar_ratio": calmar,
            "var_95": var_95,
            "cvar_95": cvar_95,
            "win_rate": win_rate,
            "profit_factor": profit_factor,
        }

        # Beta and alpha if benchmark provided
        if benchmark_returns is not None:
            aligned = pd.concat([returns, benchmark_returns], axis=1).dropna()
            if len(aligned) > 10:
                cov = np.cov(aligned.iloc[:, 0], aligned.iloc[:, 1])
                beta = cov[0, 1] / cov[1, 1] if cov[1, 1] != 0 else 0
                alpha = cagr - (risk_free_rate + beta * (aligned.iloc[:, 1].mean() * periods_per_year - risk_free_rate))

                # R-squared
                correlation = aligned.corr().iloc[0, 1]
                r_squared = correlation ** 2

                # Tracking error
                tracking_diff = aligned.iloc[:, 0] - aligned.iloc[:, 1]
                tracking_error = tracking_diff.std() * np.sqrt(periods_per_year)

                # Information ratio
                info_ratio = tracking_diff.mean() * periods_per_year / tracking_error if tracking_error > 0 else 0

                # Treynor ratio
                treynor = excess_return / beta if beta != 0 else 0

                metrics.update({
                    "beta": beta,
                    "alpha": alpha,
                    "r_squared": r_squared,
                    "tracking_error": tracking_error,
                    "information_ratio": info_ratio,
                    "treynor_ratio": treynor,
                })

        return metrics

    @staticmethod
    def rolling_metrics(
        returns: pd.Series,
        window: int = 252,
        periods_per_year: int = 252,
    ) -> pd.DataFrame:
        """Calculate rolling performance metrics."""

        rolling_return = returns.rolling(window).apply(lambda x: (1 + x).prod() - 1)
        rolling_vol = returns.rolling(window).std() * np.sqrt(periods_per_year)
        rolling_sharpe = (rolling_return - 0) / rolling_vol

        # Rolling max drawdown
        def max_dd(x):
            cumulative = (1 + x).cumprod()
            running_max = cumulative.expanding().max()
            drawdown = (cumulative - running_max) / running_max
            return drawdown.min()

        rolling_dd = returns.rolling(window).apply(max_dd)

        return pd.DataFrame({
            "rolling_return": rolling_return,
            "rolling_volatility": rolling_vol,
            "rolling_sharpe": rolling_sharpe,
            "rolling_max_dd": rolling_dd,
        })
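A minimal usage sketch for the class above, assuming it is importable from sigma.analytics (per the file path); the return series here are synthetic placeholders, not package data:

import numpy as np
import pandas as pd
from sigma.analytics import PerformanceAnalytics

# Synthetic daily returns on a business-day index, standing in for real asset/benchmark data.
idx = pd.bdate_range("2022-01-03", periods=500)
rng = np.random.default_rng(42)
asset = pd.Series(rng.normal(0.0006, 0.012, len(idx)), index=idx)
bench = pd.Series(rng.normal(0.0004, 0.010, len(idx)), index=idx)

metrics = PerformanceAnalytics.calculate_metrics(
    asset, benchmark_returns=bench, risk_free_rate=0.02
)
print(metrics["cagr"], metrics["sharpe_ratio"], metrics.get("beta"))

# Rolling view over a six-month (126 trading day) window.
rolling = PerformanceAnalytics.rolling_metrics(asset, window=126)
print(rolling.dropna().tail())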
# ============================================================================
# REGIME DETECTION
# ============================================================================

class RegimeDetector:
    """Detect market regimes using various methods."""

    @staticmethod
    def volatility_regime(
        returns: pd.Series,
        short_window: int = 21,
        long_window: int = 63,
        threshold: float = 1.5,
    ) -> pd.Series:
        """Detect volatility regimes."""
        short_vol = returns.rolling(short_window).std()
        long_vol = returns.rolling(long_window).std()

        ratio = short_vol / long_vol

        regimes = pd.Series(index=returns.index, data="normal")
        regimes[ratio > threshold] = "high_vol"
        regimes[ratio < 1 / threshold] = "low_vol"

        return regimes

    @staticmethod
    def trend_regime(
        prices: pd.Series,
        short_window: int = 20,
        long_window: int = 50,
    ) -> pd.Series:
        """Detect trend regimes using moving averages."""
        short_ma = prices.rolling(short_window).mean()
        long_ma = prices.rolling(long_window).mean()

        regimes = pd.Series(index=prices.index, data="sideways")
        regimes[(short_ma > long_ma) & (prices > short_ma)] = "bull"
        regimes[(short_ma < long_ma) & (prices < short_ma)] = "bear"

        return regimes

    @staticmethod
    def correlation_regime(
        returns_matrix: pd.DataFrame,
        window: int = 63,
        high_corr_threshold: float = 0.7,
    ) -> pd.Series:
        """Detect correlation regimes (risk-on/risk-off)."""

        rolling_corr = returns_matrix.rolling(window).corr()

        # Average pairwise correlation over time
        def avg_corr(df):
            if len(df) < window:
                return np.nan
            corr = df.corr()
            mask = np.triu(np.ones_like(corr, dtype=bool), k=1)
            return corr.where(mask).stack().mean()

        avg_correlations = returns_matrix.rolling(window).apply(
            lambda x: returns_matrix.loc[x.index].corr().values[np.triu_indices(len(returns_matrix.columns), k=1)].mean(),
            raw=False
        ).mean(axis=1)

        regimes = pd.Series(index=returns_matrix.index, data="normal")
        regimes[avg_correlations > high_corr_threshold] = "high_correlation"
        regimes[avg_correlations < 0.3] = "low_correlation"

        return regimes

    @staticmethod
    def hidden_markov_regime(
        returns: pd.Series,
        n_regimes: int = 2,
    ) -> Tuple[pd.Series, Dict[str, Any]]:
        """Simple regime detection using return distribution clustering."""
        # Simplified HMM-like approach using rolling statistics

        window = 21
        rolling_mean = returns.rolling(window).mean()
        rolling_vol = returns.rolling(window).std()

        # Classify based on mean and volatility
        combined = pd.DataFrame({
            'mean': rolling_mean,
            'vol': rolling_vol
        }).dropna()

        # Simple k-means-like classification
        mean_threshold = combined['mean'].median()
        vol_threshold = combined['vol'].median()

        regimes = pd.Series(index=returns.index, data=0)

        bull_mask = (combined['mean'] > mean_threshold) & (combined['vol'] < vol_threshold)
        bear_mask = (combined['mean'] < mean_threshold) & (combined['vol'] > vol_threshold)

        regimes.loc[bull_mask.index[bull_mask]] = 1  # Bull
        regimes.loc[bear_mask.index[bear_mask]] = -1  # Bear

        regime_labels = {-1: "bear", 0: "neutral", 1: "bull"}
        labeled_regimes = regimes.map(regime_labels)

        # Transition matrix
        transitions = {}
        for from_regime in regime_labels.values():
            transitions[from_regime] = {}
            for to_regime in regime_labels.values():
                mask = (labeled_regimes.shift(1) == from_regime) & (labeled_regimes == to_regime)
                from_mask = labeled_regimes.shift(1) == from_regime
                prob = mask.sum() / from_mask.sum() if from_mask.sum() > 0 else 0
                transitions[from_regime][to_regime] = prob

        return labeled_regimes, {"transition_matrix": transitions}
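A short sketch of how the regime detectors above might be called, again assuming RegimeDetector is importable from sigma.analytics; the random-walk series is illustrative only:

import numpy as np
import pandas as pd
from sigma.analytics import RegimeDetector

idx = pd.bdate_range("2021-01-04", periods=750)
rng = np.random.default_rng(1)
returns = pd.Series(rng.normal(0.0003, 0.011, len(idx)), index=idx)
prices = 100 * (1 + returns).cumprod()

vol_regime = RegimeDetector.volatility_regime(returns)   # "low_vol" / "normal" / "high_vol"
trend = RegimeDetector.trend_regime(prices)               # "bull" / "sideways" / "bear"
labels, info = RegimeDetector.hidden_markov_regime(returns)

print(trend.value_counts())
print(info["transition_matrix"]["bull"])  # probabilities of moving out of the "bull" state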
# ============================================================================
# SEASONALITY ANALYSIS
# ============================================================================

class SeasonalityAnalyzer:
    """Analyze seasonal patterns in returns."""

    @staticmethod
    def monthly_seasonality(returns: pd.Series) -> Dict[int, Dict[str, float]]:
        """Analyze month-of-year seasonality."""
        monthly = returns.groupby(returns.index.month)

        result = {}
        for month in range(1, 13):
            if month in monthly.groups:
                month_returns = monthly.get_group(month)
                result[month] = {
                    "mean_return": month_returns.mean(),
                    "median_return": month_returns.median(),
                    "win_rate": (month_returns > 0).mean(),
                    "volatility": month_returns.std(),
                    "count": len(month_returns),
                    "t_stat": stats.ttest_1samp(month_returns, 0)[0] if len(month_returns) > 1 else 0,
                    "p_value": stats.ttest_1samp(month_returns, 0)[1] if len(month_returns) > 1 else 1,
                }

        return result

    @staticmethod
    def day_of_week_seasonality(returns: pd.Series) -> Dict[int, Dict[str, float]]:
        """Analyze day-of-week seasonality."""
        daily = returns.groupby(returns.index.dayofweek)

        day_names = {0: "Monday", 1: "Tuesday", 2: "Wednesday", 3: "Thursday", 4: "Friday"}

        result = {}
        for day in range(5):
            if day in daily.groups:
                day_returns = daily.get_group(day)
                result[day_names[day]] = {
                    "mean_return": day_returns.mean(),
                    "median_return": day_returns.median(),
                    "win_rate": (day_returns > 0).mean(),
                    "count": len(day_returns),
                }

        return result

    @staticmethod
    def pre_post_event_returns(
        returns: pd.Series,
        event_dates: List[date],
        pre_days: int = 5,
        post_days: int = 5,
    ) -> Dict[str, Any]:
        """Analyze returns around events."""

        pre_returns = []
        post_returns = []

        for event_date in event_dates:
            event_idx = returns.index.get_indexer([event_date], method='nearest')[0]

            if event_idx >= pre_days and event_idx < len(returns) - post_days:
                pre_ret = returns.iloc[event_idx - pre_days:event_idx].sum()
                post_ret = returns.iloc[event_idx:event_idx + post_days].sum()
                pre_returns.append(pre_ret)
                post_returns.append(post_ret)

        return {
            "pre_event": {
                "mean": np.mean(pre_returns) if pre_returns else 0,
                "median": np.median(pre_returns) if pre_returns else 0,
                "win_rate": np.mean([r > 0 for r in pre_returns]) if pre_returns else 0,
            },
            "post_event": {
                "mean": np.mean(post_returns) if post_returns else 0,
                "median": np.median(post_returns) if post_returns else 0,
                "win_rate": np.mean([r > 0 for r in post_returns]) if post_returns else 0,
            },
            "events_analyzed": len(pre_returns),
        }
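A usage sketch for the seasonality helpers, assuming sigma.analytics exposes SeasonalityAnalyzer; the return history and the event dates (stand-ins for, say, earnings days) are illustrative:

import numpy as np
import pandas as pd
from sigma.analytics import SeasonalityAnalyzer

idx = pd.bdate_range("2019-01-02", periods=1200)
rng = np.random.default_rng(7)
returns = pd.Series(rng.normal(0.0004, 0.01, len(idx)), index=idx)

by_month = SeasonalityAnalyzer.monthly_seasonality(returns)
print(by_month[1]["mean_return"], by_month[1]["p_value"])   # January statistics

by_day = SeasonalityAnalyzer.day_of_week_seasonality(returns)
print(by_day["Monday"]["win_rate"])

# Hypothetical event dates for the pre/post window study (Timestamps also satisfy List[date]).
events = [pd.Timestamp("2020-03-16"), pd.Timestamp("2021-06-15"), pd.Timestamp("2022-09-21")]
around_events = SeasonalityAnalyzer.pre_post_event_returns(returns, events, pre_days=5, post_days=5)
print(around_events["post_event"]["mean"], around_events["events_analyzed"])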
# ============================================================================
# FACTOR ANALYSIS
# ============================================================================

class FactorAnalyzer:
    """Factor exposure and attribution analysis."""

    # Standard factor definitions
    FACTORS = {
        "market": "SPY",
        "size": "IWM",  # Small cap proxy
        "value": "IVE",  # S&P 500 Value
        "momentum": "MTUM",  # Momentum factor ETF
        "quality": "QUAL",  # Quality factor ETF
        "low_vol": "USMV",  # Low volatility ETF
    }

    @staticmethod
    def calculate_factor_exposures(
        returns: pd.Series,
        factor_returns: Dict[str, pd.Series],
        window: Optional[int] = None,
    ) -> Dict[str, float]:
        """Calculate factor exposures using regression."""

        # Align all series
        aligned = pd.DataFrame({"asset": returns})
        for name, factor in factor_returns.items():
            aligned[name] = factor

        aligned = aligned.dropna()

        if len(aligned) < 30:
            return {}

        y = aligned["asset"].values
        X = aligned.drop("asset", axis=1).values
        X = np.column_stack([np.ones(len(X)), X])  # Add intercept

        # OLS regression
        try:
            coeffs, residuals, rank, s = np.linalg.lstsq(X, y, rcond=None)
        except:
            return {}

        # R-squared
        y_pred = X @ coeffs
        ss_res = np.sum((y - y_pred) ** 2)
        ss_tot = np.sum((y - y.mean()) ** 2)
        r_squared = 1 - (ss_res / ss_tot) if ss_tot > 0 else 0

        # Residual volatility
        residual_vol = np.std(y - y_pred) * np.sqrt(252)

        exposures = {
            "alpha": coeffs[0] * 252,  # Annualized
            "r_squared": r_squared,
            "residual_vol": residual_vol,
        }

        factor_names = list(factor_returns.keys())
        for i, name in enumerate(factor_names):
            exposures[f"{name}_beta"] = coeffs[i + 1]

        return exposures

    @staticmethod
    def rolling_factor_exposures(
        returns: pd.Series,
        factor_returns: Dict[str, pd.Series],
        window: int = 252,
    ) -> pd.DataFrame:
        """Calculate rolling factor exposures."""

        results = []

        aligned = pd.DataFrame({"asset": returns})
        for name, factor in factor_returns.items():
            aligned[name] = factor
        aligned = aligned.dropna()

        for i in range(window, len(aligned)):
            subset = aligned.iloc[i - window:i]
            exposures = FactorAnalyzer.calculate_factor_exposures(
                subset["asset"],
                {k: subset[k] for k in factor_returns.keys()}
            )
            exposures["date"] = aligned.index[i]
            results.append(exposures)

        return pd.DataFrame(results).set_index("date") if results else pd.DataFrame()

    @staticmethod
    def return_attribution(
        returns: pd.Series,
        factor_returns: Dict[str, pd.Series],
        exposures: Dict[str, float],
    ) -> Dict[str, float]:
        """Attribute returns to factors."""

        attribution = {"total_return": returns.sum()}

        for factor_name, factor_ret in factor_returns.items():
            beta_key = f"{factor_name}_beta"
            if beta_key in exposures:
                contribution = factor_ret.sum() * exposures[beta_key]
                attribution[f"{factor_name}_contribution"] = contribution

        # Residual (alpha) contribution
        factor_contributions = sum(
            v for k, v in attribution.items() if k.endswith("_contribution")
        )
        attribution["alpha_contribution"] = attribution["total_return"] - factor_contributions

        return attribution
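A sketch of the factor-regression workflow, assuming FactorAnalyzer is importable from sigma.analytics; the factor series below are simulated stand-ins for the ETF proxies listed in FACTORS:

import numpy as np
import pandas as pd
from sigma.analytics import FactorAnalyzer

idx = pd.bdate_range("2021-01-04", periods=600)
rng = np.random.default_rng(3)
market = pd.Series(rng.normal(0.0004, 0.010, len(idx)), index=idx)
size = pd.Series(rng.normal(0.0002, 0.012, len(idx)), index=idx)
# Asset built with known loadings so the recovered betas are easy to eyeball.
asset = 0.9 * market + 0.3 * size + pd.Series(rng.normal(0, 0.004, len(idx)), index=idx)

factors = {"market": market, "size": size}
exposures = FactorAnalyzer.calculate_factor_exposures(asset, factors)
print(exposures["market_beta"], exposures["size_beta"], exposures["r_squared"])

attribution = FactorAnalyzer.return_attribution(asset, factors, exposures)
print(attribution["market_contribution"], attribution["alpha_contribution"])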
# ============================================================================
# CORRELATION ANALYSIS
# ============================================================================

class CorrelationAnalyzer:
    """Advanced correlation analysis."""

    @staticmethod
    def correlation_matrix(returns: pd.DataFrame) -> pd.DataFrame:
        """Calculate correlation matrix."""
        return returns.corr()

    @staticmethod
    def rolling_correlation(
        returns1: pd.Series,
        returns2: pd.Series,
        window: int = 63,
    ) -> pd.Series:
        """Calculate rolling correlation between two series."""
        return returns1.rolling(window).corr(returns2)

    @staticmethod
    def clustered_correlation(
        returns: pd.DataFrame,
        n_clusters: int = 3,
    ) -> Tuple[pd.DataFrame, List[List[str]]]:
        """Cluster assets by correlation."""

        corr = returns.corr()

        # Convert correlation to distance
        distance = 1 - corr.abs()

        # Hierarchical clustering
        linkage_matrix = linkage(squareform(distance), method='ward')
        clusters = fcluster(linkage_matrix, n_clusters, criterion='maxclust')

        # Group assets by cluster
        cluster_groups = [[] for _ in range(n_clusters)]
        for asset, cluster in zip(corr.columns, clusters):
            cluster_groups[cluster - 1].append(asset)

        # Reorder correlation matrix by cluster
        ordered_assets = [asset for group in cluster_groups for asset in group]
        ordered_corr = corr.loc[ordered_assets, ordered_assets]

        return ordered_corr, cluster_groups

    @staticmethod
    def correlation_breakdown_by_regime(
        returns: pd.DataFrame,
        regimes: pd.Series,
    ) -> Dict[str, pd.DataFrame]:
        """Calculate correlation matrices by regime."""

        results = {}

        for regime in regimes.unique():
            mask = regimes == regime
            regime_returns = returns.loc[mask]

            if len(regime_returns) > 30:
                results[regime] = regime_returns.corr()

        return results
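A brief sketch for the correlation utilities, assuming CorrelationAnalyzer (and the RegimeDetector shown earlier) can be imported from sigma.analytics; the four synthetic assets are illustrative:

import numpy as np
import pandas as pd
from sigma.analytics import CorrelationAnalyzer, RegimeDetector

idx = pd.bdate_range("2021-01-04", periods=500)
rng = np.random.default_rng(11)
common = rng.normal(0, 0.008, len(idx))
# Two assets load heavily on a shared factor, two are mostly idiosyncratic noise.
returns = pd.DataFrame({
    name: common * w + rng.normal(0, 0.006, len(idx))
    for name, w in [("AAA", 0.9), ("BBB", 0.8), ("CCC", 0.1), ("DDD", 0.0)]
}, index=idx)

ordered_corr, clusters = CorrelationAnalyzer.clustered_correlation(returns, n_clusters=2)
print(clusters)  # expect the two correlated names grouped together

regimes = RegimeDetector.volatility_regime(returns["AAA"])
by_regime = CorrelationAnalyzer.correlation_breakdown_by_regime(returns, regimes)
print(list(by_regime.keys()))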
# ============================================================================
# MONTE CARLO SIMULATION
# ============================================================================

class MonteCarloSimulator:
    """Monte Carlo portfolio simulations."""

    @staticmethod
    def bootstrap_returns(
        returns: pd.Series,
        n_simulations: int = 1000,
        horizon_days: int = 252,
        block_size: int = 21,  # Block bootstrap for autocorrelation
    ) -> np.ndarray:
        """Generate bootstrapped return paths."""

        n_blocks = horizon_days // block_size + 1
        simulated_paths = np.zeros((n_simulations, horizon_days))

        returns_arr = returns.values
        n_obs = len(returns_arr)

        for sim in range(n_simulations):
            path = []
            for _ in range(n_blocks):
                start_idx = np.random.randint(0, n_obs - block_size)
                block = returns_arr[start_idx:start_idx + block_size]
                path.extend(block)

            simulated_paths[sim] = path[:horizon_days]

        return simulated_paths

    @staticmethod
    def simulate_portfolio_value(
        returns_paths: np.ndarray,
        initial_value: float = 100000,
    ) -> Dict[str, Any]:
        """Simulate portfolio values and calculate statistics."""

        cumulative_returns = (1 + returns_paths).cumprod(axis=1)
        portfolio_values = initial_value * cumulative_returns

        final_values = portfolio_values[:, -1]

        return {
            "mean_final_value": final_values.mean(),
            "median_final_value": np.median(final_values),
            "std_final_value": final_values.std(),
            "percentiles": {
                "5th": np.percentile(final_values, 5),
                "25th": np.percentile(final_values, 25),
                "50th": np.percentile(final_values, 50),
                "75th": np.percentile(final_values, 75),
                "95th": np.percentile(final_values, 95),
            },
            "prob_loss": (final_values < initial_value).mean(),
            "prob_double": (final_values > 2 * initial_value).mean(),
            "worst_case": final_values.min(),
            "best_case": final_values.max(),
            "paths": portfolio_values,
        }

    @staticmethod
    def scenario_analysis(
        portfolio_returns: pd.Series,
        scenarios: Dict[str, float],
    ) -> Dict[str, Dict[str, float]]:
        """Analyze portfolio under different scenarios."""

        current_value = 100000
        results = {}

        for scenario_name, shock in scenarios.items():
            # Simple scenario: apply shock to returns
            shocked_return = shock
            new_value = current_value * (1 + shocked_return)

            results[scenario_name] = {
                "shock": shock,
                "new_value": new_value,
                "pnl": new_value - current_value,
                "pnl_pct": shocked_return,
            }

        return results

    # Common scenarios
    SCENARIOS = {
        "rates_up_100bp": -0.05,  # Simplified impact
        "rates_down_100bp": 0.05,
        "market_crash_10pct": -0.10,
        "market_rally_10pct": 0.10,
        "vol_spike_50pct": -0.03,
        "usd_strengthen_5pct": -0.02,
        "oil_up_20pct": 0.01,
        "recession": -0.15,
    }
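A closing usage sketch for the Monte Carlo helpers, assuming MonteCarloSimulator is importable from sigma.analytics; the input return history is simulated and the dollar amount is illustrative:

import numpy as np
import pandas as pd
from sigma.analytics import MonteCarloSimulator

idx = pd.bdate_range("2020-01-02", periods=756)
rng = np.random.default_rng(5)
history = pd.Series(rng.normal(0.0004, 0.011, len(idx)), index=idx)

# Block-bootstrap 1,000 one-year paths from the observed history.
paths = MonteCarloSimulator.bootstrap_returns(history, n_simulations=1000, horizon_days=252)
sim_stats = MonteCarloSimulator.simulate_portfolio_value(paths, initial_value=100_000)
print(sim_stats["percentiles"]["5th"], sim_stats["prob_loss"])

# Deterministic what-if shocks using the module's predefined scenario table.
shocks = MonteCarloSimulator.scenario_analysis(history, MonteCarloSimulator.SCENARIOS)
print(shocks["market_crash_10pct"]["pnl"])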