pyfolioanalytics 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ Metadata-Version: 2.3
2
+ Name: pyfolioanalytics
3
+ Version: 0.1.0
4
+ Summary: Python port of the R PortfolioAnalytics package for portfolio optimization and backtesting
5
+ Author: curry tang
6
+ Author-email: curry tang <twn39@163.com>
7
+ Requires-Dist: cvxopt>=1.3.3
8
+ Requires-Dist: cvxpy>=1.8.1
9
+ Requires-Dist: numpy>=2.4.2
10
+ Requires-Dist: pandas>=3.0.1
11
+ Requires-Dist: pyscipopt>=6.1.0
12
+ Requires-Dist: scikit-learn>=1.8.0
13
+ Requires-Dist: scipy>=1.17.1
14
+ Requires-Python: >=3.12
15
+ Description-Content-Type: text/markdown
16
+
17
+ # PyFolioAnalytics
18
+
19
+ Python implementation of the R package [PortfolioAnalytics](https://github.com/braverock/PortfolioAnalytics).
20
+
21
+ ## Features
22
+
23
+ - [x] **Portfolio Specification**: Support for Box, Group, Turnover, Transaction Costs, and Position Limit constraints.
24
+ - [x] **Optimization Engines**:
25
+ - **CVXPY**: Linear, Quadratic (MVO), and Mixed-Integer programming.
26
+ - **SciPy (SLSQP)**: Non-linear optimization for Equal Risk Contribution (ERC).
27
+ - **Differential Evolution**: Global heuristic search for non-convex problems.
28
+ - [x] **Risk Modeling**:
29
+ - Gaussian and Modified (Cornish-Fisher) VaR and ES.
30
+ - Path-dependent measures: MaxDrawdown and AverageDrawdown.
31
+ - [x] **Statistical Models**:
32
+ - Black-Litterman posterior estimation.
33
+ - Statistical Factor Models (PCA).
34
+ - Meucci Entropy Pooling for view integration.
35
+ - [x] **Backtesting**: Rolling-window and expanding-window rebalancing with flexible frequencies.
36
+ - [x] **Hierarchical Structures**: Support for Regime Switching and Multi-layer portfolio architectures.
37
+
38
+ ## Installation
39
+
40
+ ```bash
41
+ uv sync
42
+ ```
43
+
44
+ ## Testing & Validation
45
+
46
+ This library has been rigorously cross-validated against the original R `PortfolioAnalytics` and `PerformanceAnalytics` libraries using:
47
+ 1. **EDHEC Dataset**: Benchmark hedge fund index data.
48
+ 2. **Real Stock Data**: AAPL, MSFT, GOOGL, AMZN, META (2020-2026).
49
+ 3. **Macro Asset Data**: SPY, QQQ, GLD, TLT, BRK.B (2020-2026).
50
+
51
+ To run the parity tests:
52
+ ```bash
53
+ uv run pytest
54
+ ```
55
+
56
+ ## Structure
57
+
58
+ - `src/pyfolioanalytics/`: Core package source.
59
+ - `data/`: Sample datasets (EDHEC, Real Stock returns).
60
+ - `tests/`: Comprehensive test suite including multi-dataset cross-validation.
61
+ - `third_party/PortfolioAnalytics/`: Original R source for reference.
@@ -0,0 +1,45 @@
1
+ # PyFolioAnalytics
2
+
3
+ Python implementation of the R package [PortfolioAnalytics](https://github.com/braverock/PortfolioAnalytics).
4
+
5
+ ## Features
6
+
7
+ - [x] **Portfolio Specification**: Support for Box, Group, Turnover, Transaction Costs, and Position Limit constraints.
8
+ - [x] **Optimization Engines**:
9
+ - **CVXPY**: Linear, Quadratic (MVO), and Mixed-Integer programming.
10
+ - **SciPy (SLSQP)**: Non-linear optimization for Equal Risk Contribution (ERC).
11
+ - **Differential Evolution**: Global heuristic search for non-convex problems.
12
+ - [x] **Risk Modeling**:
13
+ - Gaussian and Modified (Cornish-Fisher) VaR and ES.
14
+ - Path-dependent measures: MaxDrawdown and AverageDrawdown.
15
+ - [x] **Statistical Models**:
16
+ - Black-Litterman posterior estimation.
17
+ - Statistical Factor Models (PCA).
18
+ - Meucci Entropy Pooling for view integration.
19
+ - [x] **Backtesting**: Rolling-window and expanding-window rebalancing with flexible frequencies.
20
+ - [x] **Hierarchical Structures**: Support for Regime Switching and Multi-layer portfolio architectures.
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ uv sync
26
+ ```
27
+
28
+ ## Testing & Validation
29
+
30
+ This library has been rigorously cross-validated against the original R `PortfolioAnalytics` and `PerformanceAnalytics` libraries using:
31
+ 1. **EDHEC Dataset**: Benchmark hedge fund index data.
32
+ 2. **Real Stock Data**: AAPL, MSFT, GOOGL, AMZN, META (2020-2026).
33
+ 3. **Macro Asset Data**: SPY, QQQ, GLD, TLT, BRK.B (2020-2026).
34
+
35
+ To run the parity tests:
36
+ ```bash
37
+ uv run pytest
38
+ ```
39
+
40
+ ## Structure
41
+
42
+ - `src/pyfolioanalytics/`: Core package source.
43
+ - `data/`: Sample datasets (EDHEC, Real Stock returns).
44
+ - `tests/`: Comprehensive test suite including multi-dataset cross-validation.
45
+ - `third_party/PortfolioAnalytics/`: Original R source for reference.
@@ -0,0 +1,45 @@
1
+ [project]
2
+ name = "pyfolioanalytics"
3
+ version = "0.1.0"
4
+ description = "Python port of the R PortfolioAnalytics package for portfolio optimization and backtesting"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "curry tang", email = "twn39@163.com" }
8
+ ]
9
+ requires-python = ">=3.12"
10
+ dependencies = [
11
+ "cvxopt>=1.3.3",
12
+ "cvxpy>=1.8.1",
13
+ "numpy>=2.4.2",
14
+ "pandas>=3.0.1",
15
+ "pyscipopt>=6.1.0",
16
+ "scikit-learn>=1.8.0",
17
+ "scipy>=1.17.1",
18
+ ]
19
+
20
+ [build-system]
21
+ requires = ["uv_build>=0.9.26,<0.10.0"]
22
+ build-backend = "uv_build"
23
+
24
+ [tool.uv.build-backend]
25
+ source-exclude = [
26
+ "third_party/**",
27
+ "GEMINI.md",
28
+ "data/**",
29
+ "scripts/**"
30
+ ]
31
+
32
+ [dependency-groups]
33
+ dev = [
34
+ "pyportfolioopt>=1.6.0",
35
+ "pytest>=9.0.2",
36
+ "ruff>=0.15.4",
37
+ "ty>=0.0.20",
38
+ "yfinance>=1.2.0",
39
+ "riskfolio-lib>=7.2.1",
40
+ ]
41
+
42
+ [tool.pytest.ini_options]
43
+ testpaths = ["tests"]
44
+ norecursedirs = ["third_party", ".*", "cvxopt", "cvxpy", "numpy", "pandas", "pyscipopt", "riskfolio-lib", "scikit-learn", "scipy", "yfinance"]
45
+ addopts = "-v"
@@ -0,0 +1,117 @@
1
+ import pandas as pd
2
+ from typing import Dict, Any, List, Union
3
+ from .portfolio import Portfolio, RegimePortfolio
4
+ from .optimize import optimize_portfolio
5
+
6
+
7
class BacktestResult:
    """Container for the output of a walk-forward backtest.

    Attributes:
        weights: Per-date asset weights (DataFrame, dates x assets).
        returns: Realized portfolio return series.
        portfolio_returns: Alias of ``returns``, kept for backward compatibility.
        opt_results: Per-rebalance optimization metadata dicts.
    """

    def __init__(
        self,
        weights: pd.DataFrame,
        returns: pd.Series,
        opt_results: List[Dict[str, Any]],
    ):
        self.weights = weights
        self.returns = returns
        # Older callers accessed the return series under this name.
        self.portfolio_returns = returns
        self.opt_results = opt_results
18
+
19
+
20
def backtest_portfolio(
    R: pd.DataFrame,
    portfolio: Union[Portfolio, RegimePortfolio],
    rebalance_periods: str = "ME",
    optimize_method: str = "ROI",
    **kwargs,
) -> BacktestResult:
    """
    Simple walk-forward backtest with rebalancing.

    Parameters
    ----------
    R : pd.DataFrame
        Asset returns, one column per asset, indexed by date.
        NOTE(review): if the index is not a DatetimeIndex it is converted
        IN PLACE below — the caller's DataFrame is mutated.
    portfolio : Portfolio or RegimePortfolio
        Specification optimized at each rebalance date.
    rebalance_periods : str
        Pandas frequency string for the rebalance schedule (default "ME",
        month-end). Overridden by ``rebalance_on`` if supplied.
    optimize_method : str
        Solver backend forwarded to ``optimize_portfolio``.
    **kwargs
        Recognized options: ``rebalance_on`` (PortfolioAnalytics-style
        frequency name, e.g. "months"), ``rolling_window`` (int, lookback
        length in rows; otherwise an expanding window is used), ``regimes``
        (Series of regime labels, consulted for RegimePortfolio).
        All kwargs are also forwarded to ``optimize_portfolio``.

    Returns
    -------
    BacktestResult
        Daily weights, realized portfolio returns, and per-rebalance
        optimization metadata.
    """
    # Handle rebalance_on from PortfolioAnalytics style:
    # map R-style frequency names onto pandas offset aliases.
    rebalance_on = kwargs.get("rebalance_on")
    if rebalance_on:
        mapping = {
            "months": "ME",
            "quarters": "QE",
            "years": "YE",
            "weeks": "W",
            "days": "D",
        }
        rebalance_periods = mapping.get(rebalance_on, rebalance_periods)

    # Ensure R index is datetime (mutates the caller's frame in place).
    if not isinstance(R.index, pd.DatetimeIndex):
        R.index = pd.to_datetime(R.index)

    # Identify rebalancing dates; prepend the first observation so the
    # initial period is covered even when the schedule starts later.
    rebal_dates = pd.date_range(
        start=R.index[0], end=R.index[-1], freq=rebalance_periods
    )
    if rebal_dates[0] > R.index[0]:
        rebal_dates = rebal_dates.insert(0, R.index[0])

    rolling_window = kwargs.get("rolling_window")
    regimes = kwargs.get("regimes")

    all_weights = []
    all_opt_results = []
    # Start from equal weights; they are held until the first successful
    # optimization replaces them.
    current_weights = pd.Series(1.0 / len(R.columns), index=R.columns)

    for i in range(len(rebal_dates) - 1):
        start_date = rebal_dates[i]
        end_date = rebal_dates[i + 1]

        # Data for optimization: rolling lookback or expanding window.
        if rolling_window:
            # Find integer index of start_date ("pad" = last row at or
            # before the rebalance date).
            loc = R.index.get_indexer([start_date], method="pad")[0]
            start_idx = max(0, loc - rolling_window)
            R_train = R.iloc[start_idx:loc]
        else:
            R_train = R[:start_date]

        # Need at least two observations to estimate moments.
        if len(R_train) >= 2:
            active_portfolio = portfolio
            if isinstance(portfolio, RegimePortfolio):
                if regimes is not None:
                    # Use the regime of the current rebalance date
                    # (asof = most recent label at or before start_date).
                    current_regime = regimes.asof(start_date)
                    active_portfolio = portfolio.get_portfolio(current_regime)
                else:
                    active_portfolio = portfolio.get_portfolio("default")

            res = optimize_portfolio(
                R_train, active_portfolio, optimize_method=optimize_method, **kwargs
            )
            # On solver failure keep the previous weights (drift-free hold).
            if res["weights"] is not None:
                current_weights = res["weights"]
            opt_info = {
                "date": start_date,
                "weights": current_weights,
                "portfolio": active_portfolio,
                "status": res["status"],
            }
            # Ensure moments and other metadata are passed through if present
            if "moments" in res:
                opt_info["moments"] = res["moments"]
            all_opt_results.append(opt_info)

        # Apply weights to the period. NOTE(review): label slicing is
        # inclusive of end_date, so the rebalance-day row appears in two
        # consecutive periods; the later concat keeps both rows.
        R_period = R[start_date:end_date]
        if not R_period.empty:
            weights_df = pd.DataFrame(
                [current_weights] * len(R_period), index=R_period.index
            )
            all_weights.append(weights_df)

    if not all_weights:
        return BacktestResult(pd.DataFrame(), pd.Series(), [])

    full_weights = pd.concat(all_weights)
    # Portfolio return per date: weighted sum of that date's asset returns.
    port_returns = (full_weights * R.loc[full_weights.index]).sum(axis=1)

    return BacktestResult(full_weights, port_returns, all_opt_results)
114
+
115
+
116
# Alias for backward compatibility: older code used the
# PortfolioAnalytics-style name for the walk-forward backtest entry point.
optimize_portfolio_rebalancing = backtest_portfolio
@@ -0,0 +1,41 @@
1
+ import numpy as np
2
+ from typing import Dict, Any, Optional
3
+
4
+
5
def black_litterman(
    sigma: np.ndarray,
    w_mkt: np.ndarray,
    P: np.ndarray,
    q: np.ndarray,
    tau: float = 0.05,
    risk_aversion: float = 2.5,
    Omega: Optional[np.ndarray] = None,
) -> Dict[str, Any]:
    """
    Standard Black-Litterman model.

    Parameters
    ----------
    sigma : (N, N) covariance matrix of asset returns.
    w_mkt : (N,) or (N, 1) market-capitalization weights.
    P : (K, N) view pick matrix.
    q : (K,) or (K, 1) view returns.
    tau : scalar confidence in the prior (default 0.05).
    risk_aversion : risk-aversion coefficient lambda (default 2.5).
    Omega : (K, K) view-uncertainty matrix. If None it is computed via the
        He-Litterman rule, Omega = diag(P (tau sigma) P').

    Returns
    -------
    dict with keys "mu" (posterior mean) and "sigma" (posterior covariance).
    """
    # 1. Implied equilibrium returns: Pi = lambda * Sigma * w_mkt
    Pi = risk_aversion * sigma @ w_mkt

    # 2. View uncertainty (Omega), He-Litterman default
    if Omega is None:
        Omega = np.diag(np.diag(P @ (tau * sigma) @ P.T))

    # Shared system matrix M = P (tau Sigma) P' + Omega.
    # Use np.linalg.solve instead of an explicit inverse: same result,
    # better numerical stability for ill-conditioned view systems.
    M = P @ (tau * sigma) @ P.T + Omega

    # 3. Posterior mean:
    # mu_bl = Pi + tau*Sigma*P' * M^-1 * (q - P*Pi)
    mu_bl = Pi + (tau * sigma @ P.T) @ np.linalg.solve(M, q - P @ Pi)

    # 4. Posterior covariance:
    # sigma_bl = (1+tau)*Sigma - tau^2 * Sigma*P' * M^-1 * P*Sigma
    sigma_bl = (1 + tau) * sigma - (tau**2 * sigma @ P.T) @ np.linalg.solve(
        M, P @ sigma
    )

    return {"mu": mu_bl, "sigma": sigma_bl}
@@ -0,0 +1,302 @@
1
+ import numpy as np
2
+ from typing import List, Tuple, Dict, Any, Optional
3
+
4
+
5
+
6
class CLA:
    """
    Critical Line Algorithm (CLA) for Mean-Variance Optimization.
    Based on the implementation by Marcos Lopez de Prado.

    Computes every turning point of the constrained efficient frontier for
        max  lam * mu'w - 0.5 * w' Sigma w
        s.t. sum(w) = 1, lb <= w <= ub
    sweeping lam from +inf down to 0 (minimum-variance portfolio).

    Fix vs. the original Python-2 code: in ``solve`` Case B,
    ``_compute_lambda`` can return ``lam = None`` (degenerate direction);
    comparing ``None`` with a float raises ``TypeError`` on Python 3, so
    such candidates are now skipped explicitly.
    """

    def __init__(
        self,
        expected_returns: np.ndarray,
        cov_matrix: np.ndarray,
        lower_bounds: np.ndarray,
        upper_bounds: np.ndarray,
    ):
        # Store mu and bounds as column vectors so matrix algebra below is
        # uniform; sigma stays (N x N).
        self.mu = expected_returns.reshape(-1, 1)
        self.sigma = cov_matrix
        self.lb = lower_bounds.reshape(-1, 1)
        self.ub = upper_bounds.reshape(-1, 1)
        self.n = len(self.mu)

        self.w = []  # solution weights at turning points
        self.ls = []  # lambdas at turning points
        self.g = []  # gammas at turning points
        self.f = []  # free sets at turning points

    @staticmethod
    def _infnone(x):
        # Treat "no candidate" (None) as -inf so max-comparisons work.
        return float("-inf") if x is None else x

    def _init_algo(self) -> Tuple[List[int], np.ndarray]:
        """Find the first turning point: the maximum-return feasible portfolio.

        Starting from all assets at their lower bounds, assets are pushed to
        their upper bounds in decreasing order of expected return until the
        budget constraint sum(w) = 1 is met; the last asset touched becomes
        the initial free asset.
        """
        # Asset ids ordered by ascending expected return.
        idx = np.argsort(self.mu.flatten())

        # Start with all weights at their lower bounds.
        i, w = self.n, np.copy(self.lb)
        while np.sum(w) < 1.0 and i > 0:
            i -= 1
            idx_i = idx[i]
            w[idx_i] = self.ub[idx_i]

        # Adjust last modified asset to meet sum(w) = 1 exactly.
        if np.sum(w) > 1.0:
            w[idx[i]] += 1.0 - np.sum(w)

        return [idx[i]], w

    def _get_matrices(
        self, f: List[int], w: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """Partition sigma/mu/w into free (f) and bounded (b) components."""
        b = list(set(range(self.n)) - set(f))
        covarF = self.sigma[np.ix_(f, f)]
        meanF = self.mu[f]
        covarFB = self.sigma[np.ix_(f, b)]
        wB = w[b]
        return covarF, covarFB, meanF, wB

    def _compute_bi(self, c, bi):
        # Choose which bound the freed weight will hit, from the sign of c.
        if c > 0:
            return bi[1]
        if c < 0:
            return bi[0]
        return bi[0]

    def _compute_lambda(
        self,
        covarF_inv: np.ndarray,
        covarFB: np.ndarray,
        meanF: np.ndarray,
        wB: np.ndarray,
        i: int,
        bi: Any,
    ) -> Tuple[Optional[float], Optional[float]]:
        """Lambda at which free asset ``i`` hits bound ``bi``.

        Returns (None, None) when the direction is degenerate (|c| ~ 0),
        i.e. no finite critical lambda exists for this asset.
        """
        onesF = np.ones((len(meanF), 1))
        c1 = onesF.T @ covarF_inv @ onesF
        c2 = covarF_inv @ meanF
        c3 = onesF.T @ covarF_inv @ meanF
        c4 = covarF_inv @ onesF

        c = -c1 * c2[i] + c3 * c4[i]
        c_val = c.item()
        if abs(c_val) < 1e-12:
            return None, None

        # When both bounds are candidates, pick the one implied by sign(c).
        if isinstance(bi, list):
            bi = self._compute_bi(c_val, bi)

        if len(wB) == 0:
            res = (c4[i] - c1 * bi) / c
        else:
            onesB = np.ones((len(wB), 1))
            l1 = onesB.T @ wB
            l2 = covarF_inv @ covarFB
            l3 = l2 @ wB
            l4 = onesF.T @ l3
            res = ((1 - l1 + l4) * c4[i] - c1 * (bi + l3[i])) / c
        return float(res.item()), float(bi)

    def _compute_w(
        self,
        covarF_inv: np.ndarray,
        covarFB: np.ndarray,
        meanF: np.ndarray,
        wB: np.ndarray,
        lam: float,
    ) -> Tuple[np.ndarray, float]:
        """Free weights and gamma at a given lambda (KKT solution)."""
        onesF = np.ones((len(meanF), 1))
        g1 = onesF.T @ covarF_inv @ meanF
        g2 = onesF.T @ covarF_inv @ onesF

        if len(wB) == 0:
            g = -lam * g1 / g2 + 1 / g2
            w1 = np.zeros(onesF.shape)
        else:
            onesB = np.ones((len(wB), 1))
            g3 = onesB.T @ wB
            g4 = covarF_inv @ covarFB
            w1 = g4 @ wB
            g5 = onesF.T @ w1
            g = -lam * g1 / g2 + (1 - g3 + g5) / g2

        g_val = float(g.item())
        w2 = covarF_inv @ onesF
        w3 = covarF_inv @ meanF
        wF = -w1 + g_val * w2 + lam * w3
        return wF, g_val

    def solve(self):
        """Trace all turning points from max return down to minimum variance."""
        f, w = self._init_algo()
        self.w.append(np.copy(w))
        self.ls.append(None)
        self.g.append(None)
        self.f.append(f[:])

        while True:
            # Case A: bound one currently-free weight. Find the largest
            # lambda at which some free asset hits a bound.
            l_in = None
            if len(f) > 1:
                covarF, covarFB, meanF, wB = self._get_matrices(f, w)
                covarF_inv = np.linalg.inv(covarF)
                for j, idx in enumerate(f):
                    lam, bi = self._compute_lambda(
                        covarF_inv,
                        covarFB,
                        meanF,
                        wB,
                        j,
                        [self.lb[idx].item(), self.ub[idx].item()],
                    )
                    if self._infnone(lam) > self._infnone(l_in):
                        l_in, i_in, bi_in = lam, idx, bi

            # Case B: free one currently-bounded weight. Find the largest
            # admissible lambda (below the previous turning point's lambda)
            # at which a bounded asset becomes free.
            l_out = None
            b = list(set(range(self.n)) - set(f))
            if len(b) > 0:
                for idx in b:
                    f_temp = f + [idx]
                    covarF, covarFB, meanF, wB = self._get_matrices(f_temp, w)
                    covarF_inv = np.linalg.inv(covarF)
                    lam, bi = self._compute_lambda(
                        covarF_inv, covarFB, meanF, wB, len(f_temp) - 1, w[idx].item()
                    )

                    # FIX: _compute_lambda may return None for a degenerate
                    # direction; comparing None with floats raises TypeError
                    # on Python 3, so skip such candidates.
                    if lam is None:
                        continue
                    if (
                        self.ls[-1] is None or lam < self.ls[-1]
                    ) and lam > self._infnone(l_out):
                        l_out, i_out = lam, idx

            if self._infnone(l_in) < 0 and self._infnone(l_out) < 0:
                # No positive critical lambda remains: compute the
                # minimum-variance solution (lambda = 0) and stop.
                self.ls.append(0.0)
                covarF, covarFB, meanF, wB = self._get_matrices(f, w)
                covarF_inv = np.linalg.inv(covarF)
                wF, g = self._compute_w(
                    covarF_inv, covarFB, np.zeros(meanF.shape), wB, 0.0
                )
            else:
                # Take whichever event (bounding or freeing) occurs at the
                # larger lambda, update the free set, and re-solve.
                if self._infnone(l_in) > self._infnone(l_out):
                    self.ls.append(l_in)
                    f.remove(i_in)
                    w[i_in] = bi_in
                else:
                    self.ls.append(l_out)
                    f.append(i_out)
                covarF, covarFB, meanF, wB = self._get_matrices(f, w)
                covarF_inv = np.linalg.inv(covarF)
                wF, g = self._compute_w(covarF_inv, covarFB, meanF, wB, self.ls[-1])

            for j, idx in enumerate(f):
                w[idx] = wF[j]

            self.w.append(np.copy(w))
            self.g.append(g)
            self.f.append(f[:])

            # lambda = 0 marks the minimum-variance endpoint.
            if self.ls[-1] == 0:
                break

        self._purge_num_err(1e-10)
        self._purge_excess()

    def _purge_num_err(self, tol: float):
        """Drop turning points that violate the constraints beyond ``tol``."""
        i = 0
        while i < len(self.w):
            w = self.w[i]
            if (
                abs(np.sum(w) - 1.0) > tol
                or np.any(w < self.lb - tol)
                or np.any(w > self.ub + tol)
            ):
                del self.w[i], self.ls[i], self.g[i], self.f[i]
            else:
                i += 1

    def _purge_excess(self):
        """Drop turning points dominated by a later, higher-return point
        (expected return must be monotonically decreasing along the path)."""
        i = 0
        while i < len(self.w) - 1:
            mu = (self.w[i].T @ self.mu).item()
            j = i + 1
            removed = False
            while j < len(self.w):
                mu_next = (self.w[j].T @ self.mu).item()
                if mu < mu_next:
                    del self.w[i], self.ls[i], self.g[i], self.f[i]
                    removed = True
                    break
                j += 1
            if not removed:
                i += 1

    def max_sharpe(self, risk_free_rate: float = 0.0) -> np.ndarray:
        """Maximum-Sharpe portfolio via golden-section search between
        consecutive turning points (the frontier is piecewise linear in w)."""
        if not self.w:
            self.solve()

        def sr_func(alpha, w0, w1):
            w = alpha * w0 + (1 - alpha) * w1
            ret = (w.T @ self.mu).item() - risk_free_rate
            vol = np.sqrt((w.T @ self.sigma @ w).item())
            if vol < 1e-12:
                return 0.0
            return -(ret / vol)  # Minimize negative SR

        from scipy.optimize import minimize_scalar

        best_w = self.w[0]
        max_sr = -np.inf

        for i in range(len(self.w) - 1):
            res = minimize_scalar(
                sr_func,
                bounds=(0, 1),
                args=(self.w[i], self.w[i + 1]),
                method="bounded",
            )
            w_opt = res.x * self.w[i] + (1 - res.x) * self.w[i + 1]
            sr = -res.fun
            if sr > max_sr:
                max_sr = sr
                best_w = w_opt
        return best_w.flatten()

    def min_volatility(self) -> np.ndarray:
        """Minimum-volatility portfolio among the turning points."""
        if not self.w:
            self.solve()
        vols = [np.sqrt((w.T @ self.sigma @ w).item()) for w in self.w]
        return self.w[np.argmin(vols)].flatten()

    def efficient_frontier(
        self, points: int = 100
    ) -> Tuple[np.ndarray, np.ndarray, List[np.ndarray]]:
        """Sample ~``points`` portfolios along the piecewise-linear frontier.

        Returns (expected returns, volatilities, weight vectors).
        """
        if not self.w:
            self.solve()
        mu_list, sigma_list, weights_list = [], [], []

        n_segments = len(self.w) - 1
        if n_segments <= 0:
            # Degenerate frontier: a single feasible turning point.
            w = self.w[0]
            return (
                np.array([(w.T @ self.mu).item()]),
                np.array([np.sqrt((w.T @ self.sigma @ w).item())]),
                [w.flatten()],
            )

        points_per_segment = max(2, points // n_segments)

        for i in range(n_segments):
            alphas = np.linspace(0, 1, points_per_segment)
            if i < n_segments - 1:
                alphas = alphas[:-1]  # avoid duplicate points at segment joins

            for alpha in alphas:
                w = alpha * self.w[i + 1] + (1 - alpha) * self.w[i]
                weights_list.append(w.flatten())
                mu_list.append((w.T @ self.mu).item())
                sigma_list.append(np.sqrt((w.T @ self.sigma @ w).item()))

        return np.array(mu_list), np.array(sigma_list), weights_list
@@ -0,0 +1,67 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from typing import Dict, Any
4
+
5
+
6
+ def statistical_factor_model(R: pd.DataFrame, k: int = 3) -> Dict[str, Any]:
7
+ """
8
+ Extract statistical factors using PCA.
9
+ Returns:
10
+ - factors: Factor returns (T x k)
11
+ - loadings: Factor loadings (N x k)
12
+ - alpha: Intercepts (N x 1)
13
+ - residuals: Residual returns (T x N)
14
+ """
15
+ T, N = R.shape
16
+ # Center returns
17
+ mu = R.mean()
18
+ R_centered = R - mu
19
+
20
+ # PCA via SVD
21
+ U, S, Vt = np.linalg.svd(R_centered, full_matrices=False)
22
+
23
+ # Factors (principal components)
24
+ # R = U S V'
25
+ # Factors = U S
26
+ factors_mat = U[:, :k] @ np.diag(S[:k])
27
+ factors = pd.DataFrame(
28
+ factors_mat, index=R.index, columns=[f"Factor.{i + 1}" for i in range(k)]
29
+ )
30
+
31
+ # Loadings (eigenvectors)
32
+ # Vt is (N x N), top k rows are loadings
33
+ loadings = Vt[:k, :].T
34
+
35
+ # Alphas and Residuals
36
+ # R = alpha + Loadings * Factors + Residuals
37
+ # For statistical factors, alpha is often mean return
38
+ alpha = mu.values.reshape(-1, 1)
39
+
40
+ # Reconstruction
41
+ R_hat = factors_mat @ loadings.T
42
+ residuals = R_centered.values - R_hat
43
+
44
+ return {
45
+ "factors": factors,
46
+ "loadings": pd.DataFrame(loadings, index=R.columns, columns=factors.columns),
47
+ "alpha": pd.Series(alpha.flatten(), index=R.columns),
48
+ "residuals": pd.DataFrame(residuals, index=R.index, columns=R.columns),
49
+ }
50
+
51
+
52
def factor_model_covariance(model_results: Dict[str, Any]) -> np.ndarray:
    """
    Assemble the asset covariance matrix implied by a factor model.

    Sigma = B Sigma_f B' + diag(Var(residuals))

    Parameters
    ----------
    model_results : output of ``statistical_factor_model`` — a dict with
        "loadings" (N x k), "factors" (T x k) and "residuals" (T x N)
        DataFrames.

    Returns
    -------
    (N, N) ndarray covariance matrix.
    """
    B = model_results["loadings"].values
    factors = model_results["factors"].values
    residuals = model_results["residuals"].values

    # Covariance of factors. np.cov squeezes the single-factor (k = 1)
    # case to a 0-d array, which would break the matmul below, so force
    # a 2-d (k x k) matrix.
    Sigma_f = np.atleast_2d(np.cov(factors, rowvar=False))

    # Idiosyncratic risk: diagonal matrix of residual variances.
    Sigma_e = np.diag(np.var(residuals, axis=0, ddof=1))

    return B @ Sigma_f @ B.T + Sigma_e