pyfolioanalytics 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ Metadata-Version: 2.3
2
+ Name: pyfolioanalytics
3
+ Version: 0.1.0
4
+ Summary: Python port of the R PortfolioAnalytics package for portfolio optimization and backtesting
5
+ Author: curry tang
6
+ Author-email: curry tang <twn39@163.com>
7
+ Requires-Dist: cvxopt>=1.3.3
8
+ Requires-Dist: cvxpy>=1.8.1
9
+ Requires-Dist: numpy>=2.4.2
10
+ Requires-Dist: pandas>=3.0.1
11
+ Requires-Dist: pyscipopt>=6.1.0
12
+ Requires-Dist: scikit-learn>=1.8.0
13
+ Requires-Dist: scipy>=1.17.1
14
+ Requires-Python: >=3.12
15
+ Description-Content-Type: text/markdown
16
+
17
+ # PyFolioAnalytics
18
+
19
+ Python implementation of the R package [PortfolioAnalytics](https://github.com/braverock/PortfolioAnalytics).
20
+
21
+ ## Features
22
+
23
+ - [x] **Portfolio Specification**: Support for Box, Group, Turnover, Transaction Costs, and Position Limit constraints.
24
+ - [x] **Optimization Engines**:
25
+ - **CVXPY**: Linear, Quadratic (MVO), and Mixed-Integer programming.
26
+ - **SciPy (SLSQP)**: Non-linear optimization for Equal Risk Contribution (ERC).
27
+ - **Differential Evolution**: Global heuristic search for non-convex problems.
28
+ - [x] **Risk Modeling**:
29
+ - Gaussian and Modified (Cornish-Fisher) VaR and ES.
30
+ - Path-dependent measures: MaxDrawdown and AverageDrawdown.
31
+ - [x] **Statistical Models**:
32
+ - Black-Litterman posterior estimation.
33
+ - Statistical Factor Models (PCA).
34
+ - Meucci Entropy Pooling for view integration.
35
+ - [x] **Backtesting**: Rolling-window and expanding-window rebalancing with flexible frequencies.
36
+ - [x] **Hierarchical Structures**: Support for Regime Switching and Multi-layer portfolio architectures.
37
+
38
+ ## Installation
39
+
40
+ ```bash
41
+ uv sync
42
+ ```
43
+
44
+ ## Testing & Validation
45
+
46
+ This library has been rigorously cross-validated against the original R `PortfolioAnalytics` and `PerformanceAnalytics` libraries using:
47
+ 1. **EDHEC Dataset**: Benchmark hedge fund index data.
48
+ 2. **Real Stock Data**: AAPL, MSFT, GOOGL, AMZN, META (2020-2026).
49
+ 3. **Macro Asset Data**: SPY, QQQ, GLD, TLT, BRK.B (2020-2026).
50
+
51
+ To run the parity tests:
52
+ ```bash
53
+ uv run pytest
54
+ ```
55
+
56
+ ## Structure
57
+
58
+ - `src/pyfolioanalytics/`: Core package source.
59
+ - `data/`: Sample datasets (EDHEC, Real Stock returns).
60
+ - `tests/`: Comprehensive test suite including multi-dataset cross-validation.
61
+ - `third_party/PortfolioAnalytics/`: Original R source for reference.
@@ -0,0 +1,45 @@
1
+ # PyFolioAnalytics
2
+
3
+ Python implementation of the R package [PortfolioAnalytics](https://github.com/braverock/PortfolioAnalytics).
4
+
5
+ ## Features
6
+
7
+ - [x] **Portfolio Specification**: Support for Box, Group, Turnover, Transaction Costs, and Position Limit constraints.
8
+ - [x] **Optimization Engines**:
9
+ - **CVXPY**: Linear, Quadratic (MVO), and Mixed-Integer programming.
10
+ - **SciPy (SLSQP)**: Non-linear optimization for Equal Risk Contribution (ERC).
11
+ - **Differential Evolution**: Global heuristic search for non-convex problems.
12
+ - [x] **Risk Modeling**:
13
+ - Gaussian and Modified (Cornish-Fisher) VaR and ES.
14
+ - Path-dependent measures: MaxDrawdown and AverageDrawdown.
15
+ - [x] **Statistical Models**:
16
+ - Black-Litterman posterior estimation.
17
+ - Statistical Factor Models (PCA).
18
+ - Meucci Entropy Pooling for view integration.
19
+ - [x] **Backtesting**: Rolling-window and expanding-window rebalancing with flexible frequencies.
20
+ - [x] **Hierarchical Structures**: Support for Regime Switching and Multi-layer portfolio architectures.
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ uv sync
26
+ ```
27
+
28
+ ## Testing & Validation
29
+
30
+ This library has been rigorously cross-validated against the original R `PortfolioAnalytics` and `PerformanceAnalytics` libraries using:
31
+ 1. **EDHEC Dataset**: Benchmark hedge fund index data.
32
+ 2. **Real Stock Data**: AAPL, MSFT, GOOGL, AMZN, META (2020-2026).
33
+ 3. **Macro Asset Data**: SPY, QQQ, GLD, TLT, BRK.B (2020-2026).
34
+
35
+ To run the parity tests:
36
+ ```bash
37
+ uv run pytest
38
+ ```
39
+
40
+ ## Structure
41
+
42
+ - `src/pyfolioanalytics/`: Core package source.
43
+ - `data/`: Sample datasets (EDHEC, Real Stock returns).
44
+ - `tests/`: Comprehensive test suite including multi-dataset cross-validation.
45
+ - `third_party/PortfolioAnalytics/`: Original R source for reference.
@@ -0,0 +1,45 @@
1
+ [project]
2
+ name = "pyfolioanalytics"
3
+ version = "0.1.0"
4
+ description = "Python port of the R PortfolioAnalytics package for portfolio optimization and backtesting"
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "curry tang", email = "twn39@163.com" }
8
+ ]
9
+ requires-python = ">=3.12"
10
+ dependencies = [
11
+ "cvxopt>=1.3.3",
12
+ "cvxpy>=1.8.1",
13
+ "numpy>=2.4.2",
14
+ "pandas>=3.0.1",
15
+ "pyscipopt>=6.1.0",
16
+ "scikit-learn>=1.8.0",
17
+ "scipy>=1.17.1",
18
+ ]
19
+
20
+ [build-system]
21
+ requires = ["uv_build>=0.9.26,<0.10.0"]
22
+ build-backend = "uv_build"
23
+
24
+ [tool.uv.build-backend]
25
+ source-exclude = [
26
+ "third_party/**",
27
+ "GEMINI.md",
28
+ "data/**",
29
+ "scripts/**"
30
+ ]
31
+
32
+ [dependency-groups]
33
+ dev = [
34
+ "pyportfolioopt>=1.6.0",
35
+ "pytest>=9.0.2",
36
+ "ruff>=0.15.4",
37
+ "ty>=0.0.20",
38
+ "yfinance>=1.2.0",
39
+ "riskfolio-lib>=7.2.1",
40
+ ]
41
+
42
+ [tool.pytest.ini_options]
43
+ testpaths = ["tests"]
44
+ norecursedirs = ["third_party", ".*", "cvxopt", "cvxpy", "numpy", "pandas", "pyscipopt", "riskfolio-lib", "scikit-learn", "scipy", "yfinance"]
45
+ addopts = "-v"
@@ -0,0 +1,117 @@
1
+ import pandas as pd
2
+ from typing import Dict, Any, List, Union
3
+ from .portfolio import Portfolio, RegimePortfolio
4
+ from .optimize import optimize_portfolio
5
+
6
+
7
class BacktestResult:
    """Container for the output of a walk-forward backtest.

    Attributes:
        weights: Per-date asset weights (DataFrame, dates x assets).
        returns: Realized portfolio return series.
        portfolio_returns: Alias of ``returns``, kept for backward compatibility.
        opt_results: Per-rebalance optimization metadata dicts.
    """

    def __init__(
        self,
        weights: pd.DataFrame,
        returns: pd.Series,
        opt_results: List[Dict[str, Any]],
    ):
        self.weights = weights
        self.returns = returns
        # Older callers accessed the return series under this name.
        self.portfolio_returns = returns
        self.opt_results = opt_results
18
+
19
+
20
def backtest_portfolio(
    R: pd.DataFrame,
    portfolio: Union[Portfolio, RegimePortfolio],
    rebalance_periods: str = "ME",
    optimize_method: str = "ROI",
    **kwargs,
) -> BacktestResult:
    """
    Simple walk-forward backtest with rebalancing.

    Parameters
    ----------
    R : pd.DataFrame
        Asset returns, one column per asset, indexed by date.
        NOTE(review): if the index is not a DatetimeIndex it is converted
        IN PLACE below — the caller's DataFrame is mutated.
    portfolio : Portfolio or RegimePortfolio
        Specification optimized at each rebalance date.
    rebalance_periods : str
        Pandas frequency string for the rebalance schedule (default "ME",
        month-end). Overridden by ``rebalance_on`` if supplied.
    optimize_method : str
        Solver backend forwarded to ``optimize_portfolio``.
    **kwargs
        Recognized options: ``rebalance_on`` (PortfolioAnalytics-style
        frequency name, e.g. "months"), ``rolling_window`` (int, lookback
        length in rows; otherwise an expanding window is used), ``regimes``
        (Series of regime labels, consulted for RegimePortfolio).
        All kwargs are also forwarded to ``optimize_portfolio``.

    Returns
    -------
    BacktestResult
        Daily weights, realized portfolio returns, and per-rebalance
        optimization metadata.
    """
    # Handle rebalance_on from PortfolioAnalytics style:
    # map R-style frequency names onto pandas offset aliases.
    rebalance_on = kwargs.get("rebalance_on")
    if rebalance_on:
        mapping = {
            "months": "ME",
            "quarters": "QE",
            "years": "YE",
            "weeks": "W",
            "days": "D",
        }
        rebalance_periods = mapping.get(rebalance_on, rebalance_periods)

    # Ensure R index is datetime (mutates the caller's frame in place).
    if not isinstance(R.index, pd.DatetimeIndex):
        R.index = pd.to_datetime(R.index)

    # Identify rebalancing dates; prepend the first observation so the
    # initial period is covered even when the schedule starts later.
    rebal_dates = pd.date_range(
        start=R.index[0], end=R.index[-1], freq=rebalance_periods
    )
    if rebal_dates[0] > R.index[0]:
        rebal_dates = rebal_dates.insert(0, R.index[0])

    rolling_window = kwargs.get("rolling_window")
    regimes = kwargs.get("regimes")

    all_weights = []
    all_opt_results = []
    # Start from equal weights; they are held until the first successful
    # optimization replaces them.
    current_weights = pd.Series(1.0 / len(R.columns), index=R.columns)

    for i in range(len(rebal_dates) - 1):
        start_date = rebal_dates[i]
        end_date = rebal_dates[i + 1]

        # Data for optimization: rolling lookback or expanding window.
        if rolling_window:
            # Find integer index of start_date ("pad" = last row at or
            # before the rebalance date).
            loc = R.index.get_indexer([start_date], method="pad")[0]
            start_idx = max(0, loc - rolling_window)
            R_train = R.iloc[start_idx:loc]
        else:
            R_train = R[:start_date]

        # Need at least two observations to estimate moments.
        if len(R_train) >= 2:
            active_portfolio = portfolio
            if isinstance(portfolio, RegimePortfolio):
                if regimes is not None:
                    # Use the regime of the current rebalance date
                    # (asof = most recent label at or before start_date).
                    current_regime = regimes.asof(start_date)
                    active_portfolio = portfolio.get_portfolio(current_regime)
                else:
                    active_portfolio = portfolio.get_portfolio("default")

            res = optimize_portfolio(
                R_train, active_portfolio, optimize_method=optimize_method, **kwargs
            )
            # On solver failure keep the previous weights (drift-free hold).
            if res["weights"] is not None:
                current_weights = res["weights"]
            opt_info = {
                "date": start_date,
                "weights": current_weights,
                "portfolio": active_portfolio,
                "status": res["status"],
            }
            # Ensure moments and other metadata are passed through if present
            if "moments" in res:
                opt_info["moments"] = res["moments"]
            all_opt_results.append(opt_info)

        # Apply weights to the period. NOTE(review): label slicing is
        # inclusive of end_date, so the rebalance-day row appears in two
        # consecutive periods; the later concat keeps both rows.
        R_period = R[start_date:end_date]
        if not R_period.empty:
            weights_df = pd.DataFrame(
                [current_weights] * len(R_period), index=R_period.index
            )
            all_weights.append(weights_df)

    if not all_weights:
        return BacktestResult(pd.DataFrame(), pd.Series(), [])

    full_weights = pd.concat(all_weights)
    # Portfolio return per date: weighted sum of that date's asset returns.
    port_returns = (full_weights * R.loc[full_weights.index]).sum(axis=1)

    return BacktestResult(full_weights, port_returns, all_opt_results)
114
+
115
+
116
# Alias for backward compatibility: older code used the
# PortfolioAnalytics-style name for the walk-forward backtest entry point.
optimize_portfolio_rebalancing = backtest_portfolio
@@ -0,0 +1,41 @@
1
+ import numpy as np
2
+ from typing import Dict, Any, Optional
3
+
4
+
5
def black_litterman(
    sigma: np.ndarray,
    w_mkt: np.ndarray,
    P: np.ndarray,
    q: np.ndarray,
    tau: float = 0.05,
    risk_aversion: float = 2.5,
    Omega: Optional[np.ndarray] = None,
) -> Dict[str, Any]:
    """
    Standard Black-Litterman model.

    Parameters
    ----------
    sigma : (N, N) covariance matrix of asset returns.
    w_mkt : (N,) or (N, 1) market-capitalization weights.
    P : (K, N) view pick matrix.
    q : (K,) or (K, 1) view returns.
    tau : scalar confidence in the prior (default 0.05).
    risk_aversion : risk-aversion coefficient lambda (default 2.5).
    Omega : (K, K) view-uncertainty matrix. If None it is computed via the
        He-Litterman rule, Omega = diag(P (tau sigma) P').

    Returns
    -------
    dict with keys "mu" (posterior mean) and "sigma" (posterior covariance).
    """
    # 1. Implied equilibrium returns: Pi = lambda * Sigma * w_mkt
    Pi = risk_aversion * sigma @ w_mkt

    # 2. View uncertainty (Omega), He-Litterman default
    if Omega is None:
        Omega = np.diag(np.diag(P @ (tau * sigma) @ P.T))

    # Shared system matrix M = P (tau Sigma) P' + Omega.
    # Use np.linalg.solve instead of an explicit inverse: same result,
    # better numerical stability for ill-conditioned view systems.
    M = P @ (tau * sigma) @ P.T + Omega

    # 3. Posterior mean:
    # mu_bl = Pi + tau*Sigma*P' * M^-1 * (q - P*Pi)
    mu_bl = Pi + (tau * sigma @ P.T) @ np.linalg.solve(M, q - P @ Pi)

    # 4. Posterior covariance:
    # sigma_bl = (1+tau)*Sigma - tau^2 * Sigma*P' * M^-1 * P*Sigma
    sigma_bl = (1 + tau) * sigma - (tau**2 * sigma @ P.T) @ np.linalg.solve(
        M, P @ sigma
    )

    return {"mu": mu_bl, "sigma": sigma_bl}
@@ -0,0 +1,302 @@
1
+ import numpy as np
2
+ from typing import List, Tuple, Dict, Any, Optional
3
+
4
+
5
+
6
class CLA:
    """
    Critical Line Algorithm (CLA) for Mean-Variance Optimization.
    Based on the implementation by Marcos Lopez de Prado.

    Computes every turning point of the constrained efficient frontier for
        max  lam * mu'w - 0.5 * w' Sigma w
        s.t. sum(w) = 1, lb <= w <= ub
    sweeping lam from +inf down to 0 (minimum-variance portfolio).

    Fix vs. the original Python-2 code: in ``solve`` Case B,
    ``_compute_lambda`` can return ``lam = None`` (degenerate direction);
    comparing ``None`` with a float raises ``TypeError`` on Python 3, so
    such candidates are now skipped explicitly.
    """

    def __init__(
        self,
        expected_returns: np.ndarray,
        cov_matrix: np.ndarray,
        lower_bounds: np.ndarray,
        upper_bounds: np.ndarray,
    ):
        # Store mu and bounds as column vectors so matrix algebra below is
        # uniform; sigma stays (N x N).
        self.mu = expected_returns.reshape(-1, 1)
        self.sigma = cov_matrix
        self.lb = lower_bounds.reshape(-1, 1)
        self.ub = upper_bounds.reshape(-1, 1)
        self.n = len(self.mu)

        self.w = []  # solution weights at turning points
        self.ls = []  # lambdas at turning points
        self.g = []  # gammas at turning points
        self.f = []  # free sets at turning points

    @staticmethod
    def _infnone(x):
        # Treat "no candidate" (None) as -inf so max-comparisons work.
        return float("-inf") if x is None else x

    def _init_algo(self) -> Tuple[List[int], np.ndarray]:
        """Find the first turning point: the maximum-return feasible portfolio.

        Starting from all assets at their lower bounds, assets are pushed to
        their upper bounds in decreasing order of expected return until the
        budget constraint sum(w) = 1 is met; the last asset touched becomes
        the initial free asset.
        """
        # Asset ids ordered by ascending expected return.
        idx = np.argsort(self.mu.flatten())

        # Start with all weights at their lower bounds.
        i, w = self.n, np.copy(self.lb)
        while np.sum(w) < 1.0 and i > 0:
            i -= 1
            idx_i = idx[i]
            w[idx_i] = self.ub[idx_i]

        # Adjust last modified asset to meet sum(w) = 1 exactly.
        if np.sum(w) > 1.0:
            w[idx[i]] += 1.0 - np.sum(w)

        return [idx[i]], w

    def _get_matrices(
        self, f: List[int], w: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """Partition sigma/mu/w into free (f) and bounded (b) components."""
        b = list(set(range(self.n)) - set(f))
        covarF = self.sigma[np.ix_(f, f)]
        meanF = self.mu[f]
        covarFB = self.sigma[np.ix_(f, b)]
        wB = w[b]
        return covarF, covarFB, meanF, wB

    def _compute_bi(self, c, bi):
        # Choose which bound the freed weight will hit, from the sign of c.
        if c > 0:
            return bi[1]
        if c < 0:
            return bi[0]
        return bi[0]

    def _compute_lambda(
        self,
        covarF_inv: np.ndarray,
        covarFB: np.ndarray,
        meanF: np.ndarray,
        wB: np.ndarray,
        i: int,
        bi: Any,
    ) -> Tuple[Optional[float], Optional[float]]:
        """Lambda at which free asset ``i`` hits bound ``bi``.

        Returns (None, None) when the direction is degenerate (|c| ~ 0),
        i.e. no finite critical lambda exists for this asset.
        """
        onesF = np.ones((len(meanF), 1))
        c1 = onesF.T @ covarF_inv @ onesF
        c2 = covarF_inv @ meanF
        c3 = onesF.T @ covarF_inv @ meanF
        c4 = covarF_inv @ onesF

        c = -c1 * c2[i] + c3 * c4[i]
        c_val = c.item()
        if abs(c_val) < 1e-12:
            return None, None

        # When both bounds are candidates, pick the one implied by sign(c).
        if isinstance(bi, list):
            bi = self._compute_bi(c_val, bi)

        if len(wB) == 0:
            res = (c4[i] - c1 * bi) / c
        else:
            onesB = np.ones((len(wB), 1))
            l1 = onesB.T @ wB
            l2 = covarF_inv @ covarFB
            l3 = l2 @ wB
            l4 = onesF.T @ l3
            res = ((1 - l1 + l4) * c4[i] - c1 * (bi + l3[i])) / c
        return float(res.item()), float(bi)

    def _compute_w(
        self,
        covarF_inv: np.ndarray,
        covarFB: np.ndarray,
        meanF: np.ndarray,
        wB: np.ndarray,
        lam: float,
    ) -> Tuple[np.ndarray, float]:
        """Free weights and gamma at a given lambda (KKT solution)."""
        onesF = np.ones((len(meanF), 1))
        g1 = onesF.T @ covarF_inv @ meanF
        g2 = onesF.T @ covarF_inv @ onesF

        if len(wB) == 0:
            g = -lam * g1 / g2 + 1 / g2
            w1 = np.zeros(onesF.shape)
        else:
            onesB = np.ones((len(wB), 1))
            g3 = onesB.T @ wB
            g4 = covarF_inv @ covarFB
            w1 = g4 @ wB
            g5 = onesF.T @ w1
            g = -lam * g1 / g2 + (1 - g3 + g5) / g2

        g_val = float(g.item())
        w2 = covarF_inv @ onesF
        w3 = covarF_inv @ meanF
        wF = -w1 + g_val * w2 + lam * w3
        return wF, g_val

    def solve(self):
        """Trace all turning points from max return down to minimum variance."""
        f, w = self._init_algo()
        self.w.append(np.copy(w))
        self.ls.append(None)
        self.g.append(None)
        self.f.append(f[:])

        while True:
            # Case A: bound one currently-free weight. Find the largest
            # lambda at which some free asset hits a bound.
            l_in = None
            if len(f) > 1:
                covarF, covarFB, meanF, wB = self._get_matrices(f, w)
                covarF_inv = np.linalg.inv(covarF)
                for j, idx in enumerate(f):
                    lam, bi = self._compute_lambda(
                        covarF_inv,
                        covarFB,
                        meanF,
                        wB,
                        j,
                        [self.lb[idx].item(), self.ub[idx].item()],
                    )
                    if self._infnone(lam) > self._infnone(l_in):
                        l_in, i_in, bi_in = lam, idx, bi

            # Case B: free one currently-bounded weight. Find the largest
            # admissible lambda (below the previous turning point's lambda)
            # at which a bounded asset becomes free.
            l_out = None
            b = list(set(range(self.n)) - set(f))
            if len(b) > 0:
                for idx in b:
                    f_temp = f + [idx]
                    covarF, covarFB, meanF, wB = self._get_matrices(f_temp, w)
                    covarF_inv = np.linalg.inv(covarF)
                    lam, bi = self._compute_lambda(
                        covarF_inv, covarFB, meanF, wB, len(f_temp) - 1, w[idx].item()
                    )

                    # FIX: _compute_lambda may return None for a degenerate
                    # direction; comparing None with floats raises TypeError
                    # on Python 3, so skip such candidates.
                    if lam is None:
                        continue
                    if (
                        self.ls[-1] is None or lam < self.ls[-1]
                    ) and lam > self._infnone(l_out):
                        l_out, i_out = lam, idx

            if self._infnone(l_in) < 0 and self._infnone(l_out) < 0:
                # No positive critical lambda remains: compute the
                # minimum-variance solution (lambda = 0) and stop.
                self.ls.append(0.0)
                covarF, covarFB, meanF, wB = self._get_matrices(f, w)
                covarF_inv = np.linalg.inv(covarF)
                wF, g = self._compute_w(
                    covarF_inv, covarFB, np.zeros(meanF.shape), wB, 0.0
                )
            else:
                # Take whichever event (bounding or freeing) occurs at the
                # larger lambda, update the free set, and re-solve.
                if self._infnone(l_in) > self._infnone(l_out):
                    self.ls.append(l_in)
                    f.remove(i_in)
                    w[i_in] = bi_in
                else:
                    self.ls.append(l_out)
                    f.append(i_out)
                covarF, covarFB, meanF, wB = self._get_matrices(f, w)
                covarF_inv = np.linalg.inv(covarF)
                wF, g = self._compute_w(covarF_inv, covarFB, meanF, wB, self.ls[-1])

            for j, idx in enumerate(f):
                w[idx] = wF[j]

            self.w.append(np.copy(w))
            self.g.append(g)
            self.f.append(f[:])

            # lambda = 0 marks the minimum-variance endpoint.
            if self.ls[-1] == 0:
                break

        self._purge_num_err(1e-10)
        self._purge_excess()

    def _purge_num_err(self, tol: float):
        """Drop turning points that violate the constraints beyond ``tol``."""
        i = 0
        while i < len(self.w):
            w = self.w[i]
            if (
                abs(np.sum(w) - 1.0) > tol
                or np.any(w < self.lb - tol)
                or np.any(w > self.ub + tol)
            ):
                del self.w[i], self.ls[i], self.g[i], self.f[i]
            else:
                i += 1

    def _purge_excess(self):
        """Drop turning points dominated by a later, higher-return point
        (expected return must be monotonically decreasing along the path)."""
        i = 0
        while i < len(self.w) - 1:
            mu = (self.w[i].T @ self.mu).item()
            j = i + 1
            removed = False
            while j < len(self.w):
                mu_next = (self.w[j].T @ self.mu).item()
                if mu < mu_next:
                    del self.w[i], self.ls[i], self.g[i], self.f[i]
                    removed = True
                    break
                j += 1
            if not removed:
                i += 1

    def max_sharpe(self, risk_free_rate: float = 0.0) -> np.ndarray:
        """Maximum-Sharpe portfolio via golden-section search between
        consecutive turning points (the frontier is piecewise linear in w)."""
        if not self.w:
            self.solve()

        def sr_func(alpha, w0, w1):
            w = alpha * w0 + (1 - alpha) * w1
            ret = (w.T @ self.mu).item() - risk_free_rate
            vol = np.sqrt((w.T @ self.sigma @ w).item())
            if vol < 1e-12:
                return 0.0
            return -(ret / vol)  # Minimize negative SR

        from scipy.optimize import minimize_scalar

        best_w = self.w[0]
        max_sr = -np.inf

        for i in range(len(self.w) - 1):
            res = minimize_scalar(
                sr_func,
                bounds=(0, 1),
                args=(self.w[i], self.w[i + 1]),
                method="bounded",
            )
            w_opt = res.x * self.w[i] + (1 - res.x) * self.w[i + 1]
            sr = -res.fun
            if sr > max_sr:
                max_sr = sr
                best_w = w_opt
        return best_w.flatten()

    def min_volatility(self) -> np.ndarray:
        """Minimum-volatility portfolio among the turning points."""
        if not self.w:
            self.solve()
        vols = [np.sqrt((w.T @ self.sigma @ w).item()) for w in self.w]
        return self.w[np.argmin(vols)].flatten()

    def efficient_frontier(
        self, points: int = 100
    ) -> Tuple[np.ndarray, np.ndarray, List[np.ndarray]]:
        """Sample ~``points`` portfolios along the piecewise-linear frontier.

        Returns (expected returns, volatilities, weight vectors).
        """
        if not self.w:
            self.solve()
        mu_list, sigma_list, weights_list = [], [], []

        n_segments = len(self.w) - 1
        if n_segments <= 0:
            # Degenerate frontier: a single feasible turning point.
            w = self.w[0]
            return (
                np.array([(w.T @ self.mu).item()]),
                np.array([np.sqrt((w.T @ self.sigma @ w).item())]),
                [w.flatten()],
            )

        points_per_segment = max(2, points // n_segments)

        for i in range(n_segments):
            alphas = np.linspace(0, 1, points_per_segment)
            if i < n_segments - 1:
                alphas = alphas[:-1]  # avoid duplicate points at segment joins

            for alpha in alphas:
                w = alpha * self.w[i + 1] + (1 - alpha) * self.w[i]
                weights_list.append(w.flatten())
                mu_list.append((w.T @ self.mu).item())
                sigma_list.append(np.sqrt((w.T @ self.sigma @ w).item()))

        return np.array(mu_list), np.array(sigma_list), weights_list
@@ -0,0 +1,67 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ from typing import Dict, Any
4
+
5
+
6
+ def statistical_factor_model(R: pd.DataFrame, k: int = 3) -> Dict[str, Any]:
7
+ """
8
+ Extract statistical factors using PCA.
9
+ Returns:
10
+ - factors: Factor returns (T x k)
11
+ - loadings: Factor loadings (N x k)
12
+ - alpha: Intercepts (N x 1)
13
+ - residuals: Residual returns (T x N)
14
+ """
15
+ T, N = R.shape
16
+ # Center returns
17
+ mu = R.mean()
18
+ R_centered = R - mu
19
+
20
+ # PCA via SVD
21
+ U, S, Vt = np.linalg.svd(R_centered, full_matrices=False)
22
+
23
+ # Factors (principal components)
24
+ # R = U S V'
25
+ # Factors = U S
26
+ factors_mat = U[:, :k] @ np.diag(S[:k])
27
+ factors = pd.DataFrame(
28
+ factors_mat, index=R.index, columns=[f"Factor.{i + 1}" for i in range(k)]
29
+ )
30
+
31
+ # Loadings (eigenvectors)
32
+ # Vt is (N x N), top k rows are loadings
33
+ loadings = Vt[:k, :].T
34
+
35
+ # Alphas and Residuals
36
+ # R = alpha + Loadings * Factors + Residuals
37
+ # For statistical factors, alpha is often mean return
38
+ alpha = mu.values.reshape(-1, 1)
39
+
40
+ # Reconstruction
41
+ R_hat = factors_mat @ loadings.T
42
+ residuals = R_centered.values - R_hat
43
+
44
+ return {
45
+ "factors": factors,
46
+ "loadings": pd.DataFrame(loadings, index=R.columns, columns=factors.columns),
47
+ "alpha": pd.Series(alpha.flatten(), index=R.columns),
48
+ "residuals": pd.DataFrame(residuals, index=R.index, columns=R.columns),
49
+ }
50
+
51
+
52
def factor_model_covariance(model_results: Dict[str, Any]) -> np.ndarray:
    """
    Assemble the asset covariance matrix implied by a factor model.

    Sigma = B Sigma_f B' + diag(Var(residuals))

    Parameters
    ----------
    model_results : output of ``statistical_factor_model`` — a dict with
        "loadings" (N x k), "factors" (T x k) and "residuals" (T x N)
        DataFrames.

    Returns
    -------
    (N, N) ndarray covariance matrix.
    """
    B = model_results["loadings"].values
    factors = model_results["factors"].values
    residuals = model_results["residuals"].values

    # Covariance of factors. np.cov squeezes the single-factor (k = 1)
    # case to a 0-d array, which would break the matmul below, so force
    # a 2-d (k x k) matrix.
    Sigma_f = np.atleast_2d(np.cov(factors, rowvar=False))

    # Idiosyncratic risk: diagonal matrix of residual variances.
    Sigma_e = np.diag(np.var(residuals, axis=0, ddof=1))

    return B @ Sigma_f @ B.T + Sigma_e