factorlasso 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- factorlasso-0.1.0/LICENSE +21 -0
- factorlasso-0.1.0/PKG-INFO +228 -0
- factorlasso-0.1.0/README.md +186 -0
- factorlasso-0.1.0/factorlasso/__init__.py +69 -0
- factorlasso-0.1.0/factorlasso/ewm_utils.py +305 -0
- factorlasso-0.1.0/factorlasso/factor_covar.py +378 -0
- factorlasso-0.1.0/factorlasso/lasso_estimator.py +819 -0
- factorlasso-0.1.0/factorlasso.egg-info/PKG-INFO +228 -0
- factorlasso-0.1.0/factorlasso.egg-info/SOURCES.txt +13 -0
- factorlasso-0.1.0/factorlasso.egg-info/dependency_links.txt +1 -0
- factorlasso-0.1.0/factorlasso.egg-info/requires.txt +14 -0
- factorlasso-0.1.0/factorlasso.egg-info/top_level.txt +1 -0
- factorlasso-0.1.0/pyproject.toml +86 -0
- factorlasso-0.1.0/setup.cfg +4 -0
- factorlasso-0.1.0/tests/test_factorlasso.py +586 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Artur Sepp
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: factorlasso
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Sparse factor model estimation with sign-constrained LASSO, prior-centered regularisation, and hierarchical group LASSO (HCGL)
|
|
5
|
+
Author-email: Artur Sepp <artursepp@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ArturSepp/factorlasso
|
|
8
|
+
Project-URL: Documentation, https://factorlasso.readthedocs.io
|
|
9
|
+
Project-URL: Repository, https://github.com/ArturSepp/factorlasso
|
|
10
|
+
Project-URL: Issues, https://github.com/ArturSepp/factorlasso/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/ArturSepp/factorlasso/blob/main/CHANGELOG.md
|
|
12
|
+
Keywords: lasso,group-lasso,factor-model,penalized-regression,sparse-regression,sign-constraints,covariance-estimation,cvxpy,multi-output-regression
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
26
|
+
Requires-Python: >=3.9
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: numpy>=1.22
|
|
30
|
+
Requires-Dist: pandas>=1.4
|
|
31
|
+
Requires-Dist: scipy>=1.9
|
|
32
|
+
Requires-Dist: cvxpy>=1.3
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
36
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
37
|
+
Provides-Extra: docs
|
|
38
|
+
Requires-Dist: sphinx>=6.0; extra == "docs"
|
|
39
|
+
Requires-Dist: sphinx-rtd-theme>=1.0; extra == "docs"
|
|
40
|
+
Requires-Dist: numpydoc>=1.5; extra == "docs"
|
|
41
|
+
Dynamic: license-file
|
|
42
|
+
|
|
43
|
+
# factorlasso
|
|
44
|
+
|
|
45
|
+
**Sparse factor model estimation with sign-constrained LASSO, prior-centered regularisation, and hierarchical group LASSO (HCGL)**
|
|
46
|
+
|
|
47
|
+
[CI](https://github.com/ArturSepp/factorlasso/actions)
|
|
48
|
+
[PyPI version](https://pypi.org/project/factorlasso/)
|
|
49
|
+
[Python versions](https://pypi.org/project/factorlasso/)
|
|
50
|
+
[GitHub repository](https://github.com/ArturSepp/factorlasso)
|
|
51
|
+
[License: MIT](LICENSE)
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Overview
|
|
56
|
+
|
|
57
|
+
`factorlasso` solves the sparse multi-output regression problem
|
|
58
|
+
|
|
59
|
+
$$Y_t = \alpha + \beta X_t + \varepsilon_t$$
|
|
60
|
+
|
|
61
|
+
where $\beta$ is $(N \times M)$, $\alpha$ is $(N \times 1)$ intercept, $Y_t$ is $(N \times 1)$, and $X_t$ is $(M \times 1)$, under:
|
|
62
|
+
|
|
63
|
+
- **Sign constraints** on individual coefficients (non-negative, non-positive, zero, or free)
|
|
64
|
+
- **Prior-centered regularisation** — penalise $\|\beta - \beta_0\|$ instead of $\|\beta\|$, shrinking toward domain-specific priors
|
|
65
|
+
- **Group structure** — Group LASSO with user-defined groups or automatic hierarchical clustering (HCGL)
|
|
66
|
+
- **EWMA-weighted observations** — exponential decay for non-stationary data
|
|
67
|
+
- **NaN-aware estimation** — validity masking handles variables with different observation lengths
|
|
68
|
+
|
|
69
|
+
After estimation, `factorlasso` assembles the consistent factor covariance decomposition
|
|
70
|
+
|
|
71
|
+
$$\Sigma_y = \beta\,\Sigma_x\,\beta^\top + D$$
|
|
72
|
+
|
|
73
|
+
where $\Sigma_x$ is the factor covariance and $D$ is diagonal idiosyncratic variance.
|
|
74
|
+
|
|
75
|
+
**No existing Python package** combines sign-constrained penalised regression with prior-centered shrinkage and integrated factor covariance assembly.
|
|
76
|
+
|
|
77
|
+
## Installation
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
pip install factorlasso
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Quick Start
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
import numpy as np, pandas as pd
|
|
87
|
+
from factorlasso import LassoModel, LassoModelType
|
|
88
|
+
|
|
89
|
+
# Simulate: Y = X @ beta_true.T + noise
|
|
90
|
+
np.random.seed(42)
|
|
91
|
+
T, M, N = 200, 3, 5
|
|
92
|
+
X = pd.DataFrame(np.random.randn(T, M), columns=['f0', 'f1', 'f2'])
|
|
93
|
+
beta_true = np.array([[1, 0, .5], [0, 1, 0], [.3, 0, 0], [0, .8, .2], [1, .5, 0]])
|
|
94
|
+
Y = pd.DataFrame(X.values @ beta_true.T + .1*np.random.randn(T, N),
|
|
95
|
+
columns=[f'y{i}' for i in range(N)])
|
|
96
|
+
|
|
97
|
+
model = LassoModel(model_type=LassoModelType.LASSO, reg_lambda=1e-4)
|
|
98
|
+
model.fit(x=X, y=Y)
|
|
99
|
+
print(model.coef_.round(2)) # β (N × M)
|
|
100
|
+
print(model.intercept_.round(4)) # α (N,)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Predict and Score (scikit-learn compatible)
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
y_hat = model.predict(X) # Ŷ = α + X β'
|
|
107
|
+
r2 = model.score(X, Y) # mean R² across response variables
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Sign Constraints
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
# 1 = non-negative, -1 = non-positive, 0 = zero, NaN = free
|
|
114
|
+
signs = pd.DataFrame([[1, np.nan, 1], [np.nan, 1, 0], [1, 0, np.nan],
|
|
115
|
+
[np.nan, 1, 1], [1, 1, np.nan]],
|
|
116
|
+
index=Y.columns, columns=X.columns)
|
|
117
|
+
|
|
118
|
+
model = LassoModel(reg_lambda=1e-4, factors_beta_loading_signs=signs)
|
|
119
|
+
model.fit(x=X, y=Y)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
### Prior-Centered Regularisation
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
beta_prior = pd.DataFrame(beta_true, index=Y.columns, columns=X.columns)
|
|
126
|
+
model = LassoModel(reg_lambda=1e-2, factors_beta_prior=beta_prior)
|
|
127
|
+
model.fit(x=X, y=Y) # shrinks toward beta_prior instead of zero
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Hierarchical Clustering Group LASSO (HCGL)
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
model = LassoModel(
|
|
134
|
+
model_type=LassoModelType.GROUP_LASSO_CLUSTERS,
|
|
135
|
+
reg_lambda=1e-5, span=52,
|
|
136
|
+
)
|
|
137
|
+
model.fit(x=X, y=Y)
|
|
138
|
+
print(model.clusters_) # auto-discovered groups
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Factor Covariance Assembly
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from factorlasso import CurrentFactorCovarData, VarianceColumns
|
|
145
|
+
from factorlasso.ewm_utils import compute_ewm_covar
|
|
146
|
+
|
|
147
|
+
# Assemble Sigma_y = beta @ Sigma_x @ beta.T + D; factor_covariance (Sigma_x) and diagnostics_df (y_variances) are placeholders you must supply
|
|
148
|
+
sigma_y = CurrentFactorCovarData(
|
|
149
|
+
x_covar=factor_covariance,
|
|
150
|
+
y_betas=model.coef_,
|
|
151
|
+
y_variances=diagnostics_df,
|
|
152
|
+
).get_y_covar()
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## API Summary
|
|
156
|
+
|
|
157
|
+
The API follows scikit-learn conventions: `fit` / `predict` / `score`.
|
|
158
|
+
|
|
159
|
+
| Method | Description |
|
|
160
|
+
|--------|-------------|
|
|
161
|
+
| `model.fit(x, y)` | Estimate α, β — returns `self` |
|
|
162
|
+
| `model.predict(x)` | Return Ŷ = α + X β' |
|
|
163
|
+
| `model.score(x, y)` | Return mean R² |
|
|
164
|
+
|
|
165
|
+
| Fitted attribute | Shape | Description |
|
|
166
|
+
|-----------------|-------|-------------|
|
|
167
|
+
| `coef_` | (N, M) | Factor loadings β |
|
|
168
|
+
| `intercept_` | (N,) | Intercept α |
|
|
169
|
+
| `estimated_betas` | (N, M) | Alias for `coef_` (backward compat) |
|
|
170
|
+
| `clusters_` | (N,) | HCGL cluster labels |
|
|
171
|
+
|
|
172
|
+
## Estimation Methods
|
|
173
|
+
|
|
174
|
+
| Method | `LassoModelType` | Penalty |
|
|
175
|
+
|--------|-------------------|---------|
|
|
176
|
+
| LASSO | `LASSO` | $\lambda\lVert\beta - \beta_0\rVert_1$ |
|
|
177
|
+
| Group LASSO | `GROUP_LASSO` | $\sum_g \lambda\sqrt{\lvert g\rvert/G}\,\lVert\beta_{g,:} - \beta_{0,g,:}\rVert_2$ |
|
|
178
|
+
| HCGL | `GROUP_LASSO_CLUSTERS` | Same as Group LASSO with auto-clustering |
|
|
179
|
+
|
|
180
|
+
All methods support sign constraints, prior-centered shrinkage, EWMA weighting, and NaN-aware estimation.
|
|
181
|
+
|
|
182
|
+
## Applications
|
|
183
|
+
|
|
184
|
+
The methodology is domain-agnostic. Examples are provided for:
|
|
185
|
+
|
|
186
|
+
- **Finance** — Multi-asset factor models with sign-constrained betas and consistent covariance estimation ([`examples/finance_factor_model.py`](examples/finance_factor_model.py))
|
|
187
|
+
- **Genomics** — Gene expression driven by pathway activity factors with biological sign priors ([`examples/genomics_factor_model.py`](examples/genomics_factor_model.py))
|
|
188
|
+
|
|
189
|
+
The same estimation problem (sparse factor loadings with sign priors and consistent covariance) appears in macro-econometrics, signal processing, and multi-task learning.
|
|
190
|
+
|
|
191
|
+
## Dependencies
|
|
192
|
+
|
|
193
|
+
Only standard scientific Python:
|
|
194
|
+
|
|
195
|
+
- `numpy ≥ 1.22`
|
|
196
|
+
- `pandas ≥ 1.4`
|
|
197
|
+
- `scipy ≥ 1.9`
|
|
198
|
+
- `cvxpy ≥ 1.3`
|
|
199
|
+
|
|
200
|
+
## Related Packages
|
|
201
|
+
|
|
202
|
+
| Package | Key Difference |
|
|
203
|
+
|---------|----------------|
|
|
204
|
+
| [scikit-learn](https://scikit-learn.org/) `Lasso` | No per-coefficient sign constraints (only a global `positive=True`), no multi-output Group LASSO |
|
|
205
|
+
| [skglm](https://contrib.scikit-learn.org/skglm/) | No sign constraints, no prior-centered shrinkage |
|
|
206
|
+
| [abess](https://abess.readthedocs.io/) | Best-subset selection (L0), not L1/Group L2 |
|
|
207
|
+
| [group-lasso](https://pypi.org/project/group-lasso/) | No sign constraints, no EWMA, no prior-centered |
|
|
208
|
+
|
|
209
|
+
`factorlasso` is the only package that combines sign-constrained penalised regression, prior-centered shrinkage, HCGL clustering, and integrated factor covariance assembly.
|
|
210
|
+
|
|
211
|
+
## References
|
|
212
|
+
|
|
213
|
+
Sepp A., Ossa I., Kastenholz M. (2026), "Robust Optimization of Strategic and Tactical Asset Allocation for Multi-Asset Portfolios", *The Journal of Portfolio Management*, 52(4), 86–120. [Paper link](https://eprints.pm-research.com/17511/143431/index.html)
|
|
214
|
+
|
|
215
|
+
## Citation
|
|
216
|
+
|
|
217
|
+
```bibtex
|
|
218
|
+
@software{sepp2026factorlasso,
|
|
219
|
+
author = {Sepp, Artur},
|
|
220
|
+
title = {factorlasso: Sparse Factor Model Estimation with Constrained LASSO in Python},
|
|
221
|
+
year = {2026},
|
|
222
|
+
url = {https://github.com/ArturSepp/factorlasso}
|
|
223
|
+
}
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
## License
|
|
227
|
+
|
|
228
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# factorlasso
|
|
2
|
+
|
|
3
|
+
**Sparse factor model estimation with sign-constrained LASSO, prior-centered regularisation, and hierarchical group LASSO (HCGL)**
|
|
4
|
+
|
|
5
|
+
[CI](https://github.com/ArturSepp/factorlasso/actions)
|
|
6
|
+
[PyPI version](https://pypi.org/project/factorlasso/)
|
|
7
|
+
[Python versions](https://pypi.org/project/factorlasso/)
|
|
8
|
+
[GitHub repository](https://github.com/ArturSepp/factorlasso)
|
|
9
|
+
[License: MIT](LICENSE)
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Overview
|
|
14
|
+
|
|
15
|
+
`factorlasso` solves the sparse multi-output regression problem
|
|
16
|
+
|
|
17
|
+
$$Y_t = \alpha + \beta X_t + \varepsilon_t$$
|
|
18
|
+
|
|
19
|
+
where $\beta$ is $(N \times M)$, $\alpha$ is $(N \times 1)$ intercept, $Y_t$ is $(N \times 1)$, and $X_t$ is $(M \times 1)$, under:
|
|
20
|
+
|
|
21
|
+
- **Sign constraints** on individual coefficients (non-negative, non-positive, zero, or free)
|
|
22
|
+
- **Prior-centered regularisation** — penalise $\|\beta - \beta_0\|$ instead of $\|\beta\|$, shrinking toward domain-specific priors
|
|
23
|
+
- **Group structure** — Group LASSO with user-defined groups or automatic hierarchical clustering (HCGL)
|
|
24
|
+
- **EWMA-weighted observations** — exponential decay for non-stationary data
|
|
25
|
+
- **NaN-aware estimation** — validity masking handles variables with different observation lengths
|
|
26
|
+
|
|
27
|
+
After estimation, `factorlasso` assembles the consistent factor covariance decomposition
|
|
28
|
+
|
|
29
|
+
$$\Sigma_y = \beta\,\Sigma_x\,\beta^\top + D$$
|
|
30
|
+
|
|
31
|
+
where $\Sigma_x$ is the factor covariance and $D$ is diagonal idiosyncratic variance.
|
|
32
|
+
|
|
33
|
+
**No existing Python package** combines sign-constrained penalised regression with prior-centered shrinkage and integrated factor covariance assembly.
|
|
34
|
+
|
|
35
|
+
## Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install factorlasso
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
import numpy as np, pandas as pd
|
|
45
|
+
from factorlasso import LassoModel, LassoModelType
|
|
46
|
+
|
|
47
|
+
# Simulate: Y = X @ beta_true.T + noise
|
|
48
|
+
np.random.seed(42)
|
|
49
|
+
T, M, N = 200, 3, 5
|
|
50
|
+
X = pd.DataFrame(np.random.randn(T, M), columns=['f0', 'f1', 'f2'])
|
|
51
|
+
beta_true = np.array([[1, 0, .5], [0, 1, 0], [.3, 0, 0], [0, .8, .2], [1, .5, 0]])
|
|
52
|
+
Y = pd.DataFrame(X.values @ beta_true.T + .1*np.random.randn(T, N),
|
|
53
|
+
columns=[f'y{i}' for i in range(N)])
|
|
54
|
+
|
|
55
|
+
model = LassoModel(model_type=LassoModelType.LASSO, reg_lambda=1e-4)
|
|
56
|
+
model.fit(x=X, y=Y)
|
|
57
|
+
print(model.coef_.round(2)) # β (N × M)
|
|
58
|
+
print(model.intercept_.round(4)) # α (N,)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Predict and Score (scikit-learn compatible)
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
y_hat = model.predict(X) # Ŷ = α + X β'
|
|
65
|
+
r2 = model.score(X, Y) # mean R² across response variables
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Sign Constraints
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
# 1 = non-negative, -1 = non-positive, 0 = zero, NaN = free
|
|
72
|
+
signs = pd.DataFrame([[1, np.nan, 1], [np.nan, 1, 0], [1, 0, np.nan],
|
|
73
|
+
[np.nan, 1, 1], [1, 1, np.nan]],
|
|
74
|
+
index=Y.columns, columns=X.columns)
|
|
75
|
+
|
|
76
|
+
model = LassoModel(reg_lambda=1e-4, factors_beta_loading_signs=signs)
|
|
77
|
+
model.fit(x=X, y=Y)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Prior-Centered Regularisation
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
beta_prior = pd.DataFrame(beta_true, index=Y.columns, columns=X.columns)
|
|
84
|
+
model = LassoModel(reg_lambda=1e-2, factors_beta_prior=beta_prior)
|
|
85
|
+
model.fit(x=X, y=Y) # shrinks toward beta_prior instead of zero
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Hierarchical Clustering Group LASSO (HCGL)
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
model = LassoModel(
|
|
92
|
+
model_type=LassoModelType.GROUP_LASSO_CLUSTERS,
|
|
93
|
+
reg_lambda=1e-5, span=52,
|
|
94
|
+
)
|
|
95
|
+
model.fit(x=X, y=Y)
|
|
96
|
+
print(model.clusters_) # auto-discovered groups
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
### Factor Covariance Assembly
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from factorlasso import CurrentFactorCovarData, VarianceColumns
|
|
103
|
+
from factorlasso.ewm_utils import compute_ewm_covar
|
|
104
|
+
|
|
105
|
+
# Assemble Sigma_y = beta @ Sigma_x @ beta.T + D; factor_covariance (Sigma_x) and diagnostics_df (y_variances) are placeholders you must supply
|
|
106
|
+
sigma_y = CurrentFactorCovarData(
|
|
107
|
+
x_covar=factor_covariance,
|
|
108
|
+
y_betas=model.coef_,
|
|
109
|
+
y_variances=diagnostics_df,
|
|
110
|
+
).get_y_covar()
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
## API Summary
|
|
114
|
+
|
|
115
|
+
The API follows scikit-learn conventions: `fit` / `predict` / `score`.
|
|
116
|
+
|
|
117
|
+
| Method | Description |
|
|
118
|
+
|--------|-------------|
|
|
119
|
+
| `model.fit(x, y)` | Estimate α, β — returns `self` |
|
|
120
|
+
| `model.predict(x)` | Return Ŷ = α + X β' |
|
|
121
|
+
| `model.score(x, y)` | Return mean R² |
|
|
122
|
+
|
|
123
|
+
| Fitted attribute | Shape | Description |
|
|
124
|
+
|-----------------|-------|-------------|
|
|
125
|
+
| `coef_` | (N, M) | Factor loadings β |
|
|
126
|
+
| `intercept_` | (N,) | Intercept α |
|
|
127
|
+
| `estimated_betas` | (N, M) | Alias for `coef_` (backward compat) |
|
|
128
|
+
| `clusters_` | (N,) | HCGL cluster labels |
|
|
129
|
+
|
|
130
|
+
## Estimation Methods
|
|
131
|
+
|
|
132
|
+
| Method | `LassoModelType` | Penalty |
|
|
133
|
+
|--------|-------------------|---------|
|
|
134
|
+
| LASSO | `LASSO` | $\lambda\lVert\beta - \beta_0\rVert_1$ |
|
|
135
|
+
| Group LASSO | `GROUP_LASSO` | $\sum_g \lambda\sqrt{\lvert g\rvert/G}\,\lVert\beta_{g,:} - \beta_{0,g,:}\rVert_2$ |
|
|
136
|
+
| HCGL | `GROUP_LASSO_CLUSTERS` | Same as Group LASSO with auto-clustering |
|
|
137
|
+
|
|
138
|
+
All methods support sign constraints, prior-centered shrinkage, EWMA weighting, and NaN-aware estimation.
|
|
139
|
+
|
|
140
|
+
## Applications
|
|
141
|
+
|
|
142
|
+
The methodology is domain-agnostic. Examples are provided for:
|
|
143
|
+
|
|
144
|
+
- **Finance** — Multi-asset factor models with sign-constrained betas and consistent covariance estimation ([`examples/finance_factor_model.py`](examples/finance_factor_model.py))
|
|
145
|
+
- **Genomics** — Gene expression driven by pathway activity factors with biological sign priors ([`examples/genomics_factor_model.py`](examples/genomics_factor_model.py))
|
|
146
|
+
|
|
147
|
+
The same estimation problem (sparse factor loadings with sign priors and consistent covariance) appears in macro-econometrics, signal processing, and multi-task learning.
|
|
148
|
+
|
|
149
|
+
## Dependencies
|
|
150
|
+
|
|
151
|
+
Only standard scientific Python:
|
|
152
|
+
|
|
153
|
+
- `numpy ≥ 1.22`
|
|
154
|
+
- `pandas ≥ 1.4`
|
|
155
|
+
- `scipy ≥ 1.9`
|
|
156
|
+
- `cvxpy ≥ 1.3`
|
|
157
|
+
|
|
158
|
+
## Related Packages
|
|
159
|
+
|
|
160
|
+
| Package | Key Difference |
|
|
161
|
+
|---------|----------------|
|
|
162
|
+
| [scikit-learn](https://scikit-learn.org/) `Lasso` | No per-coefficient sign constraints (only a global `positive=True`), no multi-output Group LASSO |
|
|
163
|
+
| [skglm](https://contrib.scikit-learn.org/skglm/) | No sign constraints, no prior-centered shrinkage |
|
|
164
|
+
| [abess](https://abess.readthedocs.io/) | Best-subset selection (L0), not L1/Group L2 |
|
|
165
|
+
| [group-lasso](https://pypi.org/project/group-lasso/) | No sign constraints, no EWMA, no prior-centered |
|
|
166
|
+
|
|
167
|
+
`factorlasso` is the only package that combines sign-constrained penalised regression, prior-centered shrinkage, HCGL clustering, and integrated factor covariance assembly.
|
|
168
|
+
|
|
169
|
+
## References
|
|
170
|
+
|
|
171
|
+
Sepp A., Ossa I., Kastenholz M. (2026), "Robust Optimization of Strategic and Tactical Asset Allocation for Multi-Asset Portfolios", *The Journal of Portfolio Management*, 52(4), 86–120. [Paper link](https://eprints.pm-research.com/17511/143431/index.html)
|
|
172
|
+
|
|
173
|
+
## Citation
|
|
174
|
+
|
|
175
|
+
```bibtex
|
|
176
|
+
@software{sepp2026factorlasso,
|
|
177
|
+
author = {Sepp, Artur},
|
|
178
|
+
title = {factorlasso: Sparse Factor Model Estimation with Constrained LASSO in Python},
|
|
179
|
+
year = {2026},
|
|
180
|
+
url = {https://github.com/ArturSepp/factorlasso}
|
|
181
|
+
}
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## License
|
|
185
|
+
|
|
186
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""
|
|
2
|
+
factorlasso — Sparse factor model estimation with constrained LASSO
|
|
3
|
+
===================================================================
|
|
4
|
+
|
|
5
|
+
Estimate sparse multi-output regression coefficients with sign
|
|
6
|
+
constraints, prior-centered regularisation, and hierarchical group
|
|
7
|
+
structure (HCGL), then assemble consistent factor covariance matrices.
|
|
8
|
+
|
|
9
|
+
Quick start
|
|
10
|
+
-----------
|
|
11
|
+
>>> from factorlasso import LassoModel, LassoModelType
|
|
12
|
+
>>> model = LassoModel(model_type=LassoModelType.LASSO, reg_lambda=1e-4)
|
|
13
|
+
>>> model.fit(x=X, y=Y)
|
|
14
|
+
|
|
15
|
+
Full pipeline
|
|
16
|
+
-------------
|
|
17
|
+
>>> from factorlasso import LassoModel, CurrentFactorCovarData, VarianceColumns
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
__version__ = "0.1.0"
|
|
21
|
+
|
|
22
|
+
# --- Core estimator (imported last, from factorlasso.lasso_estimator, below) ---
|
|
23
|
+
# --- Utilities ---
|
|
24
|
+
from factorlasso.ewm_utils import (
|
|
25
|
+
compute_ewm,
|
|
26
|
+
compute_ewm_covar,
|
|
27
|
+
compute_ewm_covar_newey_west,
|
|
28
|
+
compute_expanding_power,
|
|
29
|
+
set_group_loadings,
|
|
30
|
+
squeeze_covariance_matrix,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# --- Factor covariance assembly ---
|
|
34
|
+
from factorlasso.factor_covar import (
|
|
35
|
+
CurrentFactorCovarData,
|
|
36
|
+
RollingFactorCovarData,
|
|
37
|
+
VarianceColumns,
|
|
38
|
+
)
|
|
39
|
+
from factorlasso.lasso_estimator import (
|
|
40
|
+
LassoEstimationResult,
|
|
41
|
+
LassoModel,
|
|
42
|
+
LassoModelType,
|
|
43
|
+
compute_clusters_from_corr_matrix,
|
|
44
|
+
get_x_y_np,
|
|
45
|
+
solve_group_lasso_cvx_problem,
|
|
46
|
+
solve_lasso_cvx_problem,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
__all__ = [
|
|
50
|
+
# Estimator
|
|
51
|
+
"LassoModelType",
|
|
52
|
+
"LassoModel",
|
|
53
|
+
"LassoEstimationResult",
|
|
54
|
+
"solve_lasso_cvx_problem",
|
|
55
|
+
"solve_group_lasso_cvx_problem",
|
|
56
|
+
"get_x_y_np",
|
|
57
|
+
"compute_clusters_from_corr_matrix",
|
|
58
|
+
# Factor covariance
|
|
59
|
+
"VarianceColumns",
|
|
60
|
+
"CurrentFactorCovarData",
|
|
61
|
+
"RollingFactorCovarData",
|
|
62
|
+
# Utilities
|
|
63
|
+
"compute_ewm",
|
|
64
|
+
"compute_ewm_covar",
|
|
65
|
+
"compute_ewm_covar_newey_west",
|
|
66
|
+
"compute_expanding_power",
|
|
67
|
+
"set_group_loadings",
|
|
68
|
+
"squeeze_covariance_matrix",
|
|
69
|
+
]
|