skfolio 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl
- skfolio/datasets/__init__.py +2 -0
- skfolio/datasets/_base.py +51 -0
- skfolio/distance/_distance.py +15 -4
- skfolio/model_selection/_combinatorial.py +2 -2
- skfolio/model_selection/_validation.py +70 -15
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/__init__.py +2 -0
- skfolio/moments/covariance/__init__.py +11 -11
- skfolio/moments/covariance/_base.py +10 -9
- skfolio/moments/covariance/_denoise_covariance.py +181 -0
- skfolio/moments/covariance/_detone_covariance.py +158 -0
- skfolio/moments/covariance/_empirical_covariance.py +100 -0
- skfolio/moments/covariance/_ew_covariance.py +109 -0
- skfolio/moments/covariance/_gerber_covariance.py +157 -0
- skfolio/moments/covariance/_graphical_lasso_cv.py +194 -0
- skfolio/moments/covariance/_implied_covariance.py +454 -0
- skfolio/moments/covariance/_ledoit_wolf.py +140 -0
- skfolio/moments/covariance/_oas.py +115 -0
- skfolio/moments/covariance/_shrunk_covariance.py +104 -0
- skfolio/moments/expected_returns/__init__.py +4 -7
- skfolio/moments/expected_returns/_empirical_mu.py +63 -0
- skfolio/moments/expected_returns/_equilibrium_mu.py +124 -0
- skfolio/moments/expected_returns/_ew_mu.py +69 -0
- skfolio/moments/expected_returns/{_expected_returns.py → _shrunk_mu.py} +22 -200
- skfolio/optimization/cluster/_nco.py +46 -8
- skfolio/optimization/cluster/hierarchical/_base.py +21 -1
- skfolio/optimization/cluster/hierarchical/_herc.py +18 -4
- skfolio/optimization/cluster/hierarchical/_hrp.py +13 -4
- skfolio/optimization/convex/_base.py +10 -1
- skfolio/optimization/convex/_distributionally_robust.py +12 -2
- skfolio/optimization/convex/_maximum_diversification.py +9 -2
- skfolio/optimization/convex/_mean_risk.py +33 -6
- skfolio/optimization/convex/_risk_budgeting.py +5 -2
- skfolio/optimization/ensemble/_stacking.py +32 -9
- skfolio/optimization/naive/_naive.py +20 -2
- skfolio/population/_population.py +2 -0
- skfolio/prior/_base.py +1 -1
- skfolio/prior/_black_litterman.py +20 -2
- skfolio/prior/_empirical.py +38 -5
- skfolio/prior/_factor_model.py +44 -7
- skfolio/uncertainty_set/_base.py +30 -9
- skfolio/uncertainty_set/_bootstrap.py +26 -10
- skfolio/uncertainty_set/_empirical.py +25 -10
- skfolio/utils/stats.py +24 -3
- skfolio/utils/tools.py +213 -79
- {skfolio-0.2.3.dist-info → skfolio-0.3.0.dist-info}/METADATA +3 -2
- skfolio-0.3.0.dist-info/RECORD +91 -0
- {skfolio-0.2.3.dist-info → skfolio-0.3.0.dist-info}/WHEEL +1 -1
- skfolio/moments/covariance/_covariance.py +0 -1114
- skfolio-0.2.3.dist-info/RECORD +0 -79
- {skfolio-0.2.3.dist-info → skfolio-0.3.0.dist-info}/LICENSE +0 -0
- {skfolio-0.2.3.dist-info → skfolio-0.3.0.dist-info}/top_level.txt +0 -0
skfolio/uncertainty_set/_bootstrap.py
CHANGED

@@ -11,6 +11,7 @@
 import numpy as np
 import numpy.typing as npt
 import scipy.stats as st
+import sklearn.utils.metadata_routing as skm
 
 from skfolio.prior import BasePrior, EmpiricalPrior
 from skfolio.uncertainty_set._base import (
@@ -86,8 +87,6 @@ class BootstrapMuUncertaintySet(BaseMuUncertaintySet):
     Patton, Politis & White (2009).
     """
 
-    prior_estimator_: BasePrior
-
     def __init__(
         self,
         prior_estimator: BasePrior | None = None,
@@ -97,7 +96,7 @@ class BootstrapMuUncertaintySet(BaseMuUncertaintySet):
         block_size: float | None = None,
         seed: int | None = None,
     ):
-
+        super().__init__(prior_estimator=prior_estimator)
         self.confidence_level = confidence_level
         self.diagonal = diagonal
         self.n_bootstrap_samples = n_bootstrap_samples
@@ -105,7 +104,7 @@ class BootstrapMuUncertaintySet(BaseMuUncertaintySet):
         self.seed = seed
 
     def fit(
-        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None
+        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None, **fit_params
     ) -> "BootstrapMuUncertaintySet":
         """Fit the Bootstrap Mu Uncertainty set estimator.
 
@@ -118,18 +117,27 @@ class BootstrapMuUncertaintySet(BaseMuUncertaintySet):
             Price returns of factors.
             The default is `None`.
 
+        **fit_params : dict
+            Parameters to pass to the underlying estimators.
+            Only available if `enable_metadata_routing=True`, which can be
+            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
+            See :ref:`Metadata Routing User Guide <metadata_routing>` for
+            more details.
+
         Returns
        -------
         self : BootstrapMuUncertaintySet
             Fitted estimator.
         """
+        routed_params = skm.process_routing(self, "fit", **fit_params)
+
         self.prior_estimator_ = check_estimator(
             self.prior_estimator,
             default=EmpiricalPrior(),
             check_type=BasePrior,
         )
         # fitting estimators
-        self.prior_estimator_.fit(X, y)
+        self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
         mu = self.prior_estimator_.prior_model_.mu
         returns = self.prior_estimator_.prior_model_.returns
         n_assets = returns.shape[1]
@@ -217,8 +225,6 @@ class BootstrapCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
     Patton, Politis & White (2009).
     """
 
-    prior_estimator_: BasePrior
-
     def __init__(
         self,
         prior_estimator: BasePrior | None = None,
@@ -228,14 +234,16 @@ class BootstrapCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
         block_size: float | None = None,
         seed: int | None = None,
     ):
-
+        super().__init__(prior_estimator=prior_estimator)
         self.confidence_level = confidence_level
         self.diagonal = diagonal
         self.n_bootstrap_samples = n_bootstrap_samples
         self.block_size = block_size
         self.seed = seed
 
-    def fit(self, X: npt.ArrayLike, y=None) -> "BootstrapCovarianceUncertaintySet":
+    def fit(
+        self, X: npt.ArrayLike, y=None, **fit_params
+    ) -> "BootstrapCovarianceUncertaintySet":
         """Fit the Bootstrap Covariance Uncertainty set estimator.
 
         Parameters
@@ -247,11 +255,19 @@ class BootstrapCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
             Price returns of factors.
             The default is `None`.
 
+        **fit_params : dict
+            Parameters to pass to the underlying estimators.
+            Only available if `enable_metadata_routing=True`, which can be
+            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
+            See :ref:`Metadata Routing User Guide <metadata_routing>` for
+            more details.
+
         Returns
         -------
         self : EmpiricalCovarianceUncertaintySet
             Fitted estimator.
         """
+        routed_params = skm.process_routing(self, "fit", **fit_params)
 
         self.prior_estimator_ = check_estimator(
             self.prior_estimator,
@@ -259,7 +275,7 @@ class BootstrapCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
             check_type=BasePrior,
         )
         # fitting estimators
-        self.prior_estimator_.fit(X, y)
+        self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
         covariance = self.prior_estimator_.prior_model_.covariance
         returns = self.prior_estimator_.prior_model_.returns
         n_assets = returns.shape[1]
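For context, the routing calls added above (`skm.process_routing(self, "fit", **fit_params)` followed by `fit(..., **routed_params.prior_estimator.fit)`) implement scikit-learn's metadata-routing protocol for meta-estimators. A minimal self-contained sketch of the same consumer-side pattern, using a plain scikit-learn regressor rather than skfolio's estimators; the `Wrapper` class and the `sample_weight` metadata are illustrative, not part of this diff:

    import numpy as np
    import sklearn
    import sklearn.utils.metadata_routing as skm
    from sklearn.linear_model import LinearRegression

    sklearn.set_config(enable_metadata_routing=True)

    class Wrapper:
        """Toy meta-estimator mirroring the fit-time routing pattern above."""

        def __init__(self, estimator):
            self.estimator = estimator

        def get_metadata_routing(self):
            # Declare that metadata given to our `fit` may flow to the inner
            # estimator's `fit`.
            return skm.MetadataRouter(owner=type(self).__name__).add(
                estimator=self.estimator,
                method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
            )

        def fit(self, X, y=None, **fit_params):
            routed_params = skm.process_routing(self, "fit", **fit_params)
            self.estimator_ = sklearn.clone(self.estimator)
            self.estimator_.fit(X, y, **routed_params.estimator.fit)
            return self

    X, y = np.random.randn(20, 3), np.random.randn(20)
    # The inner estimator must request the metadata for routing to accept it.
    inner = LinearRegression().set_fit_request(sample_weight=True)
    Wrapper(inner).fit(X, y, sample_weight=np.ones(20))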
skfolio/uncertainty_set/_empirical.py
CHANGED

@@ -11,6 +11,7 @@
 import numpy as np
 import numpy.typing as npt
 import scipy.stats as st
+import sklearn.utils.metadata_routing as skm
 
 from skfolio.prior import BasePrior, EmpiricalPrior
 from skfolio.uncertainty_set._base import (
@@ -77,20 +78,18 @@ class EmpiricalMuUncertaintySet(BaseMuUncertaintySet):
     Schöttle & Werner (2009).
     """
 
-    prior_estimator_: BasePrior
-
     def __init__(
         self,
         prior_estimator: BasePrior | None = None,
         confidence_level: float = 0.95,
         diagonal: bool = True,
     ):
-
+        super().__init__(prior_estimator=prior_estimator)
         self.confidence_level = confidence_level
         self.diagonal = diagonal
 
     def fit(
-        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None
+        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None, **fit_params
     ) -> "EmpiricalMuUncertaintySet":
         """Fit the Empirical Mu Uncertainty set estimator.
 
@@ -103,18 +102,27 @@ class EmpiricalMuUncertaintySet(BaseMuUncertaintySet):
             Price returns of factors.
             The default is `None`.
 
+        **fit_params : dict
+            Parameters to pass to the underlying estimators.
+            Only available if `enable_metadata_routing=True`, which can be
+            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
+            See :ref:`Metadata Routing User Guide <metadata_routing>` for
+            more details.
+
         Returns
         -------
         self : EmpiricalMuUncertaintySet
             Fitted estimator.
         """
+        routed_params = skm.process_routing(self, "fit", **fit_params)
+
         self.prior_estimator_ = check_estimator(
             self.prior_estimator,
             default=EmpiricalPrior(),
             check_type=BasePrior,
         )
         # fitting estimators
-        self.prior_estimator_.fit(X, y)
+        self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
 
         prior_model = self.prior_estimator_.prior_model_
         n_observations, n_assets = prior_model.returns.shape
@@ -185,20 +193,18 @@ class EmpiricalCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
     Schöttle & Werner (2009).
     """
 
-    prior_estimator_: BasePrior
-
     def __init__(
         self,
         prior_estimator: BasePrior | None = None,
         confidence_level: float = 0.95,
         diagonal: bool = True,
     ):
-
+        super().__init__(prior_estimator=prior_estimator)
         self.confidence_level = confidence_level
         self.diagonal = diagonal
 
     def fit(
-        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None
+        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None, **fit_params
     ) -> "EmpiricalCovarianceUncertaintySet":
         """Fit the Empirical Covariance Uncertainty set estimator.
 
@@ -211,18 +217,27 @@ class EmpiricalCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
             Price returns of factors.
             The default is `None`.
 
+        **fit_params : dict
+            Parameters to pass to the underlying estimators.
+            Only available if `enable_metadata_routing=True`, which can be
+            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
+            See :ref:`Metadata Routing User Guide <metadata_routing>` for
+            more details.
+
         Returns
         -------
         self : EmpiricalCovarianceUncertaintySet
             Fitted estimator.
         """
+        routed_params = skm.process_routing(self, "fit", **fit_params)
+
         self.prior_estimator_ = check_estimator(
             self.prior_estimator,
             default=EmpiricalPrior(),
             check_type=BasePrior,
         )
         # fitting estimators
-        self.prior_estimator_.fit(X, y)
+        self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
 
         prior_model = self.prior_estimator_.prior_model_
         n_observations, n_assets = prior_model.returns.shape
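On the caller's side, the new `**fit_params` let fit-time data reach nested estimators. A hedged sketch of the intended usage, assuming the new `ImpliedCovariance` estimator added in this release is exported from `skfolio.moments` and requests an `implied_vol` array at fit time (the exact request defaults may differ; the random data is illustrative):

    import numpy as np
    import sklearn
    from skfolio.moments import ImpliedCovariance
    from skfolio.prior import EmpiricalPrior
    from skfolio.uncertainty_set import EmpiricalCovarianceUncertaintySet

    # Routing is opt-in: without this, passing **fit_params raises an error.
    sklearn.set_config(enable_metadata_routing=True)

    rng = np.random.default_rng(0)
    X = rng.normal(0.0, 0.01, size=(500, 4))            # asset returns
    implied_vol = rng.uniform(0.1, 0.3, size=(500, 4))  # implied volatilities

    model = EmpiricalCovarianceUncertaintySet(
        prior_estimator=EmpiricalPrior(covariance_estimator=ImpliedCovariance())
    )
    # `implied_vol` is routed down through the prior estimator to the
    # covariance estimator's `fit`.
    model.fit(X, implied_vol=implied_vol)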
skfolio/utils/stats.py
CHANGED
@@ -1,12 +1,13 @@
 """Tools module"""
 
+import warnings
+
 # Copyright (c) 2023
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
 # License: BSD 3 clause
 # Implementation derived from:
 # Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
 # Statsmodels, Copyright (C) 2006, Jonathan E. Taylor, Licensed under BSD 3 clause.
-
 from enum import auto
 
 import numpy as np
@@ -102,7 +103,7 @@ def n_bins_knuth(x: np.ndarray) -> int:
     x = np.sort(x)
     n = len(x)
 
-    def func(y: float) -> float:
+    def func(y: np.ndarray) -> float:
         y = y[0]
         if y <= 0:
             return np.inf
@@ -301,9 +302,18 @@ def corr_to_cov(corr: np.ndarray, std: np.ndarray):
 _CLIPPING_VALUE = 1e-13
 
 
-def cov_nearest(cov: np.ndarray, higham: bool = False, higham_max_iteration: int = 100):
+def cov_nearest(
+    cov: np.ndarray,
+    higham: bool = False,
+    higham_max_iteration: int = 100,
+    warn: bool = False,
+):
     """Compute the nearest covariance matrix that is positive definite and with a
     cholesky decomposition than can be computed. The variance is left unchanged.
+    A covariance matrix that is not positive definite often occurs in high
+    dimensional problems. It can be due to multicollinearity, floating-point
+    inaccuracies, or when the number of observations is smaller than the number of
+    assets.
 
     First, it converts the covariance matrix to a correlation matrix.
     Then, it finds the nearest correlation matrix and converts it back to a covariance
@@ -330,6 +340,10 @@ def cov_nearest(cov: np.ndarray, higham: bool = False, higham_max_iteration: int
         Maximum number of iteration of the Higham & Nick (2002) algorithm.
         The default value is `100`.
 
+    warn : bool, default=False
+        If this is set to True, a user warning is emitted when the covariance matrix
+        is not positive definite and replaced by the nearest. The default is False.
+
     Returns
     -------
     cov : ndarray
@@ -348,6 +362,13 @@ def cov_nearest(cov: np.ndarray, higham: bool = False, higham_max_iteration: int
     if is_cholesky_dec(cov) and is_positive_definite(cov):
         return cov
 
+    if warn:
+        warnings.warn(
+            "The covariance matrix is not positive definite. "
+            f"The {'Higham' if higham else 'Clipping'} algorithm will be used to find "
+            "the nearest positive definite covariance.",
+            stacklevel=2,
+        )
     corr, std = cov_to_corr(cov)
 
     if higham:
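The new `warn` flag only fires on the degenerate path where the input is not positive definite. A minimal sketch of triggering it (the 2x2 matrix is illustrative):

    import numpy as np
    from skfolio.utils.stats import cov_nearest

    # Unit variances with an off-diagonal of 1.2 imply |correlation| > 1,
    # so this matrix is not positive semidefinite.
    cov = np.array([[1.0, 1.2],
                    [1.2, 1.0]])

    nearest = cov_nearest(cov, warn=True)  # emits a UserWarning (clipping path)
    np.linalg.cholesky(nearest)            # now succeeds
    np.testing.assert_allclose(np.diag(nearest), np.diag(cov))  # variances kept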
skfolio/utils/tools.py
CHANGED
@@ -15,6 +15,7 @@ from typing import Any
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
+import scipy.sparse as sp
 import sklearn as sk
 import sklearn.base as skb
 
@@ -29,9 +30,11 @@ __all__ = [
     "safe_split",
     "fit_single_estimator",
     "fit_and_predict",
+    "safe_indexing",
     "deduplicate_names",
     "default_asset_names",
     "check_estimator",
+    "get_feature_names",
 ]
 
 GenericAlias = type(list[int])
@@ -115,6 +118,144 @@ def _make_key(args, kwds) -> int:
     return hash(key)
 
 
+def _make_indexable(iterable):
+    """Ensure iterable supports indexing or convert to an indexable variant.
+
+    Convert sparse matrices to csr and other non-indexable iterable to arrays.
+    Let `None` and indexable objects (e.g. pandas dataframes) pass unchanged.
+
+    Parameters
+    ----------
+    iterable : {list, dataframe, ndarray, sparse matrix} or None
+        Object to be converted to an indexable iterable.
+    """
+    if sp.issparse(iterable):
+        return iterable.tocsr()
+    elif hasattr(iterable, "__getitem__") or hasattr(iterable, "iloc"):
+        return iterable
+    elif iterable is None:
+        return iterable
+    return np.array(iterable)
+
+
+def _check_method_params(
+    X: npt.ArrayLike, params: dict, indices: np.ndarray = None, axis: int = 0
+):
+    """Check and validate the parameters passed to a specific
+    method like `fit`.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features)
+        Data array.
+
+    params : dict
+        Dictionary containing the parameters passed to the method.
+
+    indices : ndarray of shape (n_samples,), default=None
+        Indices to be selected if the parameter has the same size as `X`.
+
+    axis : int, default=0
+        The axis along which `X` will be sub-sampled. `axis=0` will select
+        rows while `axis=1` will select columns.
+
+    Returns
+    -------
+    method_params_validated : dict
+        Validated parameters. We ensure that the values support indexing.
+    """
+    # noinspection PyUnresolvedReferences
+    n_observations = X.shape[0]
+    method_params_validated = {}
+    for param_key, param_value in params.items():
+        if param_value.shape[0] != n_observations:
+            raise ValueError(
+                f"param_key has wrong number of observations, "
+                f"received={param_value.shape[0]}, "
+                f"expected={n_observations}"
+            )
+        method_params_validated[param_key] = _make_indexable(param_value)
+        method_params_validated[param_key] = safe_indexing(
+            X=method_params_validated[param_key], indices=indices, axis=axis
+        )
+    return method_params_validated
+
+
+def safe_indexing(
+    X: npt.ArrayLike | pd.DataFrame, indices: npt.ArrayLike | None, axis: int = 0
+):
+    """Return rows, items or columns of X using indices.
+
+    Parameters
+    ----------
+    X : array-like
+        Data from which to sample rows.
+
+    indices : array-like, optional
+        Indices of rows or columns.
+        The default (`None`) is to select the entire data.
+
+    axis : int, default=0
+        The axis along which `X` will be sub-sampled. `axis=0` will select
+        rows while `axis=1` will select columns.
+
+    Returns
+    -------
+    subset :
+        Subset of X on axis 0.
+    """
+    if indices is None:
+        return X
+    if hasattr(X, "iloc"):
+        return X.take(indices, axis=axis)
+    if axis == 0:
+        return X[indices]
+    return X[:, indices]
+
+
+def safe_split(
+    X: npt.ArrayLike,
+    y: npt.ArrayLike | None = None,
+    indices: np.ndarray | None = None,
+    axis: int = 0,
+):
+    """Create subset of dataset.
+
+    Slice X, y according to indices for cross-validation.
+
+    Parameters
+    ----------
+    X : array-like
+        Data to be indexed.
+
+    y : array-like
+        Data to be indexed.
+
+    indices : ndarray of int, optional
+        Rows or columns to select from X and y.
+        The default (`None`) is to select the entire data.
+
+    axis : int, default=0
+        The axis along which `X` will be sub-sampled. `axis=0` will select
+        rows while `axis=1` will select columns.
+
+    Returns
+    -------
+    X_subset : array-like
+        Indexed data.
+
+    y_subset : array-like
+        Indexed targets.
+    """
+
+    X_subset = safe_indexing(X, indices=indices, axis=axis)
+    if y is not None:
+        y_subset = safe_indexing(y, indices=indices, axis=axis)
+    else:
+        y_subset = None
+    return X_subset, y_subset
+
+
 def cache_method(cache_name: str) -> Callable:
     """Decorator that caches class methods results into a class dictionary.
 
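`safe_indexing` and `safe_split` are moved up unchanged (see the matching removal below) so that `_check_method_params` can call them; `safe_indexing` is also newly exported. A quick usage sketch (the column names are illustrative):

    import numpy as np
    import pandas as pd
    from skfolio.utils.tools import safe_indexing, safe_split

    X = pd.DataFrame({"AAPL": [0.01, -0.02, 0.03], "MSFT": [0.00, 0.01, -0.01]})
    y = np.array([1.0, 2.0, 3.0])

    safe_indexing(X, indices=[0, 2])          # DataFrame rows 0 and 2 (via .take)
    safe_indexing(X.to_numpy(), [1], axis=1)  # column 1 of a plain ndarray
    X_sub, y_sub = safe_split(X, y, indices=np.array([0, 1]))  # paired slicing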
@@ -348,86 +489,11 @@ def bisection(x: list[np.ndarray]) -> Iterator[list[np.ndarray, np.ndarray]]:
     yield [e[0:mid], e[mid:n]]
 
 
-def safe_indexing(
-    X: npt.ArrayLike | pd.DataFrame, indices: npt.ArrayLike | None, axis: int = 0
-):
-    """
-    Return rows, items or columns of X using indices.
-
-    Parameters
-    ----------
-    X : array-like
-        Data from which to sample rows.
-
-    indices : array-like, optional
-        Indices of rows or columns.
-        The default (`None`) is to select the entire data.
-
-    axis : int, default=0
-        The axis along which `X` will be sub-sampled. `axis=0` will select
-        rows while `axis=1` will select columns.
-
-    Returns
-    -------
-    subset :
-        Subset of X on axis 0.
-    """
-    if indices is None:
-        return X
-    if hasattr(X, "iloc"):
-        return X.take(indices, axis=axis)
-    if axis == 0:
-        return X[indices]
-    return X[:, indices]
-
-
-def safe_split(
-    X: npt.ArrayLike,
-    y: npt.ArrayLike | None = None,
-    indices: np.ndarray | None = None,
-    axis: int = 0,
-):
-    """Create subset of dataset.
-
-    Slice X, y according to indices for cross-validation.
-
-    Parameters
-    ----------
-    X : array-like
-        Data to be indexed.
-
-    y : array-like
-        Data to be indexed.
-
-    indices : ndarray of int, optional
-        Rows or columns to select from X and y.
-        The default (`None`) is to select the entire data.
-
-    axis : int, default=0
-        The axis along which `X` will be sub-sampled. `axis=0` will select
-        rows while `axis=1` will select columns.
-
-    Returns
-    -------
-    X_subset : array-like
-        Indexed data.
-
-    y_subset : array-like
-        Indexed targets.
-    """
-
-    X_subset = safe_indexing(X, indices=indices, axis=axis)
-    if y is not None:
-        y_subset = safe_indexing(y, indices=indices, axis=axis)
-    else:
-        y_subset = None
-    return X_subset, y_subset
-
-
 def fit_single_estimator(
     estimator: Any,
     X: npt.ArrayLike,
-    y: npt.ArrayLike | None = None,
+    y: npt.ArrayLike | None,
+    fit_params: dict,
     indices: np.ndarray | None = None,
     axis: int = 0,
 ):
@@ -444,6 +510,9 @@ def fit_single_estimator(
     y : array-like of shape (n_observations, n_targets), optional
         The target array if provided.
 
+    fit_params : dict
+        Parameters that will be passed to `estimator.fit`.
+
     indices : ndarray of int, optional
         Rows or columns to select from X and y.
         The default (`None`) is to select the entire data.
@@ -457,9 +526,11 @@ def fit_single_estimator(
     fitted_estimator : estimator
         The fitted estimator.
     """
+    fit_params = fit_params if fit_params is not None else {}
+    fit_params = _check_method_params(X, params=fit_params, indices=indices, axis=axis)
 
     X, y = safe_split(X, y, indices=indices, axis=axis)
-    estimator.fit(X, y)
+    estimator.fit(X, y, **fit_params)
     return estimator
 
 
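With the new `fit_params` argument, any parameter whose first dimension matches `X` is validated by `_check_method_params` and sliced together with the training rows. A hedged sketch using a scikit-learn regressor (`sample_weight` is illustrative metadata):

    import numpy as np
    from sklearn.linear_model import LinearRegression
    from skfolio.utils.tools import fit_single_estimator

    X = np.random.randn(10, 2)
    y = np.random.randn(10)
    weights = np.arange(10, dtype=float)

    # `sample_weight` has the same length as X, so it is sliced with
    # `indices`: only the first five weights reach LinearRegression.fit.
    fitted = fit_single_estimator(
        LinearRegression(),
        X,
        y,
        fit_params={"sample_weight": weights},
        indices=np.arange(5),
    )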
@@ -493,7 +564,7 @@ def fit_and_predict(
         Indices of test samples or list of indices.
 
     fit_params : dict
-        Parameters that will be passed to
+        Parameters that will be passed to `estimator.fit`.
 
     method : str
         Invokes the passed method name of the passed estimator.
@@ -511,6 +582,8 @@ def fit_and_predict(
         results of calling 'estimator.method' on each test set in `test`.
     """
     fit_params = fit_params if fit_params is not None else {}
+    fit_params = _check_method_params(X, params=fit_params, indices=train)
+
     X, y = safe_split(X, y, indices=column_indices, axis=1)
     X_train, y_train = safe_split(X, y, indices=train, axis=0)
     if y_train is None:
@@ -570,3 +643,64 @@ def deduplicate_names(names: npt.ArrayLike) -> list[str]:
             names[i] = f"{col}_{cur_count}"
         counts[col] = cur_count + 1
     return names
+
+
+def get_feature_names(X):
+    """Get feature names from X.
+
+    Support for other array containers should place its implementation here.
+
+    Parameters
+    ----------
+    X : {ndarray, dataframe} of shape (n_samples, n_features)
+        Array container to extract feature names.
+
+        - pandas dataframe : The columns will be considered to be feature
+          names. If the dataframe contains non-string feature names, `None` is
+          returned.
+        - All other array containers will return `None`.
+
+    Returns
+    -------
+    names: ndarray or None
+        Feature names of `X`. Unrecognized array containers will return `None`.
+    """
+    feature_names = None
+
+    # extract feature names for support array containers
+    if isinstance(X, pd.DataFrame):
+        # Make sure we can inspect columns names from pandas, even with
+        # versions too old to expose a working implementation of
+        # __dataframe__.column_names() and avoid introducing any
+        # additional copy.
+        # TODO: remove the pandas-specific branch once the minimum supported
+        # version of pandas has a working implementation of
+        # __dataframe__.column_names() that is guaranteed to not introduce any
+        # additional copy of the data without having to impose allow_copy=False
+        # that could fail with other libraries. Note: in the longer term, we
+        # could decide to instead rely on the __dataframe_namespace__ API once
+        # adopted by our minimally supported pandas version.
+        feature_names = np.asarray(X.columns, dtype=object)
+    elif hasattr(X, "__dataframe__"):
+        df_protocol = X.__dataframe__()
+        feature_names = np.asarray(list(df_protocol.column_names()), dtype=object)
+
+    if feature_names is None or len(feature_names) == 0:
+        return
+
+    types = sorted(t.__qualname__ for t in set(type(v) for v in feature_names))
+
+    # mixed type of string and non-string is not supported
+    if len(types) > 1 and "str" in types:
+        raise TypeError(
+            "Feature names are only supported if all input features have string names, "
+            f"but your input has {types} as feature name / column name types. "
+            "If you want feature names to be stored and validated, you must convert "
+            "them all to strings, by using X.columns = X.columns.astype(str) for "
+            "example. Otherwise you can remove feature / column names from your input "
+            "data, or convert them all to a non-string data type."
+        )
+
+    # Only feature names of all strings are supported
+    if len(types) == 1 and types[0] == "str":
+        return feature_names
{skfolio-0.2.3.dist-info → skfolio-0.3.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: skfolio
-Version: 0.2.3
+Version: 0.3.0
 Summary: Portfolio optimization built on top of scikit-learn
 Author-email: Hugo Delatte <delatte.hugo@gmail.com>
 Maintainer-email: Hugo Delatte <delatte.hugo@gmail.com>
@@ -60,7 +60,7 @@ Requires-Dist: numpy <2.0.0,>=1.23.4
 Requires-Dist: scipy >=1.8.0
 Requires-Dist: pandas >=1.4.1
 Requires-Dist: cvxpy >=1.4.1
-Requires-Dist: scikit-learn >=1.
+Requires-Dist: scikit-learn >=1.5.0
 Requires-Dist: joblib >=1.3.2
 Requires-Dist: plotly >=5.22.0
 Provides-Extra: docs
@@ -237,6 +237,7 @@ Available models
 * Oracle Approximating Shrinkage
 * Shrunk Covariance
 * Graphical Lasso CV
+* Implied Covariance
 
 * Distance Estimator:
 * Pearson Distance