skfolio 0.2.3__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (52)
  1. skfolio/datasets/__init__.py +2 -0
  2. skfolio/datasets/_base.py +51 -0
  3. skfolio/distance/_distance.py +15 -4
  4. skfolio/model_selection/_combinatorial.py +2 -2
  5. skfolio/model_selection/_validation.py +70 -15
  6. skfolio/model_selection/_walk_forward.py +3 -3
  7. skfolio/moments/__init__.py +2 -0
  8. skfolio/moments/covariance/__init__.py +11 -11
  9. skfolio/moments/covariance/_base.py +10 -9
  10. skfolio/moments/covariance/_denoise_covariance.py +181 -0
  11. skfolio/moments/covariance/_detone_covariance.py +158 -0
  12. skfolio/moments/covariance/_empirical_covariance.py +100 -0
  13. skfolio/moments/covariance/_ew_covariance.py +109 -0
  14. skfolio/moments/covariance/_gerber_covariance.py +157 -0
  15. skfolio/moments/covariance/_graphical_lasso_cv.py +194 -0
  16. skfolio/moments/covariance/_implied_covariance.py +462 -0
  17. skfolio/moments/covariance/_ledoit_wolf.py +140 -0
  18. skfolio/moments/covariance/_oas.py +115 -0
  19. skfolio/moments/covariance/_shrunk_covariance.py +104 -0
  20. skfolio/moments/expected_returns/__init__.py +4 -7
  21. skfolio/moments/expected_returns/_empirical_mu.py +63 -0
  22. skfolio/moments/expected_returns/_equilibrium_mu.py +124 -0
  23. skfolio/moments/expected_returns/_ew_mu.py +69 -0
  24. skfolio/moments/expected_returns/{_expected_returns.py → _shrunk_mu.py} +22 -200
  25. skfolio/optimization/cluster/_nco.py +46 -8
  26. skfolio/optimization/cluster/hierarchical/_base.py +21 -1
  27. skfolio/optimization/cluster/hierarchical/_herc.py +18 -4
  28. skfolio/optimization/cluster/hierarchical/_hrp.py +13 -4
  29. skfolio/optimization/convex/_base.py +10 -1
  30. skfolio/optimization/convex/_distributionally_robust.py +12 -2
  31. skfolio/optimization/convex/_maximum_diversification.py +9 -2
  32. skfolio/optimization/convex/_mean_risk.py +33 -6
  33. skfolio/optimization/convex/_risk_budgeting.py +5 -2
  34. skfolio/optimization/ensemble/_stacking.py +32 -9
  35. skfolio/optimization/naive/_naive.py +20 -2
  36. skfolio/population/_population.py +2 -0
  37. skfolio/prior/_base.py +1 -1
  38. skfolio/prior/_black_litterman.py +20 -2
  39. skfolio/prior/_empirical.py +38 -5
  40. skfolio/prior/_factor_model.py +44 -7
  41. skfolio/uncertainty_set/_base.py +30 -9
  42. skfolio/uncertainty_set/_bootstrap.py +26 -10
  43. skfolio/uncertainty_set/_empirical.py +25 -10
  44. skfolio/utils/stats.py +24 -3
  45. skfolio/utils/tools.py +213 -79
  46. {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/METADATA +3 -2
  47. skfolio-0.3.1.dist-info/RECORD +91 -0
  48. {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/WHEEL +1 -1
  49. skfolio/moments/covariance/_covariance.py +0 -1114
  50. skfolio-0.2.3.dist-info/RECORD +0 -79
  51. {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/LICENSE +0 -0
  52. {skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/top_level.txt +0 -0
skfolio/uncertainty_set/_bootstrap.py CHANGED
@@ -11,6 +11,7 @@
 import numpy as np
 import numpy.typing as npt
 import scipy.stats as st
+import sklearn.utils.metadata_routing as skm
 
 from skfolio.prior import BasePrior, EmpiricalPrior
 from skfolio.uncertainty_set._base import (
@@ -86,8 +87,6 @@ class BootstrapMuUncertaintySet(BaseMuUncertaintySet):
     Patton, Politis & White (2009).
     """
 
-    prior_estimator_: BasePrior
-
     def __init__(
         self,
         prior_estimator: BasePrior | None = None,
@@ -97,7 +96,7 @@ class BootstrapMuUncertaintySet(BaseMuUncertaintySet):
         block_size: float | None = None,
         seed: int | None = None,
     ):
-        self.prior_estimator = prior_estimator
+        super().__init__(prior_estimator=prior_estimator)
         self.confidence_level = confidence_level
         self.diagonal = diagonal
         self.n_bootstrap_samples = n_bootstrap_samples
@@ -105,7 +104,7 @@ class BootstrapMuUncertaintySet(BaseMuUncertaintySet):
         self.seed = seed
 
     def fit(
-        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None
+        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None, **fit_params
     ) -> "BootstrapMuUncertaintySet":
         """Fit the Bootstrap Mu Uncertainty set estimator.
 
@@ -118,18 +117,27 @@ class BootstrapMuUncertaintySet(BaseMuUncertaintySet):
            Price returns of factors.
            The default is `None`.
 
+        **fit_params : dict
+            Parameters to pass to the underlying estimators.
+            Only available if `enable_metadata_routing=True`, which can be
+            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
+            See :ref:`Metadata Routing User Guide <metadata_routing>` for
+            more details.
+
         Returns
         -------
         self : BootstrapMuUncertaintySet
            Fitted estimator.
         """
+        routed_params = skm.process_routing(self, "fit", **fit_params)
+
        self.prior_estimator_ = check_estimator(
            self.prior_estimator,
            default=EmpiricalPrior(),
            check_type=BasePrior,
        )
        # fitting estimators
-        self.prior_estimator_.fit(X, y)
+        self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
        mu = self.prior_estimator_.prior_model_.mu
        returns = self.prior_estimator_.prior_model_.returns
        n_assets = returns.shape[1]
@@ -217,8 +225,6 @@ class BootstrapCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
     Patton, Politis & White (2009).
     """
 
-    prior_estimator_: BasePrior
-
     def __init__(
         self,
         prior_estimator: BasePrior | None = None,
@@ -228,14 +234,16 @@ class BootstrapCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
         block_size: float | None = None,
         seed: int | None = None,
     ):
-        self.prior_estimator = prior_estimator
+        super().__init__(prior_estimator=prior_estimator)
         self.confidence_level = confidence_level
         self.diagonal = diagonal
         self.n_bootstrap_samples = n_bootstrap_samples
         self.block_size = block_size
         self.seed = seed
 
-    def fit(self, X: npt.ArrayLike, y=None) -> "BootstrapCovarianceUncertaintySet":
+    def fit(
+        self, X: npt.ArrayLike, y=None, **fit_params
+    ) -> "BootstrapCovarianceUncertaintySet":
         """Fit the Bootstrap Covariance Uncertainty set estimator.
 
         Parameters
@@ -247,11 +255,19 @@ class BootstrapCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
            Price returns of factors.
            The default is `None`.
 
+        **fit_params : dict
+            Parameters to pass to the underlying estimators.
+            Only available if `enable_metadata_routing=True`, which can be
+            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
+            See :ref:`Metadata Routing User Guide <metadata_routing>` for
+            more details.
+
         Returns
         -------
         self : EmpiricalCovarianceUncertaintySet
            Fitted estimator.
         """
+        routed_params = skm.process_routing(self, "fit", **fit_params)
 
        self.prior_estimator_ = check_estimator(
            self.prior_estimator,
@@ -259,7 +275,7 @@ class BootstrapCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
            check_type=BasePrior,
        )
        # fitting estimators
-        self.prior_estimator_.fit(X, y)
+        self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
        covariance = self.prior_estimator_.prior_model_.covariance
        returns = self.prior_estimator_.prior_model_.returns
        n_assets = returns.shape[1]
skfolio/uncertainty_set/_empirical.py CHANGED
@@ -11,6 +11,7 @@
 import numpy as np
 import numpy.typing as npt
 import scipy.stats as st
+import sklearn.utils.metadata_routing as skm
 
 from skfolio.prior import BasePrior, EmpiricalPrior
 from skfolio.uncertainty_set._base import (
@@ -77,20 +78,18 @@ class EmpiricalMuUncertaintySet(BaseMuUncertaintySet):
     Schöttle & Werner (2009).
     """
 
-    prior_estimator_: BasePrior
-
     def __init__(
         self,
         prior_estimator: BasePrior | None = None,
         confidence_level: float = 0.95,
         diagonal: bool = True,
     ):
-        self.prior_estimator = prior_estimator
+        super().__init__(prior_estimator=prior_estimator)
         self.confidence_level = confidence_level
         self.diagonal = diagonal
 
     def fit(
-        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None
+        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None, **fit_params
     ) -> "EmpiricalMuUncertaintySet":
         """Fit the Empirical Mu Uncertainty set estimator.
 
@@ -103,18 +102,27 @@ class EmpiricalMuUncertaintySet(BaseMuUncertaintySet):
            Price returns of factors.
            The default is `None`.
 
+        **fit_params : dict
+            Parameters to pass to the underlying estimators.
+            Only available if `enable_metadata_routing=True`, which can be
+            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
+            See :ref:`Metadata Routing User Guide <metadata_routing>` for
+            more details.
+
         Returns
         -------
         self : EmpiricalMuUncertaintySet
            Fitted estimator.
         """
+        routed_params = skm.process_routing(self, "fit", **fit_params)
+
        self.prior_estimator_ = check_estimator(
            self.prior_estimator,
            default=EmpiricalPrior(),
            check_type=BasePrior,
        )
        # fitting estimators
-        self.prior_estimator_.fit(X, y)
+        self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
 
        prior_model = self.prior_estimator_.prior_model_
        n_observations, n_assets = prior_model.returns.shape
@@ -185,20 +193,18 @@ class EmpiricalCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
     Schöttle & Werner (2009).
     """
 
-    prior_estimator_: BasePrior
-
     def __init__(
         self,
         prior_estimator: BasePrior | None = None,
         confidence_level: float = 0.95,
         diagonal: bool = True,
     ):
-        self.prior_estimator = prior_estimator
+        super().__init__(prior_estimator=prior_estimator)
         self.confidence_level = confidence_level
         self.diagonal = diagonal
 
     def fit(
-        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None
+        self, X: npt.ArrayLike, y: npt.ArrayLike | None = None, **fit_params
     ) -> "EmpiricalCovarianceUncertaintySet":
         """Fit the Empirical Covariance Uncertainty set estimator.
 
@@ -211,18 +217,27 @@ class EmpiricalCovarianceUncertaintySet(BaseCovarianceUncertaintySet):
            Price returns of factors.
            The default is `None`.
 
+        **fit_params : dict
+            Parameters to pass to the underlying estimators.
+            Only available if `enable_metadata_routing=True`, which can be
+            set by using ``sklearn.set_config(enable_metadata_routing=True)``.
+            See :ref:`Metadata Routing User Guide <metadata_routing>` for
+            more details.
+
         Returns
         -------
         self : EmpiricalCovarianceUncertaintySet
            Fitted estimator.
         """
+        routed_params = skm.process_routing(self, "fit", **fit_params)
+
        self.prior_estimator_ = check_estimator(
            self.prior_estimator,
            default=EmpiricalPrior(),
            check_type=BasePrior,
        )
        # fitting estimators
-        self.prior_estimator_.fit(X, y)
+        self.prior_estimator_.fit(X, y, **routed_params.prior_estimator.fit)
 
        prior_model = self.prior_estimator_.prior_model_
        n_observations, n_assets = prior_model.returns.shape
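
All four uncertainty-set estimators above follow the same pattern: `skm.process_routing` validates the incoming `**fit_params` against the estimator's routing map, and the resulting bunch is unpacked into the inner `prior_estimator_.fit` call. The following is a minimal self-contained sketch of that caller/consumer contract, assuming scikit-learn >= 1.5; `DemoRouter` and `inner` are illustrative names, not skfolio API:

import sklearn
import sklearn.base as skb
import sklearn.utils.metadata_routing as skm
from sklearn.linear_model import LinearRegression

sklearn.set_config(enable_metadata_routing=True)


class DemoRouter(skb.BaseEstimator):
    """Toy meta-estimator that routes fit metadata to an inner estimator."""

    def __init__(self, inner=None):
        self.inner = inner

    def get_metadata_routing(self):
        # Declare that metadata requested by `inner.fit` is forwarded to it,
        # mirroring how the uncertainty sets route to `prior_estimator`.
        return skm.MetadataRouter(owner=self.__class__.__name__).add(
            inner=self.inner,
            method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
        )

    def fit(self, X, y=None, **fit_params):
        # Validate and split the incoming metadata per consumer and method.
        routed_params = skm.process_routing(self, "fit", **fit_params)
        self.inner_ = skb.clone(self.inner)
        self.inner_.fit(X, y, **routed_params.inner.fit)
        return self


# The consumer must explicitly request the metadata it accepts.
inner = LinearRegression().set_fit_request(sample_weight=True)
DemoRouter(inner=inner).fit(
    [[0.0], [1.0], [2.0]], [0.0, 1.0, 2.0], sample_weight=[1.0, 1.0, 2.0]
)

Without `sklearn.set_config(enable_metadata_routing=True)`, passing extra keyword arguments to `fit` raises, which is why the docstrings above call this out.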
skfolio/utils/stats.py CHANGED
@@ -1,12 +1,13 @@
 """Tools module"""
 
+import warnings
+
 # Copyright (c) 2023
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
 # License: BSD 3 clause
 # Implementation derived from:
 # Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
 # Statsmodels, Copyright (C) 2006, Jonathan E. Taylor, Licensed under BSD 3 clause.
-
 from enum import auto
 
 import numpy as np
@@ -102,7 +103,7 @@ def n_bins_knuth(x: np.ndarray) -> int:
     x = np.sort(x)
     n = len(x)
 
-    def func(y: float):
+    def func(y: np.ndarray) -> float:
         y = y[0]
         if y <= 0:
             return np.inf
@@ -301,9 +302,18 @@ def corr_to_cov(corr: np.ndarray, std: np.ndarray):
 _CLIPPING_VALUE = 1e-13
 
 
-def cov_nearest(cov: np.ndarray, higham: bool = False, higham_max_iteration: int = 100):
+def cov_nearest(
+    cov: np.ndarray,
+    higham: bool = False,
+    higham_max_iteration: int = 100,
+    warn: bool = False,
+):
     """Compute the nearest covariance matrix that is positive definite and with a
     Cholesky decomposition that can be computed. The variance is left unchanged.
+    A covariance matrix that is not positive definite often occurs in high
+    dimensional problems. It can be due to multicollinearity, floating-point
+    inaccuracies, or when the number of observations is smaller than the number of
+    assets.
 
     First, it converts the covariance matrix to a correlation matrix.
     Then, it finds the nearest correlation matrix and converts it back to a covariance
@@ -330,6 +340,10 @@ def cov_nearest(cov: np.ndarray, higham: bool = False, higham_max_iteration: int
        Maximum number of iterations of the Higham (2002) algorithm.
        The default value is `100`.
 
+    warn : bool, default=False
+        If this is set to True, a user warning is emitted when the covariance matrix
+        is not positive definite and replaced by the nearest. The default is False.
+
     Returns
     -------
     cov : ndarray
@@ -348,6 +362,13 @@ def cov_nearest(cov: np.ndarray, higham: bool = False, higham_max_iteration: int
     if is_cholesky_dec(cov) and is_positive_definite(cov):
         return cov
 
+    if warn:
+        warnings.warn(
+            "The covariance matrix is not positive definite. "
+            f"The {'Higham' if higham else 'Clipping'} algorithm will be used to find "
+            "the nearest positive definite covariance.",
+            stacklevel=2,
+        )
     corr, std = cov_to_corr(cov)
 
     if higham:
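
A short usage sketch of the new `warn` flag, assuming the 0.3.1 signature shown above and that `cov_nearest` is imported from `skfolio.utils.stats`; the rank-deficient input is illustrative:

import numpy as np
from skfolio.utils.stats import cov_nearest

rng = np.random.default_rng(0)
# Fewer observations than assets, so the sample covariance is singular,
# one of the high-dimensional cases described in the docstring.
returns = rng.normal(size=(5, 10))
cov = np.cov(returns, rowvar=False)

# Emits a UserWarning and repairs the matrix with the clipping algorithm.
nearest = cov_nearest(cov, higham=False, warn=True)
print(np.linalg.eigvalsh(nearest).min())  # smallest eigenvalue after repair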
skfolio/utils/tools.py CHANGED
@@ -15,6 +15,7 @@ from typing import Any
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
+import scipy.sparse as sp
 import sklearn as sk
 import sklearn.base as skb
 
@@ -29,9 +30,11 @@ __all__ = [
     "safe_split",
     "fit_single_estimator",
     "fit_and_predict",
+    "safe_indexing",
     "deduplicate_names",
     "default_asset_names",
     "check_estimator",
+    "get_feature_names",
 ]
 
 GenericAlias = type(list[int])
@@ -115,6 +118,144 @@ def _make_key(args, kwds) -> int:
     return hash(key)
 
 
+def _make_indexable(iterable):
+    """Ensure iterable supports indexing or convert to an indexable variant.
+
+    Convert sparse matrices to csr and other non-indexable iterables to arrays.
+    Let `None` and indexable objects (e.g. pandas dataframes) pass unchanged.
+
+    Parameters
+    ----------
+    iterable : {list, dataframe, ndarray, sparse matrix} or None
+        Object to be converted to an indexable iterable.
+    """
+    if sp.issparse(iterable):
+        return iterable.tocsr()
+    elif hasattr(iterable, "__getitem__") or hasattr(iterable, "iloc"):
+        return iterable
+    elif iterable is None:
+        return iterable
+    return np.array(iterable)
+
+
+def _check_method_params(
+    X: npt.ArrayLike, params: dict, indices: np.ndarray = None, axis: int = 0
+):
+    """Check and validate the parameters passed to a specific
+    method like `fit`.
+
+    Parameters
+    ----------
+    X : array-like of shape (n_samples, n_features)
+        Data array.
+
+    params : dict
+        Dictionary containing the parameters passed to the method.
+
+    indices : ndarray of shape (n_samples,), default=None
+        Indices to be selected if the parameter has the same size as `X`.
+
+    axis : int, default=0
+        The axis along which `X` will be sub-sampled. `axis=0` will select
+        rows while `axis=1` will select columns.
+
+    Returns
+    -------
+    method_params_validated : dict
+        Validated parameters. We ensure that the values support indexing.
+    """
+    # noinspection PyUnresolvedReferences
+    n_observations = X.shape[0]
+    method_params_validated = {}
+    for param_key, param_value in params.items():
+        if param_value.shape[0] != n_observations:
+            raise ValueError(
+                f"{param_key} has wrong number of observations, "
+                f"received={param_value.shape[0]}, "
+                f"expected={n_observations}"
+            )
+        method_params_validated[param_key] = _make_indexable(param_value)
+        method_params_validated[param_key] = safe_indexing(
+            X=method_params_validated[param_key], indices=indices, axis=axis
+        )
+    return method_params_validated
+
+
+def safe_indexing(
+    X: npt.ArrayLike | pd.DataFrame, indices: npt.ArrayLike | None, axis: int = 0
+):
+    """Return rows, items or columns of X using indices.
+
+    Parameters
+    ----------
+    X : array-like
+        Data from which to sample rows.
+
+    indices : array-like, optional
+        Indices of rows or columns.
+        The default (`None`) is to select the entire data.
+
+    axis : int, default=0
+        The axis along which `X` will be sub-sampled. `axis=0` will select
+        rows while `axis=1` will select columns.
+
+    Returns
+    -------
+    subset :
+        Subset of X on axis 0.
+    """
+    if indices is None:
+        return X
+    if hasattr(X, "iloc"):
+        return X.take(indices, axis=axis)
+    if axis == 0:
+        return X[indices]
+    return X[:, indices]
+
+
+def safe_split(
+    X: npt.ArrayLike,
+    y: npt.ArrayLike | None = None,
+    indices: np.ndarray | None = None,
+    axis: int = 0,
+):
+    """Create subset of dataset.
+
+    Slice X, y according to indices for cross-validation.
+
+    Parameters
+    ----------
+    X : array-like
+        Data to be indexed.
+
+    y : array-like
+        Data to be indexed.
+
+    indices : ndarray of int, optional
+        Rows or columns to select from X and y.
+        The default (`None`) is to select the entire data.
+
+    axis : int, default=0
+        The axis along which `X` will be sub-sampled. `axis=0` will select
+        rows while `axis=1` will select columns.
+
+    Returns
+    -------
+    X_subset : array-like
+        Indexed data.
+
+    y_subset : array-like
+        Indexed targets.
+    """
+
+    X_subset = safe_indexing(X, indices=indices, axis=axis)
+    if y is not None:
+        y_subset = safe_indexing(y, indices=indices, axis=axis)
+    else:
+        y_subset = None
+    return X_subset, y_subset
+
+
 def cache_method(cache_name: str) -> Callable:
     """Decorator that caches class methods results into a class dictionary.
 
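
A brief sketch of how the relocated helpers behave on the two container types they special-case (pandas objects via `.take`, arrays via fancy indexing); the data is illustrative:

import numpy as np
import pandas as pd
from skfolio.utils.tools import safe_indexing, safe_split

X = pd.DataFrame({"AAPL": [0.01, -0.02, 0.03], "MSFT": [0.00, 0.01, -0.01]})
y = np.array([1.0, 2.0, 3.0])

safe_indexing(X, indices=[0, 2])       # DataFrame rows 0 and 2, via .take
safe_indexing(y, indices=[0, 2])       # array([1., 3.]), via fancy indexing
safe_indexing(X, indices=[1], axis=1)  # MSFT column only

# safe_split applies the same selection to X and y together.
X_sub, y_sub = safe_split(X, y, indices=np.array([0, 1]))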
@@ -348,86 +489,11 @@ def bisection(x: list[np.ndarray]) -> Iterator[list[np.ndarray, np.ndarray]]:
         yield [e[0:mid], e[mid:n]]
 
 
-def safe_indexing(
-    X: npt.ArrayLike | pd.DataFrame, indices: npt.ArrayLike | None, axis: int = 0
-):
-    """
-    Return rows, items or columns of X using indices.
-
-    Parameters
-    ----------
-    X : array-like
-        Data from which to sample rows.
-
-    indices : array-like, optional
-        Indices of rows or columns.
-        The default (`None`) is to select the entire data.
-
-    axis : int, default=0
-        The axis along which `X` will be sub-sampled. `axis=0` will select
-        rows while `axis=1` will select columns.
-
-    Returns
-    -------
-    subset :
-        Subset of X on axis 0.
-    """
-    if indices is None:
-        return X
-    if hasattr(X, "iloc"):
-        return X.take(indices, axis=axis)
-    if axis == 0:
-        return X[indices]
-    return X[:, indices]
-
-
-def safe_split(
-    X: npt.ArrayLike,
-    y: npt.ArrayLike | None = None,
-    indices: np.ndarray | None = None,
-    axis: int = 0,
-):
-    """Create subset of dataset.
-
-    Slice X, y according to indices for cross-validation.
-
-    Parameters
-    ----------
-    X : array-like
-        Data to be indexed.
-
-    y : array-like
-        Data to be indexed.
-
-    indices : ndarray of int, optional
-        Rows or columns to select from X and y.
-        The default (`None`) is to select the entire data.
-
-    axis : int, default=0
-        The axis along which `X` will be sub-sampled. `axis=0` will select
-        rows while `axis=1` will select columns.
-
-    Returns
-    -------
-    X_subset : array-like
-        Indexed data.
-
-    y_subset : array-like
-        Indexed targets.
-    """
-
-    X_subset = safe_indexing(X, indices=indices, axis=axis)
-    if y is not None:
-        y_subset = safe_indexing(y, indices=indices, axis=axis)
-    else:
-        y_subset = None
-    return X_subset, y_subset
-
-
 def fit_single_estimator(
     estimator: Any,
     X: npt.ArrayLike,
-    y: npt.ArrayLike | None = None,
+    y: npt.ArrayLike | None,
+    fit_params: dict,
     indices: np.ndarray | None = None,
     axis: int = 0,
 ):
@@ -444,6 +510,9 @@ def fit_single_estimator(
     y : array-like of shape (n_observations, n_targets), optional
        The target array if provided.
 
+    fit_params : dict
+        Parameters that will be passed to `estimator.fit`.
+
     indices : ndarray of int, optional
        Rows or columns to select from X and y.
        The default (`None`) is to select the entire data.
@@ -457,9 +526,11 @@ def fit_single_estimator(
     fitted_estimator : estimator
        The fitted estimator.
     """
+    fit_params = fit_params if fit_params is not None else {}
+    fit_params = _check_method_params(X, params=fit_params, indices=indices, axis=axis)
 
     X, y = safe_split(X, y, indices=indices, axis=axis)
-    estimator.fit(X, y)
+    estimator.fit(X, y, **fit_params)
     return estimator
 
 
@@ -493,7 +564,7 @@ def fit_and_predict(
        Indices of test samples or list of indices.
 
     fit_params : dict
-        Parameters that will be passed to ``estimator.fit``.
+        Parameters that will be passed to `estimator.fit`.
 
     method : str
        Invokes the passed method name of the passed estimator.
@@ -511,6 +582,8 @@ def fit_and_predict(
        results of calling 'estimator.method' on each test set in `test`.
     """
     fit_params = fit_params if fit_params is not None else {}
+    fit_params = _check_method_params(X, params=fit_params, indices=train)
+
     X, y = safe_split(X, y, indices=column_indices, axis=1)
     X_train, y_train = safe_split(X, y, indices=train, axis=0)
     if y_train is None:
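
Note the signature change to `fit_single_estimator`: `y` and `fit_params` are now required, and `_check_method_params` slices any per-observation entry of `fit_params` with `indices` before fitting. A hedged sketch of that contract, with illustrative weights and indices:

import numpy as np
from sklearn.linear_model import LinearRegression
from skfolio.utils.tools import fit_single_estimator

X = np.arange(10.0).reshape(-1, 1)
y = 2.0 * X.ravel()
sample_weight = np.ones(10)
train = np.arange(6)

# `sample_weight` has one value per observation, so it is sliced to the
# six training rows before being passed to `LinearRegression.fit`.
model = fit_single_estimator(
    LinearRegression(),
    X,
    y,
    fit_params={"sample_weight": sample_weight},
    indices=train,
)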
@@ -570,3 +643,64 @@ def deduplicate_names(names: npt.ArrayLike) -> list[str]:
             names[i] = f"{col}_{cur_count}"
             counts[col] = cur_count + 1
     return names
+
+
+def get_feature_names(X):
+    """Get feature names from X.
+
+    Support for other array containers should place its implementation here.
+
+    Parameters
+    ----------
+    X : {ndarray, dataframe} of shape (n_samples, n_features)
+        Array container to extract feature names.
+
+        - pandas dataframe : The columns will be considered to be feature
+          names. If the dataframe contains non-string feature names, `None` is
+          returned.
+        - All other array containers will return `None`.
+
+    Returns
+    -------
+    names: ndarray or None
+        Feature names of `X`. Unrecognized array containers will return `None`.
+    """
+    feature_names = None
+
+    # extract feature names for support array containers
+    if isinstance(X, pd.DataFrame):
+        # Make sure we can inspect columns names from pandas, even with
+        # versions too old to expose a working implementation of
+        # __dataframe__.column_names() and avoid introducing any
+        # additional copy.
+        # TODO: remove the pandas-specific branch once the minimum supported
+        # version of pandas has a working implementation of
+        # __dataframe__.column_names() that is guaranteed to not introduce any
+        # additional copy of the data without having to impose allow_copy=False
+        # that could fail with other libraries. Note: in the longer term, we
+        # could decide to instead rely on the __dataframe_namespace__ API once
+        # adopted by our minimally supported pandas version.
+        feature_names = np.asarray(X.columns, dtype=object)
+    elif hasattr(X, "__dataframe__"):
+        df_protocol = X.__dataframe__()
+        feature_names = np.asarray(list(df_protocol.column_names()), dtype=object)
+
+    if feature_names is None or len(feature_names) == 0:
+        return
+
+    types = sorted(t.__qualname__ for t in set(type(v) for v in feature_names))
+
+    # mixed type of string and non-string is not supported
+    if len(types) > 1 and "str" in types:
+        raise TypeError(
+            "Feature names are only supported if all input features have string names, "
+            f"but your input has {types} as feature name / column name types. "
+            "If you want feature names to be stored and validated, you must convert "
+            "them all to strings, by using X.columns = X.columns.astype(str) for "
+            "example. Otherwise you can remove feature / column names from your input "
+            "data, or convert them all to a non-string data type."
+        )
+
+    # Only feature names of all strings are supported
+    if len(types) == 1 and types[0] == "str":
+        return feature_names
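
A short sketch of the `get_feature_names` contract (string column names come back as an object array, anything else yields `None`); the frames are illustrative:

import numpy as np
import pandas as pd
from skfolio.utils.tools import get_feature_names

df = pd.DataFrame({"AAPL": [0.01, -0.02], "MSFT": [0.00, 0.01]})
print(get_feature_names(df))             # ['AAPL' 'MSFT']
print(get_feature_names(df.to_numpy()))  # None

# Non-string column names are ignored (mixed str/int names raise TypeError).
print(get_feature_names(pd.DataFrame(np.zeros((2, 2)))))  # None (int columns)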
{skfolio-0.2.3.dist-info → skfolio-0.3.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: skfolio
-Version: 0.2.3
+Version: 0.3.1
 Summary: Portfolio optimization built on top of scikit-learn
 Author-email: Hugo Delatte <delatte.hugo@gmail.com>
 Maintainer-email: Hugo Delatte <delatte.hugo@gmail.com>
@@ -60,7 +60,7 @@ Requires-Dist: numpy <2.0.0,>=1.23.4
 Requires-Dist: scipy >=1.8.0
 Requires-Dist: pandas >=1.4.1
 Requires-Dist: cvxpy >=1.4.1
-Requires-Dist: scikit-learn >=1.3.2
+Requires-Dist: scikit-learn >=1.5.0
 Requires-Dist: joblib >=1.3.2
 Requires-Dist: plotly >=5.22.0
 Provides-Extra: docs
@@ -237,6 +237,7 @@ Available models
     * Oracle Approximating Shrinkage
     * Shrunk Covariance
     * Graphical Lasso CV
+    * Implied Covariance
 
 * Distance Estimator:
     * Pearson Distance