skfolio 0.2.3__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {skfolio-0.2.3/src/skfolio.egg-info → skfolio-0.3.0}/PKG-INFO +3 -2
- {skfolio-0.2.3 → skfolio-0.3.0}/README.rst +1 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/pyproject.toml +3 -3
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/datasets/__init__.py +2 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/datasets/_base.py +51 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/distance/_distance.py +15 -4
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/model_selection/_combinatorial.py +2 -2
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/model_selection/_validation.py +70 -15
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/model_selection/_walk_forward.py +3 -3
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/moments/__init__.py +2 -0
- skfolio-0.3.0/src/skfolio/moments/covariance/__init__.py +29 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/moments/covariance/_base.py +10 -9
- skfolio-0.3.0/src/skfolio/moments/covariance/_denoise_covariance.py +181 -0
- skfolio-0.3.0/src/skfolio/moments/covariance/_detone_covariance.py +158 -0
- skfolio-0.3.0/src/skfolio/moments/covariance/_empirical_covariance.py +100 -0
- skfolio-0.3.0/src/skfolio/moments/covariance/_ew_covariance.py +109 -0
- skfolio-0.3.0/src/skfolio/moments/covariance/_gerber_covariance.py +157 -0
- skfolio-0.3.0/src/skfolio/moments/covariance/_graphical_lasso_cv.py +194 -0
- skfolio-0.3.0/src/skfolio/moments/covariance/_implied_covariance.py +454 -0
- skfolio-0.3.0/src/skfolio/moments/covariance/_ledoit_wolf.py +140 -0
- skfolio-0.3.0/src/skfolio/moments/covariance/_oas.py +115 -0
- skfolio-0.3.0/src/skfolio/moments/covariance/_shrunk_covariance.py +104 -0
- skfolio-0.3.0/src/skfolio/moments/expected_returns/__init__.py +18 -0
- skfolio-0.3.0/src/skfolio/moments/expected_returns/_empirical_mu.py +63 -0
- skfolio-0.3.0/src/skfolio/moments/expected_returns/_equilibrium_mu.py +124 -0
- skfolio-0.3.0/src/skfolio/moments/expected_returns/_ew_mu.py +69 -0
- skfolio-0.2.3/src/skfolio/moments/expected_returns/_expected_returns.py → skfolio-0.3.0/src/skfolio/moments/expected_returns/_shrunk_mu.py +22 -200
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/cluster/_nco.py +46 -8
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/cluster/hierarchical/_base.py +21 -1
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/cluster/hierarchical/_herc.py +18 -4
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/cluster/hierarchical/_hrp.py +13 -4
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/convex/_base.py +10 -1
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/convex/_distributionally_robust.py +12 -2
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/convex/_maximum_diversification.py +9 -2
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/convex/_mean_risk.py +33 -6
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/convex/_risk_budgeting.py +5 -2
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/ensemble/_stacking.py +32 -9
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/naive/_naive.py +20 -2
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/population/_population.py +2 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/prior/_base.py +1 -1
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/prior/_black_litterman.py +20 -2
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/prior/_empirical.py +38 -5
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/prior/_factor_model.py +44 -7
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/uncertainty_set/_base.py +30 -9
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/uncertainty_set/_bootstrap.py +26 -10
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/uncertainty_set/_empirical.py +25 -10
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/utils/stats.py +24 -3
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/utils/tools.py +213 -79
- {skfolio-0.2.3 → skfolio-0.3.0/src/skfolio.egg-info}/PKG-INFO +3 -2
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio.egg-info/SOURCES.txt +14 -2
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio.egg-info/requires.txt +1 -1
- skfolio-0.2.3/src/skfolio/moments/covariance/__init__.py +0 -29
- skfolio-0.2.3/src/skfolio/moments/covariance/_covariance.py +0 -1114
- skfolio-0.2.3/src/skfolio/moments/expected_returns/__init__.py +0 -21
- {skfolio-0.2.3 → skfolio-0.3.0}/LICENSE +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/MANIFEST.in +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/setup.cfg +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/cluster/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/cluster/_hierarchical.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/datasets/data/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/datasets/data/factors_dataset.csv.gz +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/datasets/data/sp500_index.csv.gz +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/distance/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/distance/_base.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/exceptions.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/measures/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/measures/_enums.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/measures/_measures.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/metrics/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/metrics/_scorer.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/model_selection/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/moments/expected_returns/_base.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/_base.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/cluster/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/cluster/hierarchical/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/convex/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/ensemble/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/ensemble/_base.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/optimization/naive/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/population/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/portfolio/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/portfolio/_base.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/portfolio/_multi_period_portfolio.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/portfolio/_portfolio.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/pre_selection/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/pre_selection/_pre_selection.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/preprocessing/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/preprocessing/_returns.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/prior/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/typing.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/uncertainty_set/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/utils/__init__.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/utils/bootstrap.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/utils/equations.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio/utils/sorting.py +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio.egg-info/dependency_links.txt +0 -0
- {skfolio-0.2.3 → skfolio-0.3.0}/src/skfolio.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: skfolio
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.3.0
|
4
4
|
Summary: Portfolio optimization built on top of scikit-learn
|
5
5
|
Author-email: Hugo Delatte <delatte.hugo@gmail.com>
|
6
6
|
Maintainer-email: Hugo Delatte <delatte.hugo@gmail.com>
|
@@ -60,7 +60,7 @@ Requires-Dist: numpy<2.0.0,>=1.23.4
|
|
60
60
|
Requires-Dist: scipy>=1.8.0
|
61
61
|
Requires-Dist: pandas>=1.4.1
|
62
62
|
Requires-Dist: cvxpy>=1.4.1
|
63
|
-
Requires-Dist: scikit-learn>=1.
|
63
|
+
Requires-Dist: scikit-learn>=1.5.0
|
64
64
|
Requires-Dist: joblib>=1.3.2
|
65
65
|
Requires-Dist: plotly>=5.22.0
|
66
66
|
Provides-Extra: tests
|
@@ -238,6 +238,7 @@ Available models
|
|
238
238
|
* Oracle Approximating Shrinkage
|
239
239
|
* Shrunk Covariance
|
240
240
|
* Graphical Lasso CV
|
241
|
+
* Implied Covariance
|
241
242
|
|
242
243
|
* Distance Estimator:
|
243
244
|
* Pearson Distance
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "skfolio"
|
7
|
-
version = "0.
|
7
|
+
version = "0.3.0"
|
8
8
|
maintainers = [
|
9
9
|
{ name = "Hugo Delatte", email = "delatte.hugo@gmail.com" },
|
10
10
|
]
|
@@ -19,7 +19,7 @@ dependencies = [
|
|
19
19
|
"scipy>=1.8.0",
|
20
20
|
"pandas>=1.4.1",
|
21
21
|
"cvxpy>=1.4.1",
|
22
|
-
"scikit-learn>=1.
|
22
|
+
"scikit-learn>=1.5.0",
|
23
23
|
"joblib>=1.3.2",
|
24
24
|
"plotly>=5.22.0"
|
25
25
|
]
|
@@ -106,7 +106,7 @@ commit_message = "v{version} [skip ci]\n\nAutomatically generated by python-sema
|
|
106
106
|
token = { env = "GH_TOKEN" }
|
107
107
|
|
108
108
|
[tool.ruff]
|
109
|
-
include = ["pyproject.toml", "src/**/*.py"]
|
109
|
+
include = ["pyproject.toml", "src/**/*.py", "tests/**/*.py"]
|
110
110
|
line-length = 88
|
111
111
|
src = ["src"]
|
112
112
|
target-version = "py310"
|
@@ -8,6 +8,7 @@ from skfolio.datasets._base import (
|
|
8
8
|
load_ftse100_dataset,
|
9
9
|
load_nasdaq_dataset,
|
10
10
|
load_sp500_dataset,
|
11
|
+
load_sp500_implied_vol_dataset,
|
11
12
|
load_sp500_index,
|
12
13
|
)
|
13
14
|
|
@@ -17,4 +18,5 @@ __all__ = [
|
|
17
18
|
"load_ftse100_dataset",
|
18
19
|
"load_sp500_dataset",
|
19
20
|
"load_sp500_index",
|
21
|
+
"load_sp500_implied_vol_dataset",
|
20
22
|
]
|
@@ -392,3 +392,54 @@ def load_nasdaq_dataset(data_home=None, download_if_missing=True) -> pd.DataFram
|
|
392
392
|
data_filename, data_home=data_home, download_if_missing=download_if_missing
|
393
393
|
)
|
394
394
|
return df
|
395
|
+
|
396
|
+
|
397
|
+
def load_sp500_implied_vol_dataset(
|
398
|
+
data_home=None, download_if_missing=True
|
399
|
+
) -> pd.DataFrame:
|
400
|
+
"""Load the 3 months ATM implied volatility of the 20 assets from the
|
401
|
+
SP500 dataset.
|
402
|
+
|
403
|
+
This dataset is composed of the 3 months ATM implied volatility of 20 assets
|
404
|
+
from the S&P 500 composition starting from 2010-01-04 up to 2022-12-28.
|
405
|
+
|
406
|
+
The data comes from the Yahoo public API option chains.
|
407
|
+
|
408
|
+
============== ==================
|
409
|
+
Observations 3270
|
410
|
+
Assets 20
|
411
|
+
============== ==================
|
412
|
+
|
413
|
+
Parameters
|
414
|
+
----------
|
415
|
+
data_home : str, optional
|
416
|
+
Specify another download and cache folder for the datasets.
|
417
|
+
By default, all skfolio data is stored in `~/skfolio_data` subfolders.
|
418
|
+
|
419
|
+
download_if_missing : bool, default=True
|
420
|
+
If False, raise an OSError if the data is not locally available
|
421
|
+
instead of trying to download the data from the source site.
|
422
|
+
|
423
|
+
Returns
|
424
|
+
-------
|
425
|
+
df : DataFrame of shape (n_observations, n_assets)
|
426
|
+
Implied volatility DataFrame
|
427
|
+
|
428
|
+
Examples
|
429
|
+
--------
|
430
|
+
>>> from skfolio.datasets import load_sp500_implied_vol_dataset
|
431
|
+
>>> implied_vol = load_sp500_implied_vol_dataset()
|
432
|
+
>>> implied_vol.head()
|
433
|
+
AAPL AMD BAC ... UNH WMT XOM
|
434
|
+
Date ...
|
435
|
+
2010-01-04 0.364353 0.572056 0.382926 ... 0.362751 0.171737 0.201485
|
436
|
+
2010-01-05 0.371865 0.568791 0.374699 ... 0.368504 0.174764 0.203852
|
437
|
+
2010-01-06 0.356746 0.558054 0.349220 ... 0.368514 0.171892 0.197475
|
438
|
+
2010-01-07 0.361084 0.560475 0.354942 ... 0.355792 0.169083 0.200046
|
439
|
+
2010-01-08 0.348085 0.543932 0.360345 ... 0.351130 0.170897 0.204832
|
440
|
+
"""
|
441
|
+
data_filename = "sp500_implied_vol_dataset"
|
442
|
+
df = download_dataset(
|
443
|
+
data_filename, data_home=data_home, download_if_missing=download_if_missing
|
444
|
+
)
|
445
|
+
return df
|
@@ -9,7 +9,8 @@ import numpy.typing as npt
|
|
9
9
|
import pandas as pd
|
10
10
|
import scipy.spatial.distance as scd
|
11
11
|
import scipy.stats as sct
|
12
|
-
import sklearn.metrics as
|
12
|
+
import sklearn.metrics as skmc
|
13
|
+
import sklearn.utils.metadata_routing as skm
|
13
14
|
|
14
15
|
from skfolio.distance._base import BaseDistance
|
15
16
|
from skfolio.moments import BaseCovariance, GerberCovariance
|
@@ -300,7 +301,15 @@ class CovarianceDistance(BaseDistance):
|
|
300
301
|
self.absolute = absolute
|
301
302
|
self.power = power
|
302
303
|
|
303
|
-
def
|
304
|
+
def get_metadata_routing(self):
|
305
|
+
# noinspection PyTypeChecker
|
306
|
+
router = skm.MetadataRouter(owner=self.__class__.__name__).add(
|
307
|
+
covariance_estimator=self.covariance_estimator,
|
308
|
+
method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
|
309
|
+
)
|
310
|
+
return router
|
311
|
+
|
312
|
+
def fit(self, X: npt.ArrayLike, y=None, **fit_params) -> "CovarianceDistance":
|
304
313
|
"""Fit the Covariance Distance estimator.
|
305
314
|
|
306
315
|
Parameters
|
@@ -316,13 +325,15 @@ class CovarianceDistance(BaseDistance):
|
|
316
325
|
self : CovarianceDistance
|
317
326
|
Fitted estimator.
|
318
327
|
"""
|
328
|
+
routed_params = skm.process_routing(self, "fit", **fit_params)
|
329
|
+
|
319
330
|
# fitting estimators
|
320
331
|
self.covariance_estimator_ = check_estimator(
|
321
332
|
self.covariance_estimator,
|
322
333
|
default=GerberCovariance(),
|
323
334
|
check_type=BaseCovariance,
|
324
335
|
)
|
325
|
-
self.covariance_estimator_.fit(X)
|
336
|
+
self.covariance_estimator_.fit(X, y, **routed_params.covariance_estimator.fit)
|
326
337
|
|
327
338
|
# we validate and convert to numpy after all models have been fitted to keep the
|
328
339
|
# features names information.
|
@@ -512,7 +523,7 @@ class MutualInformation(BaseDistance):
|
|
512
523
|
x = X[:, i]
|
513
524
|
y = X[:, j]
|
514
525
|
contingency = np.histogram2d(x, y, bins=n_bins)[0]
|
515
|
-
mutual_information =
|
526
|
+
mutual_information = skmc.mutual_info_score(
|
516
527
|
None, None, contingency=contingency
|
517
528
|
)
|
518
529
|
entropy_x = sct.entropy(np.histogram(x, n_bins)[0])
|
@@ -18,7 +18,7 @@ import numpy as np
|
|
18
18
|
import numpy.typing as npt
|
19
19
|
import pandas as pd
|
20
20
|
import plotly.graph_objects as go
|
21
|
-
import sklearn.model_selection as
|
21
|
+
import sklearn.model_selection as sks
|
22
22
|
import sklearn.utils as sku
|
23
23
|
|
24
24
|
import skfolio.typing as skt
|
@@ -39,7 +39,7 @@ class BaseCombinatorialCV(ABC):
|
|
39
39
|
"""Return the path id of each test sets in each split"""
|
40
40
|
pass
|
41
41
|
|
42
|
-
__repr__ =
|
42
|
+
__repr__ = sks.BaseCrossValidator.__repr__
|
43
43
|
|
44
44
|
|
45
45
|
# TODO: review params and function naming
|
@@ -12,8 +12,10 @@ import numpy as np
|
|
12
12
|
import numpy.typing as npt
|
13
13
|
import sklearn as sk
|
14
14
|
import sklearn.base as skb
|
15
|
-
import sklearn.
|
15
|
+
import sklearn.exceptions as ske
|
16
|
+
import sklearn.model_selection as sks
|
16
17
|
import sklearn.utils as sku
|
18
|
+
import sklearn.utils.metadata_routing as skm
|
17
19
|
import sklearn.utils.parallel as skp
|
18
20
|
|
19
21
|
from skfolio.model_selection._combinatorial import BaseCombinatorialCV
|
@@ -22,16 +24,26 @@ from skfolio.portfolio import MultiPeriodPortfolio
|
|
22
24
|
from skfolio.utils.tools import fit_and_predict, safe_split
|
23
25
|
|
24
26
|
|
27
|
+
def _routing_enabled():
|
28
|
+
"""Return whether metadata routing is enabled.
|
29
|
+
Returns
|
30
|
+
-------
|
31
|
+
enabled : bool
|
32
|
+
Whether metadata routing is enabled. If the config is not set, it
|
33
|
+
defaults to False.
|
34
|
+
"""
|
35
|
+
return sk.get_config().get("enable_metadata_routing", False)
|
36
|
+
|
37
|
+
|
25
38
|
def cross_val_predict(
|
26
39
|
estimator: skb.BaseEstimator,
|
27
40
|
X: npt.ArrayLike,
|
28
41
|
y: npt.ArrayLike = None,
|
29
|
-
|
30
|
-
cv: skm.BaseCrossValidator | BaseCombinatorialCV | int | None = None,
|
42
|
+
cv: sks.BaseCrossValidator | BaseCombinatorialCV | int | None = None,
|
31
43
|
n_jobs: int | None = None,
|
32
44
|
method: str = "predict",
|
33
45
|
verbose: int = 0,
|
34
|
-
|
46
|
+
params: dict | None = None,
|
35
47
|
pre_dispatch: str = "2*n_jobs",
|
36
48
|
column_indices: np.ndarray | None = None,
|
37
49
|
portfolio_params: dict | None = None,
|
@@ -65,11 +77,6 @@ def cross_val_predict(
|
|
65
77
|
Target data (optional).
|
66
78
|
For example, the price returns of the factors.
|
67
79
|
|
68
|
-
groups : array-like of shape (n_observations,), optional
|
69
|
-
Group labels for the samples used while splitting the dataset into
|
70
|
-
train/test set. Only used in conjunction with a "Group" `cv`
|
71
|
-
instance (e.g., `GroupKFold`).
|
72
|
-
|
73
80
|
cv : int | cross-validation generator, optional
|
74
81
|
Determines the cross-validation splitting strategy.
|
75
82
|
Possible inputs for cv are:
|
@@ -90,8 +97,8 @@ def cross_val_predict(
|
|
90
97
|
verbose : int, default=0
|
91
98
|
The verbosity level.
|
92
99
|
|
93
|
-
|
94
|
-
Parameters to pass to the fit
|
100
|
+
params : dict, optional
|
101
|
+
Parameters to pass to the underlying estimator's ``fit`` and the CV splitter.
|
95
102
|
|
96
103
|
pre_dispatch : int or str, default='2*n_jobs'
|
97
104
|
Controls the number of jobs that get dispatched during parallel
|
@@ -121,10 +128,57 @@ def cross_val_predict(
|
|
121
128
|
predictions : MultiPeriodPortfolio | Population
|
122
129
|
This is the result of calling `predict`
|
123
130
|
"""
|
131
|
+
params = {} if params is None else params
|
132
|
+
|
124
133
|
X, y = safe_split(X, y, indices=column_indices, axis=1)
|
125
|
-
X, y
|
126
|
-
|
127
|
-
|
134
|
+
X, y = sku.indexable(X, y)
|
135
|
+
|
136
|
+
if _routing_enabled():
|
137
|
+
# For estimators, a MetadataRouter is created in get_metadata_routing
|
138
|
+
# methods. For these router methods, we create the router to use
|
139
|
+
# `process_routing` on it.
|
140
|
+
# noinspection PyTypeChecker
|
141
|
+
router = (
|
142
|
+
skm.MetadataRouter(owner="cross_validate")
|
143
|
+
.add(
|
144
|
+
splitter=cv,
|
145
|
+
method_mapping=skm.MethodMapping().add(caller="fit", callee="split"),
|
146
|
+
)
|
147
|
+
.add(
|
148
|
+
estimator=estimator,
|
149
|
+
method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
|
150
|
+
)
|
151
|
+
)
|
152
|
+
try:
|
153
|
+
routed_params = skm.process_routing(router, "fit", **params)
|
154
|
+
except ske.UnsetMetadataPassedError as e:
|
155
|
+
# The default exception would mention `fit` since in the above
|
156
|
+
# `process_routing` code, we pass `fit` as the caller. However,
|
157
|
+
# the user is not calling `fit` directly, so we change the message
|
158
|
+
# to make it more suitable for this case.
|
159
|
+
unrequested_params = sorted(e.unrequested_params)
|
160
|
+
raise ske.UnsetMetadataPassedError(
|
161
|
+
message=(
|
162
|
+
f"{unrequested_params} are passed to `cross_val_predict` but are"
|
163
|
+
" not explicitly set as requested or not requested for"
|
164
|
+
f" cross_validate's estimator: {estimator.__class__.__name__} Call"
|
165
|
+
" `.set_fit_request({{metadata}}=True)` on the estimator for"
|
166
|
+
f" each metadata in {unrequested_params} that you want to use and"
|
167
|
+
" `metadata=False` for not using it. See the Metadata Routing User"
|
168
|
+
" guide <https://scikit-learn.org/stable/metadata_routing.html>"
|
169
|
+
" for more information."
|
170
|
+
),
|
171
|
+
unrequested_params=e.unrequested_params,
|
172
|
+
routed_params=e.routed_params,
|
173
|
+
) from None
|
174
|
+
else:
|
175
|
+
routed_params = sku.Bunch()
|
176
|
+
routed_params.splitter = sku.Bunch(split={})
|
177
|
+
routed_params.estimator = sku.Bunch(fit=params)
|
178
|
+
|
179
|
+
cv = sks.check_cv(cv, y)
|
180
|
+
splits = list(cv.split(X, y, **routed_params.splitter.split))
|
181
|
+
|
128
182
|
portfolio_params = {} if portfolio_params is None else portfolio_params.copy()
|
129
183
|
|
130
184
|
# We ensure that the folds are not shuffled
|
@@ -148,6 +202,7 @@ def cross_val_predict(
|
|
148
202
|
# and that it is pickle-able.
|
149
203
|
parallel = skp.Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)
|
150
204
|
# TODO remove when https://github.com/joblib/joblib/issues/1071 is fixed
|
205
|
+
# noinspection PyCallingNonCallable
|
151
206
|
predictions = parallel(
|
152
207
|
skp.delayed(fit_and_predict)(
|
153
208
|
sk.clone(estimator),
|
@@ -155,7 +210,7 @@ def cross_val_predict(
|
|
155
210
|
y,
|
156
211
|
train=train,
|
157
212
|
test=test,
|
158
|
-
fit_params=
|
213
|
+
fit_params=routed_params.estimator.fit,
|
159
214
|
method=method,
|
160
215
|
)
|
161
216
|
for train, test in splits
|
@@ -12,11 +12,11 @@ from collections.abc import Iterator
|
|
12
12
|
|
13
13
|
import numpy as np
|
14
14
|
import numpy.typing as npt
|
15
|
-
import sklearn.model_selection as
|
15
|
+
import sklearn.model_selection as sks
|
16
16
|
import sklearn.utils as sku
|
17
17
|
|
18
18
|
|
19
|
-
class WalkForward(
|
19
|
+
class WalkForward(sks.BaseCrossValidator):
|
20
20
|
"""Walk Forward cross-validator.
|
21
21
|
|
22
22
|
Provides train/test indices to split time series data samples in a walk forward
|
@@ -196,7 +196,7 @@ class WalkForward(skm.BaseCrossValidator):
|
|
196
196
|
)
|
197
197
|
test_start = test_end
|
198
198
|
|
199
|
-
def get_n_splits(self, X
|
199
|
+
def get_n_splits(self, X=None, y=None, groups=None) -> int:
|
200
200
|
"""Returns the number of splitting iterations in the cross-validator
|
201
201
|
|
202
202
|
Parameters
|
@@ -9,6 +9,7 @@ from skfolio.moments.covariance import (
|
|
9
9
|
EmpiricalCovariance,
|
10
10
|
GerberCovariance,
|
11
11
|
GraphicalLassoCV,
|
12
|
+
ImpliedCovariance,
|
12
13
|
LedoitWolf,
|
13
14
|
ShrunkCovariance,
|
14
15
|
)
|
@@ -38,4 +39,5 @@ __all__ = [
|
|
38
39
|
"OAS",
|
39
40
|
"ShrunkCovariance",
|
40
41
|
"GraphicalLassoCV",
|
42
|
+
"ImpliedCovariance",
|
41
43
|
]
|
@@ -0,0 +1,29 @@
|
|
1
|
+
"""Covariance module."""
|
2
|
+
|
3
|
+
from skfolio.moments.covariance._base import (
|
4
|
+
BaseCovariance,
|
5
|
+
)
|
6
|
+
from skfolio.moments.covariance._denoise_covariance import DenoiseCovariance
|
7
|
+
from skfolio.moments.covariance._detone_covariance import DetoneCovariance
|
8
|
+
from skfolio.moments.covariance._empirical_covariance import EmpiricalCovariance
|
9
|
+
from skfolio.moments.covariance._ew_covariance import EWCovariance
|
10
|
+
from skfolio.moments.covariance._gerber_covariance import GerberCovariance
|
11
|
+
from skfolio.moments.covariance._graphical_lasso_cv import GraphicalLassoCV
|
12
|
+
from skfolio.moments.covariance._implied_covariance import ImpliedCovariance
|
13
|
+
from skfolio.moments.covariance._ledoit_wolf import LedoitWolf
|
14
|
+
from skfolio.moments.covariance._oas import OAS
|
15
|
+
from skfolio.moments.covariance._shrunk_covariance import ShrunkCovariance
|
16
|
+
|
17
|
+
__all__ = [
|
18
|
+
"BaseCovariance",
|
19
|
+
"EmpiricalCovariance",
|
20
|
+
"EWCovariance",
|
21
|
+
"GerberCovariance",
|
22
|
+
"DenoiseCovariance",
|
23
|
+
"DetoneCovariance",
|
24
|
+
"LedoitWolf",
|
25
|
+
"OAS",
|
26
|
+
"ShrunkCovariance",
|
27
|
+
"GraphicalLassoCV",
|
28
|
+
"ImpliedCovariance",
|
29
|
+
]
|
@@ -22,19 +22,19 @@ class BaseCovariance(skb.BaseEstimator, ABC):
|
|
22
22
|
|
23
23
|
Parameters
|
24
24
|
----------
|
25
|
-
nearest : bool, default=
|
25
|
+
nearest : bool, default=True
|
26
26
|
If this is set to True, the covariance is replaced by the nearest covariance
|
27
27
|
matrix that is positive definite and with a Cholesky decomposition than can be
|
28
|
-
computed. The variance is left unchanged.
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
For more details, see :func:`~skfolio.
|
33
|
-
The default is `
|
28
|
+
computed. The variance is left unchanged.
|
29
|
+
A covariance matrix that is not positive definite often occurs in high
|
30
|
+
dimensional problems. It can be due to multicollinearity, floating-point
|
31
|
+
inaccuracies, or when the number of observations is smaller than the number of
|
32
|
+
assets. For more details, see :func:`~skfolio.utils.stats.cov_nearest`.
|
33
|
+
The default is `True`.
|
34
34
|
|
35
35
|
higham : bool, default=False
|
36
36
|
If this is set to True, the Higham & Nick (2002) algorithm is used to find the
|
37
|
-
nearest
|
37
|
+
nearest PD covariance, otherwise the eigenvalues are clipped to a threshold
|
38
38
|
above zeros (1e-13). The default is `False` and use the clipping method as the
|
39
39
|
Higham & Nick algorithm can be slow for large datasets.
|
40
40
|
|
@@ -59,7 +59,7 @@ class BaseCovariance(skb.BaseEstimator, ABC):
|
|
59
59
|
@abstractmethod
|
60
60
|
def __init__(
|
61
61
|
self,
|
62
|
-
nearest: bool =
|
62
|
+
nearest: bool = True,
|
63
63
|
higham: bool = False,
|
64
64
|
higham_max_iteration: int = 100,
|
65
65
|
):
|
@@ -103,6 +103,7 @@ class BaseCovariance(skb.BaseEstimator, ABC):
|
|
103
103
|
covariance,
|
104
104
|
higham=self.higham,
|
105
105
|
higham_max_iteration=self.higham_max_iteration,
|
106
|
+
warn=True,
|
106
107
|
)
|
107
108
|
# set covariance
|
108
109
|
self.covariance_ = covariance
|
@@ -0,0 +1,181 @@
|
|
1
|
+
"""Covariance Denoising Estimators."""
|
2
|
+
|
3
|
+
# Copyright (c) 2023
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# License: BSD 3 clause
|
6
|
+
# Implementation derived from:
|
7
|
+
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
8
|
+
# Grisel Licensed under BSD 3 clause.
|
9
|
+
|
10
|
+
import numpy as np
|
11
|
+
import numpy.typing as npt
|
12
|
+
import scipy.optimize as sco
|
13
|
+
import sklearn.neighbors as skn
|
14
|
+
import sklearn.utils.metadata_routing as skm
|
15
|
+
|
16
|
+
from skfolio.moments.covariance._base import BaseCovariance
|
17
|
+
from skfolio.moments.covariance._empirical_covariance import EmpiricalCovariance
|
18
|
+
from skfolio.utils.stats import corr_to_cov, cov_to_corr
|
19
|
+
from skfolio.utils.tools import check_estimator
|
20
|
+
|
21
|
+
|
22
|
+
class DenoiseCovariance(BaseCovariance):
|
23
|
+
"""Covariance Denoising estimator.
|
24
|
+
|
25
|
+
The goal of Covariance Denoising is to reduce the noise and enhance the signal of
|
26
|
+
the empirical covariance matrix [1]_.
|
27
|
+
It reduces the ill-conditioning of the traditional covariance estimate by
|
28
|
+
differentiating the eigenvalues associated with noise from the eigenvalues
|
29
|
+
associated with signal.
|
30
|
+
Denoising replaces the eigenvalues of the eigenvectors classified as random by
|
31
|
+
Marčenko-Pastur with a constant eigenvalue.
|
32
|
+
|
33
|
+
Parameters
|
34
|
+
----------
|
35
|
+
covariance_estimator : BaseCovariance, optional
|
36
|
+
:ref:`Covariance estimator <covariance_estimator>` to estimate the covariance
|
37
|
+
matrix that will be denoised.
|
38
|
+
The default (`None`) is to use :class:`~skfolio.moments.EmpiricalCovariance`.
|
39
|
+
|
40
|
+
nearest : bool, default=True
|
41
|
+
If this is set to True, the covariance is replaced by the nearest covariance
|
42
|
+
matrix that is positive definite and with a Cholesky decomposition than can be
|
43
|
+
computed. The variance is left unchanged.
|
44
|
+
A covariance matrix that is not positive definite often occurs in high
|
45
|
+
dimensional problems. It can be due to multicollinearity, floating-point
|
46
|
+
inaccuracies, or when the number of observations is smaller than the number of
|
47
|
+
assets. For more details, see :func:`~skfolio.utils.stats.cov_nearest`.
|
48
|
+
The default is `True`.
|
49
|
+
|
50
|
+
higham : bool, default=False
|
51
|
+
If this is set to True, the Higham & Nick (2002) algorithm is used to find the
|
52
|
+
nearest PD covariance, otherwise the eigenvalues are clipped to a threshold
|
53
|
+
above zeros (1e-13). The default is `False` and use the clipping method as the
|
54
|
+
Higham & Nick algorithm can be slow for large datasets.
|
55
|
+
|
56
|
+
higham_max_iteration : int, default=100
|
57
|
+
Maximum number of iteration of the Higham & Nick (2002) algorithm.
|
58
|
+
The default value is `100`.
|
59
|
+
|
60
|
+
Attributes
|
61
|
+
----------
|
62
|
+
covariance_ : ndarray of shape (n_assets, n_assets)
|
63
|
+
Estimated covariance.
|
64
|
+
|
65
|
+
covariance_estimator_ : BaseCovariance
|
66
|
+
Fitted `covariance_estimator`.
|
67
|
+
|
68
|
+
n_features_in_ : int
|
69
|
+
Number of assets seen during `fit`.
|
70
|
+
|
71
|
+
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
72
|
+
Names of assets seen during `fit`. Defined only when `X`
|
73
|
+
has assets names that are all strings.
|
74
|
+
|
75
|
+
References
|
76
|
+
----------
|
77
|
+
.. [1] "Machine Learning for Asset Managers".
|
78
|
+
Elements in Quantitative Finance.
|
79
|
+
Lòpez de Prado (2020).
|
80
|
+
"""
|
81
|
+
|
82
|
+
covariance_estimator_: BaseCovariance
|
83
|
+
|
84
|
+
def __init__(
|
85
|
+
self,
|
86
|
+
covariance_estimator: BaseCovariance | None = None,
|
87
|
+
nearest: bool = True,
|
88
|
+
higham: bool = False,
|
89
|
+
higham_max_iteration: int = 100,
|
90
|
+
):
|
91
|
+
super().__init__(
|
92
|
+
nearest=nearest,
|
93
|
+
higham=higham,
|
94
|
+
higham_max_iteration=higham_max_iteration,
|
95
|
+
)
|
96
|
+
self.covariance_estimator = covariance_estimator
|
97
|
+
|
98
|
+
def get_metadata_routing(self):
|
99
|
+
# noinspection PyTypeChecker
|
100
|
+
router = skm.MetadataRouter(owner=self.__class__.__name__).add(
|
101
|
+
covariance_estimator=self.covariance_estimator,
|
102
|
+
method_mapping=skm.MethodMapping().add(caller="fit", callee="fit"),
|
103
|
+
)
|
104
|
+
return router
|
105
|
+
|
106
|
+
def fit(self, X: npt.ArrayLike, y=None, **fit_params) -> "DenoiseCovariance":
|
107
|
+
"""Fit the Covariance Denoising estimator.
|
108
|
+
|
109
|
+
Parameters
|
110
|
+
----------
|
111
|
+
X : array-like of shape (n_observations, n_assets)
|
112
|
+
Price returns of the assets.
|
113
|
+
|
114
|
+
y : Ignored
|
115
|
+
Not used, present for API consistency by convention.
|
116
|
+
|
117
|
+
**fit_params : dict
|
118
|
+
Parameters to pass to the underlying estimators.
|
119
|
+
Only available if `enable_metadata_routing=True`, which can be
|
120
|
+
set by using ``sklearn.set_config(enable_metadata_routing=True)``.
|
121
|
+
See :ref:`Metadata Routing User Guide <metadata_routing>` for
|
122
|
+
more details.
|
123
|
+
|
124
|
+
Returns
|
125
|
+
-------
|
126
|
+
self : DenoiseCovariance
|
127
|
+
Fitted estimator.
|
128
|
+
"""
|
129
|
+
routed_params = skm.process_routing(self, "fit", **fit_params)
|
130
|
+
|
131
|
+
# fitting estimators
|
132
|
+
self.covariance_estimator_ = check_estimator(
|
133
|
+
self.covariance_estimator,
|
134
|
+
default=EmpiricalCovariance(),
|
135
|
+
check_type=BaseCovariance,
|
136
|
+
)
|
137
|
+
# noinspection PyArgumentList
|
138
|
+
self.covariance_estimator_.fit(X, y, **routed_params.covariance_estimator.fit)
|
139
|
+
|
140
|
+
# we validate and convert to numpy after all models have been fitted to keep
|
141
|
+
# features names information.
|
142
|
+
X = self._validate_data(X)
|
143
|
+
n_observations, n_assets = X.shape
|
144
|
+
q = n_observations / n_assets
|
145
|
+
corr, std = cov_to_corr(self.covariance_estimator_.covariance_)
|
146
|
+
e_val, e_vec = np.linalg.eigh(corr)
|
147
|
+
indices = e_val.argsort()[::-1]
|
148
|
+
e_val, e_vec = e_val[indices], e_vec[:, indices]
|
149
|
+
|
150
|
+
def _marchenko(x_var):
|
151
|
+
e_min, e_max = (
|
152
|
+
x_var * (1 - (1.0 / q) ** 0.5) ** 2,
|
153
|
+
x_var * (1 + (1.0 / q) ** 0.5) ** 2,
|
154
|
+
)
|
155
|
+
e_val_lin = np.linspace(e_min, e_max, 1000)
|
156
|
+
pdf_0 = (
|
157
|
+
q
|
158
|
+
/ (2 * np.pi * x_var * e_val_lin)
|
159
|
+
* ((e_max - e_val_lin) * (e_val_lin - e_min)) ** 0.5
|
160
|
+
)
|
161
|
+
kde = skn.KernelDensity(kernel="gaussian", bandwidth=0.01).fit(
|
162
|
+
e_val.reshape(-1, 1)
|
163
|
+
)
|
164
|
+
# noinspection PyUnresolvedReferences
|
165
|
+
pdf_1 = np.exp(kde.score_samples(pdf_0.reshape(-1, 1)))
|
166
|
+
return np.sum((pdf_1 - pdf_0) ** 2)
|
167
|
+
|
168
|
+
# noinspection PyTypeChecker
|
169
|
+
res = sco.minimize(_marchenko, x0=0.5, bounds=((1e-5, 1 - 1e-5),))
|
170
|
+
|
171
|
+
var = res["x"][0]
|
172
|
+
n_facts = e_val.shape[0] - e_val[::-1].searchsorted(
|
173
|
+
var * (1 + (1.0 / q) ** 0.5) ** 2
|
174
|
+
)
|
175
|
+
e_val_ = e_val.copy()
|
176
|
+
e_val_[n_facts:] = e_val_[n_facts:].sum() / float(e_val_.shape[0] - n_facts)
|
177
|
+
corr = e_vec @ np.diag(e_val_) @ e_vec.T
|
178
|
+
corr, _ = cov_to_corr(corr)
|
179
|
+
covariance = corr_to_cov(corr, std)
|
180
|
+
self._set_covariance(covariance)
|
181
|
+
return self
|