skfolio 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +2 -2
- skfolio/cluster/__init__.py +1 -1
- skfolio/cluster/_hierarchical.py +1 -1
- skfolio/datasets/__init__.py +1 -1
- skfolio/datasets/_base.py +2 -2
- skfolio/datasets/data/__init__.py +1 -0
- skfolio/distance/__init__.py +1 -1
- skfolio/distance/_base.py +2 -2
- skfolio/distance/_distance.py +4 -4
- skfolio/distribution/__init__.py +56 -0
- skfolio/distribution/_base.py +203 -0
- skfolio/distribution/copula/__init__.py +35 -0
- skfolio/distribution/copula/_base.py +456 -0
- skfolio/distribution/copula/_clayton.py +539 -0
- skfolio/distribution/copula/_gaussian.py +407 -0
- skfolio/distribution/copula/_gumbel.py +560 -0
- skfolio/distribution/copula/_independent.py +196 -0
- skfolio/distribution/copula/_joe.py +609 -0
- skfolio/distribution/copula/_selection.py +111 -0
- skfolio/distribution/copula/_student_t.py +486 -0
- skfolio/distribution/copula/_utils.py +509 -0
- skfolio/distribution/multivariate/__init__.py +11 -0
- skfolio/distribution/multivariate/_base.py +241 -0
- skfolio/distribution/multivariate/_utils.py +632 -0
- skfolio/distribution/multivariate/_vine_copula.py +1254 -0
- skfolio/distribution/univariate/__init__.py +19 -0
- skfolio/distribution/univariate/_base.py +308 -0
- skfolio/distribution/univariate/_gaussian.py +136 -0
- skfolio/distribution/univariate/_johnson_su.py +152 -0
- skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
- skfolio/distribution/univariate/_selection.py +85 -0
- skfolio/distribution/univariate/_student_t.py +144 -0
- skfolio/exceptions.py +6 -6
- skfolio/measures/__init__.py +1 -1
- skfolio/measures/_enums.py +7 -7
- skfolio/measures/_measures.py +4 -7
- skfolio/metrics/__init__.py +2 -0
- skfolio/metrics/_scorer.py +4 -4
- skfolio/model_selection/__init__.py +2 -2
- skfolio/model_selection/_combinatorial.py +15 -12
- skfolio/model_selection/_validation.py +2 -2
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/covariance/_base.py +1 -1
- skfolio/moments/covariance/_denoise_covariance.py +1 -1
- skfolio/moments/covariance/_detone_covariance.py +1 -1
- skfolio/moments/covariance/_empirical_covariance.py +1 -1
- skfolio/moments/covariance/_ew_covariance.py +1 -1
- skfolio/moments/covariance/_gerber_covariance.py +1 -1
- skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
- skfolio/moments/covariance/_implied_covariance.py +2 -7
- skfolio/moments/covariance/_ledoit_wolf.py +1 -1
- skfolio/moments/covariance/_oas.py +1 -1
- skfolio/moments/covariance/_shrunk_covariance.py +1 -1
- skfolio/moments/expected_returns/_base.py +1 -1
- skfolio/moments/expected_returns/_empirical_mu.py +1 -1
- skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
- skfolio/moments/expected_returns/_ew_mu.py +1 -1
- skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
- skfolio/optimization/__init__.py +2 -0
- skfolio/optimization/_base.py +2 -2
- skfolio/optimization/cluster/__init__.py +2 -0
- skfolio/optimization/cluster/_nco.py +7 -7
- skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
- skfolio/optimization/cluster/hierarchical/_base.py +1 -2
- skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
- skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
- skfolio/optimization/convex/__init__.py +2 -0
- skfolio/optimization/convex/_base.py +8 -8
- skfolio/optimization/convex/_distributionally_robust.py +4 -4
- skfolio/optimization/convex/_maximum_diversification.py +5 -5
- skfolio/optimization/convex/_mean_risk.py +5 -6
- skfolio/optimization/convex/_risk_budgeting.py +3 -3
- skfolio/optimization/ensemble/__init__.py +2 -0
- skfolio/optimization/ensemble/_base.py +2 -2
- skfolio/optimization/ensemble/_stacking.py +1 -1
- skfolio/optimization/naive/__init__.py +2 -0
- skfolio/optimization/naive/_naive.py +1 -1
- skfolio/population/__init__.py +2 -0
- skfolio/population/_population.py +34 -7
- skfolio/portfolio/_base.py +42 -8
- skfolio/portfolio/_multi_period_portfolio.py +3 -2
- skfolio/portfolio/_portfolio.py +4 -4
- skfolio/pre_selection/__init__.py +2 -0
- skfolio/pre_selection/_drop_correlated.py +2 -2
- skfolio/pre_selection/_select_complete.py +25 -26
- skfolio/pre_selection/_select_k_extremes.py +2 -2
- skfolio/pre_selection/_select_non_dominated.py +2 -2
- skfolio/pre_selection/_select_non_expiring.py +2 -2
- skfolio/preprocessing/__init__.py +2 -0
- skfolio/preprocessing/_returns.py +2 -2
- skfolio/prior/__init__.py +4 -0
- skfolio/prior/_base.py +2 -2
- skfolio/prior/_black_litterman.py +5 -3
- skfolio/prior/_empirical.py +3 -1
- skfolio/prior/_factor_model.py +8 -4
- skfolio/prior/_synthetic_data.py +239 -0
- skfolio/synthetic_returns/__init__.py +1 -0
- skfolio/typing.py +1 -1
- skfolio/uncertainty_set/__init__.py +2 -0
- skfolio/uncertainty_set/_base.py +2 -2
- skfolio/uncertainty_set/_bootstrap.py +1 -1
- skfolio/uncertainty_set/_empirical.py +1 -1
- skfolio/utils/__init__.py +1 -0
- skfolio/utils/bootstrap.py +2 -2
- skfolio/utils/equations.py +13 -10
- skfolio/utils/sorting.py +2 -2
- skfolio/utils/stats.py +7 -7
- skfolio/utils/tools.py +76 -12
- {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/METADATA +99 -24
- skfolio-0.8.0.dist-info/RECORD +120 -0
- {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/WHEEL +1 -1
- skfolio-0.7.0.dist-info/RECORD +0 -95
- {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info/licenses}/LICENSE +0 -0
- {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/top_level.txt +0 -0
skfolio/__init__.py
CHANGED
skfolio/cluster/__init__.py
CHANGED
skfolio/cluster/_hierarchical.py
CHANGED
skfolio/datasets/__init__.py
CHANGED
skfolio/datasets/_base.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
# Implementation derived from:
|
7
7
|
# scikit-portfolio, Copyright (c) 2022, Carlo Nicolini, Licensed under MIT Licence.
|
8
8
|
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
@@ -74,7 +74,7 @@ def load_gzip_compressed_csv_data(
|
|
74
74
|
encoding="utf-8",
|
75
75
|
datetime_index: bool = True,
|
76
76
|
) -> pd.DataFrame:
|
77
|
-
"""
|
77
|
+
"""Load gzip-compressed csv files with `importlib.resources`.
|
78
78
|
|
79
79
|
1) Open resource file with `importlib.resources.open_binary`
|
80
80
|
2) Decompress csv file with `gzip.open`
|
@@ -0,0 +1 @@
|
|
1
|
+
"""Dataset Data module."""
|
skfolio/distance/__init__.py
CHANGED
skfolio/distance/_base.py
CHANGED
skfolio/distance/_distance.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
"""Distance Estimators"""
|
1
|
+
"""Distance Estimators."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import numpy.typing as npt
|
@@ -215,7 +215,7 @@ class SpearmanDistance(BaseDistance):
|
|
215
215
|
self.power = power
|
216
216
|
|
217
217
|
def fit(self, X: npt.ArrayLike, y=None) -> "SpearmanDistance":
|
218
|
-
"""Fit the Spearman
|
218
|
+
"""Fit the Spearman estimator.
|
219
219
|
|
220
220
|
Parameters
|
221
221
|
----------
|
@@ -384,7 +384,7 @@ class DistanceCorrelation(BaseDistance):
|
|
384
384
|
|
385
385
|
@staticmethod
|
386
386
|
def _dcorr(x: np.ndarray, y: np.ndarray):
|
387
|
-
"""Calculate the distance correlation between two variables"""
|
387
|
+
"""Calculate the distance correlation between two variables."""
|
388
388
|
x = scd.squareform(scd.pdist(x.reshape(-1, 1)))
|
389
389
|
y = scd.squareform(scd.pdist(y.reshape(-1, 1)))
|
390
390
|
x = x - x.mean(axis=0)[np.newaxis, :] - x.mean(axis=1)[:, np.newaxis] + x.mean()
|
@@ -0,0 +1,56 @@
|
|
1
|
+
"""Distribution module."""
|
2
|
+
|
3
|
+
from skfolio.distribution._base import BaseDistribution, SelectionCriterion
|
4
|
+
from skfolio.distribution.copula import (
|
5
|
+
BaseBivariateCopula,
|
6
|
+
ClaytonCopula,
|
7
|
+
CopulaRotation,
|
8
|
+
GaussianCopula,
|
9
|
+
GumbelCopula,
|
10
|
+
IndependentCopula,
|
11
|
+
JoeCopula,
|
12
|
+
StudentTCopula,
|
13
|
+
compute_pseudo_observations,
|
14
|
+
empirical_tail_concentration,
|
15
|
+
plot_tail_concentration,
|
16
|
+
select_bivariate_copula,
|
17
|
+
)
|
18
|
+
from skfolio.distribution.multivariate import (
|
19
|
+
BaseMultivariateDist,
|
20
|
+
DependenceMethod,
|
21
|
+
VineCopula,
|
22
|
+
)
|
23
|
+
from skfolio.distribution.univariate import (
|
24
|
+
BaseUnivariateDist,
|
25
|
+
Gaussian,
|
26
|
+
JohnsonSU,
|
27
|
+
NormalInverseGaussian,
|
28
|
+
StudentT,
|
29
|
+
select_univariate_dist,
|
30
|
+
)
|
31
|
+
|
32
|
+
__all__ = [
|
33
|
+
"BaseBivariateCopula",
|
34
|
+
"BaseDistribution",
|
35
|
+
"BaseMultivariateDist",
|
36
|
+
"BaseUnivariateDist",
|
37
|
+
"ClaytonCopula",
|
38
|
+
"CopulaRotation",
|
39
|
+
"DependenceMethod",
|
40
|
+
"Gaussian",
|
41
|
+
"GaussianCopula",
|
42
|
+
"GumbelCopula",
|
43
|
+
"IndependentCopula",
|
44
|
+
"JoeCopula",
|
45
|
+
"JohnsonSU",
|
46
|
+
"NormalInverseGaussian",
|
47
|
+
"SelectionCriterion",
|
48
|
+
"StudentT",
|
49
|
+
"StudentTCopula",
|
50
|
+
"VineCopula",
|
51
|
+
"compute_pseudo_observations",
|
52
|
+
"empirical_tail_concentration",
|
53
|
+
"plot_tail_concentration",
|
54
|
+
"select_bivariate_copula",
|
55
|
+
"select_univariate_dist",
|
56
|
+
]
|
@@ -0,0 +1,203 @@
|
|
1
|
+
"""Base Distribution Estimator."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Authors: The skfolio developers
|
5
|
+
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
|
6
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
7
|
+
|
8
|
+
from abc import ABC, abstractmethod
|
9
|
+
from enum import auto
|
10
|
+
|
11
|
+
import numpy as np
|
12
|
+
import numpy.typing as npt
|
13
|
+
import sklearn.base as skb
|
14
|
+
|
15
|
+
from skfolio.utils.tools import AutoEnum
|
16
|
+
|
17
|
+
|
18
|
+
class SelectionCriterion(AutoEnum):
    """Enumeration of the available selection criteria.

    Attributes
    ----------
    AIC : str
        Akaike Information Criterion (AIC)

    BIC : str
        Bayesian Information Criterion (BIC)
    """

    # Member order is preserved: `auto()` assigns values in declaration order.
    AIC = auto()
    BIC = auto()
|
32
|
+
|
33
|
+
|
34
|
+
class BaseDistribution(skb.BaseEstimator, ABC):
    """Base Distribution Estimator.

    This abstract class serves as a foundation for distribution models in skfolio.

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.
    """

    def __init__(self, random_state: int | None = None):
        self.random_state = random_state

    @property
    @abstractmethod
    def n_params(self) -> int:
        """Number of model parameters."""
        pass

    @property
    @abstractmethod
    def fitted_repr(self) -> str:
        """String representation of the fitted model."""
        pass

    @abstractmethod
    def fit(self, X: npt.ArrayLike, y=None) -> "BaseDistribution":
        """Fit the distribution model.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_features)
            The input data.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : BaseDistribution
            Returns the instance itself.
        """
        pass

    @abstractmethod
    def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the log-likelihood of each sample (log-pdf) under the model.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_features)
            The input data.

        Returns
        -------
        density : ndarray of shape (n_observations,)
            Log-likelihood values for each observation in X.
        """
        pass

    def sample(self, n_samples: int = 1):
        """Generate random samples from the fitted model.

        This base implementation is a placeholder that does nothing and returns
        `None`; it is not declared abstract, so concrete distributions are
        expected to override it.

        Parameters
        ----------
        n_samples : int, default=1
            Number of samples to generate.

        Returns
        -------
        X : array-like
            The generated samples, as produced by the overriding subclass.
        """
        pass

    def score(self, X: npt.ArrayLike, y=None) -> float:
        """Compute the total log-likelihood under the model.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_features)
            An array of data points for which the total log-likelihood is computed.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        logprob : float
            The total log-likelihood (sum of log-pdf values).
        """
        return np.sum(self.score_samples(X))

    def aic(self, X: npt.ArrayLike) -> float:
        r"""Compute the Akaike Information Criterion (AIC) for the model given data X.

        The AIC is defined as:

        .. math::
            \mathrm{AIC} = -2 \, \log L \;+\; 2 k,

        where

        - :math:`\log L` is the total log-likelihood
        - :math:`k` is the number of parameters in the model

        A lower AIC value indicates a better trade-off between model fit and complexity.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_features)
            The input data on which to compute the AIC.

        Returns
        -------
        aic : float
            The AIC of the fitted model on the given data.

        Notes
        -----
        In practice, both AIC and BIC measure the trade-off between model fit and
        complexity, but BIC tends to prefer simpler models for large :math:`n`
        because of the :math:`\ln(n)` term.

        References
        ----------
        .. [1] "A new look at the statistical model identification", Akaike (1974).
        """
        log_likelihood = self.score(X)
        # Algebraically identical to -2 * log L + 2 * k.
        return 2 * (self.n_params - log_likelihood)

    def bic(self, X: npt.ArrayLike) -> float:
        r"""Compute the Bayesian Information Criterion (BIC) for the model given data X.

        The BIC is defined as:

        .. math::
           \mathrm{BIC} = -2 \, \log L \;+\; k \,\ln(n),

        where

        - :math:`\log L` is the (maximized) total log-likelihood
        - :math:`k` is the number of parameters in the model
        - :math:`n` is the number of observations

        A lower BIC value suggests a better fit while imposing a stronger penalty
        for model complexity than the AIC.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_features)
            The input data on which to compute the BIC.

        Returns
        -------
        bic : float
           The BIC of the fitted model on the given data.

        Notes
        -----
        In practice, both AIC and BIC measure the trade-off between model fit and
        complexity, but BIC tends to prefer simpler models for large :math:`n`
        because of the :math:`\ln(n)` term.

        References
        ----------
        .. [1]  "Estimating the dimension of a model", Schwarz, G. (1978).
        """
        log_likelihood = self.score(X)
        # `X` is only required to be array-like, so it may be a plain list or
        # tuple without a `.shape` attribute. `np.shape` uses `.shape` when it
        # exists and otherwise converts the input to obtain its shape.
        n = np.shape(X)[0]
        return -2 * log_likelihood + self.n_params * np.log(n)
|
@@ -0,0 +1,35 @@
|
|
1
|
+
"""Copula module."""
|
2
|
+
|
3
|
+
from skfolio.distribution.copula._base import (
|
4
|
+
UNIFORM_MARGINAL_EPSILON,
|
5
|
+
BaseBivariateCopula,
|
6
|
+
)
|
7
|
+
from skfolio.distribution.copula._clayton import ClaytonCopula
|
8
|
+
from skfolio.distribution.copula._gaussian import GaussianCopula
|
9
|
+
from skfolio.distribution.copula._gumbel import GumbelCopula
|
10
|
+
from skfolio.distribution.copula._independent import IndependentCopula
|
11
|
+
from skfolio.distribution.copula._joe import JoeCopula
|
12
|
+
from skfolio.distribution.copula._selection import select_bivariate_copula
|
13
|
+
from skfolio.distribution.copula._student_t import StudentTCopula
|
14
|
+
from skfolio.distribution.copula._utils import (
|
15
|
+
CopulaRotation,
|
16
|
+
compute_pseudo_observations,
|
17
|
+
empirical_tail_concentration,
|
18
|
+
plot_tail_concentration,
|
19
|
+
)
|
20
|
+
|
21
|
+
__all__ = [
|
22
|
+
"UNIFORM_MARGINAL_EPSILON",
|
23
|
+
"BaseBivariateCopula",
|
24
|
+
"ClaytonCopula",
|
25
|
+
"CopulaRotation",
|
26
|
+
"GaussianCopula",
|
27
|
+
"GumbelCopula",
|
28
|
+
"IndependentCopula",
|
29
|
+
"JoeCopula",
|
30
|
+
"StudentTCopula",
|
31
|
+
"compute_pseudo_observations",
|
32
|
+
"empirical_tail_concentration",
|
33
|
+
"plot_tail_concentration",
|
34
|
+
"select_bivariate_copula",
|
35
|
+
]
|