skfolio 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +2 -2
- skfolio/cluster/__init__.py +1 -1
- skfolio/cluster/_hierarchical.py +1 -1
- skfolio/datasets/__init__.py +1 -1
- skfolio/datasets/_base.py +2 -2
- skfolio/datasets/data/__init__.py +1 -0
- skfolio/distance/__init__.py +1 -1
- skfolio/distance/_base.py +2 -2
- skfolio/distance/_distance.py +4 -4
- skfolio/distribution/__init__.py +56 -0
- skfolio/distribution/_base.py +203 -0
- skfolio/distribution/copula/__init__.py +35 -0
- skfolio/distribution/copula/_base.py +456 -0
- skfolio/distribution/copula/_clayton.py +539 -0
- skfolio/distribution/copula/_gaussian.py +407 -0
- skfolio/distribution/copula/_gumbel.py +560 -0
- skfolio/distribution/copula/_independent.py +196 -0
- skfolio/distribution/copula/_joe.py +609 -0
- skfolio/distribution/copula/_selection.py +111 -0
- skfolio/distribution/copula/_student_t.py +486 -0
- skfolio/distribution/copula/_utils.py +509 -0
- skfolio/distribution/multivariate/__init__.py +11 -0
- skfolio/distribution/multivariate/_base.py +241 -0
- skfolio/distribution/multivariate/_utils.py +632 -0
- skfolio/distribution/multivariate/_vine_copula.py +1254 -0
- skfolio/distribution/univariate/__init__.py +19 -0
- skfolio/distribution/univariate/_base.py +308 -0
- skfolio/distribution/univariate/_gaussian.py +136 -0
- skfolio/distribution/univariate/_johnson_su.py +152 -0
- skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
- skfolio/distribution/univariate/_selection.py +85 -0
- skfolio/distribution/univariate/_student_t.py +144 -0
- skfolio/exceptions.py +6 -6
- skfolio/measures/__init__.py +1 -1
- skfolio/measures/_enums.py +7 -7
- skfolio/measures/_measures.py +4 -7
- skfolio/metrics/__init__.py +2 -0
- skfolio/metrics/_scorer.py +4 -4
- skfolio/model_selection/__init__.py +2 -2
- skfolio/model_selection/_combinatorial.py +15 -12
- skfolio/model_selection/_validation.py +2 -2
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/covariance/_base.py +1 -1
- skfolio/moments/covariance/_denoise_covariance.py +1 -1
- skfolio/moments/covariance/_detone_covariance.py +1 -1
- skfolio/moments/covariance/_empirical_covariance.py +1 -1
- skfolio/moments/covariance/_ew_covariance.py +1 -1
- skfolio/moments/covariance/_gerber_covariance.py +1 -1
- skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
- skfolio/moments/covariance/_implied_covariance.py +2 -7
- skfolio/moments/covariance/_ledoit_wolf.py +1 -1
- skfolio/moments/covariance/_oas.py +1 -1
- skfolio/moments/covariance/_shrunk_covariance.py +1 -1
- skfolio/moments/expected_returns/_base.py +1 -1
- skfolio/moments/expected_returns/_empirical_mu.py +1 -1
- skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
- skfolio/moments/expected_returns/_ew_mu.py +1 -1
- skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
- skfolio/optimization/__init__.py +2 -0
- skfolio/optimization/_base.py +2 -2
- skfolio/optimization/cluster/__init__.py +2 -0
- skfolio/optimization/cluster/_nco.py +7 -7
- skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
- skfolio/optimization/cluster/hierarchical/_base.py +1 -2
- skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
- skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
- skfolio/optimization/convex/__init__.py +2 -0
- skfolio/optimization/convex/_base.py +8 -8
- skfolio/optimization/convex/_distributionally_robust.py +4 -4
- skfolio/optimization/convex/_maximum_diversification.py +5 -5
- skfolio/optimization/convex/_mean_risk.py +5 -6
- skfolio/optimization/convex/_risk_budgeting.py +3 -3
- skfolio/optimization/ensemble/__init__.py +2 -0
- skfolio/optimization/ensemble/_base.py +2 -2
- skfolio/optimization/ensemble/_stacking.py +1 -1
- skfolio/optimization/naive/__init__.py +2 -0
- skfolio/optimization/naive/_naive.py +1 -1
- skfolio/population/__init__.py +2 -0
- skfolio/population/_population.py +35 -9
- skfolio/portfolio/_base.py +42 -8
- skfolio/portfolio/_multi_period_portfolio.py +3 -2
- skfolio/portfolio/_portfolio.py +4 -4
- skfolio/pre_selection/__init__.py +2 -0
- skfolio/pre_selection/_drop_correlated.py +2 -2
- skfolio/pre_selection/_select_complete.py +25 -26
- skfolio/pre_selection/_select_k_extremes.py +2 -2
- skfolio/pre_selection/_select_non_dominated.py +2 -2
- skfolio/pre_selection/_select_non_expiring.py +2 -2
- skfolio/preprocessing/__init__.py +2 -0
- skfolio/preprocessing/_returns.py +2 -2
- skfolio/prior/__init__.py +4 -0
- skfolio/prior/_base.py +2 -2
- skfolio/prior/_black_litterman.py +5 -3
- skfolio/prior/_empirical.py +3 -1
- skfolio/prior/_factor_model.py +8 -4
- skfolio/prior/_synthetic_data.py +239 -0
- skfolio/synthetic_returns/__init__.py +1 -0
- skfolio/typing.py +1 -1
- skfolio/uncertainty_set/__init__.py +2 -0
- skfolio/uncertainty_set/_base.py +2 -2
- skfolio/uncertainty_set/_bootstrap.py +1 -1
- skfolio/uncertainty_set/_empirical.py +1 -1
- skfolio/utils/__init__.py +1 -0
- skfolio/utils/bootstrap.py +2 -2
- skfolio/utils/equations.py +13 -10
- skfolio/utils/sorting.py +2 -2
- skfolio/utils/stats.py +7 -7
- skfolio/utils/tools.py +76 -12
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/METADATA +99 -24
- skfolio-0.8.1.dist-info/RECORD +120 -0
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/WHEEL +1 -1
- skfolio-0.7.0.dist-info/RECORD +0 -95
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info/licenses}/LICENSE +0 -0
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,153 @@
|
|
1
|
+
"""Normal Inverse Gaussian Estimator."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Authors: The skfolio developers
|
5
|
+
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
|
6
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
7
|
+
|
8
|
+
import numpy.typing as npt
|
9
|
+
import scipy.stats as st
|
10
|
+
|
11
|
+
from skfolio.distribution.univariate._base import BaseUnivariateDist
|
12
|
+
|
13
|
+
|
14
|
+
class NormalInverseGaussian(BaseUnivariateDist):
    r"""Univariate Normal Inverse Gaussian (NIG) distribution estimator.

    The estimator fits the parameters of `scipy.stats.norminvgauss` (two shape
    parameters, a location and a scale) to a single column of data.

    In its "standardized" form, the probability density function is:

    .. math::

        f(x, a, b) = \frac{a \, K_1(a \sqrt{1 + x^2})}{\pi \sqrt{1 + x^2}} \,
                     \exp(\sqrt{a^2 - b^2} + b x)

    where :math:`x` is a real number, :math:`a` is the tail heaviness parameter
    and :math:`b` is the asymmetry parameter, with :math:`a > 0` and
    :math:`|b| <= a`. :math:`K_1` is the modified Bessel function of second kind
    (`scipy.special.k1`).

    The loc and scale parameters shift and/or scale the standardized density:
    `pdf(x, a, b, loc, scale)` is equivalent to `pdf(y, a, b) / scale` with
    `y = (x - loc) / scale`.

    For more information, you can refer to the `scipy documentation <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norminvgauss.html#scipy.stats.norminvgauss>`_

    Parameters
    ----------
    loc : float or None, default=None
        When given, the location parameter is held fixed at this value during
        fitting. When None, it is estimated from the data.

    scale : float or None, default=None
        When given, the scale parameter is held fixed at this value during
        fitting. When None, it is estimated from the data.
        `loc` and `scale` cannot both be fixed: `fit` raises a `ValueError` in
        that case.

    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.

    Attributes
    ----------
    a_ : float
        The fitted shape parameter a of the NIG distribution.

    b_ : float
        The fitted shape parameter b of the NIG distribution.

    loc_ : float
        The fitted location parameter.

    scale_ : float
        The fitted scale parameter.

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_index
    >>> from skfolio.preprocessing import prices_to_returns
    >>> from skfolio.distribution.univariate import NormalInverseGaussian
    >>>
    >>> # Load historical prices and convert them to returns
    >>> prices = load_sp500_index()
    >>> X = prices_to_returns(prices)
    >>>
    >>> # Initialize the estimator.
    >>> model = NormalInverseGaussian()
    >>>
    >>> # Fit the model to the data.
    >>> model.fit(X)
    >>>
    >>> # Display the fitted parameters.
    >>> print(model.fitted_repr)
    NormalInverseGaussian(0.422, -0.0321, 0.000913, 0.00739)
    >>>
    >>> # Compute the log-likelihood, total log-likelihood, CDF, PPF, AIC, and BIC
    >>> log_likelihood = model.score_samples(X)
    >>> score = model.score(X)
    >>> cdf = model.cdf(X)
    >>> ppf = model.ppf(X)
    >>> aic = model.aic(X)
    >>> bic = model.bic(X)
    >>>
    >>> # Generate 5 new samples from the fitted distribution.
    >>> samples = model.sample(n_samples=5)
    >>>
    >>> # Plot the estimated probability density function (PDF).
    >>> fig = model.plot_pdf()
    >>> fig.show()
    """

    # Fitted attributes, assigned by `fit`.
    a_: float
    b_: float
    loc_: float
    scale_: float
    # SciPy distribution that performs the fitting.
    _scipy_model = st.norminvgauss

    def __init__(
        self,
        loc: float | None = None,
        scale: float | None = None,
        random_state: int | None = None,
    ):
        super().__init__(random_state=random_state)
        self.loc = loc
        self.scale = scale

    @property
    def _scipy_params(self) -> dict[str, float]:
        """Fitted parameters, keyed by the SciPy distribution's argument names."""
        return dict(a=self.a_, b=self.b_, loc=self.loc_, scale=self.scale_)

    def fit(self, X: npt.ArrayLike, y=None) -> "NormalInverseGaussian":
        """Fit the univariate Normal Inverse Gaussian distribution model.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            The input data. X must contain a single column.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : NormalInverseGaussian
            Returns the instance itself.

        Raises
        ------
        ValueError
            If both `loc` and `scale` are fixed (neither is None).
        """
        X = self._validate_X(X, reset=True)

        # At least one of loc/scale has to remain free for the fit.
        if not (self.loc is None or self.scale is None):
            raise ValueError("Either loc or scale must be None to be fitted")

        # SciPy's `fit` holds a parameter fixed when it is passed with an "f"
        # prefix (`floc`, `fscale`); only forward the ones the user has set.
        fixed_params = {
            name: value
            for name, value in (("floc", self.loc), ("fscale", self.scale))
            if value is not None
        }

        fitted = self._scipy_model.fit(X, **fixed_params)
        self.a_, self.b_, self.loc_, self.scale_ = fitted
        return self
|
@@ -0,0 +1,85 @@
|
|
1
|
+
"""Univariate Distribution Selection."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Authors: The skfolio developers
|
5
|
+
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
|
6
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
import numpy.typing as npt
|
10
|
+
import sklearn as sk
|
11
|
+
|
12
|
+
from skfolio.distribution._base import SelectionCriterion
|
13
|
+
from skfolio.distribution.univariate._base import BaseUnivariateDist
|
14
|
+
from skfolio.distribution.univariate._gaussian import Gaussian
|
15
|
+
from skfolio.distribution.univariate._johnson_su import JohnsonSU
|
16
|
+
from skfolio.distribution.univariate._student_t import StudentT
|
17
|
+
|
18
|
+
|
19
|
+
def select_univariate_dist(
|
20
|
+
X: npt.ArrayLike,
|
21
|
+
distribution_candidates: list[BaseUnivariateDist] | None = None,
|
22
|
+
selection_criterion: SelectionCriterion = SelectionCriterion.AIC,
|
23
|
+
) -> BaseUnivariateDist:
|
24
|
+
"""Select the optimal univariate distribution estimator based on an information
|
25
|
+
criterion.
|
26
|
+
|
27
|
+
For each candidate distribution, the function fits the distribution to X and then
|
28
|
+
computes either the Akaike Information Criterion (AIC) or the Bayesian Information
|
29
|
+
Criterion (BIC). The candidate with the lowest criterion value is returned.
|
30
|
+
|
31
|
+
Parameters
|
32
|
+
----------
|
33
|
+
X : array-like of shape (n_observations, 1)
|
34
|
+
The input data used to fit each candidate distribution.
|
35
|
+
|
36
|
+
distribution_candidates : list of BaseUnivariateDist
|
37
|
+
A list of candidate distribution estimators. Each candidate must be an instance
|
38
|
+
of a class that inherits from `BaseUnivariateDist`.
|
39
|
+
If None, defaults to `[Gaussian(), StudentT(), JohnsonSU()]`.
|
40
|
+
|
41
|
+
selection_criterion : SelectionCriterion, default=SelectionCriterion.AIC
|
42
|
+
The criterion used for model selection. Possible values are:
|
43
|
+
- SelectionCriterion.AIC : Akaike Information Criterion
|
44
|
+
- SelectionCriterion.BIC : Bayesian Information Criterion
|
45
|
+
|
46
|
+
Returns
|
47
|
+
-------
|
48
|
+
BaseUnivariateDist
|
49
|
+
The fitted candidate estimator that minimizes the selected information
|
50
|
+
criterion.
|
51
|
+
|
52
|
+
Raises
|
53
|
+
------
|
54
|
+
ValueError
|
55
|
+
If X does not have exactly one column or if any candidate in the list does not
|
56
|
+
inherit from BaseUnivariateDist.
|
57
|
+
"""
|
58
|
+
if distribution_candidates is None:
|
59
|
+
distribution_candidates = [
|
60
|
+
Gaussian(),
|
61
|
+
StudentT(),
|
62
|
+
JohnsonSU(),
|
63
|
+
]
|
64
|
+
|
65
|
+
X = np.asarray(X)
|
66
|
+
if X.ndim != 2 or X.shape[1] != 1:
|
67
|
+
raise ValueError("X must contains one column for Univariate Distribution")
|
68
|
+
|
69
|
+
results = {}
|
70
|
+
for dist in distribution_candidates:
|
71
|
+
if not isinstance(dist, BaseUnivariateDist):
|
72
|
+
raise ValueError("Each candidate must inherit from `BaseUnivariateDist`")
|
73
|
+
dist = sk.clone(dist)
|
74
|
+
dist.fit(X)
|
75
|
+
|
76
|
+
match selection_criterion:
|
77
|
+
case selection_criterion.AIC:
|
78
|
+
results[dist] = dist.aic(X)
|
79
|
+
case selection_criterion.BIC:
|
80
|
+
results[dist] = dist.bic(X)
|
81
|
+
case _:
|
82
|
+
raise ValueError(f"{selection_criterion} not implemented")
|
83
|
+
|
84
|
+
selected_dist = min(results, key=results.get)
|
85
|
+
return selected_dist
|
@@ -0,0 +1,144 @@
|
|
1
|
+
"""Univariate Student's t Estimation."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Authors: The skfolio developers
|
5
|
+
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
|
6
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
7
|
+
|
8
|
+
import numpy.typing as npt
|
9
|
+
import scipy.stats as st
|
10
|
+
|
11
|
+
from skfolio.distribution.univariate._base import BaseUnivariateDist
|
12
|
+
|
13
|
+
|
14
|
+
class StudentT(BaseUnivariateDist):
    r"""Univariate Student's t distribution estimator.

    The estimator fits the parameters of `scipy.stats.t` (degrees of freedom,
    location and scale) to a single column of data.

    In its "standardized" form, the probability density function is:

    .. math::

        f(x, \nu) = \frac{\Gamma((\nu+1)/2)}
                        {\sqrt{\pi \nu} \Gamma(\nu/2)}
                    (1+x^2/\nu)^{-(\nu+1)/2}

    where :math:`x` is a real number and the degrees of freedom parameter :math:`\nu`
    (denoted `dof` in the implementation) satisfies :math:`\nu > 0`. :math:`\Gamma` is
    the gamma function (`scipy.special.gamma`).

    The loc and scale parameters shift and/or scale the standardized density:
    `pdf(x, df, loc, scale)` is equivalent to `pdf(y, df) / scale` with
    `y = (x - loc) / scale`.

    For more information, you can refer to the `scipy documentation <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.t.html#scipy.stats.t>`_


    Parameters
    ----------
    loc : float or None, default=None
        When given, the location parameter is held fixed at this value during
        fitting. When None, it is estimated from the data.

    scale : float or None, default=None
        When given, the scale parameter is held fixed at this value during
        fitting. When None, it is estimated from the data.

    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.

    Attributes
    ----------
    dof_ : float
        The fitted degrees of freedom for the Student's t distribution.

    loc_ : float
        The fitted location parameter.

    scale_ : float
        The fitted scale parameter.

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_index
    >>> from skfolio.preprocessing import prices_to_returns
    >>> from skfolio.distribution.univariate import StudentT
    >>>
    >>> # Load historical prices and convert them to returns
    >>> prices = load_sp500_index()
    >>> X = prices_to_returns(prices)
    >>>
    >>> # Initialize the estimator.
    >>> model = StudentT()
    >>>
    >>> # Fit the model to the data.
    >>> model.fit(X)
    >>>
    >>> # Display the fitted parameters.
    >>> print(model.fitted_repr)
    StudentT(2.75, 0.000618, 0.00681)
    >>>
    >>> # Compute the log-likelihood, total log-likelihood, CDF, PPF, AIC, and BIC
    >>> log_likelihood = model.score_samples(X)
    >>> score = model.score(X)
    >>> cdf = model.cdf(X)
    >>> ppf = model.ppf(X)
    >>> aic = model.aic(X)
    >>> bic = model.bic(X)
    >>>
    >>> # Generate 5 new samples from the fitted distribution.
    >>> samples = model.sample(n_samples=5)
    >>>
    >>> # Plot the estimated probability density function (PDF).
    >>> fig = model.plot_pdf()
    >>> fig.show()
    """

    # Fitted attributes, assigned by `fit`.
    dof_: float
    loc_: float
    scale_: float
    # SciPy distribution that performs the fitting.
    _scipy_model = st.t

    def __init__(
        self,
        loc: float | None = None,
        scale: float | None = None,
        random_state: int | None = None,
    ):
        super().__init__(random_state=random_state)
        self.loc = loc
        self.scale = scale

    @property
    def _scipy_params(self) -> dict[str, float]:
        """Fitted parameters, keyed by the SciPy distribution's argument names."""
        # The fitted `dof_` is exposed under SciPy's own name for it, `df`.
        return dict(loc=self.loc_, scale=self.scale_, df=self.dof_)

    def fit(self, X: npt.ArrayLike, y=None) -> "StudentT":
        """Fit the univariate Student's t distribution model.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            The input data. X must contain a single column.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : StudentT
            Returns the instance itself.
        """
        X = self._validate_X(X, reset=True)

        # SciPy's `fit` holds a parameter fixed when it is passed with an "f"
        # prefix (`floc`, `fscale`); only forward the ones the user has set.
        fixed_params = {
            name: value
            for name, value in (("floc", self.loc), ("fscale", self.scale))
            if value is not None
        }

        fitted = self._scipy_model.fit(X, **fixed_params)
        self.dof_, self.loc_, self.scale_ = fitted
        return self
|
skfolio/exceptions.py
CHANGED
@@ -5,7 +5,7 @@ classes used across skfolio.
|
|
5
5
|
|
6
6
|
# Copyright (c) 2023
|
7
7
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
8
|
-
# License: BSD
|
8
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
9
9
|
|
10
10
|
__all__ = [
|
11
11
|
"DuplicateGroupsError",
|
@@ -17,20 +17,20 @@ __all__ = [
|
|
17
17
|
|
18
18
|
|
19
19
|
class OptimizationError(Exception):
|
20
|
-
"""Optimization Did not converge"""
|
20
|
+
"""Optimization Did not converge."""
|
21
21
|
|
22
22
|
|
23
23
|
class EquationToMatrixError(Exception):
|
24
|
-
"""Error while processing equations"""
|
24
|
+
"""Error while processing equations."""
|
25
25
|
|
26
26
|
|
27
27
|
class GroupNotFoundError(Exception):
|
28
|
-
"""Group name not found in the groups"""
|
28
|
+
"""Group name not found in the groups."""
|
29
29
|
|
30
30
|
|
31
31
|
class DuplicateGroupsError(Exception):
|
32
|
-
"""Group name appear in multiple group levels"""
|
32
|
+
"""Group name appear in multiple group levels."""
|
33
33
|
|
34
34
|
|
35
35
|
class NonPositiveVarianceError(Exception):
|
36
|
-
"""Variance negative or null"""
|
36
|
+
"""Variance negative or null."""
|
skfolio/measures/__init__.py
CHANGED
skfolio/measures/_enums.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
from abc import abstractmethod
|
8
8
|
from enum import auto
|
@@ -11,10 +11,10 @@ from skfolio.utils.tools import AutoEnum
|
|
11
11
|
|
12
12
|
|
13
13
|
class BaseMeasure(AutoEnum):
|
14
|
-
"""Base Enum of measures"""
|
14
|
+
"""Base Enum of measures."""
|
15
15
|
|
16
16
|
def __repr__(self) -> str:
|
17
|
-
"""Enum representation for improved reading"""
|
17
|
+
"""Enum representation for improved reading."""
|
18
18
|
words = [
|
19
19
|
(
|
20
20
|
word.capitalize()
|
@@ -73,7 +73,7 @@ class BaseMeasure(AutoEnum):
|
|
73
73
|
|
74
74
|
|
75
75
|
class PerfMeasure(BaseMeasure):
|
76
|
-
"""Enumeration of performance measures
|
76
|
+
"""Enumeration of performance measures.
|
77
77
|
|
78
78
|
Attributes
|
79
79
|
----------
|
@@ -103,7 +103,7 @@ class PerfMeasure(BaseMeasure):
|
|
103
103
|
|
104
104
|
|
105
105
|
class RiskMeasure(BaseMeasure):
|
106
|
-
"""Enumeration of risk measures
|
106
|
+
"""Enumeration of risk measures.
|
107
107
|
|
108
108
|
Attributes
|
109
109
|
----------
|
@@ -199,7 +199,7 @@ class RiskMeasure(BaseMeasure):
|
|
199
199
|
|
200
200
|
|
201
201
|
class ExtraRiskMeasure(BaseMeasure):
|
202
|
-
"""Enumeration of other risk measures not used in convex optimization
|
202
|
+
"""Enumeration of other risk measures not used in convex optimization.
|
203
203
|
|
204
204
|
Attributes
|
205
205
|
----------
|
@@ -247,7 +247,7 @@ class ExtraRiskMeasure(BaseMeasure):
|
|
247
247
|
|
248
248
|
|
249
249
|
class RatioMeasure(BaseMeasure):
|
250
|
-
"""Enumeration of ratio measures
|
250
|
+
"""Enumeration of ratio measures.
|
251
251
|
|
252
252
|
Attributes
|
253
253
|
----------
|
skfolio/measures/_measures.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
# Gini mean difference and OWA GMD weights features are derived
|
7
7
|
# from Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
|
8
8
|
|
@@ -179,7 +179,6 @@ def third_central_moment(returns: np.ndarray) -> float:
|
|
179
179
|
value : float
|
180
180
|
Third central moment.
|
181
181
|
"""
|
182
|
-
|
183
182
|
return np.sum(np.power(returns - np.mean(returns, axis=0), 3)) / len(returns)
|
184
183
|
|
185
184
|
|
@@ -200,7 +199,6 @@ def skew(returns: np.ndarray) -> float:
|
|
200
199
|
value : float
|
201
200
|
Skew.
|
202
201
|
"""
|
203
|
-
|
204
202
|
return third_central_moment(returns) / standard_deviation(returns) ** 3
|
205
203
|
|
206
204
|
|
@@ -236,7 +234,6 @@ def kurtosis(returns: np.ndarray) -> float:
|
|
236
234
|
value : float
|
237
235
|
Kurtosis.
|
238
236
|
"""
|
239
|
-
|
240
237
|
return fourth_central_moment(returns) / standard_deviation(returns) ** 4
|
241
238
|
|
242
239
|
|
@@ -571,7 +568,7 @@ def ulcer_index(drawdowns: np.ndarray) -> float:
|
|
571
568
|
def owa_gmd_weights(n_observations: int) -> np.ndarray:
|
572
569
|
"""Compute the OWA weights used for the Gini mean difference (GMD) computation.
|
573
570
|
|
574
|
-
|
571
|
+
Parameters
|
575
572
|
----------
|
576
573
|
n_observations : int
|
577
574
|
Number of observations.
|
@@ -610,8 +607,8 @@ def gini_mean_difference(returns: np.ndarray) -> float:
|
|
610
607
|
|
611
608
|
|
612
609
|
def effective_number_assets(weights: np.ndarray) -> float:
|
613
|
-
r"""
|
614
|
-
Herfindahl index
|
610
|
+
r"""Compute the effective number of assets, defined as the inverse of the
|
611
|
+
Herfindahl index.
|
615
612
|
|
616
613
|
.. math:: N_{eff} = \frac{1}{\Vert w \Vert_{2}^{2}}
|
617
614
|
|
skfolio/metrics/__init__.py
CHANGED
skfolio/metrics/_scorer.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
"""Scorer module"""
|
1
|
+
"""Scorer module."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
# Implementation derived from:
|
7
7
|
# scikit-portfolio, Copyright (c) 2022, Carlo Nicolini, Licensed under MIT Licence.
|
8
8
|
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
@@ -18,7 +18,7 @@ from skfolio.portfolio import Portfolio
|
|
18
18
|
|
19
19
|
|
20
20
|
class _PortfolioScorer:
|
21
|
-
"""Portfolio Scorer wrapper"""
|
21
|
+
"""Portfolio Scorer wrapper."""
|
22
22
|
|
23
23
|
def __init__(self, score_func: Callable, sign: int, kwargs: dict):
|
24
24
|
self._score_func = score_func
|
@@ -119,7 +119,7 @@ def make_scorer(
|
|
119
119
|
greater_is_better = False
|
120
120
|
|
121
121
|
def score_func(pred: Portfolio) -> float:
|
122
|
-
"""Score function"""
|
122
|
+
"""Score function."""
|
123
123
|
return getattr(pred, measure.value)
|
124
124
|
|
125
125
|
sign = 1 if greater_is_better else -1
|
@@ -1,8 +1,8 @@
|
|
1
|
-
"""Combinatorial module"""
|
1
|
+
"""Combinatorial module."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
# Implementation derived from:
|
7
7
|
# scikit-portfolio, Copyright (c) 2022, Carlo Nicolini, Licensed under MIT Licence.
|
8
8
|
# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
|
@@ -36,7 +36,7 @@ class BaseCombinatorialCV(ABC):
|
|
36
36
|
|
37
37
|
@abstractmethod
|
38
38
|
def get_path_ids(self) -> np.ndarray:
|
39
|
-
"""Return the path id of each test sets in each split"""
|
39
|
+
"""Return the path id of each test sets in each split."""
|
40
40
|
pass
|
41
41
|
|
42
42
|
__repr__ = sks.BaseCrossValidator.__repr__
|
@@ -196,18 +196,19 @@ class CombinatorialPurgedCV(BaseCombinatorialCV):
|
|
196
196
|
|
197
197
|
@property
|
198
198
|
def n_splits(self) -> int:
|
199
|
-
"""Number of splits"""
|
199
|
+
"""Number of splits."""
|
200
200
|
return _n_splits(n_folds=self.n_folds, n_test_folds=self.n_test_folds)
|
201
201
|
|
202
202
|
@property
|
203
203
|
def n_test_paths(self) -> int:
|
204
204
|
"""Number of test paths that can be reconstructed from the train/test
|
205
|
-
combinations
|
205
|
+
combinations.
|
206
|
+
"""
|
206
207
|
return _n_test_paths(n_folds=self.n_folds, n_test_folds=self.n_test_folds)
|
207
208
|
|
208
209
|
@property
|
209
210
|
def test_set_index(self) -> np.ndarray:
|
210
|
-
"""Location of each test set"""
|
211
|
+
"""Location of each test set."""
|
211
212
|
return np.array(
|
212
213
|
list(itertools.combinations(np.arange(self.n_folds), self.n_test_folds))
|
213
214
|
).reshape(-1, self.n_test_folds)
|
@@ -215,7 +216,8 @@ class CombinatorialPurgedCV(BaseCombinatorialCV):
|
|
215
216
|
@property
|
216
217
|
def binary_train_test_sets(self) -> np.ndarray:
|
217
218
|
"""Identify training and test folds for each combinations by assigning `0` to
|
218
|
-
training folds and `1` to test folds
|
219
|
+
training folds and `1` to test folds.
|
220
|
+
"""
|
219
221
|
folds_train_test = np.zeros((self.n_folds, self.n_splits))
|
220
222
|
folds_train_test[
|
221
223
|
self.test_set_index, np.arange(self.n_splits)[:, np.newaxis]
|
@@ -230,7 +232,7 @@ class CombinatorialPurgedCV(BaseCombinatorialCV):
|
|
230
232
|
)
|
231
233
|
|
232
234
|
def get_path_ids(self) -> np.ndarray:
|
233
|
-
"""Return the path id of each test sets in each split"""
|
235
|
+
"""Return the path id of each test sets in each split."""
|
234
236
|
recombine_paths = self.recombined_paths
|
235
237
|
path_ids = np.zeros((self.n_splits, self.n_test_folds), dtype=int)
|
236
238
|
for i in range(self.n_splits):
|
@@ -334,7 +336,7 @@ class CombinatorialPurgedCV(BaseCombinatorialCV):
|
|
334
336
|
)
|
335
337
|
|
336
338
|
def plot_train_test_folds(self) -> skt.Figure:
|
337
|
-
"""Plot the train/test fold locations"""
|
339
|
+
"""Plot the train/test fold locations."""
|
338
340
|
values = self.binary_train_test_sets
|
339
341
|
fill_color = np.where(values == 0, "blue", "red")
|
340
342
|
fill_color = fill_color.astype(object)
|
@@ -367,7 +369,8 @@ class CombinatorialPurgedCV(BaseCombinatorialCV):
|
|
367
369
|
|
368
370
|
def plot_train_test_index(self, X) -> skt.Figure:
|
369
371
|
"""Plot the training and test indices for each combinations by assigning `0` to
|
370
|
-
training, `1` to test and `-1` to both purge and embargo indices.
|
372
|
+
training, `1` to test and `-1` to both purge and embargo indices.
|
373
|
+
"""
|
371
374
|
next(self.split(X))
|
372
375
|
n_samples = X.shape[0]
|
373
376
|
cond = [
|
@@ -425,12 +428,12 @@ def _n_splits(n_folds: int, n_test_folds: int) -> int:
|
|
425
428
|
n_splits : int
|
426
429
|
Number of splits
|
427
430
|
"""
|
428
|
-
return
|
431
|
+
return math.comb(n_folds, n_test_folds)
|
429
432
|
|
430
433
|
|
431
434
|
def _n_test_paths(n_folds: int, n_test_folds: int) -> int:
|
432
435
|
"""Number of test paths that can be reconstructed from the train/test
|
433
|
-
combinations
|
436
|
+
combinations.
|
434
437
|
|
435
438
|
Parameters
|
436
439
|
----------
|