skfolio 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +2 -2
- skfolio/cluster/__init__.py +1 -1
- skfolio/cluster/_hierarchical.py +1 -1
- skfolio/datasets/__init__.py +1 -1
- skfolio/datasets/_base.py +2 -2
- skfolio/datasets/data/__init__.py +1 -0
- skfolio/distance/__init__.py +1 -1
- skfolio/distance/_base.py +2 -2
- skfolio/distance/_distance.py +4 -4
- skfolio/distribution/__init__.py +56 -0
- skfolio/distribution/_base.py +203 -0
- skfolio/distribution/copula/__init__.py +35 -0
- skfolio/distribution/copula/_base.py +456 -0
- skfolio/distribution/copula/_clayton.py +539 -0
- skfolio/distribution/copula/_gaussian.py +407 -0
- skfolio/distribution/copula/_gumbel.py +560 -0
- skfolio/distribution/copula/_independent.py +196 -0
- skfolio/distribution/copula/_joe.py +609 -0
- skfolio/distribution/copula/_selection.py +111 -0
- skfolio/distribution/copula/_student_t.py +486 -0
- skfolio/distribution/copula/_utils.py +509 -0
- skfolio/distribution/multivariate/__init__.py +11 -0
- skfolio/distribution/multivariate/_base.py +241 -0
- skfolio/distribution/multivariate/_utils.py +632 -0
- skfolio/distribution/multivariate/_vine_copula.py +1254 -0
- skfolio/distribution/univariate/__init__.py +19 -0
- skfolio/distribution/univariate/_base.py +308 -0
- skfolio/distribution/univariate/_gaussian.py +136 -0
- skfolio/distribution/univariate/_johnson_su.py +152 -0
- skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
- skfolio/distribution/univariate/_selection.py +85 -0
- skfolio/distribution/univariate/_student_t.py +144 -0
- skfolio/exceptions.py +6 -6
- skfolio/measures/__init__.py +1 -1
- skfolio/measures/_enums.py +7 -7
- skfolio/measures/_measures.py +4 -7
- skfolio/metrics/__init__.py +2 -0
- skfolio/metrics/_scorer.py +4 -4
- skfolio/model_selection/__init__.py +2 -2
- skfolio/model_selection/_combinatorial.py +15 -12
- skfolio/model_selection/_validation.py +2 -2
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/covariance/_base.py +1 -1
- skfolio/moments/covariance/_denoise_covariance.py +1 -1
- skfolio/moments/covariance/_detone_covariance.py +1 -1
- skfolio/moments/covariance/_empirical_covariance.py +1 -1
- skfolio/moments/covariance/_ew_covariance.py +1 -1
- skfolio/moments/covariance/_gerber_covariance.py +1 -1
- skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
- skfolio/moments/covariance/_implied_covariance.py +2 -7
- skfolio/moments/covariance/_ledoit_wolf.py +1 -1
- skfolio/moments/covariance/_oas.py +1 -1
- skfolio/moments/covariance/_shrunk_covariance.py +1 -1
- skfolio/moments/expected_returns/_base.py +1 -1
- skfolio/moments/expected_returns/_empirical_mu.py +1 -1
- skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
- skfolio/moments/expected_returns/_ew_mu.py +1 -1
- skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
- skfolio/optimization/__init__.py +2 -0
- skfolio/optimization/_base.py +2 -2
- skfolio/optimization/cluster/__init__.py +2 -0
- skfolio/optimization/cluster/_nco.py +7 -7
- skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
- skfolio/optimization/cluster/hierarchical/_base.py +1 -2
- skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
- skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
- skfolio/optimization/convex/__init__.py +2 -0
- skfolio/optimization/convex/_base.py +8 -8
- skfolio/optimization/convex/_distributionally_robust.py +4 -4
- skfolio/optimization/convex/_maximum_diversification.py +5 -5
- skfolio/optimization/convex/_mean_risk.py +5 -6
- skfolio/optimization/convex/_risk_budgeting.py +3 -3
- skfolio/optimization/ensemble/__init__.py +2 -0
- skfolio/optimization/ensemble/_base.py +2 -2
- skfolio/optimization/ensemble/_stacking.py +1 -1
- skfolio/optimization/naive/__init__.py +2 -0
- skfolio/optimization/naive/_naive.py +1 -1
- skfolio/population/__init__.py +2 -0
- skfolio/population/_population.py +35 -9
- skfolio/portfolio/_base.py +42 -8
- skfolio/portfolio/_multi_period_portfolio.py +3 -2
- skfolio/portfolio/_portfolio.py +4 -4
- skfolio/pre_selection/__init__.py +2 -0
- skfolio/pre_selection/_drop_correlated.py +2 -2
- skfolio/pre_selection/_select_complete.py +25 -26
- skfolio/pre_selection/_select_k_extremes.py +2 -2
- skfolio/pre_selection/_select_non_dominated.py +2 -2
- skfolio/pre_selection/_select_non_expiring.py +2 -2
- skfolio/preprocessing/__init__.py +2 -0
- skfolio/preprocessing/_returns.py +2 -2
- skfolio/prior/__init__.py +4 -0
- skfolio/prior/_base.py +2 -2
- skfolio/prior/_black_litterman.py +5 -3
- skfolio/prior/_empirical.py +3 -1
- skfolio/prior/_factor_model.py +8 -4
- skfolio/prior/_synthetic_data.py +239 -0
- skfolio/synthetic_returns/__init__.py +1 -0
- skfolio/typing.py +1 -1
- skfolio/uncertainty_set/__init__.py +2 -0
- skfolio/uncertainty_set/_base.py +2 -2
- skfolio/uncertainty_set/_bootstrap.py +1 -1
- skfolio/uncertainty_set/_empirical.py +1 -1
- skfolio/utils/__init__.py +1 -0
- skfolio/utils/bootstrap.py +2 -2
- skfolio/utils/equations.py +13 -10
- skfolio/utils/sorting.py +2 -2
- skfolio/utils/stats.py +7 -7
- skfolio/utils/tools.py +76 -12
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/METADATA +99 -24
- skfolio-0.8.1.dist-info/RECORD +120 -0
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/WHEEL +1 -1
- skfolio-0.7.0.dist-info/RECORD +0 -95
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info/licenses}/LICENSE +0 -0
- {skfolio-0.7.0.dist-info → skfolio-0.8.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
"""Univariate Distribution module."""
|
2
|
+
|
3
|
+
from skfolio.distribution.univariate._base import BaseUnivariateDist
|
4
|
+
from skfolio.distribution.univariate._gaussian import Gaussian
|
5
|
+
from skfolio.distribution.univariate._johnson_su import JohnsonSU
|
6
|
+
from skfolio.distribution.univariate._normal_inverse_gaussian import (
|
7
|
+
NormalInverseGaussian,
|
8
|
+
)
|
9
|
+
from skfolio.distribution.univariate._selection import select_univariate_dist
|
10
|
+
from skfolio.distribution.univariate._student_t import StudentT
|
11
|
+
|
12
|
+
__all__ = [
|
13
|
+
"BaseUnivariateDist",
|
14
|
+
"Gaussian",
|
15
|
+
"JohnsonSU",
|
16
|
+
"NormalInverseGaussian",
|
17
|
+
"StudentT",
|
18
|
+
"select_univariate_dist",
|
19
|
+
]
|
@@ -0,0 +1,308 @@
|
|
1
|
+
"""Base Univariate Estimator."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Authors: The skfolio developers
|
5
|
+
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
|
6
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
7
|
+
|
8
|
+
import warnings
|
9
|
+
from abc import ABC, abstractmethod
|
10
|
+
|
11
|
+
import numpy as np
|
12
|
+
import numpy.typing as npt
|
13
|
+
import plotly.graph_objects as go
|
14
|
+
import scipy.stats as st
|
15
|
+
import sklearn.utils as sku
|
16
|
+
import sklearn.utils.validation as skv
|
17
|
+
|
18
|
+
from skfolio.distribution._base import BaseDistribution
|
19
|
+
|
20
|
+
|
21
|
+
class BaseUnivariateDist(BaseDistribution, ABC):
    """Base Univariate Distribution Estimator.

    This abstract class serves as a foundation for univariate distribution models
    based on scipy.

    Concrete subclasses set the class attribute `_scipy_model` to a SciPy continuous
    distribution and implement `_scipy_params` and `fit`. Every other method in
    this class (scoring, sampling, CDF/PPF and plotting) is expressed in terms of
    those two pieces.

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.
    """

    # SciPy distribution object that the concrete subclass delegates to.
    _scipy_model: st.rv_continuous

    def __init__(self, random_state: int | None = None):
        super().__init__(random_state=random_state)

    @property
    @abstractmethod
    def _scipy_params(self) -> dict[str, float]:
        """Dictionary of parameters to pass to the underlying SciPy distribution."""
        pass

    @property
    def n_params(self) -> int:
        """Number of model parameters.

        This is the number of entries in `_scipy_params`.
        """
        return len(self._scipy_params)

    @property
    def fitted_repr(self) -> str:
        """String representation of the fitted univariate distribution.

        The string has the form ``ClassName(name=value, ...)`` where each value of
        `_scipy_params` is formatted with two significant digits.
        """
        skv.check_is_fitted(self)
        params = ", ".join(f"{k}={v:0.2g}" for k, v in self._scipy_params.items())
        return f"{self.__class__.__name__}({params})"

    @abstractmethod
    def fit(self, X: npt.ArrayLike, y=None) -> "BaseUnivariateDist":
        """Fit the univariate distribution model.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            The input data. X must contain a single column.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : BaseUnivariateDist
            Returns the instance itself.
        """
        pass

    def _validate_X(self, X: npt.ArrayLike, reset: bool) -> np.ndarray:
        """Validate and convert the input data X.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            The input data. X must contain a single column.

        reset : bool
            Whether to reset the `n_features_in_` attribute.
            If False, the input will be checked for consistency with data
            provided when reset was last True.

        Returns
        -------
        validated_X : ndarray of shape (n_observations, 1)
            The validated input array, converted to `float64`.

        Raises
        ------
        ValueError
            If X does not contain exactly one column.
        """
        X = skv.validate_data(self, X, dtype=np.float64, reset=reset)
        if X.shape[1] != 1:
            raise ValueError(
                "X should contain a single column for Univariate Distribution"
            )
        return X

    def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the log-likelihood of each sample (log-pdf) under the model.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            An array of points at which to evaluate the log-probability density.
            The data should be a single feature column.

        Returns
        -------
        density : ndarray of shape (n_observations,)
            Log-likelihood values for each observation in X.
        """
        X = self._validate_X(X, reset=False)
        # logpdf keeps the (n_observations, 1) shape of X; flatten it to 1D.
        log_density = self._scipy_model.logpdf(X, **self._scipy_params).ravel()
        return log_density

    def sample(self, n_samples: int = 1) -> np.ndarray:
        """Generate random samples from the fitted distribution.

        The samples are drawn from the underlying SciPy distribution using the
        fitted parameters. The random generator is rebuilt from `random_state` on
        every call.

        Parameters
        ----------
        n_samples : int, default=1
            Number of samples to generate.

        Returns
        -------
        X : ndarray of shape (n_samples, 1)
            The generated samples.
        """
        skv.check_is_fitted(self)
        rng = sku.check_random_state(self.random_state)
        sample = self._scipy_model.rvs(
            size=(n_samples, 1), random_state=rng, **self._scipy_params
        )
        return sample

    def cdf(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the cumulative distribution function (CDF) for the given data.

        X is forwarded to the SciPy distribution as is; it is not passed through
        `_validate_X`.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            Data points at which to evaluate the CDF.

        Returns
        -------
        cdf : ndarray of shape (n_observations, 1)
            The CDF evaluated at each data point.
        """
        skv.check_is_fitted(self)
        return self._scipy_model.cdf(X, **self._scipy_params)

    def ppf(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the percent point function (inverse of the CDF).

        X is forwarded to the SciPy distribution as is; it is not passed through
        `_validate_X`. `plot_pdf` relies on this to query scalar probabilities.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            Probabilities for which to compute the corresponding quantiles.

        Returns
        -------
        ppf : ndarray of shape (n_observations, 1)
            The quantiles corresponding to the given probabilities.
        """
        skv.check_is_fitted(self)
        return self._scipy_model.ppf(X, **self._scipy_params)

    def plot_pdf(
        self, X: npt.ArrayLike | None = None, title: str | None = None
    ) -> go.Figure:
        """Plot the probability density function (PDF).

        The PDF is evaluated on 1000 evenly spaced points between the 0.01% and
        99.99% quantiles of the fitted model.

        Parameters
        ----------
        X : array-like of shape (n_samples, 1), optional
            If provided, it is used to plot the empirical data KDE for comparison
            versus the model PDF.

        title : str, optional
            The title for the plot. If not provided, a default title based on the
            class name is used.

        Returns
        -------
        fig : go.Figure
            A Plotly figure object containing the PDF plot.
        """
        skv.check_is_fitted(self)
        if title is None:
            title = f"PDF of {self.__class__.__name__}"
            if X is not None:
                title += " vs Empirical KDE"

        # Compute the quantile-based range
        lower_bound = self.ppf(1e-4)
        upper_bound = self.ppf(1 - 1e-4)
        # Generate x values across this range
        x = np.linspace(lower_bound, upper_bound, 1000)

        traces = []
        if X is not None:
            with warnings.catch_warnings():
                # Silence only the scikit-learn feature-names warning raised
                # while validating X.
                warnings.filterwarnings(
                    "ignore", message="^X has feature names", category=UserWarning
                )
                X = self._validate_X(X, reset=False)
            kde = st.gaussian_kde(X[:, 0])
            y_kde = kde(x)
            traces.append(
                go.Scatter(
                    x=x,
                    y=y_kde,
                    mode="lines",
                    name="Empirical KDE",
                    line=dict(color="rgb(85,168,104)"),
                    fill="tozeroy",
                )
            )

        with warnings.catch_warnings():
            # The evaluation grid is a plain ndarray; ignore any UserWarning
            # emitted while scoring it.
            warnings.filterwarnings("ignore", category=UserWarning)
            pdfs = np.exp(self.score_samples(x.reshape(-1, 1)))
        traces.append(
            go.Scatter(
                x=x,
                y=pdfs.flatten(),
                mode="lines",
                name=self.__class__.__name__,
                line=dict(color="rgb(31, 119, 180)"),
                fill="tozeroy",
            )
        )

        fig = go.Figure(data=traces)
        fig.update_layout(
            title=title,
            xaxis_title="x",
            yaxis_title="Probability Density",
        )
        # The x axis is displayed as percentages.
        fig.update_xaxes(
            tickformat=".0%",
        )
        return fig

    def qq_plot(self, X: npt.ArrayLike, title: str | None = None) -> go.Figure:
        """Plot the empirical quantiles of the sample X versus the quantiles of the
        fitted model.

        Parameters
        ----------
        X : array-like of shape (n_samples, 1)
            Used to plot the empirical quantiles for comparison versus the model
            quantiles.

        title : str, optional
            The title for the plot. If not provided, a default title based on the
            class name is used.

        Returns
        -------
        fig : go.Figure
            A Plotly figure object containing the Q-Q plot.
        """
        skv.check_is_fitted(self)
        if title is None:
            title = f"Q-Q Plot of {self.__class__.__name__} vs Sample Data"

        with warnings.catch_warnings():
            # Silence only the scikit-learn feature-names warning raised while
            # validating X.
            warnings.filterwarnings(
                "ignore", message="^X has feature names", category=UserWarning
            )
            X = self._validate_X(X, reset=False)

        X_sorted = np.sort(X[:, 0])
        n = len(X)

        # Compute theoretical quantiles from the model at the mid-point
        # probabilities (i - 0.5) / n for i = 1..n
        theoretical_quantiles = self.ppf((np.arange(1, n + 1) - 0.5) / n)

        # Create the Q-Q plot using Plotly
        fig = go.Figure(
            go.Scatter(
                x=theoretical_quantiles,
                y=X_sorted,
                mode="markers",
            )
        )
        # Add a reference line (45° line) spanning both sets of quantiles
        min_val = min(float(theoretical_quantiles[0]), float(X_sorted[0]))
        max_val = max(float(theoretical_quantiles[-1]), float(X_sorted[-1]))
        fig.add_trace(
            go.Scatter(
                x=[min_val, max_val],
                y=[min_val, max_val],
                mode="lines",
            )
        )
        fig.update_layout(
            title=title,
            xaxis_title="Theoretical Quantiles",
            yaxis_title="Sample Quantiles",
            showlegend=False,
        )
        return fig
|
@@ -0,0 +1,136 @@
|
|
1
|
+
"""Univariate Gaussian Estimation."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Authors: The skfolio developers
|
5
|
+
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
|
6
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
7
|
+
|
8
|
+
import numpy.typing as npt
|
9
|
+
import scipy.stats as st
|
10
|
+
|
11
|
+
from skfolio.distribution.univariate._base import BaseUnivariateDist
|
12
|
+
|
13
|
+
|
14
|
+
class Gaussian(BaseUnivariateDist):
    r"""Univariate Gaussian (normal) distribution estimator.

    Fits a normal distribution to a single column of data by delegating to
    `scipy.stats.norm`.

    In its "standardized" form, the probability density function is:

    .. math::

        f(x) = \frac{\exp(-x^2/2)}{\sqrt{2\pi}}

    The loc and scale parameters shift and/or scale this standardized density:
    `pdf(x, loc, scale)` is equivalent to `pdf(y) / scale` with
    `y = (x - loc) / scale`.

    For more information, you can refer to the `scipy documentation <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html#scipy.stats.norm>`_

    Parameters
    ----------
    loc : float, optional
        When given, the location parameter (mean) is held fixed at this value
        instead of being estimated from the data.

    scale : float, optional
        When given, the scale parameter (standard deviation) is held fixed at this
        value instead of being estimated from the data.

    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.

    Attributes
    ----------
    loc_ : float
        The fitted location (mean) of the distribution.

    scale_ : float
        The fitted scale (standard deviation) of the distribution.

    Notes
    -----
    At most one of `loc` and `scale` may be fixed; `fit` raises a `ValueError`
    when both are provided.

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_index
    >>> from skfolio.preprocessing import prices_to_returns
    >>> from skfolio.distribution.univariate import Gaussian
    >>>
    >>> # Load historical prices and convert them to returns
    >>> prices = load_sp500_index()
    >>> X = prices_to_returns(prices)
    >>>
    >>> # Initialize the Gaussian estimator.
    >>> model = Gaussian()
    >>>
    >>> # Fit the Gaussian model to the data.
    >>> model.fit(X)
    >>>
    >>> # Display the fitted parameters.
    >>> print(model.fitted_repr)
    Gaussian(0.00035, 0.0115)
    >>>
    >>> # Compute the log-likelihood, total log-likelihood, CDF, PPF, AIC, and BIC
    >>> log_likelihood = model.score_samples(X)
    >>> score = model.score(X)
    >>> cdf = model.cdf(X)
    >>> ppf = model.ppf(X)
    >>> aic = model.aic(X)
    >>> bic = model.bic(X)
    >>>
    >>> # Generate 5 new samples from the fitted Gaussian distribution.
    >>> samples = model.sample(n_samples=5)
    >>>
    >>> # Plot the estimated probability density function (PDF).
    >>> fig = model.plot_pdf()
    >>> fig.show()
    """

    loc_: float
    scale_: float
    _scipy_model = st.norm

    def __init__(
        self,
        loc: float | None = None,
        scale: float | None = None,
        random_state: int | None = None,
    ):
        super().__init__(random_state=random_state)
        self.loc = loc
        self.scale = scale

    @property
    def _scipy_params(self) -> dict[str, float]:
        """Fitted `loc` and `scale`, keyed by their SciPy keyword names."""
        return dict(loc=self.loc_, scale=self.scale_)

    def fit(self, X: npt.ArrayLike, y=None) -> "Gaussian":
        """Estimate the Gaussian parameters from a single column of data.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            The input data. X must contain a single column.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : Gaussian
            Returns the instance itself.

        Raises
        ------
        ValueError
            If both `loc` and `scale` are fixed, leaving nothing to estimate.
        """
        X = self._validate_X(X, reset=True)

        if not (self.loc is None or self.scale is None):
            raise ValueError("Either loc or scale must be None to be fitted")

        # Forward only the user-fixed parameters to SciPy (`floc` / `fscale`).
        constraints = {
            keyword: value
            for keyword, value in (("floc", self.loc), ("fscale", self.scale))
            if value is not None
        }

        fitted_loc, fitted_scale = self._scipy_model.fit(X, **constraints)
        self.loc_ = fitted_loc
        self.scale_ = fitted_scale

        return self
|
@@ -0,0 +1,152 @@
|
|
1
|
+
"""Johnson SU Estimator."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Authors: The skfolio developers
|
5
|
+
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
|
6
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
7
|
+
|
8
|
+
import numpy.typing as npt
|
9
|
+
import scipy.stats as st
|
10
|
+
|
11
|
+
from skfolio.distribution.univariate._base import BaseUnivariateDist
|
12
|
+
|
13
|
+
|
14
|
+
class JohnsonSU(BaseUnivariateDist):
    r"""Univariate Johnson SU distribution estimator.

    Fits a Johnson SU distribution to a single column of data by delegating to
    `scipy.stats.johnsonsu`. Because the Johnson SU distribution can capture both
    skewness and fat tails, it is appropriate for financial time series modeling.

    In its "standardized" form, the probability density function is:

    .. math::

        f(x, a, b) = \frac{b}{\sqrt{x^2 + 1}}
                     \phi(a + b \log(x + \sqrt{x^2 + 1}))

    where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`.
    :math:`\phi` is the pdf of the normal distribution.

    The loc and scale parameters shift and/or scale this standardized density:
    `pdf(x, a, b, loc, scale)` is equivalent to `pdf(y, a, b) / scale` with
    `y = (x - loc) / scale`.

    For more information, you can refer to the `scipy documentation <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.johnsonsu.html#scipy.stats.johnsonsu>`_

    Parameters
    ----------
    loc : float, optional
        When given, the location parameter is held fixed at this value during
        fitting instead of being estimated from the data.

    scale : float, optional
        When given, the scale parameter is held fixed at this value during fitting
        instead of being estimated from the data.

    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.

    Attributes
    ----------
    a_ : float
        The fitted first shape parameter of the Johnson SU distribution.

    b_ : float
        The fitted second shape parameter of the Johnson SU distribution.

    loc_ : float
        The fitted location parameter.

    scale_ : float
        The fitted scale parameter.

    Notes
    -----
    At most one of `loc` and `scale` may be fixed; `fit` raises a `ValueError`
    when both are provided.

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_index
    >>> from skfolio.preprocessing import prices_to_returns
    >>> from skfolio.distribution.univariate import JohnsonSU
    >>>
    >>> # Load historical prices and convert them to returns
    >>> prices = load_sp500_index()
    >>> X = prices_to_returns(prices)
    >>>
    >>> # Initialize the estimator.
    >>> model = JohnsonSU()
    >>>
    >>> # Fit the model to the data.
    >>> model.fit(X)
    >>>
    >>> # Display the fitted parameters.
    >>> print(model.fitted_repr)
    JohnsonSU(0.0742, 1.08, 0.00115, 0.00774)
    >>>
    >>> # Compute the log-likelihood, total log-likelihood, CDF, PPF, AIC, and BIC
    >>> log_likelihood = model.score_samples(X)
    >>> score = model.score(X)
    >>> cdf = model.cdf(X)
    >>> ppf = model.ppf(X)
    >>> aic = model.aic(X)
    >>> bic = model.bic(X)
    >>>
    >>> # Generate 5 new samples from the fitted distribution.
    >>> samples = model.sample(n_samples=5)
    >>>
    >>> # Plot the estimated probability density function (PDF).
    >>> fig = model.plot_pdf()
    >>> fig.show()
    """

    a_: float
    b_: float
    loc_: float
    scale_: float
    _scipy_model = st.johnsonsu

    def __init__(
        self,
        loc: float | None = None,
        scale: float | None = None,
        random_state: int | None = None,
    ):
        super().__init__(random_state=random_state)
        self.loc = loc
        self.scale = scale

    @property
    def _scipy_params(self) -> dict[str, float]:
        """Fitted `a`, `b`, `loc` and `scale`, keyed by their SciPy keyword names."""
        return dict(a=self.a_, b=self.b_, loc=self.loc_, scale=self.scale_)

    def fit(self, X: npt.ArrayLike, y=None) -> "JohnsonSU":
        """Estimate the Johnson SU parameters from a single column of data.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            The input data. X must contain a single column.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : JohnsonSU
            Returns the instance itself.

        Raises
        ------
        ValueError
            If both `loc` and `scale` are fixed.
        """
        X = self._validate_X(X, reset=True)

        if not (self.loc is None or self.scale is None):
            raise ValueError("Either loc or scale must be None to be fitted")

        # Forward only the user-fixed parameters to SciPy (`floc` / `fscale`).
        constraints = {
            keyword: value
            for keyword, value in (("floc", self.loc), ("fscale", self.scale))
            if value is not None
        }

        fitted = self._scipy_model.fit(X, **constraints)
        self.a_, self.b_, self.loc_, self.scale_ = fitted
        return self
|