skfolio 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. skfolio/__init__.py +2 -2
  2. skfolio/cluster/__init__.py +1 -1
  3. skfolio/cluster/_hierarchical.py +1 -1
  4. skfolio/datasets/__init__.py +1 -1
  5. skfolio/datasets/_base.py +2 -2
  6. skfolio/datasets/data/__init__.py +1 -0
  7. skfolio/distance/__init__.py +1 -1
  8. skfolio/distance/_base.py +2 -2
  9. skfolio/distance/_distance.py +4 -4
  10. skfolio/distribution/__init__.py +56 -0
  11. skfolio/distribution/_base.py +203 -0
  12. skfolio/distribution/copula/__init__.py +35 -0
  13. skfolio/distribution/copula/_base.py +456 -0
  14. skfolio/distribution/copula/_clayton.py +539 -0
  15. skfolio/distribution/copula/_gaussian.py +407 -0
  16. skfolio/distribution/copula/_gumbel.py +560 -0
  17. skfolio/distribution/copula/_independent.py +196 -0
  18. skfolio/distribution/copula/_joe.py +609 -0
  19. skfolio/distribution/copula/_selection.py +111 -0
  20. skfolio/distribution/copula/_student_t.py +486 -0
  21. skfolio/distribution/copula/_utils.py +509 -0
  22. skfolio/distribution/multivariate/__init__.py +11 -0
  23. skfolio/distribution/multivariate/_base.py +241 -0
  24. skfolio/distribution/multivariate/_utils.py +632 -0
  25. skfolio/distribution/multivariate/_vine_copula.py +1254 -0
  26. skfolio/distribution/univariate/__init__.py +19 -0
  27. skfolio/distribution/univariate/_base.py +308 -0
  28. skfolio/distribution/univariate/_gaussian.py +136 -0
  29. skfolio/distribution/univariate/_johnson_su.py +152 -0
  30. skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
  31. skfolio/distribution/univariate/_selection.py +85 -0
  32. skfolio/distribution/univariate/_student_t.py +144 -0
  33. skfolio/exceptions.py +6 -6
  34. skfolio/measures/__init__.py +1 -1
  35. skfolio/measures/_enums.py +7 -7
  36. skfolio/measures/_measures.py +4 -7
  37. skfolio/metrics/__init__.py +2 -0
  38. skfolio/metrics/_scorer.py +4 -4
  39. skfolio/model_selection/__init__.py +2 -2
  40. skfolio/model_selection/_combinatorial.py +15 -12
  41. skfolio/model_selection/_validation.py +2 -2
  42. skfolio/model_selection/_walk_forward.py +3 -3
  43. skfolio/moments/covariance/_base.py +1 -1
  44. skfolio/moments/covariance/_denoise_covariance.py +1 -1
  45. skfolio/moments/covariance/_detone_covariance.py +1 -1
  46. skfolio/moments/covariance/_empirical_covariance.py +1 -1
  47. skfolio/moments/covariance/_ew_covariance.py +1 -1
  48. skfolio/moments/covariance/_gerber_covariance.py +1 -1
  49. skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
  50. skfolio/moments/covariance/_implied_covariance.py +2 -7
  51. skfolio/moments/covariance/_ledoit_wolf.py +1 -1
  52. skfolio/moments/covariance/_oas.py +1 -1
  53. skfolio/moments/covariance/_shrunk_covariance.py +1 -1
  54. skfolio/moments/expected_returns/_base.py +1 -1
  55. skfolio/moments/expected_returns/_empirical_mu.py +1 -1
  56. skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
  57. skfolio/moments/expected_returns/_ew_mu.py +1 -1
  58. skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
  59. skfolio/optimization/__init__.py +2 -0
  60. skfolio/optimization/_base.py +2 -2
  61. skfolio/optimization/cluster/__init__.py +2 -0
  62. skfolio/optimization/cluster/_nco.py +7 -7
  63. skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
  64. skfolio/optimization/cluster/hierarchical/_base.py +1 -2
  65. skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
  66. skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
  67. skfolio/optimization/convex/__init__.py +2 -0
  68. skfolio/optimization/convex/_base.py +8 -8
  69. skfolio/optimization/convex/_distributionally_robust.py +4 -4
  70. skfolio/optimization/convex/_maximum_diversification.py +5 -5
  71. skfolio/optimization/convex/_mean_risk.py +5 -6
  72. skfolio/optimization/convex/_risk_budgeting.py +3 -3
  73. skfolio/optimization/ensemble/__init__.py +2 -0
  74. skfolio/optimization/ensemble/_base.py +2 -2
  75. skfolio/optimization/ensemble/_stacking.py +1 -1
  76. skfolio/optimization/naive/__init__.py +2 -0
  77. skfolio/optimization/naive/_naive.py +1 -1
  78. skfolio/population/__init__.py +2 -0
  79. skfolio/population/_population.py +34 -7
  80. skfolio/portfolio/_base.py +42 -8
  81. skfolio/portfolio/_multi_period_portfolio.py +3 -2
  82. skfolio/portfolio/_portfolio.py +4 -4
  83. skfolio/pre_selection/__init__.py +2 -0
  84. skfolio/pre_selection/_drop_correlated.py +2 -2
  85. skfolio/pre_selection/_select_complete.py +25 -26
  86. skfolio/pre_selection/_select_k_extremes.py +2 -2
  87. skfolio/pre_selection/_select_non_dominated.py +2 -2
  88. skfolio/pre_selection/_select_non_expiring.py +2 -2
  89. skfolio/preprocessing/__init__.py +2 -0
  90. skfolio/preprocessing/_returns.py +2 -2
  91. skfolio/prior/__init__.py +4 -0
  92. skfolio/prior/_base.py +2 -2
  93. skfolio/prior/_black_litterman.py +5 -3
  94. skfolio/prior/_empirical.py +3 -1
  95. skfolio/prior/_factor_model.py +8 -4
  96. skfolio/prior/_synthetic_data.py +239 -0
  97. skfolio/synthetic_returns/__init__.py +1 -0
  98. skfolio/typing.py +1 -1
  99. skfolio/uncertainty_set/__init__.py +2 -0
  100. skfolio/uncertainty_set/_base.py +2 -2
  101. skfolio/uncertainty_set/_bootstrap.py +1 -1
  102. skfolio/uncertainty_set/_empirical.py +1 -1
  103. skfolio/utils/__init__.py +1 -0
  104. skfolio/utils/bootstrap.py +2 -2
  105. skfolio/utils/equations.py +13 -10
  106. skfolio/utils/sorting.py +2 -2
  107. skfolio/utils/stats.py +7 -7
  108. skfolio/utils/tools.py +76 -12
  109. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/METADATA +99 -24
  110. skfolio-0.8.0.dist-info/RECORD +120 -0
  111. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/WHEEL +1 -1
  112. skfolio-0.7.0.dist-info/RECORD +0 -95
  113. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info/licenses}/LICENSE +0 -0
  114. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,19 @@
1
+ """Univariate Distribution module."""
2
+
3
+ from skfolio.distribution.univariate._base import BaseUnivariateDist
4
+ from skfolio.distribution.univariate._gaussian import Gaussian
5
+ from skfolio.distribution.univariate._johnson_su import JohnsonSU
6
+ from skfolio.distribution.univariate._normal_inverse_gaussian import (
7
+ NormalInverseGaussian,
8
+ )
9
+ from skfolio.distribution.univariate._selection import select_univariate_dist
10
+ from skfolio.distribution.univariate._student_t import StudentT
11
+
12
+ __all__ = [
13
+ "BaseUnivariateDist",
14
+ "Gaussian",
15
+ "JohnsonSU",
16
+ "NormalInverseGaussian",
17
+ "StudentT",
18
+ "select_univariate_dist",
19
+ ]
@@ -0,0 +1,308 @@
1
+ """Base Univariate Estimator."""
2
+
3
+ # Copyright (c) 2025
4
+ # Authors: The skfolio developers
5
+ # Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
6
+ # SPDX-License-Identifier: BSD-3-Clause
7
+
8
+ import warnings
9
+ from abc import ABC, abstractmethod
10
+
11
+ import numpy as np
12
+ import numpy.typing as npt
13
+ import plotly.graph_objects as go
14
+ import scipy.stats as st
15
+ import sklearn.utils as sku
16
+ import sklearn.utils.validation as skv
17
+
18
+ from skfolio.distribution._base import BaseDistribution
19
+
20
+
21
class BaseUnivariateDist(BaseDistribution, ABC):
    """Base Univariate Distribution Estimator.

    This abstract class serves as a foundation for univariate distribution models
    based on scipy.

    Subclasses are expected to set the `_scipy_model` class attribute and to
    implement both the `_scipy_params` property and the `fit` method.

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.
    """

    # SciPy continuous distribution backing the estimator (set by subclasses).
    _scipy_model: st.rv_continuous

    def __init__(self, random_state: int | None = None):
        super().__init__(random_state=random_state)

    @property
    @abstractmethod
    def _scipy_params(self) -> dict[str, float]:
        """Dictionary of parameters to pass to the underlying SciPy distribution."""
        pass

    @property
    def n_params(self) -> int:
        """Number of model parameters."""
        return len(self._scipy_params)

    @property
    def fitted_repr(self) -> str:
        """String representation of the fitted univariate distribution.

        The representation has the form ``ClassName(name=value, ...)`` where each
        value is formatted with two significant digits.
        """
        skv.check_is_fitted(self)
        params = ", ".join([f"{k}={v:0.2g}" for k, v in self._scipy_params.items()])
        return f"{self.__class__.__name__}({params})"

    @abstractmethod
    def fit(self, X: npt.ArrayLike, y=None) -> "BaseUnivariateDist":
        """Fit the univariate distribution model.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            The input data. X must contain a single column.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : BaseUnivariateDist
            Returns the instance itself.
        """
        pass

    def _validate_X(self, X: npt.ArrayLike, reset: bool) -> np.ndarray:
        """Validate and convert the input data X.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            The input data. X must contain a single column.

        reset : bool
            Whether to reset the `n_features_in_` attribute.
            If False, the input will be checked for consistency with data
            provided when reset was last True.

        Returns
        -------
        validated_X : ndarray of shape (n_observations, 1)
            The validated input array.

        Raises
        ------
        ValueError
            If X does not contain exactly one column.
        """
        X = skv.validate_data(self, X, dtype=np.float64, reset=reset)
        if X.shape[1] != 1:
            raise ValueError(
                "X should contain a single column for Univariate Distribution"
            )
        return X

    def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the log-likelihood of each sample (log-pdf) under the model.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            An array of points at which to evaluate the log-probability density.
            The data should be a single feature column.

        Returns
        -------
        density : ndarray of shape (n_observations,)
            Log-likelihood values for each observation in X.
        """
        # Same fitted-state guard as `cdf`, `ppf` and `sample`; without it an
        # unfitted model may only fail later, when the fitted parameters are read.
        skv.check_is_fitted(self)
        X = self._validate_X(X, reset=False)
        log_density = self._scipy_model.logpdf(X, **self._scipy_params).ravel()
        return log_density

    def sample(self, n_samples: int = 1) -> np.ndarray:
        """Generate random samples from the fitted distribution.

        Parameters
        ----------
        n_samples : int, default=1
            Number of samples to generate.

        Returns
        -------
        X : ndarray of shape (n_samples, 1)
            The generated samples.
        """
        skv.check_is_fitted(self)
        rng = sku.check_random_state(self.random_state)
        sample = self._scipy_model.rvs(
            size=(n_samples, 1), random_state=rng, **self._scipy_params
        )
        return sample

    def cdf(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the cumulative distribution function (CDF) for the given data.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            Data points at which to evaluate the CDF.

        Returns
        -------
        cdf : ndarray of shape (n_observations, 1)
            The CDF evaluated at each data point.
        """
        skv.check_is_fitted(self)
        return self._scipy_model.cdf(X, **self._scipy_params)

    def ppf(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the percent point function (inverse of the CDF) for the given
        probabilities.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            Probabilities for which to compute the corresponding quantiles.

        Returns
        -------
        ppf : ndarray of shape (n_observations, 1)
            The quantiles corresponding to the given probabilities.
        """
        skv.check_is_fitted(self)
        return self._scipy_model.ppf(X, **self._scipy_params)

    def plot_pdf(
        self, X: npt.ArrayLike | None = None, title: str | None = None
    ) -> go.Figure:
        """Plot the probability density function (PDF).

        Parameters
        ----------
        X : array-like of shape (n_samples, 1), optional
            If provided, it is used to plot the empirical data KDE for comparison
            versus the model PDF.

        title : str, optional
            The title for the plot. If not provided, a default title based on the
            estimator's class name is used.

        Returns
        -------
        fig : go.Figure
            A Plotly figure object containing the PDF plot.
        """
        skv.check_is_fitted(self)
        if title is None:
            title = f"PDF of {self.__class__.__name__}"
            if X is not None:
                title += " vs Empirical KDE"

        # Compute the quantile-based range
        lower_bound = self.ppf(1e-4)
        upper_bound = self.ppf(1 - 1e-4)
        # Generate x values across this range
        x = np.linspace(lower_bound, upper_bound, 1000)

        traces = []
        if X is not None:
            # Silence only the feature-names warning emitted during validation.
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", message="^X has feature names", category=UserWarning
                )
                X = self._validate_X(X, reset=False)
            kde = st.gaussian_kde(X[:, 0])
            y_kde = kde(x)
            traces.append(
                go.Scatter(
                    x=x,
                    y=y_kde,
                    mode="lines",
                    name="Empirical KDE",
                    line=dict(color="rgb(85,168,104)"),
                    fill="tozeroy",
                )
            )

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=UserWarning)
            # score_samples returns log-densities; exponentiate to get the PDF.
            pdfs = np.exp(self.score_samples(x.reshape(-1, 1)))
        traces.append(
            go.Scatter(
                x=x,
                y=pdfs.flatten(),
                mode="lines",
                name=self.__class__.__name__,
                line=dict(color="rgb(31, 119, 180)"),
                fill="tozeroy",
            )
        )

        fig = go.Figure(data=traces)
        fig.update_layout(
            title=title,
            xaxis_title="x",
            yaxis_title="Probability Density",
        )
        fig.update_xaxes(
            tickformat=".0%",
        )
        return fig

    def qq_plot(self, X: npt.ArrayLike, title: str | None = None) -> go.Figure:
        """Plot the empirical quantiles of the sample X versus the quantiles of the
        fitted model.

        Parameters
        ----------
        X : array-like of shape (n_samples, 1)
            Used to plot the empirical quantiles for comparison versus the model
            quantiles.

        title : str, optional
            The title for the plot. If not provided, a default title based on the
            estimator's class name is used.

        Returns
        -------
        fig : go.Figure
            A Plotly figure object containing the Q-Q plot.
        """
        skv.check_is_fitted(self)
        if title is None:
            title = f"Q-Q Plot of {self.__class__.__name__} vs Sample Data"

        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", message="^X has feature names", category=UserWarning
            )
            X = self._validate_X(X, reset=False)

        X_sorted = np.sort(X[:, 0])
        n = len(X)

        # Compute theoretical quantiles from the model at the mid-point
        # probabilities (i - 0.5) / n for i = 1..n
        theoretical_quantiles = self.ppf((np.arange(1, n + 1) - 0.5) / n)

        # Create the Q-Q plot using Plotly
        fig = go.Figure(
            go.Scatter(
                x=theoretical_quantiles,
                y=X_sorted,
                mode="markers",
            )
        )
        # Add a reference line (45° line)
        min_val = min(float(theoretical_quantiles[0]), float(X_sorted[0]))
        max_val = max(float(theoretical_quantiles[-1]), float(X_sorted[-1]))
        fig.add_trace(
            go.Scatter(
                x=[min_val, max_val],
                y=[min_val, max_val],
                mode="lines",
            )
        )
        fig.update_layout(
            title=title,
            xaxis_title="Theoretical Quantiles",
            yaxis_title="Sample Quantiles",
            showlegend=False,
        )
        return fig
@@ -0,0 +1,136 @@
1
+ """Univariate Gaussian Estimation."""
2
+
3
+ # Copyright (c) 2025
4
+ # Authors: The skfolio developers
5
+ # Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
6
+ # SPDX-License-Identifier: BSD-3-Clause
7
+
8
+ import numpy.typing as npt
9
+ import scipy.stats as st
10
+
11
+ from skfolio.distribution.univariate._base import BaseUnivariateDist
12
+
13
+
14
class Gaussian(BaseUnivariateDist):
    r"""Univariate Gaussian (normal) distribution estimator.

    Fits a normal distribution to a single column of data using
    `scipy.stats.norm`.

    In its standardized form the probability density function is:

    .. math::

        f(x) = \frac{\exp(-x^2/2)}{\sqrt{2\pi}}

    Location and scale are applied through the `loc` and `scale` parameters:
    `pdf(x, loc, scale)` is equivalent to `pdf(y) / scale` with
    `y = (x - loc) / scale`.

    For more information, you can refer to the `scipy documentation <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.norm.html#scipy.stats.norm>`_

    Parameters
    ----------
    loc : float, optional
        When given, the location (mean) is held fixed at this value during
        fitting. Otherwise, it is estimated from the data.

    scale : float, optional
        When given, the scale (standard deviation) is held fixed at this value
        during fitting. Otherwise, it is estimated from the data.

    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.

    Attributes
    ----------
    loc_ : float
        The fitted location (mean) of the distribution.

    scale_ : float
        The fitted scale (standard deviation) of the distribution.

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_index
    >>> from skfolio.preprocessing import prices_to_returns
    >>> from skfolio.distribution.univariate import Gaussian
    >>>
    >>> # Load historical prices and convert them to returns
    >>> prices = load_sp500_index()
    >>> X = prices_to_returns(prices)
    >>>
    >>> # Initialize the Gaussian estimator.
    >>> model = Gaussian()
    >>>
    >>> # Fit the Gaussian model to the data.
    >>> model.fit(X)
    >>>
    >>> # Display the fitted parameters.
    >>> print(model.fitted_repr)
    Gaussian(0.00035, 0.0115)
    >>>
    >>> # Compute the log-likelihood, total log-likelihood, CDF, PPF, AIC, and BIC
    >>> log_likelihood = model.score_samples(X)
    >>> score = model.score(X)
    >>> cdf = model.cdf(X)
    >>> ppf = model.ppf(X)
    >>> aic = model.aic(X)
    >>> bic = model.bic(X)
    >>>
    >>> # Generate 5 new samples from the fitted Gaussian distribution.
    >>> samples = model.sample(n_samples=5)
    >>>
    >>> # Plot the estimated probability density function (PDF).
    >>> fig = model.plot_pdf()
    >>> fig.show()
    """

    loc_: float
    scale_: float
    _scipy_model = st.norm

    def __init__(
        self,
        loc: float | None = None,
        scale: float | None = None,
        random_state: int | None = None,
    ):
        super().__init__(random_state=random_state)
        self.loc = loc
        self.scale = scale

    @property
    def _scipy_params(self) -> dict[str, float]:
        """Fitted keyword arguments forwarded to the SciPy distribution."""
        return dict(loc=self.loc_, scale=self.scale_)

    def fit(self, X: npt.ArrayLike, y=None) -> "Gaussian":
        """Estimate the Gaussian parameters from the data.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            The input data. X must contain a single column.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : Gaussian
            Returns the instance itself.

        Raises
        ------
        ValueError
            If both `loc` and `scale` are fixed, leaving nothing to fit.
        """
        X = self._validate_X(X, reset=True)

        if not (self.loc is None or self.scale is None):
            raise ValueError("Either loc or scale must be None to be fitted")

        # Only the parameters the user pinned are forwarded as fixed values
        # ("floc" / "fscale") to the SciPy fitting routine.
        requested = {"floc": self.loc, "fscale": self.scale}
        constraints = {
            name: value for name, value in requested.items() if value is not None
        }

        estimates = self._scipy_model.fit(X, **constraints)
        self.loc_, self.scale_ = estimates

        return self
@@ -0,0 +1,152 @@
1
+ """Johnson SU Estimator."""
2
+
3
+ # Copyright (c) 2025
4
+ # Authors: The skfolio developers
5
+ # Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
6
+ # SPDX-License-Identifier: BSD-3-Clause
7
+
8
+ import numpy.typing as npt
9
+ import scipy.stats as st
10
+
11
+ from skfolio.distribution.univariate._base import BaseUnivariateDist
12
+
13
+
14
class JohnsonSU(BaseUnivariateDist):
    r"""Univariate Johnson SU distribution estimator.

    Fits a Johnson SU distribution to a single column of data using
    `scipy.stats.johnsonsu`. The Johnson SU distribution is flexible and can
    capture both skewness and fat tails, making it appropriate for financial
    time series modeling.

    In its standardized form the probability density function is:

    .. math::

        f(x, a, b) = \frac{b}{\sqrt{x^2 + 1}}
                     \phi(a + b \log(x + \sqrt{x^2 + 1}))

    where :math:`x`, :math:`a`, and :math:`b` are real scalars; :math:`b > 0`.
    :math:`\phi` is the pdf of the normal distribution.

    Location and scale are applied through the `loc` and `scale` parameters:
    `pdf(x, a, b, loc, scale)` is equivalent to `pdf(y, a, b) / scale` with
    `y = (x - loc) / scale`.

    For more information, you can refer to the `scipy documentation <https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.johnsonsu.html#scipy.stats.johnsonsu>`_

    Parameters
    ----------
    loc : float, optional
        When given, the location parameter is held fixed at this value during
        fitting. Otherwise, it is estimated from the data.

    scale : float, optional
        When given, the scale parameter is held fixed at this value during
        fitting. Otherwise, it is estimated from the data.

    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.

    Attributes
    ----------
    a_ : float
        The fitted first shape parameter of the Johnson SU distribution.

    b_ : float
        The fitted second shape parameter of the Johnson SU distribution.

    loc_ : float
        The fitted location parameter.

    scale_ : float
        The fitted scale parameter.

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_index
    >>> from skfolio.preprocessing import prices_to_returns
    >>> from skfolio.distribution.univariate import JohnsonSU
    >>>
    >>> # Load historical prices and convert them to returns
    >>> prices = load_sp500_index()
    >>> X = prices_to_returns(prices)
    >>>
    >>> # Initialize the estimator.
    >>> model = JohnsonSU()
    >>>
    >>> # Fit the model to the data.
    >>> model.fit(X)
    >>>
    >>> # Display the fitted parameters.
    >>> print(model.fitted_repr)
    JohnsonSU(0.0742, 1.08, 0.00115, 0.00774)
    >>>
    >>> # Compute the log-likelihood, total log-likelihood, CDF, PPF, AIC, and BIC
    >>> log_likelihood = model.score_samples(X)
    >>> score = model.score(X)
    >>> cdf = model.cdf(X)
    >>> ppf = model.ppf(X)
    >>> aic = model.aic(X)
    >>> bic = model.bic(X)
    >>>
    >>> # Generate 5 new samples from the fitted distribution.
    >>> samples = model.sample(n_samples=5)
    >>>
    >>> # Plot the estimated probability density function (PDF).
    >>> fig = model.plot_pdf()
    >>> fig.show()
    """

    a_: float
    b_: float
    loc_: float
    scale_: float
    _scipy_model = st.johnsonsu

    def __init__(
        self,
        loc: float | None = None,
        scale: float | None = None,
        random_state: int | None = None,
    ):
        super().__init__(random_state=random_state)
        self.loc = loc
        self.scale = scale

    @property
    def _scipy_params(self) -> dict[str, float]:
        """Fitted keyword arguments forwarded to the SciPy distribution."""
        return dict(a=self.a_, b=self.b_, loc=self.loc_, scale=self.scale_)

    def fit(self, X: npt.ArrayLike, y=None) -> "JohnsonSU":
        """Estimate the Johnson SU parameters from the data.

        Parameters
        ----------
        X : array-like of shape (n_observations, 1)
            The input data. X must contain a single column.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : JohnsonSU
            Returns the instance itself.

        Raises
        ------
        ValueError
            If both `loc` and `scale` are fixed.
        """
        X = self._validate_X(X, reset=True)

        if not (self.loc is None or self.scale is None):
            raise ValueError("Either loc or scale must be None to be fitted")

        # Only the parameters the user pinned are forwarded as fixed values
        # ("floc" / "fscale") to the SciPy fitting routine.
        requested = {"floc": self.loc, "fscale": self.scale}
        constraints = {
            name: value for name, value in requested.items() if value is not None
        }

        estimates = self._scipy_model.fit(X, **constraints)
        self.a_, self.b_, self.loc_, self.scale_ = estimates
        return self