skfolio 0.7.0__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. skfolio/__init__.py +2 -2
  2. skfolio/cluster/__init__.py +1 -1
  3. skfolio/cluster/_hierarchical.py +1 -1
  4. skfolio/datasets/__init__.py +1 -1
  5. skfolio/datasets/_base.py +2 -2
  6. skfolio/datasets/data/__init__.py +1 -0
  7. skfolio/distance/__init__.py +1 -1
  8. skfolio/distance/_base.py +2 -2
  9. skfolio/distance/_distance.py +4 -4
  10. skfolio/distribution/__init__.py +56 -0
  11. skfolio/distribution/_base.py +203 -0
  12. skfolio/distribution/copula/__init__.py +35 -0
  13. skfolio/distribution/copula/_base.py +456 -0
  14. skfolio/distribution/copula/_clayton.py +539 -0
  15. skfolio/distribution/copula/_gaussian.py +407 -0
  16. skfolio/distribution/copula/_gumbel.py +560 -0
  17. skfolio/distribution/copula/_independent.py +196 -0
  18. skfolio/distribution/copula/_joe.py +609 -0
  19. skfolio/distribution/copula/_selection.py +111 -0
  20. skfolio/distribution/copula/_student_t.py +486 -0
  21. skfolio/distribution/copula/_utils.py +509 -0
  22. skfolio/distribution/multivariate/__init__.py +11 -0
  23. skfolio/distribution/multivariate/_base.py +241 -0
  24. skfolio/distribution/multivariate/_utils.py +632 -0
  25. skfolio/distribution/multivariate/_vine_copula.py +1254 -0
  26. skfolio/distribution/univariate/__init__.py +19 -0
  27. skfolio/distribution/univariate/_base.py +308 -0
  28. skfolio/distribution/univariate/_gaussian.py +136 -0
  29. skfolio/distribution/univariate/_johnson_su.py +152 -0
  30. skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
  31. skfolio/distribution/univariate/_selection.py +85 -0
  32. skfolio/distribution/univariate/_student_t.py +144 -0
  33. skfolio/exceptions.py +6 -6
  34. skfolio/measures/__init__.py +1 -1
  35. skfolio/measures/_enums.py +7 -7
  36. skfolio/measures/_measures.py +4 -7
  37. skfolio/metrics/__init__.py +2 -0
  38. skfolio/metrics/_scorer.py +4 -4
  39. skfolio/model_selection/__init__.py +2 -2
  40. skfolio/model_selection/_combinatorial.py +15 -12
  41. skfolio/model_selection/_validation.py +2 -2
  42. skfolio/model_selection/_walk_forward.py +3 -3
  43. skfolio/moments/covariance/_base.py +1 -1
  44. skfolio/moments/covariance/_denoise_covariance.py +1 -1
  45. skfolio/moments/covariance/_detone_covariance.py +1 -1
  46. skfolio/moments/covariance/_empirical_covariance.py +1 -1
  47. skfolio/moments/covariance/_ew_covariance.py +1 -1
  48. skfolio/moments/covariance/_gerber_covariance.py +1 -1
  49. skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
  50. skfolio/moments/covariance/_implied_covariance.py +2 -7
  51. skfolio/moments/covariance/_ledoit_wolf.py +1 -1
  52. skfolio/moments/covariance/_oas.py +1 -1
  53. skfolio/moments/covariance/_shrunk_covariance.py +1 -1
  54. skfolio/moments/expected_returns/_base.py +1 -1
  55. skfolio/moments/expected_returns/_empirical_mu.py +1 -1
  56. skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
  57. skfolio/moments/expected_returns/_ew_mu.py +1 -1
  58. skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
  59. skfolio/optimization/__init__.py +2 -0
  60. skfolio/optimization/_base.py +2 -2
  61. skfolio/optimization/cluster/__init__.py +2 -0
  62. skfolio/optimization/cluster/_nco.py +7 -7
  63. skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
  64. skfolio/optimization/cluster/hierarchical/_base.py +1 -2
  65. skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
  66. skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
  67. skfolio/optimization/convex/__init__.py +2 -0
  68. skfolio/optimization/convex/_base.py +8 -8
  69. skfolio/optimization/convex/_distributionally_robust.py +4 -4
  70. skfolio/optimization/convex/_maximum_diversification.py +5 -5
  71. skfolio/optimization/convex/_mean_risk.py +5 -6
  72. skfolio/optimization/convex/_risk_budgeting.py +3 -3
  73. skfolio/optimization/ensemble/__init__.py +2 -0
  74. skfolio/optimization/ensemble/_base.py +2 -2
  75. skfolio/optimization/ensemble/_stacking.py +1 -1
  76. skfolio/optimization/naive/__init__.py +2 -0
  77. skfolio/optimization/naive/_naive.py +1 -1
  78. skfolio/population/__init__.py +2 -0
  79. skfolio/population/_population.py +34 -7
  80. skfolio/portfolio/_base.py +42 -8
  81. skfolio/portfolio/_multi_period_portfolio.py +3 -2
  82. skfolio/portfolio/_portfolio.py +4 -4
  83. skfolio/pre_selection/__init__.py +2 -0
  84. skfolio/pre_selection/_drop_correlated.py +2 -2
  85. skfolio/pre_selection/_select_complete.py +25 -26
  86. skfolio/pre_selection/_select_k_extremes.py +2 -2
  87. skfolio/pre_selection/_select_non_dominated.py +2 -2
  88. skfolio/pre_selection/_select_non_expiring.py +2 -2
  89. skfolio/preprocessing/__init__.py +2 -0
  90. skfolio/preprocessing/_returns.py +2 -2
  91. skfolio/prior/__init__.py +4 -0
  92. skfolio/prior/_base.py +2 -2
  93. skfolio/prior/_black_litterman.py +5 -3
  94. skfolio/prior/_empirical.py +3 -1
  95. skfolio/prior/_factor_model.py +8 -4
  96. skfolio/prior/_synthetic_data.py +239 -0
  97. skfolio/synthetic_returns/__init__.py +1 -0
  98. skfolio/typing.py +1 -1
  99. skfolio/uncertainty_set/__init__.py +2 -0
  100. skfolio/uncertainty_set/_base.py +2 -2
  101. skfolio/uncertainty_set/_bootstrap.py +1 -1
  102. skfolio/uncertainty_set/_empirical.py +1 -1
  103. skfolio/utils/__init__.py +1 -0
  104. skfolio/utils/bootstrap.py +2 -2
  105. skfolio/utils/equations.py +13 -10
  106. skfolio/utils/sorting.py +2 -2
  107. skfolio/utils/stats.py +7 -7
  108. skfolio/utils/tools.py +76 -12
  109. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/METADATA +99 -24
  110. skfolio-0.8.0.dist-info/RECORD +120 -0
  111. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/WHEEL +1 -1
  112. skfolio-0.7.0.dist-info/RECORD +0 -95
  113. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info/licenses}/LICENSE +0 -0
  114. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,241 @@
1
+ """Base Multivariate Distribution Estimator."""
2
+
3
+ # Copyright (c) 2025
4
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
5
+ # Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
6
+ # SPDX-License-Identifier: BSD-3-Clause
7
+
8
+ from abc import ABC, abstractmethod
9
+
10
+ import numpy as np
11
+ import numpy.typing as npt
12
+ import plotly.graph_objects as go
13
+ import sklearn.utils as sku
14
+
15
+ from skfolio.distribution._base import BaseDistribution
16
+
17
+
18
class BaseMultivariateDist(BaseDistribution, ABC):
    """Base class for Multivariate Distribution Estimators.

    This abstract class defines the interface for multivariate distribution models.

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.
    """

    # Used for AIC and BIC
    _n_params: int

    def __init__(self, random_state: int | np.random.RandomState | None = None):
        super().__init__(random_state=random_state)

    @property
    @abstractmethod
    def n_params(self) -> int:
        """Number of model parameters."""

    @property
    @abstractmethod
    def fitted_repr(self) -> str:
        """String representation of the fitted distribution model."""

    @abstractmethod
    def fit(self, X: npt.ArrayLike, y=None) -> "BaseMultivariateDist":
        """Fit the multivariate distribution model.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : BaseMultivariateDist
            Returns the instance itself.
        """

    @abstractmethod
    def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the log-likelihood of each sample (log-pdf) under the distribution
        model.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        Returns
        -------
        density : ndarray of shape (n_observations,)
            The log-likelihood of each sample under the fitted distribution model.
        """

    @abstractmethod
    def sample(
        self,
        n_samples: int = 1,
        conditioning: dict[int | str, float | tuple[float, float] | npt.ArrayLike]
        | None = None,
    ) -> np.ndarray:
        """Generate random samples from the distribution model.

        Parameters
        ----------
        n_samples : int, default=1
            Number of samples to generate.

        conditioning : dict[int | str, float | tuple[float, float] | array-like], optional
            A dictionary specifying conditioning information for one or more assets.
            The dictionary keys are asset indices or names, and the values define how
            the samples are conditioned for that asset. Three types of conditioning
            values are supported:

            1. **Fixed value (float):**
               If a float is provided, all samples are generated under the condition
               that the asset takes exactly that value.

            2. **Bounds (tuple of two floats):**
               If a tuple `(min_value, max_value)` is provided, samples are generated
               under the condition that the asset's value falls within the specified
               bounds. Use `-np.inf` for no lower bound or `np.inf` for no upper bound.

            3. **Array-like (1D array):**
               If an array-like of length `n_samples` is provided, each sample is
               conditioned on the corresponding value in the array for that asset.

        Returns
        -------
        X : ndarray of shape (n_samples, n_assets)
            A two-dimensional array where each row is a multivariate observation sampled
            from the fitted distribution model.
        """

    def plot_scatter_matrix(
        self,
        X: npt.ArrayLike | None = None,
        conditioning: dict[int | str, float | tuple[float, float] | npt.ArrayLike]
        | None = None,
        n_samples: int = 1000,
        title: str = "Scatter Matrix",
    ) -> go.Figure:
        """Plot the scatter matrix of samples generated from the fitted model.

        Samples are drawn with :meth:`sample` and, when `X` is provided, plotted
        against the empirical distribution of `X` for comparison.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_assets), optional
            If provided, it is used to plot the empirical scatter matrix for
            comparison versus the scatter matrix of the generated samples.

        conditioning : dict[int | str, float | tuple[float, float] | array-like], optional
            A dictionary specifying conditioning information for one or more assets.
            The dictionary keys are asset indices or names, and the values define how
            the samples are conditioned for that asset. Three types of conditioning
            values are supported:

            1. **Fixed value (float):**
               If a float is provided, all samples are generated under the condition
               that the asset takes exactly that value.

            2. **Bounds (tuple of two floats):**
               If a tuple `(min_value, max_value)` is provided, samples are generated
               under the condition that the asset's value falls within the specified
               bounds. Use `-np.inf` for no lower bound or `np.inf` for no upper bound.

            3. **Array-like (1D array):**
               If an array-like of length `n_samples` is provided, each sample is
               conditioned on the corresponding value in the array for that asset.
               Note that `n_samples` is lowered to the number of rows of `X` when
               `X` has fewer rows (see below); the array is forwarded unchanged.

        n_samples : int, default=1000
            Number of samples used to control the density and readability of the plot.
            If `X` is provided and contains more than `n_samples` rows, a random
            subsample of size `n_samples` is selected. Conversely, if `X` has fewer
            rows than `n_samples`, the value is adjusted to match the number of rows in
            `X` to ensure balanced visualization.

        title : str, default="Scatter Matrix"
            The title for the plot.

        Returns
        -------
        fig : plotly.graph_objects.Figure
            A figure object containing the scatter matrix.

        Raises
        ------
        ValueError
            If `X` is provided and is not two-dimensional or does not have
            `n_features_in_` columns.
        """
        n_assets = self.n_features_in_

        # scikit-learn only defines `feature_names_in_` when the estimator was fitted
        # on data carrying string column names; fall back to its generic "x<i>"
        # naming so that models fitted on plain arrays can still be plotted.
        labels = getattr(self, "feature_names_in_", None)
        if labels is None:
            labels = [f"x{i}" for i in range(n_assets)]

        def build_trace(values: np.ndarray, name: str, color: str) -> go.Splom:
            # Both traces share the same layout; only data, name and color differ.
            return go.Splom(
                dimensions=[
                    {"label": labels[i], "values": values[:, i]}
                    for i in range(n_assets)
                ],
                showupperhalf=False,
                diagonal_visible=False,
                marker=dict(
                    size=5,
                    color=color,
                    line=dict(width=0.2, color="white"),
                    opacity=0.6,
                ),
                name=name,
                showlegend=True,
            )

        traces = []
        if X is not None:
            X = np.asarray(X)
            if X.ndim != 2:
                raise ValueError("X should be an 2D array")
            if X.shape[1] != n_assets:
                raise ValueError(f"X should have {n_assets} columns")
            if X.shape[0] > n_samples:
                # We subsample for improved graph readability
                rng = sku.check_random_state(self.random_state)
                indices = rng.choice(X.shape[0], size=n_samples, replace=False)
                X = X[indices, :]
            else:
                # We want same proportion as X to have a balanced graph
                n_samples = X.shape[0]
            traces.append(build_trace(X, "Historical", "rgb(85,168,104)"))

        sample = self.sample(n_samples=n_samples, conditioning=conditioning)
        traces.append(build_trace(sample, "Generated", "rgb(221,132,82)"))

        if conditioning is not None:
            # Improve readability: put the generated trace before the historical one
            traces = traces[::-1]

        fig = go.Figure(data=traces)
        fig.update_layout(title=title)
        return fig