skfolio 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +7 -7
- skfolio/cluster/__init__.py +2 -2
- skfolio/cluster/_hierarchical.py +2 -2
- skfolio/datasets/__init__.py +3 -3
- skfolio/datasets/_base.py +2 -2
- skfolio/datasets/data/__init__.py +1 -0
- skfolio/distance/__init__.py +4 -4
- skfolio/distance/_base.py +2 -2
- skfolio/distance/_distance.py +11 -10
- skfolio/distribution/__init__.py +56 -0
- skfolio/distribution/_base.py +203 -0
- skfolio/distribution/copula/__init__.py +35 -0
- skfolio/distribution/copula/_base.py +456 -0
- skfolio/distribution/copula/_clayton.py +539 -0
- skfolio/distribution/copula/_gaussian.py +407 -0
- skfolio/distribution/copula/_gumbel.py +560 -0
- skfolio/distribution/copula/_independent.py +196 -0
- skfolio/distribution/copula/_joe.py +609 -0
- skfolio/distribution/copula/_selection.py +111 -0
- skfolio/distribution/copula/_student_t.py +486 -0
- skfolio/distribution/copula/_utils.py +509 -0
- skfolio/distribution/multivariate/__init__.py +11 -0
- skfolio/distribution/multivariate/_base.py +241 -0
- skfolio/distribution/multivariate/_utils.py +632 -0
- skfolio/distribution/multivariate/_vine_copula.py +1254 -0
- skfolio/distribution/univariate/__init__.py +19 -0
- skfolio/distribution/univariate/_base.py +308 -0
- skfolio/distribution/univariate/_gaussian.py +136 -0
- skfolio/distribution/univariate/_johnson_su.py +152 -0
- skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
- skfolio/distribution/univariate/_selection.py +85 -0
- skfolio/distribution/univariate/_student_t.py +144 -0
- skfolio/exceptions.py +8 -8
- skfolio/measures/__init__.py +24 -24
- skfolio/measures/_enums.py +7 -7
- skfolio/measures/_measures.py +4 -7
- skfolio/metrics/__init__.py +2 -0
- skfolio/metrics/_scorer.py +4 -4
- skfolio/model_selection/__init__.py +4 -4
- skfolio/model_selection/_combinatorial.py +15 -12
- skfolio/model_selection/_validation.py +2 -2
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/__init__.py +11 -11
- skfolio/moments/covariance/__init__.py +6 -6
- skfolio/moments/covariance/_base.py +1 -1
- skfolio/moments/covariance/_denoise_covariance.py +3 -2
- skfolio/moments/covariance/_detone_covariance.py +3 -2
- skfolio/moments/covariance/_empirical_covariance.py +3 -2
- skfolio/moments/covariance/_ew_covariance.py +3 -2
- skfolio/moments/covariance/_gerber_covariance.py +3 -2
- skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
- skfolio/moments/covariance/_implied_covariance.py +3 -8
- skfolio/moments/covariance/_ledoit_wolf.py +1 -1
- skfolio/moments/covariance/_oas.py +1 -1
- skfolio/moments/covariance/_shrunk_covariance.py +1 -1
- skfolio/moments/expected_returns/__init__.py +2 -2
- skfolio/moments/expected_returns/_base.py +1 -1
- skfolio/moments/expected_returns/_empirical_mu.py +3 -2
- skfolio/moments/expected_returns/_equilibrium_mu.py +3 -2
- skfolio/moments/expected_returns/_ew_mu.py +3 -2
- skfolio/moments/expected_returns/_shrunk_mu.py +4 -3
- skfolio/optimization/__init__.py +12 -10
- skfolio/optimization/_base.py +2 -2
- skfolio/optimization/cluster/__init__.py +3 -1
- skfolio/optimization/cluster/_nco.py +10 -9
- skfolio/optimization/cluster/hierarchical/__init__.py +3 -1
- skfolio/optimization/cluster/hierarchical/_base.py +1 -2
- skfolio/optimization/cluster/hierarchical/_herc.py +4 -3
- skfolio/optimization/cluster/hierarchical/_hrp.py +4 -3
- skfolio/optimization/convex/__init__.py +5 -3
- skfolio/optimization/convex/_base.py +10 -9
- skfolio/optimization/convex/_distributionally_robust.py +8 -5
- skfolio/optimization/convex/_maximum_diversification.py +8 -6
- skfolio/optimization/convex/_mean_risk.py +10 -8
- skfolio/optimization/convex/_risk_budgeting.py +6 -4
- skfolio/optimization/ensemble/__init__.py +2 -0
- skfolio/optimization/ensemble/_base.py +2 -2
- skfolio/optimization/ensemble/_stacking.py +3 -3
- skfolio/optimization/naive/__init__.py +3 -1
- skfolio/optimization/naive/_naive.py +4 -3
- skfolio/population/__init__.py +2 -0
- skfolio/population/_population.py +34 -7
- skfolio/portfolio/__init__.py +1 -1
- skfolio/portfolio/_base.py +43 -8
- skfolio/portfolio/_multi_period_portfolio.py +3 -2
- skfolio/portfolio/_portfolio.py +5 -4
- skfolio/pre_selection/__init__.py +3 -1
- skfolio/pre_selection/_drop_correlated.py +3 -3
- skfolio/pre_selection/_select_complete.py +31 -30
- skfolio/pre_selection/_select_k_extremes.py +3 -3
- skfolio/pre_selection/_select_non_dominated.py +3 -3
- skfolio/pre_selection/_select_non_expiring.py +8 -6
- skfolio/preprocessing/__init__.py +2 -0
- skfolio/preprocessing/_returns.py +2 -2
- skfolio/prior/__init__.py +7 -3
- skfolio/prior/_base.py +2 -2
- skfolio/prior/_black_litterman.py +7 -4
- skfolio/prior/_empirical.py +5 -2
- skfolio/prior/_factor_model.py +10 -5
- skfolio/prior/_synthetic_data.py +239 -0
- skfolio/synthetic_returns/__init__.py +1 -0
- skfolio/typing.py +7 -7
- skfolio/uncertainty_set/__init__.py +7 -5
- skfolio/uncertainty_set/_base.py +5 -4
- skfolio/uncertainty_set/_bootstrap.py +1 -1
- skfolio/uncertainty_set/_empirical.py +1 -1
- skfolio/utils/__init__.py +1 -0
- skfolio/utils/bootstrap.py +2 -2
- skfolio/utils/equations.py +13 -10
- skfolio/utils/sorting.py +2 -2
- skfolio/utils/stats.py +15 -15
- skfolio/utils/tools.py +86 -22
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info}/METADATA +122 -46
- skfolio-0.8.0.dist-info/RECORD +120 -0
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info}/WHEEL +1 -1
- skfolio-0.6.0.dist-info/RECORD +0 -95
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info/licenses}/LICENSE +0 -0
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,241 @@
|
|
1
|
+
"""Base Multivariate Distribution Estimator."""
|
2
|
+
|
3
|
+
# Copyright (c) 2025
|
4
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
+
# Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
|
6
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
7
|
+
|
8
|
+
from abc import ABC, abstractmethod
|
9
|
+
|
10
|
+
import numpy as np
|
11
|
+
import numpy.typing as npt
|
12
|
+
import plotly.graph_objects as go
|
13
|
+
import sklearn.utils as sku
|
14
|
+
|
15
|
+
from skfolio.distribution._base import BaseDistribution
|
16
|
+
|
17
|
+
|
18
|
+
class BaseMultivariateDist(BaseDistribution, ABC):
    """Base class for Multivariate Distribution Estimators.

    This abstract class defines the interface for multivariate distribution models
    (`fit`, `score_samples`, `sample`) and provides a shared plotting helper,
    `plot_scatter_matrix`, built on top of `sample`.

    Parameters
    ----------
    random_state : int, RandomState instance or None, default=None
        Seed or random state to ensure reproducibility.
    """

    # Used for AIC and BIC
    _n_params: int

    def __init__(self, random_state: int | np.random.RandomState | None = None):
        super().__init__(random_state=random_state)

    @property
    @abstractmethod
    def n_params(self) -> int:
        """Number of model parameters."""

    @property
    @abstractmethod
    def fitted_repr(self) -> str:
        """String representation of the fitted distribution model."""

    @abstractmethod
    def fit(self, X: npt.ArrayLike, y=None) -> "BaseMultivariateDist":
        """Fit the multivariate distribution model.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : None
            Ignored. Provided for compatibility with scikit-learn's API.

        Returns
        -------
        self : BaseMultivariateDist
            Returns the instance itself.
        """

    @abstractmethod
    def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
        """Compute the log-likelihood of each sample (log-pdf) under the distribution
        model.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        Returns
        -------
        density : ndarray of shape (n_observations,)
            The log-likelihood of each sample under the fitted distribution model.
        """

    @abstractmethod
    def sample(
        self,
        n_samples: int = 1,
        conditioning: dict[int | str, float | tuple[float, float] | npt.ArrayLike]
        | None = None,
    ) -> np.ndarray:
        """Generate random samples from the distribution model.

        Parameters
        ----------
        n_samples : int, default=1
            Number of samples to generate.

        conditioning : dict[int | str, float | tuple[float, float] | array-like], optional
            A dictionary specifying conditioning information for one or more assets.
            The dictionary keys are asset indices or names, and the values define how
            the samples are conditioned for that asset. Three types of conditioning
            values are supported:

            1. **Fixed value (float):**
               If a float is provided, all samples are generated under the condition
               that the asset takes exactly that value.

            2. **Bounds (tuple of two floats):**
               If a tuple `(min_value, max_value)` is provided, samples are generated
               under the condition that the asset's value falls within the specified
               bounds. Use `-np.inf` for no lower bound or `np.inf` for no upper bound.

            3. **Array-like (1D array):**
               If an array-like of length `n_samples` is provided, each sample is
               conditioned on the corresponding value in the array for that asset.

        Returns
        -------
        X : ndarray of shape (n_samples, n_assets)
            A two-dimensional array where each row is a multivariate observation sampled
            from the fitted distribution model.
        """

    @staticmethod
    def _build_splom_trace(
        values: np.ndarray, labels: list, name: str, color: str
    ) -> go.Splom:
        """Build one scatter-matrix trace with one dimension per column of `values`.

        Parameters
        ----------
        values : ndarray of shape (n_points, n_assets)
            Data to plot; column `i` is paired with `labels[i]`.

        labels : list of length n_assets
            Axis label of each dimension.

        name : str
            Legend entry of the trace.

        color : str
            Marker color (any color string accepted by plotly).

        Returns
        -------
        trace : plotly.graph_objects.Splom
            Lower-triangle scatter matrix trace (diagonal hidden).
        """
        return go.Splom(
            dimensions=[
                {"label": label, "values": values[:, i]}
                for i, label in enumerate(labels)
            ],
            showupperhalf=False,
            diagonal_visible=False,
            marker={
                "size": 5,
                "color": color,
                "line": {"width": 0.2, "color": "white"},
                "opacity": 0.6,
            },
            name=name,
            showlegend=True,
        )

    def plot_scatter_matrix(
        self,
        X: npt.ArrayLike | None = None,
        conditioning: dict[int | str, float | tuple[float, float] | npt.ArrayLike]
        | None = None,
        n_samples: int = 1000,
        title: str = "Scatter Matrix",
    ) -> go.Figure:
        """Plot the scatter matrix of samples generated from the fitted model.

        Samples are drawn from the fitted distribution model with `sample` and, if
        `X` is provided, plotted together with the empirical distribution of `X`
        for comparison.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_assets), optional
            If provided, it is used to plot the empirical scatter matrix for
            comparison versus the scatter matrix of the generated samples.

        conditioning : dict[int | str, float | tuple[float, float] | array-like], optional
            A dictionary specifying conditioning information for one or more assets.
            The dictionary keys are asset indices or names, and the values define how
            the samples are conditioned for that asset. Three types of conditioning
            values are supported:

            1. **Fixed value (float):**
               If a float is provided, all samples are generated under the condition
               that the asset takes exactly that value.

            2. **Bounds (tuple of two floats):**
               If a tuple `(min_value, max_value)` is provided, samples are generated
               under the condition that the asset's value falls within the specified
               bounds. Use `-np.inf` for no lower bound or `np.inf` for no upper bound.

            3. **Array-like (1D array):**
               If an array-like of length `n_samples` is provided, each sample is
               conditioned on the corresponding value in the array for that asset.

        n_samples : int, default=1000
            Number of samples used to control the density and readability of the plot.
            If `X` is provided and contains more than `n_samples` rows, a random
            subsample of size `n_samples` is selected. Conversely, if `X` has fewer
            rows than `n_samples`, the value is adjusted to match the number of rows in
            `X` to ensure balanced visualization.

        title : str, default="Scatter Matrix"
            The title for the plot.

        Returns
        -------
        fig : plotly.graph_objects.Figure
            A figure object containing the scatter matrix.

        Raises
        ------
        ValueError
            If `X` is provided and is not two-dimensional, or does not have
            `n_features_in_` columns.
        """
        n_assets = self.n_features_in_

        # scikit-learn only defines `feature_names_in_` when the estimator was fit
        # on data with string column names; fall back to positional labels so that
        # models fit on plain arrays can still be plotted.
        names = getattr(self, "feature_names_in_", None)
        if names is None:
            labels = [f"x{i}" for i in range(n_assets)]
        else:
            labels = [names[i] for i in range(n_assets)]

        traces = []
        if X is not None:
            X = np.asarray(X)
            if X.ndim != 2:
                raise ValueError("X should be an 2D array")
            if X.shape[1] != n_assets:
                raise ValueError(f"X should have {n_assets} columns")
            if X.shape[0] > n_samples:
                # We subsample for improved graph readability
                rng = sku.check_random_state(self.random_state)
                indices = rng.choice(
                    np.arange(X.shape[0]), size=n_samples, replace=False
                )
                X = X[indices, :]
            else:
                # We want same proportion as X to have a balanced graph
                n_samples = X.shape[0]
            traces.append(
                self._build_splom_trace(
                    values=X,
                    labels=labels,
                    name="Historical",
                    color="rgb(85,168,104)",
                )
            )

        generated = self.sample(n_samples=n_samples, conditioning=conditioning)
        traces.append(
            self._build_splom_trace(
                values=generated,
                labels=labels,
                name="Generated",
                color="rgb(221,132,82)",
            )
        )

        if conditioning is not None:
            # Improve readability: reverse the trace order so the generated
            # (conditioned) trace comes first in the figure data.
            traces = traces[::-1]

        fig = go.Figure(data=traces)
        fig.update_layout(title=title)
        return fig
|