skfolio-0.7.0-py3-none-any.whl → skfolio-0.8.0-py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (114)
  1. skfolio/__init__.py +2 -2
  2. skfolio/cluster/__init__.py +1 -1
  3. skfolio/cluster/_hierarchical.py +1 -1
  4. skfolio/datasets/__init__.py +1 -1
  5. skfolio/datasets/_base.py +2 -2
  6. skfolio/datasets/data/__init__.py +1 -0
  7. skfolio/distance/__init__.py +1 -1
  8. skfolio/distance/_base.py +2 -2
  9. skfolio/distance/_distance.py +4 -4
  10. skfolio/distribution/__init__.py +56 -0
  11. skfolio/distribution/_base.py +203 -0
  12. skfolio/distribution/copula/__init__.py +35 -0
  13. skfolio/distribution/copula/_base.py +456 -0
  14. skfolio/distribution/copula/_clayton.py +539 -0
  15. skfolio/distribution/copula/_gaussian.py +407 -0
  16. skfolio/distribution/copula/_gumbel.py +560 -0
  17. skfolio/distribution/copula/_independent.py +196 -0
  18. skfolio/distribution/copula/_joe.py +609 -0
  19. skfolio/distribution/copula/_selection.py +111 -0
  20. skfolio/distribution/copula/_student_t.py +486 -0
  21. skfolio/distribution/copula/_utils.py +509 -0
  22. skfolio/distribution/multivariate/__init__.py +11 -0
  23. skfolio/distribution/multivariate/_base.py +241 -0
  24. skfolio/distribution/multivariate/_utils.py +632 -0
  25. skfolio/distribution/multivariate/_vine_copula.py +1254 -0
  26. skfolio/distribution/univariate/__init__.py +19 -0
  27. skfolio/distribution/univariate/_base.py +308 -0
  28. skfolio/distribution/univariate/_gaussian.py +136 -0
  29. skfolio/distribution/univariate/_johnson_su.py +152 -0
  30. skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
  31. skfolio/distribution/univariate/_selection.py +85 -0
  32. skfolio/distribution/univariate/_student_t.py +144 -0
  33. skfolio/exceptions.py +6 -6
  34. skfolio/measures/__init__.py +1 -1
  35. skfolio/measures/_enums.py +7 -7
  36. skfolio/measures/_measures.py +4 -7
  37. skfolio/metrics/__init__.py +2 -0
  38. skfolio/metrics/_scorer.py +4 -4
  39. skfolio/model_selection/__init__.py +2 -2
  40. skfolio/model_selection/_combinatorial.py +15 -12
  41. skfolio/model_selection/_validation.py +2 -2
  42. skfolio/model_selection/_walk_forward.py +3 -3
  43. skfolio/moments/covariance/_base.py +1 -1
  44. skfolio/moments/covariance/_denoise_covariance.py +1 -1
  45. skfolio/moments/covariance/_detone_covariance.py +1 -1
  46. skfolio/moments/covariance/_empirical_covariance.py +1 -1
  47. skfolio/moments/covariance/_ew_covariance.py +1 -1
  48. skfolio/moments/covariance/_gerber_covariance.py +1 -1
  49. skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
  50. skfolio/moments/covariance/_implied_covariance.py +2 -7
  51. skfolio/moments/covariance/_ledoit_wolf.py +1 -1
  52. skfolio/moments/covariance/_oas.py +1 -1
  53. skfolio/moments/covariance/_shrunk_covariance.py +1 -1
  54. skfolio/moments/expected_returns/_base.py +1 -1
  55. skfolio/moments/expected_returns/_empirical_mu.py +1 -1
  56. skfolio/moments/expected_returns/_equilibrium_mu.py +1 -1
  57. skfolio/moments/expected_returns/_ew_mu.py +1 -1
  58. skfolio/moments/expected_returns/_shrunk_mu.py +2 -2
  59. skfolio/optimization/__init__.py +2 -0
  60. skfolio/optimization/_base.py +2 -2
  61. skfolio/optimization/cluster/__init__.py +2 -0
  62. skfolio/optimization/cluster/_nco.py +7 -7
  63. skfolio/optimization/cluster/hierarchical/__init__.py +2 -0
  64. skfolio/optimization/cluster/hierarchical/_base.py +1 -2
  65. skfolio/optimization/cluster/hierarchical/_herc.py +2 -2
  66. skfolio/optimization/cluster/hierarchical/_hrp.py +2 -2
  67. skfolio/optimization/convex/__init__.py +2 -0
  68. skfolio/optimization/convex/_base.py +8 -8
  69. skfolio/optimization/convex/_distributionally_robust.py +4 -4
  70. skfolio/optimization/convex/_maximum_diversification.py +5 -5
  71. skfolio/optimization/convex/_mean_risk.py +5 -6
  72. skfolio/optimization/convex/_risk_budgeting.py +3 -3
  73. skfolio/optimization/ensemble/__init__.py +2 -0
  74. skfolio/optimization/ensemble/_base.py +2 -2
  75. skfolio/optimization/ensemble/_stacking.py +1 -1
  76. skfolio/optimization/naive/__init__.py +2 -0
  77. skfolio/optimization/naive/_naive.py +1 -1
  78. skfolio/population/__init__.py +2 -0
  79. skfolio/population/_population.py +34 -7
  80. skfolio/portfolio/_base.py +42 -8
  81. skfolio/portfolio/_multi_period_portfolio.py +3 -2
  82. skfolio/portfolio/_portfolio.py +4 -4
  83. skfolio/pre_selection/__init__.py +2 -0
  84. skfolio/pre_selection/_drop_correlated.py +2 -2
  85. skfolio/pre_selection/_select_complete.py +25 -26
  86. skfolio/pre_selection/_select_k_extremes.py +2 -2
  87. skfolio/pre_selection/_select_non_dominated.py +2 -2
  88. skfolio/pre_selection/_select_non_expiring.py +2 -2
  89. skfolio/preprocessing/__init__.py +2 -0
  90. skfolio/preprocessing/_returns.py +2 -2
  91. skfolio/prior/__init__.py +4 -0
  92. skfolio/prior/_base.py +2 -2
  93. skfolio/prior/_black_litterman.py +5 -3
  94. skfolio/prior/_empirical.py +3 -1
  95. skfolio/prior/_factor_model.py +8 -4
  96. skfolio/prior/_synthetic_data.py +239 -0
  97. skfolio/synthetic_returns/__init__.py +1 -0
  98. skfolio/typing.py +1 -1
  99. skfolio/uncertainty_set/__init__.py +2 -0
  100. skfolio/uncertainty_set/_base.py +2 -2
  101. skfolio/uncertainty_set/_bootstrap.py +1 -1
  102. skfolio/uncertainty_set/_empirical.py +1 -1
  103. skfolio/utils/__init__.py +1 -0
  104. skfolio/utils/bootstrap.py +2 -2
  105. skfolio/utils/equations.py +13 -10
  106. skfolio/utils/sorting.py +2 -2
  107. skfolio/utils/stats.py +7 -7
  108. skfolio/utils/tools.py +76 -12
  109. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/METADATA +99 -24
  110. skfolio-0.8.0.dist-info/RECORD +120 -0
  111. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/WHEEL +1 -1
  112. skfolio-0.7.0.dist-info/RECORD +0 -95
  113. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info/licenses}/LICENSE +0 -0
  114. {skfolio-0.7.0.dist-info → skfolio-0.8.0.dist-info}/top_level.txt +0 -0
skfolio/__init__.py CHANGED
@@ -1,7 +1,7 @@
- """skfolio package"""
+ """skfolio package."""
 
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
- # License: BSD 3 clause
+ # SPDX-License-Identifier: BSD-3-Clause
 
 import importlib.metadata
 
 from skfolio.measures import (
skfolio/cluster/__init__.py CHANGED
@@ -1,7 +1,7 @@
 """Hierarchical Clustering estimators."""
 
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
- # License: BSD 3 clause
+ # SPDX-License-Identifier: BSD-3-Clause
 
 from skfolio.cluster._hierarchical import HierarchicalClustering, LinkageMethod
 
skfolio/cluster/_hierarchical.py CHANGED
@@ -2,7 +2,7 @@
 
 # Copyright (c) 2023
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
- # License: BSD 3 clause
+ # SPDX-License-Identifier: BSD-3-Clause
 
 from enum import auto
 
skfolio/datasets/__init__.py CHANGED
@@ -1,7 +1,7 @@
 """Datasets module."""
 
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
- # License: BSD 3 clause
+ # SPDX-License-Identifier: BSD-3-Clause
 
 from skfolio.datasets._base import (
     load_factors_dataset,
skfolio/datasets/_base.py CHANGED
@@ -2,7 +2,7 @@
 
 # Copyright (c) 2023
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
- # License: BSD 3 clause
+ # SPDX-License-Identifier: BSD-3-Clause
 # Implementation derived from:
 # scikit-portfolio, Copyright (c) 2022, Carlo Nicolini, Licensed under MIT Licence.
 # scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
@@ -74,7 +74,7 @@ def load_gzip_compressed_csv_data(
     encoding="utf-8",
     datetime_index: bool = True,
 ) -> pd.DataFrame:
-     """Loads gzip-compressed csv files with `importlib.resources`.
+     """Load gzip-compressed csv files with `importlib.resources`.
 
     1) Open resource file with `importlib.resources.open_binary`
    2) Decompress csv file with `gzip.open`
skfolio/datasets/data/__init__.py ADDED
@@ -0,0 +1 @@
+ """Dataset Data module."""
skfolio/distance/__init__.py CHANGED
@@ -1,7 +1,7 @@
 """Distance Estimators."""
 
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
- # License: BSD 3 clause
+ # SPDX-License-Identifier: BSD-3-Clause
 
 from skfolio.distance._base import BaseDistance
 from skfolio.distance._distance import (
skfolio/distance/_base.py CHANGED
@@ -1,8 +1,8 @@
- """Base Distance Estimators"""
+ """Base Distance Estimators."""
 
 # Copyright (c) 2023
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
- # License: BSD 3 clause
+ # SPDX-License-Identifier: BSD-3-Clause
 
 from abc import ABC, abstractmethod
 
skfolio/distance/_distance.py CHANGED
@@ -1,8 +1,8 @@
- """Distance Estimators"""
+ """Distance Estimators."""
 
 # Copyright (c) 2023
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
- # License: BSD 3 clause
+ # SPDX-License-Identifier: BSD-3-Clause
 
 import numpy as np
 import numpy.typing as npt
@@ -215,7 +215,7 @@ class SpearmanDistance(BaseDistance):
         self.power = power
 
     def fit(self, X: npt.ArrayLike, y=None) -> "SpearmanDistance":
-         """Fit the Spearman Kendall estimator.
+         """Fit the Spearman estimator.
 
         Parameters
         ----------
@@ -384,7 +384,7 @@ class DistanceCorrelation(BaseDistance):
 
     @staticmethod
     def _dcorr(x: np.ndarray, y: np.ndarray):
-         """Calculate the distance correlation between two variables"""
+         """Calculate the distance correlation between two variables."""
         x = scd.squareform(scd.pdist(x.reshape(-1, 1)))
         y = scd.squareform(scd.pdist(y.reshape(-1, 1)))
         x = x - x.mean(axis=0)[np.newaxis, :] - x.mean(axis=1)[:, np.newaxis] + x.mean()
skfolio/distribution/__init__.py ADDED
@@ -0,0 +1,56 @@
+ """Distribution module."""
+
+ from skfolio.distribution._base import BaseDistribution, SelectionCriterion
+ from skfolio.distribution.copula import (
+     BaseBivariateCopula,
+     ClaytonCopula,
+     CopulaRotation,
+     GaussianCopula,
+     GumbelCopula,
+     IndependentCopula,
+     JoeCopula,
+     StudentTCopula,
+     compute_pseudo_observations,
+     empirical_tail_concentration,
+     plot_tail_concentration,
+     select_bivariate_copula,
+ )
+ from skfolio.distribution.multivariate import (
+     BaseMultivariateDist,
+     DependenceMethod,
+     VineCopula,
+ )
+ from skfolio.distribution.univariate import (
+     BaseUnivariateDist,
+     Gaussian,
+     JohnsonSU,
+     NormalInverseGaussian,
+     StudentT,
+     select_univariate_dist,
+ )
+
+ __all__ = [
+     "BaseBivariateCopula",
+     "BaseDistribution",
+     "BaseMultivariateDist",
+     "BaseUnivariateDist",
+     "ClaytonCopula",
+     "CopulaRotation",
+     "DependenceMethod",
+     "Gaussian",
+     "GaussianCopula",
+     "GumbelCopula",
+     "IndependentCopula",
+     "JoeCopula",
+     "JohnsonSU",
+     "NormalInverseGaussian",
+     "SelectionCriterion",
+     "StudentT",
+     "StudentTCopula",
+     "VineCopula",
+     "compute_pseudo_observations",
+     "empirical_tail_concentration",
+     "plot_tail_concentration",
+     "select_bivariate_copula",
+     "select_univariate_dist",
+ ]
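
The exports above make skfolio.distribution the new home for the univariate marginal distributions, bivariate copulas and vine copula introduced in 0.8.0. As a rough usage sketch (not part of the diff; it assumes the default constructors and relies only on the fit/score/aic/bic interface that BaseDistribution defines further down), the univariate estimators follow the familiar scikit-learn pattern:

    import numpy as np

    from skfolio.distribution import Gaussian, StudentT

    # Hypothetical toy data: 1000 daily returns of a single asset.
    rng = np.random.default_rng(0)
    X = rng.normal(loc=0.0005, scale=0.01, size=(1000, 1))

    # Fit two candidate marginal distributions and compare them by information criteria.
    for model in (Gaussian(), StudentT()):
        model.fit(X)
        print(type(model).__name__, model.score(X), model.aic(X), model.bic(X))
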
skfolio/distribution/_base.py ADDED
@@ -0,0 +1,203 @@
+ """Base Distribution Estimator."""
+
+ # Copyright (c) 2025
+ # Authors: The skfolio developers
+ # Credits: Matteo Manzi, Vincent Maladière, Carlo Nicolini
+ # SPDX-License-Identifier: BSD-3-Clause
+
+ from abc import ABC, abstractmethod
+ from enum import auto
+
+ import numpy as np
+ import numpy.typing as npt
+ import sklearn.base as skb
+
+ from skfolio.utils.tools import AutoEnum
+
+
+ class SelectionCriterion(AutoEnum):
+     """Enum representing the selection criteria.
+
+     Attributes
+     ----------
+     AIC : str
+         Akaike Information Criterion (AIC)
+
+     BIC : str
+         Bayesian Information Criterion (BIC)
+     """
+
+     AIC = auto()
+     BIC = auto()
+
+
+ class BaseDistribution(skb.BaseEstimator, ABC):
+     """Base Distribution Estimator.
+
+     This abstract class serves as a foundation for distribution models in skfolio.
+
+     random_state : int, RandomState instance or None, default=None
+         Seed or random state to ensure reproducibility.
+     """
+
+     def __init__(self, random_state: int | None = None):
+         self.random_state = random_state
+
+     @property
+     @abstractmethod
+     def n_params(self) -> int:
+         """Number of model parameters."""
+         pass
+
+     @property
+     @abstractmethod
+     def fitted_repr(self) -> str:
+         """String representation of the fitted model."""
+         pass
+
+     @abstractmethod
+     def fit(self, X: npt.ArrayLike, y=None) -> "BaseDistribution":
+         """Fit the univariate distribution model.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_observations, n_features)
+             The input data.
+
+         y : None
+             Ignored. Provided for compatibility with scikit-learn's API.
+
+         Returns
+         -------
+         self : BaseDistribution
+             Returns the instance itself.
+         """
+         pass
+
+     @abstractmethod
+     def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
+         """Compute the log-likelihood of each sample (log-pdf) under the model.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_observations, n_features)
+             The input data.
+
+         Returns
+         -------
+         density : ndarray of shape (n_observations,)
+             Log-likelihood values for each observation in X.
+         """
+         pass
+
+     def sample(self, n_samples: int = 1):
+         """Generate random samples from the fitted model.
+
+         Parameters
+         ----------
+         n_samples : int, default=1
+             Number of samples to generate.
+
+         Returns
+         -------
+         X : array-like of shape (n_samples, 1)
+             List of samples.
+         """
+         pass
+
+     def score(self, X: npt.ArrayLike, y=None):
+         """Compute the total log-likelihood under the model.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_observations, n_features)
+             An array of data points for which the total log-likelihood is computed.
+
+         y : None
+             Ignored. Provided for compatibility with scikit-learn's API.
+
+         Returns
+         -------
+         logprob : float
+             The total log-likelihood (sum of log-pdf values).
+         """
+         return np.sum(self.score_samples(X))
+
+     def aic(self, X: npt.ArrayLike) -> float:
+         r"""Compute the Akaike Information Criterion (AIC) for the model given data X.
+
+         The AIC is defined as:
+
+         .. math::
+             \mathrm{AIC} = -2 \, \log L \;+\; 2 k,
+
+         where
+
+         - :math:`\log L` is the total log-likelihood
+         - :math:`k` is the number of parameters in the model
+
+         A lower AIC value indicates a better trade-off between model fit and complexity.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_observations, n_features)
+             The input data on which to compute the AIC.
+
+         Notes
+         -----
+         In practice, both AIC and BIC measure the trade-off between model fit and
+         complexity, but BIC tends to prefer simpler models for large :math:`n`
+         because of the :math:`\ln(n)` term.
+
+         Returns
+         -------
+         aic : float
+             The AIC of the fitted model on the given data.
+
+         References
+         ----------
+         .. [1] "A new look at the statistical model identification", Akaike (1974).
+         """
+         log_likelihood = self.score(X)
+         return 2 * (self.n_params - log_likelihood)
+
+     def bic(self, X: npt.ArrayLike) -> float:
+         r"""Compute the Bayesian Information Criterion (BIC) for the model given data X.
+
+         The BIC is defined as:
+
+         .. math::
+             \mathrm{BIC} = -2 \, \log L \;+\; k \,\ln(n),
+
+         where
+
+         - :math:`\log L` is the (maximized) total log-likelihood
+         - :math:`k` is the number of parameters in the model
+         - :math:`n` is the number of observations
+
+         A lower BIC value suggests a better fit while imposing a stronger penalty
+         for model complexity than the AIC.
+
+         Parameters
+         ----------
+         X : array-like of shape (n_observations, n_features)
+             The input data on which to compute the BIC.
+
+         Returns
+         -------
+         bic : float
+             The BIC of the fitted model on the given data.
+
+         Notes
+         -----
+         In practice, both AIC and BIC measure the trade-off between model fit and
+         complexity, but BIC tends to prefer simpler models for large :math:`n`
+         because of the :math:`\ln(n)` term.
+
+         References
+         ----------
+         .. [1] "Estimating the dimension of a model", Schwarz, G. (1978).
+         """
+         log_likelihood = self.score(X)
+         n = X.shape[0]
+         return -2 * log_likelihood + self.n_params * np.log(n)
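
BaseDistribution is the contract shared by every estimator in the new module: subclasses implement n_params, fitted_repr, fit and score_samples, and inherit score, aic and bic. A minimal sketch of a hypothetical subclass (not part of the package) illustrates how the information criteria are derived from score_samples and n_params:

    import numpy as np
    import numpy.typing as npt
    import scipy.stats as st

    from skfolio.distribution import BaseDistribution


    class FixedStandardNormal(BaseDistribution):
        """Hypothetical toy model: a standard normal with no estimated parameters."""

        @property
        def n_params(self) -> int:
            return 0

        @property
        def fitted_repr(self) -> str:
            return "FixedStandardNormal()"

        def fit(self, X: npt.ArrayLike, y=None) -> "FixedStandardNormal":
            return self  # nothing to estimate

        def score_samples(self, X: npt.ArrayLike) -> np.ndarray:
            # Per-observation log-likelihood, summed across features.
            return st.norm.logpdf(np.asarray(X)).sum(axis=1)


    X = np.random.default_rng(0).normal(size=(500, 1))
    model = FixedStandardNormal().fit(X)
    # aic = 2*(n_params - log L) and bic = -2*log L + n_params*ln(n);
    # with n_params = 0 the two criteria coincide.
    print(model.score(X), model.aic(X), model.bic(X))
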
skfolio/distribution/copula/__init__.py ADDED
@@ -0,0 +1,35 @@
+ """Copula module."""
+
+ from skfolio.distribution.copula._base import (
+     UNIFORM_MARGINAL_EPSILON,
+     BaseBivariateCopula,
+ )
+ from skfolio.distribution.copula._clayton import ClaytonCopula
+ from skfolio.distribution.copula._gaussian import GaussianCopula
+ from skfolio.distribution.copula._gumbel import GumbelCopula
+ from skfolio.distribution.copula._independent import IndependentCopula
+ from skfolio.distribution.copula._joe import JoeCopula
+ from skfolio.distribution.copula._selection import select_bivariate_copula
+ from skfolio.distribution.copula._student_t import StudentTCopula
+ from skfolio.distribution.copula._utils import (
+     CopulaRotation,
+     compute_pseudo_observations,
+     empirical_tail_concentration,
+     plot_tail_concentration,
+ )
+
+ __all__ = [
+     "UNIFORM_MARGINAL_EPSILON",
+     "BaseBivariateCopula",
+     "ClaytonCopula",
+     "CopulaRotation",
+     "GaussianCopula",
+     "GumbelCopula",
+     "IndependentCopula",
+     "JoeCopula",
+     "StudentTCopula",
+     "compute_pseudo_observations",
+     "empirical_tail_concentration",
+     "plot_tail_concentration",
+     "select_bivariate_copula",
+ ]
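
The copula subpackage exposes the bivariate families and helpers listed above under the same BaseDistribution-style interface. A hedged sketch of how these pieces might be combined (the exact signatures of compute_pseudo_observations and the copula constructors are not shown in this diff, so the call shapes below are assumptions):

    import numpy as np

    from skfolio.distribution import GaussianCopula, compute_pseudo_observations

    # Hypothetical toy data: two correlated return series.
    rng = np.random.default_rng(0)
    X = rng.multivariate_normal([0.0, 0.0], [[1.0, 0.6], [0.6, 1.0]], size=1000)

    # Map each marginal to (0, 1) pseudo-observations, then fit the bivariate copula.
    U = compute_pseudo_observations(X)
    copula = GaussianCopula().fit(U)
    print(copula.fitted_repr, copula.aic(U), copula.bic(U))
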