skfolio 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. skfolio/__init__.py +29 -0
  2. skfolio/cluster/__init__.py +8 -0
  3. skfolio/cluster/_hierarchical.py +387 -0
  4. skfolio/datasets/__init__.py +20 -0
  5. skfolio/datasets/_base.py +389 -0
  6. skfolio/datasets/data/__init__.py +0 -0
  7. skfolio/datasets/data/factors_dataset.csv.gz +0 -0
  8. skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
  9. skfolio/datasets/data/sp500_index.csv.gz +0 -0
  10. skfolio/distance/__init__.py +26 -0
  11. skfolio/distance/_base.py +55 -0
  12. skfolio/distance/_distance.py +574 -0
  13. skfolio/exceptions.py +30 -0
  14. skfolio/measures/__init__.py +76 -0
  15. skfolio/measures/_enums.py +355 -0
  16. skfolio/measures/_measures.py +607 -0
  17. skfolio/metrics/__init__.py +3 -0
  18. skfolio/metrics/_scorer.py +121 -0
  19. skfolio/model_selection/__init__.py +18 -0
  20. skfolio/model_selection/_combinatorial.py +407 -0
  21. skfolio/model_selection/_validation.py +194 -0
  22. skfolio/model_selection/_walk_forward.py +221 -0
  23. skfolio/moments/__init__.py +41 -0
  24. skfolio/moments/covariance/__init__.py +29 -0
  25. skfolio/moments/covariance/_base.py +101 -0
  26. skfolio/moments/covariance/_covariance.py +1108 -0
  27. skfolio/moments/expected_returns/__init__.py +21 -0
  28. skfolio/moments/expected_returns/_base.py +31 -0
  29. skfolio/moments/expected_returns/_expected_returns.py +415 -0
  30. skfolio/optimization/__init__.py +36 -0
  31. skfolio/optimization/_base.py +147 -0
  32. skfolio/optimization/cluster/__init__.py +13 -0
  33. skfolio/optimization/cluster/_nco.py +348 -0
  34. skfolio/optimization/cluster/hierarchical/__init__.py +13 -0
  35. skfolio/optimization/cluster/hierarchical/_base.py +440 -0
  36. skfolio/optimization/cluster/hierarchical/_herc.py +406 -0
  37. skfolio/optimization/cluster/hierarchical/_hrp.py +368 -0
  38. skfolio/optimization/convex/__init__.py +16 -0
  39. skfolio/optimization/convex/_base.py +1944 -0
  40. skfolio/optimization/convex/_distributionally_robust.py +392 -0
  41. skfolio/optimization/convex/_maximum_diversification.py +417 -0
  42. skfolio/optimization/convex/_mean_risk.py +974 -0
  43. skfolio/optimization/convex/_risk_budgeting.py +560 -0
  44. skfolio/optimization/ensemble/__init__.py +6 -0
  45. skfolio/optimization/ensemble/_base.py +87 -0
  46. skfolio/optimization/ensemble/_stacking.py +326 -0
  47. skfolio/optimization/naive/__init__.py +3 -0
  48. skfolio/optimization/naive/_naive.py +173 -0
  49. skfolio/population/__init__.py +3 -0
  50. skfolio/population/_population.py +883 -0
  51. skfolio/portfolio/__init__.py +13 -0
  52. skfolio/portfolio/_base.py +1096 -0
  53. skfolio/portfolio/_multi_period_portfolio.py +610 -0
  54. skfolio/portfolio/_portfolio.py +842 -0
  55. skfolio/pre_selection/__init__.py +7 -0
  56. skfolio/pre_selection/_pre_selection.py +342 -0
  57. skfolio/preprocessing/__init__.py +3 -0
  58. skfolio/preprocessing/_returns.py +114 -0
  59. skfolio/prior/__init__.py +18 -0
  60. skfolio/prior/_base.py +63 -0
  61. skfolio/prior/_black_litterman.py +238 -0
  62. skfolio/prior/_empirical.py +163 -0
  63. skfolio/prior/_factor_model.py +268 -0
  64. skfolio/typing.py +50 -0
  65. skfolio/uncertainty_set/__init__.py +23 -0
  66. skfolio/uncertainty_set/_base.py +108 -0
  67. skfolio/uncertainty_set/_bootstrap.py +281 -0
  68. skfolio/uncertainty_set/_empirical.py +237 -0
  69. skfolio/utils/__init__.py +0 -0
  70. skfolio/utils/bootstrap.py +115 -0
  71. skfolio/utils/equations.py +350 -0
  72. skfolio/utils/sorting.py +117 -0
  73. skfolio/utils/stats.py +466 -0
  74. skfolio/utils/tools.py +567 -0
  75. skfolio-0.0.1.dist-info/LICENSE +29 -0
  76. skfolio-0.0.1.dist-info/METADATA +568 -0
  77. skfolio-0.0.1.dist-info/RECORD +79 -0
  78. skfolio-0.0.1.dist-info/WHEEL +5 -0
  79. skfolio-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,440 @@
1
+ """Base Hierarchical Clustering Optimization estimator."""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+
6
+ from abc import ABC, abstractmethod
7
+
8
+ import numpy as np
9
+ import numpy.typing as npt
10
+ import pandas as pd
11
+
12
+ import skfolio.typing as skt
13
+ from skfolio.cluster import HierarchicalClustering
14
+ from skfolio.distance import BaseDistance
15
+ from skfolio.measures import ExtraRiskMeasure, RiskMeasure
16
+ from skfolio.optimization._base import BaseOptimization
17
+ from skfolio.population import Population
18
+ from skfolio.portfolio import Portfolio
19
+ from skfolio.prior import BasePrior, PriorModel
20
+ from skfolio.utils.tools import input_to_array
21
+
22
+
23
+ class BaseHierarchicalOptimization(BaseOptimization, ABC):
24
+ r"""Base Hierarchical Clustering Optimization estimator.
25
+
26
+ Parameters
27
+ ----------
28
+ risk_measure : RiskMeasure or ExtraRiskMeasure, default=RiskMeasure.VARIANCE
29
+ :class:`~skfolio.measures.RiskMeasure` or :class:`~skfolio.measures.ExtraRiskMeasure`
30
+ of the optimization.
31
+ Can be any of:
32
+
33
+ * MEAN_ABSOLUTE_DEVIATION
34
+ * FIRST_LOWER_PARTIAL_MOMENT
35
+ * VARIANCE
36
+ * SEMI_VARIANCE
37
+ * CVAR
38
+ * EVAR
39
+ * WORST_REALIZATION
40
+ * CDAR
41
+ * MAX_DRAWDOWN
42
+ * AVERAGE_DRAWDOWN
43
+ * EDAR
44
+ * ULCER_INDEX
45
+ * GINI_MEAN_DIFFERENCE_RATIO
46
+ * VALUE_AT_RISK
47
+ * DRAWDOWN_AT_RISK
48
+ * ENTROPIC_RISK_MEASURE
49
+ * FOURTH_CENTRAL_MOMENT
50
+ * FOURTH_LOWER_PARTIAL_MOMENT
51
+ * SKEW
52
+ * KURTOSIS
53
+
54
+ The default is `RiskMeasure.VARIANCE`.
55
+
56
+ prior_estimator : BasePrior, optional
57
+ :ref:`Prior estimator <prior>`.
58
+ The prior estimator is used to estimate the :class:`~skfolio.prior.PriorModel`
59
+ containing the estimation of assets expected returns, covariance matrix and
60
+ returns. The moments and returns estimations are used for the risk computation
61
+ and the returns estimation are used by the distance matrix estimator.
62
+ The default (`None`) is to use :class:`~skfolio.prior.EmpiricalPrior`.
63
+
64
+ distance_estimator : BaseDistance, optional
65
+ :ref:`Distance estimator <distance>`.
66
+ The distance estimator is used to estimate the codependence and the distance
67
+ matrix needed for the computation of the linkage matrix.
68
+ The default (`None`) is to use :class:`~skfolio.distance.PearsonDistance`.
69
+
70
+ hierarchical_clustering_estimator : HierarchicalClustering, optional
71
+ :ref:`Hierarchical Clustering estimator <hierarchical_clustering>`.
72
+ The hierarchical clustering estimator is used to compute the linkage matrix
73
+ and the hierarchical clustering of the assets based on the distance matrix.
74
+ The default (`None`) is to use
75
+ :class:`~skfolio.cluster.HierarchicalClustering`.
76
+
77
+ min_weights : float | dict[str, float] | array-like of shape (n_assets, ), default=0.0
78
+ Minimum assets weights (weights lower bounds). Negative weights are not allowed.
79
+ If a float is provided, it is applied to each asset. `None` is equivalent to
80
+ `0.0` (no short selling). If a dictionary is provided, its (key/value) pair
81
+ must be the (asset name/asset minimum weight) and the input `X` of the `fit`
82
+ methods must be a DataFrame with the assets names in columns. When using a
83
+ dictionary, assets values that are not provided are assigned a minimum weight
84
+ of `0.0`. The default is 0.0 (no short selling).
85
+
86
+ Example:
87
+
88
+ * min_weights = 0 --> long only portfolio (no short selling).
89
+ * min_weights = None --> same as `min_weights = 0` (long only portfolio).
90
+ * min_weights = {"SX5E": 0, "SPX": 0.1}
91
+ * min_weights = [0, 0.1]
92
+
93
+ max_weights : float | dict[str, float] | array-like of shape (n_assets, ), default=1.0
94
+ Maximum assets weights (weights upper bounds). Weights above 1.0 are not
95
+ allowed. If a float is provided, it is applied to each asset. `None` is
96
+ equivalent to `1.0` (each asset is below 100%). If a dictionary is provided, its
97
+ (key/value) pair must be the (asset name/asset maximum weight) and the input `X`
98
+ of the `fit` methods must be a DataFrame with the assets names in columns. When
99
+ using a dictionary, assets values that are not provided are assigned a maximum
100
+ weight of `1.0`. The default is 1.0 (each asset is below 100%).
101
+
102
+ Example:
103
+
104
+ * max_weights = None --> same as `max_weights = 1.0` (each weight is at most 100%).
105
+ * max_weights = 0.5 --> each weight must be below 50%.
106
+ * max_weights = {"SX5E": 1, "SPX": 0.25}
107
+ * max_weights = [1, 0.25]
108
+
109
+ transaction_costs : float | dict[str, float] | array-like of shape (n_assets, ), default=0.0
110
+ Transaction costs of the assets. It is used to add linear transaction costs to
111
+ the optimization problem:
112
+
113
+ .. math:: total\_cost = \sum_{i=1}^{N} c_{i} \times |w_{i} - w\_prev_{i}|
114
+
115
+ with :math:`c_{i}` the transaction cost of asset i, :math:`w_{i}` its weight
116
+ and :math:`w\_prev_{i}` its previous weight (defined in `previous_weights`).
117
+ The float :math:`total\_cost` is used in the portfolio expected return:
118
+
119
+ .. math:: expected\_return = \mu^{T} \cdot w - total\_cost
120
+
121
+ with :math:`\mu` the vector of assets' expected returns and :math:`w` the
122
+ vector of assets weights.
123
+
124
+ If a float is provided, it is applied to each asset.
125
+ If a dictionary is provided, its (key/value) pair must be the
126
+ (asset name/asset cost) and the input `X` of the `fit` methods must be a
127
+ DataFrame with the assets names in columns.
128
+ The default value is `0.0`.
129
+
130
+ .. warning::
131
+
132
+ Based on the above formula, the periodicity of the transaction costs
133
+ needs to be homogenous to the periodicity of :math:`\mu`. For example, if
134
+ the input `X` is composed of **daily** returns, the `transaction_costs` need
135
+ to be expressed in **daily** costs.
136
+ (See :ref:`sphx_glr_auto_examples_1_mean_risk_plot_6_transaction_costs.py`)
137
+
138
+ management_fees : float | dict[str, float] | array-like of shape (n_assets, ), default=0.0
139
+ Management fees of the assets. It is used to add linear management fees to the
140
+ optimization problem:
141
+
142
+ .. math:: total\_fee = \sum_{i=1}^{N} f_{i} \times w_{i}
143
+
144
+ with :math:`f_{i}` the management fee of asset i and :math:`w_{i}` its weight.
145
+ The float :math:`total\_fee` is used in the portfolio expected return:
146
+
147
+ .. math:: expected\_return = \mu^{T} \cdot w - total\_fee
148
+
149
+ with :math:`\mu` the vector of assets expected returns and :math:`w` the vector
150
+ of assets weights.
151
+
152
+ If a float is provided, it is applied to each asset.
153
+ If a dictionary is provided, its (key/value) pair must be the
154
+ (asset name/asset fee) and the input `X` of the `fit` methods must be a
155
+ DataFrame with the assets names in columns.
156
+ The default value is `0.0`.
157
+
158
+ .. warning::
159
+
160
+ Based on the above formula, the periodicity of the management fees needs to
161
+ be homogenous to the periodicity of :math:`\mu`. For example, if the input
162
+ `X` is composed of **daily** returns, the `management_fees` need to be
163
+ expressed in **daily** fees.
164
+
165
+ .. note::
166
+
167
+ Another approach is to directly apply the management fees to the input `X`
168
+ in order to express the returns net of fees. However, when estimating the
169
+ :math:`\mu` parameter using for example Shrinkage estimators, this approach
170
+ would mix a deterministic value with an uncertain one leading to unwanted
171
+ bias in the management fees.
172
+
173
+ previous_weights : float | dict[str, float] | array-like of shape (n_assets, ), optional
174
+ Previous weights of the assets. Previous weights are used to compute the
175
+ portfolio total cost. If a float is provided, it is applied to each asset.
176
+ If a dictionary is provided, its (key/value) pair must be the
177
+ (asset name/asset previous weight) and the input `X` of the `fit` methods must
178
+ be a DataFrame with the assets names in columns.
179
+ The default (`None`) means no previous weights.
180
+
181
+ portfolio_params : dict, optional
182
+ Portfolio parameters passed to the portfolio evaluated by the `predict` and
183
+ `score` methods. If not provided, the `name`, `transaction_costs`,
184
+ `management_fees` and `previous_weights` are copied from the optimization
185
+ model and systematically passed to the portfolio.
186
+
187
+ Attributes
188
+ ----------
189
+ weights_ : ndarray of shape (n_assets,)
190
+ Weights of the assets.
191
+
192
+ prior_estimator_ : BasePrior
193
+ Fitted `prior_estimator`.
194
+
195
+ distance_estimator_ : BaseDistance
196
+ Fitted `distance_estimator`.
197
+
198
+ hierarchical_clustering_estimator_ : HierarchicalClustering
199
+ Fitted `hierarchical_clustering_estimator`.
200
+ """
201
+
202
+ prior_estimator_: BasePrior
203
+ distance_estimator_: BaseDistance
204
+ hierarchical_clustering_estimator_: HierarchicalClustering
205
+
206
@abstractmethod
def __init__(
    self,
    risk_measure: RiskMeasure | ExtraRiskMeasure = RiskMeasure.VARIANCE,
    prior_estimator: BasePrior | None = None,
    distance_estimator: BaseDistance | None = None,
    hierarchical_clustering_estimator: HierarchicalClustering | None = None,
    min_weights: skt.MultiInput | None = 0.0,
    max_weights: skt.MultiInput | None = 1.0,
    transaction_costs: skt.MultiInput = 0.0,
    management_fees: skt.MultiInput = 0.0,
    previous_weights: skt.MultiInput | None = None,
    portfolio_params: dict | None = None,
):
    """Store the constructor arguments on the instance.

    `portfolio_params` is forwarded to the parent constructor. Every other
    argument is assigned, unmodified and unvalidated, to the attribute of the
    same name. The private `_seriated` flag is initialised to `False`.
    """
    super().__init__(portfolio_params=portfolio_params)

    # Risk definition.
    self.risk_measure = risk_measure

    # Sub-estimators (`None` is stored as-is; no default is resolved here).
    self.prior_estimator = prior_estimator
    self.distance_estimator = distance_estimator
    self.hierarchical_clustering_estimator = hierarchical_clustering_estimator

    # Weight bounds.
    self.min_weights = min_weights
    self.max_weights = max_weights

    # Costs, fees and previous weights.
    self.transaction_costs = transaction_costs
    self.management_fees = management_fees
    self.previous_weights = previous_weights

    # Private state, not a constructor parameter.
    self._seriated = False
231
+
232
+ def _clean_input(
233
+ self,
234
+ value: float | dict | np.ndarray | list,
235
+ n_assets: int,
236
+ fill_value: any,
237
+ name: str,
238
+ ) -> np.ndarray:
239
+ """Convert input to cleaned 1D array
240
+ value : float, dict, array-like or None.
241
+ Input value to clean and convert.
242
+
243
+ Parameters
244
+ ----------
245
+ value : float, dict or array-like.
246
+ Input value to clean.
247
+
248
+ n_assets : int
249
+ Number of assets. Used to verify the shape of the converted array.
250
+
251
+ fill_value : any
252
+ When `items` is a dictionary, elements that are not in `asset_names` are
253
+ filled with `fill_value` in the converted array.
254
+
255
+ name : str
256
+ Name used for error messages.
257
+
258
+ Returns
259
+ -------
260
+ value : ndarray of shape (n_assets,)
261
+ The cleaned float or 1D array.
262
+ """
263
+ if value is None:
264
+ raise ValueError("Cannot convert None to array")
265
+ if np.isscalar(value):
266
+ return value * np.ones(n_assets)
267
+ return input_to_array(
268
+ items=value,
269
+ n_assets=n_assets,
270
+ fill_value=fill_value,
271
+ dim=1,
272
+ assets_names=(
273
+ self.feature_names_in_ if hasattr(self, "feature_names_in_") else None
274
+ ),
275
+ name=name,
276
+ )
277
+
278
def _risk(
    self,
    weights: np.ndarray,
    prior_model: PriorModel,
) -> float:
    """Compute the risk measure of the theoretical portfolio defined by `weights`.

    A `Portfolio` is built from `prior_model.returns` and `weights`, together
    with the estimator's `transaction_costs`, `management_fees` and
    `previous_weights`.

    Parameters
    ----------
    weights : ndarray of shape (n_assets,)
        The vector of weights.

    prior_model : PriorModel
        The prior model of the assets distribution.

    Returns
    -------
    risk : float
        For `VARIANCE` and `STANDARD_DEVIATION`, the value derived from
        `prior_model.covariance`; for any other measure, the portfolio
        attribute named by `risk_measure.value`.
    """
    portfolio = Portfolio(
        X=prior_model.returns,
        weights=weights,
        transaction_costs=self.transaction_costs,
        management_fees=self.management_fees,
        previous_weights=self.previous_weights,
    )

    covariance_based = (RiskMeasure.VARIANCE, RiskMeasure.STANDARD_DEVIATION)
    if self.risk_measure not in covariance_based:
        # Every other measure is read directly from the portfolio attribute
        # carrying the measure's name.
        return getattr(portfolio, str(self.risk_measure.value))

    # Variance-type measures use the prior covariance rather than the
    # portfolio's own attribute.
    variance = portfolio.variance_from_assets(
        assets_covariance=prior_model.covariance
    )
    if self.risk_measure == RiskMeasure.STANDARD_DEVIATION:
        return np.sqrt(variance)
    return variance
314
+
315
+ def _unitary_risks(self, prior_model: PriorModel) -> np.ndarray:
316
+ """Compute the vector of risk measure for each single assets.
317
+
318
+ Parameters
319
+ ----------
320
+ prior_model : PriorModel
321
+ The prior model of the assets distribution.
322
+
323
+ Returns
324
+ -------
325
+ values: ndarray of shape (n_assets,)
326
+ The risk measure of each asset.
327
+ """
328
+ n_assets = prior_model.returns.shape[1]
329
+ risks = [
330
+ self._risk(weights=weights, prior_model=prior_model)
331
+ for weights in np.identity(n_assets)
332
+ ]
333
+ return np.array(risks)
334
+
335
+ def _convert_weights_bounds(self, n_assets: int) -> tuple[np.ndarray, np.ndarray]:
336
+ """Convert the input weights lower and upper bounds to two 1D arrays.
337
+
338
+ Parameters
339
+ ----------
340
+ n_assets : int
341
+ Number of assets.
342
+
343
+ Returns
344
+ -------
345
+ min_weights : ndarray of shape (n_assets,)
346
+ The weight lower bound 1D array.
347
+ max_weights : ndarray of shape (n_assets,)
348
+ The weight upper bound 1D array.
349
+ """
350
+
351
+ if self.min_weights is None:
352
+ min_weights = np.zeros(n_assets)
353
+ else:
354
+ min_weights = self._clean_input(
355
+ self.min_weights,
356
+ n_assets=n_assets,
357
+ fill_value=0,
358
+ name="min_weights",
359
+ )
360
+ if np.any(min_weights < 0):
361
+ raise ValueError("`min_weights` must be strictly positive")
362
+
363
+ if self.max_weights is None:
364
+ max_weights = np.ones(n_assets)
365
+ else:
366
+ max_weights = self._clean_input(
367
+ self.max_weights,
368
+ n_assets=n_assets,
369
+ fill_value=1,
370
+ name="max_weights",
371
+ )
372
+ if np.any(max_weights > 1):
373
+ raise ValueError("`max_weights` must be less than or equal to 1.0")
374
+ if np.sum(max_weights) < 1:
375
+ raise ValueError(
376
+ "The sum of `max_weights` must be greater than or equal to 1.0"
377
+ )
378
+
379
+ if np.any(min_weights > max_weights):
380
+ raise NameError(
381
+ "Items of `min_weights` must be less than or equal to items of"
382
+ " `max_weights`"
383
+ )
384
+
385
+ return min_weights, max_weights
386
+
387
+ @staticmethod
388
+ def _apply_weight_constraints_to_alpha(
389
+ alpha: float,
390
+ max_weights: np.ndarray,
391
+ min_weights: np.ndarray,
392
+ weights: np.ndarray,
393
+ left_cluster: np.ndarray,
394
+ right_cluster: np.ndarray,
395
+ ) -> float:
396
+ """Apply weight constraints to the alpha multiplication factor of the
397
+ Hierarchical Tree Clustering algorithm.
398
+
399
+ Parameters
400
+ ----------
401
+ alpha : float
402
+ The alpha multiplication factor of the Hierarchical Tree Clustering
403
+ algorithm.
404
+
405
+ min_weights : ndarray of shape (n_assets,)
406
+ The weight lower bound 1D array.
407
+
408
+ max_weights : ndarray of shape (n_assets,)
409
+ The weight upper bound 1D array.
410
+
411
+ weights : np.ndarray of shape (n_assets,)
412
+ The assets weights.
413
+
414
+ left_cluster : ndarray of shape (n_left_cluster,)
415
+ Indices of the left cluster weights.
416
+
417
+ right_cluster : ndarray of shape (n_right_cluster,)
418
+ Indices of the right cluster weights.
419
+
420
+ Returns
421
+ -------
422
+ value : float
423
+ The transformed alpha incorporating the weight constraints.
424
+ """
425
+ alpha = min(
426
+ np.sum(max_weights[left_cluster]) / weights[left_cluster[0]],
427
+ max(np.sum(min_weights[left_cluster]) / weights[left_cluster[0]], alpha),
428
+ )
429
+ alpha = 1 - min(
430
+ np.sum(max_weights[right_cluster]) / weights[right_cluster[0]],
431
+ max(
432
+ np.sum(min_weights[right_cluster]) / weights[right_cluster[0]],
433
+ 1 - alpha,
434
+ ),
435
+ )
436
+ return alpha
437
+
438
@abstractmethod
def fit(self, X: npt.ArrayLike, y: None = None):
    """Fit the estimator.

    Abstract placeholder: this base implementation does nothing and returns
    `None`; concrete subclasses must override it.

    Parameters
    ----------
    X : array-like
        Input data passed to the concrete implementation.

    y : None
        Unused by this base implementation.
    """