skfolio 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. skfolio/__init__.py +29 -0
  2. skfolio/cluster/__init__.py +8 -0
  3. skfolio/cluster/_hierarchical.py +387 -0
  4. skfolio/datasets/__init__.py +20 -0
  5. skfolio/datasets/_base.py +389 -0
  6. skfolio/datasets/data/__init__.py +0 -0
  7. skfolio/datasets/data/factors_dataset.csv.gz +0 -0
  8. skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
  9. skfolio/datasets/data/sp500_index.csv.gz +0 -0
  10. skfolio/distance/__init__.py +26 -0
  11. skfolio/distance/_base.py +55 -0
  12. skfolio/distance/_distance.py +574 -0
  13. skfolio/exceptions.py +30 -0
  14. skfolio/measures/__init__.py +76 -0
  15. skfolio/measures/_enums.py +355 -0
  16. skfolio/measures/_measures.py +607 -0
  17. skfolio/metrics/__init__.py +3 -0
  18. skfolio/metrics/_scorer.py +121 -0
  19. skfolio/model_selection/__init__.py +18 -0
  20. skfolio/model_selection/_combinatorial.py +407 -0
  21. skfolio/model_selection/_validation.py +194 -0
  22. skfolio/model_selection/_walk_forward.py +221 -0
  23. skfolio/moments/__init__.py +41 -0
  24. skfolio/moments/covariance/__init__.py +29 -0
  25. skfolio/moments/covariance/_base.py +101 -0
  26. skfolio/moments/covariance/_covariance.py +1108 -0
  27. skfolio/moments/expected_returns/__init__.py +21 -0
  28. skfolio/moments/expected_returns/_base.py +31 -0
  29. skfolio/moments/expected_returns/_expected_returns.py +415 -0
  30. skfolio/optimization/__init__.py +36 -0
  31. skfolio/optimization/_base.py +147 -0
  32. skfolio/optimization/cluster/__init__.py +13 -0
  33. skfolio/optimization/cluster/_nco.py +348 -0
  34. skfolio/optimization/cluster/hierarchical/__init__.py +13 -0
  35. skfolio/optimization/cluster/hierarchical/_base.py +440 -0
  36. skfolio/optimization/cluster/hierarchical/_herc.py +406 -0
  37. skfolio/optimization/cluster/hierarchical/_hrp.py +368 -0
  38. skfolio/optimization/convex/__init__.py +16 -0
  39. skfolio/optimization/convex/_base.py +1944 -0
  40. skfolio/optimization/convex/_distributionally_robust.py +392 -0
  41. skfolio/optimization/convex/_maximum_diversification.py +417 -0
  42. skfolio/optimization/convex/_mean_risk.py +974 -0
  43. skfolio/optimization/convex/_risk_budgeting.py +560 -0
  44. skfolio/optimization/ensemble/__init__.py +6 -0
  45. skfolio/optimization/ensemble/_base.py +87 -0
  46. skfolio/optimization/ensemble/_stacking.py +326 -0
  47. skfolio/optimization/naive/__init__.py +3 -0
  48. skfolio/optimization/naive/_naive.py +173 -0
  49. skfolio/population/__init__.py +3 -0
  50. skfolio/population/_population.py +883 -0
  51. skfolio/portfolio/__init__.py +13 -0
  52. skfolio/portfolio/_base.py +1096 -0
  53. skfolio/portfolio/_multi_period_portfolio.py +610 -0
  54. skfolio/portfolio/_portfolio.py +842 -0
  55. skfolio/pre_selection/__init__.py +7 -0
  56. skfolio/pre_selection/_pre_selection.py +342 -0
  57. skfolio/preprocessing/__init__.py +3 -0
  58. skfolio/preprocessing/_returns.py +114 -0
  59. skfolio/prior/__init__.py +18 -0
  60. skfolio/prior/_base.py +63 -0
  61. skfolio/prior/_black_litterman.py +238 -0
  62. skfolio/prior/_empirical.py +163 -0
  63. skfolio/prior/_factor_model.py +268 -0
  64. skfolio/typing.py +50 -0
  65. skfolio/uncertainty_set/__init__.py +23 -0
  66. skfolio/uncertainty_set/_base.py +108 -0
  67. skfolio/uncertainty_set/_bootstrap.py +281 -0
  68. skfolio/uncertainty_set/_empirical.py +237 -0
  69. skfolio/utils/__init__.py +0 -0
  70. skfolio/utils/bootstrap.py +115 -0
  71. skfolio/utils/equations.py +350 -0
  72. skfolio/utils/sorting.py +117 -0
  73. skfolio/utils/stats.py +466 -0
  74. skfolio/utils/tools.py +567 -0
  75. skfolio-0.0.1.dist-info/LICENSE +29 -0
  76. skfolio-0.0.1.dist-info/METADATA +568 -0
  77. skfolio-0.0.1.dist-info/RECORD +79 -0
  78. skfolio-0.0.1.dist-info/WHEEL +5 -0
  79. skfolio-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,121 @@
1
+ """Scorer module"""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+
6
+ from collections.abc import Callable
7
+
8
+ import numpy.typing as npt
9
+
10
+ import skfolio.typing as skt
11
+ from skfolio.optimization import BaseOptimization
12
+ from skfolio.portfolio import Portfolio
13
+
14
+
15
class _PortfolioScorer:
    """Portfolio Scorer wrapper.

    Callable object that predicts with an estimator, applies a score function
    to the prediction and multiplies the result by a sign.

    Parameters
    ----------
    score_func : callable
        Function called as `score_func(pred, **kwargs)` where `pred` is the
        output of `estimator.predict(X)`.

    sign : int
        Multiplier applied to the value returned by `score_func`
        (`make_scorer` passes `1` or `-1`).

    kwargs : dict
        Additional keyword arguments forwarded to `score_func` on every call.
    """

    def __init__(self, score_func: Callable, sign: int, kwargs: dict):
        self._score_func = score_func
        self._kwargs = kwargs
        self._sign = sign

    def __repr__(self) -> str:
        """String representation of the `PortfolioScorer`."""
        # Not every callable exposes `__name__` (e.g. `functools.partial` objects
        # or instances defining `__call__`); fall back to the callable's own repr
        # so that building the representation never raises.
        func_name = getattr(self._score_func, "__name__", repr(self._score_func))
        sign_string = "" if self._sign > 0 else ", greater_is_better=False"
        kwargs_string = "".join(f", {k}={v}" for k, v in self._kwargs.items())
        return f"make_scorer({func_name}{sign_string}{kwargs_string})"

    def __call__(self, estimator: BaseOptimization, X: npt.ArrayLike) -> float:
        """Compute the score of the estimator prediction on X.

        Parameters
        ----------
        estimator : BaseOptimization
            Trained estimator to use for scoring.

        X : array-like of shape (n_observations, n_assets)
            Test data that will be fed to estimator.predict.

        Returns
        -------
        score : float
            Score of the estimator prediction on X, multiplied by the sign given
            at construction.
        """
        pred = estimator.predict(X)
        return self._sign * self._score_func(pred, **self._kwargs)
50
+
51
+
52
def make_scorer(
    score_func: skt.Measure | Callable,
    greater_is_better: bool | None = None,
    **kwargs,
) -> Callable:
    """Build a scorer from a :ref:`measure <measures_ref>` or from a custom score
    function.

    This is a modified version of the `scikit-learn` `make_scorer` providing
    enhanced functionalities for `Portfolio` objects.

    The returned object wraps a scoring function so that it can be used in
    `sklearn.model_selection.GridSearchCV` and
    `sklearn.model_selection.cross_val_score`.

    Parameters
    ----------
    score_func : Measure | callable
        If `score_func` is a :ref:`measure <measures_ref>`, the scorer returns the
        measure of the predicted :class:`~skfolio.portfolio.Portfolio` times `1`
        or `-1` depending on the `greater_is_better` parameter.

        Otherwise, `score_func` must be a score function (or loss function) with
        signature `score_func(pred, **kwargs)`. The argument `pred` is the
        predicted :class:`~skfolio.portfolio.Portfolio`.

        Note that you can convert this portfolio object into a numpy array of
        price returns with `np.asarray(pred)`.

    greater_is_better : bool, optional
        If True, `score_func` is a score function, meaning high is good.
        If False, it is a loss function, meaning low is good, and the scorer
        sign-flips the outcome of `score_func`.
        The default (`None`) is to use:

            * If `score_func` is a :ref:`measure <measures_ref>`:

                * True for `PerfMeasure` and `RatioMeasure`
                * False for `RiskMeasure` and `ExtraRiskMeasure`.

            * Otherwise, True.

    **kwargs : additional arguments
        Additional parameters to be passed to score_func.

    Returns
    -------
    scorer : callable
        Callable object that returns a scalar score.

    Raises
    ------
    TypeError
        If `score_func` is neither a callable nor a measure.
    """
    if not callable(score_func):
        if not isinstance(score_func, skt.Measure):
            raise TypeError("`score_func` must be a callable or a measure")
        measure = score_func

        if greater_is_better is None:
            # Performance and ratio measures are scores; the others are losses.
            greater_is_better = bool(measure.is_perf or measure.is_ratio)

        # The function keeps the name `score_func` on purpose: the scorer
        # representation displays the `__name__` of the wrapped function.
        def score_func(pred: Portfolio) -> float:
            """Score function"""
            return getattr(pred, measure.value)

    elif greater_is_better is None:
        greater_is_better = True

    return _PortfolioScorer(score_func, 1 if greater_is_better else -1, kwargs)
@@ -0,0 +1,18 @@
1
+ """Model selection module"""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+
6
+ from skfolio.model_selection._combinatorial import (
7
+ BaseCombinatorialCV,
8
+ CombinatorialPurgedCV,
9
+ )
10
+ from skfolio.model_selection._validation import cross_val_predict
11
+ from skfolio.model_selection._walk_forward import WalkForward
12
+
13
+ __all__ = [
14
+ "cross_val_predict",
15
+ "WalkForward",
16
+ "BaseCombinatorialCV",
17
+ "CombinatorialPurgedCV",
18
+ ]
@@ -0,0 +1,407 @@
1
+ """Combinatorial module"""
2
+
3
+ # Author: Hugo Delatte <delatte.hugo@gmail.com>
4
+ # License: BSD 3 clause
5
+
6
+ import itertools
7
+ import math
8
+ import numbers
9
+ from abc import ABC, abstractmethod
10
+ from collections.abc import Iterator
11
+
12
+ import numpy as np
13
+ import numpy.typing as npt
14
+ import pandas as pd
15
+ import plotly.graph_objects as go
16
+ import sklearn.model_selection as skm
17
+ import sklearn.utils as sku
18
+
19
+ import skfolio.typing as skt
20
+
21
+
22
class BaseCombinatorialCV(ABC):
    """Base class for all combinatorial cross-validators.

    Implementations must define both `split` and `get_path_ids`.
    """

    @abstractmethod
    def split(
        self, X: npt.ArrayLike, y=None, groups=None
    ) -> Iterator[tuple[np.ndarray, list[np.ndarray]]]:
        """Generate indices to split data into training and test sets.

        Each item is a pair `(train, tests)` where `train` is the array of
        training indices of the split and `tests` is the list of test index
        arrays of that split.
        """

    @abstractmethod
    def get_path_ids(self) -> np.ndarray:
        """Return the path id of each test sets in each split"""

    # Borrow the representation implemented by scikit-learn's `BaseCrossValidator`.
    __repr__ = skm.BaseCrossValidator.__repr__
38
+
39
+
40
+ # TODO: review params and function naming
41
class CombinatorialPurgedCV(BaseCombinatorialCV):
    """Combinatorial Purged Cross-Validation.

    Provides train/test indices to split time series data samples based on
    Combinatorial Purged Cross-Validation [1]_.

    Compared to `KFold` which split the data into `k` folds with `1` fold for the test
    set and `k - 1` folds for the training set, `CombinatorialPurgedCV` uses `k - p`
    folds for the training set with `p > 1` being the number of test folds.

    `KFold` can recombine one single testing path while `CombinatorialPurgedCV` can
    recombine multiple testing paths from the combinations of the train/test sets.

    To avoid data leakage, purging and embargoing can be performed.

    Purging consists of removing from the training set all observations whose labels
    overlapped in time with those labels included in the testing set.

    Embargoing consists of removing from the training set all observations that
    immediately follow an observation in the testing set since financial features
    often incorporate series that exhibit serial correlation (like ARMA processes).

    Parameters
    ----------
    n_folds : int, default=10
        Number of folds. Must be at least 3.

    n_test_folds : int, default=8
        Number of test folds. Must be at least 2 and smaller than `n_folds`.
        For only one test fold, use `sklearn.model_selection.KFold`.

    purged_size : int, default=0
        Number of observations to exclude from the start of each train set that are
        after a test set **and** the number of observations to exclude from the end of
        each training set that are before a test set.

    embargo_size : int, default=0
        Number of observations to exclude from the start of each training set that are
        after a test set.

    Attributes
    ----------
    index_train_test_ : ndarray of shape (n_observations, n_splits)
        Set when `split` is iterated. For each split (column), `0` marks a
        training observation, `1` a test observation and `-1` a purged or
        embargoed observation.

    Examples
    --------
    >>> import numpy as np
    >>> from skfolio.model_selection import CombinatorialPurgedCV
    >>> X = np.random.randn(12, 2)
    >>> cv = CombinatorialPurgedCV(n_folds=3, n_test_folds=2)
    >>> for i, (train_index, tests) in enumerate(cv.split(X)):
    ...     print(f"Split {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     for j, test_index in enumerate(tests):
    ...         print(f"  Test {j}: index={test_index}")
    Split 0:
      Train: index=[ 8  9 10 11]
      Test 0: index=[0 1 2 3]
      Test 1: index=[4 5 6 7]
    Split 1:
      Train: index=[4 5 6 7]
      Test 0: index=[0 1 2 3]
      Test 1: index=[ 8  9 10 11]
    Split 2:
      Train: index=[0 1 2 3]
      Test 0: index=[4 5 6 7]
      Test 1: index=[ 8  9 10 11]
    >>> cv = CombinatorialPurgedCV(n_folds=3, n_test_folds=2, purged_size=1)
    >>> for i, (train_index, tests) in enumerate(cv.split(X)):
    ...     print(f"Split {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     for j, test_index in enumerate(tests):
    ...         print(f"  Test {j}: index={test_index}")
    Split 0:
      Train: index=[ 9 10 11]
      Test 0: index=[0 1 2 3]
      Test 1: index=[4 5 6 7]
    Split 1:
      Train: index=[5 6]
      Test 0: index=[0 1 2 3]
      Test 1: index=[ 8  9 10 11]
    Split 2:
      Train: index=[0 1 2]
      Test 0: index=[4 5 6 7]
      Test 1: index=[ 8  9 10 11]
    >>> cv = CombinatorialPurgedCV(n_folds=3, n_test_folds=2, embargo_size=1)
    >>> for i, (train_index, tests) in enumerate(cv.split(X)):
    ...     print(f"Split {i}:")
    ...     print(f"  Train: index={train_index}")
    ...     for j, test_index in enumerate(tests):
    ...         print(f"  Test {j}: index={test_index}")
    Split 0:
      Train: index=[ 9 10 11]
      Test 0: index=[0 1 2 3]
      Test 1: index=[4 5 6 7]
    Split 1:
      Train: index=[5 6 7]
      Test 0: index=[0 1 2 3]
      Test 1: index=[ 8  9 10 11]
    Split 2:
      Train: index=[0 1 2 3]
      Test 0: index=[4 5 6 7]
      Test 1: index=[ 8  9 10 11]

    References
    ----------
    .. [1]  "Advances in Financial Machine Learning",
        Marcos López de Prado (2018)
    """

    index_train_test_: np.ndarray

    def __init__(
        self,
        n_folds: int = 10,
        n_test_folds: int = 8,
        purged_size: int = 0,
        embargo_size: int = 0,
    ):
        if not isinstance(n_folds, numbers.Integral):
            raise ValueError(
                "The number of folds must be of Integral type. "
                f"{n_folds} of type {type(n_folds)} was passed."
            )
        n_folds = int(n_folds)

        if n_folds <= 2:
            raise ValueError(f"`n_folds` must be at least 3`, got `n_folds={n_folds}`.")

        # Same type validation as `n_folds`: a non-integral value would otherwise
        # only fail later, when the combinations are enumerated.
        if not isinstance(n_test_folds, numbers.Integral):
            raise ValueError(
                "The number of test folds must be of Integral type. "
                f"{n_test_folds} of type {type(n_test_folds)} was passed."
            )
        n_test_folds = int(n_test_folds)

        if n_test_folds <= 1:
            raise ValueError(
                f"`n_test_folds` must at least 2, got `n_test_folds={n_test_folds}`."
            )

        if n_test_folds >= n_folds:
            raise ValueError(
                "Combinatorial purged cross-validation requires `n_folds` "
                "to be greater than `n_test_folds`."
            )

        if purged_size < 0:
            raise ValueError("`purged_size` cannot be negative")

        if embargo_size < 0:
            raise ValueError("`embargo_size` cannot be negative")

        self.n_folds = n_folds
        self.n_test_folds = n_test_folds
        self.purged_size = purged_size
        self.embargo_size = embargo_size

    @property
    def n_splits(self) -> int:
        """Number of splits, i.e. the number of ways to choose `n_test_folds`
        test folds among `n_folds` folds."""
        # Exact integer arithmetic; dividing factorials goes through a float.
        return math.comb(self.n_folds, self.n_test_folds)

    @property
    def n_test_paths(self) -> int:
        """Number of test paths that can be reconstructed from the train/test
        combinations"""
        return self.n_splits * self.n_test_folds // self.n_folds

    @property
    def test_set_index(self) -> np.ndarray:
        """Location of each test set: array of shape (n_splits, n_test_folds)
        holding the fold ids used as test folds in each split."""
        return np.array(
            list(itertools.combinations(np.arange(self.n_folds), self.n_test_folds))
        ).reshape(-1, self.n_test_folds)

    @property
    def binary_train_test_sets(self) -> np.ndarray:
        """Identify training and test folds for each combinations by assigning `0` to
        training folds and `1` to test folds.

        The returned array has shape (n_folds, n_splits).
        """
        folds_train_test = np.zeros((self.n_folds, self.n_splits))
        # Row index: test fold ids of each split; column index: the split itself.
        folds_train_test[
            self.test_set_index, np.arange(self.n_splits)[:, np.newaxis]
        ] = 1
        return folds_train_test

    @property
    def recombined_paths(self) -> np.ndarray:
        """Recombine each test path by returning the test set location in each split.

        The returned array has `n_folds` rows; row `f` lists the ids of the splits
        in which fold `f` is a test fold.
        """
        return np.argwhere(self.binary_train_test_sets == 1)[:, 1].reshape(
            self.n_folds, -1
        )

    def get_path_ids(self) -> np.ndarray:
        """Return the path id of each test sets in each split.

        Returns
        -------
        path_ids : ndarray of shape (n_splits, n_test_folds)
            Path id of each test set of each split.
        """
        recombine_paths = self.recombined_paths
        path_ids = np.zeros((self.n_splits, self.n_test_folds), dtype=int)
        for i in range(self.n_splits):
            # Each split id appears once per test fold in `recombine_paths`; the
            # column of each occurrence is the path id.
            path_ids[i, :] = np.argwhere(recombine_paths == i)[:, 1]
        return path_ids

    def split(
        self, X: npt.ArrayLike, y=None, groups=None
    ) -> Iterator[tuple[np.ndarray, list[np.ndarray]]]:
        """Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : array-like of shape (n_samples,), optional
            The (multi-)target variable

        groups : array-like of shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set. Not used by this implementation.

        Yields
        ------
        train : ndarray
            The training set indices for that split.

        test : list of ndarray
            The testing set indices for that split, one array per test fold.

        Raises
        ------
        ValueError
            If `purged_size + embargo_size` is too large compared to the fold size.
        """
        recombine_paths = self.recombined_paths

        X, y = sku.indexable(X, y)
        n_samples = X.shape[0]
        min_fold_size = n_samples // self.n_folds
        if self.purged_size + self.embargo_size >= min_fold_size - 1:
            raise ValueError(
                "The sum of `purged_size` and `embargo_size` must be smaller than the"
                f" size of a train fold which is {min_fold_size}"
            )

        # Fold id of each observation. When `n_samples` is not a multiple of
        # `n_folds`, the integer division produces ids >= `n_folds` for the
        # trailing observations (possibly more than one extra id); all of them are
        # attached to the last fold so that every observation belongs to a fold.
        fold_index_num = np.minimum(
            np.arange(n_samples) // min_fold_size, self.n_folds - 1
        )

        # Train (0) / test (1) flag of each observation in each split, obtained by
        # looking up the flag of the observation's fold.
        index_train_test = self.binary_train_test_sets[fold_index_num]

        diff = np.diff(index_train_test, axis=0)

        # Purge before: rows where a train observation is followed by a test one
        before_index = np.argwhere(diff == 1)
        for k in range(self.purged_size):
            index_train_test[
                np.maximum(0, before_index[:, 0] - k), before_index[:, 1]
            ] = -1

        # Purge after and Embargo: rows where a test observation is followed by a
        # train one
        after_index = np.argwhere(diff == -1)
        for k in range(self.purged_size + self.embargo_size):
            index_train_test[
                np.minimum(n_samples - 1, after_index[:, 0] + k + 1), after_index[:, 1]
            ] = -1
        self.index_train_test_ = index_train_test

        fold_index = {
            fold_id: np.argwhere(fold_index_num == fold_id).reshape(-1)
            for fold_id in range(self.n_folds)
        }
        for i in range(self.n_splits):
            train_index = np.argwhere(index_train_test[:, i] == 0).reshape(-1)
            test_index_list = [
                fold_index[fold_id] for fold_id, _ in np.argwhere(recombine_paths == i)
            ]
            yield train_index, test_index_list

    def summary(self, X) -> pd.Series:
        """Summarize the cross-validation configuration for the data `X`.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data from which the number of observations is read (`X.shape[0]`).

        Returns
        -------
        summary : Series
            Number of observations, fold counts, purge and embargo sizes, average
            training size, number of test paths and number of training
            combinations.
        """
        n_samples = X.shape[0]
        return pd.Series(
            {
                "Number of Observations": n_samples,
                "Total Number of Folds": self.n_folds,
                "Number of Test Folds": self.n_test_folds,
                "Purge Size": self.purged_size,
                "Embargo Size": self.embargo_size,
                "Average Training Size": int(
                    n_samples / self.n_folds * (self.n_folds - self.n_test_folds)
                ),
                "Number of Test Paths": self.n_test_paths,
                "Number of Training Combinations": self.n_splits,
            }
        )

    def plot_train_test_folds(self) -> skt.Figure:
        """Plot the train/test fold locations"""
        values = self.binary_train_test_sets
        fill_color = np.where(values == 0, "blue", "red")
        fill_color = fill_color.astype(object)
        # First table column: the split ids, drawn with the header color.
        fill_color = np.insert(
            fill_color, 0, np.array(["darkblue" for _ in range(self.n_splits)]), axis=0
        )
        values = np.insert(values, 0, np.arange(self.n_splits), axis=0)
        fig = go.Figure(
            data=[
                go.Table(
                    header=dict(
                        values=["Train Combinations"]
                        + [f"Fold {i}" for i in range(self.n_folds)],
                        fill_color="darkblue",
                        font=dict(color="white"),
                        align="left",
                    ),
                    cells=dict(
                        values=values,
                        font=dict(color="white"),
                        fill_color=fill_color,
                        line_color="grey",
                        align="left",
                    ),
                )
            ]
        )
        fig.update_layout(title="Split Train (0) /Test (1) Folds per Combination")
        return fig

    def plot_train_test_index(self, X) -> skt.Figure:
        """Plot the training and test indices for each combinations by assigning `0` to
        training, `1` to test and `-1` to both purge and embargo indices."""
        # Advancing the generator once is enough to populate `index_train_test_`.
        next(self.split(X))
        n_samples = X.shape[0]
        cond = [
            self.index_train_test_ == -1,
            self.index_train_test_ == 0,
            self.index_train_test_ == 1,
        ]
        values = self.index_train_test_.T
        values = np.insert(values, 0, np.arange(n_samples), axis=0)
        # Every entry is -1, 0 or 1, so the default is never selected; a string
        # default avoids mixing the integer default `0` with the string choices.
        fill_color = np.select(cond, ["green", "blue", "red"], default="").T
        fill_color = fill_color.astype(object)
        # First table column: the observation ids, drawn with the header color.
        fill_color = np.insert(
            fill_color, 0, np.array(["darkblue" for _ in range(n_samples)]), axis=0
        )
        fig = go.Figure(
            data=[
                go.Table(
                    header=dict(
                        values=["observations"]
                        + [f"Split {i}" for i in range(self.n_splits)],
                        fill_color="darkblue",
                        font=dict(color="white"),
                        align="left",
                    ),
                    cells=dict(
                        values=values,
                        font=dict(color="white"),
                        fill_color=fill_color,
                        line_color="grey",
                        align="left",
                    ),
                )
            ]
        )
        fig.update_layout(
            title="Train (0), Test (1) and Purge/Embargo (-1) observations per splits"
        )

        return fig