skfolio 0.3.1__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. {skfolio-0.3.1/src/skfolio.egg-info → skfolio-0.4.0}/PKG-INFO +1 -1
  2. {skfolio-0.3.1 → skfolio-0.4.0}/pyproject.toml +3 -2
  3. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/datasets/_base.py +1 -1
  4. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/measures/_measures.py +1 -1
  5. skfolio-0.4.0/src/skfolio/model_selection/_walk_forward.py +440 -0
  6. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/population/_population.py +215 -152
  7. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/portfolio/_base.py +48 -9
  8. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/portfolio/_multi_period_portfolio.py +45 -0
  9. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/portfolio/_portfolio.py +82 -46
  10. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/utils/tools.py +18 -1
  11. {skfolio-0.3.1 → skfolio-0.4.0/src/skfolio.egg-info}/PKG-INFO +1 -1
  12. skfolio-0.3.1/src/skfolio/model_selection/_walk_forward.py +0 -226
  13. {skfolio-0.3.1 → skfolio-0.4.0}/LICENSE +0 -0
  14. {skfolio-0.3.1 → skfolio-0.4.0}/MANIFEST.in +0 -0
  15. {skfolio-0.3.1 → skfolio-0.4.0}/README.rst +0 -0
  16. {skfolio-0.3.1 → skfolio-0.4.0}/setup.cfg +0 -0
  17. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/__init__.py +0 -0
  18. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/cluster/__init__.py +0 -0
  19. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/cluster/_hierarchical.py +0 -0
  20. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/datasets/__init__.py +0 -0
  21. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/datasets/data/__init__.py +0 -0
  22. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/datasets/data/factors_dataset.csv.gz +0 -0
  23. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
  24. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/datasets/data/sp500_index.csv.gz +0 -0
  25. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/distance/__init__.py +0 -0
  26. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/distance/_base.py +0 -0
  27. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/distance/_distance.py +0 -0
  28. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/exceptions.py +0 -0
  29. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/measures/__init__.py +0 -0
  30. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/measures/_enums.py +0 -0
  31. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/metrics/__init__.py +0 -0
  32. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/metrics/_scorer.py +0 -0
  33. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/model_selection/__init__.py +0 -0
  34. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/model_selection/_combinatorial.py +0 -0
  35. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/model_selection/_validation.py +0 -0
  36. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/__init__.py +0 -0
  37. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/__init__.py +0 -0
  38. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/_base.py +0 -0
  39. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/_denoise_covariance.py +0 -0
  40. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/_detone_covariance.py +0 -0
  41. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/_empirical_covariance.py +0 -0
  42. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/_ew_covariance.py +0 -0
  43. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/_gerber_covariance.py +0 -0
  44. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/_graphical_lasso_cv.py +0 -0
  45. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/_implied_covariance.py +0 -0
  46. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/_ledoit_wolf.py +0 -0
  47. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/_oas.py +0 -0
  48. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/covariance/_shrunk_covariance.py +0 -0
  49. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/expected_returns/__init__.py +0 -0
  50. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/expected_returns/_base.py +0 -0
  51. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/expected_returns/_empirical_mu.py +0 -0
  52. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/expected_returns/_equilibrium_mu.py +0 -0
  53. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/expected_returns/_ew_mu.py +0 -0
  54. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/moments/expected_returns/_shrunk_mu.py +0 -0
  55. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/__init__.py +0 -0
  56. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/_base.py +0 -0
  57. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/cluster/__init__.py +0 -0
  58. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/cluster/_nco.py +0 -0
  59. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/cluster/hierarchical/__init__.py +0 -0
  60. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/cluster/hierarchical/_base.py +0 -0
  61. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/cluster/hierarchical/_herc.py +0 -0
  62. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/cluster/hierarchical/_hrp.py +0 -0
  63. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/convex/__init__.py +0 -0
  64. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/convex/_base.py +0 -0
  65. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/convex/_distributionally_robust.py +0 -0
  66. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/convex/_maximum_diversification.py +0 -0
  67. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/convex/_mean_risk.py +0 -0
  68. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/convex/_risk_budgeting.py +0 -0
  69. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/ensemble/__init__.py +0 -0
  70. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/ensemble/_base.py +0 -0
  71. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/ensemble/_stacking.py +0 -0
  72. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/naive/__init__.py +0 -0
  73. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/optimization/naive/_naive.py +0 -0
  74. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/population/__init__.py +0 -0
  75. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/portfolio/__init__.py +0 -0
  76. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/pre_selection/__init__.py +0 -0
  77. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/pre_selection/_pre_selection.py +0 -0
  78. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/preprocessing/__init__.py +0 -0
  79. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/preprocessing/_returns.py +0 -0
  80. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/prior/__init__.py +0 -0
  81. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/prior/_base.py +0 -0
  82. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/prior/_black_litterman.py +0 -0
  83. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/prior/_empirical.py +0 -0
  84. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/prior/_factor_model.py +0 -0
  85. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/typing.py +0 -0
  86. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/uncertainty_set/__init__.py +0 -0
  87. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/uncertainty_set/_base.py +0 -0
  88. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/uncertainty_set/_bootstrap.py +0 -0
  89. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/uncertainty_set/_empirical.py +0 -0
  90. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/utils/__init__.py +0 -0
  91. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/utils/bootstrap.py +0 -0
  92. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/utils/equations.py +0 -0
  93. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/utils/sorting.py +0 -0
  94. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/utils/stats.py +0 -0
  95. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio.egg-info/SOURCES.txt +0 -0
  96. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio.egg-info/dependency_links.txt +0 -0
  97. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio.egg-info/requires.txt +0 -0
  98. {skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio.egg-info/top_level.txt +0 -0
{skfolio-0.3.1/src/skfolio.egg-info → skfolio-0.4.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: skfolio
-Version: 0.3.1
+Version: 0.4.0
 Summary: Portfolio optimization built on top of scikit-learn
 Author-email: Hugo Delatte <delatte.hugo@gmail.com>
 Maintainer-email: Hugo Delatte <delatte.hugo@gmail.com>
{skfolio-0.3.1 → skfolio-0.4.0}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "skfolio"
-version = "0.3.1"
+version = "0.4.0"
 maintainers = [
     { name = "Hugo Delatte", email = "delatte.hugo@gmail.com" },
 ]
@@ -95,7 +95,8 @@ version_toml = [
     "pyproject.toml:project.version",
 ]
 branch = "main"
-major_on_zero = true
+allow_zero_version = true
+major_on_zero = false
 tag_format = "v{version}"
 upload_to_pypi = false
 upload_to_vcs_release = true
{skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/datasets/_base.py
@@ -250,7 +250,7 @@ def load_factors_dataset() -> pd.DataFrame:
     The factors are:
 
         * "MTUM": Momentum
-        * "QUAL": Quanlity
+        * "QUAL": Quality
         * "SIZE": Size
         * "VLUE": Value
         * "USMV": low volatility
{skfolio-0.3.1 → skfolio-0.4.0}/src/skfolio/measures/_measures.py
@@ -347,7 +347,7 @@ def entropic_risk_measure(
     """Compute the entropic risk measure.
 
     The entropic risk measure is a risk measure which depends on the risk aversion
-    defined by the investor (theat) through the exponential utility function at a given
+    defined by the investor (theta) through the exponential utility function at a given
    confidence level (beta).
 
     Parameters
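For reference, the theta and beta parameters named in this docstring enter the standard entropic risk measure formula, ERM(X) = theta * ln(E[exp(-X / theta)] / (1 - beta)). Below is a minimal Python sketch of that standard formulation; it is an illustration only, and the actual function in _measures.py may differ in signature and details:

import numpy as np

def entropic_risk_measure_sketch(returns, theta=1.0, beta=0.95):
    # Standard entropic risk measure (as used in EVaR-style formulations):
    #   ERM(X) = theta * ln( E[exp(-X / theta)] / (1 - beta) )
    # theta is the investor's risk aversion, beta the confidence level.
    returns = np.asarray(returns)
    return theta * np.log(np.mean(np.exp(-returns / theta)) / (1 - beta))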
skfolio-0.4.0/src/skfolio/model_selection/_walk_forward.py (new file)
@@ -0,0 +1,440 @@
+"""Walk Forward cross-validator"""
+
+# Copyright (c) 2023
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+# Implementation derived from:
+# scikit-portfolio, Copyright (c) 2022, Carlo Nicolini, Licensed under MIT Licence.
+# scikit-learn, Copyright (c) 2007-2010 David Cournapeau, Fabian Pedregosa, Olivier
+# Grisel Licensed under BSD 3 clause.
+
+import datetime as dt
+from collections.abc import Iterator
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+import sklearn.model_selection as sks
+import sklearn.utils as sku
+
+
+class WalkForward(sks.BaseCrossValidator):
+    """Walk Forward Cross-Validator.
+
+    Provides train/test indices to split time series data samples using a walk-forward
+    logic.
+
+    In each split, test indices must be higher than the previous ones; therefore,
+    shuffling in the cross-validator is inappropriate.
+
+    Compared to `sklearn.model_selection.TimeSeriesSplit`, you control the train/test
+    folds by specifying the number of training and test samples instead of the number
+    of splits, making it more suitable for portfolio cross-validation.
+
+    If your data is a DataFrame indexed with a DatetimeIndex, you can split the data
+    using specific datetime frequencies and offsets.
+
+    Parameters
+    ----------
+    test_size : int
+        Length of each test set.
+        If `freq` is `None` (default), it represents the number of observations.
+        Otherwise, it represents the number of periods defined by `freq`.
+
+    train_size : int | pandas.offsets.DateOffset | datetime.timedelta
+        Length of each training set.
+        If `freq` is `None` (default), it represents the number of observations.
+        Otherwise, for integers, it represents the number of periods defined by `freq`;
+        for pandas DateOffset or datetime timedelta it represents the date offset
+        applied to the start of each period.
+
+    freq : str | pandas.offsets.DateOffset, optional
+        If provided, it must be a frequency string or a pandas DateOffset, and the
+        returns `X` must be a DataFrame with an index of type `DatetimeIndex`.
+        For a list of pandas frequencies and offsets, see `here <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#timeseries-offset-aliases>`_.
+        The default (`None`) means `test_size` and `train_size` represent the number of
+        observations.
+
+        Below are some common examples:
+
+        * Rebalancing : Monthly on the first day
+        * Test Duration : 1 month
+        * Train Duration : 6 months
+
+        >>> cv = WalkForward(test_size=1, train_size=6, freq="MS")
+
+        * Rebalancing : Quarterly on the first day
+        * Test Duration : 1 quarter
+        * Train Duration : 2 months
+
+        >>> cv = WalkForward(test_size=1, train_size=pd.DateOffset(months=2), freq="QS")
+
+        * Rebalancing : Monthly on the third Friday
+        * Test Duration : 1 month
+        * Train Duration : 6 weeks
+
+        >>> cv = WalkForward(test_size=1, train_size=pd.offsets.Week(6), freq="WOM-3FRI")
+
+        * Rebalancing : Semi-annually on the last day
+        * Test Duration : 6 months
+        * Train Duration : 1 year
+
+        >>> cv = WalkForward(test_size=1, train_size=2, freq=pd.offsets.SemiMonthEnd())
+
+        * Rebalancing : Every 2 months on the second day
+        * Test Duration : 2 months
+        * Train Duration : 6 months
+
+        >>> cv = WalkForward(test_size=2, train_size=6, freq="MS", freq_offset=dt.timedelta(days=2))
+
+    freq_offset : pandas DateOffset | datetime timedelta, optional
+        Only used if `freq` is provided. Offsets the `freq` by a pandas DateOffset or a
+        datetime timedelta offset.
+
+    previous : bool, default=False
+        Only used if `freq` is provided. If set to `True`, and if the period start
+        or period end is not in the `DatetimeIndex`, the previous observation is used;
+        otherwise, the next observation is used (default).
+
+    expend_train : bool, default=False
+        If set to `True`, each subsequent training set after the first one will
+        use all past observations.
+        The default is `False`.
+
+    reduce_test : bool, default=False
+        If set to `True`, the last train/test split will be returned even if the
+        test set is partial (i.e., it contains fewer observations than `test_size`);
+        otherwise, it will be ignored.
+        The default is `False`.
+
+    purged_size : int, default=0
+        The number of observations to exclude from the end of each training set before
+        the test set.
+        The default value is `0`.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from skfolio.model_selection import WalkForward
+    >>> X = np.random.randn(6, 2)
+    >>> cv = WalkForward(test_size=1, train_size=2)
+    >>> for i, (train_index, test_index) in enumerate(cv.split(X)):
+    ...     print(f"Fold {i}:")
+    ...     print(f"  Train: index={train_index}")
+    ...     print(f"  Test:  index={test_index}")
+    Fold 0:
+      Train: index=[0 1]
+      Test:  index=[2]
+    Fold 1:
+      Train: index=[1 2]
+      Test:  index=[3]
+    Fold 2:
+      Train: index=[2 3]
+      Test:  index=[4]
+    Fold 3:
+      Train: index=[3 4]
+      Test:  index=[5]
+    >>> cv = WalkForward(test_size=1, train_size=2, purged_size=1)
+    >>> for i, (train_index, test_index) in enumerate(cv.split(X)):
+    ...     print(f"Fold {i}:")
+    ...     print(f"  Train: index={train_index}")
+    ...     print(f"  Test:  index={test_index}")
+    Fold 0:
+      Train: index=[0 1]
+      Test:  index=[3]
+    Fold 1:
+      Train: index=[1 2]
+      Test:  index=[4]
+    Fold 2:
+      Train: index=[2 3]
+      Test:  index=[5]
+    >>> cv = WalkForward(test_size=2, train_size=3)
+    >>> for i, (train_index, test_index) in enumerate(cv.split(X)):
+    ...     print(f"Fold {i}:")
+    ...     print(f"  Train: index={train_index}")
+    ...     print(f"  Test:  index={test_index}")
+    Fold 0:
+      Train: index=[0 1 2]
+      Test:  index=[3 4]
+    >>> cv = WalkForward(test_size=2, train_size=3, reduce_test=True)
+    >>> for i, (train_index, test_index) in enumerate(cv.split(X)):
+    ...     print(f"Fold {i}:")
+    ...     print(f"  Train: index={train_index}")
+    ...     print(f"  Test:  index={test_index}")
+    Fold 0:
+      Train: index=[0 1 2]
+      Test:  index=[3 4]
+    Fold 1:
+      Train: index=[2 3 4]
+      Test:  index=[5]
+    >>> cv = WalkForward(test_size=2, train_size=3, expend_train=True, reduce_test=True)
+    >>> for i, (train_index, test_index) in enumerate(cv.split(X)):
+    ...     print(f"Fold {i}:")
+    ...     print(f"  Train: index={train_index}")
+    ...     print(f"  Test:  index={test_index}")
+    Fold 0:
+      Train: index=[0 1 2]
+      Test:  index=[3 4]
+    Fold 1:
+      Train: index=[0 1 2 3 4]
+      Test:  index=[5]
+    """
+
+    def __init__(
+        self,
+        test_size: int,
+        train_size: int | pd.offsets.BaseOffset | dt.timedelta,
+        freq: str | pd.offsets.BaseOffset | None = None,
+        freq_offset: pd.offsets.BaseOffset | dt.timedelta | None = None,
+        previous: bool = False,
+        expend_train: bool = False,
+        reduce_test: bool = False,
+        purged_size: int = 0,
+    ):
+        self.test_size = test_size
+        self.train_size = train_size
+        self.freq = freq
+        self.freq_offset = freq_offset
+        self.previous = previous
+        self.expend_train = expend_train
+        self.reduce_test = reduce_test
+        self.purged_size = purged_size
+
+    def split(
+        self, X: npt.ArrayLike, y=None, groups=None
+    ) -> Iterator[np.ndarray, np.ndarray]:
+        """Generate indices to split data into training and test set.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, n_assets)
+            Price returns of the assets.
+
+        y : array-like of shape (n_observations, n_targets)
+            Always ignored, exists for compatibility.
+
+        groups : array-like of shape (n_observations,)
+            Always ignored, exists for compatibility.
+
+        Yields
+        ------
+        train : ndarray
+            The training set indices for that split.
+
+        test : ndarray
+            The testing set indices for that split.
+        """
+        X, y = sku.indexable(X, y)
+        n_samples = X.shape[0]
+
+        if not isinstance(self.test_size, int):
+            raise ValueError("`test_size` must be an integer")
+
+        if self.freq is None:
+            if not isinstance(self.train_size, int):
+                raise ValueError("When `freq` is None, `train_size` must be an integer")
+            return _split_without_period(
+                n_samples=n_samples,
+                train_size=self.train_size,
+                test_size=self.test_size,
+                purged_size=self.purged_size,
+                expend_train=self.expend_train,
+                reduce_test=self.reduce_test,
+            )
+
+        if not hasattr(X, "index") or not isinstance(X.index, pd.DatetimeIndex):
+            raise ValueError(
+                "X must be a DataFrame with an index of type DatetimeIndex"
+            )
+        if isinstance(self.train_size, int):
+            return _split_from_period_without_train_offset(
+                n_samples=n_samples,
+                train_size=self.train_size,
+                test_size=self.test_size,
+                freq=self.freq,
+                freq_offset=self.freq_offset,
+                previous=self.previous,
+                purged_size=self.purged_size,
+                expend_train=self.expend_train,
+                reduce_test=self.reduce_test,
+                ts_index=X.index,
+            )
+        return _split_from_period_with_train_offset(
+            n_samples=n_samples,
+            train_size=self.train_size,
+            test_size=self.test_size,
+            freq=self.freq,
+            freq_offset=self.freq_offset,
+            previous=self.previous,
+            purged_size=self.purged_size,
+            expend_train=self.expend_train,
+            reduce_test=self.reduce_test,
+            ts_index=X.index,
+        )
+
+    def get_n_splits(self, X=None, y=None, groups=None) -> int:
+        """Returns the number of splitting iterations in the cross-validator.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_observations, n_assets)
+            Price returns of the assets.
+
+        y : array-like of shape (n_observations, n_targets)
+            Always ignored, exists for compatibility.
+
+        groups : array-like of shape (n_observations,)
+            Always ignored, exists for compatibility.
+
+        Returns
+        -------
+        n_folds : int
+            Returns the number of splitting iterations in the cross-validator.
+        """
+        if X is None:
+            raise ValueError("The 'X' parameter should not be None.")
+        X, y = sku.indexable(X, y)
+        n_samples = X.shape[0]
+        n = n_samples - self.train_size - self.purged_size
+
+        if self.reduce_test and n % self.test_size != 0:
+            return n // self.test_size + 1
+        return n // self.test_size
+
+
+def _split_without_period(
+    n_samples: int,
+    train_size: int,
+    test_size: int,
+    purged_size: int,
+    expend_train: bool,
+    reduce_test: bool,
+) -> Iterator[np.ndarray, np.ndarray]:
+    if train_size + purged_size >= n_samples:
+        raise ValueError(
+            "The sum of `train_size` with `purged_size` "
+            f"({train_size + purged_size}) cannot be greater than the"
+            f" number of samples ({n_samples})."
+        )
+
+    indices = np.arange(n_samples)
+
+    test_start = train_size + purged_size
+    while True:
+        if test_start >= n_samples:
+            return
+        test_end = test_start + test_size
+        train_end = test_start - purged_size
+        if expend_train:
+            train_start = 0
+        else:
+            train_start = train_end - train_size
+
+        if test_end > n_samples:
+            if not reduce_test:
+                return
+            test_indices = indices[test_start:]
+        else:
+            test_indices = indices[test_start:test_end]
+        train_indices = indices[train_start:train_end]
+        yield train_indices, test_indices
+
+        test_start = test_end
+
+
+def _split_from_period_without_train_offset(
+    n_samples: int,
+    train_size: int,
+    test_size: int,
+    freq: str,
+    freq_offset: pd.offsets.BaseOffset | dt.timedelta | None,
+    previous: bool,
+    purged_size: int,
+    expend_train: bool,
+    reduce_test: bool,
+    ts_index,
+) -> Iterator[np.ndarray, np.ndarray]:
+    start = ts_index[0]
+    end = ts_index[-1]
+    if freq_offset is not None:
+        start = min(start, start - freq_offset)
+
+    date_range = pd.date_range(start=start, end=end, freq=freq)
+    if freq_offset is not None:
+        date_range += freq_offset
+
+    idx = ts_index.get_indexer(date_range, method="ffill" if previous else "bfill")
+    n = len(idx)
+    i = 0
+    while True:
+        if i + train_size >= n:
+            return
+
+        if i + train_size + test_size >= n:
+            if not reduce_test:
+                return
+            test_indices = np.arange(idx[i + train_size], n_samples)
+
+        else:
+            test_indices = np.arange(
+                idx[i + train_size], idx[i + train_size + test_size]
+            )
+        if expend_train:
+            train_start = 0
+        else:
+            train_start = idx[i]
+        train_indices = np.arange(train_start, idx[i + train_size] - purged_size)
+        yield train_indices, test_indices
+
+        i += test_size
+
+
+def _split_from_period_with_train_offset(
+    n_samples: int,
+    train_size: pd.offsets.BaseOffset | dt.timedelta,
+    test_size: int,
+    freq: str,
+    freq_offset: pd.offsets.BaseOffset | dt.timedelta | None,
+    previous: bool,
+    purged_size: int,
+    expend_train: bool,
+    reduce_test: bool,
+    ts_index,
+) -> Iterator[np.ndarray, np.ndarray]:
+    start = ts_index[0]
+    end = ts_index[-1]
+    if freq_offset is not None:
+        start = min(start, start - freq_offset)
+
+    date_range = pd.date_range(start=start, end=end, freq=freq)
+    if freq_offset is not None:
+        date_range += freq_offset
+
+    idx = ts_index.get_indexer(date_range, method="ffill" if previous else "bfill")
+    train_idx = ts_index.get_indexer(date_range - train_size, method="ffill")
+
+    n = len(idx)
+
+    if np.all(train_idx == -1):
+        return
+
+    i = np.argmax(train_idx > -1)
+    while True:
+        if i >= n:
+            return
+
+        if i + test_size >= n:
+            if not reduce_test:
+                return
+            test_indices = np.arange(idx[i], n_samples)
+        else:
+            test_indices = np.arange(idx[i], idx[i + test_size] - purged_size)
+
+        if expend_train:
+            train_start = 0
+        else:
+            train_start = train_idx[i]
+        train_indices = np.arange(train_start, idx[i])
+        yield train_indices, test_indices
+
+        i += test_size
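The datetime-frequency splitting introduced by this new file can be exercised as follows. This is a usage sketch based on the docstring above; the index, data, and fold sizes are made up for illustration:

import numpy as np
import pandas as pd

from skfolio.model_selection import WalkForward

# Synthetic daily returns for 3 assets over ~2 years of business days.
index = pd.date_range("2022-01-03", periods=504, freq="B")
X = pd.DataFrame(np.random.randn(504, 3) / 100, index=index)

# Rebalance monthly on the first day: each test fold spans 1 month ("MS"),
# each training fold the preceding 6 months.
cv = WalkForward(test_size=1, train_size=6, freq="MS")
for train_index, test_index in cv.split(X):
    print(X.index[train_index[0]].date(), "->", X.index[test_index[-1]].date())

# Without `freq`, sizes are plain observation counts and get_n_splits applies:
# n = n_samples - train_size - purged_size = 504 - 120 - 0, so 384 // 20 = 19 folds.
cv = WalkForward(test_size=20, train_size=120)
print(cv.get_n_splits(X))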