skfolio 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +29 -0
- skfolio/cluster/__init__.py +8 -0
- skfolio/cluster/_hierarchical.py +387 -0
- skfolio/datasets/__init__.py +20 -0
- skfolio/datasets/_base.py +389 -0
- skfolio/datasets/data/__init__.py +0 -0
- skfolio/datasets/data/factors_dataset.csv.gz +0 -0
- skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
- skfolio/datasets/data/sp500_index.csv.gz +0 -0
- skfolio/distance/__init__.py +26 -0
- skfolio/distance/_base.py +55 -0
- skfolio/distance/_distance.py +574 -0
- skfolio/exceptions.py +30 -0
- skfolio/measures/__init__.py +76 -0
- skfolio/measures/_enums.py +355 -0
- skfolio/measures/_measures.py +607 -0
- skfolio/metrics/__init__.py +3 -0
- skfolio/metrics/_scorer.py +121 -0
- skfolio/model_selection/__init__.py +18 -0
- skfolio/model_selection/_combinatorial.py +407 -0
- skfolio/model_selection/_validation.py +194 -0
- skfolio/model_selection/_walk_forward.py +221 -0
- skfolio/moments/__init__.py +41 -0
- skfolio/moments/covariance/__init__.py +29 -0
- skfolio/moments/covariance/_base.py +101 -0
- skfolio/moments/covariance/_covariance.py +1108 -0
- skfolio/moments/expected_returns/__init__.py +21 -0
- skfolio/moments/expected_returns/_base.py +31 -0
- skfolio/moments/expected_returns/_expected_returns.py +415 -0
- skfolio/optimization/__init__.py +36 -0
- skfolio/optimization/_base.py +147 -0
- skfolio/optimization/cluster/__init__.py +13 -0
- skfolio/optimization/cluster/_nco.py +348 -0
- skfolio/optimization/cluster/hierarchical/__init__.py +13 -0
- skfolio/optimization/cluster/hierarchical/_base.py +440 -0
- skfolio/optimization/cluster/hierarchical/_herc.py +406 -0
- skfolio/optimization/cluster/hierarchical/_hrp.py +368 -0
- skfolio/optimization/convex/__init__.py +16 -0
- skfolio/optimization/convex/_base.py +1944 -0
- skfolio/optimization/convex/_distributionally_robust.py +392 -0
- skfolio/optimization/convex/_maximum_diversification.py +417 -0
- skfolio/optimization/convex/_mean_risk.py +974 -0
- skfolio/optimization/convex/_risk_budgeting.py +560 -0
- skfolio/optimization/ensemble/__init__.py +6 -0
- skfolio/optimization/ensemble/_base.py +87 -0
- skfolio/optimization/ensemble/_stacking.py +326 -0
- skfolio/optimization/naive/__init__.py +3 -0
- skfolio/optimization/naive/_naive.py +173 -0
- skfolio/population/__init__.py +3 -0
- skfolio/population/_population.py +883 -0
- skfolio/portfolio/__init__.py +13 -0
- skfolio/portfolio/_base.py +1096 -0
- skfolio/portfolio/_multi_period_portfolio.py +610 -0
- skfolio/portfolio/_portfolio.py +842 -0
- skfolio/pre_selection/__init__.py +7 -0
- skfolio/pre_selection/_pre_selection.py +342 -0
- skfolio/preprocessing/__init__.py +3 -0
- skfolio/preprocessing/_returns.py +114 -0
- skfolio/prior/__init__.py +18 -0
- skfolio/prior/_base.py +63 -0
- skfolio/prior/_black_litterman.py +238 -0
- skfolio/prior/_empirical.py +163 -0
- skfolio/prior/_factor_model.py +268 -0
- skfolio/typing.py +50 -0
- skfolio/uncertainty_set/__init__.py +23 -0
- skfolio/uncertainty_set/_base.py +108 -0
- skfolio/uncertainty_set/_bootstrap.py +281 -0
- skfolio/uncertainty_set/_empirical.py +237 -0
- skfolio/utils/__init__.py +0 -0
- skfolio/utils/bootstrap.py +115 -0
- skfolio/utils/equations.py +350 -0
- skfolio/utils/sorting.py +117 -0
- skfolio/utils/stats.py +466 -0
- skfolio/utils/tools.py +567 -0
- skfolio-0.0.1.dist-info/LICENSE +29 -0
- skfolio-0.0.1.dist-info/METADATA +568 -0
- skfolio-0.0.1.dist-info/RECORD +79 -0
- skfolio-0.0.1.dist-info/WHEEL +5 -0
- skfolio-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,389 @@
|
|
1
|
+
"""Datasets module."""
|
2
|
+
|
3
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
4
|
+
# License: BSD 3 clause
|
5
|
+
|
6
|
+
import gzip
|
7
|
+
import os
|
8
|
+
import shutil
|
9
|
+
import urllib.request as ur
|
10
|
+
from importlib import resources
|
11
|
+
from pathlib import Path
|
12
|
+
|
13
|
+
import joblib
|
14
|
+
import pandas as pd
|
15
|
+
|
16
|
+
DATA_MODULE = "skfolio.datasets.data"
|
17
|
+
|
18
|
+
|
19
|
+
def get_data_home(data_home: str | Path | None = None) -> str:
|
20
|
+
"""Return the path of the skfolio data directory.
|
21
|
+
|
22
|
+
This folder is used by some large dataset loaders to avoid downloading the
|
23
|
+
data several times.
|
24
|
+
|
25
|
+
By default, the data directory is set to a folder named 'skfolio_data' in the
|
26
|
+
user home folder.
|
27
|
+
|
28
|
+
Alternatively, it can be set by the 'SKFOLIO_DATA' environment
|
29
|
+
variable or programmatically by giving an explicit folder path. The '~'
|
30
|
+
symbol is expanded to the user home folder.
|
31
|
+
|
32
|
+
If the folder does not already exist, it is automatically created.
|
33
|
+
|
34
|
+
Parameters
|
35
|
+
----------
|
36
|
+
data_home : str, optional
|
37
|
+
The path to skfolio data directory. If `None`, the default path
|
38
|
+
is `~/skfolio_data`.
|
39
|
+
|
40
|
+
Returns
|
41
|
+
-------
|
42
|
+
data_home: str or path-like, optional
|
43
|
+
The path to skfolio data directory.
|
44
|
+
"""
|
45
|
+
if data_home is None:
|
46
|
+
data_home = os.environ.get("SKFOLIO_DATA", os.path.join("~", "skfolio_data"))
|
47
|
+
data_home = os.path.expanduser(data_home)
|
48
|
+
os.makedirs(data_home, exist_ok=True)
|
49
|
+
return data_home
|
50
|
+
|
51
|
+
|
52
|
+
def clear_data_home(data_home: str | Path | None = None) -> None:
    """Remove the data home cache folder together with everything it contains.

    Parameters
    ----------
    data_home : str or path-like, optional
        The path to the skfolio data directory. If `None`, the location is
        resolved by `get_data_home`.
    """
    shutil.rmtree(get_data_home(data_home))
|
63
|
+
|
64
|
+
|
65
|
+
def load_gzip_compressed_csv_data(
    data_filename: str,
    data_module: str = DATA_MODULE,
    encoding="utf-8",
    datetime_index: bool = True,
) -> pd.DataFrame:
    """Load a gzip-compressed csv file with `importlib.resources`.

    1) Locate the resource file with `importlib.resources.files`
    2) Decompress the csv file with `gzip.open`
    3) Load the decompressed data with `pd.read_csv`

    Parameters
    ----------
    data_filename : str
        Name of gzip-compressed csv file (`'*.csv.gz'`) to be loaded from
        `data_module/data_filename`. For example `'SPX500.csv.gz'`.

    data_module : str or module, default='skfolio.datasets.data'
        Module where data lives. The default is `'skfolio.datasets.data'`.

    encoding : str, default="utf-8"
        Name of the encoding that the gzip-decompressed file will be
        decoded with. The default is 'utf-8'.

    datetime_index : bool, default=True
        If this is set to True, the DataFrame index is converted to datetime with
        format="%Y-%m-%d".
        The default is `True`.

    Returns
    -------
    df : DataFrame of shape (n_observations, n_assets)
        DataFrame with each row representing one observation and each column
        representing the asset price of a given observation.
    """
    resource = resources.files(data_module).joinpath(data_filename)
    # Both the raw resource handle and the gzip text stream wrapping it are
    # context-managed, so neither is left open, even when parsing fails.
    with resource.open("rb") as raw_file:
        with gzip.open(raw_file, mode="rt", encoding=encoding) as text_file:
            # The first csv column is used as the DataFrame index.
            df = pd.read_csv(text_file, sep=",", index_col=0)
    if datetime_index:
        df.index = pd.to_datetime(df.index, format="%Y-%m-%d")
    return df
|
108
|
+
|
109
|
+
|
110
|
+
def download_dataset(
    data_filename: str,
    data_home: str | Path | None = None,
    download_if_missing: bool = True,
) -> pd.DataFrame:
    """Download and save locally a dataset from the remote GitHub dataset folder.

    The dataset is cached in `data_home` as `<data_filename>.pkz`. When that
    cache file already exists it is loaded and nothing is downloaded.

    Parameters
    ----------
    data_filename : str
        Base name of the dataset, without extension. The gzip-compressed csv
        file `<data_filename>.csv.gz` is fetched from the remote GitHub dataset
        folder.

    data_home : str or path-like, optional
        Specify another download and cache folder for the datasets. By default,
        the folder is the one returned by `get_data_home`.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.
        The default is `True`.

    Returns
    -------
    df : DataFrame of shape (n_observations, n_assets)
        DataFrame with each row representing one observation and each column
        representing the asset price of a given observation.

    Raises
    ------
    OSError
        If the dataset is not cached locally and `download_if_missing` is False.
    """
    url = (
        "https://github.com/HugoDelatte/portfolio-optimization/raw/main/datasets/"
        f"{data_filename}.csv.gz"
    )

    data_home = get_data_home(data_home=data_home)
    filepath = os.path.join(data_home, f"{data_filename}.pkz")

    if os.path.exists(filepath):
        # NOTE: joblib.load unpickles the cache file, so `data_home` must be a
        # trusted folder.
        return joblib.load(filepath)

    if not download_if_missing:
        raise OSError("Data not found and `download_if_missing` is False")

    # The loader joins the file name onto a package resource directory. An
    # absolute path keeps a relative `data_home` from being resolved inside
    # the package instead of against the working directory.
    # NOTE(review): this relies on `joinpath` returning an absolute path
    # unchanged, which holds for filesystem-backed packages -- confirm for
    # other importers (e.g. zip archives).
    archive_path = os.path.abspath(os.path.join(data_home, os.path.basename(url)))
    try:
        ur.urlretrieve(url, archive_path)
        df = load_gzip_compressed_csv_data(archive_path)
        joblib.dump(df, filepath, compress=6)
    finally:
        # The archive is only a temporary artefact: remove it whether or not
        # the download, the parsing or the cache dump succeeded.
        if os.path.exists(archive_path):
            os.remove(archive_path)
    return df
|
158
|
+
|
159
|
+
|
160
|
+
def load_sp500_dataset() -> pd.DataFrame:
    """Return the daily prices of 20 assets of the S&P 500 Index composition.

    The dataset holds the daily prices of 20 assets from the S&P 500
    composition, from 1990-01-02 up to 2022-12-28.

    The data comes from the Yahoo public API.
    The price is the adjusted close, i.e. the closing price after adjustments
    for all applicable splits and dividend distributions.
    The adjustment uses appropriate split and dividend multipliers, adhering to
    the Center for Research in Security Prices (CRSP) standards.

    ==============   ==================
    Observations     8313
    Assets           20
    ==============   ==================

    Returns
    -------
    df : DataFrame of shape (n_observations, n_assets)
        Prices DataFrame

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_dataset
    >>> prices = load_sp500_dataset()
    >>> prices.head()
                    AAPL     AMD       BAC  ...       UNH       WMT      XOM
    1990-01-02  0.332589  4.1250  11.65625  ...  0.382813  5.890625  12.5000
    1990-01-03  0.334821  4.0000  11.75000  ...  0.375000  5.890625  12.3750
    1990-01-04  0.335938  3.9375  11.50000  ...  0.371094  5.859375  12.2500
    1990-01-05  0.337054  3.8125  11.25000  ...  0.355469  5.796875  12.1875
    1990-01-08  0.339286  3.8125  11.31250  ...  0.347656  5.875000  12.3750
    """
    # The file is read from the default data module of the csv loader.
    return load_gzip_compressed_csv_data("sp500_dataset.csv.gz")
|
197
|
+
|
198
|
+
|
199
|
+
def load_sp500_index() -> pd.DataFrame:
    """Return the daily prices of the S&P 500 Index.

    The dataset holds the daily prices of the S&P 500 Index, from 1990-01-02
    up to 2022-12-28.

    The data comes from the Yahoo public API.
    The price is the adjusted close, i.e. the closing price after adjustments
    for all applicable splits and dividend distributions.
    The adjustment uses appropriate split and dividend multipliers, adhering to
    the Center for Research in Security Prices (CRSP) standards.

    ==============   ==================
    Observations     8313
    Assets           1
    ==============   ==================

    Returns
    -------
    df : DataFrame of shape (n_observations, n_assets)
        Prices DataFrame

    Examples
    --------
    >>> from skfolio.datasets import load_sp500_index
    >>> prices = load_sp500_index()
    >>> prices.head()
                 SP500
    Date
    1990-01-02  359.69
    1990-01-03  358.76
    1990-01-04  355.67
    1990-01-05  352.20
    1990-01-08  353.79
    """
    # The file is read from the default data module of the csv loader.
    return load_gzip_compressed_csv_data("sp500_index.csv.gz")
|
237
|
+
|
238
|
+
|
239
|
+
def load_factors_dataset() -> pd.DataFrame:
    """Return the daily prices of 5 factor ETFs.

    The dataset holds the daily prices of 5 ETFs representing common factors,
    from 2014-01-02 up to 2022-12-28.

    The factors are:

        * "MTUM": Momentum
        * "QUAL": Quality
        * "SIZE": Size
        * "VLUE": Value
        * "USMV": low volatility

    The data comes from the Yahoo public API.
    The price is the adjusted close, i.e. the closing price after adjustments
    for all applicable splits and dividend distributions.
    The adjustment uses appropriate split and dividend multipliers, adhering to
    the Center for Research in Security Prices (CRSP) standards.

    ==============   ==================
    Observations     2264
    Assets           5
    ==============   ==================

    Returns
    -------
    df : DataFrame of shape (n_observations, n_assets)
        Prices DataFrame

    Examples
    --------
    >>> from skfolio.datasets import load_factors_dataset
    >>> prices = load_factors_dataset()
    >>> prices.head()
                  MTUM    QUAL    SIZE    USMV    VLUE
    Date
    2014-01-02  52.704  48.351  48.986  29.338  47.054
    2014-01-03  52.792  48.256  48.722  29.330  46.999
    2014-01-06  52.677  48.067  48.722  29.263  46.991
    2014-01-07  53.112  48.455  48.731  29.430  47.253
    2014-01-08  53.502  48.437  48.731  29.422  47.253
    """
    # The file is read from the default data module of the csv loader.
    return load_gzip_compressed_csv_data("factors_dataset.csv.gz")
|
285
|
+
|
286
|
+
|
287
|
+
def load_ftse100_dataset(data_home=None, download_if_missing=True) -> pd.DataFrame:
    """Return the daily prices of 64 assets of the FTSE 100 Index composition.

    The dataset holds the daily prices of 64 assets from the FTSE 100 Index,
    from 2000-01-04 up to 2023-05-31.

    The data comes from the Yahoo public API.
    The price is the adjusted close, i.e. the closing price after adjustments
    for all applicable splits and dividend distributions.
    The adjustment uses appropriate split and dividend multipliers, adhering to
    the Center for Research in Security Prices (CRSP) standards.
    The data contains NaN.

    ==============   ==================
    Observations     5960
    Assets           64
    ==============   ==================

    Parameters
    ----------
    data_home : str or path-like, optional
        Specify another download and cache folder for the datasets.
        By default, all skfolio data is stored in `~/skfolio_data` subfolders.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    df : DataFrame of shape (n_observations, n_assets)
        Prices DataFrame

    Examples
    --------
    >>> from skfolio.datasets import load_ftse100_dataset
    >>> prices = load_ftse100_dataset()
    >>> prices.head()
                  AAL.L    ABF.L   AHT.L  ANTO.L  ...   VOD.L   WEIR.L    WPP.L    WTB.L
    Date                                          ...
    2000-01-04  535.354  205.926  97.590  40.313  ...  72.562  115.240  512.249  382.907
    2000-01-05  540.039  209.185  96.729  40.313  ...  69.042  118.483  462.080  381.972
    2000-01-06  553.289  229.048  95.581  40.452  ...  66.950  124.220  458.119  386.337
    2000-01-07  572.829  222.220  95.581  40.452  ...  70.716  121.725  475.283  405.046
    2000-01-10  578.852  224.548  92.711  40.685  ...  74.285  121.476  498.254  392.885
    """
    # Fetching and local caching are both delegated to `download_dataset`.
    return download_dataset(
        "ftse100_dataset",
        data_home=data_home,
        download_if_missing=download_if_missing,
    )
|
338
|
+
|
339
|
+
|
340
|
+
def load_nasdaq_dataset(data_home=None, download_if_missing=True) -> pd.DataFrame:
    """Return the daily prices of 1455 assets of the NASDAQ Composite Index.

    The dataset holds the daily prices of 1455 assets from the NASDAQ
    Composite, from 2018-01-02 up to 2023-05-31.

    The data comes from the Yahoo public API.
    The price is the adjusted close, i.e. the closing price after adjustments
    for all applicable splits and dividend distributions.
    The adjustment uses appropriate split and dividend multipliers, adhering to
    the Center for Research in Security Prices (CRSP) standards.

    ==============   ==================
    Observations     1362
    Assets           1455
    ==============   ==================

    Parameters
    ----------
    data_home : str or path-like, optional
        Specify another download and cache folder for the datasets.
        By default, all skfolio data is stored in `~/skfolio_data` subfolders.

    download_if_missing : bool, default=True
        If False, raise an OSError if the data is not locally available
        instead of trying to download the data from the source site.

    Returns
    -------
    df : DataFrame of shape (n_observations, n_assets)
        Prices DataFrame

    Examples
    --------
    >>> from skfolio.datasets import load_nasdaq_dataset
    >>> prices = load_nasdaq_dataset()
    >>> prices.head()
                   AAL   AAOI    AAON    AAPL  ...  ZVRA   ZYME    ZYNE   ZYXI
    Date                                       ...
    2018-01-02  51.648  37.91  35.621  41.310  ...  66.4  7.933  12.995  2.922
    2018-01-03  51.014  37.89  36.247  41.303  ...  72.8  7.965  13.460  2.913
    2018-01-04  51.336  38.38  36.103  41.495  ...  78.4  8.430  12.700  2.869
    2018-01-05  51.316  38.89  36.681  41.967  ...  77.6  8.400  12.495  2.780
    2018-01-08  50.809  38.37  36.103  41.811  ...  82.4  8.310  12.550  2.825
    """
    # Fetching and local caching are both delegated to `download_dataset`.
    return download_dataset(
        "nasdaq_dataset",
        data_home=data_home,
        download_if_missing=download_if_missing,
    )
|
File without changes
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,26 @@
|
|
1
|
+
"""Distance Estimators."""
|
2
|
+
|
3
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
4
|
+
# License: BSD 3 clause
|
5
|
+
|
6
|
+
from skfolio.distance._base import BaseDistance
|
7
|
+
from skfolio.distance._distance import (
|
8
|
+
CovarianceDistance,
|
9
|
+
DistanceCorrelation,
|
10
|
+
KendallDistance,
|
11
|
+
MutualInformation,
|
12
|
+
NBinsMethod,
|
13
|
+
PearsonDistance,
|
14
|
+
SpearmanDistance,
|
15
|
+
)
|
16
|
+
|
17
|
+
__all__ = [
|
18
|
+
"BaseDistance",
|
19
|
+
"PearsonDistance",
|
20
|
+
"KendallDistance",
|
21
|
+
"SpearmanDistance",
|
22
|
+
"CovarianceDistance",
|
23
|
+
"DistanceCorrelation",
|
24
|
+
"MutualInformation",
|
25
|
+
"NBinsMethod",
|
26
|
+
]
|
@@ -0,0 +1,55 @@
|
|
1
|
+
"""Base Distance Estimators"""
|
2
|
+
|
3
|
+
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
4
|
+
# License: BSD 3 clause
|
5
|
+
|
6
|
+
from abc import ABC, abstractmethod
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
import numpy.typing as npt
|
10
|
+
import sklearn.base as skb
|
11
|
+
|
12
|
+
|
13
|
+
class BaseDistance(skb.BaseEstimator, ABC):
    """Abstract parent class of the distance estimators in skfolio.

    Notes
    -----
    Every parameter that can be set on an estimator should be declared at the
    class level in its ``__init__`` as an explicit keyword argument
    (no ``*args`` or ``**kwargs``).

    Attributes
    ----------
    codependence_ : ndarray of shape (n_assets, n_assets)
        Codependence matrix.

    distance_ : ndarray of shape (n_assets, n_assets)
        Distance matrix.
    """

    codependence_: np.ndarray
    distance_: np.ndarray

    @abstractmethod
    def __init__(self):
        """Abstract constructor; concrete estimators must override it."""

    @abstractmethod
    def fit(self, X: npt.ArrayLike, y=None) -> "BaseDistance":
        """Fit the Distance estimator.

        Parameters
        ----------
        X : array-like of shape (n_observations, n_assets)
            Price returns of the assets.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : BaseDistance
            Fitted estimator.
        """
|