skfolio 0.6.0__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +7 -7
- skfolio/cluster/__init__.py +2 -2
- skfolio/cluster/_hierarchical.py +2 -2
- skfolio/datasets/__init__.py +3 -3
- skfolio/datasets/_base.py +2 -2
- skfolio/datasets/data/__init__.py +1 -0
- skfolio/distance/__init__.py +4 -4
- skfolio/distance/_base.py +2 -2
- skfolio/distance/_distance.py +11 -10
- skfolio/distribution/__init__.py +56 -0
- skfolio/distribution/_base.py +203 -0
- skfolio/distribution/copula/__init__.py +35 -0
- skfolio/distribution/copula/_base.py +456 -0
- skfolio/distribution/copula/_clayton.py +539 -0
- skfolio/distribution/copula/_gaussian.py +407 -0
- skfolio/distribution/copula/_gumbel.py +560 -0
- skfolio/distribution/copula/_independent.py +196 -0
- skfolio/distribution/copula/_joe.py +609 -0
- skfolio/distribution/copula/_selection.py +111 -0
- skfolio/distribution/copula/_student_t.py +486 -0
- skfolio/distribution/copula/_utils.py +509 -0
- skfolio/distribution/multivariate/__init__.py +11 -0
- skfolio/distribution/multivariate/_base.py +241 -0
- skfolio/distribution/multivariate/_utils.py +632 -0
- skfolio/distribution/multivariate/_vine_copula.py +1254 -0
- skfolio/distribution/univariate/__init__.py +19 -0
- skfolio/distribution/univariate/_base.py +308 -0
- skfolio/distribution/univariate/_gaussian.py +136 -0
- skfolio/distribution/univariate/_johnson_su.py +152 -0
- skfolio/distribution/univariate/_normal_inverse_gaussian.py +153 -0
- skfolio/distribution/univariate/_selection.py +85 -0
- skfolio/distribution/univariate/_student_t.py +144 -0
- skfolio/exceptions.py +8 -8
- skfolio/measures/__init__.py +24 -24
- skfolio/measures/_enums.py +7 -7
- skfolio/measures/_measures.py +4 -7
- skfolio/metrics/__init__.py +2 -0
- skfolio/metrics/_scorer.py +4 -4
- skfolio/model_selection/__init__.py +4 -4
- skfolio/model_selection/_combinatorial.py +15 -12
- skfolio/model_selection/_validation.py +2 -2
- skfolio/model_selection/_walk_forward.py +3 -3
- skfolio/moments/__init__.py +11 -11
- skfolio/moments/covariance/__init__.py +6 -6
- skfolio/moments/covariance/_base.py +1 -1
- skfolio/moments/covariance/_denoise_covariance.py +3 -2
- skfolio/moments/covariance/_detone_covariance.py +3 -2
- skfolio/moments/covariance/_empirical_covariance.py +3 -2
- skfolio/moments/covariance/_ew_covariance.py +3 -2
- skfolio/moments/covariance/_gerber_covariance.py +3 -2
- skfolio/moments/covariance/_graphical_lasso_cv.py +1 -1
- skfolio/moments/covariance/_implied_covariance.py +3 -8
- skfolio/moments/covariance/_ledoit_wolf.py +1 -1
- skfolio/moments/covariance/_oas.py +1 -1
- skfolio/moments/covariance/_shrunk_covariance.py +1 -1
- skfolio/moments/expected_returns/__init__.py +2 -2
- skfolio/moments/expected_returns/_base.py +1 -1
- skfolio/moments/expected_returns/_empirical_mu.py +3 -2
- skfolio/moments/expected_returns/_equilibrium_mu.py +3 -2
- skfolio/moments/expected_returns/_ew_mu.py +3 -2
- skfolio/moments/expected_returns/_shrunk_mu.py +4 -3
- skfolio/optimization/__init__.py +12 -10
- skfolio/optimization/_base.py +2 -2
- skfolio/optimization/cluster/__init__.py +3 -1
- skfolio/optimization/cluster/_nco.py +10 -9
- skfolio/optimization/cluster/hierarchical/__init__.py +3 -1
- skfolio/optimization/cluster/hierarchical/_base.py +1 -2
- skfolio/optimization/cluster/hierarchical/_herc.py +4 -3
- skfolio/optimization/cluster/hierarchical/_hrp.py +4 -3
- skfolio/optimization/convex/__init__.py +5 -3
- skfolio/optimization/convex/_base.py +10 -9
- skfolio/optimization/convex/_distributionally_robust.py +8 -5
- skfolio/optimization/convex/_maximum_diversification.py +8 -6
- skfolio/optimization/convex/_mean_risk.py +10 -8
- skfolio/optimization/convex/_risk_budgeting.py +6 -4
- skfolio/optimization/ensemble/__init__.py +2 -0
- skfolio/optimization/ensemble/_base.py +2 -2
- skfolio/optimization/ensemble/_stacking.py +3 -3
- skfolio/optimization/naive/__init__.py +3 -1
- skfolio/optimization/naive/_naive.py +4 -3
- skfolio/population/__init__.py +2 -0
- skfolio/population/_population.py +34 -7
- skfolio/portfolio/__init__.py +1 -1
- skfolio/portfolio/_base.py +43 -8
- skfolio/portfolio/_multi_period_portfolio.py +3 -2
- skfolio/portfolio/_portfolio.py +5 -4
- skfolio/pre_selection/__init__.py +3 -1
- skfolio/pre_selection/_drop_correlated.py +3 -3
- skfolio/pre_selection/_select_complete.py +31 -30
- skfolio/pre_selection/_select_k_extremes.py +3 -3
- skfolio/pre_selection/_select_non_dominated.py +3 -3
- skfolio/pre_selection/_select_non_expiring.py +8 -6
- skfolio/preprocessing/__init__.py +2 -0
- skfolio/preprocessing/_returns.py +2 -2
- skfolio/prior/__init__.py +7 -3
- skfolio/prior/_base.py +2 -2
- skfolio/prior/_black_litterman.py +7 -4
- skfolio/prior/_empirical.py +5 -2
- skfolio/prior/_factor_model.py +10 -5
- skfolio/prior/_synthetic_data.py +239 -0
- skfolio/synthetic_returns/__init__.py +1 -0
- skfolio/typing.py +7 -7
- skfolio/uncertainty_set/__init__.py +7 -5
- skfolio/uncertainty_set/_base.py +5 -4
- skfolio/uncertainty_set/_bootstrap.py +1 -1
- skfolio/uncertainty_set/_empirical.py +1 -1
- skfolio/utils/__init__.py +1 -0
- skfolio/utils/bootstrap.py +2 -2
- skfolio/utils/equations.py +13 -10
- skfolio/utils/sorting.py +2 -2
- skfolio/utils/stats.py +15 -15
- skfolio/utils/tools.py +86 -22
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info}/METADATA +122 -46
- skfolio-0.8.0.dist-info/RECORD +120 -0
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info}/WHEEL +1 -1
- skfolio-0.6.0.dist-info/RECORD +0 -95
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info/licenses}/LICENSE +0 -0
- {skfolio-0.6.0.dist-info → skfolio-0.8.0.dist-info}/top_level.txt +0 -0
@@ -4,7 +4,7 @@ A population is a collection of portfolios.
|
|
4
4
|
|
5
5
|
# Copyright (c) 2023
|
6
6
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
7
|
-
# License: BSD
|
7
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
8
8
|
|
9
9
|
import inspect
|
10
10
|
from typing import Any
|
@@ -14,6 +14,7 @@ import pandas as pd
|
|
14
14
|
import plotly.express as px
|
15
15
|
import plotly.graph_objects as go
|
16
16
|
import scipy.interpolate as sci
|
17
|
+
import scipy.stats as st
|
17
18
|
|
18
19
|
import skfolio.typing as skt
|
19
20
|
from skfolio.measures import RatioMeasure
|
@@ -285,7 +286,7 @@ class Population(list):
|
|
285
286
|
measure: skt.Measure,
|
286
287
|
q: float,
|
287
288
|
) -> BasePortfolio:
|
288
|
-
"""
|
289
|
+
"""Return the portfolio corresponding to the `q` quantile for a given portfolio
|
289
290
|
measure.
|
290
291
|
|
291
292
|
Parameters
|
@@ -311,7 +312,7 @@ class Population(list):
|
|
311
312
|
self,
|
312
313
|
measure: skt.Measure,
|
313
314
|
) -> BasePortfolio:
|
314
|
-
"""
|
315
|
+
"""Return the portfolio with the minimum measure.
|
315
316
|
|
316
317
|
Parameters
|
317
318
|
----------
|
@@ -329,7 +330,7 @@ class Population(list):
|
|
329
330
|
self,
|
330
331
|
measure: skt.Measure,
|
331
332
|
) -> BasePortfolio:
|
332
|
-
"""
|
333
|
+
"""Return the portfolio with the maximum measure.
|
333
334
|
|
334
335
|
Parameters
|
335
336
|
----------
|
@@ -347,7 +348,7 @@ class Population(list):
|
|
347
348
|
self,
|
348
349
|
formatted: bool = True,
|
349
350
|
) -> pd.DataFrame:
|
350
|
-
"""Summary of the portfolios in the population
|
351
|
+
"""Summary of the portfolios in the population.
|
351
352
|
|
352
353
|
Parameters
|
353
354
|
----------
|
@@ -361,7 +362,6 @@ class Population(list):
|
|
361
362
|
summary : pandas DataFrame
|
362
363
|
The population's portfolios summary
|
363
364
|
"""
|
364
|
-
|
365
365
|
df = pd.concat(
|
366
366
|
[p.summary(formatted=formatted) for p in self],
|
367
367
|
keys=[p.name for p in self],
|
@@ -473,7 +473,6 @@ class Population(list):
|
|
473
473
|
dataframe : pandas DataFrame
|
474
474
|
The rolling measures.
|
475
475
|
"""
|
476
|
-
|
477
476
|
rolling_measures = []
|
478
477
|
names = []
|
479
478
|
for ptf in self:
|
@@ -942,6 +941,34 @@ class Population(list):
|
|
942
941
|
)
|
943
942
|
return fig
|
944
943
|
|
944
|
+
def plot_returns_distribution(self) -> go.Figure:
|
945
|
+
"""Plot the Portfolios returns distribution using Gaussian KDE.
|
946
|
+
|
947
|
+
Returns
|
948
|
+
-------
|
949
|
+
plot : Figure
|
950
|
+
Returns the plot Figure object
|
951
|
+
"""
|
952
|
+
traces = []
|
953
|
+
for ptf in self:
|
954
|
+
lower = np.percentile(ptf.returns, 1e-1)
|
955
|
+
upper = np.percentile(ptf.returns, 100 - 1e-1)
|
956
|
+
x = np.linspace(lower, upper, 500)
|
957
|
+
y = st.gaussian_kde(ptf.returns)(x)
|
958
|
+
traces.append(
|
959
|
+
go.Scatter(x=x, y=y, mode="lines", fill="tozeroy", name=ptf.name)
|
960
|
+
)
|
961
|
+
fig = go.Figure(traces)
|
962
|
+
fig.update_layout(
|
963
|
+
title="Returns Distribution",
|
964
|
+
xaxis_title="Returns",
|
965
|
+
yaxis_title="Probability Density",
|
966
|
+
)
|
967
|
+
fig.update_xaxes(
|
968
|
+
tickformat=".0%",
|
969
|
+
)
|
970
|
+
return fig
|
971
|
+
|
945
972
|
|
946
973
|
def _ptf_name_with_tag(portfolio: BasePortfolio) -> str:
|
947
974
|
if portfolio.tag is None:
|
skfolio/portfolio/__init__.py
CHANGED
@@ -10,4 +10,4 @@ from skfolio.portfolio._base import BasePortfolio
|
|
10
10
|
from skfolio.portfolio._multi_period_portfolio import MultiPeriodPortfolio
|
11
11
|
from skfolio.portfolio._portfolio import Portfolio
|
12
12
|
|
13
|
-
__all__ = ["BasePortfolio", "
|
13
|
+
__all__ = ["BasePortfolio", "MultiPeriodPortfolio", "Portfolio"]
|
skfolio/portfolio/_base.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
"""Base Portfolio module"""
|
1
|
+
"""Base Portfolio module."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
# The Portfolio class contains more than 40 measures that can be computationally
|
8
8
|
# expensive. The use of __slots__ instead of __dict__ is based on the following
|
@@ -45,6 +45,7 @@ import numpy as np
|
|
45
45
|
import pandas as pd
|
46
46
|
import plotly.express as px
|
47
47
|
import plotly.graph_objects as go
|
48
|
+
import scipy.stats as st
|
48
49
|
|
49
50
|
import skfolio.typing as skt
|
50
51
|
from skfolio import measures as mt
|
@@ -389,6 +390,7 @@ class BasePortfolio:
|
|
389
390
|
"edar_beta",
|
390
391
|
}
|
391
392
|
|
393
|
+
# ruff: noqa: RUF023
|
392
394
|
__slots__ = {
|
393
395
|
# public
|
394
396
|
"tag",
|
@@ -612,14 +614,14 @@ class BasePortfolio:
|
|
612
614
|
@property
|
613
615
|
@abstractmethod
|
614
616
|
def composition(self) -> pd.DataFrame:
|
615
|
-
"""DataFrame of the Portfolio composition"""
|
617
|
+
"""DataFrame of the Portfolio composition."""
|
616
618
|
pass
|
617
619
|
|
618
620
|
@abstractmethod
|
619
621
|
def contribution(
|
620
622
|
self, measure: skt.Measure, spacing: float | None = None, to_df: bool = True
|
621
623
|
) -> np.ndarray | pd.DataFrame:
|
622
|
-
"""Compute the contribution of each asset to a given measure"""
|
624
|
+
"""Compute the contribution of each asset to a given measure."""
|
623
625
|
pass
|
624
626
|
|
625
627
|
# Custom attribute setter and getter
|
@@ -653,7 +655,7 @@ class BasePortfolio:
|
|
653
655
|
# Custom attribute getter (read-only and cached)
|
654
656
|
@cached_property_slots
|
655
657
|
def fitness(self) -> np.ndarray:
|
656
|
-
"""
|
658
|
+
"""Portfolio fitness."""
|
657
659
|
res = []
|
658
660
|
for measure in self.fitness_measures:
|
659
661
|
if isinstance(measure, PerfMeasure | RatioMeasure):
|
@@ -678,7 +680,7 @@ class BasePortfolio:
|
|
678
680
|
# Classic property
|
679
681
|
@property
|
680
682
|
def n_observations(self) -> int:
|
681
|
-
"""Number of observations"""
|
683
|
+
"""Number of observations."""
|
682
684
|
return len(self.observations)
|
683
685
|
|
684
686
|
@property
|
@@ -708,7 +710,7 @@ class BasePortfolio:
|
|
708
710
|
return self.__copy__()
|
709
711
|
|
710
712
|
def clear(self) -> None:
|
711
|
-
"""Clear all measures, fitness, cumulative returns and drawdowns in slots"""
|
713
|
+
"""Clear all measures, fitness, cumulative returns and drawdowns in slots."""
|
712
714
|
attrs = ["_fitness", "_cumulative_returns", "_drawdowns"]
|
713
715
|
for attr in attrs + list(_MEASURES_VALUES):
|
714
716
|
delattr(self, attr)
|
@@ -1006,7 +1008,7 @@ class BasePortfolio:
|
|
1006
1008
|
return fig
|
1007
1009
|
|
1008
1010
|
def plot_returns(self, idx: slice | np.ndarray | None = None) -> go.Figure:
|
1009
|
-
"""Plot the Portfolio returns
|
1011
|
+
"""Plot the Portfolio returns.
|
1010
1012
|
|
1011
1013
|
Parameters
|
1012
1014
|
----------
|
@@ -1030,6 +1032,39 @@ class BasePortfolio:
|
|
1030
1032
|
)
|
1031
1033
|
return fig
|
1032
1034
|
|
1035
|
+
def plot_returns_distribution(self) -> go.Figure:
|
1036
|
+
"""Plot the Portfolio returns distribution using Gaussian KDE.
|
1037
|
+
|
1038
|
+
Returns
|
1039
|
+
-------
|
1040
|
+
plot : Figure
|
1041
|
+
Returns the plot Figure object
|
1042
|
+
"""
|
1043
|
+
lower = np.percentile(self.returns, 1e-1)
|
1044
|
+
upper = np.percentile(self.returns, 100 - 1e-1)
|
1045
|
+
x = np.linspace(lower, upper, 500)
|
1046
|
+
y = st.gaussian_kde(self.returns)(x)
|
1047
|
+
|
1048
|
+
fig = go.Figure(
|
1049
|
+
go.Scatter(
|
1050
|
+
x=x,
|
1051
|
+
y=y,
|
1052
|
+
mode="lines",
|
1053
|
+
fill="tozeroy",
|
1054
|
+
)
|
1055
|
+
)
|
1056
|
+
|
1057
|
+
fig.update_layout(
|
1058
|
+
title="Returns Distribution",
|
1059
|
+
xaxis_title="Returns",
|
1060
|
+
yaxis_title="Probability Density",
|
1061
|
+
showlegend=False,
|
1062
|
+
)
|
1063
|
+
fig.update_xaxes(
|
1064
|
+
tickformat=".0%",
|
1065
|
+
)
|
1066
|
+
return fig
|
1067
|
+
|
1033
1068
|
def plot_rolling_measure(
|
1034
1069
|
self,
|
1035
1070
|
measure: skt.Measure = RatioMeasure.SHARPE_RATIO,
|
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
# Copyright (c) 2023
|
7
7
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
8
|
-
# License: BSD
|
8
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
9
9
|
|
10
10
|
import numbers
|
11
11
|
from collections.abc import Iterator
|
@@ -538,7 +538,8 @@ class MultiPeriodPortfolio(BasePortfolio):
|
|
538
538
|
@portfolios.setter
|
539
539
|
def portfolios(self, value: list[Portfolio] | None = None):
|
540
540
|
"""Set the list of Portfolios and clear the attributes cache linked to the
|
541
|
-
list of portfolios.
|
541
|
+
list of portfolios.
|
542
|
+
"""
|
542
543
|
self._set_portfolios(portfolios=value)
|
543
544
|
self.clear()
|
544
545
|
|
skfolio/portfolio/_portfolio.py
CHANGED
@@ -6,7 +6,7 @@ is the dot product of the assets weights with the assets returns.
|
|
6
6
|
|
7
7
|
# Copyright (c) 2023
|
8
8
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
9
|
-
# License: BSD
|
9
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
10
10
|
|
11
11
|
import numbers
|
12
12
|
from typing import ClassVar
|
@@ -412,6 +412,7 @@ class Portfolio(BasePortfolio):
|
|
412
412
|
}
|
413
413
|
)
|
414
414
|
|
415
|
+
# ruff: noqa: RUF023
|
415
416
|
__slots__ = {
|
416
417
|
# read-only
|
417
418
|
"X",
|
@@ -648,12 +649,12 @@ class Portfolio(BasePortfolio):
|
|
648
649
|
# Custom attribute getter (read-only and cached)
|
649
650
|
@cached_property_slots
|
650
651
|
def nonzero_assets(self) -> np.ndarray:
|
651
|
-
"""Invested asset :math:`abs(weights) > 0.001
|
652
|
+
"""Invested asset :math:`abs(weights) > 0.001%`."""
|
652
653
|
return self.assets[self.nonzero_assets_index]
|
653
654
|
|
654
655
|
@cached_property_slots
|
655
656
|
def nonzero_assets_index(self) -> np.ndarray:
|
656
|
-
"""Indices of invested asset :math:`abs(weights) > 0.001
|
657
|
+
"""Indices of invested asset :math:`abs(weights) > 0.001%`."""
|
657
658
|
return np.flatnonzero(abs(self.weights) > _ZERO_THRESHOLD)
|
658
659
|
|
659
660
|
@property
|
@@ -705,7 +706,7 @@ class Portfolio(BasePortfolio):
|
|
705
706
|
@property
|
706
707
|
def effective_number_assets(self) -> float:
|
707
708
|
r"""Computes the effective number of assets, defined as the inverse of the
|
708
|
-
Herfindahl index
|
709
|
+
Herfindahl index.
|
709
710
|
|
710
711
|
.. math:: N_{eff} = \frac{1}{\Vert w \Vert_{2}^{2}}
|
711
712
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
"""Pre Selection module."""
|
2
|
+
|
1
3
|
from skfolio.pre_selection._drop_correlated import DropCorrelated
|
2
4
|
from skfolio.pre_selection._select_complete import SelectComplete
|
3
5
|
from skfolio.pre_selection._select_k_extremes import SelectKExtremes
|
@@ -6,8 +8,8 @@ from skfolio.pre_selection._select_non_expiring import SelectNonExpiring
|
|
6
8
|
|
7
9
|
__all__ = [
|
8
10
|
"DropCorrelated",
|
11
|
+
"SelectComplete",
|
9
12
|
"SelectKExtremes",
|
10
13
|
"SelectNonDominated",
|
11
|
-
"SelectComplete",
|
12
14
|
"SelectNonExpiring",
|
13
15
|
]
|
@@ -1,8 +1,8 @@
|
|
1
|
-
"""Pre-selection DropCorrelated module"""
|
1
|
+
"""Pre-selection DropCorrelated module."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import numpy.typing as npt
|
@@ -74,7 +74,7 @@ class DropCorrelated(skf.SelectorMixin, skb.BaseEstimator):
|
|
74
74
|
self : DropCorrelated
|
75
75
|
Fitted estimator.
|
76
76
|
"""
|
77
|
-
X =
|
77
|
+
X = skv.validate_data(self, X)
|
78
78
|
if not -1 <= self.threshold <= 1:
|
79
79
|
raise ValueError("`threshold` must be between -1 and 1")
|
80
80
|
|
@@ -1,8 +1,8 @@
|
|
1
|
-
"""pre-selection SelectComplete module"""
|
1
|
+
"""pre-selection SelectComplete module."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import numpy.typing as npt
|
@@ -49,30 +49,29 @@ class SelectComplete(skf.SelectorMixin, skb.BaseEstimator):
|
|
49
49
|
|
50
50
|
Examples
|
51
51
|
--------
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
[5.]])
|
52
|
+
>>> import numpy as np
|
53
|
+
>>> import pandas as pd
|
54
|
+
>>> from skfolio.pre_selection import SelectComplete
|
55
|
+
>>> X = pd.DataFrame({
|
56
|
+
... 'asset1': [np.nan, np.nan, 2, 3, 4], # Starts late (inception)
|
57
|
+
... 'asset2': [1, 2, 3, 4, 5], # Complete data
|
58
|
+
... 'asset3': [1, 2, 3, np.nan, 5], # Missing values within data
|
59
|
+
... 'asset4': [1, 2, 3, 4, np.nan] # Ends early (expiration)
|
60
|
+
... })
|
61
|
+
>>> selector = SelectComplete()
|
62
|
+
>>> selector.fit_transform(X)
|
63
|
+
array([[ 1., 1.],
|
64
|
+
[ 2., 2.],
|
65
|
+
[ 3., 3.],
|
66
|
+
[ 4., nan],
|
67
|
+
[ 5., 5.]])
|
68
|
+
>>> selector = SelectComplete(drop_assets_with_internal_nan=True)
|
69
|
+
>>> selector.fit_transform(X)
|
70
|
+
array([[1.],
|
71
|
+
[2.],
|
72
|
+
[3.],
|
73
|
+
[4.],
|
74
|
+
[5.]])
|
76
75
|
"""
|
77
76
|
|
78
77
|
to_keep_: np.ndarray
|
@@ -97,7 +96,7 @@ class SelectComplete(skf.SelectorMixin, skb.BaseEstimator):
|
|
97
96
|
Fitted estimator.
|
98
97
|
"""
|
99
98
|
# Validate by allowing NaNs
|
100
|
-
X =
|
99
|
+
X = skv.validate_data(self, X, ensure_all_finite="allow-nan")
|
101
100
|
|
102
101
|
if self.drop_assets_with_internal_nan:
|
103
102
|
# Identify columns with any NaNs
|
@@ -108,9 +107,11 @@ class SelectComplete(skf.SelectorMixin, skb.BaseEstimator):
|
|
108
107
|
|
109
108
|
return self
|
110
109
|
|
111
|
-
def _get_support_mask(self):
|
110
|
+
def _get_support_mask(self) -> np.ndarray:
|
112
111
|
skv.check_is_fitted(self)
|
113
112
|
return self.to_keep_
|
114
113
|
|
115
|
-
def
|
116
|
-
|
114
|
+
def __sklearn_tags__(self):
|
115
|
+
tags = super().__sklearn_tags__()
|
116
|
+
tags.input_tags.allow_nan = True
|
117
|
+
return tags
|
@@ -1,8 +1,8 @@
|
|
1
|
-
"""Pre-selection SelectKExtremes module"""
|
1
|
+
"""Pre-selection SelectKExtremes module."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import numpy.typing as npt
|
@@ -76,7 +76,7 @@ class SelectKExtremes(skf.SelectorMixin, skb.BaseEstimator):
|
|
76
76
|
self : SelectKExtremes
|
77
77
|
Fitted estimator.
|
78
78
|
"""
|
79
|
-
X =
|
79
|
+
X = skv.validate_data(self, X)
|
80
80
|
k = int(self.k)
|
81
81
|
if k <= 0:
|
82
82
|
raise ValueError("`k` must be strictly positive")
|
@@ -1,8 +1,8 @@
|
|
1
|
-
"""Pre-selection SelectNonDominated module"""
|
1
|
+
"""Pre-selection SelectNonDominated module."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import numpy.typing as npt
|
@@ -95,7 +95,7 @@ class SelectNonDominated(skf.SelectorMixin, skb.BaseEstimator):
|
|
95
95
|
self : SelectNonDominated
|
96
96
|
Fitted estimator.
|
97
97
|
"""
|
98
|
-
X =
|
98
|
+
X = skv.validate_data(self, X)
|
99
99
|
if not -1 <= self.threshold <= 1:
|
100
100
|
raise ValueError("`threshold` must be between -1 and 1")
|
101
101
|
n_assets = X.shape[1]
|
@@ -1,10 +1,10 @@
|
|
1
|
-
"""pre-selection estimators module"""
|
1
|
+
"""pre-selection estimators module."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
5
|
# Implementation derived from:
|
6
6
|
# Conway-Yu https://github.com/skfolio/skfolio/discussions/60
|
7
|
-
# License: BSD
|
7
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
8
8
|
|
9
9
|
import datetime as dt
|
10
10
|
|
@@ -114,7 +114,7 @@ class SelectNonExpiring(skf.SelectorMixin, skb.BaseEstimator):
|
|
114
114
|
self : SelectNonExpiring
|
115
115
|
Fitted estimator.
|
116
116
|
"""
|
117
|
-
_ =
|
117
|
+
_ = skv.validate_data(self, X, ensure_all_finite="allow-nan")
|
118
118
|
|
119
119
|
# Validate by allowing NaNs
|
120
120
|
if not hasattr(X, "index") or not isinstance(X.index, pd.DatetimeIndex):
|
@@ -140,9 +140,11 @@ class SelectNonExpiring(skf.SelectorMixin, skb.BaseEstimator):
|
|
140
140
|
|
141
141
|
return self
|
142
142
|
|
143
|
-
def _get_support_mask(self):
|
143
|
+
def _get_support_mask(self) -> np.ndarray:
|
144
144
|
skv.check_is_fitted(self)
|
145
145
|
return self.to_keep_
|
146
146
|
|
147
|
-
def
|
148
|
-
|
147
|
+
def __sklearn_tags__(self):
|
148
|
+
tags = super().__sklearn_tags__()
|
149
|
+
tags.input_tags.allow_nan = True
|
150
|
+
return tags
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
from typing import Literal
|
8
8
|
|
@@ -19,7 +19,7 @@ def prices_to_returns(
|
|
19
19
|
drop_inceptions_nan: bool = True,
|
20
20
|
fill_nan: bool = True,
|
21
21
|
) -> pd.DataFrame | tuple[pd.DataFrame, pd.DataFrame]:
|
22
|
-
r"""
|
22
|
+
r"""Transform a DataFrame of prices to linear or logarithmic returns.
|
23
23
|
|
24
24
|
Linear returns (also called simple returns) are defined as:
|
25
25
|
.. math:: \frac{S_{t}}{S_{t-1}} - 1
|
skfolio/prior/__init__.py
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
"""Prior module."""
|
2
|
+
|
1
3
|
from skfolio.prior._base import BasePrior, PriorModel
|
2
4
|
from skfolio.prior._black_litterman import BlackLitterman
|
3
5
|
from skfolio.prior._empirical import EmpiricalPrior
|
@@ -6,13 +8,15 @@ from skfolio.prior._factor_model import (
|
|
6
8
|
FactorModel,
|
7
9
|
LoadingMatrixRegression,
|
8
10
|
)
|
11
|
+
from skfolio.prior._synthetic_data import SyntheticData
|
9
12
|
|
10
13
|
__all__ = [
|
11
|
-
"
|
14
|
+
"BaseLoadingMatrix",
|
12
15
|
"BasePrior",
|
13
|
-
"EmpiricalPrior",
|
14
16
|
"BlackLitterman",
|
17
|
+
"EmpiricalPrior",
|
15
18
|
"FactorModel",
|
16
|
-
"BaseLoadingMatrix",
|
17
19
|
"LoadingMatrixRegression",
|
20
|
+
"PriorModel",
|
21
|
+
"SyntheticData",
|
18
22
|
]
|
skfolio/prior/_base.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
"""Base Prior estimator"""
|
1
|
+
"""Base Prior estimator."""
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
from abc import ABC, abstractmethod
|
8
8
|
from dataclasses import dataclass
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
# Implementation derived from:
|
7
7
|
# Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
|
8
8
|
# PyPortfolioOpt, Copyright (c) 2018 Robert Andrew Martin, Licensed under MIT Licence.
|
@@ -10,6 +10,7 @@
|
|
10
10
|
import numpy as np
|
11
11
|
import numpy.typing as npt
|
12
12
|
import sklearn.utils.metadata_routing as skm
|
13
|
+
import sklearn.utils.validation as skv
|
13
14
|
|
14
15
|
from skfolio.moments import EquilibriumMu
|
15
16
|
from skfolio.prior._base import BasePrior, PriorModel
|
@@ -38,7 +39,7 @@ class BlackLitterman(BasePrior):
|
|
38
39
|
about the assets expected returns expressed in the same frequency as the
|
39
40
|
returns `X`.
|
40
41
|
|
41
|
-
|
42
|
+
For example:
|
42
43
|
|
43
44
|
* "SPX = 0.00015" --> the SPX will have a daily expected return of 0.015%
|
44
45
|
* "SX5E - TLT = 0.00039" --> the SX5E will outperform the TLT by a daily expected return of 0.039%
|
@@ -52,7 +53,7 @@ class BlackLitterman(BasePrior):
|
|
52
53
|
(asset name/asset groups) and the input `X` of the `fit` method must be a
|
53
54
|
DataFrame with the assets names in columns.
|
54
55
|
|
55
|
-
|
56
|
+
For example:
|
56
57
|
|
57
58
|
* groups = {"SX5E": ["Equity", "Europe"], "SPX": ["Equity", "US"], "TLT": ["Bond", "US"]}
|
58
59
|
* groups = [["Equity", "Equity", "Bond"], ["Europe", "US", "US"]]
|
@@ -118,6 +119,8 @@ class BlackLitterman(BasePrior):
|
|
118
119
|
views_: np.ndarray
|
119
120
|
picking_matrix_: np.ndarray
|
120
121
|
prior_estimator_: BasePrior
|
122
|
+
n_features_in_: int
|
123
|
+
feature_names_in_: np.ndarray
|
121
124
|
|
122
125
|
def __init__(
|
123
126
|
self,
|
@@ -182,7 +185,7 @@ class BlackLitterman(BasePrior):
|
|
182
185
|
|
183
186
|
# we validate after all models have been fitted to keep features names
|
184
187
|
# information.
|
185
|
-
|
188
|
+
skv.validate_data(self, X)
|
186
189
|
|
187
190
|
n_assets = prior_returns.shape[1]
|
188
191
|
views = np.asarray(self.views)
|
skfolio/prior/_empirical.py
CHANGED
@@ -2,11 +2,12 @@
|
|
2
2
|
|
3
3
|
# Copyright (c) 2023
|
4
4
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
|
-
# License: BSD
|
5
|
+
# SPDX-License-Identifier: BSD-3-Clause
|
6
6
|
|
7
7
|
import numpy as np
|
8
8
|
import numpy.typing as npt
|
9
9
|
import sklearn.utils.metadata_routing as skm
|
10
|
+
import sklearn.utils.validation as skv
|
10
11
|
|
11
12
|
from skfolio.moments import BaseCovariance, BaseMu, EmpiricalCovariance, EmpiricalMu
|
12
13
|
from skfolio.prior._base import BasePrior, PriorModel
|
@@ -74,6 +75,8 @@ class EmpiricalPrior(BasePrior):
|
|
74
75
|
|
75
76
|
mu_estimator_: BaseMu
|
76
77
|
covariance_estimator_: BaseCovariance
|
78
|
+
n_features_in_: int
|
79
|
+
feature_names_in_: np.ndarray
|
77
80
|
|
78
81
|
def __init__(
|
79
82
|
self,
|
@@ -190,7 +193,7 @@ class EmpiricalPrior(BasePrior):
|
|
190
193
|
|
191
194
|
# we validate and convert to numpy after all models have been fitted to keep
|
192
195
|
# features names information.
|
193
|
-
X =
|
196
|
+
X = skv.validate_data(self, X)
|
194
197
|
self.prior_model_ = PriorModel(
|
195
198
|
mu=mu,
|
196
199
|
covariance=covariance,
|