skfolio 0.5.2__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- skfolio/__init__.py +5 -5
- skfolio/cluster/__init__.py +1 -1
- skfolio/cluster/_hierarchical.py +1 -1
- skfolio/datasets/__init__.py +2 -2
- skfolio/distance/__init__.py +3 -3
- skfolio/distance/_distance.py +7 -6
- skfolio/exceptions.py +2 -2
- skfolio/measures/__init__.py +23 -23
- skfolio/model_selection/__init__.py +2 -2
- skfolio/moments/__init__.py +11 -11
- skfolio/moments/covariance/__init__.py +6 -6
- skfolio/moments/covariance/_denoise_covariance.py +2 -1
- skfolio/moments/covariance/_detone_covariance.py +2 -1
- skfolio/moments/covariance/_empirical_covariance.py +2 -1
- skfolio/moments/covariance/_ew_covariance.py +2 -1
- skfolio/moments/covariance/_gerber_covariance.py +2 -1
- skfolio/moments/covariance/_implied_covariance.py +1 -1
- skfolio/moments/expected_returns/__init__.py +2 -2
- skfolio/moments/expected_returns/_empirical_mu.py +2 -1
- skfolio/moments/expected_returns/_equilibrium_mu.py +2 -1
- skfolio/moments/expected_returns/_ew_mu.py +2 -1
- skfolio/moments/expected_returns/_shrunk_mu.py +2 -1
- skfolio/optimization/__init__.py +10 -10
- skfolio/optimization/cluster/__init__.py +1 -1
- skfolio/optimization/cluster/_nco.py +3 -2
- skfolio/optimization/cluster/hierarchical/__init__.py +1 -1
- skfolio/optimization/cluster/hierarchical/_herc.py +2 -1
- skfolio/optimization/cluster/hierarchical/_hrp.py +2 -1
- skfolio/optimization/convex/__init__.py +3 -3
- skfolio/optimization/convex/_base.py +344 -31
- skfolio/optimization/convex/_distributionally_robust.py +4 -1
- skfolio/optimization/convex/_maximum_diversification.py +4 -2
- skfolio/optimization/convex/_mean_risk.py +125 -17
- skfolio/optimization/convex/_risk_budgeting.py +3 -1
- skfolio/optimization/ensemble/_stacking.py +2 -2
- skfolio/optimization/naive/__init__.py +1 -1
- skfolio/optimization/naive/_naive.py +3 -2
- skfolio/portfolio/__init__.py +1 -1
- skfolio/portfolio/_base.py +1 -0
- skfolio/portfolio/_portfolio.py +1 -0
- skfolio/pre_selection/__init__.py +1 -1
- skfolio/pre_selection/_drop_correlated.py +1 -1
- skfolio/pre_selection/_select_complete.py +6 -4
- skfolio/pre_selection/_select_k_extremes.py +1 -1
- skfolio/pre_selection/_select_non_dominated.py +1 -1
- skfolio/pre_selection/_select_non_expiring.py +6 -4
- skfolio/prior/__init__.py +3 -3
- skfolio/prior/_black_litterman.py +2 -1
- skfolio/prior/_empirical.py +2 -1
- skfolio/prior/_factor_model.py +2 -1
- skfolio/typing.py +6 -6
- skfolio/uncertainty_set/__init__.py +5 -5
- skfolio/uncertainty_set/_base.py +3 -2
- skfolio/utils/equations.py +58 -1
- skfolio/utils/stats.py +8 -8
- skfolio/utils/tools.py +10 -10
- {skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/METADATA +32 -29
- skfolio-0.7.0.dist-info/RECORD +95 -0
- {skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/WHEEL +1 -1
- skfolio-0.5.2.dist-info/RECORD +0 -95
- {skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/LICENSE +0 -0
- {skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/top_level.txt +0 -0
@@ -1,17 +1,19 @@
|
|
1
1
|
"""Mean Risk Optimization estimator."""
|
2
2
|
|
3
|
+
import warnings
|
4
|
+
|
3
5
|
# Copyright (c) 2023
|
4
6
|
# Author: Hugo Delatte <delatte.hugo@gmail.com>
|
5
7
|
# License: BSD 3 clause
|
6
8
|
# The optimization features are derived
|
7
9
|
# from Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
|
8
|
-
|
9
10
|
import cvxpy as cp
|
10
11
|
import numpy as np
|
11
12
|
import numpy.typing as npt
|
12
13
|
import pandas as pd
|
13
14
|
import sklearn as sk
|
14
15
|
import sklearn.utils.metadata_routing as skm
|
16
|
+
import sklearn.utils.validation as skv
|
15
17
|
|
16
18
|
import skfolio.typing as skt
|
17
19
|
from skfolio.measures import RiskMeasure
|
@@ -144,6 +146,11 @@ class MeanRisk(ConvexOptimization):
|
|
144
146
|
returns and Cholesky decomposition of the covariance.
|
145
147
|
The default (`None`) is to use :class:`~skfolio.prior.EmpiricalPrior`.
|
146
148
|
|
149
|
+
efficient_frontier_size : int, optional
|
150
|
+
If provided, it represents the number of Pareto-optimal portfolios along the
|
151
|
+
efficient frontier to be computed. This parameter can only be used with
|
152
|
+
`objective_function = ObjectiveFunction.MINIMIZE_RISK`.
|
153
|
+
|
147
154
|
min_weights : float | dict[str, float] | array-like of shape (n_assets, ) | None, default=0.0
|
148
155
|
Minimum assets weights (weights lower bounds).
|
149
156
|
If a float is provided, it is applied to each asset.
|
@@ -213,6 +220,36 @@ class MeanRisk(ConvexOptimization):
|
|
213
220
|
weights.
|
214
221
|
The default (`None`) means no maximum long position.
|
215
222
|
|
223
|
+
cardinality : int, optional
|
224
|
+
Specifies the cardinality constraint to limit the number of invested assets
|
225
|
+
(non-zero weights). This feature requires a mixed-integer solver. For an
|
226
|
+
open-source option, we recommend using SCIP by setting `solver="SCIP"`.
|
227
|
+
To install it, use: `pip install cvxpy[SCIP]`. For commercial solvers,
|
228
|
+
supported options include MOSEK, GUROBI, or CPLEX.
|
229
|
+
|
230
|
+
group_cardinalities : dict[str, int], optional
|
231
|
+
A dictionary specifying cardinality constraints for specific groups of assets.
|
232
|
+
The keys represent group names (strings), and the values specify the maximum
|
233
|
+
number of assets allowed in each group. You must provide the groups using the
|
234
|
+
`groups` parameter. This requires a mixed-integer solver (see `cardinality`
|
235
|
+
for more details).
|
236
|
+
|
237
|
+
threshold_long : float | dict[str, float] | array-like of shape (n_assets, ), optional
|
238
|
+
Specifies the minimum weight threshold for assets in the portfolio to be
|
239
|
+
considered as a long position. Assets with weights below this threshold
|
240
|
+
will not be included as part of the portfolio's long positions. This
|
241
|
+
constraint can help eliminate insignificant allocations.
|
242
|
+
This requires a mixed-integer solver (see `cardinality` for more details).
|
243
|
+
It follows the same format as `min_weights` and `max_weights`.
|
244
|
+
|
245
|
+
threshold_short : float | dict[str, float] | array-like of shape (n_assets, ), optional
|
246
|
+
Specifies the maximum weight threshold for assets in the portfolio to be
|
247
|
+
considered as a short position. Assets with weights above this threshold
|
248
|
+
will not be included as part of the portfolio's short positions. This
|
249
|
+
constraint can help control the magnitude of short positions.
|
250
|
+
This requires a mixed-integer solver (see `cardinality` for more details).
|
251
|
+
It follows the same format as `min_weights` and `max_weights`.
|
252
|
+
|
216
253
|
transaction_costs : float | dict[str, float] | array-like of shape (n_assets, ), default=0.0
|
217
254
|
Transaction costs of the assets. It is used to add linear transaction costs to
|
218
255
|
the optimization problem:
|
@@ -486,9 +523,10 @@ class MeanRisk(ConvexOptimization):
|
|
486
523
|
solver_params : dict, optional
|
487
524
|
Solver parameters. For example, `solver_params=dict(verbose=True)`.
|
488
525
|
The default (`None`) is to use `{"tol_gap_abs": 1e-9, "tol_gap_rel": 1e-9}`
|
489
|
-
for
|
526
|
+
for "CLARABEL", `{"numerics/feastol": 1e-8, "limits/gap": 1e-8}` for SCIP
|
527
|
+
and the solver default otherwise.
|
490
528
|
For more details about solver arguments, check the CVXPY documentation:
|
491
|
-
https://www.cvxpy.org/tutorial/
|
529
|
+
https://www.cvxpy.org/tutorial/solvers
|
492
530
|
|
493
531
|
scale_objective : float, optional
|
494
532
|
Scale each objective element by this value.
|
@@ -511,7 +549,7 @@ class MeanRisk(ConvexOptimization):
|
|
511
549
|
portfolio_params : dict, optional
|
512
550
|
Portfolio parameters passed to the portfolio evaluated by the `predict` and
|
513
551
|
`score` methods. If not provided, the `name`, `transaction_costs`,
|
514
|
-
`management_fees`, `previous_weights` and `risk_free_rate` are copied from the
|
552
|
+
`management_fees`, `previous_weights` and `risk_free_rate` are copied from the
|
515
553
|
optimization model and passed to the portfolio.
|
516
554
|
|
517
555
|
Attributes
|
@@ -557,6 +595,10 @@ class MeanRisk(ConvexOptimization):
|
|
557
595
|
max_budget: float | None = None,
|
558
596
|
max_short: float | None = None,
|
559
597
|
max_long: float | None = None,
|
598
|
+
cardinality: int | None = None,
|
599
|
+
group_cardinalities: dict[str, int] | None = None,
|
600
|
+
threshold_long: skt.MultiInput | None = None,
|
601
|
+
threshold_short: skt.MultiInput | None = None,
|
560
602
|
transaction_costs: skt.MultiInput = 0.0,
|
561
603
|
management_fees: skt.MultiInput = 0.0,
|
562
604
|
previous_weights: skt.MultiInput | None = None,
|
@@ -617,6 +659,10 @@ class MeanRisk(ConvexOptimization):
|
|
617
659
|
max_budget=max_budget,
|
618
660
|
max_short=max_short,
|
619
661
|
max_long=max_long,
|
662
|
+
cardinality=cardinality,
|
663
|
+
group_cardinalities=group_cardinalities,
|
664
|
+
threshold_long=threshold_long,
|
665
|
+
threshold_short=threshold_short,
|
620
666
|
transaction_costs=transaction_costs,
|
621
667
|
management_fees=management_fees,
|
622
668
|
previous_weights=previous_weights,
|
@@ -719,7 +765,9 @@ class MeanRisk(ConvexOptimization):
|
|
719
765
|
"""
|
720
766
|
routed_params = skm.process_routing(self, "fit", **fit_params)
|
721
767
|
|
722
|
-
|
768
|
+
# `X` is left unchanged; only the `feature_names_in_` check is performed
|
769
|
+
_ = skv.validate_data(self, X, skip_check_array=True)
|
770
|
+
|
723
771
|
# Validate
|
724
772
|
self._validation()
|
725
773
|
# Used to avoid adding multiple times similar constraints linked to identical
|
@@ -734,13 +782,42 @@ class MeanRisk(ConvexOptimization):
|
|
734
782
|
n_observations, n_assets = prior_model.returns.shape
|
735
783
|
|
736
784
|
# set solvers params
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
785
|
+
match self.solver:
|
786
|
+
case "CLARABEL":
|
787
|
+
self._set_solver_params(
|
788
|
+
default={"tol_gap_abs": 1e-9, "tol_gap_rel": 1e-9}
|
789
|
+
)
|
790
|
+
case "SCIP":
|
791
|
+
self._set_solver_params(
|
792
|
+
default={"numerics/feastol": 1e-8, "limits/gap": 1e-8}
|
793
|
+
)
|
794
|
+
case _:
|
795
|
+
self._set_solver_params(default=None)
|
741
796
|
|
742
|
-
# set scales
|
797
|
+
# set scales and check measure
|
743
798
|
if self.objective_function == ObjectiveFunction.MAXIMIZE_RATIO:
|
799
|
+
if self.overwrite_expected_return is not None:
|
800
|
+
if self.risk_measure == RiskMeasure.VARIANCE:
|
801
|
+
warnings.warn(
|
802
|
+
"When selecting 'MAXIMIZE_RATIO' with 'VARIANCE', the "
|
803
|
+
"optimization will return the maximum Sharpe Ratio portfolio. "
|
804
|
+
"This is because the mean/variance ratio is not a "
|
805
|
+
"1-homogeneous function, unlike the mean/std. To suppress this"
|
806
|
+
"warning, replace 'VARIANCE' by 'STANDARD_DEVIATION'",
|
807
|
+
stacklevel=2,
|
808
|
+
)
|
809
|
+
|
810
|
+
elif self.risk_measure == RiskMeasure.SEMI_VARIANCE:
|
811
|
+
warnings.warn(
|
812
|
+
"When selecting 'MAXIMIZE_RATIO' with 'SEMI_VARIANCE', the "
|
813
|
+
"optimization will return the maximum Sortino Ratio portfolio. "
|
814
|
+
"This is because the mean/semi-variance ratio is not a "
|
815
|
+
"1-homogeneous function, unlike the mean/semi-std ratio. To "
|
816
|
+
"suppress this warning, replace 'SEMI_VARIANCE' by "
|
817
|
+
"'SEMI_DEVIATION'",
|
818
|
+
stacklevel=2,
|
819
|
+
)
|
820
|
+
|
744
821
|
self._set_scale_objective(default=1)
|
745
822
|
self._set_scale_constraints(default=1)
|
746
823
|
else:
|
@@ -819,7 +896,7 @@ class MeanRisk(ConvexOptimization):
|
|
819
896
|
" 1d-array, a single-column DataFrame or a Series"
|
820
897
|
)
|
821
898
|
y = y[y.columns[0]]
|
822
|
-
_, y =
|
899
|
+
_, y = skv.validate_data(self, X, y)
|
823
900
|
tracking_error = self._tracking_error(
|
824
901
|
prior_model=prior_model, w=w, y=y, factor=factor
|
825
902
|
)
|
@@ -959,31 +1036,38 @@ class MeanRisk(ConvexOptimization):
|
|
959
1036
|
+ custom_objective * self._scale_objective
|
960
1037
|
)
|
961
1038
|
case ObjectiveFunction.MAXIMIZE_RATIO:
|
1039
|
+
homogenization_factor = _optimal_homogenization_factor(
|
1040
|
+
mu=prior_model.mu
|
1041
|
+
)
|
1042
|
+
|
962
1043
|
if expected_return.is_affine():
|
963
1044
|
# Charnes-Cooper's variable transformation for Fractional
|
964
|
-
# Programming problem
|
1045
|
+
# Programming problem Max(f1/f2) with f2 linear and with
|
1046
|
+
# 1-homogeneous function (homogeneous technique)
|
965
1047
|
constraints += [
|
966
1048
|
expected_return * self._scale_constraints
|
967
1049
|
- cp.Constant(self.risk_free_rate)
|
968
1050
|
* factor
|
969
1051
|
* self._scale_constraints
|
970
|
-
== cp.Constant(
|
1052
|
+
== cp.Constant(homogenization_factor) * self._scale_constraints
|
971
1053
|
]
|
972
1054
|
else:
|
973
1055
|
# Schaible's generalization of Charnes-Cooper's variable
|
974
1056
|
# transformation for Fractional Programming problem :Max(f1/f2)
|
975
|
-
# with f1 concave instead of linear
|
976
|
-
#
|
977
|
-
#
|
1057
|
+
# with f1 concave instead of linear and with 1-homogeneous function.
|
1058
|
+
# (homogeneous technique)
|
1059
|
+
# Schaible,"Parameter-free Convex Equivalent and Dual Programs of
|
1060
|
+
# Fractional Programming Problems".
|
978
1061
|
# The condition to work is f1 >= 0, so we need to raise a user
|
979
1062
|
# warning when it's not the case.
|
980
1063
|
# TODO: raise user warning when f1<0
|
1064
|
+
|
981
1065
|
constraints += [
|
982
1066
|
expected_return * self._scale_constraints
|
983
1067
|
- cp.Constant(self.risk_free_rate)
|
984
1068
|
* factor
|
985
1069
|
* self._scale_constraints
|
986
|
-
>= cp.Constant(
|
1070
|
+
>= cp.Constant(homogenization_factor) * self._scale_constraints
|
987
1071
|
]
|
988
1072
|
objective = cp.Minimize(
|
989
1073
|
risk * self._scale_objective
|
@@ -1014,3 +1098,27 @@ class MeanRisk(ConvexOptimization):
|
|
1014
1098
|
)
|
1015
1099
|
|
1016
1100
|
return self
|
1101
|
+
|
1102
|
+
|
1103
|
+
def _optimal_homogenization_factor(mu: np.ndarray) -> float:
|
1104
|
+
"""
|
1105
|
+
Compute the optimal homogenization factor for ratio optimization based on expected
|
1106
|
+
returns.
|
1107
|
+
|
1108
|
+
While a default value of 1 is commonly used in textbooks for simplicity,
|
1109
|
+
fine-tuning this factor based on the underlying data can enhance convergence.
|
1110
|
+
Additionally, using a data-driven approach to determine this factor can improve the
|
1111
|
+
robustness of certain constraints, such as the calibration of big M methods.
|
1112
|
+
|
1113
|
+
Parameters
|
1114
|
+
----------
|
1115
|
+
mu : ndarray of shape (n_assets,)
|
1116
|
+
Vector of expected returns.
|
1117
|
+
|
1118
|
+
Returns
|
1119
|
+
-------
|
1120
|
+
value : float
|
1121
|
+
Homogenization factor.
|
1122
|
+
"""
|
1123
|
+
|
1124
|
+
return min(1e3, max(1e-3, np.mean(np.abs(mu))))
|
@@ -10,6 +10,7 @@ import cvxpy as cp
|
|
10
10
|
import numpy as np
|
11
11
|
import numpy.typing as npt
|
12
12
|
import sklearn.utils.metadata_routing as skm
|
13
|
+
import sklearn.utils.validation as skv
|
13
14
|
|
14
15
|
import skfolio.typing as skt
|
15
16
|
from skfolio.measures import RiskMeasure
|
@@ -452,7 +453,8 @@ class RiskBudgeting(ConvexOptimization):
|
|
452
453
|
"""
|
453
454
|
routed_params = skm.process_routing(self, "fit", **fit_params)
|
454
455
|
|
455
|
-
|
456
|
+
# `X` is left unchanged; only the `feature_names_in_` check is performed
|
457
|
+
_ = skv.validate_data(self, X, skip_check_array=True)
|
456
458
|
|
457
459
|
if not isinstance(self.risk_measure, RiskMeasure):
|
458
460
|
raise TypeError("risk_measure must be of type `RiskMeasure`")
|
@@ -330,9 +330,9 @@ class StackingOptimization(BaseOptimization, BaseComposition):
|
|
330
330
|
# We validate and convert to numpy array only after base-estimator fitting
|
331
331
|
# to keep the assets names in case they are used in the estimator.
|
332
332
|
if y is not None:
|
333
|
-
_, y =
|
333
|
+
_, y = skv.validate_data(self, X, y, multi_output=True)
|
334
334
|
else:
|
335
|
-
_ =
|
335
|
+
_ = skv.validate_data(self, X)
|
336
336
|
|
337
337
|
if isinstance(self.cv, BaseCombinatorialCV):
|
338
338
|
X_pred = np.array(
|
@@ -6,6 +6,7 @@
|
|
6
6
|
import numpy as np
|
7
7
|
import numpy.typing as npt
|
8
8
|
import sklearn.utils.metadata_routing as skm
|
9
|
+
import sklearn.utils.validation as skv
|
9
10
|
|
10
11
|
from skfolio.optimization._base import BaseOptimization
|
11
12
|
from skfolio.prior import BasePrior, EmpiricalPrior
|
@@ -141,7 +142,7 @@ class EqualWeighted(BaseOptimization):
|
|
141
142
|
self : EqualWeighted
|
142
143
|
Fitted estimator.
|
143
144
|
"""
|
144
|
-
X =
|
145
|
+
X = skv.validate_data(self, X)
|
145
146
|
n_assets = X.shape[1]
|
146
147
|
self.weights_ = np.ones(n_assets) / n_assets
|
147
148
|
return self
|
@@ -185,7 +186,7 @@ class Random(BaseOptimization):
|
|
185
186
|
self : Random
|
186
187
|
Fitted estimator.
|
187
188
|
"""
|
188
|
-
X =
|
189
|
+
X = skv.validate_data(self, X)
|
189
190
|
n_assets = X.shape[1]
|
190
191
|
self.weights_ = rand_weights_dirichlet(n=n_assets)
|
191
192
|
return self
|
skfolio/portfolio/__init__.py
CHANGED
@@ -10,4 +10,4 @@ from skfolio.portfolio._base import BasePortfolio
|
|
10
10
|
from skfolio.portfolio._multi_period_portfolio import MultiPeriodPortfolio
|
11
11
|
from skfolio.portfolio._portfolio import Portfolio
|
12
12
|
|
13
|
-
__all__ = ["BasePortfolio", "
|
13
|
+
__all__ = ["BasePortfolio", "MultiPeriodPortfolio", "Portfolio"]
|
skfolio/portfolio/_base.py
CHANGED
skfolio/portfolio/_portfolio.py
CHANGED
@@ -74,7 +74,7 @@ class DropCorrelated(skf.SelectorMixin, skb.BaseEstimator):
|
|
74
74
|
self : DropCorrelated
|
75
75
|
Fitted estimator.
|
76
76
|
"""
|
77
|
-
X =
|
77
|
+
X = skv.validate_data(self, X)
|
78
78
|
if not -1 <= self.threshold <= 1:
|
79
79
|
raise ValueError("`threshold` must be between -1 and 1")
|
80
80
|
|
@@ -97,7 +97,7 @@ class SelectComplete(skf.SelectorMixin, skb.BaseEstimator):
|
|
97
97
|
Fitted estimator.
|
98
98
|
"""
|
99
99
|
# Validate by allowing NaNs
|
100
|
-
X =
|
100
|
+
X = skv.validate_data(self, X, ensure_all_finite="allow-nan")
|
101
101
|
|
102
102
|
if self.drop_assets_with_internal_nan:
|
103
103
|
# Identify columns with any NaNs
|
@@ -108,9 +108,11 @@ class SelectComplete(skf.SelectorMixin, skb.BaseEstimator):
|
|
108
108
|
|
109
109
|
return self
|
110
110
|
|
111
|
-
def _get_support_mask(self):
|
111
|
+
def _get_support_mask(self) -> np.ndarray:
|
112
112
|
skv.check_is_fitted(self)
|
113
113
|
return self.to_keep_
|
114
114
|
|
115
|
-
def
|
116
|
-
|
115
|
+
def __sklearn_tags__(self):
|
116
|
+
tags = super().__sklearn_tags__()
|
117
|
+
tags.input_tags.allow_nan = True
|
118
|
+
return tags
|
@@ -76,7 +76,7 @@ class SelectKExtremes(skf.SelectorMixin, skb.BaseEstimator):
|
|
76
76
|
self : SelectKExtremes
|
77
77
|
Fitted estimator.
|
78
78
|
"""
|
79
|
-
X =
|
79
|
+
X = skv.validate_data(self, X)
|
80
80
|
k = int(self.k)
|
81
81
|
if k <= 0:
|
82
82
|
raise ValueError("`k` must be strictly positive")
|
@@ -95,7 +95,7 @@ class SelectNonDominated(skf.SelectorMixin, skb.BaseEstimator):
|
|
95
95
|
self : SelectNonDominated
|
96
96
|
Fitted estimator.
|
97
97
|
"""
|
98
|
-
X =
|
98
|
+
X = skv.validate_data(self, X)
|
99
99
|
if not -1 <= self.threshold <= 1:
|
100
100
|
raise ValueError("`threshold` must be between -1 and 1")
|
101
101
|
n_assets = X.shape[1]
|
@@ -114,7 +114,7 @@ class SelectNonExpiring(skf.SelectorMixin, skb.BaseEstimator):
|
|
114
114
|
self : SelectNonExpiring
|
115
115
|
Fitted estimator.
|
116
116
|
"""
|
117
|
-
_ =
|
117
|
+
_ = skv.validate_data(self, X, ensure_all_finite="allow-nan")
|
118
118
|
|
119
119
|
# Validate by allowing NaNs
|
120
120
|
if not hasattr(X, "index") or not isinstance(X.index, pd.DatetimeIndex):
|
@@ -140,9 +140,11 @@ class SelectNonExpiring(skf.SelectorMixin, skb.BaseEstimator):
|
|
140
140
|
|
141
141
|
return self
|
142
142
|
|
143
|
-
def _get_support_mask(self):
|
143
|
+
def _get_support_mask(self) -> np.ndarray:
|
144
144
|
skv.check_is_fitted(self)
|
145
145
|
return self.to_keep_
|
146
146
|
|
147
|
-
def
|
148
|
-
|
147
|
+
def __sklearn_tags__(self):
|
148
|
+
tags = super().__sklearn_tags__()
|
149
|
+
tags.input_tags.allow_nan = True
|
150
|
+
return tags
|
skfolio/prior/__init__.py
CHANGED
@@ -8,11 +8,11 @@ from skfolio.prior._factor_model import (
|
|
8
8
|
)
|
9
9
|
|
10
10
|
__all__ = [
|
11
|
-
"
|
11
|
+
"BaseLoadingMatrix",
|
12
12
|
"BasePrior",
|
13
|
-
"EmpiricalPrior",
|
14
13
|
"BlackLitterman",
|
14
|
+
"EmpiricalPrior",
|
15
15
|
"FactorModel",
|
16
|
-
"BaseLoadingMatrix",
|
17
16
|
"LoadingMatrixRegression",
|
17
|
+
"PriorModel",
|
18
18
|
]
|
@@ -10,6 +10,7 @@
|
|
10
10
|
import numpy as np
|
11
11
|
import numpy.typing as npt
|
12
12
|
import sklearn.utils.metadata_routing as skm
|
13
|
+
import sklearn.utils.validation as skv
|
13
14
|
|
14
15
|
from skfolio.moments import EquilibriumMu
|
15
16
|
from skfolio.prior._base import BasePrior, PriorModel
|
@@ -182,7 +183,7 @@ class BlackLitterman(BasePrior):
|
|
182
183
|
|
183
184
|
# we validate after all models have been fitted to keep features names
|
184
185
|
# information.
|
185
|
-
|
186
|
+
skv.validate_data(self, X)
|
186
187
|
|
187
188
|
n_assets = prior_returns.shape[1]
|
188
189
|
views = np.asarray(self.views)
|
skfolio/prior/_empirical.py
CHANGED
@@ -7,6 +7,7 @@
|
|
7
7
|
import numpy as np
|
8
8
|
import numpy.typing as npt
|
9
9
|
import sklearn.utils.metadata_routing as skm
|
10
|
+
import sklearn.utils.validation as skv
|
10
11
|
|
11
12
|
from skfolio.moments import BaseCovariance, BaseMu, EmpiricalCovariance, EmpiricalMu
|
12
13
|
from skfolio.prior._base import BasePrior, PriorModel
|
@@ -190,7 +191,7 @@ class EmpiricalPrior(BasePrior):
|
|
190
191
|
|
191
192
|
# we validate and convert to numpy after all models have been fitted to keep
|
192
193
|
# features names information.
|
193
|
-
X =
|
194
|
+
X = skv.validate_data(self, X)
|
194
195
|
self.prior_model_ = PriorModel(
|
195
196
|
mu=mu,
|
196
197
|
covariance=covariance,
|
skfolio/prior/_factor_model.py
CHANGED
@@ -17,6 +17,7 @@ import sklearn.base as skb
|
|
17
17
|
import sklearn.linear_model as skl
|
18
18
|
import sklearn.multioutput as skmo
|
19
19
|
import sklearn.utils.metadata_routing as skm
|
20
|
+
import sklearn.utils.validation as skv
|
20
21
|
|
21
22
|
from skfolio.prior._base import BasePrior, PriorModel
|
22
23
|
from skfolio.prior._empirical import EmpiricalPrior
|
@@ -273,7 +274,7 @@ class FactorModel(BasePrior):
|
|
273
274
|
|
274
275
|
# we validate and convert to numpy after all models have been fitted to keep
|
275
276
|
# features names information.
|
276
|
-
X, y =
|
277
|
+
X, y = skv.validate_data(self, X, y, multi_output=True)
|
277
278
|
n_assets = X.shape[1]
|
278
279
|
n_factors = y.shape[1]
|
279
280
|
|
skfolio/typing.py
CHANGED
@@ -14,20 +14,20 @@ import plotly.graph_objects as go
|
|
14
14
|
from skfolio.measures import ExtraRiskMeasure, PerfMeasure, RatioMeasure, RiskMeasure
|
15
15
|
|
16
16
|
__all__ = [
|
17
|
+
"CvxMeasure",
|
18
|
+
"ExpressionFunction",
|
19
|
+
"Factor",
|
17
20
|
"Groups",
|
18
21
|
"Inequality",
|
19
22
|
"LinearConstraints",
|
23
|
+
"Measure",
|
20
24
|
"MultiInput",
|
21
|
-
"
|
25
|
+
"Names",
|
22
26
|
"ParametersValues",
|
23
|
-
"Factor",
|
24
27
|
"Result",
|
25
28
|
"RiskResult",
|
26
|
-
"ExpressionFunction",
|
27
|
-
"Measure",
|
28
|
-
"CvxMeasure",
|
29
|
-
"Names",
|
30
29
|
"Tags",
|
30
|
+
"Target",
|
31
31
|
]
|
32
32
|
|
33
33
|
Measure = PerfMeasure | RiskMeasure | ExtraRiskMeasure | RatioMeasure
|
@@ -13,11 +13,11 @@ from skfolio.uncertainty_set._empirical import (
|
|
13
13
|
)
|
14
14
|
|
15
15
|
__all__ = [
|
16
|
-
"UncertaintySet",
|
17
|
-
"BaseMuUncertaintySet",
|
18
16
|
"BaseCovarianceUncertaintySet",
|
19
|
-
"
|
20
|
-
"EmpiricalCovarianceUncertaintySet",
|
21
|
-
"BootstrapMuUncertaintySet",
|
17
|
+
"BaseMuUncertaintySet",
|
22
18
|
"BootstrapCovarianceUncertaintySet",
|
19
|
+
"BootstrapMuUncertaintySet",
|
20
|
+
"EmpiricalCovarianceUncertaintySet",
|
21
|
+
"EmpiricalMuUncertaintySet",
|
22
|
+
"UncertaintySet",
|
23
23
|
]
|
skfolio/uncertainty_set/_base.py
CHANGED
@@ -11,6 +11,7 @@ import numpy as np
|
|
11
11
|
import numpy.typing as npt
|
12
12
|
import sklearn.base as skb
|
13
13
|
import sklearn.utils.metadata_routing as skm
|
14
|
+
import sklearn.utils.validation as skv
|
14
15
|
|
15
16
|
from skfolio.prior import BasePrior
|
16
17
|
|
@@ -113,9 +114,9 @@ class BaseCovarianceUncertaintySet(skb.BaseEstimator, ABC):
|
|
113
114
|
Validated price returns of factors or a target benchmark if provided.
|
114
115
|
"""
|
115
116
|
if y is None:
|
116
|
-
X =
|
117
|
+
X = skv.validate_data(self, X)
|
117
118
|
else:
|
118
|
-
X, y =
|
119
|
+
X, y = skv.validate_data(self, X, y, multi_output=True)
|
119
120
|
return X, y
|
120
121
|
|
121
122
|
def get_metadata_routing(self):
|
skfolio/utils/equations.py
CHANGED
@@ -16,7 +16,7 @@ from skfolio.exceptions import (
|
|
16
16
|
GroupNotFoundError,
|
17
17
|
)
|
18
18
|
|
19
|
-
__all__ = ["equations_to_matrix"]
|
19
|
+
__all__ = ["equations_to_matrix", "group_cardinalities_to_matrix"]
|
20
20
|
|
21
21
|
_EQUALITY_OPERATORS = {"==", "="}
|
22
22
|
_INEQUALITY_OPERATORS = {">=", "<="}
|
@@ -132,6 +132,63 @@ def equations_to_matrix(
|
|
132
132
|
)
|
133
133
|
|
134
134
|
|
135
|
+
def group_cardinalities_to_matrix(
|
136
|
+
groups: npt.ArrayLike,
|
137
|
+
group_cardinalities: dict[str, int],
|
138
|
+
raise_if_group_missing: bool = False,
|
139
|
+
) -> tuple[np.ndarray, np.ndarray]:
|
140
|
+
"""Convert a dictionary of group cardinalities into the left and right matrices
|
141
|
+
of the cardinality inequality A <= B.
|
142
|
+
|
143
|
+
Parameters
|
144
|
+
----------
|
145
|
+
groups : array-like of shape (n_groups, n_assets)
|
146
|
+
2D array of assets groups.
|
147
|
+
|
148
|
+
Examples:
|
149
|
+
groups = np.array(
|
150
|
+
[
|
151
|
+
["Equity", "Equity", "Equity", "Bond"],
|
152
|
+
["US", "Europe", "Japan", "US"],
|
153
|
+
]
|
154
|
+
)
|
155
|
+
|
156
|
+
group_cardinalities : dict[str, int]
|
157
|
+
Dictionary of cardinality constraint per group.
|
158
|
+
Examples: {"Equity": 1, "US": 3}
|
159
|
+
|
160
|
+
raise_if_group_missing : bool, default=False
|
161
|
+
If this is set to True, an error is raised when a group is not found in the
|
162
|
+
groups, otherwise only a warning is shown.
|
163
|
+
The default is False.
|
164
|
+
|
165
|
+
Returns
|
166
|
+
-------
|
167
|
+
left_inequality: ndarray of shape (n_constraints, n_assets)
|
168
|
+
right_inequality: ndarray of shape (n_constraints,)
|
169
|
+
The left and right matrices of the cardinality inequality.
|
170
|
+
"""
|
171
|
+
groups = _validate_groups(groups, name="group")
|
172
|
+
|
173
|
+
a_inequality = []
|
174
|
+
b_inequality = []
|
175
|
+
|
176
|
+
for group, card in group_cardinalities.items():
|
177
|
+
try:
|
178
|
+
arr = _matching_array(values=groups, key=group, sum_to_one=False)
|
179
|
+
a_inequality.append(arr)
|
180
|
+
b_inequality.append(card)
|
181
|
+
|
182
|
+
except GroupNotFoundError as e:
|
183
|
+
if raise_if_group_missing:
|
184
|
+
raise
|
185
|
+
warnings.warn(str(e), stacklevel=2)
|
186
|
+
return (
|
187
|
+
np.array(a_inequality),
|
188
|
+
np.array(b_inequality),
|
189
|
+
)
|
190
|
+
|
191
|
+
|
135
192
|
def _validate_groups(groups: npt.ArrayLike, name: str = "groups") -> np.ndarray:
|
136
193
|
"""Validate groups by checking its dim and if group names don't appear in multiple
|
137
194
|
levels and convert to numpy array.
|