skfolio 0.5.2__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. skfolio/__init__.py +5 -5
  2. skfolio/cluster/__init__.py +1 -1
  3. skfolio/cluster/_hierarchical.py +1 -1
  4. skfolio/datasets/__init__.py +2 -2
  5. skfolio/distance/__init__.py +3 -3
  6. skfolio/distance/_distance.py +7 -6
  7. skfolio/exceptions.py +2 -2
  8. skfolio/measures/__init__.py +23 -23
  9. skfolio/model_selection/__init__.py +2 -2
  10. skfolio/moments/__init__.py +11 -11
  11. skfolio/moments/covariance/__init__.py +6 -6
  12. skfolio/moments/covariance/_denoise_covariance.py +2 -1
  13. skfolio/moments/covariance/_detone_covariance.py +2 -1
  14. skfolio/moments/covariance/_empirical_covariance.py +2 -1
  15. skfolio/moments/covariance/_ew_covariance.py +2 -1
  16. skfolio/moments/covariance/_gerber_covariance.py +2 -1
  17. skfolio/moments/covariance/_implied_covariance.py +1 -1
  18. skfolio/moments/expected_returns/__init__.py +2 -2
  19. skfolio/moments/expected_returns/_empirical_mu.py +2 -1
  20. skfolio/moments/expected_returns/_equilibrium_mu.py +2 -1
  21. skfolio/moments/expected_returns/_ew_mu.py +2 -1
  22. skfolio/moments/expected_returns/_shrunk_mu.py +2 -1
  23. skfolio/optimization/__init__.py +10 -10
  24. skfolio/optimization/cluster/__init__.py +1 -1
  25. skfolio/optimization/cluster/_nco.py +3 -2
  26. skfolio/optimization/cluster/hierarchical/__init__.py +1 -1
  27. skfolio/optimization/cluster/hierarchical/_herc.py +2 -1
  28. skfolio/optimization/cluster/hierarchical/_hrp.py +2 -1
  29. skfolio/optimization/convex/__init__.py +3 -3
  30. skfolio/optimization/convex/_base.py +344 -31
  31. skfolio/optimization/convex/_distributionally_robust.py +4 -1
  32. skfolio/optimization/convex/_maximum_diversification.py +4 -2
  33. skfolio/optimization/convex/_mean_risk.py +125 -17
  34. skfolio/optimization/convex/_risk_budgeting.py +3 -1
  35. skfolio/optimization/ensemble/_stacking.py +2 -2
  36. skfolio/optimization/naive/__init__.py +1 -1
  37. skfolio/optimization/naive/_naive.py +3 -2
  38. skfolio/portfolio/__init__.py +1 -1
  39. skfolio/portfolio/_base.py +1 -0
  40. skfolio/portfolio/_portfolio.py +1 -0
  41. skfolio/pre_selection/__init__.py +1 -1
  42. skfolio/pre_selection/_drop_correlated.py +1 -1
  43. skfolio/pre_selection/_select_complete.py +6 -4
  44. skfolio/pre_selection/_select_k_extremes.py +1 -1
  45. skfolio/pre_selection/_select_non_dominated.py +1 -1
  46. skfolio/pre_selection/_select_non_expiring.py +6 -4
  47. skfolio/prior/__init__.py +3 -3
  48. skfolio/prior/_black_litterman.py +2 -1
  49. skfolio/prior/_empirical.py +2 -1
  50. skfolio/prior/_factor_model.py +2 -1
  51. skfolio/typing.py +6 -6
  52. skfolio/uncertainty_set/__init__.py +5 -5
  53. skfolio/uncertainty_set/_base.py +3 -2
  54. skfolio/utils/equations.py +58 -1
  55. skfolio/utils/stats.py +8 -8
  56. skfolio/utils/tools.py +10 -10
  57. {skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/METADATA +32 -29
  58. skfolio-0.7.0.dist-info/RECORD +95 -0
  59. {skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/WHEEL +1 -1
  60. skfolio-0.5.2.dist-info/RECORD +0 -95
  61. {skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/LICENSE +0 -0
  62. {skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/top_level.txt +0 -0
@@ -1,17 +1,19 @@
1
1
  """Mean Risk Optimization estimator."""
2
2
 
3
+ import warnings
4
+
3
5
  # Copyright (c) 2023
4
6
  # Author: Hugo Delatte <delatte.hugo@gmail.com>
5
7
  # License: BSD 3 clause
6
8
  # The optimization features are derived
7
9
  # from Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
8
-
9
10
  import cvxpy as cp
10
11
  import numpy as np
11
12
  import numpy.typing as npt
12
13
  import pandas as pd
13
14
  import sklearn as sk
14
15
  import sklearn.utils.metadata_routing as skm
16
+ import sklearn.utils.validation as skv
15
17
 
16
18
  import skfolio.typing as skt
17
19
  from skfolio.measures import RiskMeasure
@@ -144,6 +146,11 @@ class MeanRisk(ConvexOptimization):
144
146
  returns and Cholesky decomposition of the covariance.
145
147
  The default (`None`) is to use :class:`~skfolio.prior.EmpiricalPrior`.
146
148
 
149
+ efficient_frontier_size : int, optional
150
+ If provided, it represents the number of Pareto-optimal portfolios along the
151
+ efficient frontier to be computed. This parameter can only be used with
152
+ `objective_function = ObjectiveFunction.MINIMIZE_RISK`.
153
+
147
154
  min_weights : float | dict[str, float] | array-like of shape (n_assets, ) | None, default=0.0
148
155
  Minimum assets weights (weights lower bounds).
149
156
  If a float is provided, it is applied to each asset.
@@ -213,6 +220,36 @@ class MeanRisk(ConvexOptimization):
213
220
  weights.
214
221
  The default (`None`) means no maximum long position.
215
222
 
223
+ cardinality : int, optional
224
+ Specifies the cardinality constraint to limit the number of invested assets
225
+ (non-zero weights). This feature requires a mixed-integer solver. For an
226
+ open-source option, we recommend using SCIP by setting `solver="SCIP"`.
227
+ To install it, use: `pip install cvxpy[SCIP]`. For commercial solvers,
228
+ supported options include MOSEK, GUROBI, or CPLEX.
229
+
230
+ group_cardinalities : dict[str, int], optional
231
+ A dictionary specifying cardinality constraints for specific groups of assets.
232
+ The keys represent group names (strings), and the values specify the maximum
233
+ number of assets allowed in each group. You must provide the groups using the
234
+ `groups` parameter. This requires a mixed-integer solver (see `cardinality`
235
+ for more details).
236
+
237
+ threshold_long : float | dict[str, float] | array-like of shape (n_assets, ), optional
238
+ Specifies the minimum weight threshold for assets in the portfolio to be
239
+ considered as a long position. Assets with weights below this threshold
240
+ will not be included as part of the portfolio's long positions. This
241
+ constraint can help eliminate insignificant allocations.
242
+ This requires a mixed-integer solver (see `cardinality` for more details).
243
+ It follows the same format as `min_weights` and `max_weights`.
244
+
245
+ threshold_short : float | dict[str, float] | array-like of shape (n_assets, ), optional
246
+ Specifies the maximum weight threshold for assets in the portfolio to be
247
+ considered as a short position. Assets with weights above this threshold
248
+ will not be included as part of the portfolio's short positions. This
249
+ constraint can help control the magnitude of short positions.
250
+ This requires a mixed-integer solver (see `cardinality` for more details).
251
+ It follows the same format as `min_weights` and `max_weights`.
252
+
216
253
  transaction_costs : float | dict[str, float] | array-like of shape (n_assets, ), default=0.0
217
254
  Transaction costs of the assets. It is used to add linear transaction costs to
218
255
  the optimization problem:
@@ -486,9 +523,10 @@ class MeanRisk(ConvexOptimization):
486
523
  solver_params : dict, optional
487
524
  Solver parameters. For example, `solver_params=dict(verbose=True)`.
488
525
  The default (`None`) is to use `{"tol_gap_abs": 1e-9, "tol_gap_rel": 1e-9}`
489
- for the solver "CLARABEL" and the CVXPY default otherwise.
526
+ for "CLARABEL", `{"numerics/feastol": 1e-8, "limits/gap": 1e-8}` for SCIP
527
+ and the solver default otherwise.
490
528
  For more details about solver arguments, check the CVXPY documentation:
491
- https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options
529
+ https://www.cvxpy.org/tutorial/solvers
492
530
 
493
531
  scale_objective : float, optional
494
532
  Scale each objective element by this value.
@@ -511,7 +549,7 @@ class MeanRisk(ConvexOptimization):
511
549
  portfolio_params : dict, optional
512
550
  Portfolio parameters passed to the portfolio evaluated by the `predict` and
513
551
  `score` methods. If not provided, the `name`, `transaction_costs`,
514
- `management_fees`, `previous_weights` and `risk_free_rate` are copied from the
552
+ `management_fees`, `previous_weights` and `risk_free_rate` are copied from the
515
553
  optimization model and passed to the portfolio.
516
554
 
517
555
  Attributes
@@ -557,6 +595,10 @@ class MeanRisk(ConvexOptimization):
557
595
  max_budget: float | None = None,
558
596
  max_short: float | None = None,
559
597
  max_long: float | None = None,
598
+ cardinality: int | None = None,
599
+ group_cardinalities: dict[str, int] | None = None,
600
+ threshold_long: skt.MultiInput | None = None,
601
+ threshold_short: skt.MultiInput | None = None,
560
602
  transaction_costs: skt.MultiInput = 0.0,
561
603
  management_fees: skt.MultiInput = 0.0,
562
604
  previous_weights: skt.MultiInput | None = None,
@@ -617,6 +659,10 @@ class MeanRisk(ConvexOptimization):
617
659
  max_budget=max_budget,
618
660
  max_short=max_short,
619
661
  max_long=max_long,
662
+ cardinality=cardinality,
663
+ group_cardinalities=group_cardinalities,
664
+ threshold_long=threshold_long,
665
+ threshold_short=threshold_short,
620
666
  transaction_costs=transaction_costs,
621
667
  management_fees=management_fees,
622
668
  previous_weights=previous_weights,
@@ -719,7 +765,9 @@ class MeanRisk(ConvexOptimization):
719
765
  """
720
766
  routed_params = skm.process_routing(self, "fit", **fit_params)
721
767
 
722
- self._check_feature_names(X, reset=True)
768
+ # `X` is left unchanged; only the `feature_names_in_` check is performed
769
+ _ = skv.validate_data(self, X, skip_check_array=True)
770
+
723
771
  # Validate
724
772
  self._validation()
725
773
  # Used to avoid adding multiple times similar constraints linked to identical
@@ -734,13 +782,42 @@ class MeanRisk(ConvexOptimization):
734
782
  n_observations, n_assets = prior_model.returns.shape
735
783
 
736
784
  # set solvers params
737
- if self.solver == "CLARABEL":
738
- self._set_solver_params(default={"tol_gap_abs": 1e-9, "tol_gap_rel": 1e-9})
739
- else:
740
- self._set_solver_params(default=None)
785
+ match self.solver:
786
+ case "CLARABEL":
787
+ self._set_solver_params(
788
+ default={"tol_gap_abs": 1e-9, "tol_gap_rel": 1e-9}
789
+ )
790
+ case "SCIP":
791
+ self._set_solver_params(
792
+ default={"numerics/feastol": 1e-8, "limits/gap": 1e-8}
793
+ )
794
+ case _:
795
+ self._set_solver_params(default=None)
741
796
 
742
- # set scales
797
+ # set scales and check measure
743
798
  if self.objective_function == ObjectiveFunction.MAXIMIZE_RATIO:
799
+ if self.overwrite_expected_return is not None:
800
+ if self.risk_measure == RiskMeasure.VARIANCE:
801
+ warnings.warn(
802
+ "When selecting 'MAXIMIZE_RATIO' with 'VARIANCE', the "
803
+ "optimization will return the maximum Sharpe Ratio portfolio. "
804
+ "This is because the mean/variance ratio is not a "
805
+ "1-homogeneous function, unlike the mean/std. To suppress this"
806
+ "warning, replace 'VARIANCE' by 'STANDARD_DEVIATION'",
807
+ stacklevel=2,
808
+ )
809
+
810
+ elif self.risk_measure == RiskMeasure.SEMI_VARIANCE:
811
+ warnings.warn(
812
+ "When selecting 'MAXIMIZE_RATIO' with 'SEMI_VARIANCE', the "
813
+ "optimization will return the maximum Sortino Ratio portfolio. "
814
+ "This is because the mean/semi-variance ratio is not a "
815
+ "1-homogeneous function, unlike the mean/semi-std ratio. To "
816
+ "suppress this warning, replace 'SEMI_VARIANCE' by "
817
+ "'SEMI_DEVIATION'",
818
+ stacklevel=2,
819
+ )
820
+
744
821
  self._set_scale_objective(default=1)
745
822
  self._set_scale_constraints(default=1)
746
823
  else:
@@ -819,7 +896,7 @@ class MeanRisk(ConvexOptimization):
819
896
  " 1d-array, a single-column DataFrame or a Series"
820
897
  )
821
898
  y = y[y.columns[0]]
822
- _, y = self._validate_data(X, y)
899
+ _, y = skv.validate_data(self, X, y)
823
900
  tracking_error = self._tracking_error(
824
901
  prior_model=prior_model, w=w, y=y, factor=factor
825
902
  )
@@ -959,31 +1036,38 @@ class MeanRisk(ConvexOptimization):
959
1036
  + custom_objective * self._scale_objective
960
1037
  )
961
1038
  case ObjectiveFunction.MAXIMIZE_RATIO:
1039
+ homogenization_factor = _optimal_homogenization_factor(
1040
+ mu=prior_model.mu
1041
+ )
1042
+
962
1043
  if expected_return.is_affine():
963
1044
  # Charnes-Cooper's variable transformation for Fractional
964
- # Programming problem :Max(f1/f2) with f2 linear
1045
+ # Programming problem Max(f1/f2) with f2 linear and with
1046
+ # 1-homogeneous function (homogeneous technique)
965
1047
  constraints += [
966
1048
  expected_return * self._scale_constraints
967
1049
  - cp.Constant(self.risk_free_rate)
968
1050
  * factor
969
1051
  * self._scale_constraints
970
- == cp.Constant(1) * self._scale_constraints
1052
+ == cp.Constant(homogenization_factor) * self._scale_constraints
971
1053
  ]
972
1054
  else:
973
1055
  # Schaible's generalization of Charnes-Cooper's variable
974
1056
  # transformation for Fractional Programming problem :Max(f1/f2)
975
- # with f1 concave instead of linear: Schaible,"Parameter-free
976
- # Convex Equivalent and Dual Programs of Fractional Programming
977
- # Problems".
1057
+ # with f1 concave instead of linear and with 1-homogeneous function.
1058
+ # (homogeneous technique)
1059
+ # Schaible,"Parameter-free Convex Equivalent and Dual Programs of
1060
+ # Fractional Programming Problems".
978
1061
  # The condition to work is f1 >= 0, so we need to raise a user
979
1062
  # warning when it's not the case.
980
1063
  # TODO: raise user warning when f1<0
1064
+
981
1065
  constraints += [
982
1066
  expected_return * self._scale_constraints
983
1067
  - cp.Constant(self.risk_free_rate)
984
1068
  * factor
985
1069
  * self._scale_constraints
986
- >= cp.Constant(1) * self._scale_constraints
1070
+ >= cp.Constant(homogenization_factor) * self._scale_constraints
987
1071
  ]
988
1072
  objective = cp.Minimize(
989
1073
  risk * self._scale_objective
@@ -1014,3 +1098,27 @@ class MeanRisk(ConvexOptimization):
1014
1098
  )
1015
1099
 
1016
1100
  return self
1101
+
1102
+
1103
+ def _optimal_homogenization_factor(mu: np.ndarray) -> float:
1104
+ """
1105
+ Compute the optimal homogenization factor for ratio optimization based on expected
1106
+ returns.
1107
+
1108
+ While a default value of 1 is commonly used in textbooks for simplicity,
1109
+ fine-tuning this factor based on the underlying data can enhance convergence.
1110
+ Additionally, using a data-driven approach to determine this factor can improve the
1111
+ robustness of certain constraints, such as the calibration of big M methods.
1112
+
1113
+ Parameters
1114
+ ----------
1115
+ mu : ndarray of shape (n_assets,)
1116
+ Vector of expected returns.
1117
+
1118
+ Returns
1119
+ -------
1120
+ value : float
1121
+ Homogenization factor.
1122
+ """
1123
+
1124
+ return min(1e3, max(1e-3, np.mean(np.abs(mu))))
@@ -10,6 +10,7 @@ import cvxpy as cp
10
10
  import numpy as np
11
11
  import numpy.typing as npt
12
12
  import sklearn.utils.metadata_routing as skm
13
+ import sklearn.utils.validation as skv
13
14
 
14
15
  import skfolio.typing as skt
15
16
  from skfolio.measures import RiskMeasure
@@ -452,7 +453,8 @@ class RiskBudgeting(ConvexOptimization):
452
453
  """
453
454
  routed_params = skm.process_routing(self, "fit", **fit_params)
454
455
 
455
- self._check_feature_names(X, reset=True)
456
+ # `X` is left unchanged; only the `feature_names_in_` check is performed
457
+ _ = skv.validate_data(self, X, skip_check_array=True)
456
458
 
457
459
  if not isinstance(self.risk_measure, RiskMeasure):
458
460
  raise TypeError("risk_measure must be of type `RiskMeasure`")
@@ -330,9 +330,9 @@ class StackingOptimization(BaseOptimization, BaseComposition):
330
330
  # We validate and convert to numpy array only after base-estimator fitting
331
331
  # to keep the assets names in case they are used in the estimator.
332
332
  if y is not None:
333
- _, y = self._validate_data(X, y, multi_output=True)
333
+ _, y = skv.validate_data(self, X, y, multi_output=True)
334
334
  else:
335
- _ = self._validate_data(X)
335
+ _ = skv.validate_data(self, X)
336
336
 
337
337
  if isinstance(self.cv, BaseCombinatorialCV):
338
338
  X_pred = np.array(
@@ -1,3 +1,3 @@
1
1
  from skfolio.optimization.naive._naive import EqualWeighted, InverseVolatility, Random
2
2
 
3
- __all__ = ["InverseVolatility", "EqualWeighted", "Random"]
3
+ __all__ = ["EqualWeighted", "InverseVolatility", "Random"]
@@ -6,6 +6,7 @@
6
6
  import numpy as np
7
7
  import numpy.typing as npt
8
8
  import sklearn.utils.metadata_routing as skm
9
+ import sklearn.utils.validation as skv
9
10
 
10
11
  from skfolio.optimization._base import BaseOptimization
11
12
  from skfolio.prior import BasePrior, EmpiricalPrior
@@ -141,7 +142,7 @@ class EqualWeighted(BaseOptimization):
141
142
  self : EqualWeighted
142
143
  Fitted estimator.
143
144
  """
144
- X = self._validate_data(X)
145
+ X = skv.validate_data(self, X)
145
146
  n_assets = X.shape[1]
146
147
  self.weights_ = np.ones(n_assets) / n_assets
147
148
  return self
@@ -185,7 +186,7 @@ class Random(BaseOptimization):
185
186
  self : Random
186
187
  Fitted estimator.
187
188
  """
188
- X = self._validate_data(X)
189
+ X = skv.validate_data(self, X)
189
190
  n_assets = X.shape[1]
190
191
  self.weights_ = rand_weights_dirichlet(n=n_assets)
191
192
  return self
@@ -10,4 +10,4 @@ from skfolio.portfolio._base import BasePortfolio
10
10
  from skfolio.portfolio._multi_period_portfolio import MultiPeriodPortfolio
11
11
  from skfolio.portfolio._portfolio import Portfolio
12
12
 
13
- __all__ = ["BasePortfolio", "Portfolio", "MultiPeriodPortfolio"]
13
+ __all__ = ["BasePortfolio", "MultiPeriodPortfolio", "Portfolio"]
@@ -389,6 +389,7 @@ class BasePortfolio:
389
389
  "edar_beta",
390
390
  }
391
391
 
392
+ # ruff: noqa: RUF023
392
393
  __slots__ = {
393
394
  # public
394
395
  "tag",
@@ -412,6 +412,7 @@ class Portfolio(BasePortfolio):
412
412
  }
413
413
  )
414
414
 
415
+ # ruff: noqa: RUF023
415
416
  __slots__ = {
416
417
  # read-only
417
418
  "X",
@@ -6,8 +6,8 @@ from skfolio.pre_selection._select_non_expiring import SelectNonExpiring
6
6
 
7
7
  __all__ = [
8
8
  "DropCorrelated",
9
+ "SelectComplete",
9
10
  "SelectKExtremes",
10
11
  "SelectNonDominated",
11
- "SelectComplete",
12
12
  "SelectNonExpiring",
13
13
  ]
@@ -74,7 +74,7 @@ class DropCorrelated(skf.SelectorMixin, skb.BaseEstimator):
74
74
  self : DropCorrelated
75
75
  Fitted estimator.
76
76
  """
77
- X = self._validate_data(X)
77
+ X = skv.validate_data(self, X)
78
78
  if not -1 <= self.threshold <= 1:
79
79
  raise ValueError("`threshold` must be between -1 and 1")
80
80
 
@@ -97,7 +97,7 @@ class SelectComplete(skf.SelectorMixin, skb.BaseEstimator):
97
97
  Fitted estimator.
98
98
  """
99
99
  # Validate by allowing NaNs
100
- X = self._validate_data(X, force_all_finite="allow-nan")
100
+ X = skv.validate_data(self, X, ensure_all_finite="allow-nan")
101
101
 
102
102
  if self.drop_assets_with_internal_nan:
103
103
  # Identify columns with any NaNs
@@ -108,9 +108,11 @@ class SelectComplete(skf.SelectorMixin, skb.BaseEstimator):
108
108
 
109
109
  return self
110
110
 
111
- def _get_support_mask(self):
111
+ def _get_support_mask(self) -> np.ndarray:
112
112
  skv.check_is_fitted(self)
113
113
  return self.to_keep_
114
114
 
115
- def _more_tags(self):
116
- return {"allow_nan": True}
115
+ def __sklearn_tags__(self):
116
+ tags = super().__sklearn_tags__()
117
+ tags.input_tags.allow_nan = True
118
+ return tags
@@ -76,7 +76,7 @@ class SelectKExtremes(skf.SelectorMixin, skb.BaseEstimator):
76
76
  self : SelectKExtremes
77
77
  Fitted estimator.
78
78
  """
79
- X = self._validate_data(X)
79
+ X = skv.validate_data(self, X)
80
80
  k = int(self.k)
81
81
  if k <= 0:
82
82
  raise ValueError("`k` must be strictly positive")
@@ -95,7 +95,7 @@ class SelectNonDominated(skf.SelectorMixin, skb.BaseEstimator):
95
95
  self : SelectNonDominated
96
96
  Fitted estimator.
97
97
  """
98
- X = self._validate_data(X)
98
+ X = skv.validate_data(self, X)
99
99
  if not -1 <= self.threshold <= 1:
100
100
  raise ValueError("`threshold` must be between -1 and 1")
101
101
  n_assets = X.shape[1]
@@ -114,7 +114,7 @@ class SelectNonExpiring(skf.SelectorMixin, skb.BaseEstimator):
114
114
  self : SelectNonExpiring
115
115
  Fitted estimator.
116
116
  """
117
- _ = self._validate_data(X, force_all_finite="allow-nan")
117
+ _ = skv.validate_data(self, X, ensure_all_finite="allow-nan")
118
118
 
119
119
  # Validate by allowing NaNs
120
120
  if not hasattr(X, "index") or not isinstance(X.index, pd.DatetimeIndex):
@@ -140,9 +140,11 @@ class SelectNonExpiring(skf.SelectorMixin, skb.BaseEstimator):
140
140
 
141
141
  return self
142
142
 
143
- def _get_support_mask(self):
143
+ def _get_support_mask(self) -> np.ndarray:
144
144
  skv.check_is_fitted(self)
145
145
  return self.to_keep_
146
146
 
147
- def _more_tags(self):
148
- return {"allow_nan": True}
147
+ def __sklearn_tags__(self):
148
+ tags = super().__sklearn_tags__()
149
+ tags.input_tags.allow_nan = True
150
+ return tags
skfolio/prior/__init__.py CHANGED
@@ -8,11 +8,11 @@ from skfolio.prior._factor_model import (
8
8
  )
9
9
 
10
10
  __all__ = [
11
- "PriorModel",
11
+ "BaseLoadingMatrix",
12
12
  "BasePrior",
13
- "EmpiricalPrior",
14
13
  "BlackLitterman",
14
+ "EmpiricalPrior",
15
15
  "FactorModel",
16
- "BaseLoadingMatrix",
17
16
  "LoadingMatrixRegression",
17
+ "PriorModel",
18
18
  ]
@@ -10,6 +10,7 @@
10
10
  import numpy as np
11
11
  import numpy.typing as npt
12
12
  import sklearn.utils.metadata_routing as skm
13
+ import sklearn.utils.validation as skv
13
14
 
14
15
  from skfolio.moments import EquilibriumMu
15
16
  from skfolio.prior._base import BasePrior, PriorModel
@@ -182,7 +183,7 @@ class BlackLitterman(BasePrior):
182
183
 
183
184
  # we validate after all models have been fitted to keep features names
184
185
  # information.
185
- self._validate_data(X)
186
+ skv.validate_data(self, X)
186
187
 
187
188
  n_assets = prior_returns.shape[1]
188
189
  views = np.asarray(self.views)
@@ -7,6 +7,7 @@
7
7
  import numpy as np
8
8
  import numpy.typing as npt
9
9
  import sklearn.utils.metadata_routing as skm
10
+ import sklearn.utils.validation as skv
10
11
 
11
12
  from skfolio.moments import BaseCovariance, BaseMu, EmpiricalCovariance, EmpiricalMu
12
13
  from skfolio.prior._base import BasePrior, PriorModel
@@ -190,7 +191,7 @@ class EmpiricalPrior(BasePrior):
190
191
 
191
192
  # we validate and convert to numpy after all models have been fitted to keep
192
193
  # features names information.
193
- X = self._validate_data(X)
194
+ X = skv.validate_data(self, X)
194
195
  self.prior_model_ = PriorModel(
195
196
  mu=mu,
196
197
  covariance=covariance,
@@ -17,6 +17,7 @@ import sklearn.base as skb
17
17
  import sklearn.linear_model as skl
18
18
  import sklearn.multioutput as skmo
19
19
  import sklearn.utils.metadata_routing as skm
20
+ import sklearn.utils.validation as skv
20
21
 
21
22
  from skfolio.prior._base import BasePrior, PriorModel
22
23
  from skfolio.prior._empirical import EmpiricalPrior
@@ -273,7 +274,7 @@ class FactorModel(BasePrior):
273
274
 
274
275
  # we validate and convert to numpy after all models have been fitted to keep
275
276
  # features names information.
276
- X, y = self._validate_data(X, y, multi_output=True)
277
+ X, y = skv.validate_data(self, X, y, multi_output=True)
277
278
  n_assets = X.shape[1]
278
279
  n_factors = y.shape[1]
279
280
 
skfolio/typing.py CHANGED
@@ -14,20 +14,20 @@ import plotly.graph_objects as go
14
14
  from skfolio.measures import ExtraRiskMeasure, PerfMeasure, RatioMeasure, RiskMeasure
15
15
 
16
16
  __all__ = [
17
+ "CvxMeasure",
18
+ "ExpressionFunction",
19
+ "Factor",
17
20
  "Groups",
18
21
  "Inequality",
19
22
  "LinearConstraints",
23
+ "Measure",
20
24
  "MultiInput",
21
- "Target",
25
+ "Names",
22
26
  "ParametersValues",
23
- "Factor",
24
27
  "Result",
25
28
  "RiskResult",
26
- "ExpressionFunction",
27
- "Measure",
28
- "CvxMeasure",
29
- "Names",
30
29
  "Tags",
30
+ "Target",
31
31
  ]
32
32
 
33
33
  Measure = PerfMeasure | RiskMeasure | ExtraRiskMeasure | RatioMeasure
@@ -13,11 +13,11 @@ from skfolio.uncertainty_set._empirical import (
13
13
  )
14
14
 
15
15
  __all__ = [
16
- "UncertaintySet",
17
- "BaseMuUncertaintySet",
18
16
  "BaseCovarianceUncertaintySet",
19
- "EmpiricalMuUncertaintySet",
20
- "EmpiricalCovarianceUncertaintySet",
21
- "BootstrapMuUncertaintySet",
17
+ "BaseMuUncertaintySet",
22
18
  "BootstrapCovarianceUncertaintySet",
19
+ "BootstrapMuUncertaintySet",
20
+ "EmpiricalCovarianceUncertaintySet",
21
+ "EmpiricalMuUncertaintySet",
22
+ "UncertaintySet",
23
23
  ]
@@ -11,6 +11,7 @@ import numpy as np
11
11
  import numpy.typing as npt
12
12
  import sklearn.base as skb
13
13
  import sklearn.utils.metadata_routing as skm
14
+ import sklearn.utils.validation as skv
14
15
 
15
16
  from skfolio.prior import BasePrior
16
17
 
@@ -113,9 +114,9 @@ class BaseCovarianceUncertaintySet(skb.BaseEstimator, ABC):
113
114
  Validated price returns of factors or a target benchmark if provided.
114
115
  """
115
116
  if y is None:
116
- X = self._validate_data(X)
117
+ X = skv.validate_data(self, X)
117
118
  else:
118
- X, y = self._validate_data(X, y, multi_output=True)
119
+ X, y = skv.validate_data(self, X, y, multi_output=True)
119
120
  return X, y
120
121
 
121
122
  def get_metadata_routing(self):
@@ -16,7 +16,7 @@ from skfolio.exceptions import (
16
16
  GroupNotFoundError,
17
17
  )
18
18
 
19
- __all__ = ["equations_to_matrix"]
19
+ __all__ = ["equations_to_matrix", "group_cardinalities_to_matrix"]
20
20
 
21
21
  _EQUALITY_OPERATORS = {"==", "="}
22
22
  _INEQUALITY_OPERATORS = {">=", "<="}
@@ -132,6 +132,63 @@ def equations_to_matrix(
132
132
  )
133
133
 
134
134
 
135
+ def group_cardinalities_to_matrix(
136
+ groups: npt.ArrayLike,
137
+ group_cardinalities: dict[str, int],
138
+ raise_if_group_missing: bool = False,
139
+ ) -> tuple[np.ndarray, np.ndarray]:
140
+ """Convert group cardinality constraints into the left and right matrices of the
141
+ inequality A <= B.
142
+
143
+ Parameters
144
+ ----------
145
+ groups : array-like of shape (n_groups, n_assets)
146
+ 2D array of assets groups.
147
+
148
+ Examples:
149
+ groups = np.array(
150
+ [
151
+ ["Equity", "Equity", "Equity", "Bond"],
152
+ ["US", "Europe", "Japan", "US"],
153
+ ]
154
+ )
155
+
156
+ group_cardinalities : dict[str, int]
157
+ Dictionary of cardinality constraint per group.
158
+ Examples: {"Equity": 1, "US": 3}
159
+
160
+ raise_if_group_missing : bool, default=False
161
+ If this is set to True, an error is raised when a group is not found in the
162
+ groups, otherwise only a warning is shown.
163
+ The default is False.
164
+
165
+ Returns
166
+ -------
167
+ left_inequality: ndarray of shape (n_constraints, n_assets)
168
+ right_inequality: ndarray of shape (n_constraints,)
169
+ The left and right matrices of the cardinality inequality.
170
+ """
171
+ groups = _validate_groups(groups, name="group")
172
+
173
+ a_inequality = []
174
+ b_inequality = []
175
+
176
+ for group, card in group_cardinalities.items():
177
+ try:
178
+ arr = _matching_array(values=groups, key=group, sum_to_one=False)
179
+ a_inequality.append(arr)
180
+ b_inequality.append(card)
181
+
182
+ except GroupNotFoundError as e:
183
+ if raise_if_group_missing:
184
+ raise
185
+ warnings.warn(str(e), stacklevel=2)
186
+ return (
187
+ np.array(a_inequality),
188
+ np.array(b_inequality),
189
+ )
190
+
191
+
135
192
  def _validate_groups(groups: npt.ArrayLike, name: str = "groups") -> np.ndarray:
136
193
  """Validate groups by checking its dim and if group names don't appear in multiple
137
194
  levels and convert to numpy array.