PyPI - skfolio - Versions diffs - 0.5.2__py3-none-any.whl → 0.7.0__py3-none-any.whl - Mend

skfolio 0.5.2__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

skfolio/__init__.py +5 -5
skfolio/cluster/__init__.py +1 -1
skfolio/cluster/_hierarchical.py +1 -1
skfolio/datasets/__init__.py +2 -2
skfolio/distance/__init__.py +3 -3
skfolio/distance/_distance.py +7 -6
skfolio/exceptions.py +2 -2
skfolio/measures/__init__.py +23 -23
skfolio/model_selection/__init__.py +2 -2
skfolio/moments/__init__.py +11 -11
skfolio/moments/covariance/__init__.py +6 -6
skfolio/moments/covariance/_denoise_covariance.py +2 -1
skfolio/moments/covariance/_detone_covariance.py +2 -1
skfolio/moments/covariance/_empirical_covariance.py +2 -1
skfolio/moments/covariance/_ew_covariance.py +2 -1
skfolio/moments/covariance/_gerber_covariance.py +2 -1
skfolio/moments/covariance/_implied_covariance.py +1 -1
skfolio/moments/expected_returns/__init__.py +2 -2
skfolio/moments/expected_returns/_empirical_mu.py +2 -1
skfolio/moments/expected_returns/_equilibrium_mu.py +2 -1
skfolio/moments/expected_returns/_ew_mu.py +2 -1
skfolio/moments/expected_returns/_shrunk_mu.py +2 -1
skfolio/optimization/__init__.py +10 -10
skfolio/optimization/cluster/__init__.py +1 -1
skfolio/optimization/cluster/_nco.py +3 -2
skfolio/optimization/cluster/hierarchical/__init__.py +1 -1
skfolio/optimization/cluster/hierarchical/_herc.py +2 -1
skfolio/optimization/cluster/hierarchical/_hrp.py +2 -1
skfolio/optimization/convex/__init__.py +3 -3
skfolio/optimization/convex/_base.py +344 -31
skfolio/optimization/convex/_distributionally_robust.py +4 -1
skfolio/optimization/convex/_maximum_diversification.py +4 -2
skfolio/optimization/convex/_mean_risk.py +125 -17
skfolio/optimization/convex/_risk_budgeting.py +3 -1
skfolio/optimization/ensemble/_stacking.py +2 -2
skfolio/optimization/naive/__init__.py +1 -1
skfolio/optimization/naive/_naive.py +3 -2
skfolio/portfolio/__init__.py +1 -1
skfolio/portfolio/_base.py +1 -0
skfolio/portfolio/_portfolio.py +1 -0
skfolio/pre_selection/__init__.py +1 -1
skfolio/pre_selection/_drop_correlated.py +1 -1
skfolio/pre_selection/_select_complete.py +6 -4
skfolio/pre_selection/_select_k_extremes.py +1 -1
skfolio/pre_selection/_select_non_dominated.py +1 -1
skfolio/pre_selection/_select_non_expiring.py +6 -4
skfolio/prior/__init__.py +3 -3
skfolio/prior/_black_litterman.py +2 -1
skfolio/prior/_empirical.py +2 -1
skfolio/prior/_factor_model.py +2 -1
skfolio/typing.py +6 -6
skfolio/uncertainty_set/__init__.py +5 -5
skfolio/uncertainty_set/_base.py +3 -2
skfolio/utils/equations.py +58 -1
skfolio/utils/stats.py +8 -8
skfolio/utils/tools.py +10 -10
{skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/METADATA +32 -29
skfolio-0.7.0.dist-info/RECORD +95 -0
{skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/WHEEL +1 -1
skfolio-0.5.2.dist-info/RECORD +0 -95
{skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/LICENSE +0 -0
{skfolio-0.5.2.dist-info → skfolio-0.7.0.dist-info}/top_level.txt +0 -0

skfolio/optimization/convex/_mean_risk.py CHANGED Viewed

@@ -1,17 +1,19 @@
 """Mean Risk Optimization estimator."""
+import warnings
 # Copyright (c) 2023
 # Author: Hugo Delatte <delatte.hugo@gmail.com>
 # License: BSD 3 clause
 # The optimization features are derived
 # from Riskfolio-Lib, Copyright (c) 2020-2023, Dany Cajas, Licensed under BSD 3 clause.
 import cvxpy as cp
 import numpy as np
 import numpy.typing as npt
 import pandas as pd
 import sklearn as sk
 import sklearn.utils.metadata_routing as skm
+import sklearn.utils.validation as skv
 import skfolio.typing as skt
 from skfolio.measures import RiskMeasure
@@ -144,6 +146,11 @@ class MeanRisk(ConvexOptimization):
         returns and Cholesky decomposition of the covariance.
         The default (`None`) is to use :class:`~skfolio.prior.EmpiricalPrior`.
+    efficient_frontier_size : int, optional
+        If provided, it represents the number of Pareto-optimal portfolios along the
+        efficient frontier to be computed. This parameter can only be used with
+        `objective_function = ObjectiveFunction.MINIMIZE_RISK`.
     min_weights : float | dict[str, float] | array-like of shape (n_assets, ) | None, default=0.0
         Minimum assets weights (weights lower bounds).
         If a float is provided, it is applied to each asset.
@@ -213,6 +220,36 @@ class MeanRisk(ConvexOptimization):
         weights.
         The default (`None`) means no maximum long position.
+    cardinality : int, optional
+        Specifies the cardinality constraint to limit the number of invested assets
+        (non-zero weights). This feature requires a mixed-integer solver. For an
+        open-source option, we recommend using SCIP by setting `solver="SCIP"`.
+        To install it, use: `pip install cvxpy[SCIP]`. For commercial solvers,
+        supported options include MOSEK, GUROBI, or CPLEX.
+    group_cardinalities : dict[str, int], optional
+        A dictionary specifying cardinality constraints for specific groups of assets.
+        The keys represent group names (strings), and the values specify the maximum
+        number of assets allowed in each group. You must provide the groups using the
+        `groups` parameter. This requires a mixed-integer solver (see `cardinality`
+        for more details).
+    threshold_long : float | dict[str, float] | array-like of shape (n_assets, ), optional
+        Specifies the minimum weight threshold for assets in the portfolio to be
+        considered as a long position. Assets with weights below this threshold
+        will not be included as part of the portfolio's long positions. This
+        constraint can help eliminate insignificant allocations.
+        This requires a mixed-integer solver (see `cardinality` for more details).
+        It follows the same format as `min_weights` and `max_weights`.
+    threshold_short : float | dict[str, float] | array-like of shape (n_assets, ), optional
+        Specifies the maximum weight threshold for assets in the portfolio to be
+        considered as a short position. Assets with weights above this threshold
+        will not be included as part of the portfolio's short positions. This
+        constraint can help control the magnitude of short positions.
+        This requires a mixed-integer solver (see `cardinality` for more details).
+        It follows the same format as `min_weights` and `max_weights`.
     transaction_costs : float | dict[str, float] | array-like of shape (n_assets, ), default=0.0
         Transaction costs of the assets. It is used to add linear transaction costs to
         the optimization problem:
@@ -486,9 +523,10 @@ class MeanRisk(ConvexOptimization):
     solver_params : dict, optional
         Solver parameters. For example, `solver_params=dict(verbose=True)`.
         The default (`None`) is to use `{"tol_gap_abs": 1e-9, "tol_gap_rel": 1e-9}`
-        for the solver "CLARABEL" and the CVXPY default otherwise.
+        for "CLARABEL", `{"numerics/feastol": 1e-8, "limits/gap": 1e-8}` for SCIP
+        and the solver default otherwise.
         For more details about solver arguments, check the CVXPY documentation:
-        https://www.cvxpy.org/tutorial/advanced/index.html#setting-solver-options
+        https://www.cvxpy.org/tutorial/solvers
     scale_objective : float, optional
         Scale each objective element by this value.
@@ -511,7 +549,7 @@ class MeanRisk(ConvexOptimization):
     portfolio_params :  dict, optional
         Portfolio parameters passed to the portfolio evaluated by the `predict` and
         `score` methods. If not provided, the `name`, `transaction_costs`,
-        `management_fees`, `previous_weights` and `risk_free_rate` are copied from the
+        `management_fees`, `previous_weights` and `risk_free_rate` are copied from the
         optimization model and passed to the portfolio.
     Attributes
@@ -557,6 +595,10 @@ class MeanRisk(ConvexOptimization):
         max_budget: float | None = None,
         max_short: float | None = None,
         max_long: float | None = None,
+        cardinality: int | None = None,
+        group_cardinalities: dict[str, int] | None = None,
+        threshold_long: skt.MultiInput | None = None,
+        threshold_short: skt.MultiInput | None = None,
         transaction_costs: skt.MultiInput = 0.0,
         management_fees: skt.MultiInput = 0.0,
         previous_weights: skt.MultiInput | None = None,
@@ -617,6 +659,10 @@ class MeanRisk(ConvexOptimization):
             max_budget=max_budget,
             max_short=max_short,
             max_long=max_long,
+            cardinality=cardinality,
+            group_cardinalities=group_cardinalities,
+            threshold_long=threshold_long,
+            threshold_short=threshold_short,
             transaction_costs=transaction_costs,
             management_fees=management_fees,
             previous_weights=previous_weights,
@@ -719,7 +765,9 @@ class MeanRisk(ConvexOptimization):
         """
         routed_params = skm.process_routing(self, "fit", **fit_params)
-        self._check_feature_names(X, reset=True)
+        # `X` is left unchanged; only the `feature_names_in_` check is performed
+        _ = skv.validate_data(self, X, skip_check_array=True)
         # Validate
         self._validation()
         # Used to avoid adding multiple times similar constraints linked to identical
@@ -734,13 +782,42 @@ class MeanRisk(ConvexOptimization):
         n_observations, n_assets = prior_model.returns.shape
         # set solvers params
-        if self.solver == "CLARABEL":
-            self._set_solver_params(default={"tol_gap_abs": 1e-9, "tol_gap_rel": 1e-9})
-        else:
-            self._set_solver_params(default=None)
+        match self.solver:
+            case "CLARABEL":
+                self._set_solver_params(
+                    default={"tol_gap_abs": 1e-9, "tol_gap_rel": 1e-9}
+                )
+            case "SCIP":
+                self._set_solver_params(
+                    default={"numerics/feastol": 1e-8, "limits/gap": 1e-8}
+                )
+            case _:
+                self._set_solver_params(default=None)
-        # set scales
+        # set scales and check measure
         if self.objective_function == ObjectiveFunction.MAXIMIZE_RATIO:
+            if self.overwrite_expected_return is not None:
+                if self.risk_measure == RiskMeasure.VARIANCE:
+                    warnings.warn(
+                        "When selecting 'MAXIMIZE_RATIO' with 'VARIANCE', the "
+                        "optimization will return the maximum Sharpe Ratio portfolio. "
+                        "This is because the mean/variance ratio is not a "
+                        "1-homogeneous function, unlike the mean/std. To suppress this"
+                        "warning, replace 'VARIANCE' by 'STANDARD_DEVIATION'",
+                        stacklevel=2,
+                    )
+                elif self.risk_measure == RiskMeasure.SEMI_VARIANCE:
+                    warnings.warn(
+                        "When selecting 'MAXIMIZE_RATIO' with 'SEMI_VARIANCE', the "
+                        "optimization will return the maximum Sortino Ratio portfolio. "
+                        "This is because the mean/semi-variance ratio is not a "
+                        "1-homogeneous function, unlike the mean/semi-std ratio. To "
+                        "suppress this warning, replace 'SEMI_VARIANCE' by "
+                        "'SEMI_DEVIATION'",
+                        stacklevel=2,
+                    )
             self._set_scale_objective(default=1)
             self._set_scale_constraints(default=1)
         else:
@@ -819,7 +896,7 @@ class MeanRisk(ConvexOptimization):
                         " 1d-array, a single-column DataFrame or a Series"
                     )
                 y = y[y.columns[0]]
-            _, y = self._validate_data(X, y)
+            _, y = skv.validate_data(self, X, y)
             tracking_error = self._tracking_error(
                 prior_model=prior_model, w=w, y=y, factor=factor
             )
@@ -959,31 +1036,38 @@ class MeanRisk(ConvexOptimization):
                     + custom_objective * self._scale_objective
                 )
             case ObjectiveFunction.MAXIMIZE_RATIO:
+                homogenization_factor = _optimal_homogenization_factor(
+                    mu=prior_model.mu
+                )
                 if expected_return.is_affine():
                     # Charnes-Cooper's variable transformation for Fractional
-                    # Programming problem :Max(f1/f2) with f2 linear
+                    # Programming problem Max(f1/f2) with f2 linear and with
+                    # 1-homogeneous function (homogeneous technique)
                     constraints += [
                         expected_return * self._scale_constraints
                         - cp.Constant(self.risk_free_rate)
                         * factor
                         * self._scale_constraints
-                        == cp.Constant(1) * self._scale_constraints
+                        == cp.Constant(homogenization_factor) * self._scale_constraints
                     ]
                 else:
                     # Schaible's generalization of Charnes-Cooper's variable
                     # transformation for Fractional Programming problem :Max(f1/f2)
-                    # with f1 concave instead of linear: Schaible,"Parameter-free
-                    # Convex Equivalent and Dual Programs of Fractional Programming
-                    # Problems".
+                    # with f1 concave instead of linear and with 1-homogeneous function.
+                    # (homogeneous technique)
+                    # Schaible,"Parameter-free Convex Equivalent and Dual Programs of
+                    # Fractional Programming Problems".
                     # The condition to work is f1 >= 0, so we need to raise an user
                     # warning when it's not the case.
                     # TODO: raise user warning when f1<0
                     constraints += [
                         expected_return * self._scale_constraints
                         - cp.Constant(self.risk_free_rate)
                         * factor
                         * self._scale_constraints
-                        >= cp.Constant(1) * self._scale_constraints
+                        >= cp.Constant(homogenization_factor) * self._scale_constraints
                     ]
                 objective = cp.Minimize(
                     risk * self._scale_objective
@@ -1014,3 +1098,27 @@ class MeanRisk(ConvexOptimization):
         )
         return self
+def _optimal_homogenization_factor(mu: np.ndarray) -> float:
+    """
+    Compute the optimal homogenization factor for ratio optimization based on expected
+    returns.
+    While a default value of 1 is commonly used in textbooks for simplicity,
+    fine-tuning this factor based on the underlying data can enhance convergence.
+    Additionally, using a data-driven approach to determine this factor can improve the
+    robustness of certain constraints, such as the calibration of big M methods.
+    Parameters
+    ----------
+    mu : ndarray of shape (n_assets,)
+        Vector of expected returns.
+    Returns
+    -------
+    value : float
+        Homogenization factor.
+    """
+    return min(1e3, max(1e-3, np.mean(np.abs(mu))))

skfolio/optimization/convex/_risk_budgeting.py CHANGED Viewed

@@ -10,6 +10,7 @@ import cvxpy as cp
 import numpy as np
 import numpy.typing as npt
 import sklearn.utils.metadata_routing as skm
+import sklearn.utils.validation as skv
 import skfolio.typing as skt
 from skfolio.measures import RiskMeasure
@@ -452,7 +453,8 @@ class RiskBudgeting(ConvexOptimization):
         """
         routed_params = skm.process_routing(self, "fit", **fit_params)
-        self._check_feature_names(X, reset=True)
+        # `X` is left unchanged; only the `feature_names_in_` check is performed
+        _ = skv.validate_data(self, X, skip_check_array=True)
         if not isinstance(self.risk_measure, RiskMeasure):
             raise TypeError("risk_measure must be of type `RiskMeasure`")

skfolio/optimization/ensemble/_stacking.py CHANGED Viewed

@@ -330,9 +330,9 @@ class StackingOptimization(BaseOptimization, BaseComposition):
             # We validate and convert to numpy array only after base-estimator fitting
             # to keep the assets names in case they are used in the estimator.
             if y is not None:
-                _, y = self._validate_data(X, y, multi_output=True)
+                _, y = skv.validate_data(self, X, y, multi_output=True)
             else:
-                _ = self._validate_data(X)
+                _ = skv.validate_data(self, X)
             if isinstance(self.cv, BaseCombinatorialCV):
                 X_pred = np.array(

skfolio/optimization/naive/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 from skfolio.optimization.naive._naive import EqualWeighted, InverseVolatility, Random
-__all__ = ["InverseVolatility", "EqualWeighted", "Random"]
+__all__ = ["EqualWeighted", "InverseVolatility", "Random"]

skfolio/optimization/naive/_naive.py CHANGED Viewed

@@ -6,6 +6,7 @@
 import numpy as np
 import numpy.typing as npt
 import sklearn.utils.metadata_routing as skm
+import sklearn.utils.validation as skv
 from skfolio.optimization._base import BaseOptimization
 from skfolio.prior import BasePrior, EmpiricalPrior
@@ -141,7 +142,7 @@ class EqualWeighted(BaseOptimization):
         self : EqualWeighted
             Fitted estimator.
         """
-        X = self._validate_data(X)
+        X = skv.validate_data(self, X)
         n_assets = X.shape[1]
         self.weights_ = np.ones(n_assets) / n_assets
         return self
@@ -185,7 +186,7 @@ class Random(BaseOptimization):
         self : Random
             Fitted estimator.
         """
-        X = self._validate_data(X)
+        X = skv.validate_data(self, X)
         n_assets = X.shape[1]
         self.weights_ = rand_weights_dirichlet(n=n_assets)
         return self

skfolio/portfolio/__init__.py CHANGED Viewed

@@ -10,4 +10,4 @@ from skfolio.portfolio._base import BasePortfolio
 from skfolio.portfolio._multi_period_portfolio import MultiPeriodPortfolio
 from skfolio.portfolio._portfolio import Portfolio
-__all__ = ["BasePortfolio", "Portfolio", "MultiPeriodPortfolio"]
+__all__ = ["BasePortfolio", "MultiPeriodPortfolio", "Portfolio"]

skfolio/portfolio/_base.py CHANGED Viewed

@@ -389,6 +389,7 @@ class BasePortfolio:
         "edar_beta",
     }
+    # ruff: noqa: RUF023
     __slots__ = {
         # public
         "tag",

skfolio/portfolio/_portfolio.py CHANGED Viewed

@@ -412,6 +412,7 @@ class Portfolio(BasePortfolio):
         }
     )
+    # ruff: noqa: RUF023
     __slots__ = {
         # read-only
         "X",

skfolio/pre_selection/__init__.py CHANGED Viewed

@@ -6,8 +6,8 @@ from skfolio.pre_selection._select_non_expiring import SelectNonExpiring
 __all__ = [
     "DropCorrelated",
+    "SelectComplete",
     "SelectKExtremes",
     "SelectNonDominated",
-    "SelectComplete",
     "SelectNonExpiring",
 ]

skfolio/pre_selection/_drop_correlated.py CHANGED Viewed

@@ -74,7 +74,7 @@ class DropCorrelated(skf.SelectorMixin, skb.BaseEstimator):
         self : DropCorrelated
             Fitted estimator.
         """
-        X = self._validate_data(X)
+        X = skv.validate_data(self, X)
         if not -1 <= self.threshold <= 1:
             raise ValueError("`threshold` must be between -1 and 1")

skfolio/pre_selection/_select_complete.py CHANGED Viewed

@@ -97,7 +97,7 @@ class SelectComplete(skf.SelectorMixin, skb.BaseEstimator):
             Fitted estimator.
         """
         # Validate by allowing NaNs
-        X = self._validate_data(X, force_all_finite="allow-nan")
+        X = skv.validate_data(self, X, ensure_all_finite="allow-nan")
         if self.drop_assets_with_internal_nan:
             # Identify columns with any NaNs
@@ -108,9 +108,11 @@ class SelectComplete(skf.SelectorMixin, skb.BaseEstimator):
         return self
-    def _get_support_mask(self):
+    def _get_support_mask(self) -> np.ndarray:
         skv.check_is_fitted(self)
         return self.to_keep_
-    def _more_tags(self):
-        return {"allow_nan": True}
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.allow_nan = True
+        return tags

skfolio/pre_selection/_select_k_extremes.py CHANGED Viewed

@@ -76,7 +76,7 @@ class SelectKExtremes(skf.SelectorMixin, skb.BaseEstimator):
         self : SelectKExtremes
             Fitted estimator.
         """
-        X = self._validate_data(X)
+        X = skv.validate_data(self, X)
         k = int(self.k)
         if k <= 0:
             raise ValueError("`k` must be strictly positive")

skfolio/pre_selection/_select_non_dominated.py CHANGED Viewed

@@ -95,7 +95,7 @@ class SelectNonDominated(skf.SelectorMixin, skb.BaseEstimator):
         self : SelectNonDominated
             Fitted estimator.
         """
-        X = self._validate_data(X)
+        X = skv.validate_data(self, X)
         if not -1 <= self.threshold <= 1:
             raise ValueError("`threshold` must be between -1 and 1")
         n_assets = X.shape[1]

skfolio/pre_selection/_select_non_expiring.py CHANGED Viewed

@@ -114,7 +114,7 @@ class SelectNonExpiring(skf.SelectorMixin, skb.BaseEstimator):
         self : SelectNonExpiring
             Fitted estimator.
         """
-        _ = self._validate_data(X, force_all_finite="allow-nan")
+        _ = skv.validate_data(self, X, ensure_all_finite="allow-nan")
         # Validate by allowing NaNs
         if not hasattr(X, "index") or not isinstance(X.index, pd.DatetimeIndex):
@@ -140,9 +140,11 @@ class SelectNonExpiring(skf.SelectorMixin, skb.BaseEstimator):
         return self
-    def _get_support_mask(self):
+    def _get_support_mask(self) -> np.ndarray:
         skv.check_is_fitted(self)
         return self.to_keep_
-    def _more_tags(self):
-        return {"allow_nan": True}
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.allow_nan = True
+        return tags

skfolio/prior/__init__.py CHANGED Viewed

@@ -8,11 +8,11 @@ from skfolio.prior._factor_model import (
 )
 __all__ = [
-    "PriorModel",
+    "BaseLoadingMatrix",
     "BasePrior",
-    "EmpiricalPrior",
     "BlackLitterman",
+    "EmpiricalPrior",
     "FactorModel",
-    "BaseLoadingMatrix",
     "LoadingMatrixRegression",
+    "PriorModel",
 ]

skfolio/prior/_black_litterman.py CHANGED Viewed

@@ -10,6 +10,7 @@
 import numpy as np
 import numpy.typing as npt
 import sklearn.utils.metadata_routing as skm
+import sklearn.utils.validation as skv
 from skfolio.moments import EquilibriumMu
 from skfolio.prior._base import BasePrior, PriorModel
@@ -182,7 +183,7 @@ class BlackLitterman(BasePrior):
         # we validate after all models have been fitted to keep features names
         # information.
-        self._validate_data(X)
+        skv.validate_data(self, X)
         n_assets = prior_returns.shape[1]
         views = np.asarray(self.views)

skfolio/prior/_empirical.py CHANGED Viewed

@@ -7,6 +7,7 @@
 import numpy as np
 import numpy.typing as npt
 import sklearn.utils.metadata_routing as skm
+import sklearn.utils.validation as skv
 from skfolio.moments import BaseCovariance, BaseMu, EmpiricalCovariance, EmpiricalMu
 from skfolio.prior._base import BasePrior, PriorModel
@@ -190,7 +191,7 @@ class EmpiricalPrior(BasePrior):
         # we validate and convert to numpy after all models have been fitted to keep
         # features names information.
-        X = self._validate_data(X)
+        X = skv.validate_data(self, X)
         self.prior_model_ = PriorModel(
             mu=mu,
             covariance=covariance,

skfolio/prior/_factor_model.py CHANGED Viewed

@@ -17,6 +17,7 @@ import sklearn.base as skb
 import sklearn.linear_model as skl
 import sklearn.multioutput as skmo
 import sklearn.utils.metadata_routing as skm
+import sklearn.utils.validation as skv
 from skfolio.prior._base import BasePrior, PriorModel
 from skfolio.prior._empirical import EmpiricalPrior
@@ -273,7 +274,7 @@ class FactorModel(BasePrior):
         # we validate and convert to numpy after all models have been fitted to keep
         # features names information.
-        X, y = self._validate_data(X, y, multi_output=True)
+        X, y = skv.validate_data(self, X, y, multi_output=True)
         n_assets = X.shape[1]
         n_factors = y.shape[1]

skfolio/typing.py CHANGED Viewed

@@ -14,20 +14,20 @@ import plotly.graph_objects as go
 from skfolio.measures import ExtraRiskMeasure, PerfMeasure, RatioMeasure, RiskMeasure
 __all__ = [
+    "CvxMeasure",
+    "ExpressionFunction",
+    "Factor",
     "Groups",
     "Inequality",
     "LinearConstraints",
+    "Measure",
     "MultiInput",
-    "Target",
+    "Names",
     "ParametersValues",
-    "Factor",
     "Result",
     "RiskResult",
-    "ExpressionFunction",
-    "Measure",
-    "CvxMeasure",
-    "Names",
     "Tags",
+    "Target",
 ]
 Measure = PerfMeasure | RiskMeasure | ExtraRiskMeasure | RatioMeasure

skfolio/uncertainty_set/__init__.py CHANGED Viewed

@@ -13,11 +13,11 @@ from skfolio.uncertainty_set._empirical import (
 )
 __all__ = [
-    "UncertaintySet",
-    "BaseMuUncertaintySet",
     "BaseCovarianceUncertaintySet",
-    "EmpiricalMuUncertaintySet",
-    "EmpiricalCovarianceUncertaintySet",
-    "BootstrapMuUncertaintySet",
+    "BaseMuUncertaintySet",
     "BootstrapCovarianceUncertaintySet",
+    "BootstrapMuUncertaintySet",
+    "EmpiricalCovarianceUncertaintySet",
+    "EmpiricalMuUncertaintySet",
+    "UncertaintySet",
 ]

skfolio/uncertainty_set/_base.py CHANGED Viewed

@@ -11,6 +11,7 @@ import numpy as np
 import numpy.typing as npt
 import sklearn.base as skb
 import sklearn.utils.metadata_routing as skm
+import sklearn.utils.validation as skv
 from skfolio.prior import BasePrior
@@ -113,9 +114,9 @@ class BaseCovarianceUncertaintySet(skb.BaseEstimator, ABC):
             Validated price returns of factors or a target benchmark if provided.
         """
         if y is None:
-            X = self._validate_data(X)
+            X = skv.validate_data(self, X)
         else:
-            X, y = self._validate_data(X, y, multi_output=True)
+            X, y = skv.validate_data(self, X, y, multi_output=True)
         return X, y
     def get_metadata_routing(self):

skfolio/utils/equations.py CHANGED Viewed

@@ -16,7 +16,7 @@ from skfolio.exceptions import (
     GroupNotFoundError,
 )
-__all__ = ["equations_to_matrix"]
+__all__ = ["equations_to_matrix", "group_cardinalities_to_matrix"]
 _EQUALITY_OPERATORS = {"==", "="}
 _INEQUALITY_OPERATORS = {">=", "<="}
@@ -132,6 +132,63 @@ def equations_to_matrix(
     )
+def group_cardinalities_to_matrix(
+    groups: npt.ArrayLike,
+    group_cardinalities: dict[str, int],
+    raise_if_group_missing: bool = False,
+) -> tuple[np.ndarray, np.ndarray]:
+    """Convert a dictionary of group cardinalities into the left and right matrices
+    of the cardinality inequality A <= B.
+    Parameters
+    ----------
+    groups : array-like of shape (n_groups, n_assets)
+       2D array of assets groups.
+       Examples:
+             groups = np.array(
+                [
+                    ["Equity", "Equity", "Equity", "Bond"],
+                    ["US", "Europe", "Japan", "US"],
+                ]
+            )
+    group_cardinalities : dict[str, int]
+       Dictionary of cardinality constraint per group.
+       Examples: {"Equity": 1, "US": 3}
+    raise_if_group_missing : bool, default=False
+        If this is set to True, an error is raised when a group is not found in the
+        groups, otherwise only a warning is shown.
+        The default is False.
+    Returns
+    -------
+    left_inequality: ndarray of shape (n_constraints, n_assets)
+    right_inequality: ndarray of shape (n_constraints,)
+        The left and right matrices of the cardinality inequality.
+    """
+    groups = _validate_groups(groups, name="group")
+    a_inequality = []
+    b_inequality = []
+    for group, card in group_cardinalities.items():
+        try:
+            arr = _matching_array(values=groups, key=group, sum_to_one=False)
+            a_inequality.append(arr)
+            b_inequality.append(card)
+        except GroupNotFoundError as e:
+            if raise_if_group_missing:
+                raise
+            warnings.warn(str(e), stacklevel=2)
+    return (
+        np.array(a_inequality),
+        np.array(b_inequality),
+    )
 def _validate_groups(groups: npt.ArrayLike, name: str = "groups") -> np.ndarray:
     """Validate groups by checking its dim and if group names don't appear in multiple
     levels and convert to numpy array.

skfolio 0.5.2__py3-none-any.whl → 0.7.0__py3-none-any.whl

skfolio 0.5.2__py3-none-any.whl → 0.7.0__py3-none-any.whl