PyPI - skfolio - Versions diffs - 0.0.1__py3-none-any.whl - Mend

skfolio 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

skfolio/__init__.py +29 -0
skfolio/cluster/__init__.py +8 -0
skfolio/cluster/_hierarchical.py +387 -0
skfolio/datasets/__init__.py +20 -0
skfolio/datasets/_base.py +389 -0
skfolio/datasets/data/__init__.py +0 -0
skfolio/datasets/data/factors_dataset.csv.gz +0 -0
skfolio/datasets/data/sp500_dataset.csv.gz +0 -0
skfolio/datasets/data/sp500_index.csv.gz +0 -0
skfolio/distance/__init__.py +26 -0
skfolio/distance/_base.py +55 -0
skfolio/distance/_distance.py +574 -0
skfolio/exceptions.py +30 -0
skfolio/measures/__init__.py +76 -0
skfolio/measures/_enums.py +355 -0
skfolio/measures/_measures.py +607 -0
skfolio/metrics/__init__.py +3 -0
skfolio/metrics/_scorer.py +121 -0
skfolio/model_selection/__init__.py +18 -0
skfolio/model_selection/_combinatorial.py +407 -0
skfolio/model_selection/_validation.py +194 -0
skfolio/model_selection/_walk_forward.py +221 -0
skfolio/moments/__init__.py +41 -0
skfolio/moments/covariance/__init__.py +29 -0
skfolio/moments/covariance/_base.py +101 -0
skfolio/moments/covariance/_covariance.py +1108 -0
skfolio/moments/expected_returns/__init__.py +21 -0
skfolio/moments/expected_returns/_base.py +31 -0
skfolio/moments/expected_returns/_expected_returns.py +415 -0
skfolio/optimization/__init__.py +36 -0
skfolio/optimization/_base.py +147 -0
skfolio/optimization/cluster/__init__.py +13 -0
skfolio/optimization/cluster/_nco.py +348 -0
skfolio/optimization/cluster/hierarchical/__init__.py +13 -0
skfolio/optimization/cluster/hierarchical/_base.py +440 -0
skfolio/optimization/cluster/hierarchical/_herc.py +406 -0
skfolio/optimization/cluster/hierarchical/_hrp.py +368 -0
skfolio/optimization/convex/__init__.py +16 -0
skfolio/optimization/convex/_base.py +1944 -0
skfolio/optimization/convex/_distributionally_robust.py +392 -0
skfolio/optimization/convex/_maximum_diversification.py +417 -0
skfolio/optimization/convex/_mean_risk.py +974 -0
skfolio/optimization/convex/_risk_budgeting.py +560 -0
skfolio/optimization/ensemble/__init__.py +6 -0
skfolio/optimization/ensemble/_base.py +87 -0
skfolio/optimization/ensemble/_stacking.py +326 -0
skfolio/optimization/naive/__init__.py +3 -0
skfolio/optimization/naive/_naive.py +173 -0
skfolio/population/__init__.py +3 -0
skfolio/population/_population.py +883 -0
skfolio/portfolio/__init__.py +13 -0
skfolio/portfolio/_base.py +1096 -0
skfolio/portfolio/_multi_period_portfolio.py +610 -0
skfolio/portfolio/_portfolio.py +842 -0
skfolio/pre_selection/__init__.py +7 -0
skfolio/pre_selection/_pre_selection.py +342 -0
skfolio/preprocessing/__init__.py +3 -0
skfolio/preprocessing/_returns.py +114 -0
skfolio/prior/__init__.py +18 -0
skfolio/prior/_base.py +63 -0
skfolio/prior/_black_litterman.py +238 -0
skfolio/prior/_empirical.py +163 -0
skfolio/prior/_factor_model.py +268 -0
skfolio/typing.py +50 -0
skfolio/uncertainty_set/__init__.py +23 -0
skfolio/uncertainty_set/_base.py +108 -0
skfolio/uncertainty_set/_bootstrap.py +281 -0
skfolio/uncertainty_set/_empirical.py +237 -0
skfolio/utils/__init__.py +0 -0
skfolio/utils/bootstrap.py +115 -0
skfolio/utils/equations.py +350 -0
skfolio/utils/sorting.py +117 -0
skfolio/utils/stats.py +466 -0
skfolio/utils/tools.py +567 -0
skfolio-0.0.1.dist-info/LICENSE +29 -0
skfolio-0.0.1.dist-info/METADATA +568 -0
skfolio-0.0.1.dist-info/RECORD +79 -0
skfolio-0.0.1.dist-info/WHEEL +5 -0
skfolio-0.0.1.dist-info/top_level.txt +1 -0

skfolio/utils/bootstrap.py ADDED Viewed

@@ -0,0 +1,115 @@
+"""Bootstrap module."""
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+import numpy as np
+__all__ = ["stationary_bootstrap"]
def optimal_block_size(x: np.ndarray) -> float:
    """Estimate the optimal block size of a single series with the automatic
    block-length selection of Politis & White [1]_, as corrected in [2]_.

    Parameters
    ----------
    x : ndarray
        The input 1D-array.

    Returns
    -------
    value : float
        The optimal block size, capped at `ceil(min(3 * sqrt(n), n / 3))` where
        `n` is the number of observations.

    References
    ----------
    .. [1] "Automatic Block-Length Selection for the Dependent Bootstrap".
        Politis & White (2004).

    .. [2] "Correction to Automatic Block-Length Selection for the Dependent Bootstrap".
        Patton, Politis & White (2009).
    """
    n_obs = x.shape[0]
    centered = x - x.mean(0)

    # Cap applied to the returned block size.
    max_block = np.ceil(min(3 * np.sqrt(n_obs), n_obs / 3))
    # Length of the run of lags whose autocorrelation must stay below the threshold.
    window = max(5, int(np.log10(n_obs)))
    max_lag = int(np.ceil(np.sqrt(n_obs))) + window
    threshold = 2 * np.sqrt(np.log10(n_obs) / n_obs)

    autocov = np.zeros(max_lag + 1)
    abs_autocorr = np.zeros(max_lag + 1)
    first_quiet_lag = None
    for lag in range(max_lag + 1):
        tail = centered[lag + 1 :]
        head = centered[: -(lag + 1)]
        lagged_product = centered[lag:] @ centered[: n_obs - lag]
        autocov[lag] = lagged_product / n_obs
        abs_autocorr[lag] = np.abs(lagged_product) / np.sqrt(
            (tail @ tail) * (head @ head)
        )
        # Keep only the first lag followed by `window` small autocorrelations.
        if (
            first_quiet_lag is None
            and lag >= window
            and np.all(abs_autocorr[lag - window : lag] < threshold)
        ):
            first_quiet_lag = lag - window

    if first_quiet_lag is None:
        bandwidth = max_lag
    else:
        bandwidth = min(2 * max(first_quiet_lag, 1), max_lag)

    # Flat-top weighted sums of the autocovariances.
    weighted_lag_sum = 0.0
    long_run_var = autocov[0]
    for lag in range(1, bandwidth + 1):
        ratio = lag / bandwidth
        weight = 1 if ratio <= 1 / 2 else 2 * (1 - ratio)
        weighted_lag_sum += 2 * weight * lag * autocov[lag]
        long_run_var += 2 * weight * autocov[lag]

    denominator = 2 * long_run_var**2
    block = ((2 * weighted_lag_sum**2) / denominator) ** (1 / 3) * n_obs ** (1 / 3)
    return min(block, max_block)
+def stationary_bootstrap(
+    returns: np.ndarray,
+    n_bootstrap_samples: int,
+    block_size: float | None = None,
+    seed: int | None = None,
+) -> np.ndarray:
+    """Creates `n_bootstrap_samples` samples from a multivariate return series via
+    stationary bootstrapping.
+    Parameters
+    ----------
+    returns: ndarray of shape (n_observations, n_assets)
+        The returns array.
+    n_bootstrap_samples: int
+        The number of bootstrap samples to generate.
+    block_size: float, optional
+        The block size.
+        If this is set to None, we estimate the optimal block size using Politis &
+        White algorithm for all individual asset and the median.
+    seed: int, optional
+        Random seed used to initialize the pseudo-random number generator
+    Returns
+    -------
+    value: ndarray
+           The sample returns of shape (reps, nb observations, nb assets)
+    """
+    np.random.seed(seed=seed)
+    n_observations, n_assets = returns.shape
+    x = np.vstack((returns, returns))
+    # Loop over reps bootstraps
+    if block_size is None:
+        block_size = np.median(
+            [optimal_block_size(returns[:, i]) for i in range(n_assets)]
+        )
+    indices = np.random.randint(
+        n_observations, size=(n_bootstrap_samples, n_observations)
+    )
+    cond = np.random.rand(n_bootstrap_samples, n_observations) >= 1.0 / block_size
+    # TODO: don't use loop
+    for i in range(n_bootstrap_samples):
+        for j in range(1, n_observations):
+            if cond[i, j]:
+                indices[i, j] = indices[i, j - 1] + 1
+    indices[indices > 2 * n_observations] = 0
+    return x[indices, :]

skfolio/utils/equations.py ADDED Viewed

@@ -0,0 +1,350 @@
+"""Equation module"""
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+import re
+import warnings
+import numpy as np
+import numpy.typing as npt
+from skfolio.exceptions import EquationToMatrixError, GroupNotFoundError
+__all__ = ["equations_to_matrix"]
def equations_to_matrix(
    groups: npt.ArrayLike,
    equations: npt.ArrayLike,
    sum_to_one: bool = False,
    raise_if_group_missing: bool = False,
    names: tuple[str, str] = ("groups", "equations"),
) -> tuple[np.ndarray, np.ndarray]:
    """Convert a list of linear equations into the left and right matrices of the
    inequality A <= B.

    Parameters
    ----------
    groups : array-like of shape (n_groups, n_assets)
        2D array of assets groups.

        Examples:
             groups = np.array(
                [
                    ["SPX", "SX5E", "NKY", "TLT"],
                    ["Equity", "Equity", "Equity", "Bond"],
                    ["US", "Europe", "Japan", "US"],
                ]
            )

    equations : array-like of shape (n_equations,)
         1D array of equations.

         Example of valid equation patterns:
            * "number_1 * group_1 + number_3 <= number_4 * group_3 + number_5"
            * "group_1 >= number * group_2"
            * "group_1 <= number"
            * "group_1 >= number"

        "group_1" and "group_2" are the group names defined in `groups`.
        The second expression means that the sum of all assets in "group_1" should be
        greater or equal to "number" times the sum of all assets in "group_2".

        Examples:
             equations = [
                "Equity <= 3 * Bond",
                "US >= 1.5",
                "Europe >= 0.5 * Japan",
                "Japan <= 1",
                "3*SPX + 5*SX5E <= 2*TLT + 3",
            ]

    sum_to_one : bool, default=False
        If this is set to True, all elements in a group sum to one (used in the `views`
        of the Black-Litterman model).

    raise_if_group_missing : bool, default=False
        If this is set to True, an error is raised when a group is not found in the
        groups, otherwise only a warning is shown.

    names : tuple[str, str], default=('groups', 'equations')
        The group and equation names used in error messages.

    Returns
    -------
    left : ndarray of shape (n_equations, n_assets)
    right : ndarray of shape (n_equations,)
        The left and right matrices of the inequality A <= B.
        When an equation is skipped because one of its groups is missing (and
        `raise_if_group_missing` is False), its row is left filled with zeros.

    Raises
    ------
    ValueError
        If `groups` is not 2D or `equations` is not 1D.
    """
    group_array = np.asarray(groups)
    equation_array = np.asarray(equations)

    if group_array.ndim != 2:
        raise ValueError(
            f"`{names[0]}` must be a 2D array, got {group_array.ndim}D array instead."
        )
    if equation_array.ndim != 1:
        raise ValueError(
            f"`{names[1]}` must be a 1D array, got {equation_array.ndim}D array instead."
        )

    coefficients = np.zeros((len(equation_array), group_array.shape[1]))
    bounds = np.zeros(len(equation_array))
    for row, equation in enumerate(equation_array):
        try:
            parsed = _string_to_equation(
                groups=group_array,
                string=equation,
                sum_to_one=sum_to_one,
            )
        except GroupNotFoundError as error:
            if raise_if_group_missing:
                raise
            # Best effort: report the missing group and keep the row at zero.
            warnings.warn(str(error), stacklevel=2)
        else:
            coefficients[row], bounds[row] = parsed
    return coefficients, bounds
def _matching_array(values: np.ndarray, key: str, sum_to_one: bool) -> np.ndarray:
    """Flag the columns of a 2D array of strings that contain a given key.

    The result holds 1 for every column in which at least one row equals `key` and
    0 elsewhere. When `sum_to_one` is True, the result is divided by the number of
    matching columns so that it sums to one.

    Parameters
    ----------
    values : ndarray of shape (n, m)
        2D-array of strings.

    key : str
        String to match in the values.

    sum_to_one : bool
        If this is set to True, the matching 1D-array is scaled to have a sum of one.

    Returns
    -------
    matching_array : ndarray of shape (m,)
        Matching 1D-array, one entry per column of `values`.

    Raises
    ------
    EquationToMatrixError
        If `key` is not found anywhere in `values`.
    """
    column_hits = np.any(values == key, axis=0)
    if not column_hits.any():
        raise EquationToMatrixError(f"Unable to find '{key}' in '{values}'")
    divisor = np.sum(column_hits) if sum_to_one else 1
    return column_hits / divisor
# Sign applied to an equation so that it reads `left <= right`: '>=' flips it.
_operator_mapping = {">=": -1, "<=": 1, "==": 1, "=": 1}
# Sign carried by an additive operator in front of a term.
_operator_signs = {"+": 1, "-": -1}
def _inequality_operator_sign(operator: str) -> int:
    """Convert a comparison operator into the corresponding integer sign:
    -1 for '>=' and 1 for '<=', '==' and '='.

    Parameters
    ----------
    operator : str
        Operator: '>=', '<=', '==' or '='.

    Returns
    -------
    value : int
        Operator sign: 1 or -1.

    Raises
    ------
    EquationToMatrixError
        If the operator is not one of the supported comparison operators.
    """
    try:
        return _operator_mapping[operator]
    except KeyError:
        # List every accepted operator, not only '<=' and '>='.
        valid = ", ".join(f"'{op}'" for op in _operator_mapping)
        raise EquationToMatrixError(
            f"operator '{operator}' is not valid. It should be one of: {valid}"
        ) from None
def _operator_sign(operator: str) -> int:
    """Convert the operators '+' and '-' into 1 or -1.

    Parameters
    ----------
    operator : str
       Operator: '+' or '-'.

    Returns
    -------
    value : int
       Operator sign: 1 or -1.

    Raises
    ------
    EquationToMatrixError
        If the operator is neither '+' nor '-'.
    """
    try:
        return _operator_signs[operator]
    except KeyError:
        raise EquationToMatrixError(
            f"operator '{operator}' is not valid. It should be '+' or '-'"
        ) from None
def _string_to_float(string: str) -> float:
    """Parse a factor string as a float.

    Parameters
    ----------
    string : str
       The factor string.

    Returns
    -------
    value : float
       The factor string converted to float.

    Raises
    ------
    EquationToMatrixError
        If the string cannot be parsed as a float.
    """
    try:
        value = float(string)
    except ValueError:
        raise EquationToMatrixError(f"Unable to convert {string} into float") from None
    return value
def _string_to_equation(
    groups: np.ndarray,
    string: str,
    sum_to_one: bool,
) -> tuple[np.ndarray, float]:
    """Convert an equation string to a left 1D-array and a right number such that
    the equation reads `left @ weights <= right`.

    Every term is first moved to the left-hand side of the comparison: group terms
    are accumulated in `left` and constant terms in `right`. The result is then
    multiplied by -1 when the comparison operator is '>='. The operators '==' and
    '=' are handled like '<='.

    Parameters
    ----------
    groups : ndarray of shape (n_groups, n_assets)
        Groups 2D-array

    string : str
        String to convert

    sum_to_one : bool
        If this is set to True, the 1D-array of each group is scaled to have a sum
        of one.

    Returns
    -------
    left : 1D-array of shape (n_assets,)
    right : float

    Raises
    ------
    EquationToMatrixError
        If the string is malformed: misplaced, missing, repeated or invalid
        operator, or a factor missing after '*'.

    GroupNotFoundError
        If a term is neither a number nor a group present in `groups`.
    """
    n = groups.shape[1]
    comparison_operators = [">=", "<=", "==", "="]
    additive_operators = ["-", "+"]
    operators = ["-", "+", "*", ">=", "<=", "==", "="]
    invalid_operators = [">", "<"]
    pattern = re.compile(r"((?:" + "|\\".join(operators) + r"))")
    invalid_pattern = re.compile(r"((?:" + "|\\".join(invalid_operators) + r"))")
    err_msg = f"Wrong pattern encountered while converting the string '{string}'"

    def wrongly_positioned(token: str | None) -> Exception:
        # Error used for every token found where it is not allowed.
        return EquationToMatrixError(
            f"{err_msg}: the character '{token}' is wrongly positioned"
        )

    def to_number(token: str) -> float:
        # A term that is not a group must be a number; otherwise it is either an
        # unsupported strict inequality or an unknown group.
        try:
            return float(token)
        except ValueError:
            invalid_ops = invalid_pattern.findall(token)
            if len(invalid_ops) > 0:
                raise EquationToMatrixError(
                    f"{invalid_ops[0]} is an invalid operator. Valid operators are:"
                    f" {operators}"
                ) from None
            raise GroupNotFoundError(
                f"{err_msg}: the group '{token}' is missing from the groups {groups}"
            ) from None

    tokens = [x.strip() for x in re.split(pattern, string)]
    iterator = iter([x for x in tokens if x != ""])
    group_names = set(groups.flatten())

    left = np.zeros(n)
    right = 0
    # Becomes -1 once the comparison operator is passed: right-hand side terms are
    # moved to the left-hand side.
    main_sign = 1
    inequality_sign = None
    e = next(iterator, None)
    # Every pass consumes at least one token or raises, so the loop terminates.
    while e is not None:
        sign = 1
        if e in comparison_operators:
            if inequality_sign is not None:
                # A second comparison would silently produce a meaningless row.
                raise EquationToMatrixError(
                    f"{err_msg}: only one comparison operator is allowed"
                )
            main_sign = -1
            inequality_sign = _inequality_operator_sign(e)
            e = next(iterator, None)
            if e in additive_operators:
                sign *= _operator_sign(e)
                e = next(iterator, None)
        elif e in additive_operators:
            sign *= _operator_sign(e)
            e = next(iterator, None)
        elif e == "*":
            raise wrongly_positioned(e)
        sign *= main_sign
        # next can only be a number or a group
        if e is None or e in operators:
            raise wrongly_positioned(e)
        if e in group_names:
            arr = _matching_array(values=groups, key=e, sum_to_one=sum_to_one)
            # next can only be a '*' or an ['-', '+', '>=', '<=', '==', '='] or None
            e = next(iterator, None)
            if e is None or e in additive_operators or e in comparison_operators:
                left += sign * arr
            elif e == "*":
                # next can only be a number
                e = next(iterator, None)
                if e is None:
                    # `float(None)` would raise a bare TypeError.
                    raise wrongly_positioned(e)
                number = to_number(e)
                left += number * sign * arr
                e = next(iterator, None)
            else:
                raise wrongly_positioned(e)
        else:
            number = to_number(e)
            # next can only be a '*' or an operator or None
            e = next(iterator, None)
            if e == "*":
                # next can only be a group
                e = next(iterator, None)
                if e not in group_names:
                    raise wrongly_positioned(e)
                arr = _matching_array(values=groups, key=e, sum_to_one=sum_to_one)
                left += number * sign * arr
                e = next(iterator, None)
            elif e is None or e in additive_operators or e in comparison_operators:
                right += number * sign
            else:
                raise wrongly_positioned(e)

    if inequality_sign is None:
        # Without this check `left *= None` raises a bare TypeError.
        raise EquationToMatrixError(
            f"{err_msg}: a comparison operator among {comparison_operators} is"
            " required"
        )
    left *= inequality_sign
    right *= -inequality_sign
    return left, right

skfolio/utils/sorting.py ADDED Viewed

@@ -0,0 +1,117 @@
+"""Fast non-dominated sorting module"""
+# Author: Hugo Delatte <delatte.hugo@gmail.com>
+# License: BSD 3 clause
+import numpy as np
+__all__ = ["dominate", "non_denominated_sort"]
+def dominate(fitness_1: np.ndarray, fitness_2: np.ndarray) -> bool:
+    """Compute the domination of two fitness arrays.
+    Domination of `fitness_1` over `fitness_2` means that each objective (value) of
+    `fitness_1` is not strictly worse than the corresponding objective of `fitness_2`
+    and at least one objective is strictly better.
+    Parameters
+    ----------
+    fitness_1 : ndarray of floats of shape (n_objectives,)
+        Fitness array 1.
+    fitness_2 : ndarray of floats of shape (n_objectives,)
+        Fitness array 2.
+    Returns
+    -------
+    is_dominated : bool
+        Ture if `fitness_1` dominates `fitness_2`, False otherwise.
+    """
+    if fitness_1.ndim != fitness_2.ndim != 1:
+        raise ValueError("fitness_1 and fitness_2 must be 1D array")
+    not_equal = False
+    for self_value, other_value in zip(fitness_1, fitness_2, strict=True):
+        if self_value > other_value:
+            not_equal = True
+        elif self_value < other_value:
+            return False
+    return not_equal
def non_denominated_sort(
    fitnesses: np.ndarray, first_front_only: bool
) -> list[list[int]]:
    """Fast non-dominated sorting.

    Sort the fitnesses into different non-domination levels.
    Complexity O(MN^2) where M is the number of objectives and N the number of
    portfolios.

    Parameters
    ----------
    fitnesses : ndarray of shape (n, n_fitness)
        Fitnesses array.

    first_front_only : bool
        If this is set to True, only the first front is computed and returned.

    Returns
    -------
    fronts : list[list[int]]
      A list of Pareto fronts (lists of row indices of `fitnesses`), the first list
      includes non-dominated fitnesses. An empty list is returned when `fitnesses`
      is empty.

    Raises
    ------
    ValueError
        If some fitnesses cannot be assigned to a front because the domination
        relation is cyclic.
    """
    n = len(fitnesses)
    fronts: list[list[int]] = []
    if n == 0:
        return fronts

    # for each portfolio a list of all portfolios that are dominated by this one
    is_dominating: list[list[int]] = [[] for _ in range(n)]
    # for each portfolio the number of portfolios dominating it
    n_dominated = [0] * n
    current_front: list[int] = []
    for i in range(n):
        for j in range(i + 1, n):
            if dominate(fitnesses[i], fitnesses[j]):
                is_dominating[i].append(j)
                n_dominated[j] += 1
            elif dominate(fitnesses[j], fitnesses[i]):
                is_dominating[j].append(i)
                n_dominated[i] += 1
        # All pairs involving `i` have been compared at this point.
        if n_dominated[i] == 0:
            current_front.append(i)
    fronts.append(current_front)
    if first_front_only:
        return fronts

    n_ranked = len(current_front)
    # while not all solutions are assigned to a pareto front
    while n_ranked < n:
        next_front = []
        for i in current_front:
            # Removing front member `i` frees the portfolios it dominates.
            for j in is_dominating[i]:
                n_dominated[j] -= 1
                if n_dominated[j] == 0:
                    next_front.append(j)
        if not next_front:
            # No progress is possible: without this check the loop would append
            # empty fronts forever.
            raise ValueError(
                "Unable to assign all fitnesses to a front: the domination relation"
                " is cyclic, which can happen when fitnesses contain NaN"
            )
        n_ranked += len(next_front)
        fronts.append(next_front)
        current_front = next_front
    return fronts