PyPI - numerai-tools - Versions diffs - 0.4.2.dev1__tar.gz → 0.5.0.dev0__tar.gz - Mend

numerai-tools 0.4.2.dev1__tar.gz → 0.5.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

{numerai_tools-0.4.2.dev1 → numerai_tools-0.5.0.dev0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: numerai_tools
-Version: 0.4.2.dev1
+Version: 0.5.0.dev0
 Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
 Home-page: https://github.com/numerai/numerai-tools
 Maintainer: Numerai

{numerai_tools-0.4.2.dev1 → numerai_tools-0.5.0.dev0}/numerai_tools/scoring.py RENAMED Viewed

@@ -47,8 +47,9 @@ def filter_sort_index(
 def filter_sort_index_many(
-    inputs: List[S1], max_filtered_ratio: float = DEFAULT_MAX_FILTERED_INDEX_RATIO
-) -> List[S1]:
+    inputs: List[pd.DataFrame],
+    max_filtered_ratio: float = DEFAULT_MAX_FILTERED_INDEX_RATIO,
+) -> List[pd.DataFrame]:
     """Filters the indices of the given list of series to match each other,
     then sorts the indices, then checks that we didn't filter too many indices
     before returning the filtered and sorted series.
@@ -461,7 +462,7 @@ def numerai_corr(
     Returns:
         pd.Series - the resulting correlation scores for each column in predictions
     """
-    targets = targets - targets.mean()
+    targets = center(targets)
     targets, predictions = filter_sort_index(
         targets, predictions, max_filtered_index_ratio
     )
@@ -557,14 +558,15 @@ def alpha(
         sample_weights: pd.Series - the universe sampling weights
         targets: pd.Series - the live targets to evaluate against
     """
+    targets = center(targets)
     assert not predictions.isna().any().any(), "Predictions contain NaNs"
     assert not neutralizers.isna().any().any(), "Normalization factors contain NaNs"
     assert not sample_weights.isna().any(), "Weights contain NaNs"
     predictions, neutralizers, sample_weights, targets = filter_sort_index_many(
         [predictions, neutralizers, sample_weights, targets]
     )
-    weights = tie_kept_rank__gaussianize__pow_1_5(predictions).apply(
+    ranked_preds = tie_kept_rank__gaussianize__pow_1_5(predictions)
+    weights = ranked_preds.apply(
         lambda s_prime: generate_neutralized_weights(
             s_prime, neutralizers, sample_weights
         )
@@ -593,6 +595,7 @@ def meta_portfolio_contribution(
         sample_weights: pd.Series - the universe sampling weights
         targets: pd.Series - the live targets to evaluate against
     """
+    targets = center(targets)
     assert not predictions.isna().any().any(), "Predictions contain NaNs"
     assert not neutralizers.isna().any().any(), "Normalization factors contain NaNs"
     assert not sample_weights.isna().any(), "Weights contain NaNs"
@@ -611,15 +614,12 @@ def meta_portfolio_contribution(
     t = targets.values
     swp = w @ s
     swp = swp - swp.mean()
-    swp_abs_sum = np.sum(np.abs(swp))
+    l1_norm = np.sum(np.abs(swp))
+    l1_norm_squared = np.power(l1_norm, 2)
     swp_sign = np.sign(swp)
-    alpha_unnormalized_swp_grad = (
-        1
-        / np.power(swp_abs_sum, 2)
-        * (swp_abs_sum * t - swp_sign * np.dot(swp, t)).reshape(-1, 1)
-    )
-    zero_mean_jac_vec_prod = (
-        alpha_unnormalized_swp_grad - alpha_unnormalized_swp_grad.mean()
-    )
-    mpc = (w.T @ zero_mean_jac_vec_prod).squeeze()
+    swp_alpha = np.dot(swp, t)
+    directional_gradient = l1_norm * t - swp_sign * swp_alpha
+    jacobian_vector_product = directional_gradient.reshape(-1, 1) / l1_norm_squared
+    centered_jacobian = jacobian_vector_product - jacobian_vector_product.mean()
+    mpc = (w.T @ centered_jacobian).squeeze()
     return pd.Series(mpc, index=stakes.index)

numerai_tools-0.5.0.dev0/numerai_tools/signals.py ADDED Viewed

@@ -0,0 +1,215 @@
+from typing import Tuple, Optional
+from numerai_tools.submissions import validate_headers_signals, validate_ids_signals
+from numerai_tools.scoring import (
+    filter_sort_index,
+    filter_sort_top_bottom,
+    spearman_correlation,
+    tie_kept_rank,
+    tie_kept_rank__gaussianize__pow_1_5,
+    filter_sort_index_many,
+    generate_neutralized_weights,
+    weight_normalize,
+    center,
+)
+import pandas as pd
+def churn(
+    s1: pd.Series,
+    s2: pd.Series,
+    top_bottom: Optional[int] = None,
+) -> float:
+    """Calculate the churn between two series. Churn is the proportion of elements
+    that are different between the two series.
+    For 2 given series with overlapping indices, churn is 1 - Spearman Correlation.
+    If top_bottom is provided, the churn is instead calculated as 1 minus the average
+    of the % of tickers that stay in the top and bottom predictions. This is only
+    relevant when the series are rank signals and not portfolio weights.
+    Arguments:
+        s1: pd.Series - the first series to compare
+        s2: pd.Series - the second series to compare
+        top_bottom: Optional[int] - the number of top and bottom predictions to use
+                                    when calculating the overlap. Results in
+                                    2*top_bottom predictions.
+    Returns:
+        float - the churn between the two series
+    """
+    if top_bottom is not None and top_bottom > 0:
+        s1_top, s1_bot = filter_sort_top_bottom(s1, top_bottom, False)
+        s2_top, s2_bot = filter_sort_top_bottom(s2, top_bottom, False)
+        top_overlap = len(s1_top.index.intersection(s2_top.index)) / top_bottom
+        bot_overlap = len(s1_bot.index.intersection(s2_bot.index)) / top_bottom
+        avg_overlap = (top_overlap + bot_overlap) / 2
+        return 1 - avg_overlap
+    s1, s2 = filter_sort_index(s1, s2)
+    assert s1.std() > 0, "s1 must have non-zero standard deviation"
+    assert s2.std() > 0, "s2 must have non-zero standard deviation"
+    return 1 - spearman_correlation(s1, s2)
+def turnover(
+    s1: pd.Series,
+    s2: pd.Series,
+):
+    """Calculate the turnover between two series. Turnover is the total change in weights between
+    the two series divided by 2.
+    For 2 given series with overlapping indices, join the series on index, fill nans with zeroes
+    and calculate turnover as the absolute total difference between the two series divided by 2.
+    This is only relevant when the series are portfolio weights and not rank signals.
+    Arguments:
+        s1: pd.Series - the first series of portfolio weights to compare
+        s2: pd.Series - the second series of portfolio weights to compare
+    Returns:
+        float - the turnover between the two series
+    """
+    s1, s2 = filter_sort_index(s1, s2)
+    turnover = (s1 - s2).abs().sum() / 2
+    return turnover
+def neutral_weight(
+    submission: pd.Series,
+    signal_col: str,
+    neutralizer: pd.DataFrame,
+    weight: pd.Series,
+) -> pd.Series:
+    s_prime = tie_kept_rank__gaussianize__pow_1_5(submission.to_frame())[signal_col]
+    s_prime, neutralizer, weight = filter_sort_index_many(  # type: ignore
+        [s_prime, neutralizer, weight]
+    )
+    neutral_weights = generate_neutralized_weights(s_prime, neutralizer, weight)
+    neutral_weights = weight_normalize(center(neutral_weights.to_frame()))[0]
+    return neutral_weights.sort_index()
+def remap_ticker_col(
+    predictions: pd.DataFrame,
+    universe: pd.DataFrame,
+    ticker_col: str,
+) -> pd.DataFrame:
+    return (
+        predictions.join(universe, how="right")
+        .reset_index()
+        .set_index(ticker_col)
+        .sort_index()
+    )
+def rank_and_fill_signal(
+    universe: pd.DataFrame,
+    submission: pd.Series,
+    signal_col: str,
+) -> pd.Series:
+    uni_joined_sub = universe.sort_index().join(
+        tie_kept_rank(submission.sort_index().to_frame())
+    )[[signal_col]]
+    filled_sub = uni_joined_sub.fillna(uni_joined_sub.median()).sort_index()
+    return filled_sub[signal_col]
+def calculate_max_churn_and_turnover(
+    curr_sub: pd.DataFrame,
+    curr_neutralizer: pd.DataFrame,
+    curr_weight: pd.Series,
+    prev_week_subs: dict[str, pd.DataFrame],
+    prev_neutralizers: dict[str, pd.DataFrame],
+    prev_sample_weights: dict[str, pd.Series],
+    universe: pd.DataFrame,
+    curr_signal_col: str,
+    curr_ticker_col: str,
+) -> Tuple[float, float]:
+    """Calculate the maximum churn and turnover with respect to previous submissions.
+    Arguments:
+        curr_sub -- the current submission
+        curr_neutralizer -- the neutralizer DataFrame for the current submission
+        curr_weight -- the sample weights Series for the current submission
+        prev_week_subs -- a dictionary of datestamps to submissions
+        prev_neutralizers -- a dictionary of datestamps to neutralizers
+        prev_sample_weights -- a dictionary of datestamps to sample weights
+        universe -- the internal universe DataFrame
+        curr_signal_col -- the column name for signal in the current submission
+        curr_ticker_col -- the column name for tickers in the current submission
+    Returns:
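+        prev_week_max_churn -- the maximum churn from previous submissions,
+                               or 1.0 when no previous submission yields a churn value
+        prev_week_max_turnover -- the maximum turnover from previous submissions,
+                                  or 1.0 when no previous submission yields a turnover value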
+    """
+    curr_sub_vector: pd.Series = rank_and_fill_signal(
+        universe,
+        curr_sub.reset_index().set_index(curr_ticker_col).sort_index()[curr_signal_col],
+        curr_signal_col,
+    )
+    churn_stats = []
+    turnover_stats = []
+    neutralized_weights = neutral_weight(
+        curr_sub_vector, curr_signal_col, curr_neutralizer, curr_weight
+    )
+    for datestamp in prev_week_subs:
+        prev_sub = prev_week_subs[datestamp]
+        prev_neutralizer = prev_neutralizers[datestamp]
+        prev_weight = prev_sample_weights[datestamp]
+        prev_ticker_col, prev_signal_col = validate_headers_signals(prev_sub)  # type: ignore
+        prev_universe = universe.reset_index().set_index(prev_ticker_col)
+        filtered_prev_sub_df, _ = validate_ids_signals(
+            prev_universe.index, prev_sub, prev_ticker_col
+        )
+        # in case the previous submission has a different ticker column,
+        # remap the ticker column of prev data to the current ticker column
+        filtered_prev_sub = remap_ticker_col(
+            filtered_prev_sub_df.set_index(prev_ticker_col),
+            universe=prev_universe,
+            ticker_col=curr_ticker_col,
+        )[curr_signal_col]
+        filtered_prev_sub = rank_and_fill_signal(
+            universe=universe,
+            submission=filtered_prev_sub,
+            signal_col=curr_signal_col,
+        )
+        prev_neutralizer = remap_ticker_col(
+            prev_neutralizer,
+            universe=prev_universe,
+            ticker_col=curr_ticker_col,
+        ).filter(like="neutralizer_")
+        prev_weight = remap_ticker_col(
+            prev_weight.to_frame(),
+            universe=prev_universe,
+            ticker_col=curr_ticker_col,
+        )[prev_weight.name]
+        prev_neutralized_weights = neutral_weight(
+            filtered_prev_sub, prev_signal_col, prev_neutralizer, prev_weight
+        )
+        try:
+            churn_val = abs(churn(curr_sub_vector, filtered_prev_sub))
+        except AssertionError as e:
+            if "does not have enough overlapping ids" in str(e):
+                continue
+        try:
+            turnover_val = abs(turnover(neutralized_weights, prev_neutralized_weights))
+        except AssertionError as e:
+            if "does not have enough overlapping ids" in str(e):
+                continue
+        churn_stats.append(churn_val)
+        turnover_stats.append(turnover_val)
+    if len(churn_stats) == 0:
+        prev_week_max_churn = 1.0
+    else:
+        prev_week_max_churn = max(churn_stats)
+    if len(turnover_stats) == 0:
+        prev_week_max_turnover = 1.0
+    else:
+        prev_week_max_turnover = max(turnover_stats)
+    return prev_week_max_churn, prev_week_max_turnover

{numerai_tools-0.4.2.dev1 → numerai_tools-0.5.0.dev0}/numerai_tools/submissions.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from numerai_tools.scoring import tie_kept_rank
+import logging
 from typing import Tuple, List
 import pandas as pd
@@ -16,12 +17,15 @@ SIGNALS_ALLOWED_ID_COLS = [
     "numerai_ticker",
 ]
 SIGNALS_ALLOWED_PRED_COLS = ["prediction", "signal"]
+SIGNALS_ALLOWED_DATE_COLS = ["friday_date", "date"]
 SIGNALS_MIN_TICKERS = 100
 CRYPTO_ALLOWED_ID_COLS = ["symbol"]
 CRYPTO_ALLOWED_PRED_COLS = ["prediction", "signal"]
 CRYPTO_MIN_TICKERS = 100
+logger = logging.getLogger(__name__)
 def _validate_headers(
     expected_id_cols: List[str], expected_pred_cols: List[str], submission: pd.DataFrame
@@ -58,6 +62,17 @@ def validate_headers_numerai(submission: pd.DataFrame) -> Tuple[str, str]:
 def validate_headers_signals(submission: pd.DataFrame) -> Tuple[str, str]:
+    if "data_type" in submission.columns:
+        logger.warning(
+            "data_type column found in Signals submission. This is deprecated and will be removed in the future. "
+            "Please remove the data_type column from your Signals submission."
+        )
+        date_col = [
+            date_col
+            for date_col in SIGNALS_ALLOWED_DATE_COLS
+            if date_col in list(submission.columns)
+        ]
+        submission = submission.drop(columns=["data_type", *date_col], errors="ignore")
     return _validate_headers(
         SIGNALS_ALLOWED_ID_COLS, SIGNALS_ALLOWED_PRED_COLS, submission
     )
@@ -155,6 +170,7 @@ def clean_predictions(
     predictions: pd.DataFrame,
     id_col: str,
     rank_and_fill: bool,
+    left_join_on_ids: bool = False,
 ) -> pd.Series:
     """Prepare predictions for submission to Numerai.
     Filters out ids not in live data, drops duplicates, sets ids as index,
@@ -169,6 +185,7 @@ def clean_predictions(
         predictions: pd.DataFrame - the predictions to clean
         id_col: str - the column name of the ids
         rank_and_fill: bool - whether to rank and fill NaNs with 0.5
+        left_join_on_ids: bool - whether to left join the predictions onto the ids
     """
     assert len(live_ids) > 0, "live_ids must not be empty"
     assert live_ids.isna().sum() == 0, "live_ids must not contain NaNs"
@@ -177,13 +194,15 @@ def clean_predictions(
     # drop null indices
     predictions = predictions[~predictions[id_col].isna()]
     predictions = (
-        predictions
-        # filter out ids not in live data
-        [predictions[id_col].isin(live_ids)]
+        predictions[
+            # filter out ids not in live data
+            predictions[id_col].isin(live_ids)
+        ]
         # drop duplicate ids (keep first)
         .drop_duplicates(subset=id_col, keep="first")
         # set ids as index
-        .set_index(id_col).sort_index()
+        .set_index(id_col)
+        .sort_index()
     )
     # rank and fill with 0.5
     if rank_and_fill:

{numerai_tools-0.4.2.dev1 → numerai_tools-0.5.0.dev0}/numerai_tools.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: numerai-tools
-Version: 0.4.2.dev1
+Version: 0.5.0.dev0
 Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
 Home-page: https://github.com/numerai/numerai-tools
 Maintainer: Numerai

{numerai_tools-0.4.2.dev1 → numerai_tools-0.5.0.dev0}/setup.py RENAMED Viewed

@@ -1,7 +1,7 @@
 from setuptools import setup
 from setuptools import find_packages
-VERSION = "0.4.2.dev1"
+VERSION = "0.5.0.dev0"
 def load(path):

numerai_tools-0.5.0.dev0/tests/test_signals.py ADDED Viewed

@@ -0,0 +1,139 @@
+import unittest
+import numpy as np
+import pandas as pd  # type: ignore
+from numerai_tools.signals import (
+    churn,
+    turnover,
+    calculate_max_churn_and_turnover,
+)
+from .util import (
+    generate_fake_universe,
+    generate_new_submission,
+)
+class TestSignals(unittest.TestCase):
+    def setUp(self):
+        self.up = pd.Series(list(range(5))).rename("up")
+        self.down = pd.Series(list(reversed(range(5)))).rename("down")
+        self.up_down = pd.Series([0, 1, 2, 1, 0]).rename("up_down")
+        self.oscillate = pd.Series([1, 0, 1, 0, 1]).rename("oscillate")
+        self.constant = pd.Series([1, 1, 1, 1, 1]).rename("pos_neg")
+    def test_churn(self):
+        assert np.isclose(churn(self.up, self.up), 0)
+        assert np.isclose(churn(self.up, self.up_down), 1)
+        assert np.isclose(churn(self.up, self.oscillate), 1)
+        assert np.isclose(churn(self.up, self.down), 2)
+        self.assertRaisesRegex(
+            AssertionError,
+            "s2 must have non-zero standard deviation",
+            churn,
+            self.up,
+            self.constant,
+        )
+    def test_churn_tb(self):
+        tmp = churn(self.up, self.up, top_bottom=2)
+        assert np.isclose(tmp, 0), tmp
+        tmp = churn(self.up, self.up_down, top_bottom=2)
+        assert np.isclose(tmp, 0.5), tmp
+        tmp = churn(self.up, self.oscillate, top_bottom=2)
+        assert np.isclose(tmp, 0.5), tmp
+        tmp = churn(self.up, self.down, top_bottom=2)
+        assert np.isclose(tmp, 1), tmp
+        tmp = churn(self.up, self.constant, top_bottom=2)
+        assert np.isclose(tmp, 0), tmp
+    def test_turnover(self):
+        assert np.isclose(turnover(self.up, self.up), 0)
+        assert np.isclose(turnover(self.up, self.up_down), 3)
+        assert np.isclose(turnover(self.up, self.oscillate), 4.5)
+        assert np.isclose(turnover(self.up, self.down), 6)
+        assert np.isclose(turnover(self.up, self.constant), 3.5)
+    def test_churn_first_submission(self):
+        """
+        Test that the churn function works for the first submission
+        No exceptions should be raised, should return 1
+        """
+        fake_universe = generate_fake_universe("20130308")
+        fake_submission = generate_new_submission(fake_universe)
+        fake_neutralizers = pd.DataFrame(
+            {
+                "neutralizer_1": [0.1] * len(fake_universe),
+                "neutralizer_2": [0.2] * len(fake_universe),
+            },
+            index=fake_universe["numerai_ticker"],
+        )
+        fake_sample_weights = pd.Series(
+            [0.5] * len(fake_universe),
+            index=fake_universe["numerai_ticker"],
+            name="sample_weight",
+        )
+        churn, turnover = calculate_max_churn_and_turnover(
+            curr_sub=fake_submission,
+            curr_neutralizer=fake_neutralizers,
+            curr_weight=fake_sample_weights,
+            prev_week_subs=[],
+            prev_neutralizers={"20240208": fake_neutralizers},
+            prev_sample_weights={"20240208": fake_sample_weights},
+            universe=fake_universe.set_index("numerai_ticker").sort_index(),
+            curr_signal_col="signal",
+            curr_ticker_col="numerai_ticker",
+        )
+        assert np.isclose(churn, 1)
+        assert np.isclose(turnover, 1)
+    def test_churn_handles_different_id_columns(self):
+        """
+        Test that the churn function works when
+        previous submission has different id columns.
+        """
+        fake_universe = generate_fake_universe("20130308")
+        fake_submission = generate_new_submission(fake_universe, legacy_headers=True)
+        new_fake_universe = generate_fake_universe(
+            date_value="20130308", ticker_col="ticker"
+        )
+        fake_universe["ticker"] = new_fake_universe["ticker"]
+        prev_submission = fake_submission.copy()
+        fake_neutralizers = pd.DataFrame(
+            {
+                "neutralizer_1": [0.1] * len(fake_universe),
+                "neutralizer_2": [0.2] * len(fake_universe),
+            },
+            index=fake_universe["numerai_ticker"],
+        )
+        fake_sample_weights = pd.Series(
+            [0.5] * len(fake_universe),
+            index=fake_universe["numerai_ticker"],
+            name="sample_weight",
+        )
+        # switch out the numerai_ticker column in-place
+        prev_submission["numerai_ticker"] = new_fake_universe["ticker"]
+        prev_submission.rename(columns={"numerai_ticker": "ticker"}, inplace=True)
+        prev_neutralizers = fake_neutralizers.copy()
+        prev_neutralizers.index = new_fake_universe["ticker"]
+        prev_neutralizers.index.name = "ticker"
+        prev_sample_weights = fake_sample_weights.copy()
+        prev_sample_weights.index = new_fake_universe["ticker"]
+        prev_sample_weights.index.name = "ticker"
+        churn, turnover = calculate_max_churn_and_turnover(
+            curr_sub=fake_submission,
+            curr_neutralizer=fake_neutralizers,
+            curr_weight=fake_sample_weights,
+            prev_week_subs={"20240208": prev_submission},
+            prev_neutralizers={"20240208": prev_neutralizers},
+            prev_sample_weights={"20240208": prev_sample_weights},
+            universe=fake_universe.set_index("numerai_ticker").sort_index(),
+            curr_signal_col="signal",
+            curr_ticker_col="numerai_ticker",
+        )
+        assert np.isclose(churn, 0)
+        assert np.isclose(turnover, 0)
+if __name__ == "__main__":
+    unittest.main()

{numerai_tools-0.4.2.dev1 → numerai_tools-0.5.0.dev0}/tests/test_submissions.py RENAMED Viewed

@@ -155,6 +155,18 @@ class TestSubmissions(unittest.TestCase):
                 sub[[sub.columns[1]]],
             )
+    def test_validate_headers_signals_data_type_and_date_col(self):
+        fake_sub = generate_submission(self.ids, "ticker", "signal")
+        fake_sub["data_type"] = "signals"
+        fake_sub["friday_date"] = "2023-01-01"
+        with self.assertLogs(level="WARNING") as cm:
+            assert validate_headers_signals(fake_sub) == ("ticker", "signal")
+        self.assertIn(
+            "WARNING:numerai_tools.submissions:data_type column found in Signals submission. This is deprecated and will be removed in the future. "
+            "Please remove the data_type column from your Signals submission.",
+            cm.output[0],
+        )
     def test_validate_headers_crypto(self):
         for sub in self.crypto_subs:
             assert validate_headers_crypto(sub) == tuple(sub.columns)

numerai_tools-0.4.2.dev1/numerai_tools/signals.py DELETED Viewed

@@ -1,72 +0,0 @@
-from numerai_tools.scoring import (
-    filter_sort_index,
-    filter_sort_top_bottom,
-    spearman_correlation,
-)
-from typing import List, Tuple, Union, Optional
-import pandas as pd
-def churn(
-    s1: pd.Series,
-    s2: pd.Series,
-    top_bottom: Optional[int] = None,
-) -> float:
-    """Calculate the churn between two series. Churn is the proportion of elements
-    that are different between the two series.
-    For 2 given series with overlapping indices, churn is 1 - Spearman Correlation.
-    If top_bottom is provided, the churn is calculated as the average of the % of
-    tickers that stay in the top and bottom predictions. This is only relevant when
-    the series are rank signals and not portfolio weights.
-    Arguments:
-        s1: pd.Series - the first series to compare
-        s2: pd.Series - the second series to compare
-        top_bottom: Optional[int] - the number of top and bottom predictions to use
-                                    when calculating the correlation. Results in
-                                    2*top_bottom predictions.
-    Returns:
-        float - the churn between the two series
-    """
-    if top_bottom is not None and top_bottom > 0:
-        s1_top, s1_bot = filter_sort_top_bottom(s1, top_bottom, False)
-        s2_top, s2_bot = filter_sort_top_bottom(s2, top_bottom, False)
-        top_overlap = len(s1_top.index.intersection(s2_top.index)) / top_bottom
-        bot_overlap = len(s1_bot.index.intersection(s2_bot.index)) / top_bottom
-        avg_overlap = (top_overlap + bot_overlap) / 2
-        return 1 - avg_overlap
-    s1, s2 = filter_sort_index(s1, s2)
-    assert s1.std() > 0, "s1 must have non-zero standard deviation"
-    assert s2.std() > 0, "s2 must have non-zero standard deviation"
-    return 1 - spearman_correlation(s1, s2)
-def turnover(
-    s1: pd.Series,
-    s2: pd.Series,
-):
-    """Calculate the turnover between two series. Turnover is the total change in weights between
-    the two series divided by 2.
-    For 2 given series with overlapping indices, join the series on index, fill nans with zeroes
-    and calculate turnover as the absolute total difference between the two series divided by 2.
-    This is only relevant when the series are portfolio weights and not rank signals.
-    Arguments:
-        s1: pd.Series - the first series to compare
-        s2: pd.Series - the second series to compare
-        top_bottom: Optional[int] - the number of top and bottom predictions to use
-                                    when calculating the correlation. Results in
-                                    2*top_bottom predictions.
-    Returns:
-        float - the turnover between the two series
-    """
-    s1, s2 = filter_sort_index(s1, s2)
-    turnover = (s1 - s2).abs().sum() / 2
-    return turnover

numerai_tools-0.4.2.dev1/tests/test_signals.py DELETED Viewed

@@ -1,51 +0,0 @@
-import unittest
-import numpy as np
-import pandas as pd  # type: ignore
-from numerai_tools.signals import churn, turnover
-class TestSignals(unittest.TestCase):
-    def setUp(self):
-        self.up = pd.Series(list(range(5))).rename("up")
-        self.down = pd.Series(list(reversed(range(5)))).rename("down")
-        self.up_down = pd.Series([0, 1, 2, 1, 0]).rename("up_down")
-        self.oscillate = pd.Series([1, 0, 1, 0, 1]).rename("oscillate")
-        self.constant = pd.Series([1, 1, 1, 1, 1]).rename("pos_neg")
-    def test_churn(self):
-        assert np.isclose(churn(self.up, self.up), 0)
-        assert np.isclose(churn(self.up, self.up_down), 1)
-        assert np.isclose(churn(self.up, self.oscillate), 1)
-        assert np.isclose(churn(self.up, self.down), 2)
-        self.assertRaisesRegex(
-            AssertionError,
-            "s2 must have non-zero standard deviation",
-            churn,
-            self.up,
-            self.constant,
-        )
-    def test_churn_tb(self):
-        tmp = churn(self.up, self.up, top_bottom=2)
-        assert np.isclose(tmp, 0), tmp
-        tmp = churn(self.up, self.up_down, top_bottom=2)
-        assert np.isclose(tmp, 0.5), tmp
-        tmp = churn(self.up, self.oscillate, top_bottom=2)
-        assert np.isclose(tmp, 0.5), tmp
-        tmp = churn(self.up, self.down, top_bottom=2)
-        assert np.isclose(tmp, 1), tmp
-        tmp = churn(self.up, self.constant, top_bottom=2)
-        assert np.isclose(tmp, 0), tmp
-    def test_turnover(self):
-        assert np.isclose(turnover(self.up, self.up), 0)
-        assert np.isclose(turnover(self.up, self.up_down), 3)
-        assert np.isclose(turnover(self.up, self.oscillate), 4.5)
-        assert np.isclose(turnover(self.up, self.down), 6)
-        assert np.isclose(turnover(self.up, self.constant), 3.5)
-if __name__ == "__main__":
-    unittest.main()