PyPI - numerai-tools - Versions diffs - 0.5.0.dev10__tar.gz → 0.5.0.dev11__tar.gz - Mend

numerai-tools 0.5.0.dev10__tar.gz → 0.5.0.dev11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

{numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: numerai-tools
-Version: 0.5.0.dev10
+Version: 0.5.0.dev11
 Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
 License: MIT
 Author: Numerai Engineering

{numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/numerai_tools/scoring.py RENAMED Viewed

@@ -560,7 +560,7 @@ def generate_neutralized_weights(
     neutralizers: pd.DataFrame,
     sample_weights: pd.Series,
     center_and_normalize: bool = False,
-) -> pd.Series:
+) -> pd.DataFrame:
     assert not predictions.isna().any().any(), "Predictions contain NaNs"
     assert not neutralizers.isna().any().any(), "Normalization factors contain NaNs"
     assert not sample_weights.isna().any(), "Weights contain NaNs"

{numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/numerai_tools/signals.py RENAMED Viewed

@@ -76,111 +76,92 @@ def turnover(
 def calculate_max_churn_and_turnover(
-    curr_sub: pd.DataFrame,
+    curr_sub: pd.Series,
     curr_neutralizer: pd.DataFrame,
-    curr_weight: pd.Series,
-    prev_week_subs: dict[str, pd.DataFrame],
+    curr_sample_weight: pd.Series,
+    prev_subs: dict[str, pd.Series],
     prev_neutralizers: dict[str, pd.DataFrame],
     prev_sample_weights: dict[str, pd.Series],
-    universe: pd.DataFrame,
-    curr_signal_col: str,
-    curr_ticker_col: str,
 ) -> Tuple[float, float]:
     """Calculate the maximum churn and turnover with respect to previous submissions.
+    This function iterates over previous submissions and calculates churn and turnover
+    for each submission against the current submission. It expects all data to be
+    indexed on the same type of tickers/IDs (e.g. all numerai_ticker, or all composite_figi, etc.).
     Arguments:
-        curr_sub -- the current submission
-        curr_neutralizer -- the neutralizer DataFrame for the current submission
-        curr_weight -- the sample weights Series for the current submission
-        prev_week_subs -- a dictionary of datestamps to submissions
-        prev_neutralizers -- a dictionary of datestamps to neutralizers
-        prev_sample_weights -- a dictionary of datestamps to sample weights
-        universe -- the universe DataFrame for the current era
-        curr_signal_col -- the column name for signal in the current submission
-        curr_ticker_col -- the column name for tickers in the current submission
+        curr_sub: pd.Series - the current submission as a Series indexed on tickers/ids
+        curr_neutralizer: pd.DataFrame - the neutralizer DataFrame for the current submission indexed on numerai_ticker
+        curr_sample_weight: pd.Series - the sample weights Series for the current submission indexed on numerai_ticker
+        prev_subs: dict[str, pd.DataFrame] - a dictionary of datestamps to submissions, where each submission is a DataFrame
+                     with 2 columns: a ticker/id column and a signal/prediction column. To calculate churn
+                     and turnover for a live submission, use the most recent 5 submissions. For diagnostics,
+                     just provide the previous era.
+        prev_neutralizers: dict[str, pd.DataFrame] - a dictionary of datestamps to neutralizers DataFrames where each neutralizers
+                             DataFrame is indexed on the same ticker column as the current submission
+        prev_sample_weights: dict[str, pd.Series] - a dictionary of datestamps to sample weights where each sample weights
+                             Series is indexed on the same ticker column as the current submission
     Returns:
         prev_week_max_churn -- the maximum churn from previous submissions
         prev_week_max_turnover -- the maximum turnover from previous submissions
     """
-    universe = universe.reset_index()
     (
         curr_ticker_col,
         curr_signal_col,
         _,
-        curr_sub,
+        curr_sub_df,
         _,
     ) = validate_submission_signals(
-        universe=universe,
-        submission=curr_sub,
+        universe=curr_sample_weight.index.to_frame(),
+        submission=curr_sub.reset_index(),
     )
-    curr_sub_vector = clean_submission(
-        universe=universe,
-        submission=curr_sub,
+    curr_sub = clean_submission(
+        universe=curr_sample_weight.index.to_frame(),
+        submission=curr_sub_df,
         src_id_col=curr_ticker_col,
         src_signal_col=curr_signal_col,
         rank_and_fill=True,
     )
+    print("curr_sub", curr_sub)
     churn_stats = []
     turnover_stats = []
     neutralized_weights = generate_neutralized_weights(
-        curr_sub_vector.to_frame(),
+        curr_sub.to_frame(),
         curr_neutralizer,
-        curr_weight,
+        curr_sample_weight,
         center_and_normalize=True,
-    )[curr_sub_vector.name]
-    for datestamp in prev_week_subs:
-        prev_sub = prev_week_subs[datestamp]
+    )[curr_sub.name]
+    for datestamp in prev_subs:
+        prev_sub = prev_subs[datestamp]
         prev_neutralizer = prev_neutralizers[datestamp]
-        prev_weight = prev_sample_weights[datestamp]
+        prev_sample_weight = prev_sample_weights[datestamp]
         (
             prev_ticker_col,
             prev_signal_col,
             _,
-            prev_sub,
+            prev_sub_df,
             _,
         ) = validate_submission_signals(
-            universe=universe,
-            submission=prev_sub,
+            universe=prev_sample_weight.index.to_frame(),
+            submission=prev_sub.reset_index(),
         )
-        filtered_prev_sub = clean_submission(
-            universe=universe,
-            submission=prev_sub,
+        prev_sub = clean_submission(
+            universe=prev_sample_weight.index.to_frame(),
+            submission=prev_sub_df,
             src_id_col=prev_ticker_col,
             src_signal_col=prev_signal_col,
             dst_id_col=curr_ticker_col,
             dst_signal_col=curr_signal_col,
             rank_and_fill=True,
         )
-        prev_neutralizer = (
-            remap_ids(
-                prev_neutralizer.reset_index(),
-                universe,
-                str(prev_neutralizer.index.name),
-                curr_ticker_col,
-            )
-            .set_index(curr_ticker_col)
-            .filter(like="neutralizer_")
-            .dropna()
-        )
-        prev_weight = (
-            remap_ids(
-                prev_weight.reset_index(),
-                universe,
-                str(prev_weight.index.name),
-                curr_ticker_col,
-            )
-            .set_index(curr_ticker_col)[prev_weight.name]
-            .dropna()
-        )
         prev_neutralized_weights = generate_neutralized_weights(
-            filtered_prev_sub.to_frame(),
+            prev_sub.to_frame(),
             prev_neutralizer,
-            prev_weight,
+            prev_sample_weight,
             center_and_normalize=True,
-        )[filtered_prev_sub.name]
+        )[prev_sub.name]
+        print("prev_sub", prev_sub)
         try:
-            churn_val = abs(churn(curr_sub_vector, filtered_prev_sub))
+            churn_val = abs(churn(curr_sub, prev_sub))
         except AssertionError as e:
             if "does not have enough overlapping ids" in str(e):
                 continue

{numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/numerai_tools/submissions.py RENAMED Viewed

@@ -88,6 +88,7 @@ def validate_headers_signals(
         assert (
             date_col_name is not None
         ), "invalid_submission_headers: submission must contain a date column"
+    print(submission)
     ticker_col, signal_col = _validate_headers(
         submission,
         SIGNALS_ALLOWED_ID_COLS,
@@ -228,11 +229,8 @@ def validate_submission_signals(
             "data_type column found in Signals submission. This is deprecated and support will be removed in the future. "
             "Please remove the data_type column from your Signals submission."
         )
-        submission.drop(
-            columns=["data_type"],
-            errors="ignore",
-            inplace=True,
-        )
+    submission.drop(columns=["data_type"], errors="ignore", inplace=True)
+    print(submission)
     ticker_col, signal_col, date_col = validate_headers_signals(
         submission, assert_date_col
     )
@@ -266,7 +264,7 @@ def validate_submission_crypto(
 def remap_ids(
     data: pd.DataFrame,
-    ticker_map: pd.Series | pd.DataFrame,
+    ticker_map: pd.DataFrame,
     src_id_col: str,
     dst_id_col: str,
 ) -> pd.DataFrame:
@@ -277,25 +275,25 @@ def remap_ids(
     Arguments:
         data: pd.DataFrame - the data to remap
-        ticker_map: pd.Series | pd.DataFrame - the mapping of source ids to destination ids
+        ticker_map: pd.DataFrame - the mapping of source ids to destination ids
         src_id_col: str - the name of the source ids column in the data
         dst_id_col: str - the name of the destination ids column in the ticker map
     """
     # first, index the universe and data on the source ids
-    indexed_map = ticker_map.reset_index().set_index(src_id_col)
+    indexed_map = ticker_map.set_index(src_id_col, drop=False)
     indexed_data = data.set_index(src_id_col)
     return (
         # then, join the universe and data
         indexed_map.join(indexed_data)
         # get just the destination ids and prediction columns
-        .reset_index()[[dst_id_col, *indexed_data.columns]]
+        .reset_index(drop=True)[[dst_id_col, *indexed_data.columns]]
         # finally, sort by the destination ticker column
         .sort_values(dst_id_col)
     )
 def clean_submission(
-    universe: pd.Series | pd.DataFrame,
+    universe: pd.DataFrame,
     submission: pd.DataFrame,
     src_id_col: str,
     src_signal_col: str,

{numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "numerai-tools"
-version = "0.5.0.dev10"
+version = "0.5.0.dev11"
 description = "A collection of open-source tools to help interact with Numerai, model data, and automate submissions."
 authors = [
     {name = "Numerai Engineering",email = "engineering@numer.ai"}

{numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/LICENSE RENAMED Viewed

File without changes

{numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/README.md RENAMED Viewed

File without changes

{numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/numerai_tools/__init__.py RENAMED Viewed

File without changes

{numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/numerai_tools/py.typed RENAMED Viewed

File without changes

numerai-tools 0.5.0.dev10__tar.gz → 0.5.0.dev11__tar.gz

numerai-tools 0.5.0.dev10__tar.gz → 0.5.0.dev11__tar.gz