PyPI - numerai-tools - Versions diffs - 0.5.0.dev0__tar.gz → 0.5.0.dev2__tar.gz - Mend

numerai-tools 0.5.0.dev0__tar.gz → 0.5.0.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

{numerai_tools-0.5.0.dev0/numerai_tools.egg-info → numerai_tools-0.5.0.dev2}/PKG-INFO RENAMED Viewed

@@ -1,12 +1,11 @@
-Metadata-Version: 2.1
+Metadata-Version: 2.3
 Name: numerai-tools
-Version: 0.5.0.dev0
+Version: 0.5.0.dev2
 Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
-Home-page: https://github.com/numerai/numerai-tools
-Maintainer: Numerai
-Maintainer-email: support@numer.ai
-License: MIT License
-Platform: OS Independent
+License: MIT
+Author: Numerai Engineering
+Author-email: engineering@numer.ai
+Requires-Python: >=3.11
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
 Classifier: Intended Audience :: Science/Research
@@ -15,8 +14,15 @@ Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Scientific/Engineering
+Requires-Dist: numpy (>=2.0.0,<3.0.0)
+Requires-Dist: pandas (>=2.2.2,<3.0.0)
+Requires-Dist: scikit-learn (>=1.5.0,<2.0.0)
+Requires-Dist: scipy (>=1.13.0,<2.0.0)
+Project-URL: Documentation, https://docs.numer.ai/
+Project-URL: Homepage, https://numer.ai
+Project-URL: Repository, https://github.com/numerai/numerai-tools
 Description-Content-Type: text/markdown
-License-File: LICENSE
 # numerai-tools
 A collection of open-source tools to help interact with Numerai, model data, and automate submissions.

{numerai_tools-0.5.0.dev0 → numerai_tools-0.5.0.dev2}/numerai_tools/scoring.py RENAMED Viewed

@@ -1,8 +1,8 @@
-from typing import List, Tuple, Union, Optional, TypeVar
+from typing import List, Tuple, Union, Optional, TypeVar, cast, Any
 import numpy as np
-import pandas as pd  # type: ignore
-from scipy import stats  # type: ignore
+import pandas as pd
+from scipy import stats
 from sklearn.preprocessing import OneHotEncoder  # type: ignore
@@ -43,13 +43,13 @@ def filter_sort_index(
         "s2 does not have enough overlapping ids with s1,"
         f" must have >= {round(1-max_filtered_ratio,2)*100}% overlapping ids"
     )
-    return s1.loc[ids].sort_index(), s2.loc[ids].sort_index()
+    return cast(S1, s1.loc[ids].sort_index()), cast(S2, s2.loc[ids].sort_index())
 def filter_sort_index_many(
-    inputs: List[pd.DataFrame],
+    inputs: List[Any],
     max_filtered_ratio: float = DEFAULT_MAX_FILTERED_INDEX_RATIO,
-) -> List[pd.DataFrame]:
+) -> List[Any]:
     """Filters the indices of the given list of series to match each other,
     then sorts the indices, then checks that we didn't filter too many indices
     before returning the filtered and sorted series.
@@ -75,25 +75,38 @@ def filter_sort_index_many(
 def filter_sort_top_bottom(
-    s: pd.Series, top_bottom: int, return_concatenated: bool = True
-) -> Union[pd.Series, Tuple[pd.Series, pd.Series]]:
+    s: pd.Series, top_bottom: int
+) -> Tuple[pd.Series, pd.Series]:
     """Filters the series according to the top n and bottom n values
-    then sorts the index and returns the filtered and sorted series.
+    then sorts the index and returns two filtered and sorted series
+    for the top and bottom values respectively.
     Arguments:
         s: pd.Series - the data to filter and sort
         top_bottom: int - the number of top n and bottom n values to keep
     Returns:
-        pd.Series - the filtered and sorted data
+        Tuple[pd.Series, pd.Series] - the filtered and sorted top and bottom series respectively
     """
     tb_idx = np.argsort(s, kind="stable")
     bot = s.iloc[tb_idx[:top_bottom]]
     top = s.iloc[tb_idx[-top_bottom:]]
-    if return_concatenated:
-        return pd.concat([top, bot]).sort_index()
-    else:
-        return top.sort_index(), bot.sort_index()
+    return top.sort_index(), bot.sort_index()
+def filter_sort_top_bottom_concat(s: pd.Series, top_bottom: int) -> pd.Series:
+    """Similar to filter_sort_top_bottom, but concatenates the top and bottom series
+    into 1 series and then sorts the index.
+    Arguments:
+        s: pd.Series - the data to filter and sort
+        top_bottom: int - the number of top n and bottom n values to keep
+    Returns:
+        pd.Series - the concatenated and sorted series of top and bottom values
+    """
+    top, bot = filter_sort_top_bottom(s, top_bottom)
+    return pd.concat([top, bot]).sort_index()
 def rank(df: pd.DataFrame, method: str = "average") -> pd.DataFrame:
@@ -134,14 +147,14 @@ def variance_normalize(df: pd.DataFrame) -> pd.DataFrame:
     return df / np.std(df, axis=0)
-def weight_normalize(df: pd.DataFrame) -> pd.DataFrame:
-    """Scale a df such that all columns have absolute value sum == 1."""
-    return df / df.abs().sum(axis=0)
+def weight_normalize(s: S1) -> S1:
+    """Scale an input such that all columns have absolute value sum == 1."""
+    return cast(S1, s / s.abs().sum(axis=0))
-def center(df: pd.DataFrame) -> pd.DataFrame:
-    """Shift the df such that all columns have mean == 0."""
-    return df - df.mean()
+def center(s: S1) -> S1:
+    """Shift the input such that all columns have mean == 0."""
+    return cast(S1, s - s.mean())
 def standardize(df: pd.DataFrame) -> pd.DataFrame:
@@ -180,7 +193,7 @@ def pearson_correlation(
     target: pd.Series, predictions: pd.Series, top_bottom: Optional[int] = None
 ) -> float:
     if top_bottom is not None and top_bottom > 0:
-        predictions = filter_sort_top_bottom(predictions, top_bottom)
+        predictions = filter_sort_top_bottom_concat(predictions, top_bottom)
         target, predictions = filter_sort_index(
             target, predictions, (1 - top_bottom / len(target))
         )
@@ -206,7 +219,7 @@ def power(df: pd.DataFrame, p: float) -> pd.DataFrame:
     """
     assert not df.isna().any().any(), "Data contains NaNs"
     assert np.array_equal(df.index.sort_values(), df.index), "Index is not sorted"
-    result = np.sign(df) * np.abs(df) ** p
+    result = cast(pd.DataFrame, np.sign(df) * np.abs(df) ** p)
     assert ((result.std() == 0) | (result.corrwith(df) >= 0.9)).all()
     return result
@@ -222,7 +235,7 @@ def gaussian(df: pd.DataFrame) -> pd.DataFrame:
         pd.DataFrame - the gaussianized data
     """
     assert np.array_equal(df.index.sort_values(), df.index)
-    return df.apply(lambda series: stats.norm.ppf(series))
+    return df.apply(lambda series: cast(np.ndarray, stats.norm.ppf(series)))
 def orthogonalize(v: np.ndarray, u: np.ndarray) -> np.ndarray:
@@ -304,7 +317,7 @@ def correlation_contribution(
     m = gaussian(tie_kept_rank(meta_model.to_frame()))[meta_model.name].values
     # orthogonalize predictions wrt meta model
-    neutral_preds = orthogonalize(p, m)
+    neutral_preds = orthogonalize(p, cast(np.ndarray, m))
     # convert target to buckets [-2, -1, 0, 1, 2]
     if (live_targets >= 0).all() and (live_targets <= 1).all():
@@ -315,9 +328,9 @@ def correlation_contribution(
         # filter each column to its top and bottom n predictions
         neutral_preds_df = pd.DataFrame(
             neutral_preds, columns=predictions.columns, index=predictions.index
-        ).apply(lambda p: filter_sort_top_bottom(p, top_bottom))
-        # create a dataframe for targets to match the filtered predictions
-        live_targets = (
+        ).apply(lambda p: filter_sort_top_bottom_concat(p, top_bottom))
+        mmc_matrix = (
+            # create a dataframe for targets to match the filtered predictions
             neutral_preds_df.apply(
                 lambda p: filter_sort_index(
                     p,
@@ -327,19 +340,15 @@ def correlation_contribution(
             )
             .fillna(0)
             .T.values
-        )
-        # fillna with 0 so we don't get NaNs in the dot product
-        neutral_preds = neutral_preds_df.fillna(0).values
-    # multiply target and neutralized predictions
-    # this is equivalent to covariance b/c mean = 0
-    mmc = live_targets @ neutral_preds
-    if top_bottom is not None and top_bottom > 0:
+            # then fill NaNs with 0 so we don't get NaNs in the dot product
+            #  and multiply target w/ neutral preds to get MMC
+        ) @ neutral_preds_df.fillna(0).values
         # only the diagonal is the proper score
-        mmc = np.diag(mmc) / (top_bottom * 2)
+        mmc = np.diag(mmc_matrix) / (top_bottom * 2)
     else:
-        mmc /= len(live_targets)
+        # multiply target and neutralized predictions
+        # this is equivalent to covariance b/c mean = 0
+        mmc = (live_targets @ neutral_preds) / len(live_targets)
     return pd.Series(mmc, index=predictions.columns)
@@ -523,10 +532,10 @@ def max_feature_correlation(
     feature_correlations = features.apply(
         lambda f: pearson_correlation(f, s, top_bottom)
     )
-    feature_correlations = np.abs(feature_correlations)
+    feature_correlations = feature_correlations.abs()
     max_feature = feature_correlations.idxmax()
     max_corr = feature_correlations[max_feature]
-    return max_feature, max_corr
+    return str(max_feature), max_corr
 def generate_neutralized_weights(
@@ -609,9 +618,9 @@ def meta_portfolio_contribution(
             s_prime, neutralizers, sample_weights
         )
     )
-    w = weights[stakes.index].values
-    s = stake_weights.values
-    t = targets.values
+    w = cast(np.ndarray, weights[stakes.index].values)
+    s = cast(np.ndarray, stake_weights.values)
+    t = cast(np.ndarray, targets.values)
     swp = w @ s
     swp = swp - swp.mean()
     l1_norm = np.sum(np.abs(swp))

{numerai_tools-0.5.0.dev0 → numerai_tools-0.5.0.dev2}/numerai_tools/signals.py RENAMED Viewed

@@ -40,8 +40,8 @@ def churn(
         float - the churn between the two series
     """
     if top_bottom is not None and top_bottom > 0:
-        s1_top, s1_bot = filter_sort_top_bottom(s1, top_bottom, False)
-        s2_top, s2_bot = filter_sort_top_bottom(s2, top_bottom, False)
+        s1_top, s1_bot = filter_sort_top_bottom(s1, top_bottom)
+        s2_top, s2_bot = filter_sort_top_bottom(s2, top_bottom)
         top_overlap = len(s1_top.index.intersection(s2_top.index)) / top_bottom
         bot_overlap = len(s1_bot.index.intersection(s2_bot.index)) / top_bottom
         avg_overlap = (top_overlap + bot_overlap) / 2
@@ -85,11 +85,13 @@ def neutral_weight(
     neutralizer: pd.DataFrame,
     weight: pd.Series,
 ) -> pd.Series:
-    s_prime = tie_kept_rank__gaussianize__pow_1_5(submission.to_frame())[signal_col]
-    s_prime, neutralizer, weight = filter_sort_index_many(  # type: ignore
+    s_prime = tie_kept_rank__gaussianize__pow_1_5(submission.to_frame())
+    s_prime, neutralizer, weight = filter_sort_index_many(
         [s_prime, neutralizer, weight]
     )
-    neutral_weights = generate_neutralized_weights(s_prime, neutralizer, weight)
+    neutral_weights = generate_neutralized_weights(
+        s_prime[signal_col], neutralizer, weight
+    )
     neutral_weights = weight_normalize(center(neutral_weights.to_frame()))[0]
     return neutral_weights.sort_index()
@@ -161,10 +163,10 @@ def calculate_max_churn_and_turnover(
         prev_sub = prev_week_subs[datestamp]
         prev_neutralizer = prev_neutralizers[datestamp]
         prev_weight = prev_sample_weights[datestamp]
-        prev_ticker_col, prev_signal_col = validate_headers_signals(prev_sub)  # type: ignore
+        prev_ticker_col, prev_signal_col = validate_headers_signals(prev_sub)
         prev_universe = universe.reset_index().set_index(prev_ticker_col)
         filtered_prev_sub_df, _ = validate_ids_signals(
-            prev_universe.index, prev_sub, prev_ticker_col
+            prev_universe.index.to_series(), prev_sub, prev_ticker_col
         )
         # in case the previous submission has a different ticker column,
         # remap the ticker column of prev data to the current ticker column

{numerai_tools-0.5.0.dev0 → numerai_tools-0.5.0.dev2}/numerai_tools/submissions.py RENAMED Viewed

@@ -170,8 +170,7 @@ def clean_predictions(
     predictions: pd.DataFrame,
     id_col: str,
     rank_and_fill: bool,
-    left_join_on_ids: bool = False,
-) -> pd.Series:
+) -> pd.DataFrame:
     """Prepare predictions for submission to Numerai.
     Filters out ids not in live data, drops duplicates, sets ids as index,
     then optionally ranks (keeping ties) and fills NaNs with 0.5.

numerai_tools-0.5.0.dev2/pyproject.toml ADDED Viewed

@@ -0,0 +1,45 @@
+[project]
+name = "numerai-tools"
+version = "0.5.0.dev2"
+description = "A collection of open-source tools to help interact with Numerai, model data, and automate submissions."
+authors = [
+    {name = "Numerai Engineering",email = "engineering@numer.ai"}
+]
+license = {text = "MIT"}
+readme = "README.md"
+requires-python = ">=3.11"
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Environment :: Console",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Topic :: Scientific/Engineering",
+]
+[project.urls]
+homepage = "https://numer.ai"
+repository = "https://github.com/numerai/numerai-tools"
+documentation = "https://docs.numer.ai/"
+[tool.poetry]
+packages = [
+    {include = "numerai_tools", from = "."},
+]
+[tool.poetry.dependencies]
+pandas = "^2.2.2"
+numpy = "^2.0.0"
+scipy = "^1.13.0"
+scikit-learn = "^1.5.0"
+[tool.poetry.group.dev.dependencies]
+pytest = "^8.3.4"
+mypy = "^1.15.0"
+ruff = "^0.5.4"
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"

numerai_tools-0.5.0.dev0/PKG-INFO DELETED Viewed

@@ -1,22 +0,0 @@
-Metadata-Version: 2.1
-Name: numerai_tools
-Version: 0.5.0.dev0
-Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
-Home-page: https://github.com/numerai/numerai-tools
-Maintainer: Numerai
-Maintainer-email: support@numer.ai
-License: MIT License
-Platform: OS Independent
-Classifier: Development Status :: 5 - Production/Stable
-Classifier: Environment :: Console
-Classifier: Intended Audience :: Science/Research
-Classifier: License :: OSI Approved :: MIT License
-Classifier: Operating System :: OS Independent
-Classifier: Programming Language :: Python
-Classifier: Programming Language :: Python :: 3
-Classifier: Topic :: Scientific/Engineering
-Description-Content-Type: text/markdown
-License-File: LICENSE
-# numerai-tools
-A collection of open-source tools to help interact with Numerai, model data, and automate submissions.

numerai_tools-0.5.0.dev0/numerai_tools.egg-info/SOURCES.txt DELETED Viewed

@@ -1,16 +0,0 @@
-LICENSE
-README.md
-setup.py
-numerai_tools/__init__.py
-numerai_tools/py.typed
-numerai_tools/scoring.py
-numerai_tools/signals.py
-numerai_tools/submissions.py
-numerai_tools.egg-info/PKG-INFO
-numerai_tools.egg-info/SOURCES.txt
-numerai_tools.egg-info/dependency_links.txt
-numerai_tools.egg-info/requires.txt
-numerai_tools.egg-info/top_level.txt
-tests/test_scoring.py
-tests/test_signals.py
-tests/test_submissions.py

numerai_tools-0.5.0.dev0/numerai_tools.egg-info/dependency_links.txt DELETED Viewed

@@ -1 +0,0 @@
-

numerai_tools-0.5.0.dev0/numerai_tools.egg-info/requires.txt DELETED Viewed

@@ -1,4 +0,0 @@
-pandas<3.0.0,>=2.2.2
-numpy<3.0.0,>=2.0.0
-scipy<2.0.0,>=1.13.0
-scikit-learn<2.0.0,>=1.5.0

numerai_tools-0.5.0.dev0/numerai_tools.egg-info/top_level.txt DELETED Viewed

@@ -1 +0,0 @@
-numerai_tools

numerai_tools-0.5.0.dev0/setup.cfg DELETED Viewed

@@ -1,4 +0,0 @@
-[egg_info]
-tag_build =
-tag_date = 0

numerai_tools-0.5.0.dev0/setup.py DELETED Viewed

@@ -1,47 +0,0 @@
-from setuptools import setup
-from setuptools import find_packages
-VERSION = "0.5.0.dev0"
-def load(path):
-    return open(path, "r").read()
-classifiers = [
-    "Development Status :: 5 - Production/Stable",
-    "Environment :: Console",
-    "Intended Audience :: Science/Research",
-    "License :: OSI Approved :: MIT License",
-    "Operating System :: OS Independent",
-    "Programming Language :: Python",
-    "Programming Language :: Python :: 3",
-    "Topic :: Scientific/Engineering",
-]
-if __name__ == "__main__":
-    setup(
-        name="numerai_tools",
-        version=VERSION,
-        maintainer="Numerai",
-        maintainer_email="support@numer.ai",
-        description="A collection of open-source tools to help interact with Numerai, model data, and automate submissions.",
-        long_description=load("README.md"),
-        long_description_content_type="text/markdown",
-        url="https://github.com/numerai/numerai-tools",
-        platforms="OS Independent",
-        classifiers=classifiers,
-        license="MIT License",
-        package_data={
-            "numerai_tools": ["LICENSE", "README.md", "py.typed"],
-        },
-        packages=find_packages(exclude=["tests"]),
-        install_requires=[
-            # pandas 2.2.2 was the first version to support numpy 2
-            "pandas>=2.2.2,<3.0.0",
-            "numpy>=2.0.0,<3.0.0",
-            "scipy>=1.13.0,<2.0.0",
-            "scikit-learn>=1.5.0,<2.0.0",
-        ],
-    )

numerai-tools 0.5.0.dev0__tar.gz → 0.5.0.dev2__tar.gz

numerai-tools 0.5.0.dev0__tar.gz → 0.5.0.dev2__tar.gz