PyPI - numerai-tools - Versions diffs - 0.5.0.dev13__tar.gz → 0.5.2__tar.gz - Mend

numerai-tools 0.5.0.dev13__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

{numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,9 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: numerai-tools
-Version: 0.5.0.dev13
+Version: 0.5.2
 Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
 License: MIT
+License-File: LICENSE
 Author: Numerai Engineering
 Author-email: engineering@numer.ai
 Requires-Python: >=3.11

{numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/numerai_tools/scoring.py RENAMED Viewed

@@ -468,6 +468,7 @@ def numerai_corr(
     targets: pd.Series,
     max_filtered_index_ratio: float = DEFAULT_MAX_FILTERED_INDEX_RATIO,
     top_bottom: Optional[int] = None,
+    target_pow15: bool = True,
 ) -> pd.Series:
     """Calculates the canonical Numerai correlation.
     1. Re-center the target on 0
@@ -484,6 +485,8 @@ def numerai_corr(
         top_bottom: Optional[int] - the number of top and bottom predictions to use
                                     when calculating the correlation. Results in
                                     2*top_bottom predictions.
+        target_pow15: bool - whether to raise the targets to the 1.5 power, defaults to True.
+                             Set to False if you are passing in returns as the targets.
     Returns:
         pd.Series - the resulting correlation scores for each column in predictions
@@ -493,7 +496,8 @@ def numerai_corr(
         targets, predictions, max_filtered_index_ratio
     )
     predictions = tie_kept_rank__gaussianize__pow_1_5(predictions)
-    targets = power(targets.to_frame(), 1.5)[targets.name]
+    if target_pow15:
+        targets = power(targets.to_frame(), 1.5)[targets.name]
     scores = predictions.apply(
         lambda sub: pearson_correlation(targets, sub, top_bottom)
     )
@@ -610,12 +614,14 @@ def meta_portfolio_contribution(
     sample_weights: pd.Series,
     targets: pd.Series,
 ) -> pd.Series:
-    """Calculates the "meta portfolio" score:
-        - rank, normalize, and power each signal
-        - convert each signal into neutralized weights
-        - generate the stake-weighted portfolio
-        - calculate the gradient of the portfolio w.r.t. the stakes
-        - multiplying the weights by the targets
+    """Calculates the "meta portfolio" gradient w.r.t. stakes:
+    - rank, normalize, and power each signal
+    - convert each signal into neutralized weights
+    - center weights across samples (explicit W_c = C W)
+    - generate the stake-weighted portfolio
+    - calculate the gradient of the portfolio w.r.t. the stakes
+    - multiply by the (centered) targets
     Arguments:
         predictions: pd.DataFrame - the predictions to evaluate
         stakes: pd.Series - the stakes to use as weights
@@ -623,22 +629,41 @@ def meta_portfolio_contribution(
         sample_weights: pd.Series - the universe sampling weights
         targets: pd.Series - the live targets to evaluate against
     """
-    targets = center(targets)
+    # Align predictions and targets on the same index / universe
     predictions, targets = filter_sort_index(predictions, targets)
+    # Center targets in sample space: t_c = C t
+    targets = center(targets)
+    # Normalize stakes to sum to 1
     stake_weights = weight_normalize(stakes.fillna(0))
     assert np.isclose(stake_weights.sum(), 1), "Stakes must sum to 1"
+    # Generate neutralized weights W(predictions, neutralizers, sample_weights)
     weights = generate_neutralized_weights(predictions, neutralizers, sample_weights)
-    w = cast(np.ndarray, weights[stakes.index].values)
-    s = cast(np.ndarray, stake_weights.values)
-    t = cast(np.ndarray, targets.values)
-    swp = w @ s
-    swp = swp - swp.mean()
-    l1_norm = np.sum(np.abs(swp))
-    l1_norm_squared = np.power(l1_norm, 2)
-    swp_sign = np.sign(swp)
-    swp_alpha = np.dot(swp, t)
-    directional_gradient = l1_norm * t - swp_sign * swp_alpha
-    jacobian_vector_product = directional_gradient.reshape(-1, 1) / l1_norm_squared
-    centered_jacobian = jacobian_vector_product - jacobian_vector_product.mean()
-    mpc = (w.T @ centered_jacobian).squeeze()
+    # Extract aligned matrices/vectors
+    w = cast(np.ndarray, weights[stakes.index].values)  # W ∈ R^{N×K}
+    s = cast(np.ndarray, stake_weights.values)  # s ∈ R^K
+    t = cast(np.ndarray, targets.values)  # t_c ∈ R^N (already centered)
+    # Explicit centering of weights across samples:
+    # W_c = C W = W - 1 μ^T, where μ is the column-wise mean of W
+    w_centered = w - w.mean(axis=0, keepdims=True)  # W_c
+    # Centered prediction vector v = W_c s
+    v = w_centered @ s  # v ∈ R^N, already mean ~ 0
+    # Optionally re-center to remove numerical drift
+    v = v - v.mean()
+    # Its L2 norm r = ||v||
+    l2_norm = np.sqrt(np.sum(v**2))
+    # Residualize W_c against v:
+    # residualized_w ≈ R_v W_c = (I - v v^T / ||v||^2) W_c
+    residualized_w = orthogonalize(w_centered, v)
+    # Gradient: ∇_s α = (1 / ||v||) (R_v W_c)^T t_c
+    mpc = (residualized_w.T @ t).squeeze() / l2_norm
     return pd.Series(mpc, index=stakes.index)

{numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/numerai_tools/signals.py RENAMED Viewed

@@ -82,23 +82,48 @@ def calculate_max_churn_and_turnover(
     prev_neutralizers: dict[str, pd.DataFrame],
     prev_sample_weights: dict[str, pd.Series],
 ) -> Tuple[float, float]:
-    """Calculate the maximum churn and turnover with respect to previous submissions.
-    This function iterates over previous submissions and calculates churn and turnover
-    for each submission against the current submission. It expects all data to be
-    indexed on the same type tickers/IDs (e.g. all numerai_ticker, or all composite_figi, or all etc.) .
+    """Calculate the maximum churn and turnover of the current submission with respect to previous submissions.
+    This function iterates over previous submissions and calculates churn and turnover for each submission
+    against the current submission. It expects the following:
+        - all submissions, neutralizers, and sample weights are indexed on the same type of tickers/IDs
+          (e.g. all numerai_ticker, or all composite_figi, or all etc.)
+        - neutralizers and sample weights cover the full universe of their respective eras. This means you
+          should avoid removing rows from neutralizers or sample weights before passing them to this function.
+    In a live submission environment your submissions are joined on their respective full universes, ranked,
+    and then any NaNs are filled with 0.5 before calculating churn and turnover. So, if you provide filtered
+    neutralizers or sample weights, your locally calculated churn and turnover may not match the live value.
     Arguments:
-        curr_sub: pd.Series - the current submission as a Series indexed on tickers/ids
-        curr_neutralizer: pd.DataFrame - the neutralizer DataFrame for the current submission indexed on numerai_ticker
-        curr_sample_weight: pd.Series - the sample weights Series for the current submission indexed on numerai_ticker
-        prev_subs: dict[str, pd.DataFrame] - a dictionary of datestamps to submissions, where each submission is a DataFrame
-                     with 2 columns: a ticker/id column and a signal/prediction column. To calculate churn
-                     and turnover for a live submission, use the most recent 5 submissions. For diagnostics,
-                     just provide the previous era.
-        prev_neutralizers: dict[str, pd.DataFrame] - a dictionary of datestamps to neutralizers DataFrames where each neutralizers
-                             DataFrame is indexed on the same ticker column as the current submission
-        prev_sample_weights: dict[str, pd.Series] - a dictionary of datestamps to sample weights where each sample weights
-                             Series is indexed on the same ticker column as the current submission
+        curr_sub: pd.Series - current-era submission indexed on tickers/ids
+        curr_neutralizer: pd.DataFrame
+            - current-era neutralizers indexed on the same type of tickers/ids.
+              We expect these to cover the full universe for the current era.
+        curr_sample_weight: pd.Series
+            - current-era sample weights indexed on the same type of tickers/ids.
+              We expect these to cover the full universe for the current era.
+        prev_subs: dict[str, pd.Series]
+            - a dictionary mapping datestamps to submissions, where each submission is a
+              Series indexed on the same type of tickers/ids as the current
+              submission. To calculate churn and turnover for a live submission,
+              use the most recent 5 submissions. For diagnostics, just provide the
+              last 1 era.
+        prev_neutralizers: dict[str, pd.DataFrame]
+            - a dictionary mapping datestamps to neutralizers DataFrames where each neutralizers
+              DataFrame is indexed on the same type of tickers/ids as the current submission.
+              We expect each of these to cover the full universe of their respective eras.
+        prev_sample_weights: dict[str, pd.Series]
+            - a dictionary mapping datestamps to sample weights where each sample weights
+              Series is indexed on the same type of tickers/ids as the current submission.
+              We expect each of these to cover the full universe of their respective eras.
     Returns:
         prev_week_max_churn -- the maximum churn from previous submissions
         prev_week_max_turnover -- the maximum turnover from previous submissions

{numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "numerai-tools"
-version = "0.5.0.dev13"
+version = "0.5.2"
 description = "A collection of open-source tools to help interact with Numerai, model data, and automate submissions."
 authors = [
     {name = "Numerai Engineering",email = "engineering@numer.ai"}

{numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/LICENSE RENAMED Viewed

File without changes

{numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/README.md RENAMED Viewed

File without changes

{numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/numerai_tools/__init__.py RENAMED Viewed

File without changes

{numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/numerai_tools/py.typed RENAMED Viewed

File without changes

{numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/numerai_tools/submissions.py RENAMED Viewed

File without changes

numerai-tools 0.5.0.dev13__tar.gz → 0.5.2__tar.gz

numerai-tools 0.5.0.dev13__tar.gz → 0.5.2__tar.gz