numerai-tools 0.5.0.dev13__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,9 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: numerai-tools
3
- Version: 0.5.0.dev13
3
+ Version: 0.5.2
4
4
  Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
5
5
  License: MIT
6
+ License-File: LICENSE
6
7
  Author: Numerai Engineering
7
8
  Author-email: engineering@numer.ai
8
9
  Requires-Python: >=3.11
@@ -468,6 +468,7 @@ def numerai_corr(
468
468
  targets: pd.Series,
469
469
  max_filtered_index_ratio: float = DEFAULT_MAX_FILTERED_INDEX_RATIO,
470
470
  top_bottom: Optional[int] = None,
471
+ target_pow15: bool = True,
471
472
  ) -> pd.Series:
472
473
  """Calculates the canonical Numerai correlation.
473
474
  1. Re-center the target on 0
@@ -484,6 +485,8 @@ def numerai_corr(
484
485
  top_bottom: Optional[int] - the number of top and bottom predictions to use
485
486
  when calculating the correlation. Results in
486
487
  2*top_bottom predictions.
488
+ target_pow15: bool - whether to raise the targets to the 1.5 power, defaults to True.
489
+ Set to False if you are passing in returns as the targets.
487
490
 
488
491
  Returns:
489
492
  pd.Series - the resulting correlation scores for each column in predictions
@@ -493,7 +496,8 @@ def numerai_corr(
493
496
  targets, predictions, max_filtered_index_ratio
494
497
  )
495
498
  predictions = tie_kept_rank__gaussianize__pow_1_5(predictions)
496
- targets = power(targets.to_frame(), 1.5)[targets.name]
499
+ if target_pow15:
500
+ targets = power(targets.to_frame(), 1.5)[targets.name]
497
501
  scores = predictions.apply(
498
502
  lambda sub: pearson_correlation(targets, sub, top_bottom)
499
503
  )
@@ -610,12 +614,14 @@ def meta_portfolio_contribution(
610
614
  sample_weights: pd.Series,
611
615
  targets: pd.Series,
612
616
  ) -> pd.Series:
613
- """Calculates the "meta portfolio" score:
614
- - rank, normalize, and power each signal
615
- - convert each signal into neutralized weights
616
- - generate the stake-weighted portfolio
617
- - calculate the gradient of the portfolio w.r.t. the stakes
618
- - multiplying the weights by the targets
617
+ """Calculates the "meta portfolio" gradient w.r.t. stakes:
618
+ - rank, normalize, and power each signal
619
+ - convert each signal into neutralized weights
620
+ - center weights across samples (explicit W_c = C W)
621
+ - generate the stake-weighted portfolio
622
+ - calculate the gradient of the portfolio w.r.t. the stakes
623
+ - multiply by the (centered) targets
624
+
619
625
  Arguments:
620
626
  predictions: pd.DataFrame - the predictions to evaluate
621
627
  stakes: pd.Series - the stakes to use as weights
@@ -623,22 +629,41 @@ def meta_portfolio_contribution(
623
629
  sample_weights: pd.Series - the universe sampling weights
624
630
  targets: pd.Series - the live targets to evaluate against
625
631
  """
626
- targets = center(targets)
632
+ # Align predictions and targets on the same index / universe
627
633
  predictions, targets = filter_sort_index(predictions, targets)
634
+
635
+ # Center targets in sample space: t_c = C t
636
+ targets = center(targets)
637
+
638
+ # Normalize stakes to sum to 1
628
639
  stake_weights = weight_normalize(stakes.fillna(0))
629
640
  assert np.isclose(stake_weights.sum(), 1), "Stakes must sum to 1"
641
+
642
+ # Generate neutralized weights W(predictions, neutralizers, sample_weights)
630
643
  weights = generate_neutralized_weights(predictions, neutralizers, sample_weights)
631
- w = cast(np.ndarray, weights[stakes.index].values)
632
- s = cast(np.ndarray, stake_weights.values)
633
- t = cast(np.ndarray, targets.values)
634
- swp = w @ s
635
- swp = swp - swp.mean()
636
- l1_norm = np.sum(np.abs(swp))
637
- l1_norm_squared = np.power(l1_norm, 2)
638
- swp_sign = np.sign(swp)
639
- swp_alpha = np.dot(swp, t)
640
- directional_gradient = l1_norm * t - swp_sign * swp_alpha
641
- jacobian_vector_product = directional_gradient.reshape(-1, 1) / l1_norm_squared
642
- centered_jacobian = jacobian_vector_product - jacobian_vector_product.mean()
643
- mpc = (w.T @ centered_jacobian).squeeze()
644
+
645
+ # Extract aligned matrices/vectors
646
+ w = cast(np.ndarray, weights[stakes.index].values) # W ∈ R^{N×K}
647
+ s = cast(np.ndarray, stake_weights.values) # s ∈ R^K
648
+ t = cast(np.ndarray, targets.values) # t_c ∈ R^N (already centered)
649
+
650
+ # Explicit centering of weights across samples:
651
+ # W_c = C W = W - 1 μ^T, where μ is the column-wise mean of W
652
+ w_centered = w - w.mean(axis=0, keepdims=True) # W_c
653
+
654
+ # Centered prediction vector v = W_c s
655
+ v = w_centered @ s # v ∈ R^N, already mean ~ 0
656
+ # Re-center to remove numerical drift
657
+ v = v - v.mean()
658
+
659
+ # Its L2 norm r = ||v||
660
+ l2_norm = np.sqrt(np.sum(v**2))
661
+
662
+ # Residualize W_c against v:
663
+ # residualized_w ≈ R_v W_c = (I - v v^T / ||v||^2) W_c
664
+ residualized_w = orthogonalize(w_centered, v)
665
+
666
+ # Gradient: ∇_s α = (1 / ||v||) (R_v W_c)^T t_c
667
+ mpc = (residualized_w.T @ t).squeeze() / l2_norm
668
+
644
669
  return pd.Series(mpc, index=stakes.index)
@@ -82,23 +82,48 @@ def calculate_max_churn_and_turnover(
82
82
  prev_neutralizers: dict[str, pd.DataFrame],
83
83
  prev_sample_weights: dict[str, pd.Series],
84
84
  ) -> Tuple[float, float]:
85
- """Calculate the maximum churn and turnover with respect to previous submissions.
86
- This function iterates over previous submissions and calculates churn and turnover
87
- for each submission against the current submission. It expects all data to be
88
- indexed on the same type tickers/IDs (e.g. all numerai_ticker, or all composite_figi, or all etc.) .
85
+ """Calculate the maximum churn and turnover of the current submission with respect to previous submissions.
86
+ This function iterates over previous submissions and calculates churn and turnover for each submission
87
+ against the current submission. It expects the following:
88
+
89
+ - all submissions, neutralizers, and sample weights are indexed on the same type of tickers/IDs
90
+ (e.g. all numerai_ticker, all composite_figi, etc.)
91
+
92
+ - neutralizers and sample weights cover the full universe of their respective eras. This means you
93
+ should avoid removing rows from neutralizers or sample weights before passing them to this function.
94
+
95
+ In a live submission environment your submissions are joined on their respective full universes, ranked,
96
+ and then any NaNs are filled with 0.5 before calculating churn and turnover. So, if you provide filtered
97
+ neutralizers or sample weights, your locally calculated churn and turnover may not match the live value.
89
98
 
90
99
  Arguments:
91
- curr_sub: pd.Series - the current submission as a Series indexed on tickers/ids
92
- curr_neutralizer: pd.DataFrame - the neutralizer DataFrame for the current submission indexed on numerai_ticker
93
- curr_sample_weight: pd.Series - the sample weights Series for the current submission indexed on numerai_ticker
94
- prev_subs: dict[str, pd.DataFrame] - a dictionary of datestamps to submissions, where each submission is a DataFrame
95
- with 2 columns: a ticker/id column and a signal/prediction column. To calculate churn
96
- and turnover for a live submission, use the most recent 5 submissions. For diagnostics,
97
- just provide the previous era.
98
- prev_neutralizers: dict[str, pd.DataFrame] - a dictionary of datestamps to neutralizers DataFrames where each neutralizers
99
- DataFrame is indexed on the same ticker column as the current submission
100
- prev_sample_weights: dict[str, pd.Series] - a dictionary of datestamps to sample weights where each sample weights
101
- Series is indexed on the same ticker column as the current submission
100
+ curr_sub: pd.Series - current-era submission indexed on tickers/ids
101
+
102
+ curr_neutralizer: pd.DataFrame
103
+ - current-era neutralizers indexed on the same type of tickers/ids.
104
+ We expect these to cover the full universe for the current era.
105
+
106
+ curr_sample_weight: pd.Series
107
+ - current-era sample weights indexed on the same type of tickers/ids.
108
+ We expect these to cover the full universe for the current era.
109
+
110
+ prev_subs: dict[str, pd.Series]
111
+ - a dictionary mapping datestamps to submissions, where each submission is a
112
+ Series indexed on the same type of tickers/ids as the current
113
+ submission. To calculate churn and turnover for a live submission,
114
+ use the most recent 5 submissions. For diagnostics, just provide the
115
+ last 1 era.
116
+
117
+ prev_neutralizers: dict[str, pd.DataFrame]
118
+ - a dictionary mapping datestamps to neutralizers DataFrames where each neutralizers
119
+ DataFrame is indexed on the same type of tickers/ids as the current submission.
120
+ We expect each of these to cover the full universe of their respective eras.
121
+
122
+ prev_sample_weights: dict[str, pd.Series]
123
+ - a dictionary mapping datestamps to sample weights where each sample weights
124
+ Series is indexed on the same type of tickers/ids as the current submission.
125
+ We expect each of these to cover the full universe of their respective eras.
126
+
102
127
  Returns:
103
128
  prev_week_max_churn -- the maximum churn from previous submissions
104
129
  prev_week_max_turnover -- the maximum turnover from previous submissions
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "numerai-tools"
3
- version = "0.5.0.dev13"
3
+ version = "0.5.2"
4
4
  description = "A collection of open-source tools to help interact with Numerai, model data, and automate submissions."
5
5
  authors = [
6
6
  {name = "Numerai Engineering",email = "engineering@numer.ai"}