numerai-tools 0.5.0.dev13__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/PKG-INFO +3 -2
- {numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/numerai_tools/scoring.py +46 -21
- {numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/numerai_tools/signals.py +40 -15
- {numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/pyproject.toml +1 -1
- {numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/LICENSE +0 -0
- {numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/README.md +0 -0
- {numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/numerai_tools/__init__.py +0 -0
- {numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/numerai_tools/py.typed +0 -0
- {numerai_tools-0.5.0.dev13 → numerai_tools-0.5.2}/numerai_tools/submissions.py +0 -0
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: numerai-tools
|
|
3
|
-
Version: 0.5.0.dev13
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
|
|
5
5
|
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
6
7
|
Author: Numerai Engineering
|
|
7
8
|
Author-email: engineering@numer.ai
|
|
8
9
|
Requires-Python: >=3.11
|
|
@@ -468,6 +468,7 @@ def numerai_corr(
|
|
|
468
468
|
targets: pd.Series,
|
|
469
469
|
max_filtered_index_ratio: float = DEFAULT_MAX_FILTERED_INDEX_RATIO,
|
|
470
470
|
top_bottom: Optional[int] = None,
|
|
471
|
+
target_pow15: bool = True,
|
|
471
472
|
) -> pd.Series:
|
|
472
473
|
"""Calculates the canonical Numerai correlation.
|
|
473
474
|
1. Re-center the target on 0
|
|
@@ -484,6 +485,8 @@ def numerai_corr(
|
|
|
484
485
|
top_bottom: Optional[int] - the number of top and bottom predictions to use
|
|
485
486
|
when calculating the correlation. Results in
|
|
486
487
|
2*top_bottom predictions.
|
|
488
|
+
target_pow15: bool - whether to raise the targets to the 1.5 power, defaults to True.
|
|
489
|
+
Set to False if you are passing in returns as the targets.
|
|
487
490
|
|
|
488
491
|
Returns:
|
|
489
492
|
pd.Series - the resulting correlation scores for each column in predictions
|
|
@@ -493,7 +496,8 @@ def numerai_corr(
|
|
|
493
496
|
targets, predictions, max_filtered_index_ratio
|
|
494
497
|
)
|
|
495
498
|
predictions = tie_kept_rank__gaussianize__pow_1_5(predictions)
|
|
496
|
-
|
|
499
|
+
if target_pow15:
|
|
500
|
+
targets = power(targets.to_frame(), 1.5)[targets.name]
|
|
497
501
|
scores = predictions.apply(
|
|
498
502
|
lambda sub: pearson_correlation(targets, sub, top_bottom)
|
|
499
503
|
)
|
|
@@ -610,12 +614,14 @@ def meta_portfolio_contribution(
|
|
|
610
614
|
sample_weights: pd.Series,
|
|
611
615
|
targets: pd.Series,
|
|
612
616
|
) -> pd.Series:
|
|
613
|
-
"""Calculates the "meta portfolio"
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
617
|
+
"""Calculates the "meta portfolio" gradient w.r.t. stakes:
|
|
618
|
+
- rank, normalize, and power each signal
|
|
619
|
+
- convert each signal into neutralized weights
|
|
620
|
+
- center weights across samples (explicit W_c = C W)
|
|
621
|
+
- generate the stake-weighted portfolio
|
|
622
|
+
- calculate the gradient of the portfolio w.r.t. the stakes
|
|
623
|
+
- multiply by the (centered) targets
|
|
624
|
+
|
|
619
625
|
Arguments:
|
|
620
626
|
predictions: pd.DataFrame - the predictions to evaluate
|
|
621
627
|
stakes: pd.Series - the stakes to use as weights
|
|
@@ -623,22 +629,41 @@ def meta_portfolio_contribution(
|
|
|
623
629
|
sample_weights: pd.Series - the universe sampling weights
|
|
624
630
|
targets: pd.Series - the live targets to evaluate against
|
|
625
631
|
"""
|
|
626
|
-
targets
|
|
632
|
+
# Align predictions and targets on the same index / universe
|
|
627
633
|
predictions, targets = filter_sort_index(predictions, targets)
|
|
634
|
+
|
|
635
|
+
# Center targets in sample space: t_c = C t
|
|
636
|
+
targets = center(targets)
|
|
637
|
+
|
|
638
|
+
# Normalize stakes to sum to 1
|
|
628
639
|
stake_weights = weight_normalize(stakes.fillna(0))
|
|
629
640
|
assert np.isclose(stake_weights.sum(), 1), "Stakes must sum to 1"
|
|
641
|
+
|
|
642
|
+
# Generate neutralized weights W(predictions, neutralizers, sample_weights)
|
|
630
643
|
weights = generate_neutralized_weights(predictions, neutralizers, sample_weights)
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
+
|
|
645
|
+
# Extract aligned matrices/vectors
|
|
646
|
+
w = cast(np.ndarray, weights[stakes.index].values) # W ∈ R^{N×K}
|
|
647
|
+
s = cast(np.ndarray, stake_weights.values) # s ∈ R^K
|
|
648
|
+
t = cast(np.ndarray, targets.values) # t_c ∈ R^N (already centered)
|
|
649
|
+
|
|
650
|
+
# Explicit centering of weights across samples:
|
|
651
|
+
# W_c = C W = W - 1 μ^T, where μ is the column-wise mean of W
|
|
652
|
+
w_centered = w - w.mean(axis=0, keepdims=True) # W_c
|
|
653
|
+
|
|
654
|
+
# Centered prediction vector v = W_c s
|
|
655
|
+
v = w_centered @ s # v ∈ R^N, already mean ~ 0
|
|
656
|
+
# Optionally re-center to remove numerical drift
|
|
657
|
+
v = v - v.mean()
|
|
658
|
+
|
|
659
|
+
# Its L2 norm r = ||v||
|
|
660
|
+
l2_norm = np.sqrt(np.sum(v**2))
|
|
661
|
+
|
|
662
|
+
# Residualize W_c against v:
|
|
663
|
+
# residualized_w ≈ R_v W_c = (I - v v^T / ||v||^2) W_c
|
|
664
|
+
residualized_w = orthogonalize(w_centered, v)
|
|
665
|
+
|
|
666
|
+
# Gradient: ∇_s α = (1 / ||v||) (R_v W_c)^T t_c
|
|
667
|
+
mpc = (residualized_w.T @ t).squeeze() / l2_norm
|
|
668
|
+
|
|
644
669
|
return pd.Series(mpc, index=stakes.index)
|
|
@@ -82,23 +82,48 @@ def calculate_max_churn_and_turnover(
|
|
|
82
82
|
prev_neutralizers: dict[str, pd.DataFrame],
|
|
83
83
|
prev_sample_weights: dict[str, pd.Series],
|
|
84
84
|
) -> Tuple[float, float]:
|
|
85
|
-
"""Calculate the maximum churn and turnover with respect to previous submissions.
|
|
86
|
-
This function iterates over previous submissions and calculates churn and turnover
|
|
87
|
-
|
|
88
|
-
|
|
85
|
+
"""Calculate the maximum churn and turnover of the current submission with respect to previous submissions.
|
|
86
|
+
This function iterates over previous submissions and calculates churn and turnover for each submission
|
|
87
|
+
against the current submission. It expects the following:
|
|
88
|
+
|
|
89
|
+
- all submissions, neutralizers, and sample weights are indexed on the same type of tickers/IDs
|
|
90
|
+
(e.g. all numerai_ticker, or all composite_figi, or all etc.)
|
|
91
|
+
|
|
92
|
+
- neutralizers and sample weights cover the full universe of their respective eras. This means you
|
|
93
|
+
should avoid removing rows from neutralizers or sample weights before passing them to this function.
|
|
94
|
+
|
|
95
|
+
In a live submission environment your submissions are joined on their respective full universes, ranked,
|
|
96
|
+
and then any NaNs are filled with 0.5 before calculating churn and turnover. So, if you provide filtered
|
|
97
|
+
neutralizers or sample weights, your locally calculated churn and turnover may not match the live value.
|
|
89
98
|
|
|
90
99
|
Arguments:
|
|
91
|
-
curr_sub: pd.Series -
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
100
|
+
curr_sub: pd.Series - current-era submission indexed on tickers/ids
|
|
101
|
+
|
|
102
|
+
curr_neutralizer: pd.DataFrame
|
|
103
|
+
- current-era neutralizers indexed on the same type of tickers/ids.
|
|
104
|
+
We expect these to cover the full universe for the current era.
|
|
105
|
+
|
|
106
|
+
curr_sample_weight: pd.Series
|
|
107
|
+
- current-era sample weights indexed on the same type of tickers/ids.
|
|
108
|
+
We expect these to cover the full universe for the current era.
|
|
109
|
+
|
|
110
|
+
prev_subs: dict[str, pd.Series]
|
|
111
|
+
- a dictionary mapping datestamps to submissions, where each submission is a
|
|
112
|
+
Series indexed on the same type of tickers/ids as the current
|
|
113
|
+
submission. To calculate churn and turnover for a live submission,
|
|
114
|
+
use the most recent 5 submissions. For diagnostics, just provide the
|
|
115
|
+
last 1 era.
|
|
116
|
+
|
|
117
|
+
prev_neutralizers: dict[str, pd.DataFrame]
|
|
118
|
+
- a dictionary mapping datestamps to neutralizers DataFrames where each neutralizers
|
|
119
|
+
DataFrame is indexed on the same type of tickers/ids as the current submission.
|
|
120
|
+
We expect each of these to cover the full universe of their respective eras.
|
|
121
|
+
|
|
122
|
+
prev_sample_weights: dict[str, pd.Series]
|
|
123
|
+
- a dictionary mapping datestamps to sample weights where each sample weights
|
|
124
|
+
Series is indexed on the same type of tickers/ids as the current submission.
|
|
125
|
+
We expect each of these to cover the full universe of their respective eras.
|
|
126
|
+
|
|
102
127
|
Returns:
|
|
103
128
|
prev_week_max_churn -- the maximum churn from previous submissions
|
|
104
129
|
prev_week_max_turnover -- the maximum turnover from previous submissions
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "numerai-tools"
|
|
3
|
-
version = "0.5.0.dev13"
|
|
3
|
+
version = "0.5.2"
|
|
4
4
|
description = "A collection of open-source tools to help interact with Numerai, model data, and automate submissions."
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "Numerai Engineering",email = "engineering@numer.ai"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|