numerai-tools 0.5.0.dev10__tar.gz → 0.5.0.dev12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev12}/PKG-INFO +1 -1
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev12}/numerai_tools/scoring.py +1 -1
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev12}/numerai_tools/signals.py +39 -61
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev12}/numerai_tools/submissions.py +6 -10
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev12}/pyproject.toml +1 -1
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev12}/LICENSE +0 -0
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev12}/README.md +0 -0
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev12}/numerai_tools/__init__.py +0 -0
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev12}/numerai_tools/py.typed +0 -0
|
@@ -560,7 +560,7 @@ def generate_neutralized_weights(
|
|
|
560
560
|
neutralizers: pd.DataFrame,
|
|
561
561
|
sample_weights: pd.Series,
|
|
562
562
|
center_and_normalize: bool = False,
|
|
563
|
-
) -> pd.
|
|
563
|
+
) -> pd.DataFrame:
|
|
564
564
|
assert not predictions.isna().any().any(), "Predictions contain NaNs"
|
|
565
565
|
assert not neutralizers.isna().any().any(), "Normalization factors contain NaNs"
|
|
566
566
|
assert not sample_weights.isna().any(), "Weights contain NaNs"
|
|
@@ -9,7 +9,6 @@ from numerai_tools.scoring import (
|
|
|
9
9
|
from numerai_tools.submissions import (
|
|
10
10
|
validate_submission_signals,
|
|
11
11
|
clean_submission,
|
|
12
|
-
remap_ids,
|
|
13
12
|
)
|
|
14
13
|
|
|
15
14
|
import pandas as pd
|
|
@@ -76,47 +75,47 @@ def turnover(
|
|
|
76
75
|
|
|
77
76
|
|
|
78
77
|
def calculate_max_churn_and_turnover(
|
|
79
|
-
curr_sub: pd.
|
|
78
|
+
curr_sub: pd.Series,
|
|
80
79
|
curr_neutralizer: pd.DataFrame,
|
|
81
|
-
|
|
82
|
-
|
|
80
|
+
curr_sample_weight: pd.Series,
|
|
81
|
+
prev_subs: dict[str, pd.Series],
|
|
83
82
|
prev_neutralizers: dict[str, pd.DataFrame],
|
|
84
83
|
prev_sample_weights: dict[str, pd.Series],
|
|
85
|
-
universe: pd.DataFrame,
|
|
86
|
-
curr_signal_col: str,
|
|
87
|
-
curr_ticker_col: str,
|
|
88
84
|
) -> Tuple[float, float]:
|
|
89
85
|
"""Calculate the maximum churn and turnover with respect to previous submissions.
|
|
86
|
+
This function iterates over previous submissions and calculates churn and turnover
|
|
87
|
+
for each submission against the current submission. It expects all data to be
|
|
88
|
+
indexed on the same type of tickers/IDs (e.g. all numerai_ticker, or all composite_figi, etc.).
|
|
90
89
|
|
|
91
90
|
Arguments:
|
|
92
|
-
curr_sub
|
|
93
|
-
curr_neutralizer
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
91
|
+
curr_sub: pd.Series - the current submission as a Series indexed on tickers/ids
|
|
92
|
+
curr_neutralizer: pd.DataFrame - the neutralizer DataFrame for the current submission indexed on numerai_ticker
|
|
93
|
+
curr_sample_weight: pd.Series - the sample weights Series for the current submission indexed on numerai_ticker
|
|
94
|
+
prev_subs: dict[str, pd.Series] - a dictionary of datestamps to submissions, where each submission is a Series
|
|
95
|
+
indexed on tickers/ids whose values are the signals/predictions. To calculate churn
|
|
96
|
+
and turnover for a live submission, use the most recent 5 submissions. For diagnostics,
|
|
97
|
+
just provide the previous era.
|
|
98
|
+
prev_neutralizers: dict[str, pd.DataFrame] - a dictionary of datestamps to neutralizer DataFrames, where each neutralizer
|
|
99
|
+
DataFrame is indexed on the same ticker column as the current submission
|
|
100
|
+
prev_sample_weights: dict[str, pd.Series] - a dictionary of datestamps to sample weights where each sample weights
|
|
101
|
+
Series is indexed on the same ticker column as the current submission
|
|
102
102
|
Returns:
|
|
103
103
|
prev_week_max_churn -- the maximum churn from previous submissions
|
|
104
104
|
prev_week_max_turnover -- the maximum turnover from previous submissions
|
|
105
105
|
"""
|
|
106
|
-
universe = universe.reset_index()
|
|
107
106
|
(
|
|
108
107
|
curr_ticker_col,
|
|
109
108
|
curr_signal_col,
|
|
110
109
|
_,
|
|
111
|
-
|
|
110
|
+
curr_sub_df,
|
|
112
111
|
_,
|
|
113
112
|
) = validate_submission_signals(
|
|
114
|
-
universe=
|
|
115
|
-
submission=curr_sub,
|
|
113
|
+
universe=curr_sample_weight.index.to_frame(),
|
|
114
|
+
submission=curr_sub.reset_index(),
|
|
116
115
|
)
|
|
117
|
-
|
|
118
|
-
universe=
|
|
119
|
-
submission=
|
|
116
|
+
curr_sub = clean_submission(
|
|
117
|
+
universe=curr_sample_weight.index.to_frame(),
|
|
118
|
+
submission=curr_sub_df,
|
|
120
119
|
src_id_col=curr_ticker_col,
|
|
121
120
|
src_signal_col=curr_signal_col,
|
|
122
121
|
rank_and_fill=True,
|
|
@@ -124,63 +123,42 @@ def calculate_max_churn_and_turnover(
|
|
|
124
123
|
churn_stats = []
|
|
125
124
|
turnover_stats = []
|
|
126
125
|
neutralized_weights = generate_neutralized_weights(
|
|
127
|
-
|
|
126
|
+
curr_sub.to_frame(),
|
|
128
127
|
curr_neutralizer,
|
|
129
|
-
|
|
128
|
+
curr_sample_weight,
|
|
130
129
|
center_and_normalize=True,
|
|
131
|
-
)[
|
|
132
|
-
for datestamp in
|
|
133
|
-
prev_sub =
|
|
130
|
+
)[curr_sub.name]
|
|
131
|
+
for datestamp in prev_subs:
|
|
132
|
+
prev_sub = prev_subs[datestamp]
|
|
134
133
|
prev_neutralizer = prev_neutralizers[datestamp]
|
|
135
|
-
|
|
134
|
+
prev_sample_weight = prev_sample_weights[datestamp]
|
|
136
135
|
(
|
|
137
136
|
prev_ticker_col,
|
|
138
137
|
prev_signal_col,
|
|
139
138
|
_,
|
|
140
|
-
|
|
139
|
+
prev_sub_df,
|
|
141
140
|
_,
|
|
142
141
|
) = validate_submission_signals(
|
|
143
|
-
universe=
|
|
144
|
-
submission=prev_sub,
|
|
142
|
+
universe=prev_sample_weight.index.to_frame(),
|
|
143
|
+
submission=prev_sub.reset_index(),
|
|
145
144
|
)
|
|
146
|
-
|
|
147
|
-
universe=
|
|
148
|
-
submission=
|
|
145
|
+
prev_sub = clean_submission(
|
|
146
|
+
universe=prev_sample_weight.index.to_frame(),
|
|
147
|
+
submission=prev_sub_df,
|
|
149
148
|
src_id_col=prev_ticker_col,
|
|
150
149
|
src_signal_col=prev_signal_col,
|
|
151
150
|
dst_id_col=curr_ticker_col,
|
|
152
151
|
dst_signal_col=curr_signal_col,
|
|
153
152
|
rank_and_fill=True,
|
|
154
153
|
)
|
|
155
|
-
prev_neutralizer = (
|
|
156
|
-
remap_ids(
|
|
157
|
-
prev_neutralizer.reset_index(),
|
|
158
|
-
universe,
|
|
159
|
-
str(prev_neutralizer.index.name),
|
|
160
|
-
curr_ticker_col,
|
|
161
|
-
)
|
|
162
|
-
.set_index(curr_ticker_col)
|
|
163
|
-
.filter(like="neutralizer_")
|
|
164
|
-
.dropna()
|
|
165
|
-
)
|
|
166
|
-
prev_weight = (
|
|
167
|
-
remap_ids(
|
|
168
|
-
prev_weight.reset_index(),
|
|
169
|
-
universe,
|
|
170
|
-
str(prev_weight.index.name),
|
|
171
|
-
curr_ticker_col,
|
|
172
|
-
)
|
|
173
|
-
.set_index(curr_ticker_col)[prev_weight.name]
|
|
174
|
-
.dropna()
|
|
175
|
-
)
|
|
176
154
|
prev_neutralized_weights = generate_neutralized_weights(
|
|
177
|
-
|
|
155
|
+
prev_sub.to_frame(),
|
|
178
156
|
prev_neutralizer,
|
|
179
|
-
|
|
157
|
+
prev_sample_weight,
|
|
180
158
|
center_and_normalize=True,
|
|
181
|
-
)[
|
|
159
|
+
)[prev_sub.name]
|
|
182
160
|
try:
|
|
183
|
-
churn_val = abs(churn(
|
|
161
|
+
churn_val = abs(churn(curr_sub, prev_sub))
|
|
184
162
|
except AssertionError as e:
|
|
185
163
|
if "does not have enough overlapping ids" in str(e):
|
|
186
164
|
continue
|
|
@@ -228,11 +228,7 @@ def validate_submission_signals(
|
|
|
228
228
|
"data_type column found in Signals submission. This is deprecated and support will be removed in the future. "
|
|
229
229
|
"Please remove the data_type column from your Signals submission."
|
|
230
230
|
)
|
|
231
|
-
|
|
232
|
-
columns=["data_type"],
|
|
233
|
-
errors="ignore",
|
|
234
|
-
inplace=True,
|
|
235
|
-
)
|
|
231
|
+
submission.drop(columns=["data_type"], errors="ignore", inplace=True)
|
|
236
232
|
ticker_col, signal_col, date_col = validate_headers_signals(
|
|
237
233
|
submission, assert_date_col
|
|
238
234
|
)
|
|
@@ -266,7 +262,7 @@ def validate_submission_crypto(
|
|
|
266
262
|
|
|
267
263
|
def remap_ids(
|
|
268
264
|
data: pd.DataFrame,
|
|
269
|
-
ticker_map: pd.
|
|
265
|
+
ticker_map: pd.DataFrame,
|
|
270
266
|
src_id_col: str,
|
|
271
267
|
dst_id_col: str,
|
|
272
268
|
) -> pd.DataFrame:
|
|
@@ -277,25 +273,25 @@ def remap_ids(
|
|
|
277
273
|
|
|
278
274
|
Arguments:
|
|
279
275
|
data: pd.DataFrame - the data to remap
|
|
280
|
-
ticker_map: pd.
|
|
276
|
+
ticker_map: pd.DataFrame - the mapping of source ids to destination ids
|
|
281
277
|
src_id_col: str - the name of the source ids column in the data
|
|
282
278
|
dst_id_col: str - the name of the destination ids column in the ticker map
|
|
283
279
|
"""
|
|
284
280
|
# first, index the universe and data on the source ids
|
|
285
|
-
indexed_map = ticker_map.
|
|
281
|
+
indexed_map = ticker_map.set_index(src_id_col, drop=False)
|
|
286
282
|
indexed_data = data.set_index(src_id_col)
|
|
287
283
|
return (
|
|
288
284
|
# then, join the universe and data
|
|
289
285
|
indexed_map.join(indexed_data)
|
|
290
286
|
# get just the destination ids and prediction columns
|
|
291
|
-
.reset_index()[[dst_id_col, *indexed_data.columns]]
|
|
287
|
+
.reset_index(drop=True)[[dst_id_col, *indexed_data.columns]]
|
|
292
288
|
# finally, sort by the destination ticker column
|
|
293
289
|
.sort_values(dst_id_col)
|
|
294
290
|
)
|
|
295
291
|
|
|
296
292
|
|
|
297
293
|
def clean_submission(
|
|
298
|
-
universe: pd.
|
|
294
|
+
universe: pd.DataFrame,
|
|
299
295
|
submission: pd.DataFrame,
|
|
300
296
|
src_id_col: str,
|
|
301
297
|
src_signal_col: str,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "numerai-tools"
|
|
3
|
-
version = "0.5.0.
|
|
3
|
+
version = "0.5.0.dev12"
|
|
4
4
|
description = "A collection of open-source tools to help interact with Numerai, model data, and automate submissions."
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "Numerai Engineering",email = "engineering@numer.ai"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|