numerai-tools 0.5.0.dev10__tar.gz → 0.5.0.dev11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/PKG-INFO +1 -1
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/numerai_tools/scoring.py +1 -1
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/numerai_tools/signals.py +41 -60
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/numerai_tools/submissions.py +8 -10
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/pyproject.toml +1 -1
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/LICENSE +0 -0
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/README.md +0 -0
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/numerai_tools/__init__.py +0 -0
- {numerai_tools-0.5.0.dev10 → numerai_tools-0.5.0.dev11}/numerai_tools/py.typed +0 -0
|
@@ -560,7 +560,7 @@ def generate_neutralized_weights(
|
|
|
560
560
|
neutralizers: pd.DataFrame,
|
|
561
561
|
sample_weights: pd.Series,
|
|
562
562
|
center_and_normalize: bool = False,
|
|
563
|
-
) -> pd.
|
|
563
|
+
) -> pd.DataFrame:
|
|
564
564
|
assert not predictions.isna().any().any(), "Predictions contain NaNs"
|
|
565
565
|
assert not neutralizers.isna().any().any(), "Normalization factors contain NaNs"
|
|
566
566
|
assert not sample_weights.isna().any(), "Weights contain NaNs"
|
|
@@ -76,111 +76,92 @@ def turnover(
|
|
|
76
76
|
|
|
77
77
|
|
|
78
78
|
def calculate_max_churn_and_turnover(
|
|
79
|
-
curr_sub: pd.
|
|
79
|
+
curr_sub: pd.Series,
|
|
80
80
|
curr_neutralizer: pd.DataFrame,
|
|
81
|
-
|
|
82
|
-
|
|
81
|
+
curr_sample_weight: pd.Series,
|
|
82
|
+
prev_subs: dict[str, pd.Series],
|
|
83
83
|
prev_neutralizers: dict[str, pd.DataFrame],
|
|
84
84
|
prev_sample_weights: dict[str, pd.Series],
|
|
85
|
-
universe: pd.DataFrame,
|
|
86
|
-
curr_signal_col: str,
|
|
87
|
-
curr_ticker_col: str,
|
|
88
85
|
) -> Tuple[float, float]:
|
|
89
86
|
"""Calculate the maximum churn and turnover with respect to previous submissions.
|
|
87
|
+
This function iterates over previous submissions and calculates churn and turnover
|
|
88
|
+
for each submission against the current submission. It expects all data to be
|
|
89
|
+
indexed on the same type of tickers/IDs (e.g. all numerai_ticker, all composite_figi, etc.).
|
|
90
90
|
|
|
91
91
|
Arguments:
|
|
92
|
-
curr_sub
|
|
93
|
-
curr_neutralizer
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
92
|
+
curr_sub: pd.Series - the current submission as a Series indexed on tickers/ids
|
|
93
|
+
curr_neutralizer: pd.DataFrame - the neutralizer DataFrame for the current submission indexed on numerai_ticker
|
|
94
|
+
curr_sample_weight: pd.Series - the sample weights Series for the current submission indexed on numerai_ticker
|
|
95
|
+
prev_subs: dict[str, pd.Series] - a dictionary of datestamps to submissions, where each submission is a Series
|
|
96
|
+
indexed on tickers/ids, like the current submission. To calculate churn
|
|
97
|
+
and turnover for a live submission, use the most recent 5 submissions. For diagnostics,
|
|
98
|
+
just provide the previous era.
|
|
99
|
+
prev_neutralizers: dict[str, pd.DataFrame] - a dictionary of datestamps to neutralizers DataFrames where each neutralizers
|
|
100
|
+
DataFrame is indexed on the same ticker column as the current submission
|
|
101
|
+
prev_sample_weights: dict[str, pd.Series] - a dictionary of datestamps to sample weights where each sample weights
|
|
102
|
+
Series is indexed on the same ticker column as the current submission
|
|
102
103
|
Returns:
|
|
103
104
|
prev_week_max_churn -- the maximum churn from previous submissions
|
|
104
105
|
prev_week_max_turnover -- the maximum turnover from previous submissions
|
|
105
106
|
"""
|
|
106
|
-
universe = universe.reset_index()
|
|
107
107
|
(
|
|
108
108
|
curr_ticker_col,
|
|
109
109
|
curr_signal_col,
|
|
110
110
|
_,
|
|
111
|
-
|
|
111
|
+
curr_sub_df,
|
|
112
112
|
_,
|
|
113
113
|
) = validate_submission_signals(
|
|
114
|
-
universe=
|
|
115
|
-
submission=curr_sub,
|
|
114
|
+
universe=curr_sample_weight.index.to_frame(),
|
|
115
|
+
submission=curr_sub.reset_index(),
|
|
116
116
|
)
|
|
117
|
-
|
|
118
|
-
universe=
|
|
119
|
-
submission=
|
|
117
|
+
curr_sub = clean_submission(
|
|
118
|
+
universe=curr_sample_weight.index.to_frame(),
|
|
119
|
+
submission=curr_sub_df,
|
|
120
120
|
src_id_col=curr_ticker_col,
|
|
121
121
|
src_signal_col=curr_signal_col,
|
|
122
122
|
rank_and_fill=True,
|
|
123
123
|
)
|
|
124
|
+
print("curr_sub", curr_sub)
|
|
124
125
|
churn_stats = []
|
|
125
126
|
turnover_stats = []
|
|
126
127
|
neutralized_weights = generate_neutralized_weights(
|
|
127
|
-
|
|
128
|
+
curr_sub.to_frame(),
|
|
128
129
|
curr_neutralizer,
|
|
129
|
-
|
|
130
|
+
curr_sample_weight,
|
|
130
131
|
center_and_normalize=True,
|
|
131
|
-
)[
|
|
132
|
-
for datestamp in
|
|
133
|
-
prev_sub =
|
|
132
|
+
)[curr_sub.name]
|
|
133
|
+
for datestamp in prev_subs:
|
|
134
|
+
prev_sub = prev_subs[datestamp]
|
|
134
135
|
prev_neutralizer = prev_neutralizers[datestamp]
|
|
135
|
-
|
|
136
|
+
prev_sample_weight = prev_sample_weights[datestamp]
|
|
136
137
|
(
|
|
137
138
|
prev_ticker_col,
|
|
138
139
|
prev_signal_col,
|
|
139
140
|
_,
|
|
140
|
-
|
|
141
|
+
prev_sub_df,
|
|
141
142
|
_,
|
|
142
143
|
) = validate_submission_signals(
|
|
143
|
-
universe=
|
|
144
|
-
submission=prev_sub,
|
|
144
|
+
universe=prev_sample_weight.index.to_frame(),
|
|
145
|
+
submission=prev_sub.reset_index(),
|
|
145
146
|
)
|
|
146
|
-
|
|
147
|
-
universe=
|
|
148
|
-
submission=
|
|
147
|
+
prev_sub = clean_submission(
|
|
148
|
+
universe=prev_sample_weight.index.to_frame(),
|
|
149
|
+
submission=prev_sub_df,
|
|
149
150
|
src_id_col=prev_ticker_col,
|
|
150
151
|
src_signal_col=prev_signal_col,
|
|
151
152
|
dst_id_col=curr_ticker_col,
|
|
152
153
|
dst_signal_col=curr_signal_col,
|
|
153
154
|
rank_and_fill=True,
|
|
154
155
|
)
|
|
155
|
-
prev_neutralizer = (
|
|
156
|
-
remap_ids(
|
|
157
|
-
prev_neutralizer.reset_index(),
|
|
158
|
-
universe,
|
|
159
|
-
str(prev_neutralizer.index.name),
|
|
160
|
-
curr_ticker_col,
|
|
161
|
-
)
|
|
162
|
-
.set_index(curr_ticker_col)
|
|
163
|
-
.filter(like="neutralizer_")
|
|
164
|
-
.dropna()
|
|
165
|
-
)
|
|
166
|
-
prev_weight = (
|
|
167
|
-
remap_ids(
|
|
168
|
-
prev_weight.reset_index(),
|
|
169
|
-
universe,
|
|
170
|
-
str(prev_weight.index.name),
|
|
171
|
-
curr_ticker_col,
|
|
172
|
-
)
|
|
173
|
-
.set_index(curr_ticker_col)[prev_weight.name]
|
|
174
|
-
.dropna()
|
|
175
|
-
)
|
|
176
156
|
prev_neutralized_weights = generate_neutralized_weights(
|
|
177
|
-
|
|
157
|
+
prev_sub.to_frame(),
|
|
178
158
|
prev_neutralizer,
|
|
179
|
-
|
|
159
|
+
prev_sample_weight,
|
|
180
160
|
center_and_normalize=True,
|
|
181
|
-
)[
|
|
161
|
+
)[prev_sub.name]
|
|
162
|
+
print("prev_sub", prev_sub)
|
|
182
163
|
try:
|
|
183
|
-
churn_val = abs(churn(
|
|
164
|
+
churn_val = abs(churn(curr_sub, prev_sub))
|
|
184
165
|
except AssertionError as e:
|
|
185
166
|
if "does not have enough overlapping ids" in str(e):
|
|
186
167
|
continue
|
|
@@ -88,6 +88,7 @@ def validate_headers_signals(
|
|
|
88
88
|
assert (
|
|
89
89
|
date_col_name is not None
|
|
90
90
|
), "invalid_submission_headers: submission must contain a date column"
|
|
91
|
+
print(submission)
|
|
91
92
|
ticker_col, signal_col = _validate_headers(
|
|
92
93
|
submission,
|
|
93
94
|
SIGNALS_ALLOWED_ID_COLS,
|
|
@@ -228,11 +229,8 @@ def validate_submission_signals(
|
|
|
228
229
|
"data_type column found in Signals submission. This is deprecated and support will be removed in the future. "
|
|
229
230
|
"Please remove the data_type column from your Signals submission."
|
|
230
231
|
)
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
errors="ignore",
|
|
234
|
-
inplace=True,
|
|
235
|
-
)
|
|
232
|
+
submission.drop(columns=["data_type"], errors="ignore", inplace=True)
|
|
233
|
+
print(submission)
|
|
236
234
|
ticker_col, signal_col, date_col = validate_headers_signals(
|
|
237
235
|
submission, assert_date_col
|
|
238
236
|
)
|
|
@@ -266,7 +264,7 @@ def validate_submission_crypto(
|
|
|
266
264
|
|
|
267
265
|
def remap_ids(
|
|
268
266
|
data: pd.DataFrame,
|
|
269
|
-
ticker_map: pd.
|
|
267
|
+
ticker_map: pd.DataFrame,
|
|
270
268
|
src_id_col: str,
|
|
271
269
|
dst_id_col: str,
|
|
272
270
|
) -> pd.DataFrame:
|
|
@@ -277,25 +275,25 @@ def remap_ids(
|
|
|
277
275
|
|
|
278
276
|
Arguments:
|
|
279
277
|
data: pd.DataFrame - the data to remap
|
|
280
|
-
ticker_map: pd.
|
|
278
|
+
ticker_map: pd.DataFrame - the mapping of source ids to destination ids
|
|
281
279
|
src_id_col: str - the name of the source ids column in the data
|
|
282
280
|
dst_id_col: str - the name of the destination ids column in the ticker map
|
|
283
281
|
"""
|
|
284
282
|
# first, index the universe and data on the source ids
|
|
285
|
-
indexed_map = ticker_map.
|
|
283
|
+
indexed_map = ticker_map.set_index(src_id_col, drop=False)
|
|
286
284
|
indexed_data = data.set_index(src_id_col)
|
|
287
285
|
return (
|
|
288
286
|
# then, join the universe and data
|
|
289
287
|
indexed_map.join(indexed_data)
|
|
290
288
|
# get just the destination ids and prediction columns
|
|
291
|
-
.reset_index()[[dst_id_col, *indexed_data.columns]]
|
|
289
|
+
.reset_index(drop=True)[[dst_id_col, *indexed_data.columns]]
|
|
292
290
|
# finally, sort by the destination ticker column
|
|
293
291
|
.sort_values(dst_id_col)
|
|
294
292
|
)
|
|
295
293
|
|
|
296
294
|
|
|
297
295
|
def clean_submission(
|
|
298
|
-
universe: pd.
|
|
296
|
+
universe: pd.DataFrame,
|
|
299
297
|
submission: pd.DataFrame,
|
|
300
298
|
src_id_col: str,
|
|
301
299
|
src_signal_col: str,
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "numerai-tools"
|
|
3
|
-
version = "0.5.0.
|
|
3
|
+
version = "0.5.0.dev11"
|
|
4
4
|
description = "A collection of open-source tools to help interact with Numerai, model data, and automate submissions."
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "Numerai Engineering",email = "engineering@numer.ai"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|