numerai-tools 0.5.0.dev5__tar.gz → 0.5.0.dev7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numerai_tools-0.5.0.dev5 → numerai_tools-0.5.0.dev7}/PKG-INFO +1 -1
- {numerai_tools-0.5.0.dev5 → numerai_tools-0.5.0.dev7}/numerai_tools/signals.py +10 -5
- {numerai_tools-0.5.0.dev5 → numerai_tools-0.5.0.dev7}/numerai_tools/submissions.py +31 -18
- {numerai_tools-0.5.0.dev5 → numerai_tools-0.5.0.dev7}/pyproject.toml +1 -1
- {numerai_tools-0.5.0.dev5 → numerai_tools-0.5.0.dev7}/LICENSE +0 -0
- {numerai_tools-0.5.0.dev5 → numerai_tools-0.5.0.dev7}/README.md +0 -0
- {numerai_tools-0.5.0.dev5 → numerai_tools-0.5.0.dev7}/numerai_tools/__init__.py +0 -0
- {numerai_tools-0.5.0.dev5 → numerai_tools-0.5.0.dev7}/numerai_tools/py.typed +0 -0
- {numerai_tools-0.5.0.dev5 → numerai_tools-0.5.0.dev7}/numerai_tools/scoring.py +0 -0
|
@@ -66,9 +66,6 @@ def turnover(
|
|
|
66
66
|
Arguments:
|
|
67
67
|
s1: pd.Series - the first series to compare
|
|
68
68
|
s2: pd.Series - the second series to compare
|
|
69
|
-
top_bottom: Optional[int] - the number of top and bottom predictions to use
|
|
70
|
-
when calculating the correlation. Results in
|
|
71
|
-
2*top_bottom predictions.
|
|
72
69
|
|
|
73
70
|
Returns:
|
|
74
71
|
float - the turnover between the two series
|
|
@@ -110,6 +107,7 @@ def calculate_max_churn_and_turnover(
|
|
|
110
107
|
(
|
|
111
108
|
curr_ticker_col,
|
|
112
109
|
curr_signal_col,
|
|
110
|
+
_,
|
|
113
111
|
curr_sub,
|
|
114
112
|
_,
|
|
115
113
|
) = validate_submission_signals(
|
|
@@ -126,7 +124,10 @@ def calculate_max_churn_and_turnover(
|
|
|
126
124
|
churn_stats = []
|
|
127
125
|
turnover_stats = []
|
|
128
126
|
neutralized_weights = generate_neutralized_weights(
|
|
129
|
-
curr_sub_vector.to_frame(),
|
|
127
|
+
curr_sub_vector.to_frame(),
|
|
128
|
+
curr_neutralizer,
|
|
129
|
+
curr_weight,
|
|
130
|
+
center_and_normalize=True,
|
|
130
131
|
)
|
|
131
132
|
for datestamp in prev_week_subs:
|
|
132
133
|
prev_sub = prev_week_subs[datestamp]
|
|
@@ -135,6 +136,7 @@ def calculate_max_churn_and_turnover(
|
|
|
135
136
|
(
|
|
136
137
|
prev_ticker_col,
|
|
137
138
|
prev_signal_col,
|
|
139
|
+
_,
|
|
138
140
|
prev_sub,
|
|
139
141
|
_,
|
|
140
142
|
) = validate_submission_signals(
|
|
@@ -167,7 +169,10 @@ def calculate_max_churn_and_turnover(
|
|
|
167
169
|
curr_ticker_col,
|
|
168
170
|
).set_index(curr_ticker_col)[prev_weight.name]
|
|
169
171
|
prev_neutralized_weights = generate_neutralized_weights(
|
|
170
|
-
filtered_prev_sub.to_frame(),
|
|
172
|
+
filtered_prev_sub.to_frame(),
|
|
173
|
+
prev_neutralizer,
|
|
174
|
+
prev_weight,
|
|
175
|
+
center_and_normalize=True,
|
|
171
176
|
)
|
|
172
177
|
try:
|
|
173
178
|
churn_val = abs(churn(curr_sub_vector, filtered_prev_sub))
|
|
@@ -31,7 +31,6 @@ def _validate_headers(
|
|
|
31
31
|
submission: pd.DataFrame,
|
|
32
32
|
expected_id_cols: List[str],
|
|
33
33
|
expected_pred_cols: List[str],
|
|
34
|
-
other_cols: Optional[List[str]] = None,
|
|
35
34
|
) -> Tuple[str, str]:
|
|
36
35
|
"""Validate the given submission has the right headers.
|
|
37
36
|
It is recommended to use one of the following functions instead of this one:
|
|
@@ -42,7 +41,6 @@ def _validate_headers(
|
|
|
42
41
|
submission -- pandas DataFrame of the submission
|
|
43
42
|
expected_id_cols -- list of expected id columns
|
|
44
43
|
expected_pred_cols -- list of expected prediction columns
|
|
45
|
-
other_cols -- optional list of other columns that can be present in the submission
|
|
46
44
|
|
|
47
45
|
Return Tuple[str, str]:
|
|
48
46
|
- string name of the id column
|
|
@@ -53,13 +51,6 @@ def _validate_headers(
|
|
|
53
51
|
for ticker_col in expected_id_cols
|
|
54
52
|
for signal_col in expected_pred_cols
|
|
55
53
|
]
|
|
56
|
-
if other_cols is not None:
|
|
57
|
-
expected_headers += [
|
|
58
|
-
[ticker_col, signal_col, other_col]
|
|
59
|
-
for ticker_col in expected_id_cols
|
|
60
|
-
for signal_col in expected_pred_cols
|
|
61
|
-
for other_col in other_cols
|
|
62
|
-
]
|
|
63
54
|
columns = submission.columns
|
|
64
55
|
valid_headers = list(columns) in expected_headers
|
|
65
56
|
assert valid_headers, (
|
|
@@ -77,13 +68,30 @@ def validate_headers_numerai(submission: pd.DataFrame) -> Tuple[str, str]:
|
|
|
77
68
|
)
|
|
78
69
|
|
|
79
70
|
|
|
80
|
-
def validate_headers_signals(
|
|
81
|
-
|
|
71
|
+
def validate_headers_signals(
|
|
72
|
+
submission: pd.DataFrame, assert_date_col: bool = False
|
|
73
|
+
) -> Tuple[str, str, Optional[str]]:
|
|
74
|
+
# remove date columns if they exist and store them temporarily
|
|
75
|
+
date_col_name: Optional[str] = None
|
|
76
|
+
date_col: Optional[pd.Series] = None
|
|
77
|
+
for col in submission.columns:
|
|
78
|
+
if col in SIGNALS_ALLOWED_DATE_COLS:
|
|
79
|
+
date_col_name = col
|
|
80
|
+
date_col = submission[date_col_name].copy()
|
|
81
|
+
submission = submission.drop(columns=date_col_name, errors="ignore")
|
|
82
|
+
break
|
|
83
|
+
if assert_date_col:
|
|
84
|
+
assert (
|
|
85
|
+
date_col_name is not None
|
|
86
|
+
), "invalid_submission_headers: submission must contain a date column"
|
|
87
|
+
ticker_col, signal_col = _validate_headers(
|
|
82
88
|
submission,
|
|
83
89
|
SIGNALS_ALLOWED_ID_COLS,
|
|
84
90
|
SIGNALS_ALLOWED_PRED_COLS,
|
|
85
|
-
SIGNALS_ALLOWED_DATE_COLS,
|
|
86
91
|
)
|
|
92
|
+
if date_col is not None:
|
|
93
|
+
submission[date_col_name] = date_col
|
|
94
|
+
return ticker_col, signal_col, date_col_name
|
|
87
95
|
|
|
88
96
|
|
|
89
97
|
def validate_headers_crypto(submission: pd.DataFrame) -> Tuple[str, str]:
|
|
@@ -198,8 +206,8 @@ def validate_submission_numerai(
|
|
|
198
206
|
|
|
199
207
|
|
|
200
208
|
def validate_submission_signals(
|
|
201
|
-
universe: pd.DataFrame, submission: pd.DataFrame
|
|
202
|
-
) -> Tuple[str, str, pd.DataFrame, List[str]]:
|
|
209
|
+
universe: pd.DataFrame, submission: pd.DataFrame, assert_date_col: bool = False
|
|
210
|
+
) -> Tuple[str, str, Optional[str], pd.DataFrame, List[str]]:
|
|
203
211
|
"""Validate the headers, ids, and values for a submission.
|
|
204
212
|
|
|
205
213
|
Arguments:
|
|
@@ -216,13 +224,19 @@ def validate_submission_signals(
|
|
|
216
224
|
"data_type column found in Signals submission. This is deprecated and support will be removed in the future. "
|
|
217
225
|
"Please remove the data_type column from your Signals submission."
|
|
218
226
|
)
|
|
219
|
-
submission
|
|
220
|
-
|
|
227
|
+
submission.drop(
|
|
228
|
+
columns=["data_type"],
|
|
229
|
+
errors="ignore",
|
|
230
|
+
inplace=True,
|
|
231
|
+
)
|
|
232
|
+
ticker_col, signal_col, date_col = validate_headers_signals(
|
|
233
|
+
submission, assert_date_col
|
|
234
|
+
)
|
|
221
235
|
filtered_sub, invalid_tickers = validate_ids_signals(
|
|
222
236
|
universe[ticker_col], submission, ticker_col
|
|
223
237
|
)
|
|
224
238
|
validate_values(filtered_sub, signal_col)
|
|
225
|
-
return ticker_col, signal_col, filtered_sub, invalid_tickers
|
|
239
|
+
return ticker_col, signal_col, date_col, filtered_sub, invalid_tickers
|
|
226
240
|
|
|
227
241
|
|
|
228
242
|
def validate_submission_crypto(
|
|
@@ -238,7 +252,6 @@ def validate_submission_crypto(
|
|
|
238
252
|
Tuple[str, str, pd.DataFrame, List[str]] - the validated ticker column, signal column,
|
|
239
253
|
filtered submission, and list of invalid tickers
|
|
240
254
|
"""
|
|
241
|
-
print(universe)
|
|
242
255
|
ticker_col, signal_col = validate_headers_crypto(submission)
|
|
243
256
|
filtered_sub, invalid_tickers = validate_ids_crypto(
|
|
244
257
|
universe[ticker_col], submission, ticker_col
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "numerai-tools"
|
|
3
|
-
version = "0.5.0.dev5"
|
|
3
|
+
version = "0.5.0.dev7"
|
|
4
4
|
description = "A collection of open-source tools to help interact with Numerai, model data, and automate submissions."
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "Numerai Engineering",email = "engineering@numer.ai"}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|