numerai-tools 0.5.0.dev5__tar.gz → 0.5.0.dev7__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: numerai-tools
3
- Version: 0.5.0.dev5
3
+ Version: 0.5.0.dev7
4
4
  Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
5
5
  License: MIT
6
6
  Author: Numerai Engineering
@@ -66,9 +66,6 @@ def turnover(
66
66
  Arguments:
67
67
  s1: pd.Series - the first series to compare
68
68
  s2: pd.Series - the second series to compare
69
- top_bottom: Optional[int] - the number of top and bottom predictions to use
70
- when calculating the correlation. Results in
71
- 2*top_bottom predictions.
72
69
 
73
70
  Returns:
74
71
  float - the turnover between the two series
@@ -110,6 +107,7 @@ def calculate_max_churn_and_turnover(
110
107
  (
111
108
  curr_ticker_col,
112
109
  curr_signal_col,
110
+ _,
113
111
  curr_sub,
114
112
  _,
115
113
  ) = validate_submission_signals(
@@ -126,7 +124,10 @@ def calculate_max_churn_and_turnover(
126
124
  churn_stats = []
127
125
  turnover_stats = []
128
126
  neutralized_weights = generate_neutralized_weights(
129
- curr_sub_vector.to_frame(), curr_neutralizer, curr_weight
127
+ curr_sub_vector.to_frame(),
128
+ curr_neutralizer,
129
+ curr_weight,
130
+ center_and_normalize=True,
130
131
  )
131
132
  for datestamp in prev_week_subs:
132
133
  prev_sub = prev_week_subs[datestamp]
@@ -135,6 +136,7 @@ def calculate_max_churn_and_turnover(
135
136
  (
136
137
  prev_ticker_col,
137
138
  prev_signal_col,
139
+ _,
138
140
  prev_sub,
139
141
  _,
140
142
  ) = validate_submission_signals(
@@ -167,7 +169,10 @@ def calculate_max_churn_and_turnover(
167
169
  curr_ticker_col,
168
170
  ).set_index(curr_ticker_col)[prev_weight.name]
169
171
  prev_neutralized_weights = generate_neutralized_weights(
170
- filtered_prev_sub.to_frame(), prev_neutralizer, prev_weight
172
+ filtered_prev_sub.to_frame(),
173
+ prev_neutralizer,
174
+ prev_weight,
175
+ center_and_normalize=True,
171
176
  )
172
177
  try:
173
178
  churn_val = abs(churn(curr_sub_vector, filtered_prev_sub))
@@ -31,7 +31,6 @@ def _validate_headers(
31
31
  submission: pd.DataFrame,
32
32
  expected_id_cols: List[str],
33
33
  expected_pred_cols: List[str],
34
- other_cols: Optional[List[str]] = None,
35
34
  ) -> Tuple[str, str]:
36
35
  """Validate the given submission has the right headers.
37
36
  It is recommended to use one of the following functions instead of this one:
@@ -42,7 +41,6 @@ def _validate_headers(
42
41
  submission -- pandas DataFrame of the submission
43
42
  expected_id_cols -- list of expected id columns
44
43
  expected_pred_cols -- list of expected prediction columns
45
- other_cols -- optional list of other columns that can be present in the submission
46
44
 
47
45
  Return Tuple[str, str]:
48
46
  - string name of the id column
@@ -53,13 +51,6 @@ def _validate_headers(
53
51
  for ticker_col in expected_id_cols
54
52
  for signal_col in expected_pred_cols
55
53
  ]
56
- if other_cols is not None:
57
- expected_headers += [
58
- [ticker_col, signal_col, other_col]
59
- for ticker_col in expected_id_cols
60
- for signal_col in expected_pred_cols
61
- for other_col in other_cols
62
- ]
63
54
  columns = submission.columns
64
55
  valid_headers = list(columns) in expected_headers
65
56
  assert valid_headers, (
@@ -77,13 +68,30 @@ def validate_headers_numerai(submission: pd.DataFrame) -> Tuple[str, str]:
77
68
  )
78
69
 
79
70
 
80
- def validate_headers_signals(submission: pd.DataFrame) -> Tuple[str, str]:
81
- return _validate_headers(
71
+ def validate_headers_signals(
72
+ submission: pd.DataFrame, assert_date_col: bool = False
73
+ ) -> Tuple[str, str, Optional[str]]:
74
+ # remove date columns if they exist and store them temporarily
75
+ date_col_name: Optional[str] = None
76
+ date_col: Optional[pd.Series] = None
77
+ for col in submission.columns:
78
+ if col in SIGNALS_ALLOWED_DATE_COLS:
79
+ date_col_name = col
80
+ date_col = submission[date_col_name].copy()
81
+ submission = submission.drop(columns=date_col_name, errors="ignore")
82
+ break
83
+ if assert_date_col:
84
+ assert (
85
+ date_col_name is not None
86
+ ), "invalid_submission_headers: submission must contain a date column"
87
+ ticker_col, signal_col = _validate_headers(
82
88
  submission,
83
89
  SIGNALS_ALLOWED_ID_COLS,
84
90
  SIGNALS_ALLOWED_PRED_COLS,
85
- SIGNALS_ALLOWED_DATE_COLS,
86
91
  )
92
+ if date_col is not None:
93
+ submission[date_col_name] = date_col
94
+ return ticker_col, signal_col, date_col_name
87
95
 
88
96
 
89
97
  def validate_headers_crypto(submission: pd.DataFrame) -> Tuple[str, str]:
@@ -198,8 +206,8 @@ def validate_submission_numerai(
198
206
 
199
207
 
200
208
  def validate_submission_signals(
201
- universe: pd.DataFrame, submission: pd.DataFrame
202
- ) -> Tuple[str, str, pd.DataFrame, List[str]]:
209
+ universe: pd.DataFrame, submission: pd.DataFrame, assert_date_col: bool = False
210
+ ) -> Tuple[str, str, Optional[str], pd.DataFrame, List[str]]:
203
211
  """Validate the headers, ids, and values for a submission.
204
212
 
205
213
  Arguments:
@@ -216,13 +224,19 @@ def validate_submission_signals(
216
224
  "data_type column found in Signals submission. This is deprecated and support will be removed in the future. "
217
225
  "Please remove the data_type column from your Signals submission."
218
226
  )
219
- submission = submission.drop(columns=["data_type"], errors="ignore")
220
- ticker_col, signal_col = validate_headers_signals(submission)
227
+ submission.drop(
228
+ columns=["data_type"],
229
+ errors="ignore",
230
+ inplace=True,
231
+ )
232
+ ticker_col, signal_col, date_col = validate_headers_signals(
233
+ submission, assert_date_col
234
+ )
221
235
  filtered_sub, invalid_tickers = validate_ids_signals(
222
236
  universe[ticker_col], submission, ticker_col
223
237
  )
224
238
  validate_values(filtered_sub, signal_col)
225
- return ticker_col, signal_col, filtered_sub, invalid_tickers
239
+ return ticker_col, signal_col, date_col, filtered_sub, invalid_tickers
226
240
 
227
241
 
228
242
  def validate_submission_crypto(
@@ -238,7 +252,6 @@ def validate_submission_crypto(
238
252
  Tuple[str, str, pd.DataFrame, List[str]] - the validated ticker column, signal column,
239
253
  filtered submission, and list of invalid tickers
240
254
  """
241
- print(universe)
242
255
  ticker_col, signal_col = validate_headers_crypto(submission)
243
256
  filtered_sub, invalid_tickers = validate_ids_crypto(
244
257
  universe[ticker_col], submission, ticker_col
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "numerai-tools"
3
- version = "0.5.0.dev5"
3
+ version = "0.5.0.dev7"
4
4
  description = "A collection of open-source tools to help interact with Numerai, model data, and automate submissions."
5
5
  authors = [
6
6
  {name = "Numerai Engineering",email = "engineering@numer.ai"}