numerai-tools 0.5.0.dev5__tar.gz → 0.5.0.dev6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: numerai-tools
3
- Version: 0.5.0.dev5
3
+ Version: 0.5.0.dev6
4
4
  Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
5
5
  License: MIT
6
6
  Author: Numerai Engineering
@@ -66,9 +66,6 @@ def turnover(
66
66
  Arguments:
67
67
  s1: pd.Series - the first series to compare
68
68
  s2: pd.Series - the second series to compare
69
- top_bottom: Optional[int] - the number of top and bottom predictions to use
70
- when calculating the correlation. Results in
71
- 2*top_bottom predictions.
72
69
 
73
70
  Returns:
74
71
  float - the turnover between the two series
@@ -126,7 +123,10 @@ def calculate_max_churn_and_turnover(
126
123
  churn_stats = []
127
124
  turnover_stats = []
128
125
  neutralized_weights = generate_neutralized_weights(
129
- curr_sub_vector.to_frame(), curr_neutralizer, curr_weight
126
+ curr_sub_vector.to_frame(),
127
+ curr_neutralizer,
128
+ curr_weight,
129
+ center_and_normalize=True,
130
130
  )
131
131
  for datestamp in prev_week_subs:
132
132
  prev_sub = prev_week_subs[datestamp]
@@ -167,7 +167,10 @@ def calculate_max_churn_and_turnover(
167
167
  curr_ticker_col,
168
168
  ).set_index(curr_ticker_col)[prev_weight.name]
169
169
  prev_neutralized_weights = generate_neutralized_weights(
170
- filtered_prev_sub.to_frame(), prev_neutralizer, prev_weight
170
+ filtered_prev_sub.to_frame(),
171
+ prev_neutralizer,
172
+ prev_weight,
173
+ center_and_normalize=True,
171
174
  )
172
175
  try:
173
176
  churn_val = abs(churn(curr_sub_vector, filtered_prev_sub))
@@ -31,7 +31,6 @@ def _validate_headers(
31
31
  submission: pd.DataFrame,
32
32
  expected_id_cols: List[str],
33
33
  expected_pred_cols: List[str],
34
- other_cols: Optional[List[str]] = None,
35
34
  ) -> Tuple[str, str]:
36
35
  """Validate the given submission has the right headers.
37
36
  It is recommended to use one of the following functions instead of this one:
@@ -42,7 +41,6 @@ def _validate_headers(
42
41
  submission -- pandas DataFrame of the submission
43
42
  expected_id_cols -- list of expected id columns
44
43
  expected_pred_cols -- list of expected prediction columns
45
- other_cols -- optional list of other columns that can be present in the submission
46
44
 
47
45
  Return Tuple[str, str]:
48
46
  - string name of the id column
@@ -53,13 +51,6 @@ def _validate_headers(
53
51
  for ticker_col in expected_id_cols
54
52
  for signal_col in expected_pred_cols
55
53
  ]
56
- if other_cols is not None:
57
- expected_headers += [
58
- [ticker_col, signal_col, other_col]
59
- for ticker_col in expected_id_cols
60
- for signal_col in expected_pred_cols
61
- for other_col in other_cols
62
- ]
63
54
  columns = submission.columns
64
55
  valid_headers = list(columns) in expected_headers
65
56
  assert valid_headers, (
@@ -77,13 +68,30 @@ def validate_headers_numerai(submission: pd.DataFrame) -> Tuple[str, str]:
77
68
  )
78
69
 
79
70
 
80
- def validate_headers_signals(submission: pd.DataFrame) -> Tuple[str, str]:
81
- return _validate_headers(
71
+ def validate_headers_signals(
72
+ submission: pd.DataFrame, assert_date_col: bool = False
73
+ ) -> Tuple[str, str, Optional[str]]:
74
+ # remove date columns if they exist and store them temporarily
75
+ date_col_name: Optional[str] = None
76
+ date_col: Optional[pd.Series] = None
77
+ for col in submission.columns:
78
+ if col in SIGNALS_ALLOWED_DATE_COLS:
79
+ date_col_name = col
80
+ date_col = submission[date_col_name].copy()
81
+ submission = submission.drop(columns=date_col_name, errors="ignore")
82
+ break
83
+ if assert_date_col:
84
+ assert (
85
+ date_col_name is not None
86
+ ), "invalid_submission_headers: submission must contain a date column"
87
+ ticker_col, signal_col = _validate_headers(
82
88
  submission,
83
89
  SIGNALS_ALLOWED_ID_COLS,
84
90
  SIGNALS_ALLOWED_PRED_COLS,
85
- SIGNALS_ALLOWED_DATE_COLS,
86
91
  )
92
+ if date_col is not None:
93
+ submission[date_col_name] = date_col
94
+ return ticker_col, signal_col, date_col_name
87
95
 
88
96
 
89
97
  def validate_headers_crypto(submission: pd.DataFrame) -> Tuple[str, str]:
@@ -217,7 +225,7 @@ def validate_submission_signals(
217
225
  "Please remove the data_type column from your Signals submission."
218
226
  )
219
227
  submission = submission.drop(columns=["data_type"], errors="ignore")
220
- ticker_col, signal_col = validate_headers_signals(submission)
228
+ ticker_col, signal_col, _ = validate_headers_signals(submission)
221
229
  filtered_sub, invalid_tickers = validate_ids_signals(
222
230
  universe[ticker_col], submission, ticker_col
223
231
  )
@@ -238,7 +246,6 @@ def validate_submission_crypto(
238
246
  Tuple[str, str, pd.DataFrame, List[str]] - the validated ticker column, signal column,
239
247
  filtered submission, and list of invalid tickers
240
248
  """
241
- print(universe)
242
249
  ticker_col, signal_col = validate_headers_crypto(submission)
243
250
  filtered_sub, invalid_tickers = validate_ids_crypto(
244
251
  universe[ticker_col], submission, ticker_col
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "numerai-tools"
3
- version = "0.5.0.dev5"
3
+ version = "0.5.0.dev6"
4
4
  description = "A collection of open-source tools to help interact with Numerai, model data, and automate submissions."
5
5
  authors = [
6
6
  {name = "Numerai Engineering",email = "engineering@numer.ai"}