numerai-tools 0.4.3__tar.gz → 0.5.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/PKG-INFO +1 -1
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/numerai_tools/scoring.py +53 -43
- numerai_tools-0.5.0.dev1/numerai_tools/signals.py +217 -0
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/numerai_tools/submissions.py +23 -5
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/numerai_tools.egg-info/PKG-INFO +1 -1
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/setup.py +1 -1
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/tests/test_scoring.py +2 -2
- numerai_tools-0.5.0.dev1/tests/test_signals.py +139 -0
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/tests/test_submissions.py +13 -1
- numerai_tools-0.4.3/numerai_tools/signals.py +0 -72
- numerai_tools-0.4.3/tests/test_signals.py +0 -51
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/LICENSE +0 -0
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/README.md +0 -0
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/numerai_tools/__init__.py +0 -0
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/numerai_tools/py.typed +0 -0
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/numerai_tools.egg-info/SOURCES.txt +0 -0
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/numerai_tools.egg-info/dependency_links.txt +0 -0
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/numerai_tools.egg-info/requires.txt +0 -0
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/numerai_tools.egg-info/top_level.txt +0 -0
- {numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: numerai_tools
|
|
3
|
-
Version: 0.4.3
|
|
3
|
+
Version: 0.5.0.dev1
|
|
4
4
|
Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
|
|
5
5
|
Home-page: https://github.com/numerai/numerai-tools
|
|
6
6
|
Maintainer: Numerai
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from typing import List, Tuple, Union, Optional, TypeVar
|
|
1
|
+
from typing import List, Tuple, Union, Optional, TypeVar, cast, Any
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
|
-
import pandas as pd
|
|
5
|
-
from scipy import stats
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from scipy import stats
|
|
6
6
|
from sklearn.preprocessing import OneHotEncoder # type: ignore
|
|
7
7
|
|
|
8
8
|
|
|
@@ -43,12 +43,13 @@ def filter_sort_index(
|
|
|
43
43
|
"s2 does not have enough overlapping ids with s1,"
|
|
44
44
|
f" must have >= {round(1-max_filtered_ratio,2)*100}% overlapping ids"
|
|
45
45
|
)
|
|
46
|
-
return s1.loc[ids].sort_index(), s2.loc[ids].sort_index()
|
|
46
|
+
return cast(S1, s1.loc[ids].sort_index()), cast(S2, s2.loc[ids].sort_index())
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
def filter_sort_index_many(
|
|
50
|
-
inputs: List[
|
|
51
|
-
|
|
50
|
+
inputs: List[Any],
|
|
51
|
+
max_filtered_ratio: float = DEFAULT_MAX_FILTERED_INDEX_RATIO,
|
|
52
|
+
) -> List[Any]:
|
|
52
53
|
"""Filters the indices of the given list of series to match each other,
|
|
53
54
|
then sorts the indices, then checks that we didn't filter too many indices
|
|
54
55
|
before returning the filtered and sorted series.
|
|
@@ -74,25 +75,38 @@ def filter_sort_index_many(
|
|
|
74
75
|
|
|
75
76
|
|
|
76
77
|
def filter_sort_top_bottom(
|
|
77
|
-
s: pd.Series, top_bottom: int
|
|
78
|
-
) ->
|
|
78
|
+
s: pd.Series, top_bottom: int
|
|
79
|
+
) -> Tuple[pd.Series, pd.Series]:
|
|
79
80
|
"""Filters the series according to the top n and bottom n values
|
|
80
|
-
then sorts the index and returns
|
|
81
|
+
then sorts the index and returns two filtered and sorted series
|
|
82
|
+
for the top and bottom values respectively.
|
|
81
83
|
|
|
82
84
|
Arguments:
|
|
83
85
|
s: pd.Series - the data to filter and sort
|
|
84
86
|
top_bottom: int - the number of top n and bottom n values to keep
|
|
85
87
|
|
|
86
88
|
Returns:
|
|
87
|
-
pd.Series - the filtered and sorted
|
|
89
|
+
Tuple[pd.Series, pd.Series] - the filtered and sorted top and bottom series respectively
|
|
88
90
|
"""
|
|
89
91
|
tb_idx = np.argsort(s, kind="stable")
|
|
90
92
|
bot = s.iloc[tb_idx[:top_bottom]]
|
|
91
93
|
top = s.iloc[tb_idx[-top_bottom:]]
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
94
|
+
return top.sort_index(), bot.sort_index()
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def filter_sort_top_bottom_concat(s: pd.Series, top_bottom: int) -> pd.Series:
|
|
98
|
+
"""Similar to filter_sort_top_bottom, but concatenates the top and bottom series
|
|
99
|
+
into 1 series and then sorts the index.
|
|
100
|
+
|
|
101
|
+
Arguments:
|
|
102
|
+
s: pd.Series - the data to filter and sort
|
|
103
|
+
top_bottom: int - the number of top n and bottom n values to keep
|
|
104
|
+
|
|
105
|
+
Returns:
|
|
106
|
+
pd.Series - the concatenated and sorted series of top and bottom values
|
|
107
|
+
"""
|
|
108
|
+
top, bot = filter_sort_top_bottom(s, top_bottom)
|
|
109
|
+
return pd.concat([top, bot]).sort_index()
|
|
96
110
|
|
|
97
111
|
|
|
98
112
|
def rank(df: pd.DataFrame, method: str = "average") -> pd.DataFrame:
|
|
@@ -133,14 +147,14 @@ def variance_normalize(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
133
147
|
return df / np.std(df, axis=0)
|
|
134
148
|
|
|
135
149
|
|
|
136
|
-
def weight_normalize(
|
|
137
|
-
"""Scale a
|
|
138
|
-
return
|
|
150
|
+
def weight_normalize(s: S1) -> S1:
|
|
151
|
+
"""Scale a input such that all columns have absolute value sum == 1."""
|
|
152
|
+
return cast(S1, s / s.abs().sum(axis=0))
|
|
139
153
|
|
|
140
154
|
|
|
141
|
-
def center(
|
|
142
|
-
"""Shift the
|
|
143
|
-
return
|
|
155
|
+
def center(s: S1) -> S1:
|
|
156
|
+
"""Shift the input such that all columns have mean == 0."""
|
|
157
|
+
return cast(S1, s - s.mean())
|
|
144
158
|
|
|
145
159
|
|
|
146
160
|
def standardize(df: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -179,7 +193,7 @@ def pearson_correlation(
|
|
|
179
193
|
target: pd.Series, predictions: pd.Series, top_bottom: Optional[int] = None
|
|
180
194
|
) -> float:
|
|
181
195
|
if top_bottom is not None and top_bottom > 0:
|
|
182
|
-
predictions =
|
|
196
|
+
predictions = filter_sort_top_bottom_concat(predictions, top_bottom)
|
|
183
197
|
target, predictions = filter_sort_index(
|
|
184
198
|
target, predictions, (1 - top_bottom / len(target))
|
|
185
199
|
)
|
|
@@ -205,7 +219,7 @@ def power(df: pd.DataFrame, p: float) -> pd.DataFrame:
|
|
|
205
219
|
"""
|
|
206
220
|
assert not df.isna().any().any(), "Data contains NaNs"
|
|
207
221
|
assert np.array_equal(df.index.sort_values(), df.index), "Index is not sorted"
|
|
208
|
-
result = np.sign(df) * np.abs(df) ** p
|
|
222
|
+
result = cast(pd.DataFrame, np.sign(df) * np.abs(df) ** p)
|
|
209
223
|
assert ((result.std() == 0) | (result.corrwith(df) >= 0.9)).all()
|
|
210
224
|
return result
|
|
211
225
|
|
|
@@ -221,7 +235,7 @@ def gaussian(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
221
235
|
pd.DataFrame - the gaussianized data
|
|
222
236
|
"""
|
|
223
237
|
assert np.array_equal(df.index.sort_values(), df.index)
|
|
224
|
-
return df.apply(lambda series: stats.norm.ppf(series))
|
|
238
|
+
return df.apply(lambda series: cast(np.ndarray, stats.norm.ppf(series)))
|
|
225
239
|
|
|
226
240
|
|
|
227
241
|
def orthogonalize(v: np.ndarray, u: np.ndarray) -> np.ndarray:
|
|
@@ -303,7 +317,7 @@ def correlation_contribution(
|
|
|
303
317
|
m = gaussian(tie_kept_rank(meta_model.to_frame()))[meta_model.name].values
|
|
304
318
|
|
|
305
319
|
# orthogonalize predictions wrt meta model
|
|
306
|
-
neutral_preds = orthogonalize(p, m)
|
|
320
|
+
neutral_preds = orthogonalize(p, cast(np.ndarray, m))
|
|
307
321
|
|
|
308
322
|
# convert target to buckets [-2, -1, 0, 1, 2]
|
|
309
323
|
if (live_targets >= 0).all() and (live_targets <= 1).all():
|
|
@@ -314,9 +328,9 @@ def correlation_contribution(
|
|
|
314
328
|
# filter each column to its top and bottom n predictions
|
|
315
329
|
neutral_preds_df = pd.DataFrame(
|
|
316
330
|
neutral_preds, columns=predictions.columns, index=predictions.index
|
|
317
|
-
).apply(lambda p:
|
|
318
|
-
|
|
319
|
-
|
|
331
|
+
).apply(lambda p: filter_sort_top_bottom_concat(p, top_bottom))
|
|
332
|
+
mmc_matrix = (
|
|
333
|
+
# create a dataframe for targets to match the filtered predictions
|
|
320
334
|
neutral_preds_df.apply(
|
|
321
335
|
lambda p: filter_sort_index(
|
|
322
336
|
p,
|
|
@@ -326,19 +340,15 @@ def correlation_contribution(
|
|
|
326
340
|
)
|
|
327
341
|
.fillna(0)
|
|
328
342
|
.T.values
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
# multiply target and neutralized predictions
|
|
334
|
-
# this is equivalent to covariance b/c mean = 0
|
|
335
|
-
mmc = live_targets @ neutral_preds
|
|
336
|
-
if top_bottom is not None and top_bottom > 0:
|
|
343
|
+
# then fill NaNs with 0 so we don't get NaNs in the dot product
|
|
344
|
+
# and mutiply target w/ neutral preds to get MMC
|
|
345
|
+
) @ neutral_preds_df.fillna(0).values
|
|
337
346
|
# only the diagonal is the proper score
|
|
338
|
-
mmc = np.diag(
|
|
347
|
+
mmc = np.diag(mmc_matrix) / (top_bottom * 2)
|
|
339
348
|
else:
|
|
340
|
-
|
|
341
|
-
|
|
349
|
+
# multiply target and neutralized predictions
|
|
350
|
+
# this is equivalent to covariance b/c mean = 0
|
|
351
|
+
mmc = (live_targets @ neutral_preds) / len(live_targets)
|
|
342
352
|
return pd.Series(mmc, index=predictions.columns)
|
|
343
353
|
|
|
344
354
|
|
|
@@ -522,10 +532,10 @@ def max_feature_correlation(
|
|
|
522
532
|
feature_correlations = features.apply(
|
|
523
533
|
lambda f: pearson_correlation(f, s, top_bottom)
|
|
524
534
|
)
|
|
525
|
-
feature_correlations =
|
|
535
|
+
feature_correlations = feature_correlations.abs()
|
|
526
536
|
max_feature = feature_correlations.idxmax()
|
|
527
537
|
max_corr = feature_correlations[max_feature]
|
|
528
|
-
return max_feature, max_corr
|
|
538
|
+
return str(max_feature), max_corr
|
|
529
539
|
|
|
530
540
|
|
|
531
541
|
def generate_neutralized_weights(
|
|
@@ -608,9 +618,9 @@ def meta_portfolio_contribution(
|
|
|
608
618
|
s_prime, neutralizers, sample_weights
|
|
609
619
|
)
|
|
610
620
|
)
|
|
611
|
-
w = weights[stakes.index].values
|
|
612
|
-
s = stake_weights.values
|
|
613
|
-
t = targets.values
|
|
621
|
+
w = cast(np.ndarray, weights[stakes.index].values)
|
|
622
|
+
s = cast(np.ndarray, stake_weights.values)
|
|
623
|
+
t = cast(np.ndarray, targets.values)
|
|
614
624
|
swp = w @ s
|
|
615
625
|
swp = swp - swp.mean()
|
|
616
626
|
l1_norm = np.sum(np.abs(swp))
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
from typing import Tuple, Optional
|
|
2
|
+
|
|
3
|
+
from numerai_tools.submissions import validate_headers_signals, validate_ids_signals
|
|
4
|
+
from numerai_tools.scoring import (
|
|
5
|
+
filter_sort_index,
|
|
6
|
+
filter_sort_top_bottom,
|
|
7
|
+
spearman_correlation,
|
|
8
|
+
tie_kept_rank,
|
|
9
|
+
tie_kept_rank__gaussianize__pow_1_5,
|
|
10
|
+
filter_sort_index_many,
|
|
11
|
+
generate_neutralized_weights,
|
|
12
|
+
weight_normalize,
|
|
13
|
+
center,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def churn(
|
|
20
|
+
s1: pd.Series,
|
|
21
|
+
s2: pd.Series,
|
|
22
|
+
top_bottom: Optional[int] = None,
|
|
23
|
+
) -> float:
|
|
24
|
+
"""Calculate the churn between two series. Churn is the proportion of elements
|
|
25
|
+
that are different between the two series.
|
|
26
|
+
|
|
27
|
+
For 2 given series with overlapping indices, churn is 1 - Spearman Correlation.
|
|
28
|
+
If top_bottom is provided, the churn is calculated as the average of the % of
|
|
29
|
+
tickers that stay in the top and bottom predictions. This is only relevant when
|
|
30
|
+
the series are rank signals and not portfolio weights.
|
|
31
|
+
|
|
32
|
+
Arguments:
|
|
33
|
+
s1: pd.Series - the first series to compare
|
|
34
|
+
s2: pd.Series - the second series to compare
|
|
35
|
+
top_bottom: Optional[int] - the number of top and bottom predictions to use
|
|
36
|
+
when calculating the correlation. Results in
|
|
37
|
+
2*top_bottom predictions.
|
|
38
|
+
|
|
39
|
+
Returns:
|
|
40
|
+
float - the churn between the two series
|
|
41
|
+
"""
|
|
42
|
+
if top_bottom is not None and top_bottom > 0:
|
|
43
|
+
s1_top, s1_bot = filter_sort_top_bottom(s1, top_bottom)
|
|
44
|
+
s2_top, s2_bot = filter_sort_top_bottom(s2, top_bottom)
|
|
45
|
+
top_overlap = len(s1_top.index.intersection(s2_top.index)) / top_bottom
|
|
46
|
+
bot_overlap = len(s1_bot.index.intersection(s2_bot.index)) / top_bottom
|
|
47
|
+
avg_overlap = (top_overlap + bot_overlap) / 2
|
|
48
|
+
return 1 - avg_overlap
|
|
49
|
+
|
|
50
|
+
s1, s2 = filter_sort_index(s1, s2)
|
|
51
|
+
assert s1.std() > 0, "s1 must have non-zero standard deviation"
|
|
52
|
+
assert s2.std() > 0, "s2 must have non-zero standard deviation"
|
|
53
|
+
return 1 - spearman_correlation(s1, s2)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def turnover(
|
|
57
|
+
s1: pd.Series,
|
|
58
|
+
s2: pd.Series,
|
|
59
|
+
):
|
|
60
|
+
"""Calculate the turnover between two series. Turnover is the total change in weights between
|
|
61
|
+
the two series divided by 2.
|
|
62
|
+
|
|
63
|
+
For 2 given series with overlapping indices, join the series on index, fill nans with zeroes
|
|
64
|
+
and calculate turnover as the absolute total difference between the two series divided by 2.
|
|
65
|
+
This is only relevant when the series are portfolio weights and not rank signals.
|
|
66
|
+
|
|
67
|
+
Arguments:
|
|
68
|
+
s1: pd.Series - the first series to compare
|
|
69
|
+
s2: pd.Series - the second series to compare
|
|
70
|
+
top_bottom: Optional[int] - the number of top and bottom predictions to use
|
|
71
|
+
when calculating the correlation. Results in
|
|
72
|
+
2*top_bottom predictions.
|
|
73
|
+
|
|
74
|
+
Returns:
|
|
75
|
+
float - the turnover between the two series
|
|
76
|
+
"""
|
|
77
|
+
s1, s2 = filter_sort_index(s1, s2)
|
|
78
|
+
turnover = (s1 - s2).abs().sum() / 2
|
|
79
|
+
return turnover
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def neutral_weight(
|
|
83
|
+
submission: pd.Series,
|
|
84
|
+
signal_col: str,
|
|
85
|
+
neutralizer: pd.DataFrame,
|
|
86
|
+
weight: pd.Series,
|
|
87
|
+
) -> pd.Series:
|
|
88
|
+
s_prime = tie_kept_rank__gaussianize__pow_1_5(submission.to_frame())
|
|
89
|
+
s_prime, neutralizer, weight = filter_sort_index_many(
|
|
90
|
+
[s_prime, neutralizer, weight]
|
|
91
|
+
)
|
|
92
|
+
neutral_weights = generate_neutralized_weights(
|
|
93
|
+
s_prime[signal_col], neutralizer, weight
|
|
94
|
+
)
|
|
95
|
+
neutral_weights = weight_normalize(center(neutral_weights.to_frame()))[0]
|
|
96
|
+
return neutral_weights.sort_index()
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def remap_ticker_col(
|
|
100
|
+
predictions: pd.DataFrame,
|
|
101
|
+
universe: pd.DataFrame,
|
|
102
|
+
ticker_col: str,
|
|
103
|
+
) -> pd.DataFrame:
|
|
104
|
+
return (
|
|
105
|
+
predictions.join(universe, how="right")
|
|
106
|
+
.reset_index()
|
|
107
|
+
.set_index(ticker_col)
|
|
108
|
+
.sort_index()
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def rank_and_fill_signal(
|
|
113
|
+
universe: pd.DataFrame,
|
|
114
|
+
submission: pd.Series,
|
|
115
|
+
signal_col: str,
|
|
116
|
+
) -> pd.Series:
|
|
117
|
+
uni_joined_sub = universe.sort_index().join(
|
|
118
|
+
tie_kept_rank(submission.sort_index().to_frame())
|
|
119
|
+
)[[signal_col]]
|
|
120
|
+
filled_sub = uni_joined_sub.fillna(uni_joined_sub.median()).sort_index()
|
|
121
|
+
return filled_sub[signal_col]
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def calculate_max_churn_and_turnover(
|
|
125
|
+
curr_sub: pd.DataFrame,
|
|
126
|
+
curr_neutralizer: pd.DataFrame,
|
|
127
|
+
curr_weight: pd.Series,
|
|
128
|
+
prev_week_subs: dict[str, pd.DataFrame],
|
|
129
|
+
prev_neutralizers: dict[str, pd.DataFrame],
|
|
130
|
+
prev_sample_weights: dict[str, pd.Series],
|
|
131
|
+
universe: pd.DataFrame,
|
|
132
|
+
curr_signal_col: str,
|
|
133
|
+
curr_ticker_col: str,
|
|
134
|
+
) -> Tuple[float, float]:
|
|
135
|
+
"""Calculate the maximum churn and turnover with respect to previous submissions.
|
|
136
|
+
|
|
137
|
+
Arguments:
|
|
138
|
+
curr_sub -- the current submission
|
|
139
|
+
curr_neutralizer -- the neutralizer DataFrame for the current submission
|
|
140
|
+
curr_weight -- the sample weights Series for the current submission
|
|
141
|
+
prev_week_subs -- a dictionary of datestamps to submissions
|
|
142
|
+
prev_neutralizers -- a dictionary of datestamps to neutralizers
|
|
143
|
+
prev_sample_weights -- a dictionary of datestamps to sample weights
|
|
144
|
+
universe -- the internal universe DataFrame
|
|
145
|
+
curr_signal_col -- the column name for signal in the current submission
|
|
146
|
+
curr_ticker_col -- the column name for tickers in the current submission
|
|
147
|
+
|
|
148
|
+
Returns:
|
|
149
|
+
prev_week_max_churn -- the maximum churn from previous submissions
|
|
150
|
+
prev_week_max_turnover -- the maximum turnover from previous submissions
|
|
151
|
+
"""
|
|
152
|
+
curr_sub_vector: pd.Series = rank_and_fill_signal(
|
|
153
|
+
universe,
|
|
154
|
+
curr_sub.reset_index().set_index(curr_ticker_col).sort_index()[curr_signal_col],
|
|
155
|
+
curr_signal_col,
|
|
156
|
+
)
|
|
157
|
+
churn_stats = []
|
|
158
|
+
turnover_stats = []
|
|
159
|
+
neutralized_weights = neutral_weight(
|
|
160
|
+
curr_sub_vector, curr_signal_col, curr_neutralizer, curr_weight
|
|
161
|
+
)
|
|
162
|
+
for datestamp in prev_week_subs:
|
|
163
|
+
prev_sub = prev_week_subs[datestamp]
|
|
164
|
+
prev_neutralizer = prev_neutralizers[datestamp]
|
|
165
|
+
prev_weight = prev_sample_weights[datestamp]
|
|
166
|
+
prev_ticker_col, prev_signal_col = validate_headers_signals(prev_sub)
|
|
167
|
+
prev_universe = universe.reset_index().set_index(prev_ticker_col)
|
|
168
|
+
filtered_prev_sub_df, _ = validate_ids_signals(
|
|
169
|
+
prev_universe.index.to_series(), prev_sub, prev_ticker_col
|
|
170
|
+
)
|
|
171
|
+
# in case the previous submission has a different ticker column,
|
|
172
|
+
# remap the ticker column of prev data to the current ticker column
|
|
173
|
+
filtered_prev_sub = remap_ticker_col(
|
|
174
|
+
filtered_prev_sub_df.set_index(prev_ticker_col),
|
|
175
|
+
universe=prev_universe,
|
|
176
|
+
ticker_col=curr_ticker_col,
|
|
177
|
+
)[curr_signal_col]
|
|
178
|
+
filtered_prev_sub = rank_and_fill_signal(
|
|
179
|
+
universe=universe,
|
|
180
|
+
submission=filtered_prev_sub,
|
|
181
|
+
signal_col=curr_signal_col,
|
|
182
|
+
)
|
|
183
|
+
prev_neutralizer = remap_ticker_col(
|
|
184
|
+
prev_neutralizer,
|
|
185
|
+
universe=prev_universe,
|
|
186
|
+
ticker_col=curr_ticker_col,
|
|
187
|
+
).filter(like="neutralizer_")
|
|
188
|
+
prev_weight = remap_ticker_col(
|
|
189
|
+
prev_weight.to_frame(),
|
|
190
|
+
universe=prev_universe,
|
|
191
|
+
ticker_col=curr_ticker_col,
|
|
192
|
+
)[prev_weight.name]
|
|
193
|
+
prev_neutralized_weights = neutral_weight(
|
|
194
|
+
filtered_prev_sub, prev_signal_col, prev_neutralizer, prev_weight
|
|
195
|
+
)
|
|
196
|
+
try:
|
|
197
|
+
churn_val = abs(churn(curr_sub_vector, filtered_prev_sub))
|
|
198
|
+
except AssertionError as e:
|
|
199
|
+
if "does not have enough overlapping ids" in str(e):
|
|
200
|
+
continue
|
|
201
|
+
try:
|
|
202
|
+
turnover_val = abs(turnover(neutralized_weights, prev_neutralized_weights))
|
|
203
|
+
except AssertionError as e:
|
|
204
|
+
if "does not have enough overlapping ids" in str(e):
|
|
205
|
+
continue
|
|
206
|
+
|
|
207
|
+
churn_stats.append(churn_val)
|
|
208
|
+
turnover_stats.append(turnover_val)
|
|
209
|
+
if len(churn_stats) == 0:
|
|
210
|
+
prev_week_max_churn = 1.0
|
|
211
|
+
else:
|
|
212
|
+
prev_week_max_churn = max(churn_stats)
|
|
213
|
+
if len(turnover_stats) == 0:
|
|
214
|
+
prev_week_max_turnover = 1.0
|
|
215
|
+
else:
|
|
216
|
+
prev_week_max_turnover = max(turnover_stats)
|
|
217
|
+
return prev_week_max_churn, prev_week_max_turnover
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from numerai_tools.scoring import tie_kept_rank
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
from typing import Tuple, List
|
|
4
5
|
|
|
5
6
|
import pandas as pd
|
|
@@ -16,12 +17,15 @@ SIGNALS_ALLOWED_ID_COLS = [
|
|
|
16
17
|
"numerai_ticker",
|
|
17
18
|
]
|
|
18
19
|
SIGNALS_ALLOWED_PRED_COLS = ["prediction", "signal"]
|
|
20
|
+
SIGNALS_ALLOWED_DATE_COLS = ["friday_date", "date"]
|
|
19
21
|
SIGNALS_MIN_TICKERS = 100
|
|
20
22
|
|
|
21
23
|
CRYPTO_ALLOWED_ID_COLS = ["symbol"]
|
|
22
24
|
CRYPTO_ALLOWED_PRED_COLS = ["prediction", "signal"]
|
|
23
25
|
CRYPTO_MIN_TICKERS = 100
|
|
24
26
|
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
25
29
|
|
|
26
30
|
def _validate_headers(
|
|
27
31
|
expected_id_cols: List[str], expected_pred_cols: List[str], submission: pd.DataFrame
|
|
@@ -58,6 +62,17 @@ def validate_headers_numerai(submission: pd.DataFrame) -> Tuple[str, str]:
|
|
|
58
62
|
|
|
59
63
|
|
|
60
64
|
def validate_headers_signals(submission: pd.DataFrame) -> Tuple[str, str]:
|
|
65
|
+
if "data_type" in submission.columns:
|
|
66
|
+
logger.warning(
|
|
67
|
+
"data_type column found in Signals submission. This is deprecated and will be removed in the future. "
|
|
68
|
+
"Please remove the data_type column from your Signals submission."
|
|
69
|
+
)
|
|
70
|
+
date_col = [
|
|
71
|
+
date_col
|
|
72
|
+
for date_col in SIGNALS_ALLOWED_DATE_COLS
|
|
73
|
+
if date_col in list(submission.columns)
|
|
74
|
+
]
|
|
75
|
+
submission = submission.drop(columns=["data_type", *date_col], errors="ignore")
|
|
61
76
|
return _validate_headers(
|
|
62
77
|
SIGNALS_ALLOWED_ID_COLS, SIGNALS_ALLOWED_PRED_COLS, submission
|
|
63
78
|
)
|
|
@@ -155,7 +170,7 @@ def clean_predictions(
|
|
|
155
170
|
predictions: pd.DataFrame,
|
|
156
171
|
id_col: str,
|
|
157
172
|
rank_and_fill: bool,
|
|
158
|
-
) -> pd.
|
|
173
|
+
) -> pd.DataFrame:
|
|
159
174
|
"""Prepare predictions for submission to Numerai.
|
|
160
175
|
Filters out ids not in live data, drops duplicates, sets ids as index,
|
|
161
176
|
then optionally ranks (keeping ties) and fills NaNs with 0.5.
|
|
@@ -169,6 +184,7 @@ def clean_predictions(
|
|
|
169
184
|
predictions: pd.DataFrame - the predictions to clean
|
|
170
185
|
id_col: str - the column name of the ids
|
|
171
186
|
rank_and_fill: bool - whether to rank and fill NaNs with 0.5
|
|
187
|
+
left_join_ids: bool - whether to left join the predictions onto the ids
|
|
172
188
|
"""
|
|
173
189
|
assert len(live_ids) > 0, "live_ids must not be empty"
|
|
174
190
|
assert live_ids.isna().sum() == 0, "live_ids must not contain NaNs"
|
|
@@ -177,13 +193,15 @@ def clean_predictions(
|
|
|
177
193
|
# drop null indices
|
|
178
194
|
predictions = predictions[~predictions[id_col].isna()]
|
|
179
195
|
predictions = (
|
|
180
|
-
predictions
|
|
181
|
-
|
|
182
|
-
|
|
196
|
+
predictions[
|
|
197
|
+
# filter out ids not in live data
|
|
198
|
+
predictions[id_col].isin(live_ids)
|
|
199
|
+
]
|
|
183
200
|
# drop duplicate ids (keep first)
|
|
184
201
|
.drop_duplicates(subset=id_col, keep="first")
|
|
185
202
|
# set ids as index
|
|
186
|
-
.set_index(id_col)
|
|
203
|
+
.set_index(id_col)
|
|
204
|
+
.sort_index()
|
|
187
205
|
)
|
|
188
206
|
# rank and fill with 0.5
|
|
189
207
|
if rank_and_fill:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: numerai-tools
|
|
3
|
-
Version: 0.4.3
|
|
3
|
+
Version: 0.5.0.dev1
|
|
4
4
|
Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
|
|
5
5
|
Home-page: https://github.com/numerai/numerai-tools
|
|
6
6
|
Maintainer: Numerai
|
|
@@ -22,6 +22,7 @@ from numerai_tools.scoring import (
|
|
|
22
22
|
filter_sort_index,
|
|
23
23
|
filter_sort_index_many,
|
|
24
24
|
filter_sort_top_bottom,
|
|
25
|
+
filter_sort_top_bottom_concat,
|
|
25
26
|
alpha,
|
|
26
27
|
meta_portfolio_contribution,
|
|
27
28
|
)
|
|
@@ -296,13 +297,12 @@ class TestScoring(unittest.TestCase):
|
|
|
296
297
|
top_bottom=None,
|
|
297
298
|
)
|
|
298
299
|
np.testing.assert_allclose(
|
|
299
|
-
|
|
300
|
+
filter_sort_top_bottom_concat(self.up, top_bottom=2),
|
|
300
301
|
[0, 1, 3, 4],
|
|
301
302
|
)
|
|
302
303
|
top, bot = filter_sort_top_bottom(
|
|
303
304
|
self.up,
|
|
304
305
|
top_bottom=2,
|
|
305
|
-
return_concatenated=False,
|
|
306
306
|
)
|
|
307
307
|
np.testing.assert_allclose(top, [3, 4])
|
|
308
308
|
np.testing.assert_allclose(bot, [0, 1])
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import unittest
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import pandas as pd # type: ignore
|
|
5
|
+
|
|
6
|
+
from numerai_tools.signals import (
|
|
7
|
+
churn,
|
|
8
|
+
turnover,
|
|
9
|
+
calculate_max_churn_and_turnover,
|
|
10
|
+
)
|
|
11
|
+
from .util import (
|
|
12
|
+
generate_fake_universe,
|
|
13
|
+
generate_new_submission,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class TestSignals(unittest.TestCase):
|
|
18
|
+
def setUp(self):
|
|
19
|
+
self.up = pd.Series(list(range(5))).rename("up")
|
|
20
|
+
self.down = pd.Series(list(reversed(range(5)))).rename("down")
|
|
21
|
+
self.up_down = pd.Series([0, 1, 2, 1, 0]).rename("up_down")
|
|
22
|
+
self.oscillate = pd.Series([1, 0, 1, 0, 1]).rename("oscillate")
|
|
23
|
+
self.constant = pd.Series([1, 1, 1, 1, 1]).rename("pos_neg")
|
|
24
|
+
|
|
25
|
+
def test_churn(self):
|
|
26
|
+
assert np.isclose(churn(self.up, self.up), 0)
|
|
27
|
+
assert np.isclose(churn(self.up, self.up_down), 1)
|
|
28
|
+
assert np.isclose(churn(self.up, self.oscillate), 1)
|
|
29
|
+
assert np.isclose(churn(self.up, self.down), 2)
|
|
30
|
+
self.assertRaisesRegex(
|
|
31
|
+
AssertionError,
|
|
32
|
+
"s2 must have non-zero standard deviation",
|
|
33
|
+
churn,
|
|
34
|
+
self.up,
|
|
35
|
+
self.constant,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
def test_churn_tb(self):
|
|
39
|
+
tmp = churn(self.up, self.up, top_bottom=2)
|
|
40
|
+
assert np.isclose(tmp, 0), tmp
|
|
41
|
+
tmp = churn(self.up, self.up_down, top_bottom=2)
|
|
42
|
+
assert np.isclose(tmp, 0.5), tmp
|
|
43
|
+
tmp = churn(self.up, self.oscillate, top_bottom=2)
|
|
44
|
+
assert np.isclose(tmp, 0.5), tmp
|
|
45
|
+
tmp = churn(self.up, self.down, top_bottom=2)
|
|
46
|
+
assert np.isclose(tmp, 1), tmp
|
|
47
|
+
tmp = churn(self.up, self.constant, top_bottom=2)
|
|
48
|
+
assert np.isclose(tmp, 0), tmp
|
|
49
|
+
|
|
50
|
+
def test_turnover(self):
|
|
51
|
+
assert np.isclose(turnover(self.up, self.up), 0)
|
|
52
|
+
assert np.isclose(turnover(self.up, self.up_down), 3)
|
|
53
|
+
assert np.isclose(turnover(self.up, self.oscillate), 4.5)
|
|
54
|
+
assert np.isclose(turnover(self.up, self.down), 6)
|
|
55
|
+
assert np.isclose(turnover(self.up, self.constant), 3.5)
|
|
56
|
+
|
|
57
|
+
def test_churn_first_submission(self):
|
|
58
|
+
"""
|
|
59
|
+
Test that the churn function works for the first submission
|
|
60
|
+
No exceptions should be raised, should return 1
|
|
61
|
+
"""
|
|
62
|
+
fake_universe = generate_fake_universe("20130308")
|
|
63
|
+
fake_submission = generate_new_submission(fake_universe)
|
|
64
|
+
fake_neutralizers = pd.DataFrame(
|
|
65
|
+
{
|
|
66
|
+
"neutralizer_1": [0.1] * len(fake_universe),
|
|
67
|
+
"neutralizer_2": [0.2] * len(fake_universe),
|
|
68
|
+
},
|
|
69
|
+
index=fake_universe["numerai_ticker"],
|
|
70
|
+
)
|
|
71
|
+
fake_sample_weights = pd.Series(
|
|
72
|
+
[0.5] * len(fake_universe),
|
|
73
|
+
index=fake_universe["numerai_ticker"],
|
|
74
|
+
name="sample_weight",
|
|
75
|
+
)
|
|
76
|
+
churn, turnover = calculate_max_churn_and_turnover(
|
|
77
|
+
curr_sub=fake_submission,
|
|
78
|
+
curr_neutralizer=fake_neutralizers,
|
|
79
|
+
curr_weight=fake_sample_weights,
|
|
80
|
+
prev_week_subs=[],
|
|
81
|
+
prev_neutralizers={"20240208": fake_neutralizers},
|
|
82
|
+
prev_sample_weights={"20240208": fake_sample_weights},
|
|
83
|
+
universe=fake_universe.set_index("numerai_ticker").sort_index(),
|
|
84
|
+
curr_signal_col="signal",
|
|
85
|
+
curr_ticker_col="numerai_ticker",
|
|
86
|
+
)
|
|
87
|
+
assert np.isclose(churn, 1)
|
|
88
|
+
assert np.isclose(turnover, 1)
|
|
89
|
+
|
|
90
|
+
def test_churn_handles_different_id_columns(self):
|
|
91
|
+
"""
|
|
92
|
+
Test that the churn function works when
|
|
93
|
+
previous submission has different id columns.
|
|
94
|
+
"""
|
|
95
|
+
fake_universe = generate_fake_universe("20130308")
|
|
96
|
+
fake_submission = generate_new_submission(fake_universe, legacy_headers=True)
|
|
97
|
+
new_fake_universe = generate_fake_universe(
|
|
98
|
+
date_value="20130308", ticker_col="ticker"
|
|
99
|
+
)
|
|
100
|
+
fake_universe["ticker"] = new_fake_universe["ticker"]
|
|
101
|
+
prev_submission = fake_submission.copy()
|
|
102
|
+
fake_neutralizers = pd.DataFrame(
|
|
103
|
+
{
|
|
104
|
+
"neutralizer_1": [0.1] * len(fake_universe),
|
|
105
|
+
"neutralizer_2": [0.2] * len(fake_universe),
|
|
106
|
+
},
|
|
107
|
+
index=fake_universe["numerai_ticker"],
|
|
108
|
+
)
|
|
109
|
+
fake_sample_weights = pd.Series(
|
|
110
|
+
[0.5] * len(fake_universe),
|
|
111
|
+
index=fake_universe["numerai_ticker"],
|
|
112
|
+
name="sample_weight",
|
|
113
|
+
)
|
|
114
|
+
# switch out the numerai_ticke col in-place
|
|
115
|
+
prev_submission["numerai_ticker"] = new_fake_universe["ticker"]
|
|
116
|
+
prev_submission.rename(columns={"numerai_ticker": "ticker"}, inplace=True)
|
|
117
|
+
prev_neutralizers = fake_neutralizers.copy()
|
|
118
|
+
prev_neutralizers.index = new_fake_universe["ticker"]
|
|
119
|
+
prev_neutralizers.index.name = "ticker"
|
|
120
|
+
prev_sample_weights = fake_sample_weights.copy()
|
|
121
|
+
prev_sample_weights.index = new_fake_universe["ticker"]
|
|
122
|
+
prev_sample_weights.index.name = "ticker"
|
|
123
|
+
churn, turnover = calculate_max_churn_and_turnover(
|
|
124
|
+
curr_sub=fake_submission,
|
|
125
|
+
curr_neutralizer=fake_neutralizers,
|
|
126
|
+
curr_weight=fake_sample_weights,
|
|
127
|
+
prev_week_subs={"20240208": prev_submission},
|
|
128
|
+
prev_neutralizers={"20240208": prev_neutralizers},
|
|
129
|
+
prev_sample_weights={"20240208": prev_sample_weights},
|
|
130
|
+
universe=fake_universe.set_index("numerai_ticker").sort_index(),
|
|
131
|
+
curr_signal_col="signal",
|
|
132
|
+
curr_ticker_col="numerai_ticker",
|
|
133
|
+
)
|
|
134
|
+
assert np.isclose(churn, 0)
|
|
135
|
+
assert np.isclose(turnover, 0)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
if __name__ == "__main__":
|
|
139
|
+
unittest.main()
|
|
@@ -155,6 +155,18 @@ class TestSubmissions(unittest.TestCase):
|
|
|
155
155
|
sub[[sub.columns[1]]],
|
|
156
156
|
)
|
|
157
157
|
|
|
158
|
+
def test_validate_headers_signals_data_type_and_date_col(self):
|
|
159
|
+
fake_sub = generate_submission(self.ids, "ticker", "signal")
|
|
160
|
+
fake_sub["data_type"] = "signals"
|
|
161
|
+
fake_sub["friday_date"] = "2023-01-01"
|
|
162
|
+
with self.assertLogs(level="WARNING") as cm:
|
|
163
|
+
assert validate_headers_signals(fake_sub) == ("ticker", "signal")
|
|
164
|
+
self.assertIn(
|
|
165
|
+
"WARNING:numerai_tools.submissions:data_type column found in Signals submission. This is deprecated and will be removed in the future. "
|
|
166
|
+
"Please remove the data_type column from your Signals submission.",
|
|
167
|
+
cm.output[0],
|
|
168
|
+
)
|
|
169
|
+
|
|
158
170
|
def test_validate_headers_crypto(self):
|
|
159
171
|
for sub in self.crypto_subs:
|
|
160
172
|
assert validate_headers_crypto(sub) == tuple(sub.columns)
|
|
@@ -432,7 +444,7 @@ class TestSubmissions(unittest.TestCase):
|
|
|
432
444
|
assert not cleaned_predictions.index.duplicated().any()
|
|
433
445
|
|
|
434
446
|
|
|
435
|
-
def generate_ids(id_length: int, num_rows: int) ->
|
|
447
|
+
def generate_ids(id_length: int, num_rows: int) -> pd.Series:
|
|
436
448
|
"""Generates a given number of unique ascii-valued strings of a given length.
|
|
437
449
|
|
|
438
450
|
Arguments:
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
from numerai_tools.scoring import (
|
|
2
|
-
filter_sort_index,
|
|
3
|
-
filter_sort_top_bottom,
|
|
4
|
-
spearman_correlation,
|
|
5
|
-
)
|
|
6
|
-
|
|
7
|
-
from typing import List, Tuple, Union, Optional
|
|
8
|
-
|
|
9
|
-
import pandas as pd
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def churn(
|
|
13
|
-
s1: pd.Series,
|
|
14
|
-
s2: pd.Series,
|
|
15
|
-
top_bottom: Optional[int] = None,
|
|
16
|
-
) -> float:
|
|
17
|
-
"""Calculate the churn between two series. Churn is the proportion of elements
|
|
18
|
-
that are different between the two series.
|
|
19
|
-
|
|
20
|
-
For 2 given series with overlapping indices, churn is 1 - Spearman Correlation.
|
|
21
|
-
If top_bottom is provided, the churn is calculated as the average of the % of
|
|
22
|
-
tickers that stay in the top and bottom predictions. This is only relevant when
|
|
23
|
-
the series are rank signals and not portfolio weights.
|
|
24
|
-
|
|
25
|
-
Arguments:
|
|
26
|
-
s1: pd.Series - the first series to compare
|
|
27
|
-
s2: pd.Series - the second series to compare
|
|
28
|
-
top_bottom: Optional[int] - the number of top and bottom predictions to use
|
|
29
|
-
when calculating the correlation. Results in
|
|
30
|
-
2*top_bottom predictions.
|
|
31
|
-
|
|
32
|
-
Returns:
|
|
33
|
-
float - the churn between the two series
|
|
34
|
-
"""
|
|
35
|
-
if top_bottom is not None and top_bottom > 0:
|
|
36
|
-
s1_top, s1_bot = filter_sort_top_bottom(s1, top_bottom, False)
|
|
37
|
-
s2_top, s2_bot = filter_sort_top_bottom(s2, top_bottom, False)
|
|
38
|
-
top_overlap = len(s1_top.index.intersection(s2_top.index)) / top_bottom
|
|
39
|
-
bot_overlap = len(s1_bot.index.intersection(s2_bot.index)) / top_bottom
|
|
40
|
-
avg_overlap = (top_overlap + bot_overlap) / 2
|
|
41
|
-
return 1 - avg_overlap
|
|
42
|
-
|
|
43
|
-
s1, s2 = filter_sort_index(s1, s2)
|
|
44
|
-
assert s1.std() > 0, "s1 must have non-zero standard deviation"
|
|
45
|
-
assert s2.std() > 0, "s2 must have non-zero standard deviation"
|
|
46
|
-
return 1 - spearman_correlation(s1, s2)
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def turnover(
|
|
50
|
-
s1: pd.Series,
|
|
51
|
-
s2: pd.Series,
|
|
52
|
-
):
|
|
53
|
-
"""Calculate the turnover between two series. Turnover is the total change in weights between
|
|
54
|
-
the two series divided by 2.
|
|
55
|
-
|
|
56
|
-
For 2 given series with overlapping indices, join the series on index, fill nans with zeroes
|
|
57
|
-
and calculate turnover as the absolute total difference between the two series divided by 2.
|
|
58
|
-
This is only relevant when the series are portfolio weights and not rank signals.
|
|
59
|
-
|
|
60
|
-
Arguments:
|
|
61
|
-
s1: pd.Series - the first series to compare
|
|
62
|
-
s2: pd.Series - the second series to compare
|
|
63
|
-
top_bottom: Optional[int] - the number of top and bottom predictions to use
|
|
64
|
-
when calculating the correlation. Results in
|
|
65
|
-
2*top_bottom predictions.
|
|
66
|
-
|
|
67
|
-
Returns:
|
|
68
|
-
float - the turnover between the two series
|
|
69
|
-
"""
|
|
70
|
-
s1, s2 = filter_sort_index(s1, s2)
|
|
71
|
-
turnover = (s1 - s2).abs().sum() / 2
|
|
72
|
-
return turnover
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
import unittest
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
import pandas as pd # type: ignore
|
|
5
|
-
|
|
6
|
-
from numerai_tools.signals import churn, turnover
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class TestSignals(unittest.TestCase):
|
|
10
|
-
def setUp(self):
|
|
11
|
-
self.up = pd.Series(list(range(5))).rename("up")
|
|
12
|
-
self.down = pd.Series(list(reversed(range(5)))).rename("down")
|
|
13
|
-
self.up_down = pd.Series([0, 1, 2, 1, 0]).rename("up_down")
|
|
14
|
-
self.oscillate = pd.Series([1, 0, 1, 0, 1]).rename("oscillate")
|
|
15
|
-
self.constant = pd.Series([1, 1, 1, 1, 1]).rename("pos_neg")
|
|
16
|
-
|
|
17
|
-
def test_churn(self):
|
|
18
|
-
assert np.isclose(churn(self.up, self.up), 0)
|
|
19
|
-
assert np.isclose(churn(self.up, self.up_down), 1)
|
|
20
|
-
assert np.isclose(churn(self.up, self.oscillate), 1)
|
|
21
|
-
assert np.isclose(churn(self.up, self.down), 2)
|
|
22
|
-
self.assertRaisesRegex(
|
|
23
|
-
AssertionError,
|
|
24
|
-
"s2 must have non-zero standard deviation",
|
|
25
|
-
churn,
|
|
26
|
-
self.up,
|
|
27
|
-
self.constant,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
def test_churn_tb(self):
|
|
31
|
-
tmp = churn(self.up, self.up, top_bottom=2)
|
|
32
|
-
assert np.isclose(tmp, 0), tmp
|
|
33
|
-
tmp = churn(self.up, self.up_down, top_bottom=2)
|
|
34
|
-
assert np.isclose(tmp, 0.5), tmp
|
|
35
|
-
tmp = churn(self.up, self.oscillate, top_bottom=2)
|
|
36
|
-
assert np.isclose(tmp, 0.5), tmp
|
|
37
|
-
tmp = churn(self.up, self.down, top_bottom=2)
|
|
38
|
-
assert np.isclose(tmp, 1), tmp
|
|
39
|
-
tmp = churn(self.up, self.constant, top_bottom=2)
|
|
40
|
-
assert np.isclose(tmp, 0), tmp
|
|
41
|
-
|
|
42
|
-
def test_turnover(self):
|
|
43
|
-
assert np.isclose(turnover(self.up, self.up), 0)
|
|
44
|
-
assert np.isclose(turnover(self.up, self.up_down), 3)
|
|
45
|
-
assert np.isclose(turnover(self.up, self.oscillate), 4.5)
|
|
46
|
-
assert np.isclose(turnover(self.up, self.down), 6)
|
|
47
|
-
assert np.isclose(turnover(self.up, self.constant), 3.5)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
if __name__ == "__main__":
|
|
51
|
-
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{numerai_tools-0.4.3 → numerai_tools-0.5.0.dev1}/numerai_tools.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|