numerai-tools 0.5.0.dev1__tar.gz → 0.5.0.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {numerai_tools-0.5.0.dev1/numerai_tools.egg-info → numerai_tools-0.5.0.dev2}/PKG-INFO +14 -8
- numerai_tools-0.5.0.dev2/pyproject.toml +45 -0
- numerai_tools-0.5.0.dev1/PKG-INFO +0 -22
- numerai_tools-0.5.0.dev1/numerai_tools.egg-info/SOURCES.txt +0 -16
- numerai_tools-0.5.0.dev1/numerai_tools.egg-info/dependency_links.txt +0 -1
- numerai_tools-0.5.0.dev1/numerai_tools.egg-info/requires.txt +0 -4
- numerai_tools-0.5.0.dev1/numerai_tools.egg-info/top_level.txt +0 -1
- numerai_tools-0.5.0.dev1/setup.cfg +0 -4
- numerai_tools-0.5.0.dev1/setup.py +0 -47
- numerai_tools-0.5.0.dev1/tests/test_scoring.py +0 -346
- numerai_tools-0.5.0.dev1/tests/test_signals.py +0 -139
- numerai_tools-0.5.0.dev1/tests/test_submissions.py +0 -498
- {numerai_tools-0.5.0.dev1 → numerai_tools-0.5.0.dev2}/LICENSE +0 -0
- {numerai_tools-0.5.0.dev1 → numerai_tools-0.5.0.dev2}/README.md +0 -0
- {numerai_tools-0.5.0.dev1 → numerai_tools-0.5.0.dev2}/numerai_tools/__init__.py +0 -0
- {numerai_tools-0.5.0.dev1 → numerai_tools-0.5.0.dev2}/numerai_tools/py.typed +0 -0
- {numerai_tools-0.5.0.dev1 → numerai_tools-0.5.0.dev2}/numerai_tools/scoring.py +0 -0
- {numerai_tools-0.5.0.dev1 → numerai_tools-0.5.0.dev2}/numerai_tools/signals.py +0 -0
- {numerai_tools-0.5.0.dev1 → numerai_tools-0.5.0.dev2}/numerai_tools/submissions.py +0 -0
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: numerai-tools
|
|
3
|
-
Version: 0.5.0.dev1
|
|
3
|
+
Version: 0.5.0.dev2
|
|
4
4
|
Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
Platform: OS Independent
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Numerai Engineering
|
|
7
|
+
Author-email: engineering@numer.ai
|
|
8
|
+
Requires-Python: >=3.11
|
|
10
9
|
Classifier: Development Status :: 5 - Production/Stable
|
|
11
10
|
Classifier: Environment :: Console
|
|
12
11
|
Classifier: Intended Audience :: Science/Research
|
|
@@ -15,8 +14,15 @@ Classifier: Operating System :: OS Independent
|
|
|
15
14
|
Classifier: Programming Language :: Python
|
|
16
15
|
Classifier: Programming Language :: Python :: 3
|
|
17
16
|
Classifier: Topic :: Scientific/Engineering
|
|
17
|
+
Requires-Dist: numpy (>=2.0.0,<3.0.0)
|
|
18
|
+
Requires-Dist: pandas (>=2.2.2,<3.0.0)
|
|
19
|
+
Requires-Dist: scikit-learn (>=1.5.0,<2.0.0)
|
|
20
|
+
Requires-Dist: scipy (>=1.13.0,<2.0.0)
|
|
21
|
+
Project-URL: Documentation, https://docs.numer.ai/
|
|
22
|
+
Project-URL: Homepage, https://numer.ai
|
|
23
|
+
Project-URL: Repository, https://github.com/numerai/numerai-tools
|
|
18
24
|
Description-Content-Type: text/markdown
|
|
19
|
-
License-File: LICENSE
|
|
20
25
|
|
|
21
26
|
# numerai-tools
|
|
22
27
|
A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
|
|
28
|
+
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "numerai-tools"
|
|
3
|
+
version = "0.5.0.dev2"
|
|
4
|
+
description = "A collection of open-source tools to help interact with Numerai, model data, and automate submissions."
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Numerai Engineering",email = "engineering@numer.ai"}
|
|
7
|
+
]
|
|
8
|
+
license = {text = "MIT"}
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Development Status :: 5 - Production/Stable",
|
|
13
|
+
"Environment :: Console",
|
|
14
|
+
"Intended Audience :: Science/Research",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Operating System :: OS Independent",
|
|
17
|
+
"Programming Language :: Python",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Topic :: Scientific/Engineering",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
homepage = "https://numer.ai"
|
|
24
|
+
repository = "https://github.com/numerai/numerai-tools"
|
|
25
|
+
documentation = "https://docs.numer.ai/"
|
|
26
|
+
|
|
27
|
+
[tool.poetry]
|
|
28
|
+
packages = [
|
|
29
|
+
{include = "numerai_tools", from = "."},
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
[tool.poetry.dependencies]
|
|
33
|
+
pandas = "^2.2.2"
|
|
34
|
+
numpy = "^2.0.0"
|
|
35
|
+
scipy = "^1.13.0"
|
|
36
|
+
scikit-learn = "^1.5.0"
|
|
37
|
+
|
|
38
|
+
[tool.poetry.group.dev.dependencies]
|
|
39
|
+
pytest = "^8.3.4"
|
|
40
|
+
mypy = "^1.15.0"
|
|
41
|
+
ruff = "^0.5.4"
|
|
42
|
+
|
|
43
|
+
[build-system]
|
|
44
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
45
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: numerai_tools
|
|
3
|
-
Version: 0.5.0.dev1
|
|
4
|
-
Summary: A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
|
|
5
|
-
Home-page: https://github.com/numerai/numerai-tools
|
|
6
|
-
Maintainer: Numerai
|
|
7
|
-
Maintainer-email: support@numer.ai
|
|
8
|
-
License: MIT License
|
|
9
|
-
Platform: OS Independent
|
|
10
|
-
Classifier: Development Status :: 5 - Production/Stable
|
|
11
|
-
Classifier: Environment :: Console
|
|
12
|
-
Classifier: Intended Audience :: Science/Research
|
|
13
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
-
Classifier: Operating System :: OS Independent
|
|
15
|
-
Classifier: Programming Language :: Python
|
|
16
|
-
Classifier: Programming Language :: Python :: 3
|
|
17
|
-
Classifier: Topic :: Scientific/Engineering
|
|
18
|
-
Description-Content-Type: text/markdown
|
|
19
|
-
License-File: LICENSE
|
|
20
|
-
|
|
21
|
-
# numerai-tools
|
|
22
|
-
A collection of open-source tools to help interact with Numerai, model data, and automate submissions.
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
LICENSE
|
|
2
|
-
README.md
|
|
3
|
-
setup.py
|
|
4
|
-
numerai_tools/__init__.py
|
|
5
|
-
numerai_tools/py.typed
|
|
6
|
-
numerai_tools/scoring.py
|
|
7
|
-
numerai_tools/signals.py
|
|
8
|
-
numerai_tools/submissions.py
|
|
9
|
-
numerai_tools.egg-info/PKG-INFO
|
|
10
|
-
numerai_tools.egg-info/SOURCES.txt
|
|
11
|
-
numerai_tools.egg-info/dependency_links.txt
|
|
12
|
-
numerai_tools.egg-info/requires.txt
|
|
13
|
-
numerai_tools.egg-info/top_level.txt
|
|
14
|
-
tests/test_scoring.py
|
|
15
|
-
tests/test_signals.py
|
|
16
|
-
tests/test_submissions.py
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
numerai_tools
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
from setuptools import setup
|
|
2
|
-
from setuptools import find_packages
|
|
3
|
-
|
|
4
|
-
VERSION = "0.5.0.dev1"
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def load(path):
|
|
8
|
-
return open(path, "r").read()
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
classifiers = [
|
|
12
|
-
"Development Status :: 5 - Production/Stable",
|
|
13
|
-
"Environment :: Console",
|
|
14
|
-
"Intended Audience :: Science/Research",
|
|
15
|
-
"License :: OSI Approved :: MIT License",
|
|
16
|
-
"Operating System :: OS Independent",
|
|
17
|
-
"Programming Language :: Python",
|
|
18
|
-
"Programming Language :: Python :: 3",
|
|
19
|
-
"Topic :: Scientific/Engineering",
|
|
20
|
-
]
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
if __name__ == "__main__":
|
|
24
|
-
setup(
|
|
25
|
-
name="numerai_tools",
|
|
26
|
-
version=VERSION,
|
|
27
|
-
maintainer="Numerai",
|
|
28
|
-
maintainer_email="support@numer.ai",
|
|
29
|
-
description="A collection of open-source tools to help interact with Numerai, model data, and automate submissions.",
|
|
30
|
-
long_description=load("README.md"),
|
|
31
|
-
long_description_content_type="text/markdown",
|
|
32
|
-
url="https://github.com/numerai/numerai-tools",
|
|
33
|
-
platforms="OS Independent",
|
|
34
|
-
classifiers=classifiers,
|
|
35
|
-
license="MIT License",
|
|
36
|
-
package_data={
|
|
37
|
-
"numerai_tools": ["LICENSE", "README.md", "py.typed"],
|
|
38
|
-
},
|
|
39
|
-
packages=find_packages(exclude=["tests"]),
|
|
40
|
-
install_requires=[
|
|
41
|
-
# pandas 2.2.2 was the first version to support numpy 2
|
|
42
|
-
"pandas>=2.2.2,<3.0.0",
|
|
43
|
-
"numpy>=2.0.0,<3.0.0",
|
|
44
|
-
"scipy>=1.13.0,<2.0.0",
|
|
45
|
-
"scikit-learn>=1.5.0,<2.0.0",
|
|
46
|
-
],
|
|
47
|
-
)
|
|
@@ -1,346 +0,0 @@
|
|
|
1
|
-
import unittest
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
import pandas as pd # type: ignore
|
|
5
|
-
|
|
6
|
-
from numerai_tools.scoring import (
|
|
7
|
-
correlation,
|
|
8
|
-
numerai_corr,
|
|
9
|
-
tie_broken_rank_correlation,
|
|
10
|
-
spearman_correlation,
|
|
11
|
-
pearson_correlation,
|
|
12
|
-
tie_broken_rank,
|
|
13
|
-
tie_kept_rank,
|
|
14
|
-
gaussian,
|
|
15
|
-
neutralize,
|
|
16
|
-
one_hot_encode,
|
|
17
|
-
power,
|
|
18
|
-
tie_kept_rank__gaussianize__pow_1_5,
|
|
19
|
-
variance_normalize,
|
|
20
|
-
orthogonalize,
|
|
21
|
-
stake_weight,
|
|
22
|
-
filter_sort_index,
|
|
23
|
-
filter_sort_index_many,
|
|
24
|
-
filter_sort_top_bottom,
|
|
25
|
-
filter_sort_top_bottom_concat,
|
|
26
|
-
alpha,
|
|
27
|
-
meta_portfolio_contribution,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class TestScoring(unittest.TestCase):
|
|
32
|
-
def setUp(self):
|
|
33
|
-
self.up = pd.Series(list(range(5))).rename("up")
|
|
34
|
-
self.down = pd.Series(list(reversed(range(5)))).rename("down")
|
|
35
|
-
self.up_down = pd.Series([1, 0, 1, 0, 1]).rename("up_down")
|
|
36
|
-
self.down_up = (1 - self.up_down).rename("down_up")
|
|
37
|
-
self.up_float = (self.up / self.up.max()).rename("up_float")
|
|
38
|
-
self.pos_neg = pd.Series([0, -0, 0.5, -0.5, 1.0, -1.0, 2.0, -2.0]).rename(
|
|
39
|
-
"pos_neg"
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
def test_filter_sort_index(self):
|
|
43
|
-
# Test with 2 simple ranges with different indices
|
|
44
|
-
s = pd.Series([1, 2, 3, 4, 5], index=[0, 1, 2, 3, 4])
|
|
45
|
-
t = pd.Series([1, 2, 3, 4, 5], index=[1, 2, 3, 4, 5])
|
|
46
|
-
new_s, new_t = filter_sort_index(s, t)
|
|
47
|
-
self.assertEqual(len(new_s), 4)
|
|
48
|
-
self.assertEqual(len(new_t), 4)
|
|
49
|
-
self.assertTrue(np.array_equal(new_s.index, [1, 2, 3, 4]))
|
|
50
|
-
self.assertTrue(np.array_equal(new_t.index, [1, 2, 3, 4]))
|
|
51
|
-
self.assertTrue(np.array_equal(new_s.values, [2, 3, 4, 5]))
|
|
52
|
-
self.assertTrue(np.array_equal(new_t.values, [1, 2, 3, 4]))
|
|
53
|
-
|
|
54
|
-
def test_filter_sort_index_invalid(self):
|
|
55
|
-
# Ensure assertion error when max filtered ratio is exceeded
|
|
56
|
-
s = pd.Series([1, 2, 3, 4, 5], index=[0, 1, 2, 3, 4])
|
|
57
|
-
t = pd.Series([1, 2, 3, 4, 5], index=[1, 2, 3, 4, 5])
|
|
58
|
-
with self.assertRaises(AssertionError):
|
|
59
|
-
filter_sort_index(s, t, max_filtered_ratio=0.1)
|
|
60
|
-
|
|
61
|
-
def test_filter_sort_index_many(self):
|
|
62
|
-
# Test with a DataFrame
|
|
63
|
-
s = pd.Series([1, 2, 3, 4, 5], index=[0, 1, 2, 3, 4])
|
|
64
|
-
t = pd.Series([1, 2, 3, 4, 5], index=[1, 2, 3, 4, 5])
|
|
65
|
-
new_s, new_t = filter_sort_index_many([s, t])
|
|
66
|
-
self.assertEqual(len(new_s), 4)
|
|
67
|
-
self.assertEqual(len(new_t), 4)
|
|
68
|
-
self.assertTrue(np.array_equal(new_s.index, [1, 2, 3, 4]))
|
|
69
|
-
self.assertTrue(np.array_equal(new_t.index, [1, 2, 3, 4]))
|
|
70
|
-
self.assertTrue(np.array_equal(new_s.values, [2, 3, 4, 5]))
|
|
71
|
-
self.assertTrue(np.array_equal(new_t.values, [1, 2, 3, 4]))
|
|
72
|
-
|
|
73
|
-
def test_filter_sort_index_many_invalid(self):
|
|
74
|
-
# Ensure assertion error when max filtered ratio is exceeded
|
|
75
|
-
s = pd.Series([1, 2, 3, 4, 5], index=[0, 1, 2, 3, 4])
|
|
76
|
-
t = pd.Series([1, 2, 3, 4, 5], index=[1, 2, 3, 4, 5])
|
|
77
|
-
with self.assertRaises(AssertionError):
|
|
78
|
-
filter_sort_index_many([s, t], max_filtered_ratio=0.1)
|
|
79
|
-
|
|
80
|
-
def test_correlation(self):
|
|
81
|
-
assert np.isclose(correlation(self.up, self.up), 1)
|
|
82
|
-
assert np.isclose(correlation(self.up, self.down), -1)
|
|
83
|
-
assert np.isclose(correlation(self.up, self.up_down), 0)
|
|
84
|
-
assert np.isclose(correlation(self.up, self.down_up), 0)
|
|
85
|
-
|
|
86
|
-
def test_tie_broken_rank_correlation(self):
|
|
87
|
-
assert np.isclose(tie_broken_rank_correlation(self.up, self.up), 1)
|
|
88
|
-
assert np.isclose(tie_broken_rank_correlation(self.up, self.down), -1)
|
|
89
|
-
# tie_broken_rank_correlation ranks the submission not the targets
|
|
90
|
-
assert np.isclose(tie_broken_rank_correlation(self.up, self.up_down), 0.5)
|
|
91
|
-
assert np.isclose(tie_broken_rank_correlation(self.up, self.down_up), 0.5)
|
|
92
|
-
assert np.isclose(tie_broken_rank_correlation(self.up_down, self.up), 0)
|
|
93
|
-
assert np.isclose(tie_broken_rank_correlation(self.down_up, self.up), 0)
|
|
94
|
-
|
|
95
|
-
def test_spearman_correlation(self):
|
|
96
|
-
assert np.isclose(spearman_correlation(self.up, self.up), 1)
|
|
97
|
-
assert np.isclose(spearman_correlation(self.up, self.down), -1)
|
|
98
|
-
assert np.isclose(spearman_correlation(self.up, self.up_down), 0)
|
|
99
|
-
assert np.isclose(spearman_correlation(self.up, self.down_up), 0)
|
|
100
|
-
assert np.isclose(spearman_correlation(self.up_down, self.up), 0)
|
|
101
|
-
assert np.isclose(spearman_correlation(self.down_up, self.up), 0)
|
|
102
|
-
|
|
103
|
-
def test_pearson_correlation(self):
|
|
104
|
-
assert np.isclose(pearson_correlation(self.up, self.up), 1)
|
|
105
|
-
assert np.isclose(pearson_correlation(self.up, self.down), -1)
|
|
106
|
-
assert np.isclose(pearson_correlation(self.up, self.up_down), 0)
|
|
107
|
-
assert np.isclose(pearson_correlation(self.up, self.down_up), 0)
|
|
108
|
-
assert np.isclose(pearson_correlation(self.up_down, self.up), 0)
|
|
109
|
-
assert np.isclose(pearson_correlation(self.down_up, self.up), 0)
|
|
110
|
-
|
|
111
|
-
def test_tie_broken_rank(self):
|
|
112
|
-
assert np.isclose(
|
|
113
|
-
tie_broken_rank(self.up.to_frame()).T, [0.1, 0.3, 0.5, 0.7, 0.9]
|
|
114
|
-
).all()
|
|
115
|
-
assert np.isclose(
|
|
116
|
-
tie_broken_rank(self.up_down.to_frame()).T, [0.5, 0.1, 0.7, 0.3, 0.9]
|
|
117
|
-
).all()
|
|
118
|
-
|
|
119
|
-
def test_tie_kept_rank(self):
|
|
120
|
-
assert np.isclose(
|
|
121
|
-
tie_kept_rank(self.up.to_frame()).T, [0.1, 0.3, 0.5, 0.7, 0.9]
|
|
122
|
-
).all()
|
|
123
|
-
assert np.isclose(
|
|
124
|
-
tie_kept_rank(self.up_down.to_frame()).T, [0.7, 0.2, 0.7, 0.2, 0.7]
|
|
125
|
-
).all()
|
|
126
|
-
|
|
127
|
-
def test_gaussian(self):
|
|
128
|
-
assert np.isclose(
|
|
129
|
-
gaussian(self.up_float).values.T,
|
|
130
|
-
[-np.inf, -0.6744897501960817, 0, 0.6744897501960817, np.inf],
|
|
131
|
-
).all()
|
|
132
|
-
|
|
133
|
-
def test_variance_normalize(self):
|
|
134
|
-
assert np.isclose(
|
|
135
|
-
variance_normalize(self.up_float).values.T,
|
|
136
|
-
[
|
|
137
|
-
0.0,
|
|
138
|
-
0.7071067811865475,
|
|
139
|
-
1.414213562373095,
|
|
140
|
-
2.1213203435596424,
|
|
141
|
-
2.82842712474619,
|
|
142
|
-
],
|
|
143
|
-
).all()
|
|
144
|
-
|
|
145
|
-
def test_one_hot_encode(self):
|
|
146
|
-
assert np.isclose(
|
|
147
|
-
one_hot_encode(self.up.to_frame(), ["up"]).values.T,
|
|
148
|
-
[
|
|
149
|
-
[1.0, 0.0, 0.0, 0.0, 0.0],
|
|
150
|
-
[0.0, 1.0, 0.0, 0.0, 0.0],
|
|
151
|
-
[0.0, 0.0, 1.0, 0.0, 0.0],
|
|
152
|
-
[0.0, 0.0, 0.0, 1.0, 0.0],
|
|
153
|
-
[0.0, 0.0, 0.0, 0.0, 1.0],
|
|
154
|
-
],
|
|
155
|
-
).all()
|
|
156
|
-
|
|
157
|
-
def test_power(self):
|
|
158
|
-
assert np.isclose(
|
|
159
|
-
power(self.pos_neg.to_frame(), 1.5),
|
|
160
|
-
[
|
|
161
|
-
[0.0],
|
|
162
|
-
[0.0],
|
|
163
|
-
[0.3535533905932738],
|
|
164
|
-
[-0.3535533905932738],
|
|
165
|
-
[1.0000000000000000],
|
|
166
|
-
[-1.0000000000000000],
|
|
167
|
-
[2.8284271247461903],
|
|
168
|
-
[-2.8284271247461903],
|
|
169
|
-
],
|
|
170
|
-
).all()
|
|
171
|
-
|
|
172
|
-
def test_tie_kept_rank__gaussianize__pow_1_5(self):
|
|
173
|
-
assert np.isclose(
|
|
174
|
-
tie_kept_rank__gaussianize__pow_1_5(self.up_float.to_frame()),
|
|
175
|
-
[
|
|
176
|
-
[-1.4507885796854221],
|
|
177
|
-
[-0.3797472709071263],
|
|
178
|
-
[0.0000000000000000],
|
|
179
|
-
[0.3797472709071261],
|
|
180
|
-
[1.4507885796854221],
|
|
181
|
-
],
|
|
182
|
-
).all()
|
|
183
|
-
|
|
184
|
-
def test_orthoganalize(self):
|
|
185
|
-
assert np.isclose(
|
|
186
|
-
orthogonalize(self.up.to_frame().values, self.up.to_frame().values),
|
|
187
|
-
[0, 0, 0, 0, 0],
|
|
188
|
-
).all()
|
|
189
|
-
assert np.isclose(
|
|
190
|
-
orthogonalize(self.up.to_frame().values, self.up_down.to_frame().values),
|
|
191
|
-
[[-2], [1], [0], [3], [2]],
|
|
192
|
-
).all()
|
|
193
|
-
assert np.isclose(
|
|
194
|
-
orthogonalize(
|
|
195
|
-
self.down_up.to_frame().values, self.up_down.to_frame().values
|
|
196
|
-
),
|
|
197
|
-
[[0], [1], [0], [1], [0]],
|
|
198
|
-
).all()
|
|
199
|
-
|
|
200
|
-
def test_stake_weight(self):
|
|
201
|
-
assert np.isclose(
|
|
202
|
-
stake_weight(self.up.to_frame(), pd.Series([1], index=[self.up.name])),
|
|
203
|
-
self.up.values.T,
|
|
204
|
-
).all()
|
|
205
|
-
assert np.isclose(
|
|
206
|
-
stake_weight(
|
|
207
|
-
pd.concat([self.up, self.down], axis=1),
|
|
208
|
-
pd.Series([1, 1], index=[self.up.name, self.down.name]),
|
|
209
|
-
),
|
|
210
|
-
((self.up + self.down) / 2).values.T,
|
|
211
|
-
).all()
|
|
212
|
-
|
|
213
|
-
def test_neutralize_basic(self):
|
|
214
|
-
assert np.isclose(
|
|
215
|
-
neutralize(self.up.to_frame(), pd.DataFrame([0, 0, 0, 0, 0])).values.T,
|
|
216
|
-
self.up - self.up.mean(),
|
|
217
|
-
).all()
|
|
218
|
-
|
|
219
|
-
def test_neutralize_multiple_subs(self):
|
|
220
|
-
assert np.isclose(
|
|
221
|
-
neutralize(self.up_down.to_frame(), self.down_up.to_frame()).values.T,
|
|
222
|
-
[0, 0, 0, 0, 0],
|
|
223
|
-
).all()
|
|
224
|
-
|
|
225
|
-
def test_neutralize_multiple_subs_multiple_neutralizers(self):
|
|
226
|
-
# ensure it works for multiple submissions/neutralizers
|
|
227
|
-
assert np.isclose(
|
|
228
|
-
neutralize(
|
|
229
|
-
pd.concat([self.up_down, self.up_down], axis=1),
|
|
230
|
-
pd.concat([self.down_up, self.down_up], axis=1),
|
|
231
|
-
).values.T,
|
|
232
|
-
[
|
|
233
|
-
[0, 0, 0, 0, 0],
|
|
234
|
-
[0, 0, 0, 0, 0],
|
|
235
|
-
],
|
|
236
|
-
).all()
|
|
237
|
-
assert np.isclose(
|
|
238
|
-
neutralize(
|
|
239
|
-
pd.concat([self.up, self.down], axis=1),
|
|
240
|
-
pd.concat(
|
|
241
|
-
[pd.Series([0, 0, 0, 0, 0]), pd.Series([0, 0, 0, 0, 0])], axis=1
|
|
242
|
-
),
|
|
243
|
-
).values.T,
|
|
244
|
-
pd.concat(
|
|
245
|
-
[self.up - self.up.mean(), self.down - self.down.mean()], axis=1
|
|
246
|
-
).values.T,
|
|
247
|
-
).all()
|
|
248
|
-
|
|
249
|
-
def test_neutralize_proportion(self):
|
|
250
|
-
# Test with proportion less than 1
|
|
251
|
-
assert np.isclose(
|
|
252
|
-
neutralize(
|
|
253
|
-
self.up.to_frame(), pd.DataFrame([0, 0, 0, 0, 0]), proportion=0.5
|
|
254
|
-
).values.T,
|
|
255
|
-
(self.up - self.up.mean() * 0.5),
|
|
256
|
-
).all()
|
|
257
|
-
|
|
258
|
-
# Test with proportion equal to 0
|
|
259
|
-
assert np.isclose(
|
|
260
|
-
neutralize(
|
|
261
|
-
self.up.to_frame(), pd.DataFrame([0, 0, 0, 0, 0]), proportion=0
|
|
262
|
-
).values.T,
|
|
263
|
-
self.up,
|
|
264
|
-
).all()
|
|
265
|
-
|
|
266
|
-
def test_neutralize_with_nans(self):
|
|
267
|
-
# Test with NaNs in input data
|
|
268
|
-
up_with_nans = self.up.copy()
|
|
269
|
-
up_with_nans[2] = np.nan
|
|
270
|
-
self.assertRaisesRegex(
|
|
271
|
-
AssertionError,
|
|
272
|
-
"Data contains NaNs",
|
|
273
|
-
neutralize,
|
|
274
|
-
up_with_nans.to_frame(),
|
|
275
|
-
pd.DataFrame([0, 0, 0, 0, 0]),
|
|
276
|
-
)
|
|
277
|
-
|
|
278
|
-
def test_neutralize_large_data(self):
|
|
279
|
-
# Test with larger dataset
|
|
280
|
-
large_data = pd.DataFrame(np.random.randn(1000, 10))
|
|
281
|
-
neutralizers = pd.DataFrame(np.random.randn(1000, 5))
|
|
282
|
-
neutralized = neutralize(large_data, neutralizers)
|
|
283
|
-
assert neutralized.shape == large_data.shape
|
|
284
|
-
assert not np.isnan(neutralized).any().any()
|
|
285
|
-
|
|
286
|
-
def test_numerai_corr_doesnt_clobber_targets(self):
|
|
287
|
-
s = [x / 4 for x in range(5)]
|
|
288
|
-
df = pd.DataFrame({"target": s, "prediction": reversed(s)})
|
|
289
|
-
numerai_corr(df[["prediction"]], df["target"])
|
|
290
|
-
assert pd.Series(s).equals(df["target"]), f"{s} != {list(df['target'].values)}"
|
|
291
|
-
|
|
292
|
-
def test_filter_top_bottom(self):
|
|
293
|
-
self.assertRaises(
|
|
294
|
-
TypeError,
|
|
295
|
-
filter_sort_top_bottom,
|
|
296
|
-
self.up,
|
|
297
|
-
top_bottom=None,
|
|
298
|
-
)
|
|
299
|
-
np.testing.assert_allclose(
|
|
300
|
-
filter_sort_top_bottom_concat(self.up, top_bottom=2),
|
|
301
|
-
[0, 1, 3, 4],
|
|
302
|
-
)
|
|
303
|
-
top, bot = filter_sort_top_bottom(
|
|
304
|
-
self.up,
|
|
305
|
-
top_bottom=2,
|
|
306
|
-
)
|
|
307
|
-
np.testing.assert_allclose(top, [3, 4])
|
|
308
|
-
np.testing.assert_allclose(bot, [0, 1])
|
|
309
|
-
|
|
310
|
-
def test_alpha(self):
|
|
311
|
-
s = pd.DataFrame([[1, 2, 3, 4, 5]]).T
|
|
312
|
-
N = pd.DataFrame(
|
|
313
|
-
[
|
|
314
|
-
[1, 5],
|
|
315
|
-
[2, 4],
|
|
316
|
-
[3, 3],
|
|
317
|
-
[4, 2],
|
|
318
|
-
[5, 1],
|
|
319
|
-
]
|
|
320
|
-
)
|
|
321
|
-
v = pd.Series([1, 0.5, 1, 0.5, 1]).T
|
|
322
|
-
t = pd.Series([1, 0, 1, 0, 1]).T
|
|
323
|
-
score = alpha(s, N, v, t)
|
|
324
|
-
np.testing.assert_allclose(score, 0.0, atol=1e-14, rtol=1e-14)
|
|
325
|
-
|
|
326
|
-
def test_meta_portfolio_contribution(self):
|
|
327
|
-
s = pd.DataFrame([[1, 2, 3, 4, 5], [1, 2, 1, 2, 1]]).T
|
|
328
|
-
st = pd.Series([0.6, 0.4])
|
|
329
|
-
N = pd.DataFrame(
|
|
330
|
-
[
|
|
331
|
-
[1, 5],
|
|
332
|
-
[2, 4],
|
|
333
|
-
[3, 3],
|
|
334
|
-
[4, 2],
|
|
335
|
-
[5, 1],
|
|
336
|
-
]
|
|
337
|
-
)
|
|
338
|
-
v = pd.Series([3, 2, 1, 2, 3]).T
|
|
339
|
-
t = pd.Series([1.0, 2.0, 3.0, 2.0, 1.0]).T
|
|
340
|
-
score = meta_portfolio_contribution(s, st, N, v, t)
|
|
341
|
-
assert np.isclose(score[0], -0.04329786867021718)
|
|
342
|
-
assert np.isclose(score[1], 0.06494680300532589)
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
if __name__ == "__main__":
|
|
346
|
-
unittest.main()
|
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
import unittest
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
import pandas as pd # type: ignore
|
|
5
|
-
|
|
6
|
-
from numerai_tools.signals import (
|
|
7
|
-
churn,
|
|
8
|
-
turnover,
|
|
9
|
-
calculate_max_churn_and_turnover,
|
|
10
|
-
)
|
|
11
|
-
from .util import (
|
|
12
|
-
generate_fake_universe,
|
|
13
|
-
generate_new_submission,
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class TestSignals(unittest.TestCase):
|
|
18
|
-
def setUp(self):
|
|
19
|
-
self.up = pd.Series(list(range(5))).rename("up")
|
|
20
|
-
self.down = pd.Series(list(reversed(range(5)))).rename("down")
|
|
21
|
-
self.up_down = pd.Series([0, 1, 2, 1, 0]).rename("up_down")
|
|
22
|
-
self.oscillate = pd.Series([1, 0, 1, 0, 1]).rename("oscillate")
|
|
23
|
-
self.constant = pd.Series([1, 1, 1, 1, 1]).rename("pos_neg")
|
|
24
|
-
|
|
25
|
-
def test_churn(self):
|
|
26
|
-
assert np.isclose(churn(self.up, self.up), 0)
|
|
27
|
-
assert np.isclose(churn(self.up, self.up_down), 1)
|
|
28
|
-
assert np.isclose(churn(self.up, self.oscillate), 1)
|
|
29
|
-
assert np.isclose(churn(self.up, self.down), 2)
|
|
30
|
-
self.assertRaisesRegex(
|
|
31
|
-
AssertionError,
|
|
32
|
-
"s2 must have non-zero standard deviation",
|
|
33
|
-
churn,
|
|
34
|
-
self.up,
|
|
35
|
-
self.constant,
|
|
36
|
-
)
|
|
37
|
-
|
|
38
|
-
def test_churn_tb(self):
|
|
39
|
-
tmp = churn(self.up, self.up, top_bottom=2)
|
|
40
|
-
assert np.isclose(tmp, 0), tmp
|
|
41
|
-
tmp = churn(self.up, self.up_down, top_bottom=2)
|
|
42
|
-
assert np.isclose(tmp, 0.5), tmp
|
|
43
|
-
tmp = churn(self.up, self.oscillate, top_bottom=2)
|
|
44
|
-
assert np.isclose(tmp, 0.5), tmp
|
|
45
|
-
tmp = churn(self.up, self.down, top_bottom=2)
|
|
46
|
-
assert np.isclose(tmp, 1), tmp
|
|
47
|
-
tmp = churn(self.up, self.constant, top_bottom=2)
|
|
48
|
-
assert np.isclose(tmp, 0), tmp
|
|
49
|
-
|
|
50
|
-
def test_turnover(self):
|
|
51
|
-
assert np.isclose(turnover(self.up, self.up), 0)
|
|
52
|
-
assert np.isclose(turnover(self.up, self.up_down), 3)
|
|
53
|
-
assert np.isclose(turnover(self.up, self.oscillate), 4.5)
|
|
54
|
-
assert np.isclose(turnover(self.up, self.down), 6)
|
|
55
|
-
assert np.isclose(turnover(self.up, self.constant), 3.5)
|
|
56
|
-
|
|
57
|
-
def test_churn_first_submission(self):
|
|
58
|
-
"""
|
|
59
|
-
Test that the churn function works for the first submission
|
|
60
|
-
No exceptions should be raised, should return 1
|
|
61
|
-
"""
|
|
62
|
-
fake_universe = generate_fake_universe("20130308")
|
|
63
|
-
fake_submission = generate_new_submission(fake_universe)
|
|
64
|
-
fake_neutralizers = pd.DataFrame(
|
|
65
|
-
{
|
|
66
|
-
"neutralizer_1": [0.1] * len(fake_universe),
|
|
67
|
-
"neutralizer_2": [0.2] * len(fake_universe),
|
|
68
|
-
},
|
|
69
|
-
index=fake_universe["numerai_ticker"],
|
|
70
|
-
)
|
|
71
|
-
fake_sample_weights = pd.Series(
|
|
72
|
-
[0.5] * len(fake_universe),
|
|
73
|
-
index=fake_universe["numerai_ticker"],
|
|
74
|
-
name="sample_weight",
|
|
75
|
-
)
|
|
76
|
-
churn, turnover = calculate_max_churn_and_turnover(
|
|
77
|
-
curr_sub=fake_submission,
|
|
78
|
-
curr_neutralizer=fake_neutralizers,
|
|
79
|
-
curr_weight=fake_sample_weights,
|
|
80
|
-
prev_week_subs=[],
|
|
81
|
-
prev_neutralizers={"20240208": fake_neutralizers},
|
|
82
|
-
prev_sample_weights={"20240208": fake_sample_weights},
|
|
83
|
-
universe=fake_universe.set_index("numerai_ticker").sort_index(),
|
|
84
|
-
curr_signal_col="signal",
|
|
85
|
-
curr_ticker_col="numerai_ticker",
|
|
86
|
-
)
|
|
87
|
-
assert np.isclose(churn, 1)
|
|
88
|
-
assert np.isclose(turnover, 1)
|
|
89
|
-
|
|
90
|
-
def test_churn_handles_different_id_columns(self):
|
|
91
|
-
"""
|
|
92
|
-
Test that the churn function works when
|
|
93
|
-
previous submission has different id columns.
|
|
94
|
-
"""
|
|
95
|
-
fake_universe = generate_fake_universe("20130308")
|
|
96
|
-
fake_submission = generate_new_submission(fake_universe, legacy_headers=True)
|
|
97
|
-
new_fake_universe = generate_fake_universe(
|
|
98
|
-
date_value="20130308", ticker_col="ticker"
|
|
99
|
-
)
|
|
100
|
-
fake_universe["ticker"] = new_fake_universe["ticker"]
|
|
101
|
-
prev_submission = fake_submission.copy()
|
|
102
|
-
fake_neutralizers = pd.DataFrame(
|
|
103
|
-
{
|
|
104
|
-
"neutralizer_1": [0.1] * len(fake_universe),
|
|
105
|
-
"neutralizer_2": [0.2] * len(fake_universe),
|
|
106
|
-
},
|
|
107
|
-
index=fake_universe["numerai_ticker"],
|
|
108
|
-
)
|
|
109
|
-
fake_sample_weights = pd.Series(
|
|
110
|
-
[0.5] * len(fake_universe),
|
|
111
|
-
index=fake_universe["numerai_ticker"],
|
|
112
|
-
name="sample_weight",
|
|
113
|
-
)
|
|
114
|
-
# switch out the numerai_ticke col in-place
|
|
115
|
-
prev_submission["numerai_ticker"] = new_fake_universe["ticker"]
|
|
116
|
-
prev_submission.rename(columns={"numerai_ticker": "ticker"}, inplace=True)
|
|
117
|
-
prev_neutralizers = fake_neutralizers.copy()
|
|
118
|
-
prev_neutralizers.index = new_fake_universe["ticker"]
|
|
119
|
-
prev_neutralizers.index.name = "ticker"
|
|
120
|
-
prev_sample_weights = fake_sample_weights.copy()
|
|
121
|
-
prev_sample_weights.index = new_fake_universe["ticker"]
|
|
122
|
-
prev_sample_weights.index.name = "ticker"
|
|
123
|
-
churn, turnover = calculate_max_churn_and_turnover(
|
|
124
|
-
curr_sub=fake_submission,
|
|
125
|
-
curr_neutralizer=fake_neutralizers,
|
|
126
|
-
curr_weight=fake_sample_weights,
|
|
127
|
-
prev_week_subs={"20240208": prev_submission},
|
|
128
|
-
prev_neutralizers={"20240208": prev_neutralizers},
|
|
129
|
-
prev_sample_weights={"20240208": prev_sample_weights},
|
|
130
|
-
universe=fake_universe.set_index("numerai_ticker").sort_index(),
|
|
131
|
-
curr_signal_col="signal",
|
|
132
|
-
curr_ticker_col="numerai_ticker",
|
|
133
|
-
)
|
|
134
|
-
assert np.isclose(churn, 0)
|
|
135
|
-
assert np.isclose(turnover, 0)
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
if __name__ == "__main__":
|
|
139
|
-
unittest.main()
|
|
@@ -1,498 +0,0 @@
|
|
|
1
|
-
import unittest
|
|
2
|
-
import random
|
|
3
|
-
import string
|
|
4
|
-
from typing import List
|
|
5
|
-
|
|
6
|
-
import numpy as np
|
|
7
|
-
import pandas as pd # type: ignore
|
|
8
|
-
|
|
9
|
-
from numerai_tools.submissions import (
|
|
10
|
-
NUMERAI_ALLOWED_ID_COLS,
|
|
11
|
-
NUMERAI_ALLOWED_PRED_COLS,
|
|
12
|
-
SIGNALS_ALLOWED_ID_COLS,
|
|
13
|
-
SIGNALS_ALLOWED_PRED_COLS,
|
|
14
|
-
CRYPTO_ALLOWED_ID_COLS,
|
|
15
|
-
CRYPTO_ALLOWED_PRED_COLS,
|
|
16
|
-
_validate_headers,
|
|
17
|
-
validate_headers_numerai,
|
|
18
|
-
validate_headers_signals,
|
|
19
|
-
validate_headers_crypto,
|
|
20
|
-
validate_values,
|
|
21
|
-
_validate_ids,
|
|
22
|
-
validate_ids_numerai,
|
|
23
|
-
validate_ids_signals,
|
|
24
|
-
validate_ids_crypto,
|
|
25
|
-
clean_predictions,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class TestSubmissions(unittest.TestCase):
|
|
30
|
-
def setUp(self):
|
|
31
|
-
# use 9 digits for cusip handling checks
|
|
32
|
-
self.ids = generate_ids(9, 5)
|
|
33
|
-
self.classic_subs = [
|
|
34
|
-
generate_submission(self.ids, id_col, pred_col)
|
|
35
|
-
for id_col in NUMERAI_ALLOWED_ID_COLS
|
|
36
|
-
for pred_col in NUMERAI_ALLOWED_PRED_COLS
|
|
37
|
-
]
|
|
38
|
-
self.signals_subs = [
|
|
39
|
-
generate_submission(self.ids, id_col, pred_col)
|
|
40
|
-
for id_col in SIGNALS_ALLOWED_ID_COLS
|
|
41
|
-
for pred_col in SIGNALS_ALLOWED_PRED_COLS
|
|
42
|
-
]
|
|
43
|
-
self.crypto_subs = [
|
|
44
|
-
generate_submission(self.ids, id_col, pred_col)
|
|
45
|
-
for id_col in CRYPTO_ALLOWED_ID_COLS
|
|
46
|
-
for pred_col in CRYPTO_ALLOWED_PRED_COLS
|
|
47
|
-
]
|
|
48
|
-
|
|
49
|
-
def test_validate_headers(self):
|
|
50
|
-
assert _validate_headers(
|
|
51
|
-
["test1"], ["test2"], generate_submission(self.ids, "test1", "test2")
|
|
52
|
-
) == ("test1", "test2")
|
|
53
|
-
|
|
54
|
-
def test_validate_headers_wrong_name(self):
|
|
55
|
-
self.assertRaisesRegex(
|
|
56
|
-
AssertionError,
|
|
57
|
-
"headers must be one of",
|
|
58
|
-
_validate_headers,
|
|
59
|
-
["test1"],
|
|
60
|
-
["test2"],
|
|
61
|
-
generate_submission(self.ids, "wrong", "test2"),
|
|
62
|
-
)
|
|
63
|
-
self.assertRaisesRegex(
|
|
64
|
-
AssertionError,
|
|
65
|
-
"headers must be one of",
|
|
66
|
-
_validate_headers,
|
|
67
|
-
["test1"],
|
|
68
|
-
["test2"],
|
|
69
|
-
generate_submission(self.ids, "test1", "wrong"),
|
|
70
|
-
)
|
|
71
|
-
|
|
72
|
-
def test_validate_headers_missing(self):
|
|
73
|
-
self.assertRaisesRegex(
|
|
74
|
-
AssertionError,
|
|
75
|
-
"headers must be one of",
|
|
76
|
-
_validate_headers,
|
|
77
|
-
["test1"],
|
|
78
|
-
["test2"],
|
|
79
|
-
generate_submission(self.ids, "test1", "test2")[["test1"]],
|
|
80
|
-
)
|
|
81
|
-
self.assertRaisesRegex(
|
|
82
|
-
AssertionError,
|
|
83
|
-
"headers must be one of",
|
|
84
|
-
_validate_headers,
|
|
85
|
-
["test1"],
|
|
86
|
-
["test2"],
|
|
87
|
-
generate_submission(self.ids, "test1", "test2")[["test2"]],
|
|
88
|
-
)
|
|
89
|
-
|
|
90
|
-
def test_validate_headers_numerai(self):
|
|
91
|
-
for sub in self.classic_subs:
|
|
92
|
-
assert validate_headers_numerai(sub) == tuple(sub.columns)
|
|
93
|
-
|
|
94
|
-
def test_validate_headers_numerai_wrong_name(self):
|
|
95
|
-
for sub in self.classic_subs:
|
|
96
|
-
self.assertRaisesRegex(
|
|
97
|
-
AssertionError,
|
|
98
|
-
"headers must be one of",
|
|
99
|
-
validate_headers_numerai,
|
|
100
|
-
sub.rename(columns={sub.columns[0]: "wrong"}),
|
|
101
|
-
)
|
|
102
|
-
self.assertRaisesRegex(
|
|
103
|
-
AssertionError,
|
|
104
|
-
"headers must be one of",
|
|
105
|
-
validate_headers_numerai,
|
|
106
|
-
sub.rename(columns={sub.columns[1]: "wrong"}),
|
|
107
|
-
)
|
|
108
|
-
|
|
109
|
-
def test_validate_headers_numerai_missing(self):
|
|
110
|
-
for sub in self.classic_subs:
|
|
111
|
-
self.assertRaisesRegex(
|
|
112
|
-
AssertionError,
|
|
113
|
-
"headers must be one of",
|
|
114
|
-
validate_headers_numerai,
|
|
115
|
-
sub[[sub.columns[0]]],
|
|
116
|
-
)
|
|
117
|
-
self.assertRaisesRegex(
|
|
118
|
-
AssertionError,
|
|
119
|
-
"headers must be one of",
|
|
120
|
-
validate_headers_numerai,
|
|
121
|
-
sub[[sub.columns[1]]],
|
|
122
|
-
)
|
|
123
|
-
|
|
124
|
-
def test_validate_headers_signals(self):
|
|
125
|
-
for sub in self.signals_subs:
|
|
126
|
-
assert validate_headers_signals(sub) == tuple(sub.columns)
|
|
127
|
-
|
|
128
|
-
def test_validate_headers_signals_wrong_name(self):
|
|
129
|
-
for sub in self.signals_subs:
|
|
130
|
-
self.assertRaisesRegex(
|
|
131
|
-
AssertionError,
|
|
132
|
-
"headers must be one of",
|
|
133
|
-
validate_headers_signals,
|
|
134
|
-
sub.rename(columns={sub.columns[0]: "wrong"}),
|
|
135
|
-
)
|
|
136
|
-
self.assertRaisesRegex(
|
|
137
|
-
AssertionError,
|
|
138
|
-
"headers must be one of",
|
|
139
|
-
validate_headers_signals,
|
|
140
|
-
sub.rename(columns={sub.columns[1]: "wrong"}),
|
|
141
|
-
)
|
|
142
|
-
|
|
143
|
-
def test_validate_headers_signals_missing(self):
|
|
144
|
-
for sub in self.signals_subs:
|
|
145
|
-
self.assertRaisesRegex(
|
|
146
|
-
AssertionError,
|
|
147
|
-
"headers must be one of",
|
|
148
|
-
validate_headers_signals,
|
|
149
|
-
sub[[sub.columns[0]]],
|
|
150
|
-
)
|
|
151
|
-
self.assertRaisesRegex(
|
|
152
|
-
AssertionError,
|
|
153
|
-
"headers must be one of",
|
|
154
|
-
validate_headers_signals,
|
|
155
|
-
sub[[sub.columns[1]]],
|
|
156
|
-
)
|
|
157
|
-
|
|
158
|
-
def test_validate_headers_signals_data_type_and_date_col(self):
|
|
159
|
-
fake_sub = generate_submission(self.ids, "ticker", "signal")
|
|
160
|
-
fake_sub["data_type"] = "signals"
|
|
161
|
-
fake_sub["friday_date"] = "2023-01-01"
|
|
162
|
-
with self.assertLogs(level="WARNING") as cm:
|
|
163
|
-
assert validate_headers_signals(fake_sub) == ("ticker", "signal")
|
|
164
|
-
self.assertIn(
|
|
165
|
-
"WARNING:numerai_tools.submissions:data_type column found in Signals submission. This is deprecated and will be removed in the future. "
|
|
166
|
-
"Please remove the data_type column from your Signals submission.",
|
|
167
|
-
cm.output[0],
|
|
168
|
-
)
|
|
169
|
-
|
|
170
|
-
def test_validate_headers_crypto(self):
|
|
171
|
-
for sub in self.crypto_subs:
|
|
172
|
-
assert validate_headers_crypto(sub) == tuple(sub.columns)
|
|
173
|
-
|
|
174
|
-
def test_validate_headers_crypto_wrong_name(self):
|
|
175
|
-
for sub in self.crypto_subs:
|
|
176
|
-
self.assertRaisesRegex(
|
|
177
|
-
AssertionError,
|
|
178
|
-
"headers must be one of",
|
|
179
|
-
validate_headers_crypto,
|
|
180
|
-
sub.rename(columns={sub.columns[0]: "wrong"}),
|
|
181
|
-
)
|
|
182
|
-
self.assertRaisesRegex(
|
|
183
|
-
AssertionError,
|
|
184
|
-
"headers must be one of",
|
|
185
|
-
validate_headers_crypto,
|
|
186
|
-
sub.rename(columns={sub.columns[1]: "wrong"}),
|
|
187
|
-
)
|
|
188
|
-
|
|
189
|
-
def test_validate_headers_crypto_missing(self):
|
|
190
|
-
for sub in self.crypto_subs:
|
|
191
|
-
self.assertRaisesRegex(
|
|
192
|
-
AssertionError,
|
|
193
|
-
"headers must be one of",
|
|
194
|
-
validate_headers_crypto,
|
|
195
|
-
sub[[sub.columns[0]]],
|
|
196
|
-
)
|
|
197
|
-
self.assertRaisesRegex(
|
|
198
|
-
AssertionError,
|
|
199
|
-
"headers must be one of",
|
|
200
|
-
validate_headers_crypto,
|
|
201
|
-
sub[[sub.columns[1]]],
|
|
202
|
-
)
|
|
203
|
-
|
|
204
|
-
def test_validate_values(self):
|
|
205
|
-
validate_values(generate_submission(self.ids, "id", "prediction"), "prediction")
|
|
206
|
-
|
|
207
|
-
def test_validate_values_nans(self):
|
|
208
|
-
nan_sub = generate_submission(self.ids, "id", "prediction")
|
|
209
|
-
nan_sub.loc[0, "prediction"] = np.nan
|
|
210
|
-
self.assertRaisesRegex(
|
|
211
|
-
AssertionError,
|
|
212
|
-
"must not contain NaNs",
|
|
213
|
-
validate_values,
|
|
214
|
-
nan_sub,
|
|
215
|
-
"prediction",
|
|
216
|
-
)
|
|
217
|
-
|
|
218
|
-
def test_validate_values_out_of_bounds(self):
|
|
219
|
-
out_of_bounds_sub = generate_submission(self.ids, "id", "prediction")
|
|
220
|
-
out_of_bounds_sub.loc[0, "prediction"] = -1
|
|
221
|
-
self.assertRaisesRegex(
|
|
222
|
-
AssertionError,
|
|
223
|
-
"values must be between 0 and 1 exclusive",
|
|
224
|
-
validate_values,
|
|
225
|
-
out_of_bounds_sub,
|
|
226
|
-
"prediction",
|
|
227
|
-
)
|
|
228
|
-
out_of_bounds_sub.loc[0, "prediction"] = 2
|
|
229
|
-
self.assertRaisesRegex(
|
|
230
|
-
AssertionError,
|
|
231
|
-
"values must be between 0 and 1 exclusive",
|
|
232
|
-
validate_values,
|
|
233
|
-
out_of_bounds_sub,
|
|
234
|
-
"prediction",
|
|
235
|
-
)
|
|
236
|
-
|
|
237
|
-
def test_validate_values_zero_std(self):
|
|
238
|
-
const_sub = generate_submission(self.ids, "id", "prediction")
|
|
239
|
-
const_sub["prediction"] = 0.5
|
|
240
|
-
self.assertRaisesRegex(
|
|
241
|
-
AssertionError,
|
|
242
|
-
"submission must have non-zero standard deviation",
|
|
243
|
-
validate_values,
|
|
244
|
-
const_sub,
|
|
245
|
-
"prediction",
|
|
246
|
-
)
|
|
247
|
-
|
|
248
|
-
def test_validate_ids(self):
|
|
249
|
-
sub = generate_submission(self.ids, "id", "prediction")
|
|
250
|
-
new_sub, invalid_ids = _validate_ids(self.ids, sub, "id", len(self.ids))
|
|
251
|
-
assert (new_sub == sub.sort_values("id")).all().all()
|
|
252
|
-
assert invalid_ids == []
|
|
253
|
-
|
|
254
|
-
def test_validate_ids_nans(self):
|
|
255
|
-
nan_sub = generate_submission(self.ids, "id", "prediction")
|
|
256
|
-
nan_sub.loc[0, "id"] = np.nan
|
|
257
|
-
self.assertRaisesRegex(
|
|
258
|
-
AssertionError,
|
|
259
|
-
"must not contain NaNs",
|
|
260
|
-
_validate_ids,
|
|
261
|
-
self.ids,
|
|
262
|
-
nan_sub,
|
|
263
|
-
"id",
|
|
264
|
-
len(self.ids),
|
|
265
|
-
)
|
|
266
|
-
|
|
267
|
-
def test_validate_ids_all_nan_ids(self):
|
|
268
|
-
nan_ids = pd.Series([np.nan, np.nan, np.nan])
|
|
269
|
-
submission = generate_submission(nan_ids, "id", "prediction")
|
|
270
|
-
self.assertRaisesRegex(
|
|
271
|
-
AssertionError,
|
|
272
|
-
"Submission must not contain NaNs",
|
|
273
|
-
_validate_ids,
|
|
274
|
-
self.ids,
|
|
275
|
-
submission,
|
|
276
|
-
"id",
|
|
277
|
-
len(self.ids),
|
|
278
|
-
)
|
|
279
|
-
|
|
280
|
-
def test_validate_ids_duplicates(self):
|
|
281
|
-
dup_sub = generate_submission(self.ids, "id", "prediction")
|
|
282
|
-
dup_sub.loc[0] = dup_sub.loc[1]
|
|
283
|
-
self.assertRaisesRegex(
|
|
284
|
-
AssertionError,
|
|
285
|
-
"Duplicates detected",
|
|
286
|
-
_validate_ids,
|
|
287
|
-
self.ids,
|
|
288
|
-
dup_sub,
|
|
289
|
-
"id",
|
|
290
|
-
len(self.ids),
|
|
291
|
-
)
|
|
292
|
-
|
|
293
|
-
def test_validate_ids_duplicate_ids(self):
|
|
294
|
-
submission = generate_submission(self.ids, "id", "prediction")
|
|
295
|
-
submission = pd.concat([submission, submission.iloc[:1]])
|
|
296
|
-
self.assertRaisesRegex(
|
|
297
|
-
AssertionError,
|
|
298
|
-
"Duplicates detected",
|
|
299
|
-
_validate_ids,
|
|
300
|
-
self.ids,
|
|
301
|
-
submission,
|
|
302
|
-
"id",
|
|
303
|
-
len(self.ids),
|
|
304
|
-
)
|
|
305
|
-
|
|
306
|
-
def test_validate_ids_missing(self):
|
|
307
|
-
missing_sub = generate_submission(self.ids, "id", "prediction")
|
|
308
|
-
missing_sub = missing_sub[missing_sub["id"] != self.ids[0]]
|
|
309
|
-
self.assertRaisesRegex(
|
|
310
|
-
AssertionError,
|
|
311
|
-
"Not enough stocks submitted",
|
|
312
|
-
_validate_ids,
|
|
313
|
-
self.ids,
|
|
314
|
-
missing_sub,
|
|
315
|
-
"id",
|
|
316
|
-
len(self.ids),
|
|
317
|
-
)
|
|
318
|
-
|
|
319
|
-
def test_validate_ids_empty_submission(self):
|
|
320
|
-
empty_submission = pd.DataFrame(columns=["id", "prediction"])
|
|
321
|
-
self.assertRaisesRegex(
|
|
322
|
-
AssertionError,
|
|
323
|
-
"Not enough stocks submitted.",
|
|
324
|
-
_validate_ids,
|
|
325
|
-
self.ids,
|
|
326
|
-
empty_submission,
|
|
327
|
-
"id",
|
|
328
|
-
len(self.ids),
|
|
329
|
-
)
|
|
330
|
-
|
|
331
|
-
def test_validate_ids_all_invalid_ids(self):
|
|
332
|
-
invalid_ids = pd.Series(["invalid1", "invalid2", "invalid3"])
|
|
333
|
-
submission = generate_submission(invalid_ids, "id", "prediction")
|
|
334
|
-
self.assertRaisesRegex(
|
|
335
|
-
AssertionError,
|
|
336
|
-
"Not enough stocks submitted.",
|
|
337
|
-
_validate_ids,
|
|
338
|
-
self.ids,
|
|
339
|
-
submission,
|
|
340
|
-
"id",
|
|
341
|
-
len(self.ids),
|
|
342
|
-
)
|
|
343
|
-
|
|
344
|
-
def test_validate_ids_mixed_valid_invalid_ids(self):
|
|
345
|
-
mixed_ids = self.ids.tolist() + ["invalid1", "invalid2"]
|
|
346
|
-
submission = generate_submission(mixed_ids, "id", "prediction")
|
|
347
|
-
new_sub, invalid_ids = _validate_ids(self.ids, submission, "id", len(self.ids))
|
|
348
|
-
assert (new_sub["id"] == self.ids.sort_values()).all()
|
|
349
|
-
assert set(invalid_ids) == {"invalid1", "invalid2"}
|
|
350
|
-
|
|
351
|
-
def test_validate_ids_numerai(self):
|
|
352
|
-
sub = generate_submission(self.ids, "id", "prediction")
|
|
353
|
-
new_sub, invalid_ids = validate_ids_numerai(self.ids, sub, "id")
|
|
354
|
-
assert (new_sub == sub.sort_values("id")).all().all()
|
|
355
|
-
assert invalid_ids == []
|
|
356
|
-
|
|
357
|
-
def test_validate_ids_signals(self):
|
|
358
|
-
ids = generate_ids(9, 100)
|
|
359
|
-
sub = generate_submission(ids, "ticker", "signal")
|
|
360
|
-
new_sub, invalid_ids = validate_ids_signals(ids, sub, "ticker")
|
|
361
|
-
assert (new_sub == sub.sort_values("ticker")).all().all()
|
|
362
|
-
assert invalid_ids == []
|
|
363
|
-
|
|
364
|
-
def test_validate_ids_crypto(self):
|
|
365
|
-
ids = generate_ids(9, 100)
|
|
366
|
-
sub = generate_submission(ids, "ticker", "signal")
|
|
367
|
-
new_sub, invalid_ids = validate_ids_crypto(ids, sub, "ticker")
|
|
368
|
-
assert (new_sub == sub.sort_values("ticker")).all().all()
|
|
369
|
-
assert invalid_ids == []
|
|
370
|
-
|
|
371
|
-
def test_clean_predictions(self):
|
|
372
|
-
int_sub = generate_submission(self.ids, "id", "prediction", random_vals=False)
|
|
373
|
-
assert (
|
|
374
|
-
(
|
|
375
|
-
clean_predictions(
|
|
376
|
-
self.ids,
|
|
377
|
-
int_sub,
|
|
378
|
-
id_col="id",
|
|
379
|
-
rank_and_fill=False,
|
|
380
|
-
).reset_index()
|
|
381
|
-
== int_sub.set_index("id").sort_index().reset_index()
|
|
382
|
-
)
|
|
383
|
-
.all()
|
|
384
|
-
.all()
|
|
385
|
-
)
|
|
386
|
-
|
|
387
|
-
def test_clean_predictions_rank_and_fill(self):
|
|
388
|
-
int_sub = generate_submission(self.ids, "id", "prediction", random_vals=False)
|
|
389
|
-
assert np.isclose(
|
|
390
|
-
clean_predictions(
|
|
391
|
-
self.ids,
|
|
392
|
-
int_sub,
|
|
393
|
-
id_col="id",
|
|
394
|
-
rank_and_fill=True,
|
|
395
|
-
)
|
|
396
|
-
.sort_values("prediction")
|
|
397
|
-
.values.T,
|
|
398
|
-
[[0.1, 0.3, 0.5, 0.7, 0.9]],
|
|
399
|
-
).all()
|
|
400
|
-
|
|
401
|
-
def test_clean_predictions_empty_predictions(self):
|
|
402
|
-
empty_predictions = pd.DataFrame(columns=["id", "prediction"])
|
|
403
|
-
self.assertRaisesRegex(
|
|
404
|
-
AssertionError,
|
|
405
|
-
"predictions must not be empty",
|
|
406
|
-
clean_predictions,
|
|
407
|
-
self.ids,
|
|
408
|
-
empty_predictions,
|
|
409
|
-
id_col="id",
|
|
410
|
-
rank_and_fill=False,
|
|
411
|
-
)
|
|
412
|
-
|
|
413
|
-
def test_clean_predictions_all_nan_predictions(self):
|
|
414
|
-
predictions = generate_submission(self.ids, "id", "prediction")
|
|
415
|
-
predictions["prediction"] = np.nan
|
|
416
|
-
cleaned_predictions = clean_predictions(
|
|
417
|
-
self.ids,
|
|
418
|
-
predictions,
|
|
419
|
-
id_col="id",
|
|
420
|
-
rank_and_fill=True,
|
|
421
|
-
)
|
|
422
|
-
assert (cleaned_predictions == 0.5).all().all()
|
|
423
|
-
|
|
424
|
-
def test_clean_predictions_mixed_valid_invalid_ids(self):
|
|
425
|
-
mixed_ids = self.ids.tolist() + ["invalid1", "invalid2"]
|
|
426
|
-
predictions = generate_submission(mixed_ids, "id", "prediction")
|
|
427
|
-
cleaned_predictions = clean_predictions(
|
|
428
|
-
self.ids,
|
|
429
|
-
predictions,
|
|
430
|
-
id_col="id",
|
|
431
|
-
rank_and_fill=False,
|
|
432
|
-
)
|
|
433
|
-
assert (cleaned_predictions.index == self.ids.sort_values()).all()
|
|
434
|
-
|
|
435
|
-
def test_clean_predictions_duplicate_ids(self):
|
|
436
|
-
predictions = generate_submission(self.ids, "id", "prediction")
|
|
437
|
-
predictions = pd.concat([predictions, predictions.iloc[:1]])
|
|
438
|
-
cleaned_predictions = clean_predictions(
|
|
439
|
-
self.ids,
|
|
440
|
-
predictions,
|
|
441
|
-
id_col="id",
|
|
442
|
-
rank_and_fill=False,
|
|
443
|
-
)
|
|
444
|
-
assert not cleaned_predictions.index.duplicated().any()
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
def generate_ids(id_length: int, num_rows: int) -> pd.Series:
|
|
448
|
-
"""Generates a given number of unique ascii-valued strings of a given length.
|
|
449
|
-
|
|
450
|
-
Arguments:
|
|
451
|
-
id_length -- integer length of the id
|
|
452
|
-
num_rows -- integer number of rows to generate
|
|
453
|
-
|
|
454
|
-
Return List[str]:
|
|
455
|
-
- list of unique ascii-valued strings of the given
|
|
456
|
-
"""
|
|
457
|
-
values: set[str] = set()
|
|
458
|
-
while len(values) < num_rows:
|
|
459
|
-
new_value = "".join(random.choices(string.ascii_uppercase, k=id_length))
|
|
460
|
-
values.add(new_value)
|
|
461
|
-
return pd.Series(list(values))
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
def generate_submission(
|
|
465
|
-
live_ids: List[str],
|
|
466
|
-
id_col: str,
|
|
467
|
-
pred_col: str,
|
|
468
|
-
random_vals: bool = True,
|
|
469
|
-
legacy_headers: dict = {},
|
|
470
|
-
) -> pd.DataFrame:
|
|
471
|
-
"""Generates a random vector with given columns and ids.
|
|
472
|
-
|
|
473
|
-
Arguments:
|
|
474
|
-
live_ids -- list of strings of ids
|
|
475
|
-
id_col -- string name of the id column
|
|
476
|
-
pred_col -- string name of the prediction column
|
|
477
|
-
random -- boolean whether to generate random values or sequential
|
|
478
|
-
legacy_headers -- dictionary of legacy headers to add to the submission
|
|
479
|
-
|
|
480
|
-
Return pd.DataFrame:
|
|
481
|
-
- submission DataFrame with the given columns and ids
|
|
482
|
-
"""
|
|
483
|
-
rows = []
|
|
484
|
-
for i, ticker in enumerate(live_ids):
|
|
485
|
-
if random_vals:
|
|
486
|
-
val = random.random()
|
|
487
|
-
else:
|
|
488
|
-
val = i
|
|
489
|
-
row = {id_col: ticker, pred_col: val}
|
|
490
|
-
for col, value in legacy_headers.items():
|
|
491
|
-
row[col] = value
|
|
492
|
-
rows.append(row)
|
|
493
|
-
sub = pd.DataFrame(rows)
|
|
494
|
-
return sub
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
if __name__ == "__main__":
|
|
498
|
-
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|