perspective-python 4.2.0__cp311-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perspective/__init__.py +396 -0
- perspective/extension/finos-perspective-nbextension.json +5 -0
- perspective/handlers/__init__.py +11 -0
- perspective/handlers/aiohttp.py +61 -0
- perspective/handlers/starlette.py +55 -0
- perspective/handlers/tornado.py +184 -0
- perspective/perspective.pyd +0 -0
- perspective/templates/exported_widget.html.template +35 -0
- perspective/tests/__init__.py +11 -0
- perspective/tests/async/test_async_client.py +83 -0
- perspective/tests/async/test_websocket_client.py +124 -0
- perspective/tests/conftest.py +272 -0
- perspective/tests/core/__init__.py +11 -0
- perspective/tests/core/test_async.py +351 -0
- perspective/tests/multi_threaded/__init__.py +11 -0
- perspective/tests/multi_threaded/test_multi_threaded.py +201 -0
- perspective/tests/server/__init__.py +11 -0
- perspective/tests/server/test_server.py +1016 -0
- perspective/tests/server/test_session.py +110 -0
- perspective/tests/table/__init__.py +11 -0
- perspective/tests/table/arrow/date32.arrow +0 -0
- perspective/tests/table/arrow/date64.arrow +0 -0
- perspective/tests/table/arrow/dict.arrow +0 -0
- perspective/tests/table/arrow/dict_update.arrow +0 -0
- perspective/tests/table/arrow/int_float_str.arrow +0 -0
- perspective/tests/table/arrow/int_float_str_file.arrow +0 -0
- perspective/tests/table/arrow/int_float_str_update.arrow +0 -0
- perspective/tests/table/object_sequence.py +402 -0
- perspective/tests/table/test_column_paths.py +89 -0
- perspective/tests/table/test_delete.py +124 -0
- perspective/tests/table/test_exception.py +65 -0
- perspective/tests/table/test_leaks.py +54 -0
- perspective/tests/table/test_ports.py +178 -0
- perspective/tests/table/test_remove.py +102 -0
- perspective/tests/table/test_table.py +641 -0
- perspective/tests/table/test_table_arrow.py +503 -0
- perspective/tests/table/test_table_datetime.py +2409 -0
- perspective/tests/table/test_table_infer.py +201 -0
- perspective/tests/table/test_table_limit.py +45 -0
- perspective/tests/table/test_table_numpy.py +1022 -0
- perspective/tests/table/test_table_pandas.py +1018 -0
- perspective/tests/table/test_table_polars.py +251 -0
- perspective/tests/table/test_table_view_table.py +130 -0
- perspective/tests/table/test_to_arrow.py +417 -0
- perspective/tests/table/test_to_arrow_lz4.py +32 -0
- perspective/tests/table/test_to_format.py +1024 -0
- perspective/tests/table/test_to_polars.py +26 -0
- perspective/tests/table/test_update.py +545 -0
- perspective/tests/table/test_update_arrow.py +980 -0
- perspective/tests/table/test_update_pandas.py +211 -0
- perspective/tests/table/test_view.py +2261 -0
- perspective/tests/table/test_view_expression.py +1940 -0
- perspective/tests/test_dependencies.py +53 -0
- perspective/tests/viewer/__init__.py +11 -0
- perspective/tests/viewer/test_viewer.py +246 -0
- perspective/tests/widget/__init__.py +11 -0
- perspective/tests/widget/test_widget.py +278 -0
- perspective/tests/widget/test_widget_pandas.py +453 -0
- perspective/virtual_servers/__init__.py +134 -0
- perspective/virtual_servers/clickhouse.py +245 -0
- perspective/virtual_servers/duckdb.py +236 -0
- perspective/widget/__init__.py +349 -0
- perspective/widget/viewer/__init__.py +15 -0
- perspective/widget/viewer/validate.py +22 -0
- perspective/widget/viewer/viewer.py +343 -0
- perspective/widget/viewer/viewer_traitlets.py +101 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/install.json +5 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/package.json +71 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js +2 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js.LICENSE.txt +25 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/523.c030af5d3c4f67ff83f6.js +1 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/remoteEntry.95a8ea1b44d96032833f.js +1 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/style.js +4 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/third-party-licenses.json +16 -0
- perspective_python-4.2.0.dist-info/METADATA +27 -0
- perspective_python-4.2.0.dist-info/RECORD +79 -0
- perspective_python-4.2.0.dist-info/WHEEL +4 -0
- perspective_python-4.2.0.dist-info/licenses/LICENSE.md +193 -0
- perspective_python-4.2.0.dist-info/licenses/LICENSE_THIRDPARTY_cargo.yml +17395 -0
|
@@ -0,0 +1,1018 @@
|
|
|
1
|
+
# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
|
|
2
|
+
# ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃
|
|
3
|
+
# ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃
|
|
4
|
+
# ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃
|
|
5
|
+
# ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃
|
|
6
|
+
# ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
|
|
7
|
+
# ┃ Copyright (c) 2017, the Perspective Authors. ┃
|
|
8
|
+
# ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
|
|
9
|
+
# ┃ This file is part of the Perspective library, distributed under the terms ┃
|
|
10
|
+
# ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
|
|
11
|
+
# ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
|
|
12
|
+
|
|
13
|
+
from datetime import date, datetime
|
|
14
|
+
from io import StringIO
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
from pytest import mark
|
|
18
|
+
import perspective as psp
|
|
19
|
+
|
|
20
|
+
# Module-level client obtained from a freshly constructed psp.Server();
# it is created once, when this module is imported.
client = psp.Server().new_local_client()
|
|
21
|
+
# Shorthand for ``client.table``; the tests below call ``Table(...)`` to
# build tables from data or from a schema dict.
Table = client.table
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def arrow_bytes_to_pandas(view):
    """Round-trip ``view`` through ``to_arrow()`` and return the pandas result.

    The bytes returned by ``view.to_arrow()`` are wrapped in a
    ``pyarrow.BufferReader``, opened with ``pyarrow.ipc.open_stream`` and
    materialised with ``read_pandas()``.
    """
    # Imported lazily, so pyarrow is only needed when this helper is called.
    import pyarrow

    arrow_payload = view.to_arrow()
    payload_reader = pyarrow.BufferReader(arrow_payload)
    with pyarrow.ipc.open_stream(payload_reader) as stream:
        frame = stream.read_pandas()
    return frame
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TestTablePandas(object):
|
|
32
|
+
def test_empty_table(self):
|
|
33
|
+
tbl = Table([])
|
|
34
|
+
assert tbl.size() == 0
|
|
35
|
+
assert tbl.schema() == {}
|
|
36
|
+
|
|
37
|
+
def test_table_dataframe(self):
|
|
38
|
+
d = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
|
|
39
|
+
data = pd.DataFrame(d)
|
|
40
|
+
tbl = Table(data)
|
|
41
|
+
assert tbl.size() == 2
|
|
42
|
+
assert tbl.schema() == {"index": "integer", "a": "integer", "b": "integer"}
|
|
43
|
+
assert tbl.view().to_records() == [
|
|
44
|
+
{"a": 1, "b": 2, "index": 0},
|
|
45
|
+
{"a": 3, "b": 4, "index": 1},
|
|
46
|
+
]
|
|
47
|
+
|
|
48
|
+
def test_table_dataframe_column_order(self):
|
|
49
|
+
d = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
|
|
50
|
+
data = pd.DataFrame(d, columns=["b", "c", "a", "d"])
|
|
51
|
+
tbl = Table(data)
|
|
52
|
+
assert tbl.size() == 2
|
|
53
|
+
assert tbl.columns() == ["index", "b", "c", "a", "d"]
|
|
54
|
+
|
|
55
|
+
def test_table_dataframe_selective_column_order(self):
|
|
56
|
+
d = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
|
|
57
|
+
data = pd.DataFrame(d, columns=["b", "c", "a"])
|
|
58
|
+
tbl = Table(data)
|
|
59
|
+
assert tbl.size() == 2
|
|
60
|
+
assert tbl.columns() == ["index", "b", "c", "a"]
|
|
61
|
+
|
|
62
|
+
def test_table_dataframe_does_not_mutate(self):
|
|
63
|
+
# make sure we don't mutate the dataframe that a user passes in
|
|
64
|
+
data = pd.DataFrame(
|
|
65
|
+
{
|
|
66
|
+
"a": np.array([None, 1, None, 2], dtype=object),
|
|
67
|
+
"b": np.array([1.5, None, 2.5, None], dtype=object),
|
|
68
|
+
}
|
|
69
|
+
)
|
|
70
|
+
assert data["a"].tolist() == [None, 1, None, 2]
|
|
71
|
+
assert data["b"].tolist() == [1.5, None, 2.5, None]
|
|
72
|
+
|
|
73
|
+
tbl = Table(data)
|
|
74
|
+
assert tbl.size() == 4
|
|
75
|
+
assert tbl.schema() == {"index": "integer", "a": "integer", "b": "float"}
|
|
76
|
+
|
|
77
|
+
assert data["a"].tolist() == [None, 1, None, 2]
|
|
78
|
+
assert data["b"].tolist() == [1.5, None, 2.5, None]
|
|
79
|
+
|
|
80
|
+
@mark.skip(reason="Deprecated support for Series")
|
|
81
|
+
def test_table_date_series(self, util):
|
|
82
|
+
data = util.make_series(freq="D")
|
|
83
|
+
tbl = Table(data)
|
|
84
|
+
assert tbl.size() == 10
|
|
85
|
+
assert tbl.schema() == {"index": "date", "0": "float"}
|
|
86
|
+
assert tbl.view().to_columns()["index"] == [
|
|
87
|
+
util.to_timestamp(datetime(2000, 1, 1)),
|
|
88
|
+
util.to_timestamp(datetime(2000, 1, 2)),
|
|
89
|
+
util.to_timestamp(datetime(2000, 1, 3)),
|
|
90
|
+
util.to_timestamp(datetime(2000, 1, 4)),
|
|
91
|
+
util.to_timestamp(datetime(2000, 1, 5)),
|
|
92
|
+
util.to_timestamp(datetime(2000, 1, 6)),
|
|
93
|
+
util.to_timestamp(datetime(2000, 1, 7)),
|
|
94
|
+
util.to_timestamp(datetime(2000, 1, 8)),
|
|
95
|
+
util.to_timestamp(datetime(2000, 1, 9)),
|
|
96
|
+
util.to_timestamp(datetime(2000, 1, 10)),
|
|
97
|
+
]
|
|
98
|
+
|
|
99
|
+
@mark.skip(reason="Deprecated support for Series")
|
|
100
|
+
def test_table_time_series(self, util):
|
|
101
|
+
data = util.make_series(freq="H")
|
|
102
|
+
tbl = Table(data)
|
|
103
|
+
assert tbl.size() == 10
|
|
104
|
+
assert tbl.schema() == {"index": "datetime", "0": "float"}
|
|
105
|
+
assert tbl.view().to_columns()["index"] == [
|
|
106
|
+
util.to_timestamp(datetime(2000, 1, 1, 0, 0, 0)),
|
|
107
|
+
util.to_timestamp(datetime(2000, 1, 1, 1, 0, 0)),
|
|
108
|
+
util.to_timestamp(datetime(2000, 1, 1, 2, 0, 0)),
|
|
109
|
+
util.to_timestamp(datetime(2000, 1, 1, 3, 0, 0)),
|
|
110
|
+
util.to_timestamp(datetime(2000, 1, 1, 4, 0, 0)),
|
|
111
|
+
util.to_timestamp(datetime(2000, 1, 1, 5, 0, 0)),
|
|
112
|
+
util.to_timestamp(datetime(2000, 1, 1, 6, 0, 0)),
|
|
113
|
+
util.to_timestamp(datetime(2000, 1, 1, 7, 0, 0)),
|
|
114
|
+
util.to_timestamp(datetime(2000, 1, 1, 8, 0, 0)),
|
|
115
|
+
util.to_timestamp(datetime(2000, 1, 1, 9, 0, 0)),
|
|
116
|
+
]
|
|
117
|
+
|
|
118
|
+
@mark.skip(reason="pyarrow dataframe does not support date inference")
|
|
119
|
+
def test_table_dataframe_infer_date(self, util):
|
|
120
|
+
data = util.make_dataframe(freq="ME")
|
|
121
|
+
|
|
122
|
+
tbl = Table(data)
|
|
123
|
+
assert tbl.size() == 10
|
|
124
|
+
assert tbl.schema() == {
|
|
125
|
+
"index": "date",
|
|
126
|
+
"a": "float",
|
|
127
|
+
"b": "float",
|
|
128
|
+
"c": "float",
|
|
129
|
+
"d": "float",
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
assert tbl.view().to_columns()["index"] == [
|
|
133
|
+
util.to_timestamp(datetime(2000, 1, 31)),
|
|
134
|
+
util.to_timestamp(datetime(2000, 2, 29)),
|
|
135
|
+
util.to_timestamp(datetime(2000, 3, 31)),
|
|
136
|
+
util.to_timestamp(datetime(2000, 4, 30)),
|
|
137
|
+
util.to_timestamp(datetime(2000, 5, 31)),
|
|
138
|
+
util.to_timestamp(datetime(2000, 6, 30)),
|
|
139
|
+
util.to_timestamp(datetime(2000, 7, 31)),
|
|
140
|
+
util.to_timestamp(datetime(2000, 8, 31)),
|
|
141
|
+
util.to_timestamp(datetime(2000, 9, 30)),
|
|
142
|
+
util.to_timestamp(datetime(2000, 10, 31)),
|
|
143
|
+
]
|
|
144
|
+
|
|
145
|
+
def test_table_dataframe_infer_date_fixed(self, util):
|
|
146
|
+
data = util.make_dataframe(freq="ME")
|
|
147
|
+
|
|
148
|
+
tbl = Table(data)
|
|
149
|
+
assert tbl.size() == 10
|
|
150
|
+
assert tbl.schema() == {
|
|
151
|
+
"index": "datetime",
|
|
152
|
+
"a": "float",
|
|
153
|
+
"b": "float",
|
|
154
|
+
"c": "float",
|
|
155
|
+
"d": "float",
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
assert tbl.view().to_columns()["index"] == [
|
|
159
|
+
util.to_timestamp(datetime(2000, 1, 31)),
|
|
160
|
+
util.to_timestamp(datetime(2000, 2, 29)),
|
|
161
|
+
util.to_timestamp(datetime(2000, 3, 31)),
|
|
162
|
+
util.to_timestamp(datetime(2000, 4, 30)),
|
|
163
|
+
util.to_timestamp(datetime(2000, 5, 31)),
|
|
164
|
+
util.to_timestamp(datetime(2000, 6, 30)),
|
|
165
|
+
util.to_timestamp(datetime(2000, 7, 31)),
|
|
166
|
+
util.to_timestamp(datetime(2000, 8, 31)),
|
|
167
|
+
util.to_timestamp(datetime(2000, 9, 30)),
|
|
168
|
+
util.to_timestamp(datetime(2000, 10, 31)),
|
|
169
|
+
]
|
|
170
|
+
|
|
171
|
+
def test_table_dataframe_infer_time(self, util):
|
|
172
|
+
data = util.make_dataframe(freq="h")
|
|
173
|
+
|
|
174
|
+
tbl = Table(data)
|
|
175
|
+
assert tbl.size() == 10
|
|
176
|
+
assert tbl.schema() == {
|
|
177
|
+
"index": "datetime",
|
|
178
|
+
"a": "float",
|
|
179
|
+
"b": "float",
|
|
180
|
+
"c": "float",
|
|
181
|
+
"d": "float",
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
assert tbl.view().to_columns()["index"] == [
|
|
185
|
+
util.to_timestamp(datetime(2000, 1, 1, 0, 0, 0)),
|
|
186
|
+
util.to_timestamp(datetime(2000, 1, 1, 1, 0, 0)),
|
|
187
|
+
util.to_timestamp(datetime(2000, 1, 1, 2, 0, 0)),
|
|
188
|
+
util.to_timestamp(datetime(2000, 1, 1, 3, 0, 0)),
|
|
189
|
+
util.to_timestamp(datetime(2000, 1, 1, 4, 0, 0)),
|
|
190
|
+
util.to_timestamp(datetime(2000, 1, 1, 5, 0, 0)),
|
|
191
|
+
util.to_timestamp(datetime(2000, 1, 1, 6, 0, 0)),
|
|
192
|
+
util.to_timestamp(datetime(2000, 1, 1, 7, 0, 0)),
|
|
193
|
+
util.to_timestamp(datetime(2000, 1, 1, 8, 0, 0)),
|
|
194
|
+
util.to_timestamp(datetime(2000, 1, 1, 9, 0, 0)),
|
|
195
|
+
]
|
|
196
|
+
|
|
197
|
+
@mark.skip(reason="pyarrow dataframe does not support date inference")
|
|
198
|
+
def test_table_dataframe_year_start_index(self, util):
|
|
199
|
+
data = util.make_dataframe(freq="YS")
|
|
200
|
+
|
|
201
|
+
tbl = Table(data)
|
|
202
|
+
assert tbl.size() == 10
|
|
203
|
+
assert tbl.schema() == {
|
|
204
|
+
"index": "date",
|
|
205
|
+
"a": "float",
|
|
206
|
+
"b": "float",
|
|
207
|
+
"c": "float",
|
|
208
|
+
"d": "float",
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
assert tbl.view().to_columns()["index"] == [
|
|
212
|
+
util.to_timestamp(datetime(2000, 1, 1, 0, 0, 0)),
|
|
213
|
+
util.to_timestamp(datetime(2001, 1, 1, 0, 0, 0)),
|
|
214
|
+
util.to_timestamp(datetime(2002, 1, 1, 0, 0, 0)),
|
|
215
|
+
util.to_timestamp(datetime(2003, 1, 1, 0, 0, 0)),
|
|
216
|
+
util.to_timestamp(datetime(2004, 1, 1, 0, 0, 0)),
|
|
217
|
+
util.to_timestamp(datetime(2005, 1, 1, 0, 0, 0)),
|
|
218
|
+
util.to_timestamp(datetime(2006, 1, 1, 0, 0, 0)),
|
|
219
|
+
util.to_timestamp(datetime(2007, 1, 1, 0, 0, 0)),
|
|
220
|
+
util.to_timestamp(datetime(2008, 1, 1, 0, 0, 0)),
|
|
221
|
+
util.to_timestamp(datetime(2009, 1, 1, 0, 0, 0)),
|
|
222
|
+
]
|
|
223
|
+
|
|
224
|
+
def test_table_dataframe_year_start_index_fixed(self, util):
|
|
225
|
+
data = util.make_dataframe(freq="YS")
|
|
226
|
+
|
|
227
|
+
tbl = Table(data)
|
|
228
|
+
assert tbl.size() == 10
|
|
229
|
+
assert tbl.schema() == {
|
|
230
|
+
"index": "datetime",
|
|
231
|
+
"a": "float",
|
|
232
|
+
"b": "float",
|
|
233
|
+
"c": "float",
|
|
234
|
+
"d": "float",
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
assert tbl.view().to_columns()["index"] == [
|
|
238
|
+
util.to_timestamp(datetime(2000, 1, 1, 0, 0, 0)),
|
|
239
|
+
util.to_timestamp(datetime(2001, 1, 1, 0, 0, 0)),
|
|
240
|
+
util.to_timestamp(datetime(2002, 1, 1, 0, 0, 0)),
|
|
241
|
+
util.to_timestamp(datetime(2003, 1, 1, 0, 0, 0)),
|
|
242
|
+
util.to_timestamp(datetime(2004, 1, 1, 0, 0, 0)),
|
|
243
|
+
util.to_timestamp(datetime(2005, 1, 1, 0, 0, 0)),
|
|
244
|
+
util.to_timestamp(datetime(2006, 1, 1, 0, 0, 0)),
|
|
245
|
+
util.to_timestamp(datetime(2007, 1, 1, 0, 0, 0)),
|
|
246
|
+
util.to_timestamp(datetime(2008, 1, 1, 0, 0, 0)),
|
|
247
|
+
util.to_timestamp(datetime(2009, 1, 1, 0, 0, 0)),
|
|
248
|
+
]
|
|
249
|
+
|
|
250
|
+
@mark.skip(reason="pyarrow dataframe does not support date inference")
|
|
251
|
+
def test_table_dataframe_quarter_index(self, util):
|
|
252
|
+
data = util.make_dataframe(size=4, freq="QE")
|
|
253
|
+
|
|
254
|
+
tbl = Table(data)
|
|
255
|
+
assert tbl.size() == 4
|
|
256
|
+
assert tbl.schema() == {
|
|
257
|
+
"index": "date",
|
|
258
|
+
"a": "float",
|
|
259
|
+
"b": "float",
|
|
260
|
+
"c": "float",
|
|
261
|
+
"d": "float",
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
assert tbl.view().to_columns()["index"] == [
|
|
265
|
+
util.to_timestamp(datetime(2000, 3, 31, 0, 0, 0)),
|
|
266
|
+
util.to_timestamp(datetime(2000, 6, 30, 0, 0, 0)),
|
|
267
|
+
util.to_timestamp(datetime(2000, 9, 30, 0, 0, 0)),
|
|
268
|
+
util.to_timestamp(datetime(2000, 12, 31, 0, 0, 0)),
|
|
269
|
+
]
|
|
270
|
+
|
|
271
|
+
def test_table_dataframe_quarter_index_fixed(self, util):
|
|
272
|
+
data = util.make_dataframe(size=4, freq="QE")
|
|
273
|
+
|
|
274
|
+
tbl = Table(data)
|
|
275
|
+
assert tbl.size() == 4
|
|
276
|
+
assert tbl.schema() == {
|
|
277
|
+
"index": "datetime",
|
|
278
|
+
"a": "float",
|
|
279
|
+
"b": "float",
|
|
280
|
+
"c": "float",
|
|
281
|
+
"d": "float",
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
assert tbl.view().to_columns()["index"] == [
|
|
285
|
+
util.to_timestamp(datetime(2000, 3, 31, 0, 0, 0)),
|
|
286
|
+
util.to_timestamp(datetime(2000, 6, 30, 0, 0, 0)),
|
|
287
|
+
util.to_timestamp(datetime(2000, 9, 30, 0, 0, 0)),
|
|
288
|
+
util.to_timestamp(datetime(2000, 12, 31, 0, 0, 0)),
|
|
289
|
+
]
|
|
290
|
+
|
|
291
|
+
def test_table_dataframe_minute_index(self, util):
|
|
292
|
+
data = util.make_dataframe(size=5, freq="min")
|
|
293
|
+
|
|
294
|
+
tbl = Table(data)
|
|
295
|
+
assert tbl.size() == 5
|
|
296
|
+
assert tbl.schema() == {
|
|
297
|
+
"index": "datetime",
|
|
298
|
+
"a": "float",
|
|
299
|
+
"b": "float",
|
|
300
|
+
"c": "float",
|
|
301
|
+
"d": "float",
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
assert tbl.view().to_columns()["index"] == [
|
|
305
|
+
util.to_timestamp(datetime(2000, 1, 1, 0, 0)),
|
|
306
|
+
util.to_timestamp(datetime(2000, 1, 1, 0, 1)),
|
|
307
|
+
util.to_timestamp(datetime(2000, 1, 1, 0, 2)),
|
|
308
|
+
util.to_timestamp(datetime(2000, 1, 1, 0, 3)),
|
|
309
|
+
util.to_timestamp(datetime(2000, 1, 1, 0, 4)),
|
|
310
|
+
]
|
|
311
|
+
|
|
312
|
+
def test_table_pandas_periodindex(self, util):
|
|
313
|
+
df = util.make_period_dataframe(30)
|
|
314
|
+
tbl = Table(df)
|
|
315
|
+
|
|
316
|
+
assert tbl.size() == 30
|
|
317
|
+
assert tbl.schema() == {
|
|
318
|
+
"index": "integer",
|
|
319
|
+
"a": "float",
|
|
320
|
+
"b": "float",
|
|
321
|
+
"c": "float",
|
|
322
|
+
"d": "float",
|
|
323
|
+
}
|
|
324
|
+
|
|
325
|
+
assert tbl.view().to_columns()["index"][:5] == [360, 361, 362, 363, 364]
|
|
326
|
+
|
|
327
|
+
@mark.skip(reason="pyarrow does not support this")
|
|
328
|
+
def test_table_pandas_period(self, util):
|
|
329
|
+
df = pd.DataFrame(
|
|
330
|
+
{
|
|
331
|
+
"a": [
|
|
332
|
+
pd.Period("1Q2019"),
|
|
333
|
+
pd.Period("2Q2019"),
|
|
334
|
+
pd.Period("3Q2019"),
|
|
335
|
+
pd.Period("4Q2019"),
|
|
336
|
+
]
|
|
337
|
+
}
|
|
338
|
+
)
|
|
339
|
+
tbl = Table(df)
|
|
340
|
+
assert tbl.size() == 4
|
|
341
|
+
assert tbl.schema() == {"index": "integer", "a": "datetime"}
|
|
342
|
+
assert tbl.view().to_columns()["a"] == [
|
|
343
|
+
util.to_timestamp(datetime(2019, 1, 1)),
|
|
344
|
+
util.to_timestamp(datetime(2019, 4, 1)),
|
|
345
|
+
util.to_timestamp(datetime(2019, 7, 1)),
|
|
346
|
+
util.to_timestamp(datetime(2019, 10, 1)),
|
|
347
|
+
]
|
|
348
|
+
|
|
349
|
+
def test_table_pandas_from_schema_int(self):
|
|
350
|
+
data = [None, 1, None, 2, None, 3, 4]
|
|
351
|
+
df = pd.DataFrame({"a": data})
|
|
352
|
+
table = Table({"a": "integer"})
|
|
353
|
+
table.update(df)
|
|
354
|
+
assert table.view().to_columns()["a"] == data
|
|
355
|
+
|
|
356
|
+
def test_table_pandas_from_schema_bool(self):
|
|
357
|
+
data = [True, False, True, False]
|
|
358
|
+
df = pd.DataFrame({"a": data})
|
|
359
|
+
table = Table({"a": "boolean"})
|
|
360
|
+
table.update(df)
|
|
361
|
+
assert table.view().to_columns()["a"] == data
|
|
362
|
+
|
|
363
|
+
@mark.skip(reason="pyarrow does not support this")
|
|
364
|
+
def test_table_pandas_from_schema_bool_str(self):
|
|
365
|
+
data = ["True", "False", "True", "False"]
|
|
366
|
+
df = pd.DataFrame({"a": data})
|
|
367
|
+
table = Table({"a": "boolean"})
|
|
368
|
+
table.update(df)
|
|
369
|
+
assert table.view().to_columns()["a"] == [True, False, True, False]
|
|
370
|
+
|
|
371
|
+
def test_table_pandas_from_schema_float(self):
|
|
372
|
+
data = [None, 1.5, None, 2.5, None, 3.5, 4.5]
|
|
373
|
+
df = pd.DataFrame({"a": data})
|
|
374
|
+
table = Table({"a": "float"})
|
|
375
|
+
table.update(df)
|
|
376
|
+
assert table.view().to_columns()["a"] == data
|
|
377
|
+
|
|
378
|
+
def test_table_pandas_from_schema_float_all_nan(self):
|
|
379
|
+
data = [np.nan, np.nan, np.nan, np.nan]
|
|
380
|
+
df = pd.DataFrame({"a": data})
|
|
381
|
+
table = Table({"a": "float"})
|
|
382
|
+
table.update(df)
|
|
383
|
+
assert table.view().to_columns()["a"] == [None, None, None, None]
|
|
384
|
+
|
|
385
|
+
def test_table_pandas_from_schema_float_to_int(self):
|
|
386
|
+
data = [None, 1.5, None, 2.5, None, 3.5, 4.5]
|
|
387
|
+
df = pd.DataFrame({"a": data})
|
|
388
|
+
table = Table({"a": "integer"})
|
|
389
|
+
table.update(df)
|
|
390
|
+
# truncates decimal
|
|
391
|
+
assert table.view().to_columns()["a"] == [None, 1, None, 2, None, 3, 4]
|
|
392
|
+
|
|
393
|
+
def test_table_pandas_from_schema_int_to_float(self):
|
|
394
|
+
data = [None, 1, None, 2, None, 3, 4]
|
|
395
|
+
df = pd.DataFrame({"a": data})
|
|
396
|
+
table = Table({"a": "float"})
|
|
397
|
+
table.update(df)
|
|
398
|
+
assert table.view().to_columns()["a"] == [None, 1.0, None, 2.0, None, 3.0, 4.0]
|
|
399
|
+
|
|
400
|
+
def test_table_pandas_from_schema_date(self, util):
|
|
401
|
+
data = [date(2019, 8, 15), None, date(2019, 8, 16)]
|
|
402
|
+
df = pd.DataFrame({"a": data})
|
|
403
|
+
table = Table({"a": "date"})
|
|
404
|
+
table.update(df)
|
|
405
|
+
assert table.view().to_columns()["a"] == [
|
|
406
|
+
util.to_timestamp(datetime(2019, 8, 15)),
|
|
407
|
+
None,
|
|
408
|
+
util.to_timestamp(datetime(2019, 8, 16)),
|
|
409
|
+
]
|
|
410
|
+
|
|
411
|
+
def test_table_pandas_from_schema_datetime(self, util):
|
|
412
|
+
data = [
|
|
413
|
+
util.to_timestamp(datetime(2019, 7, 11, 12, 30, 5)),
|
|
414
|
+
None,
|
|
415
|
+
util.to_timestamp(datetime(2019, 7, 11, 13, 30, 5)),
|
|
416
|
+
None,
|
|
417
|
+
]
|
|
418
|
+
df = pd.DataFrame({"a": pd.to_datetime(data, unit="ms")})
|
|
419
|
+
table = Table({"a": "datetime"})
|
|
420
|
+
table.update(df)
|
|
421
|
+
assert table.view().to_columns()["a"] == data
|
|
422
|
+
|
|
423
|
+
def test_table_pandas_from_schema_datetime_timestamp_s(self, util):
|
|
424
|
+
data = [
|
|
425
|
+
util.to_timestamp(datetime(2019, 7, 11, 12, 30, 5)),
|
|
426
|
+
np.nan,
|
|
427
|
+
util.to_timestamp(datetime(2019, 7, 11, 13, 30, 5)),
|
|
428
|
+
np.nan,
|
|
429
|
+
]
|
|
430
|
+
df = pd.DataFrame({"a": pd.to_datetime(data, unit="ms")})
|
|
431
|
+
table = Table({"a": "datetime"})
|
|
432
|
+
table.update(df)
|
|
433
|
+
assert table.view().to_columns()["a"] == [
|
|
434
|
+
util.to_timestamp(datetime(2019, 7, 11, 12, 30, 5)),
|
|
435
|
+
None,
|
|
436
|
+
util.to_timestamp(datetime(2019, 7, 11, 13, 30, 5)),
|
|
437
|
+
None,
|
|
438
|
+
]
|
|
439
|
+
|
|
440
|
+
@mark.skip(reason="This is no longer relevant")
|
|
441
|
+
def test_table_pandas_from_schema_datetime_timestamp_ms(self, util):
|
|
442
|
+
data = [
|
|
443
|
+
util.to_timestamp(datetime(2019, 7, 11, 12, 30, 5)) * 1000,
|
|
444
|
+
np.nan,
|
|
445
|
+
util.to_timestamp(datetime(2019, 7, 11, 13, 30, 5)),
|
|
446
|
+
*1000,
|
|
447
|
+
np.nan,
|
|
448
|
+
]
|
|
449
|
+
|
|
450
|
+
df = pd.DataFrame({"a": pd.to_datetime(data, unit="ms")})
|
|
451
|
+
table = Table({"a": "datetime"})
|
|
452
|
+
table.update(df)
|
|
453
|
+
assert table.view().to_columns()["a"] == [
|
|
454
|
+
util.to_timestamp(datetime(2019, 7, 11, 12, 30, 5)),
|
|
455
|
+
None,
|
|
456
|
+
util.to_timestamp(datetime(2019, 7, 11, 13, 30, 5)),
|
|
457
|
+
None,
|
|
458
|
+
]
|
|
459
|
+
|
|
460
|
+
def test_table_pandas_from_schema_str(self):
|
|
461
|
+
data = ["a", None, "b", None, "c"]
|
|
462
|
+
df = pd.DataFrame({"a": data})
|
|
463
|
+
table = Table({"a": "string"})
|
|
464
|
+
table.update(df)
|
|
465
|
+
assert table.view().to_columns()["a"] == data
|
|
466
|
+
|
|
467
|
+
def test_table_pandas_none(self):
|
|
468
|
+
data = [None, None, None]
|
|
469
|
+
df = pd.DataFrame({"a": data})
|
|
470
|
+
table = Table(df)
|
|
471
|
+
assert table.view().to_columns()["a"] == data
|
|
472
|
+
|
|
473
|
+
def test_table_pandas_symmetric_table(self):
|
|
474
|
+
# make sure that updates are symmetric to table creation
|
|
475
|
+
df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [1.5, 2.5, 3.5, 4.5]})
|
|
476
|
+
t1 = Table(df)
|
|
477
|
+
t2 = Table({"a": "integer", "b": "float"})
|
|
478
|
+
t2.update(df)
|
|
479
|
+
assert t1.view().to_columns() == {
|
|
480
|
+
"index": [0, 1, 2, 3],
|
|
481
|
+
"a": [1, 2, 3, 4],
|
|
482
|
+
"b": [1.5, 2.5, 3.5, 4.5],
|
|
483
|
+
}
|
|
484
|
+
|
|
485
|
+
def test_table_pandas_symmetric_stacked_updates(self):
|
|
486
|
+
# make sure that updates are symmetric to table creation
|
|
487
|
+
df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [1.5, 2.5, 3.5, 4.5]})
|
|
488
|
+
|
|
489
|
+
t1 = Table(df)
|
|
490
|
+
t1.update(df)
|
|
491
|
+
|
|
492
|
+
t2 = Table({"a": "integer", "b": "float"})
|
|
493
|
+
t2.update(df)
|
|
494
|
+
t2.update(df)
|
|
495
|
+
|
|
496
|
+
assert t1.view().to_columns() == {
|
|
497
|
+
"index": [0, 1, 2, 3, 0, 1, 2, 3],
|
|
498
|
+
"a": [1, 2, 3, 4, 1, 2, 3, 4],
|
|
499
|
+
"b": [1.5, 2.5, 3.5, 4.5, 1.5, 2.5, 3.5, 4.5],
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
def test_table_pandas_transitive(self):
|
|
503
|
+
# serialized output -> table -> serialized output
|
|
504
|
+
records = {
|
|
505
|
+
"a": [1, 2, 3, 4],
|
|
506
|
+
"b": [1.5, 2.5, 3.5, 4.5],
|
|
507
|
+
"c": [np.nan, np.nan, "abc", np.nan],
|
|
508
|
+
"d": [None, True, None, False],
|
|
509
|
+
"e": [
|
|
510
|
+
float("nan"),
|
|
511
|
+
datetime(2019, 7, 11, 12, 30),
|
|
512
|
+
float("nan"),
|
|
513
|
+
datetime(2019, 7, 11, 12, 30),
|
|
514
|
+
],
|
|
515
|
+
}
|
|
516
|
+
|
|
517
|
+
df = pd.DataFrame(records)
|
|
518
|
+
t1 = Table(df)
|
|
519
|
+
out1 = arrow_bytes_to_pandas(t1.view(columns=["a", "b", "c", "d", "e"]))
|
|
520
|
+
t2 = Table(out1)
|
|
521
|
+
assert t1.schema() == t2.schema()
|
|
522
|
+
out2 = t2.view().to_columns()
|
|
523
|
+
assert t1.view().to_columns() == out2
|
|
524
|
+
|
|
525
|
+
# dtype=object should have correct inferred types
|
|
526
|
+
|
|
527
|
+
def test_table_pandas_object_to_int(self):
|
|
528
|
+
df = pd.DataFrame({"a": np.array([1, 2, None, 2, None, 3, 4], dtype=object)})
|
|
529
|
+
table = Table(df)
|
|
530
|
+
assert table.schema() == {"index": "integer", "a": "integer"}
|
|
531
|
+
assert table.view().to_columns()["a"] == [1, 2, None, 2, None, 3, 4]
|
|
532
|
+
|
|
533
|
+
def test_table_pandas_object_to_float(self):
|
|
534
|
+
df = pd.DataFrame({"a": np.array([None, 1, None, 2, None, 3, 4], dtype=object)})
|
|
535
|
+
table = Table(df)
|
|
536
|
+
assert table.schema() == {"index": "integer", "a": "integer"}
|
|
537
|
+
assert table.view().to_columns()["a"] == [None, 1.0, None, 2.0, None, 3.0, 4.0]
|
|
538
|
+
|
|
539
|
+
def test_table_pandas_object_to_bool(self):
|
|
540
|
+
df = pd.DataFrame(
|
|
541
|
+
{"a": np.array([True, False, True, False, True, False], dtype=object)}
|
|
542
|
+
)
|
|
543
|
+
table = Table(df)
|
|
544
|
+
assert table.schema() == {"index": "integer", "a": "boolean"}
|
|
545
|
+
assert table.view().to_columns()["a"] == [True, False, True, False, True, False]
|
|
546
|
+
|
|
547
|
+
def test_table_pandas_object_to_date(self, util):
|
|
548
|
+
df = pd.DataFrame(
|
|
549
|
+
{"a": np.array([date(2019, 7, 11), date(2019, 7, 12), None], dtype=object)}
|
|
550
|
+
)
|
|
551
|
+
table = Table(df)
|
|
552
|
+
assert table.schema() == {"index": "integer", "a": "date"}
|
|
553
|
+
assert table.view().to_columns()["a"] == [
|
|
554
|
+
util.to_timestamp(datetime(2019, 7, 11)),
|
|
555
|
+
util.to_timestamp(datetime(2019, 7, 12)),
|
|
556
|
+
None,
|
|
557
|
+
]
|
|
558
|
+
|
|
559
|
+
def test_table_pandas_object_to_datetime(self, util):
|
|
560
|
+
df = pd.DataFrame(
|
|
561
|
+
{
|
|
562
|
+
"a": np.array(
|
|
563
|
+
[
|
|
564
|
+
datetime(2019, 7, 11, 1, 2, 3),
|
|
565
|
+
datetime(2019, 7, 12, 1, 2, 3),
|
|
566
|
+
None,
|
|
567
|
+
],
|
|
568
|
+
dtype=object,
|
|
569
|
+
)
|
|
570
|
+
}
|
|
571
|
+
)
|
|
572
|
+
table = Table(df)
|
|
573
|
+
assert table.schema() == {"index": "integer", "a": "datetime"}
|
|
574
|
+
assert table.view().to_columns()["a"] == [
|
|
575
|
+
util.to_timestamp(datetime(2019, 7, 11, 1, 2, 3)),
|
|
576
|
+
util.to_timestamp(datetime(2019, 7, 12, 1, 2, 3)),
|
|
577
|
+
None,
|
|
578
|
+
]
|
|
579
|
+
|
|
580
|
+
def test_table_pandas_object_to_str(self):
|
|
581
|
+
df = pd.DataFrame({"a": np.array(["abc", "def", None, "ghi"], dtype=object)})
|
|
582
|
+
table = Table(df)
|
|
583
|
+
assert table.schema() == {"index": "integer", "a": "string"}
|
|
584
|
+
assert table.view().to_columns()["a"] == ["abc", "def", None, "ghi"]
|
|
585
|
+
|
|
586
|
+
# Type matching
|
|
587
|
+
|
|
588
|
+
def test_table_pandas_update_float_schema_with_int(self):
|
|
589
|
+
df = pd.DataFrame({"a": [1.5, 2.5, 3.5, 4.5], "b": [1, 2, 3, 4]})
|
|
590
|
+
|
|
591
|
+
table = Table({"a": "float", "b": "float"})
|
|
592
|
+
|
|
593
|
+
table.update(df)
|
|
594
|
+
|
|
595
|
+
assert table.view().to_columns() == {
|
|
596
|
+
"a": [1.5, 2.5, 3.5, 4.5],
|
|
597
|
+
"b": [1.0, 2.0, 3.0, 4.0],
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
def test_table_pandas_update_int32_with_int64(self):
|
|
601
|
+
df = pd.DataFrame({"a": [1, 2, 3, 4]})
|
|
602
|
+
|
|
603
|
+
table = Table({"a": [1, 2, 3, 4]})
|
|
604
|
+
|
|
605
|
+
table.update(df)
|
|
606
|
+
|
|
607
|
+
assert table.view().to_columns() == {"a": [1, 2, 3, 4, 1, 2, 3, 4]}
|
|
608
|
+
|
|
609
|
+
def test_table_pandas_update_int64_with_float(self):
|
|
610
|
+
df = pd.DataFrame({"a": [1.5, 2.5, 3.5, 4.5]})
|
|
611
|
+
|
|
612
|
+
table = Table(pd.DataFrame({"a": [1, 2, 3, 4]}))
|
|
613
|
+
|
|
614
|
+
table.update(df)
|
|
615
|
+
|
|
616
|
+
assert table.view().to_columns()["a"] == [1, 2, 3, 4, 1, 2, 3, 4]
|
|
617
|
+
|
|
618
|
+
def test_table_pandas_update_date_schema_with_datetime(self, util):
|
|
619
|
+
df = pd.DataFrame({"a": np.array([date(2019, 7, 11)])})
|
|
620
|
+
|
|
621
|
+
table = Table({"a": "date"})
|
|
622
|
+
|
|
623
|
+
table.update(df)
|
|
624
|
+
|
|
625
|
+
assert table.schema() == {"a": "date"}
|
|
626
|
+
|
|
627
|
+
assert table.view().to_columns() == {
|
|
628
|
+
"a": [util.to_timestamp(datetime(2019, 7, 11))]
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
@mark.skip(reason="Not supported by pyarrow (?)")
|
|
632
|
+
def test_table_pandas_update_datetime_schema_with_date(self, util):
|
|
633
|
+
df = pd.DataFrame({"a": np.array([date(2019, 7, 11)])})
|
|
634
|
+
table = Table({"a": "datetime"})
|
|
635
|
+
table.update(df)
|
|
636
|
+
assert table.schema() == {"a": "datetime"}
|
|
637
|
+
assert table.view().to_columns() == {
|
|
638
|
+
"a": [util.to_timestamp(datetime(2019, 7, 11, 0, 0))]
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
# Timestamps
|
|
642
|
+
|
|
643
|
+
def test_table_pandas_timestamp_to_datetime(self, util):
|
|
644
|
+
data = [
|
|
645
|
+
pd.Timestamp("2019-07-11 12:30:05"),
|
|
646
|
+
None,
|
|
647
|
+
pd.Timestamp("2019-07-11 13:30:05"),
|
|
648
|
+
None,
|
|
649
|
+
]
|
|
650
|
+
df = pd.DataFrame({"a": data})
|
|
651
|
+
table = Table(df)
|
|
652
|
+
assert table.view().to_columns()["a"] == [
|
|
653
|
+
util.to_timestamp(datetime(2019, 7, 11, 12, 30, 5)),
|
|
654
|
+
None,
|
|
655
|
+
util.to_timestamp(datetime(2019, 7, 11, 13, 30, 5)),
|
|
656
|
+
None,
|
|
657
|
+
]
|
|
658
|
+
|
|
659
|
+
def test_table_pandas_timestamp_explicit_dtype(self, util):
    """Load timestamps from a numpy array built with dtype ``datetime64[ns]``.

    Same expectations as the plain-list variant: real timestamps
    round-trip, the ``None`` entries read back as ``None``.
    """
    stamps = [
        pd.Timestamp("2019-07-11 12:30:05"),
        None,
        pd.Timestamp("2019-07-11 13:30:05"),
        None,
    ]
    typed = np.array(stamps, dtype="datetime64[ns]")
    tbl = Table(pd.DataFrame({"a": typed}))

    observed = tbl.view().to_columns()["a"]
    # Hour of each expected row; None marks a missing value.
    hours = (12, None, 13, None)
    expected = [
        None if hour is None else util.to_timestamp(datetime(2019, 7, 11, hour, 30, 5))
        for hour in hours
    ]
    assert observed == expected
|
|
674
|
+
|
|
675
|
+
def test_table_pandas_update_datetime_with_timestamp(self, util):
    """Create a table from timestamps, then update it with the same rows.

    The view must contain the original four rows followed by the four
    appended ones, with ``None`` preserved in both halves.
    """
    stamps = [
        pd.Timestamp("2019-07-11 12:30:05"),
        None,
        pd.Timestamp("2019-07-11 13:30:05"),
        None,
    ]
    initial = pd.DataFrame({"a": stamps})
    appended = pd.DataFrame({"a": stamps})

    tbl = Table(initial)
    tbl.update(appended)

    observed = tbl.view().to_columns()["a"]
    # Hour of each expected row, once for the load and once for the update.
    hours = (12, None, 13, None) * 2
    expected = [
        None if hour is None else util.to_timestamp(datetime(2019, 7, 11, hour, 30, 5))
        for hour in hours
    ]
    assert observed == expected
|
|
696
|
+
|
|
697
|
+
# NaN/NaT reading
|
|
698
|
+
|
|
699
|
+
def test_table_pandas_nan(self):
    """A column made only of ``np.nan`` must serialize as all ``None``."""
    frame = pd.DataFrame({"a": [np.nan, np.nan, np.nan, np.nan]})
    tbl = Table(frame)
    observed = tbl.view().to_columns()["a"]
    assert observed == [None, None, None, None]
|
|
704
|
+
|
|
705
|
+
def test_table_pandas_int_nan(self):
    """``np.nan`` mixed with integers: NaN slots read as ``None``."""
    frame = pd.DataFrame({"a": [np.nan, 1, np.nan, 2]})
    tbl = Table(frame)
    observed = tbl.view().to_columns()["a"]
    assert observed == [None, 1, None, 2]
|
|
710
|
+
|
|
711
|
+
def test_table_pandas_float_nan(self):
    """``np.nan`` mixed with floats: NaN slots read as ``None``."""
    frame = pd.DataFrame({"a": [np.nan, 1.5, np.nan, 2.5]})
    tbl = Table(frame)
    observed = tbl.view().to_columns()["a"]
    assert observed == [None, 1.5, None, 2.5]
|
|
716
|
+
|
|
717
|
+
def test_table_read_nan_int_col(self):
    """NaN in a string column and in an integer-looking column.

    Checks the resulting schema, the row count, and that every NaN is
    serialized as ``None``.
    """
    frame = pd.DataFrame(
        {"str": ["abc", float("nan"), "def"], "int": [np.nan, 1, 2]}
    )
    tbl = Table(frame)

    # np.nan is a float, so the "int" column is expected to be typed float.
    schema = tbl.schema()
    assert schema == {"index": "integer", "str": "string", "int": "float"}

    row_count = tbl.size()
    assert row_count == 3

    columns = tbl.view().to_columns()
    assert columns == {
        "index": [0, 1, 2],
        "str": ["abc", None, "def"],
        "int": [None, 1.0, 2.0],
    }
|
|
733
|
+
|
|
734
|
+
def test_table_read_nan_float_col(self):
    """NaN in a string column and in a float column.

    The string column keeps type ``string``, the float column keeps type
    ``float``, and every NaN is serialized as ``None``.
    """
    frame = pd.DataFrame(
        {"str": [float("nan"), "abc", float("nan")], "float": [np.nan, 1.5, 2.5]}
    )
    tbl = Table(frame)

    schema = tbl.schema()
    assert schema == {"index": "integer", "str": "string", "float": "float"}

    row_count = tbl.size()
    assert row_count == 3

    columns = tbl.view().to_columns()
    assert columns == {
        "index": [0, 1, 2],
        "str": [None, "abc", None],
        "float": [None, 1.5, 2.5],
    }
|
|
750
|
+
|
|
751
|
+
def test_table_read_nan_bool_col(self):
    """NaN at the start and in the middle of boolean columns.

    Both columns are expected to be typed ``boolean`` whether the NaN
    comes first ("bool") or sits between real values ("bool2").
    """
    frame = pd.DataFrame(
        {"bool": [np.nan, True, np.nan], "bool2": [False, np.nan, True]}
    )
    tbl = Table(frame)

    # Per the original author's note: a leading np.nan is first inferred
    # as float and may then be promoted; a non-leading np.nan leaves the
    # column with whatever type was inferred from the real values.
    schema = tbl.schema()
    assert schema == {"index": "integer", "bool": "boolean", "bool2": "boolean"}

    row_count = tbl.size()
    assert row_count == 3

    # NaN values are serialized as None.
    columns = tbl.view().to_columns()
    assert columns == {
        "index": [0, 1, 2],
        "bool": [None, True, None],
        "bool2": [False, None, True],
    }
|
|
769
|
+
|
|
770
|
+
def test_table_read_nan_date_col(self):
    """A leading NaN in a column of ``date`` objects.

    The column is expected to be typed ``date`` and the NaN to be
    serialized as ``None``.
    """
    frame = pd.DataFrame(
        {"str": ["abc", "def"], "date": [float("nan"), date(2019, 7, 11)]}
    )
    tbl = Table(frame)

    schema = tbl.schema()
    assert schema == {"index": "integer", "str": "string", "date": "date"}

    row_count = tbl.size()
    assert row_count == 2

    columns = tbl.view().to_columns()
    # 1562803200000 is 2019-07-11T00:00:00Z in epoch milliseconds.
    assert columns == {
        "index": [0, 1],
        "str": ["abc", "def"],
        "date": [None, 1562803200000],
    }
|
|
786
|
+
|
|
787
|
+
def test_table_read_nan_datetime_col(self, util):
    """A leading NaN in a column of ``datetime`` objects.

    The column is expected to be typed ``datetime`` and the NaN to be
    serialized as ``None``.
    """
    frame = pd.DataFrame(
        {
            "str": ["abc", "def"],
            "datetime": [float("nan"), datetime(2019, 7, 11, 11, 0)],
        }
    )
    tbl = Table(frame)

    schema = tbl.schema()
    assert schema == {"index": "integer", "str": "string", "datetime": "datetime"}

    row_count = tbl.size()
    assert row_count == 2

    columns = tbl.view().to_columns()
    eleven_am = util.to_timestamp(datetime(2019, 7, 11, 11, 0))
    assert columns == {
        "index": [0, 1],
        "str": ["abc", "def"],
        "datetime": [None, eleven_am],
    }
|
|
806
|
+
|
|
807
|
+
def test_table_read_nat_datetime_col(self, util):
    """A ``"NaT"`` placeholder in a datetime column reads back as ``None``."""
    frame = pd.DataFrame(
        {"str": ["abc", "def"], "datetime": ["NaT", datetime(2019, 7, 11, 11, 0)]}
    )
    # pandas<2 infers this column as datetime, pandas>=2 leaves it as
    # object, so normalize it explicitly before handing it to Table.
    frame["datetime"] = pd.to_datetime(frame["datetime"])
    tbl = Table(frame)

    schema = tbl.schema()
    assert schema == {"index": "integer", "str": "string", "datetime": "datetime"}

    row_count = tbl.size()
    assert row_count == 2

    columns = tbl.view().to_columns()
    eleven_am = util.to_timestamp(datetime(2019, 7, 11, 11, 0))
    assert columns == {
        "index": [0, 1],
        "str": ["abc", "def"],
        "datetime": [None, eleven_am],
    }
|
|
825
|
+
|
|
826
|
+
def test_table_read_nan_datetime_as_date_col(self, util):
    """A midnight ``datetime`` next to a NaN is still typed ``datetime``.

    The value has no time-of-day component, but the column is expected
    to be reported as ``datetime`` and the NaN as ``None``.
    """
    frame = pd.DataFrame(
        {"str": ["abc", "def"], "datetime": [float("nan"), datetime(2019, 7, 11)]}
    )
    tbl = Table(frame)

    schema = tbl.schema()
    assert schema == {"index": "integer", "str": "string", "datetime": "datetime"}

    row_count = tbl.size()
    assert row_count == 2

    columns = tbl.view().to_columns()
    midnight = util.to_timestamp(datetime(2019, 7, 11))
    assert columns == {
        "index": [0, 1],
        "str": ["abc", "def"],
        "datetime": [None, midnight],
    }
|
|
842
|
+
|
|
843
|
+
def test_table_read_nan_datetime_no_seconds(self, util):
    """A ``datetime`` given only to the minute, preceded by a NaN.

    Expects a ``datetime`` column whose NaN entry serializes as ``None``.
    """
    rows = {
        "str": ["abc", "def"],
        "datetime": [float("nan"), datetime(2019, 7, 11, 11, 0)],
    }
    tbl = Table(pd.DataFrame(rows))

    schema = tbl.schema()
    assert schema == {"index": "integer", "str": "string", "datetime": "datetime"}

    row_count = tbl.size()
    assert row_count == 2

    columns = tbl.view().to_columns()
    to_the_minute = util.to_timestamp(datetime(2019, 7, 11, 11, 0))
    assert columns == {
        "index": [0, 1],
        "str": ["abc", "def"],
        "datetime": [None, to_the_minute],
    }
|
|
862
|
+
|
|
863
|
+
def test_table_read_nan_datetime_milliseconds(self, util):
    """A ``datetime`` with a seconds component, preceded by ``np.nan``.

    Expects a ``datetime`` column whose NaN entry serializes as ``None``.
    NOTE(review): despite the test name, the fixture carries no
    sub-second precision.
    """
    rows = {
        "str": ["abc", "def"],
        "datetime": [np.nan, datetime(2019, 7, 11, 10, 30, 55)],
    }
    tbl = Table(pd.DataFrame(rows))

    schema = tbl.schema()
    assert schema == {"index": "integer", "str": "string", "datetime": "datetime"}

    row_count = tbl.size()
    assert row_count == 2

    columns = tbl.view().to_columns()
    with_seconds = util.to_timestamp(datetime(2019, 7, 11, 10, 30, 55))
    assert columns == {
        "index": [0, 1],
        "str": ["abc", "def"],
        "datetime": [None, with_seconds],
    }
|
|
882
|
+
|
|
883
|
+
@mark.skip(reason="lol wtf")
def test_table_pandas_correct_csv_nan_end(self):
    """CSV with empty leading string cells must load with NaN -> ``None``.

    The fixture previously read ``"string,\\nint\\n..."``: the header was
    split over two physical lines and named the first column ``string``,
    so pandas could not produce the ``str``/``int`` columns or the three
    rows asserted below. The header is now the single line ``str,int``.

    TODO: the skip marker is kept because the fixed fixture has not been
    run against ``Table`` here; re-enable once confirmed.
    """
    # Header row, then three data rows; the first two have an empty "str".
    s = "str,int\n,1\n,2\nabc,3"
    csv = StringIO(s)
    data = pd.read_csv(csv)
    tbl = Table(data)
    assert tbl.schema() == {"index": "integer", "str": "string", "int": "integer"}
    assert tbl.size() == 3
    assert tbl.view().to_columns() == {
        "index": [0, 1, 2],
        "str": [None, None, "abc"],
        "int": [1, 2, 3],
    }
|
|
896
|
+
|
|
897
|
+
@mark.skip(reason="lol wtf")
def test_table_pandas_correct_csv_nan_intermittent(self):
    """CSV with empty cells scattered across both columns.

    The fixture previously read ``"string,\\nfloat\\n..."``: the header was
    split over two physical lines and named the first column ``string``,
    so pandas could not produce the ``str``/``float`` columns or the
    three rows asserted below. The header is now the single line
    ``str,float``.

    TODO: the skip marker is kept because the fixed fixture has not been
    run against ``Table`` here; re-enable once confirmed.
    """
    # Header row, then three data rows with gaps in alternating columns.
    s = "str,float\nabc,\n,2\nghi,"
    csv = StringIO(s)
    data = pd.read_csv(csv)
    tbl = Table(data)
    assert tbl.schema() == {"index": "integer", "str": "string", "float": "float"}
    assert tbl.size() == 3
    assert tbl.view().to_columns() == {
        "index": [0, 1, 2],
        "str": ["abc", None, "ghi"],
        "float": [None, 2, None],
    }
|
|
910
|
+
|
|
911
|
+
@mark.skip(reason="pyarrow does not support series")
def test_table_series(self):
    """Build a table directly from a named ``pd.Series`` (skipped)."""
    import pandas as pd

    series = pd.Series([1, 2, 3], name="a")
    tbl = Table(series)
    row_count = tbl.size()
    assert row_count == 3
|
|
918
|
+
|
|
919
|
+
@mark.skip(reason="pyarrow does not support series")
def test_table_indexed_series(self):
    """Build a table from a ``pd.Series`` with a string index (skipped).

    When enabled, the index is expected to surface as a ``string``
    column named ``index`` alongside the integer values.
    """
    import pandas as pd

    series = pd.Series([1, 2, 3], index=["a", "b", "c"], name="a")
    tbl = Table(series)

    schema = tbl.schema()
    assert schema == {"index": "string", "a": "integer"}

    row_count = tbl.size()
    assert row_count == 3
|
|
927
|
+
|
|
928
|
+
def test_groupbys(self, superstore):
    """A DataFrame with a two-level row index keeps both levels as columns."""
    indexed = superstore.set_index(["Country", "Region"])
    tbl = Table(indexed)

    names = tbl.columns()
    row_count = tbl.size()

    assert row_count == 100
    for level in ("Country", "Region"):
        assert level in names
|
|
935
|
+
|
|
936
|
+
def test_pivottable(self, superstore):
    """A ``pd.pivot_table`` result keeps its row-index levels as columns."""
    pivot_options = {
        "values": "Discount",
        "index": ["Country", "Region"],
        "columns": "Category",
    }
    pivoted = pd.pivot_table(superstore, **pivot_options)
    tbl = Table(pivoted)

    names = tbl.columns()
    for level in ("Country", "Region"):
        assert level in names
|
|
947
|
+
|
|
948
|
+
@mark.skip(reason="TODO move this to Python")
|
|
949
|
+
def test_splitbys(self):
|
|
950
|
+
arrays = [
|
|
951
|
+
np.array(
|
|
952
|
+
[
|
|
953
|
+
"bar",
|
|
954
|
+
"bar",
|
|
955
|
+
"bar",
|
|
956
|
+
"bar",
|
|
957
|
+
"baz",
|
|
958
|
+
"baz",
|
|
959
|
+
"baz",
|
|
960
|
+
"baz",
|
|
961
|
+
"foo",
|
|
962
|
+
"foo",
|
|
963
|
+
"foo",
|
|
964
|
+
"foo",
|
|
965
|
+
"qux",
|
|
966
|
+
"qux",
|
|
967
|
+
"qux",
|
|
968
|
+
"qux",
|
|
969
|
+
]
|
|
970
|
+
),
|
|
971
|
+
np.array(
|
|
972
|
+
[
|
|
973
|
+
"one",
|
|
974
|
+
"one",
|
|
975
|
+
"two",
|
|
976
|
+
"two",
|
|
977
|
+
"one",
|
|
978
|
+
"one",
|
|
979
|
+
"two",
|
|
980
|
+
"two",
|
|
981
|
+
"one",
|
|
982
|
+
"one",
|
|
983
|
+
"two",
|
|
984
|
+
"two",
|
|
985
|
+
"one",
|
|
986
|
+
"one",
|
|
987
|
+
"two",
|
|
988
|
+
"two",
|
|
989
|
+
]
|
|
990
|
+
),
|
|
991
|
+
np.array(
|
|
992
|
+
[
|
|
993
|
+
"X",
|
|
994
|
+
"Y",
|
|
995
|
+
"X",
|
|
996
|
+
"Y",
|
|
997
|
+
"X",
|
|
998
|
+
"Y",
|
|
999
|
+
"X",
|
|
1000
|
+
"Y",
|
|
1001
|
+
"X",
|
|
1002
|
+
"Y",
|
|
1003
|
+
"X",
|
|
1004
|
+
"Y",
|
|
1005
|
+
"X",
|
|
1006
|
+
"Y",
|
|
1007
|
+
"X",
|
|
1008
|
+
"Y",
|
|
1009
|
+
]
|
|
1010
|
+
),
|
|
1011
|
+
]
|
|
1012
|
+
tuples = list(zip(*arrays))
|
|
1013
|
+
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second", "third"])
|
|
1014
|
+
df_both = pd.DataFrame(
|
|
1015
|
+
np.random.randn(3, 16), index=["A", "B", "C"], columns=index
|
|
1016
|
+
)
|
|
1017
|
+
table = Table(df_both)
|
|
1018
|
+
assert table.size() == 48
|