perspective-python 4.2.0__cp311-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perspective/__init__.py +396 -0
- perspective/extension/finos-perspective-nbextension.json +5 -0
- perspective/handlers/__init__.py +11 -0
- perspective/handlers/aiohttp.py +61 -0
- perspective/handlers/starlette.py +55 -0
- perspective/handlers/tornado.py +184 -0
- perspective/perspective.pyd +0 -0
- perspective/templates/exported_widget.html.template +35 -0
- perspective/tests/__init__.py +11 -0
- perspective/tests/async/test_async_client.py +83 -0
- perspective/tests/async/test_websocket_client.py +124 -0
- perspective/tests/conftest.py +272 -0
- perspective/tests/core/__init__.py +11 -0
- perspective/tests/core/test_async.py +351 -0
- perspective/tests/multi_threaded/__init__.py +11 -0
- perspective/tests/multi_threaded/test_multi_threaded.py +201 -0
- perspective/tests/server/__init__.py +11 -0
- perspective/tests/server/test_server.py +1016 -0
- perspective/tests/server/test_session.py +110 -0
- perspective/tests/table/__init__.py +11 -0
- perspective/tests/table/arrow/date32.arrow +0 -0
- perspective/tests/table/arrow/date64.arrow +0 -0
- perspective/tests/table/arrow/dict.arrow +0 -0
- perspective/tests/table/arrow/dict_update.arrow +0 -0
- perspective/tests/table/arrow/int_float_str.arrow +0 -0
- perspective/tests/table/arrow/int_float_str_file.arrow +0 -0
- perspective/tests/table/arrow/int_float_str_update.arrow +0 -0
- perspective/tests/table/object_sequence.py +402 -0
- perspective/tests/table/test_column_paths.py +89 -0
- perspective/tests/table/test_delete.py +124 -0
- perspective/tests/table/test_exception.py +65 -0
- perspective/tests/table/test_leaks.py +54 -0
- perspective/tests/table/test_ports.py +178 -0
- perspective/tests/table/test_remove.py +102 -0
- perspective/tests/table/test_table.py +641 -0
- perspective/tests/table/test_table_arrow.py +503 -0
- perspective/tests/table/test_table_datetime.py +2409 -0
- perspective/tests/table/test_table_infer.py +201 -0
- perspective/tests/table/test_table_limit.py +45 -0
- perspective/tests/table/test_table_numpy.py +1022 -0
- perspective/tests/table/test_table_pandas.py +1018 -0
- perspective/tests/table/test_table_polars.py +251 -0
- perspective/tests/table/test_table_view_table.py +130 -0
- perspective/tests/table/test_to_arrow.py +417 -0
- perspective/tests/table/test_to_arrow_lz4.py +32 -0
- perspective/tests/table/test_to_format.py +1024 -0
- perspective/tests/table/test_to_polars.py +26 -0
- perspective/tests/table/test_update.py +545 -0
- perspective/tests/table/test_update_arrow.py +980 -0
- perspective/tests/table/test_update_pandas.py +211 -0
- perspective/tests/table/test_view.py +2261 -0
- perspective/tests/table/test_view_expression.py +1940 -0
- perspective/tests/test_dependencies.py +53 -0
- perspective/tests/viewer/__init__.py +11 -0
- perspective/tests/viewer/test_viewer.py +246 -0
- perspective/tests/widget/__init__.py +11 -0
- perspective/tests/widget/test_widget.py +278 -0
- perspective/tests/widget/test_widget_pandas.py +453 -0
- perspective/virtual_servers/__init__.py +134 -0
- perspective/virtual_servers/clickhouse.py +245 -0
- perspective/virtual_servers/duckdb.py +236 -0
- perspective/widget/__init__.py +349 -0
- perspective/widget/viewer/__init__.py +15 -0
- perspective/widget/viewer/validate.py +22 -0
- perspective/widget/viewer/viewer.py +343 -0
- perspective/widget/viewer/viewer_traitlets.py +101 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/install.json +5 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/package.json +71 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js +2 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js.LICENSE.txt +25 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/523.c030af5d3c4f67ff83f6.js +1 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/remoteEntry.95a8ea1b44d96032833f.js +1 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/style.js +4 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/third-party-licenses.json +16 -0
- perspective_python-4.2.0.dist-info/METADATA +27 -0
- perspective_python-4.2.0.dist-info/RECORD +79 -0
- perspective_python-4.2.0.dist-info/WHEEL +4 -0
- perspective_python-4.2.0.dist-info/licenses/LICENSE.md +193 -0
- perspective_python-4.2.0.dist-info/licenses/LICENSE_THIRDPARTY_cargo.yml +17395 -0
|
@@ -0,0 +1,980 @@
|
|
|
1
|
+
# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
|
|
2
|
+
# ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃
|
|
3
|
+
# ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃
|
|
4
|
+
# ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃
|
|
5
|
+
# ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃
|
|
6
|
+
# ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
|
|
7
|
+
# ┃ Copyright (c) 2017, the Perspective Authors. ┃
|
|
8
|
+
# ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
|
|
9
|
+
# ┃ This file is part of the Perspective library, distributed under the terms ┃
|
|
10
|
+
# ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
|
|
11
|
+
# ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
|
|
12
|
+
|
|
13
|
+
import os
|
|
14
|
+
import random
|
|
15
|
+
import uuid
|
|
16
|
+
import pyarrow as pa
|
|
17
|
+
import pandas as pd
|
|
18
|
+
import numpy as np
|
|
19
|
+
from datetime import date, datetime
|
|
20
|
+
from pytest import mark
|
|
21
|
+
import perspective as psp
|
|
22
|
+
|
|
23
|
+
# Module-level client created from a new psp.Server(); every test below goes through it.
client = psp.Server().new_local_client()
|
|
24
|
+
# Alias for the client's table factory so tests can simply call Table(...).
Table = client.table
|
|
25
|
+
|
|
26
|
+
# Directory, next to this module, that holds the binary Arrow fixtures.
_ARROW_DIR = os.path.join(os.path.dirname(__file__), "arrow")

# Base int/float/str fixture used by the "stream" half of the file tests.
SOURCE_STREAM_ARROW = os.path.join(_ARROW_DIR, "int_float_str.arrow")

# Fixture for the "file" half of the *_stream_file tests. This constant used to
# repeat the stream fixture's path, so those tests read the same payload twice.
# NOTE(review): int_float_str_file.arrow is presumed to hold the same four rows
# in Arrow file format - confirm against the fixture before merging.
SOURCE_FILE_ARROW = os.path.join(_ARROW_DIR, "int_float_str_file.arrow")

# Update payload applied on top of the base int/float/str fixture.
PARTIAL_ARROW = os.path.join(_ARROW_DIR, "int_float_str_update.arrow")

# Dictionary fixture and the update payload applied on top of it.
DICT_ARROW = os.path.join(_ARROW_DIR, "dict.arrow")
DICT_UPDATE_ARROW = os.path.join(_ARROW_DIR, "dict_update.arrow")

# Column names shared by the four-column tests.
names = ["a", "b", "c", "d"]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class TestUpdateArrow(object):
|
|
44
|
+
# files
|
|
45
|
+
|
|
46
|
+
def test_update_arrow_updates_stream_file(self):
    """Append SOURCE_STREAM_ARROW and then SOURCE_FILE_ARROW to a typed table.

    The table is expected to hold four rows after the first payload and
    eight after the second, with the fixture rows repeated twice.
    """
    declared = {"a": "integer", "b": "float", "c": "string"}
    tbl = Table({"a": "integer", "b": "float", "c": "string"})

    # Binary mode matters: the payload is raw Arrow bytes, not text.
    with open(SOURCE_STREAM_ARROW, mode="rb") as stream_fixture:
        stream_bytes = stream_fixture.read()
    tbl.update(stream_bytes)
    assert tbl.size() == 4
    assert tbl.schema() == declared

    with open(SOURCE_FILE_ARROW, mode="rb") as file_fixture:
        file_bytes = file_fixture.read()
    tbl.update(file_bytes)
    assert tbl.size() == 8

    expected = {
        "a": [1, 2, 3, 4, 1, 2, 3, 4],
        "b": [1.5, 2.5, 3.5, 4.5, 1.5, 2.5, 3.5, 4.5],
        "c": ["a", "b", "c", "d", "a", "b", "c", "d"],
    }
    assert tbl.view().to_columns() == expected
|
|
62
|
+
|
|
63
|
+
def test_update_arrow_partial_updates_file(self):
    """Apply PARTIAL_ARROW on top of the base fixture in a table indexed on ``a``.

    The row count is asserted to stay at four while columns ``b`` and ``c``
    change for the first and last rows.
    """
    tbl = Table({"a": "integer", "b": "float", "c": "string"}, index="a")

    with open(SOURCE_STREAM_ARROW, mode="rb") as base_fixture:
        base_bytes = base_fixture.read()
    tbl.update(base_bytes)
    assert tbl.size() == 4

    with open(PARTIAL_ARROW, mode="rb") as patch_fixture:
        patch_bytes = patch_fixture.read()
    tbl.update(patch_bytes)
    assert tbl.size() == 4

    expected = {
        "a": [1, 2, 3, 4],
        "b": [100.5, 2.5, 3.5, 400.5],
        "c": ["x", "b", "c", "y"],
    }
    assert tbl.view().to_columns() == expected
|
|
78
|
+
|
|
79
|
+
def test_update_arrow_updates_dict_file(self):
    """Append DICT_ARROW and DICT_UPDATE_ARROW to an unindexed two-string-column table."""
    tbl = Table({"a": "string", "b": "string"})

    with open(DICT_ARROW, mode="rb") as base_fixture:
        base_bytes = base_fixture.read()
    tbl.update(base_bytes)
    assert tbl.size() == 5

    with open(DICT_UPDATE_ARROW, mode="rb") as update_fixture:
        update_bytes = update_fixture.read()
    tbl.update(update_bytes)
    assert tbl.size() == 8

    expected_a = ["abc", "def", "def", None, "abc", None, "update1", "update2"]
    expected_b = ["klm", "hij", None, "hij", "klm", "update3", None, "update4"]
    assert tbl.view().to_columns() == {"a": expected_a, "b": expected_b}
|
|
93
|
+
|
|
94
|
+
@mark.skip
def test_update_arrow_updates_dict_partial_file(self):
    """Build an indexed table from DICT_ARROW, then apply DICT_UPDATE_ARROW.

    The view is created before the update and is re-read afterwards.
    Skipped unconditionally via ``@mark.skip``.
    """
    with open(DICT_ARROW, mode="rb") as src:
        tbl = Table(src.read(), index="a")
        v = tbl.view()
        assert v.num_rows() == 2
        assert v.to_columns() == {"a": ["abc", "def"], "b": ["klm", "hij"]}

    with open(DICT_UPDATE_ARROW, mode="rb") as partial:
        tbl.update(partial.read())
        # This comparison used to be a bare expression statement, so the
        # post-update row count was never actually checked.
        assert v.num_rows() == 4
        assert v.to_columns() == {
            "a": ["abc", "def", "update1", "update2"],
            "b": ["klm", "hij", None, "update4"],
        }
|
|
112
|
+
|
|
113
|
+
# update with file arrow with more columns than in schema
|
|
114
|
+
|
|
115
|
+
def test_update_arrow_updates_more_columns_stream_file(self):
    """Append fixtures to a table that declares only columns ``a`` and ``b``.

    The assertions expect the schema to stay at the two declared columns
    and the output to contain only those columns.
    """
    declared = {"a": "integer", "b": "float"}
    tbl = Table({"a": "integer", "b": "float"})

    # Binary mode matters: the payload is raw Arrow bytes, not text.
    with open(SOURCE_STREAM_ARROW, mode="rb") as stream_fixture:
        stream_bytes = stream_fixture.read()
    tbl.update(stream_bytes)
    assert tbl.size() == 4
    assert tbl.schema() == declared

    with open(SOURCE_FILE_ARROW, mode="rb") as file_fixture:
        file_bytes = file_fixture.read()
    tbl.update(file_bytes)
    assert tbl.size() == 8

    expected = {
        "a": [1, 2, 3, 4, 1, 2, 3, 4],
        "b": [1.5, 2.5, 3.5, 4.5, 1.5, 2.5, 3.5, 4.5],
    }
    assert tbl.view().to_columns() == expected
|
|
135
|
+
|
|
136
|
+
def test_update_arrow_partial_updates_more_columns_file(self):
    """Apply PARTIAL_ARROW to an ``a``-indexed table that declares only ``a`` and ``c``."""
    tbl = Table({"a": "integer", "c": "string"}, index="a")

    with open(SOURCE_STREAM_ARROW, mode="rb") as base_fixture:
        base_bytes = base_fixture.read()
    tbl.update(base_bytes)
    assert tbl.size() == 4

    with open(PARTIAL_ARROW, mode="rb") as patch_fixture:
        patch_bytes = patch_fixture.read()
    tbl.update(patch_bytes)
    assert tbl.size() == 4

    expected = {"a": [1, 2, 3, 4], "c": ["x", "b", "c", "y"]}
    assert tbl.view().to_columns() == expected
|
|
150
|
+
|
|
151
|
+
def test_update_arrow_updates_dict_more_columns_file(self):
    """Append the dictionary fixtures to a table that declares only column ``a``."""
    tbl = Table({"a": "string"})

    with open(DICT_ARROW, mode="rb") as base_fixture:
        base_bytes = base_fixture.read()
    tbl.update(base_bytes)
    assert tbl.size() == 5

    with open(DICT_UPDATE_ARROW, mode="rb") as update_fixture:
        update_bytes = update_fixture.read()
    tbl.update(update_bytes)
    assert tbl.size() == 8

    expected_a = ["abc", "def", "def", None, "abc", None, "update1", "update2"]
    assert tbl.view().to_columns() == {"a": expected_a}
|
|
168
|
+
|
|
169
|
+
@mark.skip
def test_update_arrow_updates_dict_more_columns_partial_file(self):
    """Apply the dictionary fixtures to an ``a``-indexed single-column table.

    Skipped unconditionally via ``@mark.skip``.
    """
    tbl = Table({"a": "string"}, index="a")

    with open(DICT_ARROW, mode="rb") as base_fixture:
        base_bytes = base_fixture.read()
    tbl.update(base_bytes)
    assert tbl.size() == 4

    with open(DICT_UPDATE_ARROW, mode="rb") as update_fixture:
        update_bytes = update_fixture.read()
    tbl.update(update_bytes)
    assert tbl.size() == 4

    expected = {"a": ["abc", "def", "update1", "update2"]}
    assert tbl.view().to_columns() == expected
|
|
183
|
+
|
|
184
|
+
# update with file arrow with fewer columns than in schema
|
|
185
|
+
|
|
186
|
+
def test_update_arrow_updates_less_columns_stream_file(self):
    """Append fixtures to a table whose schema declares a column ``x``.

    The assertions expect ``x`` to be ``None`` for every appended row.
    """
    declared = {"a": "integer", "x": "float"}
    tbl = Table({"a": "integer", "x": "float"})

    # Binary mode matters: the payload is raw Arrow bytes, not text.
    with open(SOURCE_STREAM_ARROW, mode="rb") as stream_fixture:
        stream_bytes = stream_fixture.read()
    tbl.update(stream_bytes)
    assert tbl.size() == 4
    assert tbl.schema() == declared

    with open(SOURCE_FILE_ARROW, mode="rb") as file_fixture:
        file_bytes = file_fixture.read()
    tbl.update(file_bytes)
    assert tbl.size() == 8

    expected = {
        "a": [1, 2, 3, 4, 1, 2, 3, 4],
        "x": [None] * 8,
    }
    assert tbl.view().to_columns() == expected
|
|
206
|
+
|
|
207
|
+
def test_update_arrow_partial_updates_less_columns_file(self):
    """Apply PARTIAL_ARROW to an ``a``-indexed table with a declared column ``x``.

    The assertions expect ``x`` to remain ``None`` in all four rows.
    """
    tbl = Table({"a": "integer", "x": "string"}, index="a")

    with open(SOURCE_STREAM_ARROW, mode="rb") as base_fixture:
        base_bytes = base_fixture.read()
    tbl.update(base_bytes)
    assert tbl.size() == 4

    with open(PARTIAL_ARROW, mode="rb") as patch_fixture:
        patch_bytes = patch_fixture.read()
    tbl.update(patch_bytes)
    assert tbl.size() == 4

    expected = {"a": [1, 2, 3, 4], "x": [None] * 4}
    assert tbl.view().to_columns() == expected
|
|
221
|
+
|
|
222
|
+
def test_update_arrow_updates_dict_less_columns_file(self):
    """Append the dictionary fixtures to a table with a declared column ``x``.

    The assertions expect ``x`` to be ``None`` for all eight rows.
    """
    tbl = Table({"a": "string", "x": "string"})

    with open(DICT_ARROW, mode="rb") as base_fixture:
        base_bytes = base_fixture.read()
    tbl.update(base_bytes)
    assert tbl.size() == 5

    with open(DICT_UPDATE_ARROW, mode="rb") as update_fixture:
        update_bytes = update_fixture.read()
    tbl.update(update_bytes)
    assert tbl.size() == 8

    expected_a = ["abc", "def", "def", None, "abc", None, "update1", "update2"]
    assert tbl.view().to_columns() == {"a": expected_a, "x": [None] * 8}
|
|
236
|
+
|
|
237
|
+
@mark.skip
def test_update_arrow_updates_dict_less_columns_partial_file(self):
    """Apply the dictionary fixtures to an ``a``-indexed table with a declared column ``x``.

    Skipped unconditionally via ``@mark.skip``.
    """
    tbl = Table({"a": "string", "x": "string"}, index="a")

    with open(DICT_ARROW, mode="rb") as base_fixture:
        base_bytes = base_fixture.read()
    tbl.update(base_bytes)
    assert tbl.size() == 4

    with open(DICT_UPDATE_ARROW, mode="rb") as update_fixture:
        update_bytes = update_fixture.read()
    tbl.update(update_bytes)
    assert tbl.size() == 4

    expected = {
        "a": ["abc", "def", "update1", "update2"],
        "x": [None] * 4,
    }
    assert tbl.view().to_columns() == expected
|
|
252
|
+
|
|
253
|
+
# update int schema with int
|
|
254
|
+
|
|
255
|
+
def test_update_arrow_update_int_schema_with_uint8(self, util):
    """Load a uint8 Arrow column into an ``integer`` table and read it back."""
    # 100 random samples drawn from 0..127.
    values = [random.randint(0, 127) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.uint8)})
    arrow_schema = pa.schema({"a": pa.uint8()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "integer"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
265
|
+
|
|
266
|
+
def test_update_arrow_update_int_schema_with_uint16(self, util):
    """Load a uint16 Arrow column into an ``integer`` table and read it back."""
    # 100 random samples drawn from 0..32767.
    values = [random.randint(0, 32767) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.uint16)})
    arrow_schema = pa.schema({"a": pa.uint16()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "integer"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
276
|
+
|
|
277
|
+
def test_update_arrow_update_int_schema_with_uint32(self, util):
    """Load a uint32 Arrow column into an ``integer`` table and read it back."""
    # 100 random samples drawn from 0..2000000.
    values = [random.randint(0, 2000000) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.uint32)})
    arrow_schema = pa.schema({"a": pa.uint32()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "integer"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
287
|
+
|
|
288
|
+
def test_update_arrow_update_int_schema_with_uint64(self, util):
    """Load a uint64 Arrow column into an ``integer`` table and read it back."""
    # 100 random samples drawn from 0..20000000.
    values = [random.randint(0, 20000000) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.uint64)})
    arrow_schema = pa.schema({"a": pa.uint64()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "integer"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
298
|
+
|
|
299
|
+
def test_update_arrow_update_int_schema_with_int8(self, util):
    """Load an int8 Arrow column into an ``integer`` table and read it back."""
    # 100 random samples drawn from -127..127.
    values = [random.randint(-127, 127) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.int8)})
    arrow_schema = pa.schema({"a": pa.int8()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "integer"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
309
|
+
|
|
310
|
+
def test_update_arrow_update_int_schema_with_int16(self, util):
    """Load an int16 Arrow column into an ``integer`` table and read it back."""
    # 100 random samples drawn from -32767..32767.
    values = [random.randint(-32767, 32767) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.int16)})
    arrow_schema = pa.schema({"a": pa.int16()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "integer"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
320
|
+
|
|
321
|
+
def test_update_arrow_update_int_schema_with_int32(self, util):
    """Load an int32 Arrow column into an ``integer`` table and read it back."""
    # 100 random samples drawn from -2000000..2000000.
    values = [random.randint(-2000000, 2000000) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.int32)})
    arrow_schema = pa.schema({"a": pa.int32()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "integer"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
331
|
+
|
|
332
|
+
def test_update_arrow_update_int_schema_with_int64(self, util):
    """Load an int64 Arrow column into an ``integer`` table.

    The expected values are written as floats; the comparison is by
    numeric equality.
    """
    # 100 random samples drawn from -20000000..20000000.
    values = [random.randint(-20000000, 20000000) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.int64)})
    arrow_schema = pa.schema({"a": pa.int64()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "integer"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == [float(v) for v in values]
|
|
342
|
+
|
|
343
|
+
# updating float schema with int
|
|
344
|
+
|
|
345
|
+
def test_update_arrow_update_float_schema_with_uint8(self, util):
    """Load a uint8 Arrow column into a ``float`` table and read it back."""
    # 100 random samples drawn from 0..127.
    values = [random.randint(0, 127) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.uint8)})
    arrow_schema = pa.schema({"a": pa.uint8()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "float"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
355
|
+
|
|
356
|
+
def test_update_arrow_update_float_schema_with_uint16(self, util):
    """Load a uint16 Arrow column into a ``float`` table and read it back."""
    # 100 random samples drawn from 0..32767.
    values = [random.randint(0, 32767) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.uint16)})
    arrow_schema = pa.schema({"a": pa.uint16()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "float"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
366
|
+
|
|
367
|
+
def test_update_arrow_update_float_schema_with_uint32(self, util):
    """Load a uint32 Arrow column into a ``float`` table and read it back."""
    # 100 random samples drawn from 0..2000000.
    values = [random.randint(0, 2000000) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.uint32)})
    arrow_schema = pa.schema({"a": pa.uint32()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "float"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
377
|
+
|
|
378
|
+
def test_update_arrow_update_float_schema_with_uint64(self, util):
    """Load a uint64 Arrow column into a ``float`` table and read it back."""
    # 100 random samples drawn from 0..20000000.
    values = [random.randint(0, 20000000) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.uint64)})
    arrow_schema = pa.schema({"a": pa.uint64()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "float"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
388
|
+
|
|
389
|
+
def test_update_arrow_update_float_schema_with_int8(self, util):
    """Load an int8 Arrow column into a ``float`` table and read it back."""
    # 100 random samples drawn from -127..127.
    values = [random.randint(-127, 127) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.int8)})
    arrow_schema = pa.schema({"a": pa.int8()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "float"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
399
|
+
|
|
400
|
+
def test_update_arrow_update_float_schema_with_int16(self, util):
    """Load an int16 Arrow column into a ``float`` table and read it back."""
    # 100 random samples drawn from -32767..32767.
    values = [random.randint(-32767, 32767) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.int16)})
    arrow_schema = pa.schema({"a": pa.int16()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "float"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
410
|
+
|
|
411
|
+
def test_update_arrow_update_float_schema_with_int32(self, util):
    """Load an int32 Arrow column into a ``float`` table and read it back."""
    # 100 random samples drawn from -2000000..2000000.
    values = [random.randint(-2000000, 2000000) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.int32)})
    arrow_schema = pa.schema({"a": pa.int32()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "float"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
421
|
+
|
|
422
|
+
def test_update_arrow_update_float_schema_with_int64(self, util):
    """Load an int64 Arrow column into a ``float`` table and read it back."""
    # 100 random samples drawn from -20000000..20000000.
    values = [random.randint(-20000000, 20000000) for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.int64)})
    arrow_schema = pa.schema({"a": pa.int64()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "float"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
432
|
+
|
|
433
|
+
# updating int schema with float
|
|
434
|
+
def test_update_arrow_update_int_schema_with_float32(self, util):
    """Load a float32 Arrow column into an ``integer`` table.

    The expected output is each input passed through ``int()``.
    """
    # 100 random multiples of 0.5.
    values = [random.randint(-2000000, 2000000) * 0.5 for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.float32)})
    arrow_schema = pa.schema({"a": pa.float32()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "integer"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == list(map(int, values))
|
|
444
|
+
|
|
445
|
+
def test_update_arrow_update_int_schema_with_float64(self, util):
    """Load a float64 Arrow column into an ``integer`` table.

    The expected output is each input passed through ``int()``.
    """
    values = []
    for _ in range(100):
        # Same draw order as a single expression: integer first, then the fraction.
        values.append(random.randint(-20000000, 20000000) * random.random())
    frame = pd.DataFrame({"a": np.array(values, dtype=np.float64)})
    arrow_schema = pa.schema({"a": pa.float64()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "integer"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == list(map(int, values))
|
|
457
|
+
|
|
458
|
+
# updating float schema with float
|
|
459
|
+
|
|
460
|
+
def test_update_arrow_update_float_schema_with_float32(self, util):
    """Load a float32 Arrow column into a ``float`` table and read it back."""
    # 100 random multiples of 0.5.
    values = [random.randint(-2000000, 2000000) * 0.5 for _ in range(100)]
    frame = pd.DataFrame({"a": np.array(values, dtype=np.float32)})
    arrow_schema = pa.schema({"a": pa.float32()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "float"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
470
|
+
|
|
471
|
+
def test_update_arrow_update_float_schema_with_float64(self, util):
    """Load a float64 Arrow column into a ``float`` table and read it back."""
    values = []
    for _ in range(100):
        # Same draw order as a single expression: integer first, then the fraction.
        values.append(random.randint(-20000000, 20000000) * random.random())
    frame = pd.DataFrame({"a": np.array(values, dtype=np.float64)})
    arrow_schema = pa.schema({"a": pa.float64()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "float"})
    tbl.update(payload)

    columns = tbl.view().to_columns()
    assert columns["a"] == values
|
|
483
|
+
|
|
484
|
+
# updating date schema
|
|
485
|
+
|
|
486
|
+
def test_update_arrow_update_date_schema_with_date32(self, util):
    """Load a date32 Arrow column into a ``date`` table.

    The output is compared against ``util.to_timestamp`` applied to the
    matching ``datetime`` values.
    """
    days = range(1, 11)
    frame = pd.DataFrame({"a": [date(2019, 2, day) for day in days]})
    arrow_schema = pa.schema({"a": pa.date32()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "date"})
    tbl.update(payload)

    actual = tbl.view().to_columns()["a"]
    expected = [util.to_timestamp(datetime(2019, 2, day)) for day in days]
    assert actual == expected
|
|
501
|
+
|
|
502
|
+
def test_update_arrow_update_date_schema_with_date64(self, util):
    """Load a date64 Arrow column into a ``date`` table.

    The output is compared against ``util.to_timestamp`` applied to the
    matching ``datetime`` values.
    """
    days = range(1, 11)
    frame = pd.DataFrame({"a": [date(2019, 2, day) for day in days]})
    arrow_schema = pa.schema({"a": pa.date64()})
    payload = util.make_arrow_from_pandas(frame, arrow_schema)

    tbl = Table({"a": "date"})
    tbl.update(payload)

    actual = tbl.view().to_columns()["a"]
    expected = [util.to_timestamp(datetime(2019, 2, day)) for day in days]
    assert actual == expected
|
|
517
|
+
|
|
518
|
+
def test_update_arrow_update_datetime_schema_with_timestamp(self, util):
    """Load timestamp columns at s/ms/us/ns resolution into ``datetime`` columns.

    Each of the four columns carries ten datetimes at a fixed hour (9-12).
    """
    data = [
        [datetime(2019, 2, day, hour) for day in range(1, 11)]
        for hour in (9, 10, 11, 12)
    ]
    resolutions = [pa.timestamp(unit) for unit in ("s", "ms", "us", "ns")]
    arrow_data = util.make_arrow(names, data, types=resolutions)

    tbl = Table({"a": "datetime", "b": "datetime", "c": "datetime", "d": "datetime"})
    tbl.update(arrow_data)
    assert tbl.size() == 10

    actual = tbl.view().to_columns()
    expected = {
        column: [util.to_timestamp(stamp) for stamp in stamps]
        for column, stamps in zip(["a", "b", "c", "d"], data)
    }
    assert actual == expected
|
|
553
|
+
|
|
554
|
+
# streams
|
|
555
|
+
|
|
556
|
+
def test_update_arrow_updates_int_stream(self, util):
    """Append four identical 0..9 integer columns built by ``util.make_arrow``."""
    data = [list(range(10)) for _ in range(4)]
    arrow_data = util.make_arrow(names, data)

    tbl = Table({"a": "integer", "b": "integer", "c": "integer", "d": "integer"})
    tbl.update(arrow_data)
    assert tbl.size() == 10

    col_a, col_b, col_c, col_d = data
    expected = {"a": col_a, "b": col_b, "c": col_c, "d": col_d}
    assert tbl.view().to_columns() == expected
|
|
568
|
+
|
|
569
|
+
def test_update_arrow_updates_float_stream(self, util):
    """Append one integer column and one float column built by ``util.make_arrow``."""
    ints = list(range(10))
    floats = [i * 1.5 for i in range(10)]
    data = [ints, floats]
    arrow_data = util.make_arrow(["a", "b"], data)

    tbl = Table({"a": "integer", "b": "float"})
    tbl.update(arrow_data)
    assert tbl.size() == 10

    expected = {"a": ints, "b": floats}
    assert tbl.view().to_columns() == expected
|
|
581
|
+
|
|
582
|
+
@mark.skip(reason="Decimal128 isn't part of our schema yet")
def test_update_arrow_updates_decimal128_stream(self, util):
    """Append a decimal128(10) column to an ``integer`` table (currently skipped)."""
    column = [i * 1000000000 for i in range(10)]
    data = [column]
    arrow_data = util.make_arrow(["a"], data, types=[pa.decimal128(10)])

    tbl = Table({"a": "integer"})
    tbl.update(arrow_data)
    assert tbl.size() == 10

    expected = {"a": column}
    assert tbl.view().to_columns() == expected
|
|
590
|
+
|
|
591
|
+
def test_update_arrow_updates_bool_stream(self, util):
    """Append an alternating True/False column to a ``boolean`` table."""
    # True at even positions, False at odd ones.
    flags = [i % 2 == 0 for i in range(10)]
    data = [flags]
    arrow_data = util.make_arrow(["a"], data)

    tbl = Table({"a": "boolean"})
    tbl.update(arrow_data)
    assert tbl.size() == 10

    expected = {"a": flags}
    assert tbl.view().to_columns() == expected
|
|
598
|
+
|
|
599
|
+
def test_update_arrow_updates_date32_stream(self, util):
    """Append a date32 column to a ``date`` table.

    The expected values are local-midnight timestamps in milliseconds.
    """
    days = range(1, 11)
    data = [[date(2019, 2, day) for day in days]]
    arrow_data = util.make_arrow(["a"], data, types=[pa.date32()])

    tbl = Table({"a": "date"})
    tbl.update(arrow_data)
    assert tbl.size() == 10

    actual = tbl.view().to_columns()
    expected_ms = [int(1000 * datetime(2019, 2, day).timestamp()) for day in days]
    assert actual == {"a": expected_ms}
|
|
608
|
+
|
|
609
|
+
def test_update_arrow_updates_date64_stream(self, util):
    """Append a date64 column to a ``date`` table and compare via ``util.to_timestamp``."""
    days = range(1, 11)
    data = [[date(2019, 2, day) for day in days]]
    arrow_data = util.make_arrow(["a"], data, types=[pa.date64()])

    tbl = Table({"a": "date"})
    tbl.update(arrow_data)
    assert tbl.size() == 10

    actual = tbl.view().to_columns()
    expected = [util.to_timestamp(datetime(2019, 2, day)) for day in days]
    assert actual == {"a": expected}
|
|
618
|
+
|
|
619
|
+
def test_update_arrow_updates_timestamp_all_formats_stream(self, util):
    """Append timestamp columns at s/ms/us/ns resolution to ``datetime`` columns.

    Each of the four columns carries ten datetimes at a fixed hour (9-12).
    """
    data = [
        [datetime(2019, 2, day, hour) for day in range(1, 11)]
        for hour in (9, 10, 11, 12)
    ]
    resolutions = [pa.timestamp(unit) for unit in ("s", "ms", "us", "ns")]
    arrow_data = util.make_arrow(names, data, types=resolutions)

    tbl = Table({"a": "datetime", "b": "datetime", "c": "datetime", "d": "datetime"})
    tbl.update(arrow_data)
    assert tbl.size() == 10

    actual = tbl.view().to_columns()
    expected = {
        column: [util.to_timestamp(stamp) for stamp in stamps]
        for column, stamps in zip(["a", "b", "c", "d"], data)
    }
    assert actual == expected
|
|
647
|
+
|
|
648
|
+
def test_update_arrow_updates_string_stream(self, util):
    """Append a column of the strings "0".."9" to a ``string`` table."""
    digits = list(map(str, range(10)))
    data = [digits]
    arrow_data = util.make_arrow(["a"], data, types=[pa.string()])

    tbl = Table({"a": "string"})
    tbl.update(arrow_data)
    assert tbl.size() == 10

    expected = {"a": digits}
    assert tbl.view().to_columns() == expected
|
|
655
|
+
|
|
656
|
+
def test_update_arrow_updates_dictionary_stream(self, util):
    """Update a string-typed table from a two-column dictionary arrow.

    Each column is given as an (indices, values) pair; the view must return
    the values selected by the indices, with ``None`` where the index is null.
    """
    column_a = ([0, 1, 1, None], ["a", "b"])
    column_b = ([0, 1, None, 2], ["x", "y", "z"])
    payload = util.make_dictionary_arrow(["a", "b"], [column_a, column_b])

    table = Table({"a": "string", "b": "string"})
    table.update(payload)

    assert table.size() == 4
    columns = table.view().to_columns()
    assert columns == {
        "a": ["a", "b", "b", None],
        "b": ["x", "y", None, "z"],
    }
|
|
667
|
+
|
|
668
|
+
@mark.skip(reason="Arrow no longer supports partial updates per row")
def test_update_arrow_partial_updates_dictionary_stream(self, util):
    """(Skipped) Update a table indexed on "a" from a dictionary arrow.

    The arrow repeats the key "b", so the expectation is three rows, one per
    distinct key (including the null key).
    """
    column_a = ([0, 1, 1, None], ["a", "b"])
    column_b = ([0, 1, None, 2], ["x", "y", "z"])
    payload = util.make_dictionary_arrow(["a", "b"], [column_a, column_b])

    table = Table({"a": "string", "b": "string"}, index="a")
    table.update(payload)

    assert table.size() == 3
    columns = table.view().to_columns()
    assert columns == {"a": [None, "a", "b"], "b": ["z", "x", "y"]}
|
|
676
|
+
|
|
677
|
+
@mark.skip(
    reason="Duplicate dictionary values may duplicate primary keys when the "
    "column is used as an index; fix still pending"
)
def test_update_arrow_partial_updates_dictionary_stream_duplicates(self, util):
    """(Skipped) Indexed update from a dictionary arrow with duplicate values.

    If there are duplicate values in the dictionary, primary keys may be
    duplicated if the column is used as an index. The test stays skipped
    until the best way to fix that has been found.
    """
    data = [
        # Column "a": the dictionary lists "a" twice (positions 0 and 2).
        ([0, 1, 1, None, 2], ["a", "b", "a"]),
        ([0, 1, None, 2, 1], ["x", "y", "z"]),
    ]
    arrow_data = util.make_dictionary_arrow(["a", "b"], data)

    tbl = Table({"a": "string", "b": "string"}, index="a")

    tbl.update(arrow_data)

    # One row is expected per distinct key: None, "a" and "b".
    assert tbl.size() == 3
    assert tbl.view().to_columns() == {"a": [None, "a", "b"], "b": ["z", "x", "y"]}
|
|
694
|
+
|
|
695
|
+
def test_update_arrow_partial_updates_more_columns_dictionary_stream(self, util):
    """Update a one-column indexed table from an arrow that has two columns.

    The table only declares "a" (its index) while the arrow also carries "b".
    The view is expected to contain only "a", with one row per distinct key.
    """
    column_a = ([0, 1, 1, None], ["a", "b"])
    column_b = ([0, 1, None, 2], ["x", "y", "z"])
    payload = util.make_dictionary_arrow(["a", "b"], [column_a, column_b])

    table = Table({"a": "string"}, index="a")
    table.update(payload)

    assert table.size() == 3
    columns = table.view().to_columns()
    assert columns == {"a": [None, "a", "b"]}
|
|
705
|
+
|
|
706
|
+
@mark.skip(reason="Arrow no longer supports partial updates per row")
def test_update_arrow_partial_updates_less_columns_dictionary_stream(self, util):
    """(Skipped) Update a three-column indexed table from a two-column arrow.

    The table declares an extra column "x" that the arrow does not supply;
    the expectation is that "x" stays ``None`` for every row.
    """
    column_a = ([0, 1, 1, None], ["a", "b"])
    column_b = ([0, 1, None, 2], ["x", "y", "z"])
    payload = util.make_dictionary_arrow(["a", "b"], [column_a, column_b])

    table = Table({"a": "string", "b": "string", "x": "string"}, index="a")
    table.update(payload)

    assert table.size() == 3
    columns = table.view().to_columns()
    assert columns == {
        "a": [None, "a", "b"],
        "b": ["z", "x", "y"],
        "x": [None, None, None],
    }
|
|
719
|
+
|
|
720
|
+
def test_update_arrow_arbitary_order(self, util):
    """Append an arrow whose columns arrive in a different order.

    The table is built from columns a, b, c, d; the update arrow lists them
    as c, b, a, d. The appended rows must still land in the matching columns.
    """
    integers = [1, 2, 3, 4]
    letters = ["a", "b", "c", "d"]
    initial = util.make_arrow(
        ["a", "b", "c", "d"], [integers, letters, integers, letters]
    )
    reordered = util.make_arrow(
        ["c", "b", "a", "d"], [[5, 6], ["e", "f"], [5, 6], ["e", "f"]]
    )

    table = Table(initial)
    schema = table.schema()
    assert schema == {
        "a": "integer",
        "b": "string",
        "c": "integer",
        "d": "string",
    }

    table.update(reordered)

    assert table.size() == 6
    columns = table.view().to_columns()
    assert columns == {
        "a": [1, 2, 3, 4, 5, 6],
        "b": ["a", "b", "c", "d", "e", "f"],
        "c": [1, 2, 3, 4, 5, 6],
        "d": ["a", "b", "c", "d", "e", "f"],
    }
|
|
740
|
+
|
|
741
|
+
# append
|
|
742
|
+
|
|
743
|
+
def test_update_arrow_updates_append_int_stream(self, util):
    """Build a table from an integer arrow, then append the same arrow again.

    Four columns of 0..9 are loaded twice, so every column is expected to
    hold its values repeated back to back (20 rows).
    """
    data = [list(range(10)) for _ in range(4)]
    # NOTE(review): ``names`` is defined outside this test and is not visible
    # here; presumably it is ["a", "b", "c", "d"] -- confirm.
    payload = util.make_arrow(names, data)

    table = Table(payload)
    table.update(payload)

    assert table.size() == 20
    columns = table.view().to_columns()
    assert columns == {
        key: column + column for key, column in zip(("a", "b", "c", "d"), data)
    }
|
|
755
|
+
|
|
756
|
+
def test_update_arrow_updates_append_float_stream(self, util):
    """Build a table from an int/float arrow, then append the same arrow again.

    Column "a" holds 0..9 and column "b" holds the same values times 1.5;
    after the second load both columns must contain their values twice.
    """
    whole = list(range(10))
    scaled = [n * 1.5 for n in range(10)]
    payload = util.make_arrow(["a", "b"], [whole, scaled])

    table = Table(payload)
    table.update(payload)

    assert table.size() == 20
    columns = table.view().to_columns()
    assert columns == {"a": whole + whole, "b": scaled + scaled}
|
|
766
|
+
|
|
767
|
+
def test_update_arrow_updates_append_decimal_stream(self, util):
    """Build a table from a decimal128 arrow, then append the same arrow again.

    The multiples of 1000 from 0 to 9000 are loaded twice and must read back
    as the same values repeated.
    """
    thousands = [n * 1000 for n in range(10)]
    payload = util.make_arrow(["a"], [thousands], types=[pa.decimal128(4)])

    table = Table(payload)
    table.update(payload)

    assert table.size() == 20
    columns = table.view().to_columns()
    assert columns == {"a": thousands + thousands}
|
|
774
|
+
|
|
775
|
+
def test_update_arrow_updates_append_bool_stream(self, util):
    """Build a table from a boolean arrow, then append the same arrow again.

    The column alternates True/False starting with True; after the second
    load the pattern must appear twice in a row.
    """
    flags = [n % 2 == 0 for n in range(10)]
    payload = util.make_arrow(["a"], [flags])

    table = Table(payload)
    table.update(payload)

    assert table.size() == 20
    columns = table.view().to_columns()
    assert columns == {"a": flags + flags}
|
|
782
|
+
|
|
783
|
+
def test_update_arrow_updates_append_date32_stream(self, util):
    """Build a table from a date32 arrow, then append the same arrow again.

    Ten days of February 2019 are loaded twice; the view must return
    ``util.to_timestamp`` of midnight on each day, repeated.
    """
    days = range(1, 11)
    dates = [date(2019, 2, day) for day in days]
    midnights = [datetime(2019, 2, day) for day in days]
    payload = util.make_arrow(["a"], [dates], types=[pa.date32()])

    table = Table(payload)
    table.update(payload)

    assert table.size() == 20
    columns = table.view().to_columns()
    assert columns == {
        "a": [util.to_timestamp(moment) for moment in midnights + midnights]
    }
|
|
793
|
+
|
|
794
|
+
def test_update_arrow_updates_append_date64_stream(self, util):
    """Build a table from a date64 arrow, then append the same arrow again.

    Ten days of February 2019 are loaded twice; the view must return
    ``util.to_timestamp`` of midnight on each day, repeated.
    """
    days = range(1, 11)
    dates = [date(2019, 2, day) for day in days]
    midnights = [datetime(2019, 2, day) for day in days]
    payload = util.make_arrow(["a"], [dates], types=[pa.date64()])

    table = Table(payload)
    table.update(payload)

    assert table.size() == 20
    columns = table.view().to_columns()
    assert columns == {
        "a": [util.to_timestamp(moment) for moment in midnights + midnights]
    }
|
|
804
|
+
|
|
805
|
+
def test_update_arrow_updates_append_timestamp_all_formats_stream(self, util):
    """Build a table from s/ms/us/ns timestamp columns, then append them again.

    Four columns hold the same ten days at hours 9 through 12, each with a
    different arrow timestamp unit. After loading the arrow twice, every
    column must read back as ``util.to_timestamp`` of its datetimes, repeated.
    """
    data = [
        [datetime(2019, 2, day, hour) for day in range(1, 11)]
        for hour in (9, 10, 11, 12)
    ]
    units = ("s", "ms", "us", "ns")
    # NOTE(review): ``names`` is defined outside this test and is not visible
    # here; presumably it is ["a", "b", "c", "d"] -- confirm.
    arrow_data = util.make_arrow(
        names,
        data,
        types=[pa.timestamp(unit) for unit in units],
    )

    table = Table(arrow_data)
    table.update(arrow_data)

    assert table.size() == 20
    columns = table.view().to_columns()
    assert columns == {
        key: [util.to_timestamp(stamp) for stamp in column + column]
        for key, column in zip(("a", "b", "c", "d"), data)
    }
|
|
831
|
+
|
|
832
|
+
def test_update_arrow_updates_append_string_stream(self, util):
    """Build a table from a string arrow, then append the same arrow again.

    The strings "0" through "9" are loaded twice and must read back repeated.
    """
    strings = [str(n) for n in range(10)]
    payload = util.make_arrow(["a"], [strings], types=[pa.string()])

    table = Table(payload)
    table.update(payload)

    assert table.size() == 20
    columns = table.view().to_columns()
    assert columns == {"a": strings + strings}
|
|
839
|
+
|
|
840
|
+
def test_update_arrow_updates_append_dictionary_stream(self, util):
    """Build a table from a dictionary arrow, then append the same arrow again.

    Each column is given as an (indices, values) pair. After the second load
    the decoded values, with ``None`` for null indices, must appear twice.
    """
    column_a = ([0, 1, 1, None], ["a", "b"])
    column_b = ([0, 1, None, 2], ["x", "y", "z"])
    payload = util.make_dictionary_arrow(["a", "b"], [column_a, column_b])

    table = Table(payload)
    table.update(payload)

    decoded_a = ["a", "b", "b", None]
    decoded_b = ["x", "y", None, "z"]
    assert table.size() == 8
    columns = table.view().to_columns()
    assert columns == {"a": decoded_a + decoded_a, "b": decoded_b + decoded_b}
|
|
851
|
+
|
|
852
|
+
def test_update_arrow_updates_append_dictionary_stream_legacy(self, util):
    """Append a dictionary arrow built with ``legacy=True`` to its own table.

    Same data and expectations as the non-legacy dictionary append test; the
    only difference is the ``legacy=True`` flag passed to the arrow builder.
    """
    column_a = ([0, 1, 1, None], ["a", "b"])
    column_b = ([0, 1, None, 2], ["x", "y", "z"])
    payload = util.make_dictionary_arrow(
        ["a", "b"], [column_a, column_b], legacy=True
    )

    table = Table(payload)
    table.update(payload)

    decoded_a = ["a", "b", "b", None]
    decoded_b = ["x", "y", None, "z"]
    assert table.size() == 8
    columns = table.view().to_columns()
    assert columns == {"a": decoded_a + decoded_a, "b": decoded_b + decoded_b}
|
|
863
|
+
|
|
864
|
+
# indexed
|
|
865
|
+
|
|
866
|
+
def test_update_arrow_partial_indexed(self, util):
    """Overwrite existing rows of a table indexed on "a" via an arrow update.

    The update carries keys 2 and 4 with new "b" values; the row count must
    stay at four and only those two rows may change.
    """
    initial = util.make_arrow(["a", "b"], [[1, 2, 3, 4], ["a", "b", "c", "d"]])
    patch = util.make_arrow(["a", "b"], [[2, 4], ["x", "y"]])

    table = Table(initial, index="a")
    schema = table.schema()
    assert schema == {"a": "integer", "b": "string"}

    table.update(patch)

    assert table.size() == 4
    columns = table.view().to_columns()
    assert columns == {"a": [1, 2, 3, 4], "b": ["a", "x", "c", "y"]}
|
|
876
|
+
|
|
877
|
+
# update specific columns
|
|
878
|
+
|
|
879
|
+
def test_update_arrow_specific_column(self, util):
    """Append an arrow that supplies only column "a" to a two-column table.

    The table has no index, so the three new rows are appended; column "b"
    is expected to be ``None`` for each of them.
    """
    initial = util.make_arrow(["a", "b"], [[1, 2, 3, 4], ["a", "b", "c", "d"]])
    only_a = util.make_arrow(["a"], [[2, 3, 4]])

    table = Table(initial)
    schema = table.schema()
    assert schema == {"a": "integer", "b": "string"}

    table.update(only_a)

    assert table.size() == 7
    columns = table.view().to_columns()
    assert columns == {
        "a": [1, 2, 3, 4, 2, 3, 4],
        "b": ["a", "b", "c", "d", None, None, None],
    }
|
|
892
|
+
|
|
893
|
+
# try to fuzz column order
|
|
894
|
+
|
|
895
|
+
def test_update_arrow_column_order_str(self, util):
    """Update a ten-column string table from an arrow with reversed columns.

    Every column carries the same three strings, so the view must show that
    triple under each name regardless of the arrow's column order.
    """
    # Strings are used so the values do not get promoted to another type.
    triple = ["a", "b", "c"]
    data = [list(triple) for _ in range(10)]
    names = list("abcdefghij")
    reversed_names = list(reversed(names))
    payload = util.make_arrow(reversed_names, data)

    table = Table(dict.fromkeys(names, "string"))
    table.update(payload)

    assert table.size() == 3
    columns = table.view().to_columns()
    assert columns == dict.fromkeys(names, data[0])
|
|
905
|
+
|
|
906
|
+
def test_update_arrow_column_order_int(self, util):
    """Update a ten-column integer table from an arrow with reversed columns.

    Every column carries the same three integers, so the view must show that
    triple under each name regardless of the arrow's column order.
    """
    triple = [1, 2, 3]
    data = [list(triple) for _ in range(10)]
    names = list("abcdefghij")
    reversed_names = list(reversed(names))
    payload = util.make_arrow(reversed_names, data)

    table = Table(dict.fromkeys(names, "integer"))
    table.update(payload)

    assert table.size() == 3
    columns = table.view().to_columns()
    assert columns == dict.fromkeys(names, data[0])
|
|
915
|
+
|
|
916
|
+
def test_update_arrow_thread_safe_int_index(self, util):
    """Apply 100 random single-row updates to a table indexed on an int "uid".

    Each update targets one of the three existing keys and one randomly
    chosen string column, so the row count must stay at three. The updates
    are issued sequentially from this loop.
    """
    value_columns = list("abcdefghij")
    names = value_columns + ["uid"]
    keys = (1, 2, 3)
    data = [["a", "b", "c"] for _ in value_columns]
    data.append(list(keys))

    table = Table(util.make_arrow(names, data), index="uid")

    for _ in range(100):
        # Draw order matters if the RNG is seeded: key first, then the
        # random value, then the target column.
        key = keys[random.randint(0, 2)]
        value = str(uuid.uuid4()) + str(random.randint(100, 1000000000))
        target = names[random.randint(0, 9)]
        patch = util.make_arrow([target, "uid"], [[value], [key]])
        table.update(patch)

    assert table.size() == 3
|
|
934
|
+
|
|
935
|
+
def test_update_arrow_thread_safe_datetime_index(self, util):
    """Apply 100 random single-row updates to a table indexed on a datetime.

    The "uid" index holds three consecutive minutes; each update targets one
    of them and one randomly chosen string column, so the row count must
    stay at three. The updates are issued sequentially from this loop.
    """
    value_columns = list("abcdefghij")
    names = value_columns + ["uid"]
    keys = tuple(datetime(2020, 1, 15, 12, minute) for minute in (17, 18, 19))
    data = [["a", "b", "c"] for _ in value_columns]
    data.append(list(keys))

    table = Table(util.make_arrow(names, data), index="uid")

    for _ in range(100):
        # Draw order matters if the RNG is seeded: key first, then the
        # random value, then the target column.
        key = keys[random.randint(0, 2)]
        value = str(uuid.uuid4()) + str(random.randint(100, 1000000000))
        target = names[random.randint(0, 9)]
        patch = util.make_arrow([target, "uid"], [[value], [key]])
        table.update(patch)

    assert table.size() == 3
|
|
963
|
+
|
|
964
|
+
def test_update_arrow_thread_safe_str_index(self, util):
    """Apply 100 random single-row updates to a table indexed on a str "uid".

    All eleven columns, the index included, start as "a", "b", "c". Each
    update targets one of those keys and one randomly chosen non-index
    column, so the row count must stay at three. The updates are issued
    sequentially from this loop.
    """
    names = list("abcdefghij") + ["uid"]
    keys = ("a", "b", "c")
    data = [list(keys) for _ in names]

    table = Table(util.make_arrow(names, data), index="uid")

    for _ in range(100):
        # Draw order matters if the RNG is seeded: key first, then the
        # random value, then the target column.
        key = keys[random.randint(0, 2)]
        value = str(uuid.uuid4()) + str(random.randint(100, 1000000000))
        target = names[random.randint(0, 9)]
        patch = util.make_arrow([target, "uid"], [[value], [key]])
        table.update(patch)

    assert table.size() == 3
|