perspective-python 4.2.0__cp311-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perspective/__init__.py +396 -0
- perspective/extension/finos-perspective-nbextension.json +5 -0
- perspective/handlers/__init__.py +11 -0
- perspective/handlers/aiohttp.py +61 -0
- perspective/handlers/starlette.py +55 -0
- perspective/handlers/tornado.py +184 -0
- perspective/perspective.pyd +0 -0
- perspective/templates/exported_widget.html.template +35 -0
- perspective/tests/__init__.py +11 -0
- perspective/tests/async/test_async_client.py +83 -0
- perspective/tests/async/test_websocket_client.py +124 -0
- perspective/tests/conftest.py +272 -0
- perspective/tests/core/__init__.py +11 -0
- perspective/tests/core/test_async.py +351 -0
- perspective/tests/multi_threaded/__init__.py +11 -0
- perspective/tests/multi_threaded/test_multi_threaded.py +201 -0
- perspective/tests/server/__init__.py +11 -0
- perspective/tests/server/test_server.py +1016 -0
- perspective/tests/server/test_session.py +110 -0
- perspective/tests/table/__init__.py +11 -0
- perspective/tests/table/arrow/date32.arrow +0 -0
- perspective/tests/table/arrow/date64.arrow +0 -0
- perspective/tests/table/arrow/dict.arrow +0 -0
- perspective/tests/table/arrow/dict_update.arrow +0 -0
- perspective/tests/table/arrow/int_float_str.arrow +0 -0
- perspective/tests/table/arrow/int_float_str_file.arrow +0 -0
- perspective/tests/table/arrow/int_float_str_update.arrow +0 -0
- perspective/tests/table/object_sequence.py +402 -0
- perspective/tests/table/test_column_paths.py +89 -0
- perspective/tests/table/test_delete.py +124 -0
- perspective/tests/table/test_exception.py +65 -0
- perspective/tests/table/test_leaks.py +54 -0
- perspective/tests/table/test_ports.py +178 -0
- perspective/tests/table/test_remove.py +102 -0
- perspective/tests/table/test_table.py +641 -0
- perspective/tests/table/test_table_arrow.py +503 -0
- perspective/tests/table/test_table_datetime.py +2409 -0
- perspective/tests/table/test_table_infer.py +201 -0
- perspective/tests/table/test_table_limit.py +45 -0
- perspective/tests/table/test_table_numpy.py +1022 -0
- perspective/tests/table/test_table_pandas.py +1018 -0
- perspective/tests/table/test_table_polars.py +251 -0
- perspective/tests/table/test_table_view_table.py +130 -0
- perspective/tests/table/test_to_arrow.py +417 -0
- perspective/tests/table/test_to_arrow_lz4.py +32 -0
- perspective/tests/table/test_to_format.py +1024 -0
- perspective/tests/table/test_to_polars.py +26 -0
- perspective/tests/table/test_update.py +545 -0
- perspective/tests/table/test_update_arrow.py +980 -0
- perspective/tests/table/test_update_pandas.py +211 -0
- perspective/tests/table/test_view.py +2261 -0
- perspective/tests/table/test_view_expression.py +1940 -0
- perspective/tests/test_dependencies.py +53 -0
- perspective/tests/viewer/__init__.py +11 -0
- perspective/tests/viewer/test_viewer.py +246 -0
- perspective/tests/widget/__init__.py +11 -0
- perspective/tests/widget/test_widget.py +278 -0
- perspective/tests/widget/test_widget_pandas.py +453 -0
- perspective/virtual_servers/__init__.py +134 -0
- perspective/virtual_servers/clickhouse.py +245 -0
- perspective/virtual_servers/duckdb.py +236 -0
- perspective/widget/__init__.py +349 -0
- perspective/widget/viewer/__init__.py +15 -0
- perspective/widget/viewer/validate.py +22 -0
- perspective/widget/viewer/viewer.py +343 -0
- perspective/widget/viewer/viewer_traitlets.py +101 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/install.json +5 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/package.json +71 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js +2 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js.LICENSE.txt +25 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/523.c030af5d3c4f67ff83f6.js +1 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/remoteEntry.95a8ea1b44d96032833f.js +1 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/style.js +4 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/third-party-licenses.json +16 -0
- perspective_python-4.2.0.dist-info/METADATA +27 -0
- perspective_python-4.2.0.dist-info/RECORD +79 -0
- perspective_python-4.2.0.dist-info/WHEEL +4 -0
- perspective_python-4.2.0.dist-info/licenses/LICENSE.md +193 -0
- perspective_python-4.2.0.dist-info/licenses/LICENSE_THIRDPARTY_cargo.yml +17395 -0
|
@@ -0,0 +1,503 @@
|
|
|
1
|
+
# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
|
|
2
|
+
# ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃
|
|
3
|
+
# ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃
|
|
4
|
+
# ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃
|
|
5
|
+
# ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃
|
|
6
|
+
# ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
|
|
7
|
+
# ┃ Copyright (c) 2017, the Perspective Authors. ┃
|
|
8
|
+
# ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
|
|
9
|
+
# ┃ This file is part of the Perspective library, distributed under the terms ┃
|
|
10
|
+
# ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
|
|
11
|
+
# ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
|
|
12
|
+
|
|
13
|
+
import os.path
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from perspective.tests.conftest import Util
|
|
17
|
+
import pyarrow as pa
|
|
18
|
+
import pyarrow.ipc as ipc
|
|
19
|
+
from datetime import date, datetime
|
|
20
|
+
import perspective as psp
|
|
21
|
+
import io
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# One in-process server and local client are shared by every test in this
# module; ``Table`` is an alias for that client's table constructor.
client = psp.Server().new_local_client()
Table = client.table

# Pre-built Arrow fixture files live in the ``arrow`` directory beside this
# module.
_ARROW_DIR = os.path.join(os.path.dirname(__file__), "arrow")
DATE32_ARROW = os.path.join(_ARROW_DIR, "date32.arrow")
DATE64_ARROW = os.path.join(_ARROW_DIR, "date64.arrow")
DICT_ARROW = os.path.join(_ARROW_DIR, "dict.arrow")


# Column names used by the four-column tests.
names = list("abcd")

# Sample values for each numeric Arrow type exercised below. Despite the
# ``ALL_INTEGERS`` name, the mapping also carries float32 and float64 columns.
_SMALL_VALUES = [1, 2, 3]
_MEDIUM_VALUES = [1000, 2000, 3000]
_LARGE_VALUES = [100000, 200000, 300000]
_HUGE_VALUES = [10000000000, 20000000000, 30000000000]

ALL_INTEGERS_DATA = {
    "int8": pa.array(_SMALL_VALUES, type=pa.int8()),
    "int16": pa.array(_MEDIUM_VALUES, type=pa.int16()),
    "int32": pa.array(_LARGE_VALUES, type=pa.int32()),
    "int64": pa.array(_HUGE_VALUES, type=pa.int64()),
    "uint8": pa.array(_SMALL_VALUES, type=pa.uint8()),
    "uint16": pa.array(_MEDIUM_VALUES, type=pa.uint16()),
    "uint32": pa.array(_LARGE_VALUES, type=pa.uint32()),
    "uint64": pa.array(_HUGE_VALUES, type=pa.uint64()),
    "float32": pa.array([float(v) for v in _LARGE_VALUES], type=pa.float32()),
    "float64": pa.array([float(v) for v in _HUGE_VALUES], type=pa.float64()),
}

ALL_INTEGERS_TABLE = pa.Table.from_pydict(ALL_INTEGERS_DATA)

# Serialize the table once, as an Arrow IPC stream, so the tests can load it
# from raw bytes.
bytes_io = io.BytesIO()
with ipc.new_stream(bytes_io, ALL_INTEGERS_TABLE.schema) as stream:
    stream.write_table(ALL_INTEGERS_TABLE)
ALL_INTEGERS_ARROW = bytes_io.getvalue()
|
|
56
|
+
|
|
57
|
+
class TestTableArrow:
    """Exercise ``Table`` construction from Arrow data.

    The tests cover pre-built Arrow fixture files, Arrow payloads produced by
    the ``util`` fixture helpers (the "stream" tests use the helper defaults,
    the "legacy" tests pass ``legacy=True``), pyarrow tables passed directly,
    and null handling for Arrow data derived from a pandas ``DataFrame``.
    """

    # shared assertions

    @staticmethod
    def _assert_month_date_table(tbl, util: Util) -> None:
        """Assert the schema, size and contents expected of the date fixtures.

        The date32 and date64 fixture tests expect the same table: four date
        columns, one per month, where months shorter than 31 days are padded
        with ``None`` up to 31 rows.
        """
        assert tbl.schema() == {
            "jan-2019": "date",
            "feb-2020": "date",
            "mar-2019": "date",
            "apr-2020": "date",
        }
        assert tbl.size() == 31

        def month_column(year, month, days):
            # One timestamp per day of the month, then nulls up to 31 rows.
            stamps = [
                util.to_timestamp(datetime(year, month, day))
                for day in range(1, days + 1)
            ]
            return stamps + [None] * (31 - days)

        assert tbl.view().to_columns() == {
            "jan-2019": month_column(2019, 1, 31),
            "feb-2020": month_column(2020, 2, 29),
            "mar-2019": month_column(2019, 3, 31),
            "apr-2020": month_column(2020, 4, 30),
        }

    @staticmethod
    def _assert_dictionary_stream(util: Util, index_type=None) -> None:
        """Load a two-column dictionary Arrow payload and check the strings.

        Args:
            util: the ``util`` test fixture.
            index_type: pyarrow type used for the dictionary indices. When
                ``None``, ``types`` is not passed at all, so the helper's own
                default applies.
        """
        data = [
            ([0, 1, 1, None], ["abc", "def"]),
            ([0, 1, None, 2], ["xx", "yy", "zz"]),
        ]
        kwargs = {}
        if index_type is not None:
            kwargs["types"] = [[index_type, pa.string()]] * 2
        arrow_data = util.make_dictionary_arrow(["a", "b"], data, **kwargs)
        tbl = Table(arrow_data)

        assert tbl.size() == 4
        assert tbl.schema() == {"a": "string", "b": "string"}
        assert tbl.view().to_columns() == {
            "a": ["abc", "def", "def", None],
            "b": ["xx", "yy", None, "zz"],
        }

    # files and in-memory tables

    def test_table_with_integer_types(self) -> None:
        """Each numeric column can be filtered on its first sample value."""
        tbl = Table(ALL_INTEGERS_ARROW)
        for column, values in ALL_INTEGERS_DATA.items():
            view = tbl.view(filter=[[column, "==", values[0].as_py()]])
            assert len(view.to_json()) == 1

    def test_table_arrow_loads_date32_file(self, util: Util) -> None:
        """The date32 fixture file loads as four ``date`` columns."""
        with open(DATE32_ARROW, mode="rb") as file:  # b is important -> binary
            tbl = Table(file.read())
        self._assert_month_date_table(tbl, util)

    def test_table_arrow_loads_date64_file(self, util: Util) -> None:
        """The date64 fixture file loads as four ``date`` columns."""
        with open(DATE64_ARROW, mode="rb") as file:  # b is important -> binary
            tbl = Table(file.read())
        self._assert_month_date_table(tbl, util)

    def test_table_arrow_loads_dict_file(self) -> None:
        """The dictionary fixture file loads as two ``string`` columns."""
        with open(DICT_ARROW, mode="rb") as file:  # b is important -> binary
            tbl = Table(file.read())
        assert tbl.schema() == {"a": "string", "b": "string"}
        assert tbl.size() == 5
        assert tbl.view().to_columns() == {
            "a": ["abc", "def", "def", None, "abc"],
            "b": ["klm", "hij", None, "hij", "klm"],
        }

    # streams

    def test_table_arrow_loads_int_stream(self, util: Util) -> None:
        """Four integer columns round-trip through ``util.make_arrow``."""
        data = [list(range(10)) for _ in range(4)]
        arrow_data = util.make_arrow(names, data)
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {
            "a": "integer",
            "b": "integer",
            "c": "integer",
            "d": "integer",
        }
        assert tbl.view().to_columns() == {
            "a": data[0],
            "b": data[1],
            "c": data[2],
            "d": data[3],
        }

    def test_empty_arrow(self, util: Util) -> None:
        """A pyarrow table and its empty twin report the same schema."""
        table = pa.table(
            {
                "col1": [1, 2, 3],
                "col2": ["abc", "foo", "bar"],
            }
        )
        empty_table = table.schema.empty_table()
        expected_schema = {
            "col1": "integer",
            "col2": "string",
        }

        assert client.table(table, name="table2").size() == 3
        assert client.table(empty_table, name="table_empty_bad").size() == 0
        assert client.table(table, name="table3").schema() == expected_schema
        assert client.table(empty_table, name="table4").schema() == expected_schema

    def test_table_arrow_loads_float_stream(self, util: Util) -> None:
        """An integer column and a float column keep their distinct types."""
        data = [list(range(10)), [i * 1.5 for i in range(10)]]
        arrow_data = util.make_arrow(["a", "b"], data)
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {
            "a": "integer",
            "b": "float",
        }
        assert tbl.view().to_columns() == {"a": data[0], "b": data[1]}

    def test_table_arrow_loads_decimal_stream(self, util: Util) -> None:
        """A ``decimal128`` column is exposed as ``float``."""
        data = [[i * 1000 for i in range(10)]]
        arrow_data = util.make_arrow(["a"], data, types=[pa.decimal128(4)])
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {
            "a": "float",
        }
        assert tbl.view().to_columns() == {"a": data[0]}

    def test_table_arrow_loads_bool_stream(self, util: Util) -> None:
        """Alternating booleans load as a ``boolean`` column."""
        data = [[i % 2 == 0 for i in range(10)]]
        arrow_data = util.make_arrow(["a"], data)
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {"a": "boolean"}
        assert tbl.view().to_columns() == {"a": data[0]}

    def test_table_arrow_loads_date32_stream(self, util: Util) -> None:
        """A ``date32`` column loads as ``date`` and reads back as timestamps."""
        data = [[date(2019, 2, i) for i in range(1, 11)]]
        arrow_data = util.make_arrow(["a"], data, types=[pa.date32()])
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {"a": "date"}
        assert tbl.view().to_columns() == {
            "a": [util.to_timestamp(datetime(2019, 2, i)) for i in range(1, 11)]
        }

    def test_table_arrow_loads_date64_stream(self, util: Util) -> None:
        """A ``date64`` column loads as ``date`` and reads back as timestamps."""
        data = [[date(2019, 2, i) for i in range(1, 11)]]
        arrow_data = util.make_arrow(["a"], data, types=[pa.date64()])
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {"a": "date"}
        assert tbl.view().to_columns() == {
            "a": [util.to_timestamp(datetime(2019, 2, i)) for i in range(1, 11)]
        }

    def test_table_arrow_loads_timestamp_all_formats_stream(self, util: Util) -> None:
        """Timestamps in s/ms/us/ns resolution all load as ``datetime``."""
        data = [
            [datetime(2019, 2, i, 9) for i in range(1, 11)],
            [datetime(2019, 2, i, 10) for i in range(1, 11)],
            [datetime(2019, 2, i, 11) for i in range(1, 11)],
            [datetime(2019, 2, i, 12) for i in range(1, 11)],
        ]
        arrow_data = util.make_arrow(
            names,
            data,
            types=[
                pa.timestamp("s"),
                pa.timestamp("ms"),
                pa.timestamp("us"),
                pa.timestamp("ns"),
            ],
        )
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {
            "a": "datetime",
            "b": "datetime",
            "c": "datetime",
            "d": "datetime",
        }
        assert tbl.view().to_columns() == {
            "a": [util.to_timestamp(i) for i in data[0]],
            "b": [util.to_timestamp(i) for i in data[1]],
            "c": [util.to_timestamp(i) for i in data[2]],
            "d": [util.to_timestamp(i) for i in data[3]],
        }

    def test_table_arrow_loads_string_stream(self, util: Util) -> None:
        """A plain string column round-trips unchanged."""
        data = [[str(i) for i in range(10)]]
        arrow_data = util.make_arrow(["a"], data, types=[pa.string()])
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {"a": "string"}
        assert tbl.view().to_columns() == {"a": data[0]}

    def test_table_arrow_loads_dictionary_stream_int8(self, util: Util) -> None:
        """Dictionary columns with ``int8`` indices decode to strings."""
        self._assert_dictionary_stream(util, pa.int8())

    def test_table_arrow_loads_dictionary_stream_int16(self, util: Util) -> None:
        """Dictionary columns with ``int16`` indices decode to strings."""
        self._assert_dictionary_stream(util, pa.int16())

    def test_table_arrow_loads_dictionary_stream_int32(self, util: Util) -> None:
        """Dictionary columns with ``int32`` indices decode to strings."""
        self._assert_dictionary_stream(util, pa.int32())

    def test_table_arrow_loads_dictionary_stream_int64(self, util: Util) -> None:
        """Dictionary columns built with the helper's default types decode."""
        # No explicit index type: ``types`` is left to the helper's default.
        self._assert_dictionary_stream(util)

    def test_table_arrow_loads_dictionary_stream_nones(self, util: Util) -> None:
        """A null index stays ``None`` and is distinct from the empty string."""
        data = [([None, 0, 1, 2], ["", "abc", "def"])]
        arrow_data = util.make_dictionary_arrow(["a"], data)
        tbl = Table(arrow_data)

        assert tbl.size() == 4
        assert tbl.schema() == {"a": "string"}
        assert tbl.view().to_columns() == {"a": [None, "", "abc", "def"]}

    def test_table_arrow_loads_dictionary_stream_nones_indexed(self, util: Util) -> None:
        """Indexing on "a" orders rows by "a", with the null key first."""
        data = [
            ([1, None, 0, 2], ["", "abc", "def"]),
            ([2, 1, 0, None], ["", "hij", "klm"]),
        ]  # ["abc", None, "", "def"]  # ["klm", "hij", "", None]
        arrow_data = util.make_dictionary_arrow(["a", "b"], data)
        tbl = Table(arrow_data, index="a")  # column "a" is sorted

        assert tbl.schema() == {"a": "string", "b": "string"}
        assert tbl.view().to_columns() == {
            "a": [None, "", "abc", "def"],
            "b": ["hij", "", "klm", None],
        }

    def test_table_arrow_loads_dictionary_stream_nones_indexed_2(self, util: Util) -> None:
        """Test the other column, just in case."""
        data = [
            ([1, None, 0, 2], ["", "abc", "def"]),
            ([2, 1, 0, None], ["", "hij", "klm"]),
        ]  # ["abc", None, "", "def"]  # ["klm", "hij", "", None]
        arrow_data = util.make_dictionary_arrow(["a", "b"], data)
        tbl = Table(arrow_data, index="b")  # column "b" is sorted

        assert tbl.schema() == {"a": "string", "b": "string"}
        assert tbl.view().to_columns() == {
            "a": ["def", "", None, "abc"],
            "b": [None, "", "hij", "klm"],
        }

    # legacy

    def test_table_arrow_loads_int_legacy(self, util: Util) -> None:
        """Four integer columns round-trip with ``legacy=True``."""
        data = [list(range(10)) for _ in range(4)]
        arrow_data = util.make_arrow(names, data, legacy=True)
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {
            "a": "integer",
            "b": "integer",
            "c": "integer",
            "d": "integer",
        }
        # Check the values too, as the stream variant and the other legacy
        # tests do; schema and size alone would not catch corrupted data.
        assert tbl.view().to_columns() == {
            "a": data[0],
            "b": data[1],
            "c": data[2],
            "d": data[3],
        }

    def test_table_arrow_loads_float_legacy(self, util: Util) -> None:
        """Integer and float columns keep their types with ``legacy=True``."""
        data = [list(range(10)), [i * 1.5 for i in range(10)]]
        arrow_data = util.make_arrow(["a", "b"], data, legacy=True)
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {
            "a": "integer",
            "b": "float",
        }
        assert tbl.view().to_columns() == {"a": data[0], "b": data[1]}

    def test_table_arrow_loads_decimal128_legacy(self, util: Util) -> None:
        """A ``decimal128`` column is exposed as ``float`` with ``legacy=True``."""
        data = [[i * 1000 for i in range(10)]]
        arrow_data = util.make_arrow(
            ["a"], data, types=[pa.decimal128(4)], legacy=True
        )
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {
            "a": "float",
        }
        assert tbl.view().to_columns() == {"a": data[0]}

    def test_table_arrow_loads_bool_legacy(self, util: Util) -> None:
        """Alternating booleans load as ``boolean`` with ``legacy=True``."""
        data = [[i % 2 == 0 for i in range(10)]]
        arrow_data = util.make_arrow(["a"], data, legacy=True)
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {"a": "boolean"}
        assert tbl.view().to_columns() == {"a": data[0]}

    def test_table_arrow_loads_date32_legacy(self, util: Util) -> None:
        """A ``date32`` column loads as ``date`` with ``legacy=True``."""
        data = [[date(2019, 2, i) for i in range(1, 11)]]
        arrow_data = util.make_arrow(["a"], data, types=[pa.date32()], legacy=True)
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {"a": "date"}
        assert tbl.view().to_columns() == {
            "a": [util.to_timestamp(datetime(2019, 2, i)) for i in range(1, 11)]
        }

    def test_table_arrow_loads_date64_legacy(self, util: Util) -> None:
        """A ``date64`` column loads as ``date`` with ``legacy=True``."""
        data = [[date(2019, 2, i) for i in range(1, 11)]]
        arrow_data = util.make_arrow(["a"], data, types=[pa.date64()], legacy=True)
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {"a": "date"}
        assert tbl.view().to_columns() == {
            "a": [util.to_timestamp(datetime(2019, 2, i)) for i in range(1, 11)]
        }

    def test_table_arrow_loads_timestamp_all_formats_legacy(self, util: Util) -> None:
        """Timestamps in s/ms/us/ns all load as ``datetime`` with ``legacy=True``."""
        data = [
            [datetime(2019, 2, i, 9) for i in range(1, 11)],
            [datetime(2019, 2, i, 10) for i in range(1, 11)],
            [datetime(2019, 2, i, 11) for i in range(1, 11)],
            [datetime(2019, 2, i, 12) for i in range(1, 11)],
        ]
        arrow_data = util.make_arrow(
            names,
            data,
            types=[
                pa.timestamp("s"),
                pa.timestamp("ms"),
                pa.timestamp("us"),
                pa.timestamp("ns"),
            ],
            legacy=True,
        )
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {
            "a": "datetime",
            "b": "datetime",
            "c": "datetime",
            "d": "datetime",
        }
        assert tbl.view().to_columns() == {
            "a": [util.to_timestamp(i) for i in data[0]],
            "b": [util.to_timestamp(i) for i in data[1]],
            "c": [util.to_timestamp(i) for i in data[2]],
            "d": [util.to_timestamp(i) for i in data[3]],
        }

    def test_table_arrow_loads_string_legacy(self, util: Util) -> None:
        """A plain string column round-trips with ``legacy=True``."""
        data = [[str(i) for i in range(10)]]
        arrow_data = util.make_arrow(["a"], data, types=[pa.string()], legacy=True)
        tbl = Table(arrow_data)
        assert tbl.size() == 10
        assert tbl.schema() == {"a": "string"}
        assert tbl.view().to_columns() == {"a": data[0]}

    def test_table_arrow_loads_dictionary_legacy(self, util: Util) -> None:
        """Dictionary columns decode to strings with ``legacy=True``."""
        data = [([0, 1, 1, None], ["a", "b"]), ([0, 1, None, 2], ["x", "y", "z"])]
        arrow_data = util.make_dictionary_arrow(["a", "b"], data, legacy=True)
        tbl = Table(arrow_data)

        assert tbl.size() == 4
        assert tbl.schema() == {"a": "string", "b": "string"}
        assert tbl.view().to_columns() == {
            "a": ["a", "b", "b", None],
            "b": ["x", "y", None, "z"],
        }

    def test_table_arrow_loads_arrow_from_df_with_nan(self) -> None:
        """NaNs in a pandas frame become Arrow nulls and read back as ``None``."""
        frame = pd.DataFrame(
            {"a": [1.5, 2.5, np.nan, 3.5, 4.5, np.nan, np.nan, np.nan]}
        )

        arrow_table = pa.Table.from_pandas(frame, preserve_index=False)

        assert arrow_table["a"].null_count == 4

        # write arrow to stream; the context manager closes the writer even
        # if ``write_table`` raises
        sink = pa.BufferOutputStream()
        with pa.RecordBatchStreamWriter(sink, arrow_table.schema) as writer:
            writer.write_table(arrow_table)
        arrow = sink.getvalue().to_pybytes()

        # load
        tbl = Table(arrow)
        assert tbl.size() == 8

        # check types
        assert tbl.schema() == {"a": "float"}

        # check nans
        columns = tbl.view().to_columns()

        assert columns["a"] == [1.5, 2.5, None, 3.5, 4.5, None, None, None]
|