perspective-python 4.2.0__cp311-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. perspective/__init__.py +396 -0
  2. perspective/extension/finos-perspective-nbextension.json +5 -0
  3. perspective/handlers/__init__.py +11 -0
  4. perspective/handlers/aiohttp.py +61 -0
  5. perspective/handlers/starlette.py +55 -0
  6. perspective/handlers/tornado.py +184 -0
  7. perspective/perspective.pyd +0 -0
  8. perspective/templates/exported_widget.html.template +35 -0
  9. perspective/tests/__init__.py +11 -0
  10. perspective/tests/async/test_async_client.py +83 -0
  11. perspective/tests/async/test_websocket_client.py +124 -0
  12. perspective/tests/conftest.py +272 -0
  13. perspective/tests/core/__init__.py +11 -0
  14. perspective/tests/core/test_async.py +351 -0
  15. perspective/tests/multi_threaded/__init__.py +11 -0
  16. perspective/tests/multi_threaded/test_multi_threaded.py +201 -0
  17. perspective/tests/server/__init__.py +11 -0
  18. perspective/tests/server/test_server.py +1016 -0
  19. perspective/tests/server/test_session.py +110 -0
  20. perspective/tests/table/__init__.py +11 -0
  21. perspective/tests/table/arrow/date32.arrow +0 -0
  22. perspective/tests/table/arrow/date64.arrow +0 -0
  23. perspective/tests/table/arrow/dict.arrow +0 -0
  24. perspective/tests/table/arrow/dict_update.arrow +0 -0
  25. perspective/tests/table/arrow/int_float_str.arrow +0 -0
  26. perspective/tests/table/arrow/int_float_str_file.arrow +0 -0
  27. perspective/tests/table/arrow/int_float_str_update.arrow +0 -0
  28. perspective/tests/table/object_sequence.py +402 -0
  29. perspective/tests/table/test_column_paths.py +89 -0
  30. perspective/tests/table/test_delete.py +124 -0
  31. perspective/tests/table/test_exception.py +65 -0
  32. perspective/tests/table/test_leaks.py +54 -0
  33. perspective/tests/table/test_ports.py +178 -0
  34. perspective/tests/table/test_remove.py +102 -0
  35. perspective/tests/table/test_table.py +641 -0
  36. perspective/tests/table/test_table_arrow.py +503 -0
  37. perspective/tests/table/test_table_datetime.py +2409 -0
  38. perspective/tests/table/test_table_infer.py +201 -0
  39. perspective/tests/table/test_table_limit.py +45 -0
  40. perspective/tests/table/test_table_numpy.py +1022 -0
  41. perspective/tests/table/test_table_pandas.py +1018 -0
  42. perspective/tests/table/test_table_polars.py +251 -0
  43. perspective/tests/table/test_table_view_table.py +130 -0
  44. perspective/tests/table/test_to_arrow.py +417 -0
  45. perspective/tests/table/test_to_arrow_lz4.py +32 -0
  46. perspective/tests/table/test_to_format.py +1024 -0
  47. perspective/tests/table/test_to_polars.py +26 -0
  48. perspective/tests/table/test_update.py +545 -0
  49. perspective/tests/table/test_update_arrow.py +980 -0
  50. perspective/tests/table/test_update_pandas.py +211 -0
  51. perspective/tests/table/test_view.py +2261 -0
  52. perspective/tests/table/test_view_expression.py +1940 -0
  53. perspective/tests/test_dependencies.py +53 -0
  54. perspective/tests/viewer/__init__.py +11 -0
  55. perspective/tests/viewer/test_viewer.py +246 -0
  56. perspective/tests/widget/__init__.py +11 -0
  57. perspective/tests/widget/test_widget.py +278 -0
  58. perspective/tests/widget/test_widget_pandas.py +453 -0
  59. perspective/virtual_servers/__init__.py +134 -0
  60. perspective/virtual_servers/clickhouse.py +245 -0
  61. perspective/virtual_servers/duckdb.py +236 -0
  62. perspective/widget/__init__.py +349 -0
  63. perspective/widget/viewer/__init__.py +15 -0
  64. perspective/widget/viewer/validate.py +22 -0
  65. perspective/widget/viewer/viewer.py +343 -0
  66. perspective/widget/viewer/viewer_traitlets.py +101 -0
  67. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/install.json +5 -0
  68. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/package.json +71 -0
  69. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js +2 -0
  70. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js.LICENSE.txt +25 -0
  71. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/523.c030af5d3c4f67ff83f6.js +1 -0
  72. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/remoteEntry.95a8ea1b44d96032833f.js +1 -0
  73. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/style.js +4 -0
  74. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/third-party-licenses.json +16 -0
  75. perspective_python-4.2.0.dist-info/METADATA +27 -0
  76. perspective_python-4.2.0.dist-info/RECORD +79 -0
  77. perspective_python-4.2.0.dist-info/WHEEL +4 -0
  78. perspective_python-4.2.0.dist-info/licenses/LICENSE.md +193 -0
  79. perspective_python-4.2.0.dist-info/licenses/LICENSE_THIRDPARTY_cargo.yml +17395 -0
@@ -0,0 +1,1018 @@
1
+ # ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
2
+ # ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃
3
+ # ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃
4
+ # ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃
5
+ # ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃
6
+ # ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
7
+ # ┃ Copyright (c) 2017, the Perspective Authors. ┃
8
+ # ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
9
+ # ┃ This file is part of the Perspective library, distributed under the terms ┃
10
+ # ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
11
+ # ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
12
+
13
+ from datetime import date, datetime
14
+ from io import StringIO
15
+ import numpy as np
16
+ import pandas as pd
17
+ from pytest import mark
18
+ import perspective as psp
19
+
20
# Module-level fixtures shared by every test below: a local client obtained
# from a freshly constructed server, and that client's `table` constructor
# bound to the name `Table`.
client = psp.Server().new_local_client()
Table = client.table
22
+
23
+
24
def arrow_bytes_to_pandas(view):
    """Decode the Arrow IPC stream returned by ``view.to_arrow()`` into pandas.

    ``pyarrow`` is imported lazily so that importing this module does not
    require it.
    """
    import pyarrow

    arrow_buffer = pyarrow.BufferReader(view.to_arrow())
    stream_reader = pyarrow.ipc.open_stream(arrow_buffer)
    with stream_reader:
        return stream_reader.read_pandas()
29
+
30
+
31
+ class TestTablePandas(object):
32
def test_empty_table(self):
    """A table built from an empty list has zero rows and an empty schema."""
    empty = Table([])
    row_count = empty.size()
    assert row_count == 0
    assert empty.schema() == {}
36
+
37
def test_table_dataframe(self):
    """A DataFrame of int records loads with an extra integer `index` column."""
    rows = [{"a": 1, "b": 2}, {"a": 3, "b": 4}]
    tbl = Table(pd.DataFrame(rows))
    assert tbl.size() == 2
    assert tbl.schema() == dict.fromkeys(("index", "a", "b"), "integer")
    # Each output record is the input row plus its positional index.
    assert tbl.view().to_records() == [
        dict(row, index=position) for position, row in enumerate(rows)
    ]
47
+
48
def test_table_dataframe_column_order(self):
    """Table columns follow the DataFrame's explicit column order."""
    column_order = ["b", "c", "a", "d"]
    rows = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
    frame = pd.DataFrame(rows, columns=column_order)
    tbl = Table(frame)
    assert tbl.size() == 2
    assert tbl.columns() == ["index"] + column_order
54
+
55
def test_table_dataframe_selective_column_order(self):
    """Only the columns selected in the DataFrame appear, in that order."""
    selected = ["b", "c", "a"]
    rows = [{"a": 1, "b": 2, "c": 3, "d": 4}, {"a": 3, "b": 4, "c": 5, "d": 6}]
    frame = pd.DataFrame(rows, columns=selected)
    tbl = Table(frame)
    assert tbl.size() == 2
    assert tbl.columns() == ["index"] + selected
61
+
62
def test_table_dataframe_does_not_mutate(self):
    """Constructing a Table must leave the caller's DataFrame unchanged."""
    originals = {
        "a": [None, 1, None, 2],
        "b": [1.5, None, 2.5, None],
    }
    data = pd.DataFrame(
        {name: np.array(values, dtype=object) for name, values in originals.items()}
    )

    # Sanity check before handing the frame to Table.
    for name, values in originals.items():
        assert data[name].tolist() == values

    tbl = Table(data)
    assert tbl.size() == 4
    assert tbl.schema() == {"index": "integer", "a": "integer", "b": "float"}

    # The same values must still be present afterwards.
    for name, values in originals.items():
        assert data[name].tolist() == values
79
+
80
@mark.skip(reason="Deprecated support for Series")
def test_table_date_series(self, util):
    """Skipped: a Series from `util.make_series(freq="D")` used as table input."""
    series = util.make_series(freq="D")
    tbl = Table(series)
    assert tbl.size() == 10
    assert tbl.schema() == {"index": "date", "0": "float"}
    # Ten consecutive days starting 2000-01-01.
    assert tbl.view().to_columns()["index"] == [
        util.to_timestamp(datetime(2000, 1, day)) for day in range(1, 11)
    ]
98
+
99
@mark.skip(reason="Deprecated support for Series")
def test_table_time_series(self, util):
    """Skipped: a Series from `util.make_series(freq="H")` used as table input."""
    series = util.make_series(freq="H")
    tbl = Table(series)
    assert tbl.size() == 10
    assert tbl.schema() == {"index": "datetime", "0": "float"}
    # Ten consecutive hours starting at midnight on 2000-01-01.
    assert tbl.view().to_columns()["index"] == [
        util.to_timestamp(datetime(2000, 1, 1, hour)) for hour in range(10)
    ]
117
+
118
@mark.skip(reason="pyarrow dataframe does not support date inference")
def test_table_dataframe_infer_date(self, util):
    """Skipped variant expecting the month-end index to be typed `date`."""
    frame = util.make_dataframe(freq="ME")

    tbl = Table(frame)
    assert tbl.size() == 10
    assert tbl.schema() == {"index": "date", **dict.fromkeys("abcd", "float")}

    # Last day of each month, January through October 2000.
    month_ends = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31]
    assert tbl.view().to_columns()["index"] == [
        util.to_timestamp(datetime(2000, month, last_day))
        for month, last_day in enumerate(month_ends, start=1)
    ]
144
+
145
def test_table_dataframe_infer_date_fixed(self, util):
    """A month-end indexed frame loads with its index typed `datetime`."""
    frame = util.make_dataframe(freq="ME")

    tbl = Table(frame)
    assert tbl.size() == 10
    assert tbl.schema() == {"index": "datetime", **dict.fromkeys("abcd", "float")}

    # Last day of each month, January through October 2000.
    month_ends = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31]
    assert tbl.view().to_columns()["index"] == [
        util.to_timestamp(datetime(2000, month, last_day))
        for month, last_day in enumerate(month_ends, start=1)
    ]
170
+
171
def test_table_dataframe_infer_time(self, util):
    """An hourly indexed frame loads with its index typed `datetime`."""
    frame = util.make_dataframe(freq="h")

    tbl = Table(frame)
    assert tbl.size() == 10
    assert tbl.schema() == {"index": "datetime", **dict.fromkeys("abcd", "float")}

    # Ten consecutive hours starting at midnight on 2000-01-01.
    assert tbl.view().to_columns()["index"] == [
        util.to_timestamp(datetime(2000, 1, 1, hour)) for hour in range(10)
    ]
196
+
197
@mark.skip(reason="pyarrow dataframe does not support date inference")
def test_table_dataframe_year_start_index(self, util):
    """Skipped variant expecting the year-start index to be typed `date`."""
    frame = util.make_dataframe(freq="YS")

    tbl = Table(frame)
    assert tbl.size() == 10
    assert tbl.schema() == {"index": "date", **dict.fromkeys("abcd", "float")}

    # January 1st of each year from 2000 through 2009.
    assert tbl.view().to_columns()["index"] == [
        util.to_timestamp(datetime(year, 1, 1)) for year in range(2000, 2010)
    ]
223
+
224
def test_table_dataframe_year_start_index_fixed(self, util):
    """A year-start indexed frame loads with its index typed `datetime`."""
    frame = util.make_dataframe(freq="YS")

    tbl = Table(frame)
    assert tbl.size() == 10
    assert tbl.schema() == {"index": "datetime", **dict.fromkeys("abcd", "float")}

    # January 1st of each year from 2000 through 2009.
    assert tbl.view().to_columns()["index"] == [
        util.to_timestamp(datetime(year, 1, 1)) for year in range(2000, 2010)
    ]
249
+
250
@mark.skip(reason="pyarrow dataframe does not support date inference")
def test_table_dataframe_quarter_index(self, util):
    """Skipped variant expecting the quarter-end index to be typed `date`."""
    frame = util.make_dataframe(size=4, freq="QE")

    tbl = Table(frame)
    assert tbl.size() == 4
    assert tbl.schema() == {"index": "date", **dict.fromkeys("abcd", "float")}

    # (month, day) of each quarter end in 2000.
    quarter_ends = [(3, 31), (6, 30), (9, 30), (12, 31)]
    assert tbl.view().to_columns()["index"] == [
        util.to_timestamp(datetime(2000, month, day)) for month, day in quarter_ends
    ]
270
+
271
def test_table_dataframe_quarter_index_fixed(self, util):
    """A quarter-end indexed frame loads with its index typed `datetime`."""
    frame = util.make_dataframe(size=4, freq="QE")

    tbl = Table(frame)
    assert tbl.size() == 4
    assert tbl.schema() == {"index": "datetime", **dict.fromkeys("abcd", "float")}

    # (month, day) of each quarter end in 2000.
    quarter_ends = [(3, 31), (6, 30), (9, 30), (12, 31)]
    assert tbl.view().to_columns()["index"] == [
        util.to_timestamp(datetime(2000, month, day)) for month, day in quarter_ends
    ]
290
+
291
def test_table_dataframe_minute_index(self, util):
    """A minute-frequency indexed frame loads with a `datetime` index."""
    frame = util.make_dataframe(size=5, freq="min")

    tbl = Table(frame)
    assert tbl.size() == 5
    assert tbl.schema() == {"index": "datetime", **dict.fromkeys("abcd", "float")}

    # Five consecutive minutes starting at 2000-01-01 00:00.
    assert tbl.view().to_columns()["index"] == [
        util.to_timestamp(datetime(2000, 1, 1, 0, minute)) for minute in range(5)
    ]
311
+
312
def test_table_pandas_periodindex(self, util):
    """A frame from `util.make_period_dataframe` loads with an integer index."""
    tbl = Table(util.make_period_dataframe(30))

    assert tbl.size() == 30
    assert tbl.schema() == {"index": "integer", **dict.fromkeys("abcd", "float")}

    # The first five index values are expected to be the integers 360..364.
    leading = tbl.view().to_columns()["index"][:5]
    assert leading == list(range(360, 365))
326
+
327
@mark.skip(reason="pyarrow does not support this")
def test_table_pandas_period(self, util):
    """Skipped: quarterly `pd.Period` values expected to load as `datetime`."""
    quarters = [pd.Period(f"{quarter}Q2019") for quarter in range(1, 5)]
    tbl = Table(pd.DataFrame({"a": quarters}))
    assert tbl.size() == 4
    assert tbl.schema() == {"index": "integer", "a": "datetime"}
    # Each quarter maps to the first day of its first month.
    assert tbl.view().to_columns()["a"] == [
        util.to_timestamp(datetime(2019, month, 1)) for month in (1, 4, 7, 10)
    ]
348
+
349
def test_table_pandas_from_schema_int(self):
    """Ints with gaps round-trip through an `integer` schema column."""
    values = [None, 1, None, 2, None, 3, 4]
    frame = pd.DataFrame({"a": values})
    table = Table({"a": "integer"})
    table.update(frame)
    column = table.view().to_columns()["a"]
    assert column == values
355
+
356
def test_table_pandas_from_schema_bool(self):
    """Booleans round-trip through a `boolean` schema column."""
    flags = [True, False, True, False]
    frame = pd.DataFrame({"a": flags})
    table = Table({"a": "boolean"})
    table.update(frame)
    column = table.view().to_columns()["a"]
    assert column == flags
362
+
363
@mark.skip(reason="pyarrow does not support this")
def test_table_pandas_from_schema_bool_str(self):
    """Skipped: "True"/"False" strings expected to parse into a boolean column."""
    text_flags = ["True", "False", "True", "False"]
    frame = pd.DataFrame({"a": text_flags})
    table = Table({"a": "boolean"})
    table.update(frame)
    column = table.view().to_columns()["a"]
    assert column == [True, False, True, False]
370
+
371
def test_table_pandas_from_schema_float(self):
    """Floats with gaps round-trip through a `float` schema column."""
    values = [None, 1.5, None, 2.5, None, 3.5, 4.5]
    frame = pd.DataFrame({"a": values})
    table = Table({"a": "float"})
    table.update(frame)
    column = table.view().to_columns()["a"]
    assert column == values
377
+
378
def test_table_pandas_from_schema_float_all_nan(self):
    """An all-NaN float column is read back as all `None`."""
    frame = pd.DataFrame({"a": [np.nan] * 4})
    table = Table({"a": "float"})
    table.update(frame)
    column = table.view().to_columns()["a"]
    assert column == [None] * 4
384
+
385
def test_table_pandas_from_schema_float_to_int(self):
    """Floats pushed into an `integer` column lose their fractional part."""
    frame = pd.DataFrame({"a": [None, 1.5, None, 2.5, None, 3.5, 4.5]})
    table = Table({"a": "integer"})
    table.update(frame)
    column = table.view().to_columns()["a"]
    # 1.5 -> 1, 2.5 -> 2, 3.5 -> 3, 4.5 -> 4: truncation, not rounding.
    assert column == [None, 1, None, 2, None, 3, 4]
392
+
393
def test_table_pandas_from_schema_int_to_float(self):
    """Ints pushed into a `float` column are read back as floats."""
    frame = pd.DataFrame({"a": [None, 1, None, 2, None, 3, 4]})
    table = Table({"a": "float"})
    table.update(frame)
    column = table.view().to_columns()["a"]
    assert column == [None, 1.0, None, 2.0, None, 3.0, 4.0]
399
+
400
def test_table_pandas_from_schema_date(self, util):
    """`date` objects with a gap load into a `date` schema column."""
    frame = pd.DataFrame({"a": [date(2019, 8, 15), None, date(2019, 8, 16)]})
    table = Table({"a": "date"})
    table.update(frame)
    column = table.view().to_columns()["a"]
    # Dates are compared as the timestamp of midnight on that day.
    assert column == [
        util.to_timestamp(datetime(2019, 8, 15)),
        None,
        util.to_timestamp(datetime(2019, 8, 16)),
    ]
410
+
411
def test_table_pandas_from_schema_datetime(self, util):
    """Timestamps converted with `unit="ms"` round-trip through `datetime`."""
    first = util.to_timestamp(datetime(2019, 7, 11, 12, 30, 5))
    second = util.to_timestamp(datetime(2019, 7, 11, 13, 30, 5))
    data = [first, None, second, None]
    frame = pd.DataFrame({"a": pd.to_datetime(data, unit="ms")})
    table = Table({"a": "datetime"})
    table.update(frame)
    column = table.view().to_columns()["a"]
    assert column == data
422
+
423
def test_table_pandas_from_schema_datetime_timestamp_s(self, util):
    """NaN gaps in a timestamp list come back as `None` in a datetime column.

    NOTE(review): despite the `_s` suffix, the input is converted with
    `unit="ms"`.
    """
    early = datetime(2019, 7, 11, 12, 30, 5)
    late = datetime(2019, 7, 11, 13, 30, 5)
    data = [util.to_timestamp(early), np.nan, util.to_timestamp(late), np.nan]
    frame = pd.DataFrame({"a": pd.to_datetime(data, unit="ms")})
    table = Table({"a": "datetime"})
    table.update(frame)
    assert table.view().to_columns()["a"] == [
        util.to_timestamp(early),
        None,
        util.to_timestamp(late),
        None,
    ]
439
+
440
@mark.skip(reason="This is no longer relevant")
def test_table_pandas_from_schema_datetime_timestamp_ms(self, util):
    """Skipped: scaled timestamps with NaN gaps loaded into a datetime column.

    The second timestamp's `* 1000` multiplier had been split onto its own
    list element (`*1000,`), which Python parses as iterable unpacking of an
    int and which raises `TypeError` when evaluated. It is re-attached to its
    operand here so both non-null entries are scaled the same way.
    """
    data = [
        util.to_timestamp(datetime(2019, 7, 11, 12, 30, 5)) * 1000,
        np.nan,
        util.to_timestamp(datetime(2019, 7, 11, 13, 30, 5)) * 1000,
        np.nan,
    ]

    df = pd.DataFrame({"a": pd.to_datetime(data, unit="ms")})
    table = Table({"a": "datetime"})
    table.update(df)
    assert table.view().to_columns()["a"] == [
        util.to_timestamp(datetime(2019, 7, 11, 12, 30, 5)),
        None,
        util.to_timestamp(datetime(2019, 7, 11, 13, 30, 5)),
        None,
    ]
459
+
460
def test_table_pandas_from_schema_str(self):
    """Strings with gaps round-trip through a `string` schema column."""
    values = ["a", None, "b", None, "c"]
    frame = pd.DataFrame({"a": values})
    table = Table({"a": "string"})
    table.update(frame)
    column = table.view().to_columns()["a"]
    assert column == values
466
+
467
def test_table_pandas_none(self):
    """A column holding only `None` is read back as only `None`."""
    values = [None, None, None]
    table = Table(pd.DataFrame({"a": values}))
    column = table.view().to_columns()["a"]
    assert column == values
472
+
473
def test_table_pandas_symmetric_table(self):
    """Creating from a DataFrame and updating a schema table must agree.

    `t1` is built directly from the frame; `t2` is built from a schema and
    then updated with the same frame. Previously only `t1` was asserted, so
    the update path was never checked. `t2` is now asserted as well: it is
    expected to hold the same `a`/`b` data, without the `index` column that
    only the DataFrame-constructed table carries.
    """
    df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [1.5, 2.5, 3.5, 4.5]})
    expected_data = {
        "a": [1, 2, 3, 4],
        "b": [1.5, 2.5, 3.5, 4.5],
    }

    t1 = Table(df)
    t2 = Table({"a": "integer", "b": "float"})
    t2.update(df)

    assert t1.view().to_columns() == {"index": [0, 1, 2, 3], **expected_data}
    # The schema declares only `a` and `b`, so no `index` column is expected.
    assert t2.view().to_columns() == expected_data
484
+
485
def test_table_pandas_symmetric_stacked_updates(self):
    """Stacked updates must agree between frame-built and schema-built tables.

    `t1` is created from the frame and updated once; `t2` is created from a
    schema and updated twice. Previously only `t1` was asserted, so the
    schema path was never checked. `t2` is now asserted as well: it is
    expected to hold the same doubled `a`/`b` data, without the `index`
    column that only the DataFrame-constructed table carries.
    """
    df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [1.5, 2.5, 3.5, 4.5]})
    expected_data = {
        "a": [1, 2, 3, 4, 1, 2, 3, 4],
        "b": [1.5, 2.5, 3.5, 4.5, 1.5, 2.5, 3.5, 4.5],
    }

    t1 = Table(df)
    t1.update(df)

    t2 = Table({"a": "integer", "b": "float"})
    t2.update(df)
    t2.update(df)

    assert t1.view().to_columns() == {
        "index": [0, 1, 2, 3, 0, 1, 2, 3],
        **expected_data,
    }
    # The schema declares only `a` and `b`, so no `index` column is expected.
    assert t2.view().to_columns() == expected_data
501
+
502
def test_table_pandas_transitive(self):
    """Table -> Arrow -> pandas -> Table keeps the same schema and data."""
    moment = datetime(2019, 7, 11, 12, 30)
    records = {
        "a": [1, 2, 3, 4],
        "b": [1.5, 2.5, 3.5, 4.5],
        "c": [np.nan, np.nan, "abc", np.nan],
        "d": [None, True, None, False],
        "e": [float("nan"), moment, float("nan"), moment],
    }

    source_table = Table(pd.DataFrame(records))
    # Export only the data columns (no `index`) and rebuild a table from them.
    exported = arrow_bytes_to_pandas(source_table.view(columns=list(records)))
    rebuilt_table = Table(exported)
    assert source_table.schema() == rebuilt_table.schema()
    rebuilt_columns = rebuilt_table.view().to_columns()
    assert source_table.view().to_columns() == rebuilt_columns
524
+
525
+ # dtype=object should have correct inferred types
526
+
527
def test_table_pandas_object_to_int(self):
    """An object-dtype column of ints and None is inferred as `integer`."""
    values = [1, 2, None, 2, None, 3, 4]
    frame = pd.DataFrame({"a": np.array(values, dtype=object)})
    table = Table(frame)
    assert table.schema() == {"index": "integer", "a": "integer"}
    assert table.view().to_columns()["a"] == [1, 2, None, 2, None, 3, 4]
532
+
533
def test_table_pandas_object_to_float(self):
    """An object-dtype column of ints with a leading None stays `integer`.

    NOTE(review): despite the test name, the asserted schema type is
    `integer`. The expected values are written as floats and match through
    ordinary int/float equality.
    """
    raw = np.array([None, 1, None, 2, None, 3, 4], dtype=object)
    table = Table(pd.DataFrame({"a": raw}))
    assert table.schema() == {"index": "integer", "a": "integer"}
    column = table.view().to_columns()["a"]
    assert column == [None, 1.0, None, 2.0, None, 3.0, 4.0]
538
+
539
def test_table_pandas_object_to_bool(self):
    """An object-dtype column of bools is inferred as `boolean`."""
    flags = [True, False] * 3
    frame = pd.DataFrame({"a": np.array(flags, dtype=object)})
    table = Table(frame)
    assert table.schema() == {"index": "integer", "a": "boolean"}
    assert table.view().to_columns()["a"] == flags
546
+
547
def test_table_pandas_object_to_date(self, util):
    """An object-dtype column of `date` values is inferred as `date`."""
    days = np.array([date(2019, 7, 11), date(2019, 7, 12), None], dtype=object)
    table = Table(pd.DataFrame({"a": days}))
    assert table.schema() == {"index": "integer", "a": "date"}
    # Dates are compared as the timestamp of midnight on that day.
    assert table.view().to_columns()["a"] == [
        util.to_timestamp(datetime(2019, 7, 11)),
        util.to_timestamp(datetime(2019, 7, 12)),
        None,
    ]
558
+
559
def test_table_pandas_object_to_datetime(self, util):
    """An object-dtype column of `datetime` values is inferred as `datetime`."""
    moments = [
        datetime(2019, 7, 11, 1, 2, 3),
        datetime(2019, 7, 12, 1, 2, 3),
        None,
    ]
    frame = pd.DataFrame({"a": np.array(moments, dtype=object)})
    table = Table(frame)
    assert table.schema() == {"index": "integer", "a": "datetime"}
    assert table.view().to_columns()["a"] == [
        util.to_timestamp(datetime(2019, 7, 11, 1, 2, 3)),
        util.to_timestamp(datetime(2019, 7, 12, 1, 2, 3)),
        None,
    ]
579
+
580
def test_table_pandas_object_to_str(self):
    """An object-dtype column of strings and None is inferred as `string`."""
    words = ["abc", "def", None, "ghi"]
    frame = pd.DataFrame({"a": np.array(words, dtype=object)})
    table = Table(frame)
    assert table.schema() == {"index": "integer", "a": "string"}
    assert table.view().to_columns()["a"] == words
585
+
586
+ # Type matching
587
+
588
def test_table_pandas_update_float_schema_with_int(self):
    """Int data pushed into a `float` schema column is read back as floats."""
    frame = pd.DataFrame({"a": [1.5, 2.5, 3.5, 4.5], "b": [1, 2, 3, 4]})
    table = Table(dict.fromkeys(("a", "b"), "float"))
    table.update(frame)

    columns = table.view().to_columns()
    assert columns == {
        "a": [1.5, 2.5, 3.5, 4.5],
        "b": [1.0, 2.0, 3.0, 4.0],
    }
599
+
600
def test_table_pandas_update_int32_with_int64(self):
    """Updating a dict-built int table with a DataFrame appends the rows."""
    frame = pd.DataFrame({"a": [1, 2, 3, 4]})
    table = Table({"a": [1, 2, 3, 4]})
    table.update(frame)

    columns = table.view().to_columns()
    assert columns == {"a": [1, 2, 3, 4, 1, 2, 3, 4]}
608
+
609
def test_table_pandas_update_int64_with_float(self):
    """Floats appended to an integer column are stored without fractions."""
    float_frame = pd.DataFrame({"a": [1.5, 2.5, 3.5, 4.5]})
    table = Table(pd.DataFrame({"a": [1, 2, 3, 4]}))
    table.update(float_frame)

    # The appended 1.5..4.5 are expected to come back as 1..4.
    column = table.view().to_columns()["a"]
    assert column == [1, 2, 3, 4, 1, 2, 3, 4]
617
+
618
def test_table_pandas_update_date_schema_with_datetime(self, util):
    """Updating a `date` column keeps the schema and stores midnight timestamps."""
    frame = pd.DataFrame({"a": np.array([date(2019, 7, 11)])})
    table = Table({"a": "date"})
    table.update(frame)

    assert table.schema() == {"a": "date"}

    columns = table.view().to_columns()
    assert columns == {"a": [util.to_timestamp(datetime(2019, 7, 11))]}
630
+
631
@mark.skip(reason="Not supported by pyarrow (?)")
def test_table_pandas_update_datetime_schema_with_date(self, util):
    """Skipped: a `date` value pushed into a `datetime` schema column."""
    frame = pd.DataFrame({"a": np.array([date(2019, 7, 11)])})
    table = Table({"a": "datetime"})
    table.update(frame)
    assert table.schema() == {"a": "datetime"}
    columns = table.view().to_columns()
    assert columns == {"a": [util.to_timestamp(datetime(2019, 7, 11, 0, 0))]}
640
+
641
+ # Timestamps
642
+
643
def test_table_pandas_timestamp_to_datetime(self, util):
    """`pd.Timestamp` values with None gaps load as datetime timestamps."""
    stamps = [
        pd.Timestamp("2019-07-11 12:30:05"),
        None,
        pd.Timestamp("2019-07-11 13:30:05"),
        None,
    ]
    table = Table(pd.DataFrame({"a": stamps}))
    column = table.view().to_columns()["a"]
    assert column == [
        util.to_timestamp(datetime(2019, 7, 11, 12, 30, 5)),
        None,
        util.to_timestamp(datetime(2019, 7, 11, 13, 30, 5)),
        None,
    ]
658
+
659
def test_table_pandas_timestamp_explicit_dtype(self, util):
    """Timestamps stored with an explicit `datetime64[ns]` dtype load correctly."""
    stamps = [
        pd.Timestamp("2019-07-11 12:30:05"),
        None,
        pd.Timestamp("2019-07-11 13:30:05"),
        None,
    ]
    typed = np.array(stamps, dtype="datetime64[ns]")
    table = Table(pd.DataFrame({"a": typed}))
    column = table.view().to_columns()["a"]
    assert column == [
        util.to_timestamp(datetime(2019, 7, 11, 12, 30, 5)),
        None,
        util.to_timestamp(datetime(2019, 7, 11, 13, 30, 5)),
        None,
    ]
674
+
675
def test_table_pandas_update_datetime_with_timestamp(self, util):
    """Updating a timestamp table with the same data appends a second copy."""
    stamps = [
        pd.Timestamp("2019-07-11 12:30:05"),
        None,
        pd.Timestamp("2019-07-11 13:30:05"),
        None,
    ]
    initial_frame = pd.DataFrame({"a": stamps})
    update_frame = pd.DataFrame({"a": stamps})
    table = Table(initial_frame)
    table.update(update_frame)

    # Expected output is the same four-entry pattern twice in a row.
    pattern = [
        datetime(2019, 7, 11, 12, 30, 5),
        None,
        datetime(2019, 7, 11, 13, 30, 5),
        None,
    ]
    assert table.view().to_columns()["a"] == [
        None if moment is None else util.to_timestamp(moment)
        for moment in pattern * 2
    ]
696
+
697
+ # NaN/NaT reading
698
+
699
def test_table_pandas_nan(self):
    """A column of only NaN is read back as only `None`."""
    frame = pd.DataFrame({"a": [np.nan] * 4})
    table = Table(frame)
    column = table.view().to_columns()["a"]
    assert column == [None] * 4
704
+
705
def test_table_pandas_int_nan(self):
    """NaN entries mixed with ints are read back as `None`."""
    frame = pd.DataFrame({"a": [np.nan, 1, np.nan, 2]})
    table = Table(frame)
    column = table.view().to_columns()["a"]
    assert column == [None, 1, None, 2]
710
+
711
def test_table_pandas_float_nan(self):
    """NaN entries mixed with floats are read back as `None`."""
    frame = pd.DataFrame({"a": [np.nan, 1.5, np.nan, 2.5]})
    table = Table(frame)
    column = table.view().to_columns()["a"]
    assert column == [None, 1.5, None, 2.5]
716
+
717
def test_table_read_nan_int_col(self):
    """NaN in an int column makes it `float`; NaN in a str column is `None`."""
    frame = pd.DataFrame(
        {"str": ["abc", float("nan"), "def"], "int": [np.nan, 1, 2]}
    )
    tbl = Table(frame)
    # np.nan is a float, so pandas stores the "int" column as floats.
    expected_schema = {"index": "integer", "str": "string", "int": "float"}
    assert tbl.schema() == expected_schema
    assert tbl.size() == 3
    columns = tbl.view().to_columns()
    assert columns == {
        "index": [0, 1, 2],
        "str": ["abc", None, "def"],
        "int": [None, 1.0, 2.0],
    }
733
+
734
def test_table_read_nan_float_col(self):
    """NaN entries in string and float columns are both read back as `None`."""
    frame = pd.DataFrame(
        {"str": [float("nan"), "abc", float("nan")], "float": [np.nan, 1.5, 2.5]}
    )
    tbl = Table(frame)
    # A leading NaN does not stop the "str" column being typed as string.
    expected_schema = {"index": "integer", "str": "string", "float": "float"}
    assert tbl.schema() == expected_schema
    assert tbl.size() == 3
    columns = tbl.view().to_columns()
    assert columns == {
        "index": [0, 1, 2],
        "str": [None, "abc", None],
        "float": [None, 1.5, 2.5],
    }
750
+
751
def test_table_read_nan_bool_col(self):
    """Boolean columns containing NaN are still typed `boolean`."""
    frame = pd.DataFrame(
        {"bool": [np.nan, True, np.nan], "bool2": [False, np.nan, True]}
    )
    tbl = Table(frame)
    # Both a column that starts with NaN and one with NaN in the middle are
    # expected to end up as boolean.
    expected_schema = {"index": "integer", "bool": "boolean", "bool2": "boolean"}
    assert tbl.schema() == expected_schema
    assert tbl.size() == 3
    # NaN values are serialized as None.
    columns = tbl.view().to_columns()
    assert columns == {
        "index": [0, 1, 2],
        "bool": [None, True, None],
        "bool2": [False, None, True],
    }
769
+
770
def test_table_read_nan_date_col(self):
    """A column mixing NaN with a `date` is typed "date"; NaN becomes None."""
    frame = pd.DataFrame(
        {"str": ["abc", "def"], "date": [float("nan"), date(2019, 7, 11)]}
    )
    expected_schema = {"index": "integer", "str": "string", "date": "date"}
    expected_columns = {
        "index": [0, 1],
        "str": ["abc", "def"],
        # 1562803200000 is 2019-07-11T00:00:00Z expressed in epoch milliseconds.
        "date": [None, 1562803200000],
    }

    tbl = Table(frame)
    assert tbl.schema() == expected_schema
    assert tbl.size() == 2
    assert tbl.view().to_columns() == expected_columns
786
+
787
def test_table_read_nan_datetime_col(self, util):
    """A column mixing NaN with a `datetime` is typed "datetime".

    The NaN is expected back as None and the datetime as whatever
    `util.to_timestamp` returns for the same value.
    """
    moment = datetime(2019, 7, 11, 11, 0)
    frame = pd.DataFrame(
        {
            "str": ["abc", "def"],
            "datetime": [float("nan"), moment],
        }
    )
    expected_schema = {
        "index": "integer",
        "str": "string",
        "datetime": "datetime",
    }

    tbl = Table(frame)
    assert tbl.schema() == expected_schema
    assert tbl.size() == 2
    assert tbl.view().to_columns() == {
        "index": [0, 1],
        "str": ["abc", "def"],
        "datetime": [None, util.to_timestamp(moment)],
    }
806
+
807
def test_table_read_nat_datetime_col(self, util):
    """A datetime column whose first entry is NaT reads that entry as None."""
    moment = datetime(2019, 7, 11, 11, 0)
    frame = pd.DataFrame({"str": ["abc", "def"], "datetime": ["NaT", moment]})
    # pandas<2 infers `datetime` for this column while pandas>=2 leaves it
    # as `object`, so convert explicitly to get the same dtype on both.
    frame["datetime"] = pd.to_datetime(frame["datetime"])
    expected_schema = {
        "index": "integer",
        "str": "string",
        "datetime": "datetime",
    }

    tbl = Table(frame)
    assert tbl.schema() == expected_schema
    assert tbl.size() == 2
    assert tbl.view().to_columns() == {
        "index": [0, 1],
        "str": ["abc", "def"],
        "datetime": [None, util.to_timestamp(moment)],
    }
825
+
826
def test_table_read_nan_datetime_as_date_col(self, util):
    """A midnight `datetime` next to NaN is still typed "datetime", not "date"."""
    midnight = datetime(2019, 7, 11)
    frame = pd.DataFrame(
        {"str": ["abc", "def"], "datetime": [float("nan"), midnight]}
    )
    expected_schema = {
        "index": "integer",
        "str": "string",
        "datetime": "datetime",
    }

    tbl = Table(frame)
    assert tbl.schema() == expected_schema
    assert tbl.size() == 2
    assert tbl.view().to_columns() == {
        "index": [0, 1],
        "str": ["abc", "def"],
        "datetime": [None, util.to_timestamp(midnight)],
    }
842
+
843
def test_table_read_nan_datetime_no_seconds(self, util):
    """A `datetime` built without a seconds argument loads next to a NaN.

    NOTE(review): the fixture and assertions here are the same as in
    test_table_read_nan_datetime_col - confirm whether a distinct input
    was intended.
    """
    moment = datetime(2019, 7, 11, 11, 0)
    frame = pd.DataFrame(
        {
            "str": ["abc", "def"],
            "datetime": [float("nan"), moment],
        }
    )
    expected_schema = {
        "index": "integer",
        "str": "string",
        "datetime": "datetime",
    }

    tbl = Table(frame)
    assert tbl.schema() == expected_schema
    assert tbl.size() == 2
    assert tbl.view().to_columns() == {
        "index": [0, 1],
        "str": ["abc", "def"],
        "datetime": [None, util.to_timestamp(moment)],
    }
862
+
863
def test_table_read_nan_datetime_milliseconds(self, util):
    """A `datetime` with a non-zero seconds field loads next to a NaN.

    NOTE(review): despite the test name, the fixture value carries no
    sub-second component - confirm whether microseconds were intended.
    """
    moment = datetime(2019, 7, 11, 10, 30, 55)
    frame = pd.DataFrame(
        {
            "str": ["abc", "def"],
            "datetime": [np.nan, moment],
        }
    )
    expected_schema = {
        "index": "integer",
        "str": "string",
        "datetime": "datetime",
    }

    tbl = Table(frame)
    assert tbl.schema() == expected_schema
    assert tbl.size() == 2
    assert tbl.view().to_columns() == {
        "index": [0, 1],
        "str": ["abc", "def"],
        "datetime": [None, util.to_timestamp(moment)],
    }
882
+
883
@mark.skip(reason="lol wtf")
def test_table_pandas_correct_csv_nan_end(self):
    """A CSV whose string column starts with empty cells loads via pandas.

    The empty leading cells of "str" are expected back as None, and the
    fully populated "int" column is expected to stay integer.

    NOTE(review): the test remains skipped; the original skip reason does
    not say why, so re-enable it only after confirming it passes.
    """
    # The header has to be the single line "str,int": the assertions below
    # look up columns named "str" and "int". The previous literal
    # ("string,\nint\n...") split the header over two lines and misnamed
    # the first column, so the parsed frame could never match them.
    s = "str,int\n,1\n,2\nabc,3"
    csv = StringIO(s)
    data = pd.read_csv(csv)
    tbl = Table(data)
    assert tbl.schema() == {"index": "integer", "str": "string", "int": "integer"}
    assert tbl.size() == 3
    assert tbl.view().to_columns() == {
        "index": [0, 1, 2],
        "str": [None, None, "abc"],
        "int": [1, 2, 3],
    }
896
+
897
@mark.skip(reason="lol wtf")
def test_table_pandas_correct_csv_nan_intermittent(self):
    """A CSV with empty cells scattered over both columns loads via pandas.

    Empty cells in either column are expected back as None; "str" is
    expected to be typed "string" and "float" to be typed "float".

    NOTE(review): the test remains skipped; the original skip reason does
    not say why, so re-enable it only after confirming it passes.
    """
    # The header has to be the single line "str,float": the assertions
    # below look up columns named "str" and "float". The previous literal
    # ("string,\nfloat\n...") split the header over two lines and misnamed
    # the first column, so the parsed frame could never match them.
    s = "str,float\nabc,\n,2\nghi,"
    csv = StringIO(s)
    data = pd.read_csv(csv)
    tbl = Table(data)
    assert tbl.schema() == {"index": "integer", "str": "string", "float": "float"}
    assert tbl.size() == 3
    assert tbl.view().to_columns() == {
        "index": [0, 1, 2],
        "str": ["abc", None, "ghi"],
        "float": [None, 2, None],
    }
910
+
911
@mark.skip(reason="pyarrow does not support series")
def test_table_series(self):
    """A named three-element pandas Series loads as a three-row table."""
    import pandas as pd

    series = pd.Series([1, 2, 3], name="a")
    loaded = Table(series)
    assert loaded.size() == 3
918
+
919
@mark.skip(reason="pyarrow does not support series")
def test_table_indexed_series(self):
    """A Series with a string index exposes that index as a string column."""
    import pandas as pd

    series = pd.Series([1, 2, 3], index=["a", "b", "c"], name="a")
    loaded = Table(series)
    # The index is expected as "index"; the values appear under the Series name.
    expected_schema = {"index": "string", "a": "integer"}
    assert loaded.schema() == expected_schema
    assert loaded.size() == 3
927
+
928
def test_groupbys(self, superstore):
    """Index levels of a multi-indexed DataFrame appear as table columns."""
    indexed_frame = superstore.set_index(["Country", "Region"])
    table = Table(indexed_frame)
    column_names = table.columns()
    assert table.size() == 100
    # Both index levels must be present among the table's columns.
    for level in ("Country", "Region"):
        assert level in column_names
935
+
936
def test_pivottable(self, superstore):
    """Index levels of a pandas pivot table appear as table columns."""
    pivot_kwargs = {
        "values": "Discount",
        "index": ["Country", "Region"],
        "columns": "Category",
    }
    pivoted = pd.pivot_table(superstore, **pivot_kwargs)
    column_names = Table(pivoted).columns()
    # Both pivot index levels must be present among the table's columns.
    for level in ("Country", "Region"):
        assert level in column_names
947
+
948
+ @mark.skip(reason="TODO move this to Python")
949
+ def test_splitbys(self):
950
+ arrays = [
951
+ np.array(
952
+ [
953
+ "bar",
954
+ "bar",
955
+ "bar",
956
+ "bar",
957
+ "baz",
958
+ "baz",
959
+ "baz",
960
+ "baz",
961
+ "foo",
962
+ "foo",
963
+ "foo",
964
+ "foo",
965
+ "qux",
966
+ "qux",
967
+ "qux",
968
+ "qux",
969
+ ]
970
+ ),
971
+ np.array(
972
+ [
973
+ "one",
974
+ "one",
975
+ "two",
976
+ "two",
977
+ "one",
978
+ "one",
979
+ "two",
980
+ "two",
981
+ "one",
982
+ "one",
983
+ "two",
984
+ "two",
985
+ "one",
986
+ "one",
987
+ "two",
988
+ "two",
989
+ ]
990
+ ),
991
+ np.array(
992
+ [
993
+ "X",
994
+ "Y",
995
+ "X",
996
+ "Y",
997
+ "X",
998
+ "Y",
999
+ "X",
1000
+ "Y",
1001
+ "X",
1002
+ "Y",
1003
+ "X",
1004
+ "Y",
1005
+ "X",
1006
+ "Y",
1007
+ "X",
1008
+ "Y",
1009
+ ]
1010
+ ),
1011
+ ]
1012
+ tuples = list(zip(*arrays))
1013
+ index = pd.MultiIndex.from_tuples(tuples, names=["first", "second", "third"])
1014
+ df_both = pd.DataFrame(
1015
+ np.random.randn(3, 16), index=["A", "B", "C"], columns=index
1016
+ )
1017
+ table = Table(df_both)
1018
+ assert table.size() == 48