perspective-python 4.2.0__cp311-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. perspective/__init__.py +396 -0
  2. perspective/extension/finos-perspective-nbextension.json +5 -0
  3. perspective/handlers/__init__.py +11 -0
  4. perspective/handlers/aiohttp.py +61 -0
  5. perspective/handlers/starlette.py +55 -0
  6. perspective/handlers/tornado.py +184 -0
  7. perspective/perspective.pyd +0 -0
  8. perspective/templates/exported_widget.html.template +35 -0
  9. perspective/tests/__init__.py +11 -0
  10. perspective/tests/async/test_async_client.py +83 -0
  11. perspective/tests/async/test_websocket_client.py +124 -0
  12. perspective/tests/conftest.py +272 -0
  13. perspective/tests/core/__init__.py +11 -0
  14. perspective/tests/core/test_async.py +351 -0
  15. perspective/tests/multi_threaded/__init__.py +11 -0
  16. perspective/tests/multi_threaded/test_multi_threaded.py +201 -0
  17. perspective/tests/server/__init__.py +11 -0
  18. perspective/tests/server/test_server.py +1016 -0
  19. perspective/tests/server/test_session.py +110 -0
  20. perspective/tests/table/__init__.py +11 -0
  21. perspective/tests/table/arrow/date32.arrow +0 -0
  22. perspective/tests/table/arrow/date64.arrow +0 -0
  23. perspective/tests/table/arrow/dict.arrow +0 -0
  24. perspective/tests/table/arrow/dict_update.arrow +0 -0
  25. perspective/tests/table/arrow/int_float_str.arrow +0 -0
  26. perspective/tests/table/arrow/int_float_str_file.arrow +0 -0
  27. perspective/tests/table/arrow/int_float_str_update.arrow +0 -0
  28. perspective/tests/table/object_sequence.py +402 -0
  29. perspective/tests/table/test_column_paths.py +89 -0
  30. perspective/tests/table/test_delete.py +124 -0
  31. perspective/tests/table/test_exception.py +65 -0
  32. perspective/tests/table/test_leaks.py +54 -0
  33. perspective/tests/table/test_ports.py +178 -0
  34. perspective/tests/table/test_remove.py +102 -0
  35. perspective/tests/table/test_table.py +641 -0
  36. perspective/tests/table/test_table_arrow.py +503 -0
  37. perspective/tests/table/test_table_datetime.py +2409 -0
  38. perspective/tests/table/test_table_infer.py +201 -0
  39. perspective/tests/table/test_table_limit.py +45 -0
  40. perspective/tests/table/test_table_numpy.py +1022 -0
  41. perspective/tests/table/test_table_pandas.py +1018 -0
  42. perspective/tests/table/test_table_polars.py +251 -0
  43. perspective/tests/table/test_table_view_table.py +130 -0
  44. perspective/tests/table/test_to_arrow.py +417 -0
  45. perspective/tests/table/test_to_arrow_lz4.py +32 -0
  46. perspective/tests/table/test_to_format.py +1024 -0
  47. perspective/tests/table/test_to_polars.py +26 -0
  48. perspective/tests/table/test_update.py +545 -0
  49. perspective/tests/table/test_update_arrow.py +980 -0
  50. perspective/tests/table/test_update_pandas.py +211 -0
  51. perspective/tests/table/test_view.py +2261 -0
  52. perspective/tests/table/test_view_expression.py +1940 -0
  53. perspective/tests/test_dependencies.py +53 -0
  54. perspective/tests/viewer/__init__.py +11 -0
  55. perspective/tests/viewer/test_viewer.py +246 -0
  56. perspective/tests/widget/__init__.py +11 -0
  57. perspective/tests/widget/test_widget.py +278 -0
  58. perspective/tests/widget/test_widget_pandas.py +453 -0
  59. perspective/virtual_servers/__init__.py +134 -0
  60. perspective/virtual_servers/clickhouse.py +245 -0
  61. perspective/virtual_servers/duckdb.py +236 -0
  62. perspective/widget/__init__.py +349 -0
  63. perspective/widget/viewer/__init__.py +15 -0
  64. perspective/widget/viewer/validate.py +22 -0
  65. perspective/widget/viewer/viewer.py +343 -0
  66. perspective/widget/viewer/viewer_traitlets.py +101 -0
  67. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/install.json +5 -0
  68. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/package.json +71 -0
  69. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js +2 -0
  70. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js.LICENSE.txt +25 -0
  71. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/523.c030af5d3c4f67ff83f6.js +1 -0
  72. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/remoteEntry.95a8ea1b44d96032833f.js +1 -0
  73. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/style.js +4 -0
  74. perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/third-party-licenses.json +16 -0
  75. perspective_python-4.2.0.dist-info/METADATA +27 -0
  76. perspective_python-4.2.0.dist-info/RECORD +79 -0
  77. perspective_python-4.2.0.dist-info/WHEEL +4 -0
  78. perspective_python-4.2.0.dist-info/licenses/LICENSE.md +193 -0
  79. perspective_python-4.2.0.dist-info/licenses/LICENSE_THIRDPARTY_cargo.yml +17395 -0
@@ -0,0 +1,1940 @@
1
+ # ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
2
+ # ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃
3
+ # ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃
4
+ # ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃
5
+ # ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃
6
+ # ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
7
+ # ┃ Copyright (c) 2017, the Perspective Authors. ┃
8
+ # ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
9
+ # ┃ This file is part of the Perspective library, distributed under the terms ┃
10
+ # ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
11
+ # ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
12
+
13
+ import re
14
+ from random import random, randint, choices
15
+ from string import ascii_letters
16
+ from pytest import raises
17
+ from datetime import date, datetime
18
+ from time import mktime
19
+ from perspective import PerspectiveError
20
+ from .test_view import compare_delta
21
+ import perspective as psp
22
+
23
# Module-level fixtures shared by the tests below: a local client created from
# a fresh `psp.Server()`, and `Table` bound to that client's `table` method so
# tests can simply call `Table(data)`.
client = psp.Server().new_local_client()
Table = client.table
25
+
26
+
27
def randstr(length, input=ascii_letters):
    """Return `length` characters drawn at random, with replacement, from `input`."""
    picked = choices(input, k=length)
    return "".join(picked)
29
+
30
+
31
+ class TestViewExpression(object):
32
def test_table_validate_expressions_empty(self):
    """Validating an empty expression list returns empty schema, alias and error maps."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    outcome = tbl.validate_expressions([])
    for section in ("expression_schema", "expression_alias", "errors"):
        assert outcome[section] == {}
38
+
39
def test_view_expression_schema_empty(self):
    """A view created without expressions has an empty expression schema."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    plain_view = tbl.view()
    expected_columns = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
    assert plain_view.to_columns() == expected_columns
    assert plain_view.expression_schema() == {}
44
+
45
def test_view_validate_expressions_alias_map_errors(self):
    """Every alias maps back to its expression, even for expressions that error."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    by_alias = {
        "x": '"a"',
        "y": '"b" * 0.5',
        "c": "'abcdefg'",
        "d": "true and false",
        "e": 'float("a") > 2 ? null : 1',
        "f": "today()",
        "g": "now()",
        "h": "length(123)",
    }

    report = tbl.validate_expressions(by_alias)

    # Errored should also be in aliases
    for name in ("x", "y", "c", "d", "e", "f", "g", "h"):
        assert report["expression_alias"][name] == by_alias[name]
64
+
65
def test_view_validate_expressions_alias_map(self):
    """Every alias in the validation result maps back to the expression it was given."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    by_alias = {
        "x": '"a"',
        "y": '"b" * 0.5',
        "c": "'abcdefg'",
        "d": "true and false",
        "e": 'float("a") > 2 ? null : 1',
        "f": "today()",
        "g": "now()",
        "h": "length('abcd')",
    }

    report = tbl.validate_expressions(by_alias)
    for name in ("x", "y", "c", "d", "e", "f", "g", "h"):
        assert report["expression_alias"][name] == by_alias[name]
82
+
83
def test_view_expression_schema_all_types(self):
    """Check schema and values for expressions of every output type.

    Builds a view over integer columns "a" and "b" with unaliased
    expressions, then checks `expression_schema()`, the data returned by
    `to_columns()` (minus the `now()` column, which cannot be matched
    exactly) and the alias map returned by `validate_expressions()`.
    """
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    expressions = [
        '"a"',
        '"b" * 0.5',
        "'abcdefg'",
        "true and false",
        'float("a") > 2 ? null : 1',
        "today()",
        "now()",
        "length('abcd')",
    ]

    view = table.view(expressions=expressions)
    assert view.expression_schema() == {
        '"a"': "integer",
        '"b" * 0.5': "float",
        "'abcdefg'": "string",
        "true and false": "boolean",
        'float("a") > 2 ? null : 1': "float",
        "today()": "date",
        "now()": "datetime",
        "length('abcd')": "float",
    }

    result = view.to_columns()
    # Sample the calendar date once: three separate date.today() calls could
    # straddle midnight and combine components from two different days.
    current_day = date.today()
    today = datetime(current_day.year, current_day.month, current_day.day)
    del result["now()"]  # no need to match datetime.now()

    assert result == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        '"a"': [1, 2, 3, 4],
        '"b" * 0.5': [2.5, 3, 3.5, 4],
        "'abcdefg'": ["abcdefg" for _ in range(4)],
        "true and false": [False for _ in range(4)],
        'float("a") > 2 ? null : 1': [1, 1, None, None],
        "today()": [int(today.timestamp()) * 1000 for _ in range(4)],
        "length('abcd')": [4 for _ in range(4)],
    }

    # Without explicit aliases, each expression is its own alias.
    validated = table.validate_expressions(expressions)
    for expr in expressions:
        assert validated["expression_alias"][expr] == expr
127
+
128
def test_table_validate_expressions_with_errors(self):
    """Invalid expressions are reported per expression with column, line and message."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    bad_exprs = ['"Sales" + "a"', "datetime()", "string()", "for () {}"]
    expected_errors = {
        '"Sales" + "a"': {
            "column": 0,
            "error_message": 'Value Error - Input column "Sales" does not exist.',
            "line": 0,
        },
        "datetime()": {
            "column": 10,
            "error_message": "Zero parameter call to generic function: datetime not allowed",
            "line": 0,
        },
        "for () {}": {
            "column": 5,
            "error_message": "Premature end of expression[2]",
            "line": 0,
        },
        "string()": {
            "column": 8,
            "error_message": "Zero parameter call to generic function: string not allowed",
            "line": 0,
        },
    }

    report = tbl.validate_expressions(bad_exprs)

    assert report["expression_schema"] == {}
    assert report["expression_alias"] == dict(zip(bad_exprs, bad_exprs))
    assert report["errors"] == expected_errors
156
+
157
def test_view_expression_create(self):
    """An aliased sum of two integer columns appears as a float column in the view."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    summed = tbl.view(expressions={"computed": ' "a" + "b"'})
    expected = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert summed.to_columns() == expected
    assert summed.expression_schema() == {"computed": "float"}
166
+
167
def test_view_expression_string_per_page(self):
    """Four expressions, each lower-casing its own 6400-character literal, all evaluate correctly."""
    tbl = Table({"a": list(range(100))})
    payloads = [randstr(6400) for _ in range(4)]

    exprs = {}
    for idx, payload in enumerate(payloads):
        exprs["computed{}".format(idx)] = "var x := '{}'; lower(x)".format(payload)
    view = tbl.view(expressions=exprs)

    columns = view.to_columns()
    schema = view.expression_schema()

    for idx, payload in enumerate(payloads):
        alias = "computed{}".format(idx)
        lowered = payload.lower()
        assert schema[alias] == "string"
        assert columns[alias] == [lowered] * 100
185
+
186
def test_view_expression_string_page_stress(self):
    """Concatenating four 640-character local string variables yields the joined string on every row."""
    tbl = Table({"a": list(range(100))})
    chunks = [letter * 640 for letter in "abcd"]

    source = "var a := '{}'; var b := '{}'; var c := '{}'; var d := '{}'; concat(a, b, c, d)".format(
        *chunks
    )
    view = tbl.view(expressions={"computed": source})

    columns = view.to_columns()
    schema = view.expression_schema()
    assert schema == {"computed": "string"}
    assert columns["computed"] == ["".join(chunks)] * 100
207
+
208
def test_view_expression_new_vocab_page(self):
    """Random multi-variable string expressions concatenate correctly onto column "a".

    Each generated expression declares 1-26 local variables holding
    upper- or lower-cased random literals and concatenates them after the
    value of column "a".
    """
    tbl = Table({"a": [randstr(100) for _ in range(100)]})

    def make_expression(idx):
        # The first line is a `//name` comment, which names the expression.
        header = "//computed{}".format(idx)
        lines = [header]
        var_names = []
        expected_parts = []

        for var_name in ascii_letters[: randint(1, 26)]:
            literal = randstr(randint(100, 1000))

            if random() > 0.5:
                expected = literal.upper()
                call = "upper('{}')".format(literal)
            else:
                expected = literal.lower()
                call = "lower('{}')".format(literal)

            var_names.append(var_name)
            expected_parts.append(expected)
            lines.append("var {} := {};".format(var_name, call))

        lines.append('concat("a", {})'.format(", ".join(var_names)))

        return {
            "expression_name": header[2:],
            "expression": "\n".join(lines),
            "output": "".join(expected_parts),
        }

    specs = [make_expression(i) for i in range(10)]

    view = tbl.view(expressions=[spec["expression"] for spec in specs])

    columns = view.to_columns()
    schema = view.expression_schema()

    for spec in specs:
        alias = spec["expression_name"]
        assert schema[alias] == "string"

        for row in range(100):
            prefix = columns["a"][row]
            assert columns[alias][row] == prefix + spec["output"]
255
+
256
def test_view_expression_collide_local_var(self):
    """Make sure that strings declared under the same var name in
    different expressions do not collide."""
    tbl = Table({"a": [1, 2, 3, 4]})
    strings = [randstr(50) for _ in range(8)]
    first_half, second_half = strings[:4], strings[4:]

    view = tbl.view(
        expressions={
            "computed": " var w := '{}'; var x := '{}'; var y := '{}'; var z := '{}'; concat(w, x, y, z)".format(
                *first_half
            ),
            "computed2": " var w := '{}'; var x := '{}'; var y := '{}'; var z := '{}'; concat(w, x, y, z)".format(
                *second_half
            ),
        }
    )

    columns = view.to_columns()
    schema = view.expression_schema()
    assert schema == {"computed": "string", "computed2": "string"}
    assert columns["computed"] == ["".join(first_half)] * 4
    assert columns["computed2"] == ["".join(second_half)] * 4
278
+
279
def test_view_random_expressions(self):
    """Randomly generated expressions return the value of their last declared string variable."""

    def make_expression():
        """Create a random expression with a few local string vars that
        are too long to be stored in-place."""
        alias = randstr(10)
        declarations = []
        last_name = ""
        last_literal = ""
        # The expression ends with the name of the final variable, so its
        # literal is the expected output.
        for last_name in ascii_letters[: randint(1, 26)]:
            last_literal = randstr(randint(15, 100))
            declarations.append("var {} := '{}';\n".format(last_name, last_literal))

        return {
            "expression_name": alias,
            "expression": "".join(declarations) + last_name,
            "output": last_literal,
        }

    tbl = Table({"a": [1, 2, 3, 4]})

    for _ in range(5):
        specs = [make_expression() for _ in range(5)]
        expected_by_alias = {spec["expression_name"]: spec["output"] for spec in specs}
        sources = {spec["expression_name"]: spec["expression"] for spec in specs}
        view = tbl.view(expressions=sources)
        schema = view.expression_schema()
        columns = view.to_columns()
        for alias, expected in expected_by_alias.items():
            assert schema[alias] == "string"
            assert columns[alias] == [expected] * 4
318
+
319
def test_view_expression_string_literal_compare(self):
    """Comparing two equal string literals is typed boolean and is True on every row."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    source = " 'a' == 'a'"

    checked = tbl.validate_expressions({"computed": source})
    assert checked["expression_schema"] == {"computed": "boolean"}

    view = tbl.view(expressions={"computed": source})
    expected = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [True, True, True, True],
    }
    assert view.to_columns() == expected
    assert view.expression_schema() == {"computed": "boolean"}
334
+
335
def test_view_expression_string_literal_compare_null(self):
    """Comparing a string literal with null is typed float and evaluates to 0 on every row."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    source = " 'a' == null"

    checked = tbl.validate_expressions({"computed": source})
    assert checked["expression_schema"] == {"computed": "float"}

    view = tbl.view(expressions={"computed": source})
    expected = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [0, 0, 0, 0],
    }
    assert view.to_columns() == expected
    assert view.expression_schema() == {"computed": "float"}
350
+
351
def test_view_expression_string_literal_compare_column(self):
    """Comparing a string column against a literal gives a per-row boolean."""
    tbl = Table({"a": ["a", "a", "b", "c"]})
    source = " \"a\" == 'a'"

    checked = tbl.validate_expressions({"computed": source})
    assert checked["expression_schema"] == {"computed": "boolean"}

    view = tbl.view(expressions={"computed": source})
    expected = {
        "a": ["a", "a", "b", "c"],
        "computed": [True, True, False, False],
    }
    assert view.to_columns() == expected
    assert view.expression_schema() == {"computed": "boolean"}
362
+
363
def test_view_expression_string_literal_compare_column_null(self):
    """Null rows in a string column compare as False against a string literal."""
    tbl = Table({"a": ["a", None, "b", "c", None]})
    source = " \"a\" == 'a'"

    checked = tbl.validate_expressions({"computed": source})
    assert checked["expression_schema"] == {"computed": "boolean"}

    view = tbl.view(expressions={"computed": source})
    expected = {
        "a": ["a", None, "b", "c", None],
        "computed": [True, False, False, False, False],
    }
    assert view.to_columns() == expected
    assert view.expression_schema() == {"computed": "boolean"}
374
+
375
def test_view_expression_string_literal_compare_column_null_long(self):
    """A 26-character literal compares correctly against a string column containing nulls."""
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    tbl = Table({"a": [alphabet, None, alphabet, "aabcdefghijklmnopqrstuvwxyz", None]})

    # The validated source has a leading space; the view source does not.
    checked = tbl.validate_expressions(
        {"computed": " \"a\" == 'abcdefghijklmnopqrstuvwxyz'"}
    )
    assert checked["expression_schema"] == {"computed": "boolean"}

    view = tbl.view(
        expressions={"computed": "\"a\" == 'abcdefghijklmnopqrstuvwxyz'"}
    )
    columns = view.to_columns()
    assert columns["computed"] == [True, False, True, False, False]
    assert view.expression_schema() == {"computed": "boolean"}
397
+
398
def test_view_expression_string_literal_compare_column_null_long_var(self):
    """A 26-character literal held in a local variable compares correctly against a column with nulls."""
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    tbl = Table({"a": [alphabet, None, alphabet, "aabcdefghijklmnopqrstuvwxyz", None]})

    # The validated source has a leading space; the view source does not.
    checked = tbl.validate_expressions(
        {"computed": " var xyz := 'abcdefghijklmnopqrstuvwxyz'; \"a\" == xyz"}
    )
    assert checked["expression_schema"] == {"computed": "boolean"}

    view = tbl.view(
        expressions={
            "computed": "var xyz := 'abcdefghijklmnopqrstuvwxyz'; \"a\" == xyz"
        }
    )
    columns = view.to_columns()
    assert columns["computed"] == [True, False, True, False, False]
    assert view.expression_schema() == {"computed": "boolean"}
422
+
423
def test_view_expression_string_literal_compare_if(self):
    """`if()` over a string comparison selects 1 or 2 per row and is typed float."""
    tbl = Table({"a": ["a", "a", "b", "c"]})

    # The validated source has a leading space; the view source does not.
    checked = tbl.validate_expressions({"computed": " if(\"a\" == 'a', 1, 2)"})
    assert checked["expression_schema"] == {"computed": "float"}

    view = tbl.view(expressions={"computed": "if(\"a\" == 'a', 1, 2)"})
    expected = {
        "a": ["a", "a", "b", "c"],
        "computed": [1, 1, 2, 2],
    }
    assert view.to_columns() == expected
    assert view.expression_schema() == {"computed": "float"}
434
+
435
def test_view_expression_string_literal_var(self):
    """Creating the same two-variable concat view ten times gives the same result each time."""
    tbl = Table({"a": [1, 2, 3]})
    joined = "Eabcdefghijklmn0123456789"

    attempt = 0
    while attempt < 10:
        view = tbl.view(
            expressions=[
                "var x := 'Eabcdefghijklmn'; var y := '0123456789'; concat(x, y)"
            ]
        )
        expected = {
            "a": [1, 2, 3],
            "var x := 'Eabcdefghijklmn'; var y := '0123456789'; concat(x, y)": [joined] * 3,
        }
        assert view.to_columns() == expected
        attempt += 1
452
+
453
def test_view_streaming_expression(self):
    """A constant expression covers every row after five further 50-row updates."""

    def data():
        rows = []
        for _ in range(50):
            rows.append({"a": random()})
        return rows

    tbl = Table(data())
    view = tbl.view(expressions=["123"])

    updates = 0
    while updates < 5:
        tbl.update(data())
        updates += 1

    assert tbl.size() == 300
    columns = view.to_columns()
    assert columns["123"] == [123] * 300
466
+
467
def test_view_streaming_expression_limit(self):
    """With `limit=50`, the table and its constant expression column stay at 50 rows through updates."""

    def data():
        rows = []
        for _ in range(55):
            rows.append({"a": random()})
        return rows

    tbl = Table(data(), limit=50)
    view = tbl.view(expressions=["123"])

    updates = 0
    while updates < 5:
        tbl.update(data())
        updates += 1

    assert tbl.size() == 50
    columns = view.to_columns()
    assert columns["123"] == [123] * 50
480
+
481
def test_view_streaming_expression_one(self):
    """A view grouped by an expression column keeps its expression schema through updates."""

    def data():
        rows = []
        for _ in range(50):
            rows.append({"a": random()})
        return rows

    tbl = Table(data())
    view = tbl.view(group_by=["c0"], expressions={"c0": '"a" * 2'})

    updates = 0
    while updates < 5:
        tbl.update(data())
        updates += 1

    assert tbl.size() == 300
    assert view.expression_schema() == {"c0": "float"}
492
+
493
def test_view_streaming_expression_two(self):
    """A view grouped and split by expression columns keeps its expression schema through updates."""

    def data():
        rows = []
        for _ in range(50):
            rows.append({"a": random()})
        return rows

    tbl = Table(data())
    sources = {"c0": '"a" * 2', "c1": "'new string'"}
    view = tbl.view(group_by=["c0"], split_by=["c1"], expressions=sources)

    for _ in range(5):
        tbl.update(data())

    assert tbl.size() == 300
    assert view.expression_schema() == {"c0": "float", "c1": "integer"}  # pivoted
508
+
509
def test_view_expression_create_no_alias(self):
    """An unaliased expression is exposed under its own source text."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    view = tbl.view(expressions=['"a" + "b"'])
    expected = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        '"a" + "b"': [6, 8, 10, 12],
    }
    assert view.to_columns() == expected
    assert view.expression_schema() == {'"a" + "b"': "float"}
519
+
520
def test_view_expression_should_not_overwrite_real(self):
    """Aliasing an expression to an existing column name raises PerspectiveError."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    expected_message = (
        'Abort(): Value Error - expression "a" cannot overwrite an existing column.'
    )

    with raises(PerspectiveError) as caught:
        tbl.view(expressions={"a": 'upper("a")'})

    assert str(caught.value) == expected_message
529
+
530
def test_legacy_view_duplicate_expression_should_resolve_to_last_alias(self):
    """When two `//abc`-named expressions share an alias, the later one wins."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    duplicated = ['//abc\n"a" + "b"', '//abc\n"a" - "b"']
    view = tbl.view(columns=["abc"], expressions=duplicated)

    assert view.to_columns() == {"abc": [-4, -4, -4, -4]}
538
+
539
def test_view_expression_multiple_alias(
    self,
):
    """The same expression under four aliases produces four float columns."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    aliases = ("computed", "computed2", "computed3", "computed4")
    view = tbl.view(expressions={alias: '"a" + "b"' for alias in aliases})

    computed_types = {alias: "float" for alias in aliases}
    full_schema = {"a": "integer", "b": "integer"}
    full_schema.update(computed_types)

    assert view.schema() == full_schema
    assert view.expression_schema() == computed_types
567
+
568
def test_view_expression_multiple_views_with_the_same_alias_should_not_overwrite(
    self,
):
    """Two views that reuse an alias for different expressions each keep their own values."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sum_view = tbl.view(expressions={"computed": ' "a" + "b"'})
    product_view = tbl.view(expressions={"computed": ' "a" * "b"'})

    assert sum_view.expression_schema() == {"computed": "float"}
    assert product_view.expression_schema() == {"computed": "float"}

    sums = sum_view.to_columns()["computed"]
    assert sums == [6, 8, 10, 12]
    products = product_view.to_columns()["computed"]
    assert products == [5, 12, 21, 32]
581
+
582
def test_view_expression_multiple_views_with_the_same_alias_pivoted(
    self,
):
    """Two grouped views reusing an alias with different expressions and aggregates stay independent."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    numeric_view = tbl.view(
        group_by=["computed"],
        aggregates={"computed": ("weighted mean", ["b"])},
        expressions={"computed": ' "a" + "b"'},
    )
    string_view = tbl.view(
        group_by=["computed"],
        aggregates={"computed": "last"},
        expressions={"computed": "concat('abc', ' ', 'def')"},
    )

    assert numeric_view.expression_schema() == {"computed": "float"}
    assert string_view.expression_schema() == {"computed": "string"}

    numeric = numeric_view.to_columns()
    strings = string_view.to_columns()

    assert numeric["__ROW_PATH__"] == [[], [6], [8], [10], [12]]
    assert strings["__ROW_PATH__"] == [[], ["abc def"]]

    assert numeric["computed"] == [9.384615384615385, 6, 8, 10, 12]
    assert strings["computed"] == ["abc def", "abc def"]
609
+
610
def test_view_expression_multiple_views_with_the_same_alias_all_types(
    self,
):
    """Two views may reuse aliases for expressions of different types.

    Loads one row set into a typed table, creates two views whose aliases
    overlap ("computed", "computed2", "computed4") but whose expressions
    differ, and checks that each view reports its own schema and values.
    """
    # Sample the clock once and reuse it for both the expected buckets and
    # the inserted rows. Reading the clock again when building the rows
    # would make the test fail if a minute, day or month boundary passed
    # in between.
    now = datetime.now()
    today = now.date()

    month_bucketed = datetime(today.year, today.month, 1).timestamp() * 1000
    minute_bucketed = (
        datetime(
            now.year, now.month, now.day, now.hour, now.minute, 0, 0
        ).timestamp()
        * 1000
    )

    table = Table(
        {
            "a": "integer",
            "b": "float",
            "c": "datetime",
            "d": "date",
            "e": "boolean",
            "f": "string",
        }
    )

    table.update(
        {
            "a": [1, 2, 3, 4],
            "b": [5.5, 6.5, 7.5, 8.5],
            "c": [str(now) for _ in range(4)],
            "d": [str(today) for _ in range(4)],
            "e": [True, False, True, False],
            "f": ["a", "b", "c", "d"],
        }
    )

    view = table.view(
        expressions={
            "computed": '"a" + "b"',
            "computed2": "bucket(\"c\", 'M')",
            "computed3": "concat('a', 'b', 'c')",
            "computed4": "'new string'",
        }
    )

    view2 = table.view(
        expressions={
            "computed": 'upper("f")',
            "computed2": '20 + ("b" * "a")',
            "computed4": "bucket(\"c\", 'm')",
        }
    )

    assert view.expression_schema() == {
        "computed": "float",
        "computed2": "date",
        "computed3": "string",
        "computed4": "string",
    }

    assert view2.expression_schema() == {
        "computed": "string",
        "computed2": "float",
        "computed4": "datetime",
    }

    result = view.to_columns()
    result2 = view2.to_columns()

    assert result["computed"] == [6.5, 8.5, 10.5, 12.5]
    assert result2["computed"] == ["A", "B", "C", "D"]

    assert result["computed2"] == [month_bucketed for _ in range(4)]
    assert result2["computed2"] == [25.5, 33, 42.5, 54]

    assert result["computed3"] == ["abc", "abc", "abc", "abc"]
    # "computed3" exists only in the first view.
    assert "computed3" not in result2

    assert result["computed4"] == ["new string" for _ in range(4)]
    assert result2["computed4"] == [minute_bucketed for _ in range(4)]
690
+
691
def test_view_expression_create_no_columns(self):
    """With `columns=[]` the view is empty, but the expression still has a schema entry."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sources = {"computed": ' "a" + "b"'}
    view = tbl.view(columns=[], expressions=sources)

    assert view.to_columns() == {}
    assert view.schema() == {}

    # computed column should still exist
    assert view.expression_schema() == {"computed": "float"}
699
+
700
def test_view_expression_create_columns(self):
    """Selecting only the expression column returns just that column."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sources = {"computed": ' "a" + "b"'}
    view = tbl.view(columns=["computed"], expressions=sources)

    float_schema = {"computed": "float"}
    assert view.to_columns() == {"computed": [6, 8, 10, 12]}
    assert view.schema() == float_schema
    # computed column should still exist
    assert view.expression_schema() == float_schema
707
+
708
def test_view_expression_create_clear(self):
    """Clearing the table empties the expression column but keeps it in the view schema."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    view = tbl.view(expressions={"computed": ' "a" + "b"'})

    before = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert view.to_columns() == before

    tbl.clear()

    assert view.schema() == {"a": "integer", "b": "integer", "computed": "float"}
    assert view.to_columns() == {"a": [], "b": [], "computed": []}
719
+
720
def test_view_expression_create_replace(self):
    """Replacing the table data recomputes the expression column."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    view = tbl.view(expressions={"computed": ' "a" + "b"'})

    before = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert view.to_columns() == before

    tbl.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]})

    assert view.schema() == {"a": "integer", "b": "integer", "computed": "float"}
    after = {
        "a": [10, 20, 30, 40],
        "b": [50, 60, 70, 80],
        "computed": [60, 80, 100, 120],
    }
    assert view.to_columns() == after
735
+
736
def test_view_expression_multiple_dependents_replace(self):
    """Two expressions over the same columns are both recomputed after `replace()`."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sources = {"computed": '"a" + "b"', "final": '("a" + "b") ^ 2'}
    view = tbl.view(expressions=sources)

    before = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
        "final": [36, 64, 100, 144],
    }
    assert view.to_columns() == before

    tbl.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]})

    expected_schema = {
        "a": "integer",
        "b": "integer",
        "computed": "float",
        "final": "float",
    }
    assert view.schema() == expected_schema

    after = {
        "a": [10, 20, 30, 40],
        "b": [50, 60, 70, 80],
        "computed": [60, 80, 100, 120],
        "final": [3600, 6400, 10000, 14400],
    }
    assert view.to_columns() == after
760
+
761
def test_view_expression_multiple_views_should_not_conflate(self):
    """Each of two views on one table sees only its own expression column."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sum_view = tbl.view(expressions={"computed": '"a" + "b"'})
    diff_view = tbl.view(expressions={"computed2": ' "a" - "b"'})

    assert sum_view.schema() == {"a": "integer", "b": "integer", "computed": "float"}
    assert diff_view.schema() == {"a": "integer", "b": "integer", "computed2": "float"}

    expected_sum = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert sum_view.to_columns() == expected_sum

    expected_diff = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed2": [-4, -4, -4, -4],
    }
    assert diff_view.to_columns() == expected_diff
783
+
784
def test_view_expression_multiple_views_should_all_clear(self):
    """Clearing the table empties both views while each keeps its own schema."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})

    sum_view = tbl.view(expressions={"computed": '"a" + "b"'})
    diff_view = tbl.view(expressions={"computed2": ' "a" - "b"'})

    assert sum_view.schema() == {"a": "integer", "b": "integer", "computed": "float"}
    assert diff_view.schema() == {"a": "integer", "b": "integer", "computed2": "float"}

    expected_sum = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert sum_view.to_columns() == expected_sum

    expected_diff = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed2": [-4, -4, -4, -4],
    }
    assert diff_view.to_columns() == expected_diff

    tbl.clear()

    # Schemas are unchanged by the clear; only the data is gone.
    assert sum_view.schema() == {"a": "integer", "b": "integer", "computed": "float"}
    assert diff_view.schema() == {"a": "integer", "b": "integer", "computed2": "float"}

    assert sum_view.to_columns() == {"a": [], "b": [], "computed": []}
    assert diff_view.to_columns() == {"a": [], "b": [], "computed2": []}
817
+
818
def test_view_expression_multiple_views_should_all_replace(self):
    """After ``Table.replace`` both views recompute their expression columns."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sum_view = tbl.view(expressions={"computed": '"a" + "b"'})
    diff_view = tbl.view(expressions={"computed2": ' "a" - "b"'})

    assert sum_view.schema() == dict(a="integer", b="integer", computed="float")
    assert diff_view.schema() == dict(a="integer", b="integer", computed2="float")

    assert sum_view.to_columns() == dict(
        a=[1, 2, 3, 4], b=[5, 6, 7, 8], computed=[6, 8, 10, 12]
    )
    assert diff_view.to_columns() == dict(
        a=[1, 2, 3, 4], b=[5, 6, 7, 8], computed2=[-4, -4, -4, -4]
    )

    tbl.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]})

    assert sum_view.to_columns() == dict(
        a=[10, 20, 30, 40], b=[50, 60, 70, 80], computed=[60, 80, 100, 120]
    )
    assert diff_view.to_columns() == dict(
        a=[10, 20, 30, 40], b=[50, 60, 70, 80], computed2=[-40, -40, -40, -40]
    )
853
+
854
def test_view_expression_delete_and_create(self):
    """Reuse the alias ``computed`` in a new view after the first one is deleted."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    shared_schema = dict(a="integer", b="integer", computed="float")

    first = tbl.view(expressions={"computed": '"a" + "b"'})
    assert first.schema() == shared_schema
    assert first.to_columns() == dict(
        a=[1, 2, 3, 4], b=[5, 6, 7, 8], computed=[6, 8, 10, 12]
    )

    first.delete()

    # Same alias, different expression: the new view is asserted to hold
    # the subtraction results.
    second = tbl.view(expressions={"computed": ' "a" - "b"'})
    assert second.schema() == shared_schema
    assert second.to_columns() == dict(
        a=[1, 2, 3, 4], b=[5, 6, 7, 8], computed=[-4, -4, -4, -4]
    )
877
+
878
def test_view_expression_delete_and_create_with_updates(self):
    """Delete a view, create another, and keep appending rows to the table.

    The first view carries a column expression and a constant string
    expression; the second view, created after the first is deleted, is
    checked after two further updates.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    constant_expr = "upper(concat('abc', 'def'))"

    first = tbl.view(
        expressions={"computed": ' "a" + "b"', constant_expr: constant_expr}
    )
    assert first.schema() == {
        "a": "integer",
        "b": "integer",
        "computed": "float",
        constant_expr: "string",
    }

    tbl.update({"a": [5, 6], "b": [9, 10]})
    assert first.to_columns() == {
        "a": [1, 2, 3, 4, 5, 6],
        "b": [5, 6, 7, 8, 9, 10],
        "computed": [6, 8, 10, 12, 14, 16],
        constant_expr: ["ABCDEF"] * 6,
    }

    first.delete()
    second = tbl.view(expressions={"computed2": '"a" - "b"'})
    assert second.schema() == dict(a="integer", b="integer", computed2="float")

    # Two more identical appends on the unindexed table.
    for _ in range(2):
        tbl.update({"a": [5, 6], "b": [9, 10]})

    assert second.to_columns() == {
        "a": [1, 2, 3, 4, 5, 6, 5, 6, 5, 6],
        "b": [5, 6, 7, 8, 9, 10, 9, 10, 9, 10],
        "computed2": [-4] * 10,
    }
914
+
915
def test_view_expression_append(self):
    """Rows appended through ``Table.update`` show up in the expression column."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    v = tbl.view(expressions={"computed": '"a" + "b"'})

    assert v.schema() == dict(a="integer", b="integer", computed="float")

    before = dict(a=[1, 2, 3, 4], b=[5, 6, 7, 8], computed=[6, 8, 10, 12])
    assert v.to_columns() == before

    tbl.update({"a": [5, 6], "b": [9, 10]})

    after = dict(
        a=[1, 2, 3, 4, 5, 6],
        b=[5, 6, 7, 8, 9, 10],
        computed=[6, 8, 10, 12, 14, 16],
    )
    assert v.to_columns() == after
938
+
939
def test_view_expression_delta_zero(self, util):
    """Append rows to a table whose view has an expression column.

    NOTE(review): ``updater`` is defined below but never registered with
    the view, so its ``compare_delta`` check does not run in this test.
    Confirm whether an ``on_update`` registration was intended. The
    ``util`` fixture is also unused here.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    v = tbl.view(expressions={"computed": '"a" + "b"'})

    assert v.schema() == dict(a="integer", b="integer", computed="float")
    assert v.to_columns() == dict(
        a=[1, 2, 3, 4], b=[5, 6, 7, 8], computed=[6, 8, 10, 12]
    )

    def updater(port, delta):
        compare_delta(delta, {"a": [5, 6], "b": [9, 10]})

    tbl.update({"a": [5, 6], "b": [9, 10]})

    assert v.to_columns() == dict(
        a=[1, 2, 3, 4, 5, 6],
        b=[5, 6, 7, 8, 9, 10],
        computed=[6, 8, 10, 12, 14, 16],
    )
966
+
967
def test_view_delete_with_scope(self):
    """Update an indexed table after creating a view that is never bound.

    The view is created without keeping a reference to it, and the test
    passes as long as the following ``update`` does not raise. Per the
    original author's note, this targets `View`'s `__del__` being invoked
    by the Python reference counter on an empty `Table`.
    """
    schema = {"id": "integer", "msg": "string", "val": "float"}
    tbl = Table(schema, index="id")

    # Deliberately not assigned to a name.
    tbl.view(expressions={"inverted": '1 / "val"'}, columns=["inverted"])

    row = {"id": 1, "msg": "test", "val": 1.0}
    tbl.update([row])
991
+
992
def test_view_expression_with_custom_columns(self):
    """Select an expression column together with a plain column via ``columns``."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": '"a" + "b"'}
    v = tbl.view(columns=["computed", "b"], expressions=exprs)

    expected = dict(b=[5, 6, 7, 8], computed=[6, 8, 10, 12])
    assert v.to_columns() == expected
1004
+
1005
def test_view_expression_with_group_by(self):
    """Group rows by an expression column and check the aggregated output."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": '"a" + "b"'}
    v = tbl.view(group_by=["computed"], expressions=exprs)

    # First entry of every column is the total row (empty row path).
    expected = {
        "__ROW_PATH__": [[], [6], [8], [10], [12]],
        "a": [10, 1, 2, 3, 4],
        "b": [26, 5, 6, 7, 8],
        "computed": [36.0, 6.0, 8.0, 10.0, 12.0],
    }
    assert v.to_columns() == expected
1019
+
1020
def test_view_expression_with_group_by_clear(self):
    """A grouped view is reduced to a single null total row after ``clear``."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    v = tbl.view(group_by=["computed"], expressions={"computed": '"a" + "b"'})

    populated = {
        "__ROW_PATH__": [[], [6], [8], [10], [12]],
        "a": [10, 1, 2, 3, 4],
        "b": [26, 5, 6, 7, 8],
        "computed": [36.0, 6.0, 8.0, 10.0, 12.0],
    }
    assert v.to_columns() == populated

    tbl.clear()

    cleared = {
        "__ROW_PATH__": [[]],
        "a": [None],
        "b": [None],
        "computed": [None],
    }
    assert v.to_columns() == cleared
1045
+
1046
def test_view_expression_with_group_by_replace(self):
    """A grouped view regroups on the new expression values after ``replace``."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    v = tbl.view(group_by=["computed"], expressions={"computed": '"a" + "b"'})

    original = {
        "__ROW_PATH__": [[], [6], [8], [10], [12]],
        "a": [10, 1, 2, 3, 4],
        "b": [26, 5, 6, 7, 8],
        "computed": [36.0, 6.0, 8.0, 10.0, 12.0],
    }
    assert v.to_columns() == original

    tbl.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]})

    replaced = {
        "__ROW_PATH__": [[], [60], [80], [100], [120]],
        "a": [100, 10, 20, 30, 40],
        "b": [260, 50, 60, 70, 80],
        "computed": [360.0, 60.0, 80.0, 100.0, 120.0],
    }
    assert v.to_columns() == replaced
1071
+
1072
def test_view_expression_with_split_by(self):
    """Split columns by an expression column; each split holds one non-null row."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": '"a" + "b"'}
    v = tbl.view(split_by=["computed"], expressions=exprs)

    blank = None
    expected = {
        "6|a": [1, blank, blank, blank],
        "6|b": [5, blank, blank, blank],
        "6|computed": [6, blank, blank, blank],
        "8|a": [blank, 2, blank, blank],
        "8|b": [blank, 6, blank, blank],
        "8|computed": [blank, 8, blank, blank],
        "10|a": [blank, blank, 3, blank],
        "10|b": [blank, blank, 7, blank],
        "10|computed": [blank, blank, 10.0, blank],
        "12|a": [blank, blank, blank, 4],
        "12|b": [blank, blank, blank, 8],
        "12|computed": [blank, blank, blank, 12.0],
    }
    assert v.to_columns() == expected
1094
+
1095
def test_view_expression_with_row_split_by(self):
    """Split columns by an expression column and check the sparse output.

    NOTE(review): despite the name, only ``split_by`` is passed (no
    ``group_by``), so this exercises the same configuration as
    ``test_view_expression_with_split_by`` -- confirm the intent.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    v = tbl.view(split_by=["computed"], expressions={"computed": '"a" + "b"'})

    def only(position, value):
        # Four-slot column that is null everywhere except `position`.
        column = [None, None, None, None]
        column[position] = value
        return column

    expected = {
        "6|a": only(0, 1),
        "6|b": only(0, 5),
        "6|computed": only(0, 6.0),
        "8|a": only(1, 2),
        "8|b": only(1, 6),
        "8|computed": only(1, 8.0),
        "10|a": only(2, 3),
        "10|b": only(2, 7),
        "10|computed": only(2, 10.0),
        "12|a": only(3, 4),
        "12|b": only(3, 8),
        "12|computed": only(3, 12.0),
    }
    assert v.to_columns() == expected
1117
+
1118
def test_view_expression_with_sort(self):
    """Sort a view in descending order on a ``length`` expression column."""
    tbl = Table({"a": ["a", "ab", "abc", "abcd"]})
    exprs = {"computed": 'length("a")'}
    v = tbl.view(sort=[["computed", "desc"]], expressions=exprs)

    expected = dict(a=["abcd", "abc", "ab", "a"], computed=[4, 3, 2, 1])
    assert v.to_columns() == expected
1127
+
1128
def test_view_expression_with_filter(self):
    """Filter a view on a ``length`` expression column (``>= 3``)."""
    tbl = Table({"a": ["a", "ab", "abc", "abcd"]})
    exprs = {"computed": 'length("a")'}
    v = tbl.view(filter=[["computed", ">=", 3]], expressions=exprs)

    expected = dict(a=["abc", "abcd"], computed=[3, 4])
    assert v.to_columns() == expected
1134
+
1135
def test_view_day_of_week_date(self, util):
    """``day_of_week`` on a date column yields strings such as ``"2 Monday"``."""
    march_days = range(9, 14)
    tbl = Table({"a": [date(2020, 3, day) for day in march_days]})
    v = tbl.view(expressions={"bucket": 'day_of_week("a")'})

    assert v.schema() == {"a": "date", "bucket": "string"}

    actual = v.to_columns()
    expected = {
        "a": [util.to_timestamp(datetime(2020, 3, day)) for day in march_days],
        "bucket": ["2 Monday", "3 Tuesday", "4 Wednesday", "5 Thursday", "6 Friday"],
    }
    assert actual == expected
1149
+
1150
def test_view_day_of_week_datetime(self, util):
    """``day_of_week`` on a datetime column yields strings such as ``"2 Monday"``."""
    march_days = range(9, 14)
    tbl = Table({"a": [datetime(2020, 3, day, 12, 30) for day in march_days]})
    v = tbl.view(expressions={"bucket": 'day_of_week("a")'})

    assert v.schema() == {"a": "datetime", "bucket": "string"}

    actual = v.to_columns()
    expected = {
        "a": [
            util.to_timestamp(datetime(2020, 3, day, 12, 30)) for day in march_days
        ],
        "bucket": ["2 Monday", "3 Tuesday", "4 Wednesday", "5 Thursday", "6 Friday"],
    }
    assert actual == expected
1166
+
1167
def test_view_month_of_year_date(self, util):
    """``month_of_year`` on a date column yields strings such as ``"01 January"``."""
    months = range(1, 13)
    tbl = Table({"a": [date(2020, month, 15) for month in months]})
    v = tbl.view(expressions={"bucket": 'month_of_year("a")'})

    assert v.schema() == {"a": "date", "bucket": "string"}

    actual = v.to_columns()
    month_labels = [
        "01 January",
        "02 February",
        "03 March",
        "04 April",
        "05 May",
        "06 June",
        "07 July",
        "08 August",
        "09 September",
        "10 October",
        "11 November",
        "12 December",
    ]
    expected = {
        "a": [util.to_timestamp(datetime(2020, month, 15)) for month in months],
        "bucket": month_labels,
    }
    assert actual == expected
1188
+
1189
+ # XXX: these datetimes are being interpreted as dates!!
1190
+ # to get around this, I explicitly gave a schema to `Table`.
1191
def test_view_month_of_year_datetime(self, util):
    """``month_of_year`` on a datetime column yields strings such as ``"01 January"``.

    The table is created from an explicit ``{"a": "datetime"}`` schema and
    filled with ``update`` so the column type is fixed up front.
    """
    months = range(1, 13)
    tbl = Table({"a": "datetime"})
    tbl.update({"a": [datetime(2020, month, 15) for month in months]})
    v = tbl.view(expressions={"bucket": 'month_of_year("a")'})

    assert v.schema() == {"a": "datetime", "bucket": "string"}

    actual = v.to_columns()
    month_labels = [
        "01 January",
        "02 February",
        "03 March",
        "04 April",
        "05 May",
        "06 June",
        "07 July",
        "08 August",
        "09 September",
        "10 October",
        "11 November",
        "12 December",
    ]
    expected = {
        "a": [util.to_timestamp(datetime(2020, month, 15)) for month in months],
        "bucket": month_labels,
    }
    assert actual == expected
1217
+
1218
+ # bucketing
1219
def test_view_day_bucket_date(self, util):
    """Day-bucketing a date column is asserted to return the same dates."""
    tbl = Table(
        {
            "a": [
                date(2020, 1, 1),
                date(2020, 1, 1),
                date(2020, 2, 29),
                date(2020, 3, 1),
            ]
        }
    )
    v = tbl.view(expressions={"bucket": "bucket(\"a\", 'D')"})
    assert v.schema() == {"a": "date", "bucket": "date"}

    actual = v.to_columns()
    ymd = [(2020, 1, 1), (2020, 1, 1), (2020, 2, 29), (2020, 3, 1)]
    expected = {
        "a": [util.to_timestamp(datetime(*parts)) for parts in ymd],
        "bucket": [util.to_timestamp(datetime(*parts)) for parts in ymd],
    }
    assert actual == expected
1246
+
1247
def test_view_day_bucket_date_with_null(self, util):
    """Day-bucketing a date column keeps null entries null."""
    tbl = Table(
        {"a": [date(2020, 1, 1), None, date(2020, 2, 29), date(2020, 3, 15)]}
    )
    v = tbl.view(expressions={"bucket": "bucket(\"a\", 'D')"})
    assert v.schema() == {"a": "date", "bucket": "date"}

    actual = v.to_columns()
    expected_a = [
        util.to_timestamp(datetime(2020, 1, 1)),
        None,
        util.to_timestamp(datetime(2020, 2, 29)),
        util.to_timestamp(datetime(2020, 3, 15)),
    ]
    expected_bucket = [
        util.to_timestamp(datetime(2020, 1, 1)),
        None,
        util.to_timestamp(datetime(2020, 2, 29)),
        util.to_timestamp(datetime(2020, 3, 15)),
    ]
    assert actual == {"a": expected_a, "bucket": expected_bucket}
1274
+
1275
def test_view_day_bucket_datetime(self, util):
    """Day-bucketing a datetime column is asserted to truncate to midnight dates."""
    moments = [
        datetime(2020, 1, 1, 5),
        datetime(2020, 1, 1, 23),
        datetime(2020, 2, 29, 1),
        datetime(2020, 3, 1, 0),
    ]
    tbl = Table({"a": moments})
    v = tbl.view(expressions={"bucket": "bucket(\"a\", 'D')"})
    assert v.schema() == {"a": "datetime", "bucket": "date"}

    actual = v.to_columns()
    expected_a = [
        util.to_timestamp(datetime(2020, 1, 1, 5)),
        util.to_timestamp(datetime(2020, 1, 1, 23)),
        util.to_timestamp(datetime(2020, 2, 29, 1)),
        util.to_timestamp(datetime(2020, 3, 1, 0)),
    ]
    expected_bucket = [
        util.to_timestamp(datetime(2020, 1, 1)),
        util.to_timestamp(datetime(2020, 1, 1)),
        util.to_timestamp(datetime(2020, 2, 29)),
        util.to_timestamp(datetime(2020, 3, 1)),
    ]
    assert actual == {"a": expected_a, "bucket": expected_bucket}
1302
+
1303
def test_view_month_bucket_date(self, util):
    """Month-bucketing a date column is asserted to return first-of-month dates.

    Dates are fed to the table as ISO strings via ``str(date(...))``;
    expected values are computed with ``datetime.timestamp() * 1000``.
    """
    tbl = Table({"a": "date"})
    iso_dates = [
        str(date(2020, 1, 1)),
        str(date(2020, 1, 28)),
        str(date(2020, 2, 29)),
        str(date(2020, 3, 15)),
    ]
    tbl.update({"a": iso_dates})
    v = tbl.view(expressions={"bucket": "bucket(\"a\", 'M')"})
    assert v.schema() == {"a": "date", "bucket": "date"}

    def millis(year, month, day):
        return datetime(year, month, day).timestamp() * 1000

    actual = v.to_columns()
    expected = {
        "a": [
            millis(2020, 1, 1),
            millis(2020, 1, 28),
            millis(2020, 2, 29),
            millis(2020, 3, 15),
        ],
        "bucket": [
            millis(2020, 1, 1),
            millis(2020, 1, 1),
            millis(2020, 2, 1),
            millis(2020, 3, 1),
        ],
    }
    assert actual == expected
1331
+
1332
def test_view_month_bucket_date_with_null(self, util):
    """Month-bucketing a date column keeps null entries null."""
    tbl = Table(
        {"a": [date(2020, 1, 1), None, date(2020, 2, 29), date(2020, 3, 15)]}
    )
    v = tbl.view(expressions={"bucket": "bucket(\"a\", 'M')"})
    assert v.schema() == {"a": "date", "bucket": "date"}

    actual = v.to_columns()
    expected_a = [
        util.to_timestamp(datetime(2020, 1, 1)),
        None,
        util.to_timestamp(datetime(2020, 2, 29)),
        util.to_timestamp(datetime(2020, 3, 15)),
    ]
    expected_bucket = [
        util.to_timestamp(datetime(2020, 1, 1)),
        None,
        util.to_timestamp(datetime(2020, 2, 1)),
        util.to_timestamp(datetime(2020, 3, 1)),
    ]
    assert actual == {"a": expected_a, "bucket": expected_bucket}
1359
+
1360
def test_view_month_bucket_datetime(self, util):
    """Month-bucketing a datetime column is asserted to return first-of-month dates."""
    tbl = Table({"a": "datetime"})
    moments = [
        datetime(2020, 1, 1),
        datetime(2020, 1, 28),
        datetime(2020, 2, 29),
        datetime(2020, 3, 15),
    ]
    tbl.update({"a": moments})
    v = tbl.view(expressions={"bucket": "bucket(\"a\", 'M')"})
    assert v.schema() == {"a": "datetime", "bucket": "date"}

    actual = v.to_columns()
    expected_a = [
        util.to_timestamp(datetime(2020, 1, 1)),
        util.to_timestamp(datetime(2020, 1, 28)),
        util.to_timestamp(datetime(2020, 2, 29)),
        util.to_timestamp(datetime(2020, 3, 15)),
    ]
    expected_bucket = [
        util.to_timestamp(datetime(2020, 1, 1)),
        util.to_timestamp(datetime(2020, 1, 1)),
        util.to_timestamp(datetime(2020, 2, 1)),
        util.to_timestamp(datetime(2020, 3, 1)),
    ]
    assert actual == {"a": expected_a, "bucket": expected_bucket}
1388
+
1389
def test_view_month_bucket_datetime_with_null(self, util):
    """Month-bucketing a datetime column keeps null entries null."""
    tbl = Table({"a": "datetime"})
    tbl.update({"a": [datetime(2020, 1, 1), None, None, datetime(2020, 3, 15)]})
    v = tbl.view(expressions={"bucket": "bucket(\"a\", 'M')"})
    assert v.schema() == {"a": "datetime", "bucket": "date"}

    actual = v.to_columns()
    expected_a = [
        util.to_timestamp(datetime(2020, 1, 1)),
        None,
        None,
        util.to_timestamp(datetime(2020, 3, 15)),
    ]
    expected_bucket = [
        util.to_timestamp(datetime(2020, 1, 1)),
        None,
        None,
        util.to_timestamp(datetime(2020, 3, 1)),
    ]
    assert actual == {"a": expected_a, "bucket": expected_bucket}
1412
+
1413
def test_view_integer_expression(self):
    """``integer(...)`` applied to literals and to integer, date and float columns.

    The view is created on an empty, schema-only table; one row is added
    afterwards and every expression column is checked.
    """
    tbl = Table({"x": "integer", "y": "date", "z": "float"})
    exprs = {
        "computed": "integer(2147483648)",
        "computed2": "integer(-2147483649)",
        "computed3": "integer(123.456)",
        "computed4": 'integer("x")',
        "computed5": 'integer("y")',
        "computed6": 'integer("z")',
    }
    v = tbl.view(expressions=exprs)

    tbl.update({"x": [12136582], "y": [date(2020, 6, 30)], "z": [1.23456]})

    assert v.expression_schema() == dict.fromkeys(exprs, "integer")

    result = v.to_columns()
    expected = {
        "computed": [2147483648],
        "computed2": [-2147483649],
        "computed3": [123],
        "computed4": [12136582],
        "computed5": [132384030],
        "computed6": [1],
    }
    for alias, values in expected.items():
        assert result[alias] == values
1446
+
1447
def test_view_float_expression(self):
    """``float(...)`` applied to literals and to datetime, integer, date and float columns.

    The expected value for the datetime column is the local-time epoch
    timestamp of ``dt`` in milliseconds, computed through ``mktime``.
    """
    tbl = Table({"w": "datetime", "x": "integer", "y": "date", "z": "float"})
    exprs = {
        "computed": "float(2147483648)",
        "computed2": "float(-2147483649)",
        "computed3": "float(123.456789123)",
        "computed4": 'float("x")',
        "computed5": 'float("y")',
        "computed6": 'float("z")',
        "computed7": 'float("w")',
    }
    v = tbl.view(expressions=exprs)

    dt = datetime(2018, 8, 12, 15, 32, 55)
    row = {"w": [dt], "x": [12136582], "y": [date(2020, 6, 30)], "z": [1.23456]}
    tbl.update(row)

    assert v.expression_schema() == dict.fromkeys(exprs, "float")

    result = v.to_columns()

    epoch_seconds = mktime(dt.timetuple()) + dt.microsecond / 1000000.0
    epoch_millis = int(epoch_seconds * 1000)

    expected = {
        "computed": [2147483648],
        "computed2": [-2147483649],
        "computed3": [123.456789123],
        "computed4": [12136582],
        "computed5": [132384030],
        "computed6": [1.23456],
        "computed7": [epoch_millis],
    }
    for alias, values in expected.items():
        assert result[alias] == values
1490
+
1491
def test_view_date_expression(self, util):
    """``date(y, m, d)`` literals produce date-typed expression columns."""
    tbl = Table({"x": [1]})
    exprs = {
        "computed": " date(2020, 5, 30)",
        "computed2": "date(1997, 8, 31)",
    }
    v = tbl.view(expressions=exprs)

    assert v.expression_schema() == dict(computed="date", computed2="date")

    columns = v.to_columns()
    assert columns["computed"] == [util.to_timestamp(datetime(2020, 5, 30))]
    assert columns["computed2"] == [util.to_timestamp(datetime(1997, 8, 31))]
1503
+
1504
def test_view_datetime_expression(self, util):
    """``datetime(<ms since epoch>)`` produces a datetime-typed expression column."""
    tbl = Table({"x": [1]})

    dt = datetime(2015, 11, 29, 23, 59, 59)
    epoch_seconds = mktime(dt.timetuple()) + dt.microsecond / 1000000.0
    epoch_millis = int(epoch_seconds * 1000)

    # The integer millisecond value is embedded directly in the expression.
    v = tbl.view(expressions={"computed": f"datetime({epoch_millis})"})
    assert v.expression_schema() == {"computed": "datetime"}

    columns = v.to_columns()
    expected = [util.to_timestamp(datetime(2015, 11, 29, 23, 59, 59))]
    assert columns["computed"] == expected
1516
+
1517
def test_view_datetime_expression_roundtrip(self, util):
    """``datetime(float("x"))`` on a datetime column returns the original value."""
    tbl = Table({"x": [datetime(2015, 11, 29, 23, 59, 59)]})
    v = tbl.view(expressions={"computed": 'datetime(float("x"))'})
    assert v.expression_schema() == {"computed": "datetime"}

    columns = v.to_columns()
    expected = [util.to_timestamp(datetime(2015, 11, 29, 23, 59, 59))]
    assert columns["computed"] == expected
1525
+
1526
def test_view_string_expression(self):
    """``string(...)`` applied to every column type and to a float literal.

    The assertions pin the exact text rendering for each input type.
    """
    tbl = Table(
        dict(
            a="date",
            b="datetime",
            c="integer",
            d="float",
            e="string",
            f="boolean",
        )
    )
    exprs = {
        "computed": 'string("a")',
        "computed2": 'string("b")',
        "computed3": 'string("c")',
        "computed4": 'string("d")',
        "computed5": 'string("e")',
        "computed6": 'string("f")',
        "computed7": "string(1234.5678)",
    }
    v = tbl.view(expressions=exprs)

    tbl.update(
        {
            "a": [date(2020, 5, 30), date(2021, 7, 13)],
            "b": [
                datetime(2015, 11, 29, 23, 59, 59),
                datetime(2016, 11, 29, 23, 59, 59),
            ],
            "c": [12345678, 1293879852],
            "d": [1.2792013981, 19.218975981],
            "e": ["abcdefghijklmnop", "def"],
            "f": [False, True],
        }
    )

    assert v.expression_schema() == dict.fromkeys(exprs, "string")

    result = v.to_columns()
    expected = {
        "computed": ["2020-05-30", "2021-07-13"],
        "computed2": ["2015-11-29 23:59:59.000", "2016-11-29 23:59:59.000"],
        "computed3": ["12345678", "1293879852"],
        "computed4": ["1.2792", "19.219"],
        "computed5": ["abcdefghijklmnop", "def"],
        "computed6": ["false", "true"],
        "computed7": ["1234.57", "1234.57"],
    }
    for alias, values in expected.items():
        assert result[alias] == values
1583
+
1584
def test_view_expession_multicomment(self):
    """An expression with several ``//`` comments is keyed by its full text.

    The expression is passed in list form, and both the expression schema
    and the output columns are asserted to use the whole expression string
    as the column name.
    """
    source = "var x := 1 + 2;\n// def\nx + 100 // cdefghijk"
    tbl = Table({"a": [1, 2, 3, 4]})
    v = tbl.view(expressions=[source])

    assert v.expression_schema() == {source: "float"}
    assert v.to_columns() == {source: [103, 103, 103, 103], "a": [1, 2, 3, 4]}
1594
+
1595
def test_view_regex_email(self):
    """Regex expressions split random e-mail addresses like Python's ``re`` does.

    100 random addresses are generated; ``search`` results are compared
    with the first capture group of the equivalent Python patterns, and
    ``match_all`` / ``match`` are asserted truthy for every row.
    """
    endings = ["com", "net", "co.uk", "ie", "me", "io", "co"]
    local_alphabet = ascii_letters + "0123456789" + "._-"
    data = [
        f"{randstr(30, local_alphabet)}@{randstr(10)}.{choices(endings, k=1)[0]}"
        for _ in range(100)
    ]
    tbl = Table({"a": data})
    v = tbl.view(
        expressions={
            "address": "search(\"a\", '^([a-zA-Z0-9._-]+)@')",
            "domain": "search(\"a\", '@([a-zA-Z.]+)$')",
            "is_email?": "match_all(\"a\", '^([a-zA-Z0-9._-]+)@([a-zA-Z.]+)$')",
            "has_at?": "match(\"a\", '@')",
        }
    )

    assert v.expression_schema() == {
        "address": "string",
        "domain": "string",
        "is_email?": "boolean",
        "has_at?": "boolean",
    }

    results = v.to_columns()
    address_pattern = re.compile(r"^([a-zA-Z0-9._-]+)@")
    domain_pattern = re.compile(r"@([a-zA-Z.]+)$")

    for row in range(100):
        source = results["a"][row]
        want_address = address_pattern.match(source).group(1)
        want_domain = domain_pattern.search(source).group(1)
        assert results["address"][row] == want_address
        assert results["domain"][row] == want_domain
        assert results["is_email?"][row]
        assert results["has_at?"][row]
1632
+
1633
def test_view_expression_number(self):
    """Parse random 16-digit numbers written as four separated 4-digit groups.

    Each row uses either ``-`` or a space as the separator. The ``parsed``
    expression is asserted to equal the source with separators removed.
    """

    def digits():
        return randstr(4, "0123456789")

    def grouped_number():
        # The separator is drawn first, then the four digit groups.
        separator = "-" if random() > 0.5 else " "
        return separator.join([digits(), digits(), digits(), digits()])

    data = [grouped_number() for _ in range(1000)]

    tbl = Table({"a": data})
    v = tbl.view(
        expressions={
            "parsed": """
var parts[4];
parts[0] := search("a", '^([0-9]{4})[ -][0-9]{4}[ -][0-9]{4}[ -][0-9]{4}');
parts[1] := search("a", '^[0-9]{4}[ -]([0-9]{4})[ -][0-9]{4}[ -][0-9]{4}');
parts[2] := search("a", '^[0-9]{4}[ -][0-9]{4}[ -]([0-9]{4})[ -][0-9]{4}');
parts[3] := search("a", '^[0-9]{4}[ -][0-9]{4}[ -][0-9]{4}[ -]([0-9]{4})');
concat(parts[0], parts[1], parts[2], parts[3])
""",
            "is_number?": "match_all(\"a\", '^[0-9]{4}[ -][0-9]{4}[ -][0-9]{4}[ -][0-9]{4}')",
        }
    )
    assert v.expression_schema() == {"parsed": "string", "is_number?": "boolean"}

    results = v.to_columns()
    for row in range(1000):
        source = results["a"][row]
        stripped = re.sub(r"[ -]", "", source)
        assert results["parsed"][row] == stripped
        assert results["is_number?"][row]
1676
+
1677
def test_view_expression_newlines(self):
    """Regex expressions containing newline and tab characters.

    ``c1`` and ``c2`` embed real newline / tab characters in the pattern,
    while ``c3`` and ``c4`` pass the two-character escape sequence
    backslash-n through to the regex engine.
    """
    column_a = [
        "abc\ndef",
        "\n\n\n\nabc\ndef",
        "abc\n\n\n\n\n\nabc\ndef\n\n\n\n",
        None,
        "def",
    ]
    column_b = [
        "hello\tworld",
        "\n\n\n\n\nhello\n\n\n\n\n\tworld",
        "\tworld",
        "world",
        None,
    ]
    tbl = Table({"a": column_a, "b": column_b})

    exprs = {
        "c1": "search(\"a\", '(\ndef)')",
        "c2": "search(\"b\", '(\tworld)')",
        "c3": "match(\"a\", '\\n')",
        "c4": "match(\"b\", '\\n')",
    }
    v = tbl.view(expressions=exprs)

    assert v.expression_schema() == dict(
        c1="string", c2="string", c3="boolean", c4="boolean"
    )

    results = v.to_columns()
    expected = {
        "c1": ["\ndef", "\ndef", "\ndef", None, None],
        "c2": ["\tworld", "\tworld", "\tworld", None, None],
        "c3": [True, True, True, None, False],
        "c4": [False, True, False, False, None],
    }
    for alias, values in expected.items():
        assert results[alias] == values
1718
+
1719
def test_view_regex_substring(self):
    """``substring`` on literals and on a column with empty and null entries."""
    data = ["abc, def", "efg", "", None, "aaaaaaaaaaaaa"]
    tbl = Table({"x": data})
    exprs = {
        "a": "substring('abcdef', 0)",
        "abc": "substring('abcdef', 3)",
        "b": 'substring("x", 0)',
        "c": 'substring("x", 5, 1)',
        "d": 'substring("x", 100)',
        "e": 'substring("x", 0, 10000)',
        "f": 'substring("x", 5, 0)',
    }
    results = tbl.view(expressions=exprs).to_columns()

    row_count = len(data)
    all_null = [None] * row_count

    assert results["a"] == ["abcdef"] * row_count
    assert results["abc"] == ["def"] * row_count
    # Empty strings are expected to come back as null, like the null row.
    assert results["b"] == [value if value else None for value in data]
    assert results["c"] == ["d", None, None, None, "a"]
    assert results["d"] == all_null
    assert results["e"] == all_null
    assert results["f"] == ["", None, None, None, ""]
1742
+
1743
+ # FIXME: // ending\nvar domain := search(\"a\", '@([a-zA-Z.]+)$'); length(domain) > 0 ? search(domain, '[.](.*)$') : null
1744
+ # is a broken expression without the newline after var domain
1745
def test_view_regex_email_substr(self):
    """Extract address and domain ending from random e-mails with multi-statement expressions.

    ``address`` uses ``indexof`` plus ``substring``; ``ending`` uses local
    variables and an ``if``/``else`` block. Both are compared with the
    equivalent Python ``re`` results for each of the 100 rows.
    """
    endings = ["com", "net", "co.uk", "ie", "me", "io", "co"]
    local_alphabet = ascii_letters + "0123456789" + "._-"
    data = [
        f"{randstr(30, local_alphabet)}@{randstr(10)}.{choices(endings, k=1)[0]}"
        for _ in range(100)
    ]
    tbl = Table({"a": data})
    v = tbl.view(
        expressions={
            "address": 'var vec[2]; indexof("a", \'^([a-zA-Z0-9._-]+)@\', vec) ? substring("a", vec[0], vec[1] - vec[0] + 1) : null',
            "ending": """
var domain := search(\"a\", '@([a-zA-Z.]+)$');
var len := length(domain);
if (len > 0 and is_not_null(domain)) {
search(domain, '[.](.*)$');
} else {
'not found';
}""",
        }
    )

    assert v.expression_schema() == {"address": "string", "ending": "string"}

    results = v.to_columns()
    for row in range(100):
        source = results["a"][row]
        want_address = re.match(r"^([a-zA-Z0-9._-]+)@", source).group(1)
        want_domain = re.search(r"@([a-zA-Z.]+)$", source).group(1)
        want_ending = re.search(r"[.](.*)$", want_domain).group(1)
        assert results["address"][row] == want_address
        assert results["ending"][row] == want_ending
1784
+
1785
def test_view_expressions_replace(self):
    """``replace`` substitutes only the first regex match, like ``re.sub(count=1)``.

    Column ``a`` holds 1000 random numbers written as four 4-digit groups
    separated by ``-`` or a space; column ``b`` holds the row index as a
    string. The expressions are given in list form, and the asserted
    schemas use the leading ``//name`` comment of each as its alias.
    """

    def digits():
        return randstr(4, "0123456789")

    data = []

    for _ in range(1000):
        separator = "-" if random() > 0.5 else " "
        data.append(
            "{}{}{}{}{}{}{}".format(
                digits(),
                separator,
                digits(),
                separator,
                digits(),
                separator,
                digits(),
            )
        )

    # XXX: This test was broken because it thought `b` was an integer column.
    # An explicit schema keeps `b` a string column.
    table = Table({"a": "string", "b": "string"})
    table.update({"a": data, "b": [str(i) for i in range(1000)]})
    expressions = [
        """//w
replace('abc-def-hijk', '-', '')""",
        """//x
replace("a", '[0-9]{4}$', "b")""",
        """//y
replace("a", '[a-z]{4}$', "b")""",
        """//z
var x := 'long string, very cool!'; replace("a", '^[0-9]{4}', x)""",
    ]

    expected_schema = {
        "w": "string",
        "x": "string",
        "y": "string",
        "z": "string",
    }

    validate = table.validate_expressions(expressions)
    assert validate["expression_schema"] == expected_schema

    view = table.view(expressions=expressions)
    schema = view.expression_schema()
    assert schema == expected_schema
    results = view.to_columns()

    for i in range(1000):
        source = results["a"][i]
        idx = results["b"][i]
        # Only the first "-" is removed from the literal.
        assert results["w"][i] == "abcdef-hijk"
        # `count` is passed by keyword: passing it positionally is
        # deprecated since Python 3.13.
        assert results["x"][i] == re.sub(r"[0-9]{4}$", idx, source, count=1)
        # The data contains no lowercase letters, so "y" leaves it unchanged.
        assert results["y"][i] == source
        assert results["z"][i] == re.sub(
            r"^[0-9]{4}", "long string, very cool!", source, count=1
        )
1846
+
1847
def test_view_replace_invalid(self):
    """Check that ``replace`` calls with a non-string replacement fail validation.

    Each expression passes something other than a string as the third
    argument (an integer literal, ``today()``, ``null``, or a variable
    holding an integer); none of them may appear in the validated schema.
    """
    # alias -> expression body; the alias is supplied by a leading `//alias`
    # comment line in the expression text.
    rejected = {
        "v": "replace('abc-def-hijk', '-', 123)",
        "w": "replace('', '-', today())",
        "x": """replace("a", '[0-9]{4}$', today())""",
        "y": """replace("a", '[a-z]{4}$', null)""",
        "z": """var x := 123; replace("a", '^[0-9]{4}', x)""",
    }
    table = Table({"a": "string", "b": "string"})
    outcome = table.validate_expressions(
        [f"//{alias}\n{body}" for alias, body in rejected.items()]
    )
    assert outcome["expression_schema"] == {}
1863
+
1864
def test_view_expressions_replace_all(self):
    """Check that ``replace_all`` rewrites every regex match in each row.

    Column ``a`` holds 1000 strings, each built from four
    ``randstr(4, "0123456789")`` groups joined by either ``-`` or a space;
    column ``b`` holds the row index rendered as a string.  Each expression
    column is compared against the equivalent unbounded ``re.sub`` call.
    """
    samples = []
    for _ in range(1000):
        # Draw the separator first, then the four groups, one row at a time.
        sep = "-" if random() > 0.5 else " "
        groups = [randstr(4, "0123456789") for _ in range(4)]
        samples.append(sep.join(groups))

    table = Table({"a": "string", "b": "string"})
    table.update({"a": samples, "b": [str(n) for n in range(1000)]})

    # alias -> expression body; the alias is supplied by a leading `//alias`
    # comment line in the expression text.
    bodies = {
        "w": "replace_all('abc-def-hijk', '-', '')",
        "x": """replace_all("a", '[0-9]{4}$', "b")""",
        "y": """replace_all("a", '[a-z]{4}$', "b")""",
        "z": """var x := 'long string, very cool!'; replace_all("a", '^[0-9]{4}', x)""",
    }
    expressions = [f"//{alias}\n{body}" for alias, body in bodies.items()]
    all_strings = {alias: "string" for alias in bodies}

    validated = table.validate_expressions(expressions)
    assert validated["expression_schema"] == all_strings

    view = table.view(expressions=expressions)
    assert view.expression_schema() == all_strings

    columns = view.to_columns()

    for row in range(1000):
        original = columns["a"][row]
        replacement = columns["b"][row]
        # Every "-" is removed from the literal input.
        assert columns["w"][row] == "abcdefhijk"
        assert columns["x"][row] == re.sub(r"[0-9]{4}$", replacement, original)
        # The `[a-z]{4}$` pattern is expected not to match, leaving the row
        # untouched.
        assert columns["y"][row] == original
        assert columns["z"][row] == re.sub(
            r"^[0-9]{4}", "long string, very cool!", original
        )
1924
+
1925
def test_view_replace_invalid_variation(self):
    """Check that ``replace_all`` calls with a non-string replacement fail validation.

    Each expression passes something other than a string as the third
    argument (an integer literal, ``today()``, ``null``, or a variable
    holding an integer); none of them may appear in the validated schema.
    """
    # alias -> expression body; the alias is supplied by a leading `//alias`
    # comment line in the expression text.
    rejected = {
        "v": "replace_all('abc-def-hijk', '-', 123)",
        "w": "replace_all('', '-', today())",
        "x": """replace_all("a", '[0-9]{4}$', today())""",
        "y": """replace_all("a", '[a-z]{4}$', null)""",
        "z": """var x := 123; replace_all("a", '^[0-9]{4}', x)""",
    }
    table = Table({"a": "string", "b": "string"})
    outcome = table.validate_expressions(
        [f"//{alias}\n{body}" for alias, body in rejected.items()]
    )
    assert outcome["expression_schema"] == {}