perspective-python 4.2.0__cp311-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- perspective/__init__.py +396 -0
- perspective/extension/finos-perspective-nbextension.json +5 -0
- perspective/handlers/__init__.py +11 -0
- perspective/handlers/aiohttp.py +61 -0
- perspective/handlers/starlette.py +55 -0
- perspective/handlers/tornado.py +184 -0
- perspective/perspective.pyd +0 -0
- perspective/templates/exported_widget.html.template +35 -0
- perspective/tests/__init__.py +11 -0
- perspective/tests/async/test_async_client.py +83 -0
- perspective/tests/async/test_websocket_client.py +124 -0
- perspective/tests/conftest.py +272 -0
- perspective/tests/core/__init__.py +11 -0
- perspective/tests/core/test_async.py +351 -0
- perspective/tests/multi_threaded/__init__.py +11 -0
- perspective/tests/multi_threaded/test_multi_threaded.py +201 -0
- perspective/tests/server/__init__.py +11 -0
- perspective/tests/server/test_server.py +1016 -0
- perspective/tests/server/test_session.py +110 -0
- perspective/tests/table/__init__.py +11 -0
- perspective/tests/table/arrow/date32.arrow +0 -0
- perspective/tests/table/arrow/date64.arrow +0 -0
- perspective/tests/table/arrow/dict.arrow +0 -0
- perspective/tests/table/arrow/dict_update.arrow +0 -0
- perspective/tests/table/arrow/int_float_str.arrow +0 -0
- perspective/tests/table/arrow/int_float_str_file.arrow +0 -0
- perspective/tests/table/arrow/int_float_str_update.arrow +0 -0
- perspective/tests/table/object_sequence.py +402 -0
- perspective/tests/table/test_column_paths.py +89 -0
- perspective/tests/table/test_delete.py +124 -0
- perspective/tests/table/test_exception.py +65 -0
- perspective/tests/table/test_leaks.py +54 -0
- perspective/tests/table/test_ports.py +178 -0
- perspective/tests/table/test_remove.py +102 -0
- perspective/tests/table/test_table.py +641 -0
- perspective/tests/table/test_table_arrow.py +503 -0
- perspective/tests/table/test_table_datetime.py +2409 -0
- perspective/tests/table/test_table_infer.py +201 -0
- perspective/tests/table/test_table_limit.py +45 -0
- perspective/tests/table/test_table_numpy.py +1022 -0
- perspective/tests/table/test_table_pandas.py +1018 -0
- perspective/tests/table/test_table_polars.py +251 -0
- perspective/tests/table/test_table_view_table.py +130 -0
- perspective/tests/table/test_to_arrow.py +417 -0
- perspective/tests/table/test_to_arrow_lz4.py +32 -0
- perspective/tests/table/test_to_format.py +1024 -0
- perspective/tests/table/test_to_polars.py +26 -0
- perspective/tests/table/test_update.py +545 -0
- perspective/tests/table/test_update_arrow.py +980 -0
- perspective/tests/table/test_update_pandas.py +211 -0
- perspective/tests/table/test_view.py +2261 -0
- perspective/tests/table/test_view_expression.py +1940 -0
- perspective/tests/test_dependencies.py +53 -0
- perspective/tests/viewer/__init__.py +11 -0
- perspective/tests/viewer/test_viewer.py +246 -0
- perspective/tests/widget/__init__.py +11 -0
- perspective/tests/widget/test_widget.py +278 -0
- perspective/tests/widget/test_widget_pandas.py +453 -0
- perspective/virtual_servers/__init__.py +134 -0
- perspective/virtual_servers/clickhouse.py +245 -0
- perspective/virtual_servers/duckdb.py +236 -0
- perspective/widget/__init__.py +349 -0
- perspective/widget/viewer/__init__.py +15 -0
- perspective/widget/viewer/validate.py +22 -0
- perspective/widget/viewer/viewer.py +343 -0
- perspective/widget/viewer/viewer_traitlets.py +101 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/install.json +5 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/package.json +71 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js +2 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/253.5f5c9e80605aa4106a28.js.LICENSE.txt +25 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/523.c030af5d3c4f67ff83f6.js +1 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/remoteEntry.95a8ea1b44d96032833f.js +1 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/style.js +4 -0
- perspective_python-4.2.0.data/data/share/jupyter/labextensions/@perspective-dev/jupyterlab/static/third-party-licenses.json +16 -0
- perspective_python-4.2.0.dist-info/METADATA +27 -0
- perspective_python-4.2.0.dist-info/RECORD +79 -0
- perspective_python-4.2.0.dist-info/WHEEL +4 -0
- perspective_python-4.2.0.dist-info/licenses/LICENSE.md +193 -0
- perspective_python-4.2.0.dist-info/licenses/LICENSE_THIRDPARTY_cargo.yml +17395 -0
|
@@ -0,0 +1,1940 @@
|
|
|
1
|
+
# ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
|
|
2
|
+
# ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃
|
|
3
|
+
# ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃
|
|
4
|
+
# ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃
|
|
5
|
+
# ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃
|
|
6
|
+
# ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
|
|
7
|
+
# ┃ Copyright (c) 2017, the Perspective Authors. ┃
|
|
8
|
+
# ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
|
|
9
|
+
# ┃ This file is part of the Perspective library, distributed under the terms ┃
|
|
10
|
+
# ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
|
|
11
|
+
# ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
|
|
12
|
+
|
|
13
|
+
import re
|
|
14
|
+
from random import random, randint, choices
|
|
15
|
+
from string import ascii_letters
|
|
16
|
+
from pytest import raises
|
|
17
|
+
from datetime import date, datetime
|
|
18
|
+
from time import mktime
|
|
19
|
+
from perspective import PerspectiveError
|
|
20
|
+
from .test_view import compare_delta
|
|
21
|
+
import perspective as psp
|
|
22
|
+
|
|
23
|
+
# Module-level fixtures shared by every test in this file: a single local
# client created from a fresh `psp.Server()`.
client = psp.Server().new_local_client()
|
|
24
|
+
# `Table` is the client's `table` attribute; the tests below call it as
# `Table(data)` / `Table(data, limit=...)` to create tables.
Table = client.table
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def randstr(length, input=ascii_letters):
    """Return a random string of ``length`` characters.

    Characters are sampled with replacement from ``input``, which defaults
    to the ASCII letters.
    """
    sampled = choices(input, k=length)
    return "".join(sampled)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class TestViewExpression(object):
|
|
32
|
+
def test_table_validate_expressions_empty(self):
    """Validating an empty expression list yields three empty maps."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    report = tbl.validate_expressions([])
    # Checked in the same order as the sections are documented in the report.
    for section in ("expression_schema", "expression_alias", "errors"):
        assert report[section] == {}
|
|
38
|
+
|
|
39
|
+
def test_view_expression_schema_empty(self):
    """A view built without expressions has an empty expression schema."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    plain_view = tbl.view()
    expected_columns = {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
    assert plain_view.to_columns() == expected_columns
    assert plain_view.expression_schema() == {}
|
|
44
|
+
|
|
45
|
+
def test_view_validate_expressions_alias_map_errors(self):
    """Every alias passed to validation is echoed back in the alias map."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    by_alias = {
        "x": '"a"',
        "y": '"b" * 0.5',
        "c": "'abcdefg'",
        "d": "true and false",
        "e": 'float("a") > 2 ? null : 1',
        "f": "today()",
        "g": "now()",
        "h": "length(123)",
    }

    report = tbl.validate_expressions(by_alias)
    alias_map = report["expression_alias"]

    # Aliases of errored expressions should be present in the alias map too.
    for name in ("x", "y", "c", "d", "e", "f", "g", "h"):
        assert alias_map[name] == by_alias[name]
|
|
64
|
+
|
|
65
|
+
def test_view_validate_expressions_alias_map(self):
    """The alias map returned by validation maps each alias to its expression."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    by_alias = {
        "x": '"a"',
        "y": '"b" * 0.5',
        "c": "'abcdefg'",
        "d": "true and false",
        "e": 'float("a") > 2 ? null : 1',
        "f": "today()",
        "g": "now()",
        "h": "length('abcd')",
    }

    report = tbl.validate_expressions(by_alias)
    alias_map = report["expression_alias"]
    for name in ("x", "y", "c", "d", "e", "f", "g", "h"):
        assert alias_map[name] == by_alias[name]
|
|
82
|
+
|
|
83
|
+
def test_view_expression_schema_all_types(self):
    """Check schema, output and alias map for one expression of each type.

    The ``now()`` column is dropped before comparing output because its
    value cannot be matched against ``datetime.now()``.
    """
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    expressions = [
        '"a"',
        '"b" * 0.5',
        "'abcdefg'",
        "true and false",
        'float("a") > 2 ? null : 1',
        "today()",
        "now()",
        "length('abcd')",
    ]

    view = table.view(expressions=expressions)
    assert view.expression_schema() == {
        '"a"': "integer",
        '"b" * 0.5': "float",
        "'abcdefg'": "string",
        "true and false": "boolean",
        'float("a") > 2 ? null : 1': "float",
        "today()": "date",
        "now()": "datetime",
        "length('abcd')": "float",
    }

    result = view.to_columns()
    # Read the current date exactly once: calling date.today() separately
    # for year, month and day could mix components from two different days
    # when the test runs across midnight.
    current_day = date.today()
    today = datetime(current_day.year, current_day.month, current_day.day)
    del result["now()"]  # no need to match datetime.now()

    assert result == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        '"a"': [1, 2, 3, 4],
        '"b" * 0.5': [2.5, 3, 3.5, 4],
        "'abcdefg'": ["abcdefg" for _ in range(4)],
        "true and false": [False for _ in range(4)],
        'float("a") > 2 ? null : 1': [1, 1, None, None],
        "today()": [int(today.timestamp()) * 1000 for _ in range(4)],
        "length('abcd')": [4 for _ in range(4)],
    }

    # Un-aliased expressions are keyed by their own text in the alias map.
    validated = table.validate_expressions(expressions)
    for expr in expressions:
        assert validated["expression_alias"][expr] == expr
|
|
127
|
+
|
|
128
|
+
def test_table_validate_expressions_with_errors(self):
    """Invalid expressions produce no schema entries but do produce errors.

    Each error entry carries the line, column and message of the failure.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    bad_exprs = ['"Sales" + "a"', "datetime()", "string()", "for () {}"]
    expected_errors = {
        '"Sales" + "a"': {
            "column": 0,
            "error_message": 'Value Error - Input column "Sales" does not exist.',
            "line": 0,
        },
        "datetime()": {
            "column": 10,
            "error_message": "Zero parameter call to generic function: datetime not allowed",
            "line": 0,
        },
        "for () {}": {
            "column": 5,
            "error_message": "Premature end of expression[2]",
            "line": 0,
        },
        "string()": {
            "column": 8,
            "error_message": "Zero parameter call to generic function: string not allowed",
            "line": 0,
        },
    }

    report = tbl.validate_expressions(bad_exprs)
    assert report["expression_schema"] == {}
    # Even failing expressions are listed in the alias map, keyed by themselves.
    assert report["expression_alias"] == dict(zip(bad_exprs, bad_exprs))
    assert report["errors"] == expected_errors
|
|
156
|
+
|
|
157
|
+
def test_view_expression_create(self):
    """An aliased expression appears as an extra float column in the view."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    summed = tbl.view(expressions={"computed": ' "a" + "b"'})
    expected_columns = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert summed.to_columns() == expected_columns
    assert summed.expression_schema() == {"computed": "float"}
|
|
166
|
+
|
|
167
|
+
def test_view_expression_string_per_page(self):
    """Four expressions, each lower-casing its own 6400-character literal."""
    tbl = Table({"a": list(range(100))})
    payloads = [randstr(6400) for _ in range(4)]

    exprs = {}
    for idx in range(4):
        alias = "computed{}".format(idx)
        exprs[alias] = "var x := '{}'; lower(x)".format(payloads[idx])
    view = tbl.view(expressions=exprs)

    columns = view.to_columns()
    expr_schema = view.expression_schema()

    for idx, payload in enumerate(payloads):
        alias = "computed{}".format(idx)
        lowered = payload.lower()
        assert expr_schema[alias] == "string"
        # Every one of the 100 rows carries the same lower-cased literal.
        assert columns[alias] == [lowered] * 100
|
|
185
|
+
|
|
186
|
+
def test_view_expression_string_page_stress(self):
    """Concatenate four 640-character local string variables in one expression."""
    tbl = Table({"a": list(range(100))})
    chunks = [letter * 640 for letter in ("a", "b", "c", "d")]

    template = "var a := '{}'; var b := '{}'; var c := '{}'; var d := '{}'; concat(a, b, c, d)"
    view = tbl.view(expressions={"computed": template.format(*chunks)})

    columns = view.to_columns()
    expr_schema = view.expression_schema()
    assert expr_schema == {"computed": "string"}
    joined = "".join(chunks)
    assert columns["computed"] == [joined] * 100
|
|
207
|
+
|
|
208
|
+
def test_view_expression_new_vocab_page(self):
    """Expressions mixing a string column with many long random literals.

    Each generated expression declares 1-26 local variables, each the upper-
    or lower-cased form of a random literal, and concatenates column "a"
    with all of them.
    """
    tbl = Table({"a": [randstr(100) for _ in range(100)]})

    def make_expression(idx):
        # The leading `//name` comment line names the expression.
        header = "//computed{}".format(idx)
        lines = [header]
        var_names = []
        expected_parts = []

        for var_name in ascii_letters[: randint(1, 26)]:
            literal = randstr(randint(100, 1000))

            if random() > 0.5:
                expected = literal.upper()
                call = "upper('{}')".format(literal)
            else:
                expected = literal.lower()
                call = "lower('{}')".format(literal)

            var_names.append(var_name)
            expected_parts.append(expected)
            lines.append("var {} := {};".format(var_name, call))

        lines.append('concat("a", {})'.format(", ".join(var_names)))

        return {
            "expression_name": header[2:],
            "expression": "\n".join(lines),
            "output": "".join(expected_parts),
        }

    generated = [make_expression(i) for i in range(10)]

    view = tbl.view(expressions=[item["expression"] for item in generated])

    columns = view.to_columns()
    expr_schema = view.expression_schema()

    for item in generated:
        alias = item["expression_name"]
        assert expr_schema[alias] == "string"

        # Row by row: the column value followed by the concatenated literals.
        for row in range(100):
            prefix = columns["a"][row]
            assert columns[alias][row] == prefix + item["output"]
|
|
255
|
+
|
|
256
|
+
def test_view_expression_collide_local_var(self):
    """Make sure that strings declared under the same var name in
    different expressions do not collide."""
    tbl = Table({"a": [1, 2, 3, 4]})
    literals = [randstr(50) for _ in range(8)]
    first_half, second_half = literals[:4], literals[4:]

    # Both expressions use the identical variable names w, x, y and z.
    template = " var w := '{}'; var x := '{}'; var y := '{}'; var z := '{}'; concat(w, x, y, z)"
    view = tbl.view(
        expressions={
            "computed": template.format(*first_half),
            "computed2": template.format(*second_half),
        }
    )

    columns = view.to_columns()
    expr_schema = view.expression_schema()
    assert expr_schema == {"computed": "string", "computed2": "string"}
    assert columns["computed"] == ["".join(first_half)] * 4
    assert columns["computed2"] == ["".join(second_half)] * 4
|
|
278
|
+
|
|
279
|
+
def test_view_random_expressions(self):
    """Five rounds of five random expressions that each return their last variable."""

    def make_expression():
        """Create a random expression with a few local string vars that
        are too long to be stored in-place."""
        alias = randstr(10)
        var_count = randint(1, 26)
        statements = []
        last_name = ""
        last_literal = ""
        for var_name in ascii_letters[:var_count]:
            literal = randstr(randint(15, 100))
            statements.append("var {} := '{}';\n".format(var_name, literal))
            # Whichever variable is declared last is what the expression returns.
            last_name, last_literal = var_name, literal

        return {
            "expression_name": alias,
            "expression": "".join(statements) + last_name,
            "output": last_literal,
        }

    tbl = Table({"a": [1, 2, 3, 4]})

    for _ in range(5):
        batch = [make_expression() for _ in range(5)]
        expected_by_alias = {item["expression_name"]: item["output"] for item in batch}
        view = tbl.view(
            expressions={item["expression_name"]: item["expression"] for item in batch}
        )
        expr_schema = view.expression_schema()
        columns = view.to_columns()
        for alias, expected in expected_by_alias.items():
            assert expr_schema[alias] == "string"
            assert columns[alias] == [expected] * 4
|
|
318
|
+
|
|
319
|
+
def test_view_expression_string_literal_compare(self):
    """Comparing two equal string literals is typed boolean and is true on every row."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": " 'a' == 'a'"}
    expected_schema = {"computed": "boolean"}

    report = tbl.validate_expressions(exprs)
    assert report["expression_schema"] == expected_schema

    view = tbl.view(expressions={"computed": " 'a' == 'a'"})
    assert view.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [True] * 4,
    }
    assert view.expression_schema() == expected_schema
|
|
334
|
+
|
|
335
|
+
def test_view_expression_string_literal_compare_null(self):
    """Comparing a string literal with null is typed float and is 0 on every row."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": " 'a' == null"}
    expected_schema = {"computed": "float"}

    report = tbl.validate_expressions(exprs)
    assert report["expression_schema"] == expected_schema

    view = tbl.view(expressions={"computed": " 'a' == null"})
    assert view.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [0] * 4,
    }
    assert view.expression_schema() == expected_schema
|
|
350
|
+
|
|
351
|
+
def test_view_expression_string_literal_compare_column(self):
    """Comparing a string column with a literal gives a per-row boolean."""
    tbl = Table({"a": ["a", "a", "b", "c"]})
    expected_schema = {"computed": "boolean"}

    report = tbl.validate_expressions({"computed": " \"a\" == 'a'"})
    assert report["expression_schema"] == expected_schema

    view = tbl.view(expressions={"computed": " \"a\" == 'a'"})
    expected_columns = {
        "a": ["a", "a", "b", "c"],
        "computed": [True, True, False, False],
    }
    assert view.to_columns() == expected_columns
    assert view.expression_schema() == expected_schema
|
|
362
|
+
|
|
363
|
+
def test_view_expression_string_literal_compare_column_null(self):
    """Null cells in the compared string column evaluate to False."""
    tbl = Table({"a": ["a", None, "b", "c", None]})
    expected_schema = {"computed": "boolean"}

    report = tbl.validate_expressions({"computed": " \"a\" == 'a'"})
    assert report["expression_schema"] == expected_schema

    view = tbl.view(expressions={"computed": " \"a\" == 'a'"})
    expected_columns = {
        "a": ["a", None, "b", "c", None],
        "computed": [True, False, False, False, False],
    }
    assert view.to_columns() == expected_columns
    assert view.expression_schema() == expected_schema
|
|
374
|
+
|
|
375
|
+
def test_view_expression_string_literal_compare_column_null_long(self):
    """Compare a nullable string column against a 26-character literal."""
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    # Row 3 differs from the literal only by one extra leading "a".
    tbl = Table({"a": [alphabet, None, alphabet, "a" + alphabet, None]})
    expected_schema = {"computed": "boolean"}

    report = tbl.validate_expressions(
        {"computed": " \"a\" == 'abcdefghijklmnopqrstuvwxyz'"}
    )
    assert report["expression_schema"] == expected_schema

    view = tbl.view(
        expressions={"computed": "\"a\" == 'abcdefghijklmnopqrstuvwxyz'"}
    )
    columns = view.to_columns()
    assert columns["computed"] == [True, False, True, False, False]
    assert view.expression_schema() == expected_schema
|
|
397
|
+
|
|
398
|
+
def test_view_expression_string_literal_compare_column_null_long_var(self):
    """Same comparison as the long-literal test, but via a local variable."""
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    # Row 3 differs from the literal only by one extra leading "a".
    tbl = Table({"a": [alphabet, None, alphabet, "a" + alphabet, None]})
    expected_schema = {"computed": "boolean"}

    report = tbl.validate_expressions(
        {"computed": " var xyz := 'abcdefghijklmnopqrstuvwxyz'; \"a\" == xyz"}
    )
    assert report["expression_schema"] == expected_schema

    view = tbl.view(
        expressions={
            "computed": "var xyz := 'abcdefghijklmnopqrstuvwxyz'; \"a\" == xyz"
        }
    )
    columns = view.to_columns()
    assert columns["computed"] == [True, False, True, False, False]
    assert view.expression_schema() == expected_schema
|
|
422
|
+
|
|
423
|
+
def test_view_expression_string_literal_compare_if(self):
    """A string comparison used as an ``if`` condition selects 1 or 2 per row."""
    tbl = Table({"a": ["a", "a", "b", "c"]})
    expected_schema = {"computed": "float"}

    report = tbl.validate_expressions({"computed": " if(\"a\" == 'a', 1, 2)"})
    assert report["expression_schema"] == expected_schema

    view = tbl.view(expressions={"computed": "if(\"a\" == 'a', 1, 2)"})
    expected_columns = {
        "a": ["a", "a", "b", "c"],
        "computed": [1, 1, 2, 2],
    }
    assert view.to_columns() == expected_columns
    assert view.expression_schema() == expected_schema
|
|
434
|
+
|
|
435
|
+
def test_view_expression_string_literal_var(self):
    """Build the same un-aliased string-variable expression ten times over."""
    tbl = Table({"a": [1, 2, 3]})
    expression = "var x := 'Eabcdefghijklmn'; var y := '0123456789'; concat(x, y)"
    # Without an alias the output column is keyed by the expression text.
    expected_columns = {
        "a": [1, 2, 3],
        expression: ["Eabcdefghijklmn0123456789"] * 3,
    }

    for _ in range(10):
        view = tbl.view(expressions=[expression])
        assert view.to_columns() == expected_columns
|
|
452
|
+
|
|
453
|
+
def test_view_streaming_expression(self):
    """A constant expression covers rows appended after the view was created."""

    def batch():
        return [{"a": random()} for _ in range(50)]

    tbl = Table(batch())
    view = tbl.view(expressions=["123"])

    for _ in range(5):
        tbl.update(batch())

    # 50 initial rows plus five updates of 50 rows each.
    assert tbl.size() == 300
    columns = view.to_columns()
    assert columns["123"] == [123] * 300
|
|
466
|
+
|
|
467
|
+
def test_view_streaming_expression_limit(self):
    """With ``limit=50`` the table, and the expression column, stay at 50 rows."""

    def batch():
        return [{"a": random()} for _ in range(55)]

    tbl = Table(batch(), limit=50)
    view = tbl.view(expressions=["123"])

    # Every batch is larger than the limit.
    for _ in range(5):
        tbl.update(batch())

    assert tbl.size() == 50
    columns = view.to_columns()
    assert columns["123"] == [123] * 50
|
|
480
|
+
|
|
481
|
+
def test_view_streaming_expression_one(self):
    """Group by an expression column while the table keeps receiving updates."""

    def batch():
        return [{"a": random()} for _ in range(50)]

    tbl = Table(batch())
    grouped = tbl.view(group_by=["c0"], expressions={"c0": '"a" * 2'})
    for _ in range(5):
        tbl.update(batch())

    assert tbl.size() == 300
    assert grouped.expression_schema() == {"c0": "float"}
|
|
492
|
+
|
|
493
|
+
def test_view_streaming_expression_two(self):
    """Group by and split by two expression columns under streaming updates."""

    def batch():
        return [{"a": random()} for _ in range(50)]

    tbl = Table(batch())
    pivot_config = {
        "group_by": ["c0"],
        "split_by": ["c1"],
        "expressions": {"c0": '"a" * 2', "c1": "'new string'"},
    }
    pivoted = tbl.view(**pivot_config)
    for _ in range(5):
        tbl.update(batch())

    assert tbl.size() == 300
    # The string expression "c1" is reported as integer in this pivoted view.
    assert pivoted.expression_schema() == {"c0": "float", "c1": "integer"}  # pivoted
|
|
508
|
+
|
|
509
|
+
def test_view_expression_create_no_alias(self):
    """An expression given without an alias is keyed by its own text."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    view = tbl.view(expressions=['"a" + "b"'])
    expected_columns = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        '"a" + "b"': [6, 8, 10, 12],
    }
    assert view.to_columns() == expected_columns
    assert view.expression_schema() == {'"a" + "b"': "float"}
|
|
519
|
+
|
|
520
|
+
def test_view_expression_should_not_overwrite_real(self):
    """Aliasing an expression to an existing column name raises PerspectiveError."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    expected_message = (
        'Abort(): Value Error - expression "a" cannot overwrite an existing column.'
    )
    with raises(PerspectiveError) as ex:
        tbl.view(expressions={"a": 'upper("a")'})

    assert str(ex.value) == expected_message
|
|
529
|
+
|
|
530
|
+
def test_legacy_view_duplicate_expression_should_resolve_to_last_alias(self):
    """Two ``//abc``-named expressions: the later one supplies the column."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    # Legacy form: the alias is the leading `//abc` comment line.
    duplicated = ['//abc\n"a" + "b"', '//abc\n"a" - "b"']
    view = tbl.view(columns=["abc"], expressions=duplicated)

    # -4 on every row is `"a" - "b"`, the second expression.
    assert view.to_columns() == {"abc": [-4] * 4}
|
|
538
|
+
|
|
539
|
+
def test_view_expression_multiple_alias(self):
    """The same expression under four aliases gives four float columns."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    aliases = ("computed", "computed2", "computed3", "computed4")
    view = tbl.view(expressions={alias: '"a" + "b"' for alias in aliases})

    expected_expr_schema = {alias: "float" for alias in aliases}
    expected_schema = {"a": "integer", "b": "integer"}
    expected_schema.update(expected_expr_schema)

    assert view.schema() == expected_schema
    assert view.expression_schema() == expected_expr_schema
|
|
567
|
+
|
|
568
|
+
def test_view_expression_multiple_views_with_the_same_alias_should_not_overwrite(
    self,
):
    """Two views on one table may reuse an alias for different expressions."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sum_view = tbl.view(expressions={"computed": ' "a" + "b"'})
    product_view = tbl.view(expressions={"computed": ' "a" * "b"'})

    expected_schema = {"computed": "float"}
    assert sum_view.expression_schema() == expected_schema
    assert product_view.expression_schema() == expected_schema

    # Each view keeps the output of its own expression.
    assert sum_view.to_columns()["computed"] == [6, 8, 10, 12]
    assert product_view.to_columns()["computed"] == [5, 12, 21, 32]
|
|
581
|
+
|
|
582
|
+
def test_view_expression_multiple_views_with_the_same_alias_pivoted(
    self,
):
    """Grouped views sharing an alias keep separate types, row paths and aggregates."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    numeric_view = tbl.view(
        group_by=["computed"],
        aggregates={"computed": ("weighted mean", ["b"])},
        expressions={"computed": ' "a" + "b"'},
    )
    text_view = tbl.view(
        group_by=["computed"],
        aggregates={"computed": "last"},
        expressions={"computed": "concat('abc', ' ', 'def')"},
    )
    assert numeric_view.expression_schema() == {"computed": "float"}
    assert text_view.expression_schema() == {"computed": "string"}

    numeric_cols = numeric_view.to_columns()
    text_cols = text_view.to_columns()

    # The first row path entry is empty in both grouped outputs.
    assert numeric_cols["__ROW_PATH__"] == [[], [6], [8], [10], [12]]
    assert text_cols["__ROW_PATH__"] == [[], ["abc def"]]

    assert numeric_cols["computed"] == [9.384615384615385, 6, 8, 10, 12]
    assert text_cols["computed"] == ["abc def", "abc def"]
|
|
609
|
+
|
|
610
|
+
def test_view_expression_multiple_views_with_the_same_alias_all_types(
    self,
):
    """Two views reuse the same aliases for expressions of different types.

    The second view omits ``computed3``, which must then be absent from
    its output.
    """
    # Take a single timestamp and derive both the table rows and the
    # expected bucket values from it. Reading the clock again when building
    # the rows could land in a later minute (or day) than the expectation
    # and fail the test spuriously.
    now = datetime.now()
    today = now.date()

    month_bucketed = datetime(today.year, today.month, 1).timestamp() * 1000
    minute_bucketed = (
        datetime(
            now.year, now.month, now.day, now.hour, now.minute, 0, 0
        ).timestamp()
        * 1000
    )

    # Created from a schema (column name -> type name), then filled below.
    table = Table(
        {
            "a": "integer",
            "b": "float",
            "c": "datetime",
            "d": "date",
            "e": "boolean",
            "f": "string",
        }
    )

    table.update(
        {
            "a": [1, 2, 3, 4],
            "b": [5.5, 6.5, 7.5, 8.5],
            "c": [str(now) for _ in range(4)],
            "d": [str(today) for _ in range(4)],
            "e": [True, False, True, False],
            "f": ["a", "b", "c", "d"],
        }
    )

    view = table.view(
        expressions={
            "computed": '"a" + "b"',
            "computed2": "bucket(\"c\", 'M')",
            "computed3": "concat('a', 'b', 'c')",
            "computed4": "'new string'",
        }
    )

    view2 = table.view(
        expressions={
            "computed": 'upper("f")',
            "computed2": '20 + ("b" * "a")',
            "computed4": "bucket(\"c\", 'm')",
        }
    )

    assert view.expression_schema() == {
        "computed": "float",
        "computed2": "date",
        "computed3": "string",
        "computed4": "string",
    }

    assert view2.expression_schema() == {
        "computed": "string",
        "computed2": "float",
        "computed4": "datetime",
    }

    result = view.to_columns()
    result2 = view2.to_columns()

    assert result["computed"] == [6.5, 8.5, 10.5, 12.5]
    assert result2["computed"] == ["A", "B", "C", "D"]

    assert result["computed2"] == [month_bucketed for _ in range(4)]
    assert result2["computed2"] == [25.5, 33, 42.5, 54]

    assert result["computed3"] == ["abc", "abc", "abc", "abc"]
    assert "computed3" not in result2

    assert result["computed4"] == ["new string" for _ in range(4)]
    assert result2["computed4"] == [minute_bucketed for _ in range(4)]
|
|
690
|
+
|
|
691
|
+
def test_view_expression_create_no_columns(self):
    """With `columns=[]` the view outputs nothing, yet the expression is
    still reported by `expression_schema()`.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": ' "a" + "b"'}
    view = tbl.view(columns=[], expressions=exprs)

    nothing = {}
    assert view.to_columns() == nothing
    assert view.schema() == nothing

    # The expression stays registered even though no column selects it.
    assert view.expression_schema() == {"computed": "float"}
|
|
699
|
+
|
|
700
|
+
def test_view_expression_create_columns(self):
    """Selecting only the expression column yields just that column in the
    output, the schema and the expression schema.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": ' "a" + "b"'}
    view = tbl.view(columns=["computed"], expressions=exprs)

    assert view.to_columns() == {"computed": [6, 8, 10, 12]}

    float_schema = {"computed": "float"}
    assert view.schema() == float_schema
    # The expression is also listed in the expression schema.
    assert view.expression_schema() == float_schema
|
|
707
|
+
|
|
708
|
+
def test_view_expression_create_clear(self):
    """After `Table.clear()` the view keeps the expression column in its
    schema and every column comes back empty.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    view = tbl.view(expressions={"computed": ' "a" + "b"'})

    populated = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert view.to_columns() == populated

    tbl.clear()

    assert view.schema() == {"a": "integer", "b": "integer", "computed": "float"}
    cleared = {name: [] for name in ("a", "b", "computed")}
    assert view.to_columns() == cleared
|
|
719
|
+
|
|
720
|
+
def test_view_expression_create_replace(self):
    """After `Table.replace()` the expression column is recomputed from the
    new data and the view schema is unchanged.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    view = tbl.view(expressions={"computed": ' "a" + "b"'})

    original = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert view.to_columns() == original

    tbl.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]})

    assert view.schema() == {"a": "integer", "b": "integer", "computed": "float"}
    replaced = {
        "a": [10, 20, 30, 40],
        "b": [50, 60, 70, 80],
        "computed": [60, 80, 100, 120],
    }
    assert view.to_columns() == replaced
|
|
735
|
+
|
|
736
|
+
def test_view_expression_multiple_dependents_replace(self):
    """Two expressions over the same input columns are both recomputed when
    the table data is replaced.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": '"a" + "b"', "final": '("a" + "b") ^ 2'}
    view = tbl.view(expressions=exprs)

    original = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
        "final": [36, 64, 100, 144],
    }
    assert view.to_columns() == original

    tbl.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]})

    expected_schema = {
        "a": "integer",
        "b": "integer",
        "computed": "float",
        "final": "float",
    }
    assert view.schema() == expected_schema

    replaced = {
        "a": [10, 20, 30, 40],
        "b": [50, 60, 70, 80],
        "computed": [60, 80, 100, 120],
        "final": [3600, 6400, 10000, 14400],
    }
    assert view.to_columns() == replaced
|
|
760
|
+
|
|
761
|
+
def test_view_expression_multiple_views_should_not_conflate(self):
    """Two views on one table, each with its own expression, only expose
    their own expression column.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sum_view = tbl.view(expressions={"computed": '"a" + "b"'})
    diff_view = tbl.view(expressions={"computed2": ' "a" - "b"'})

    assert sum_view.schema() == {"a": "integer", "b": "integer", "computed": "float"}
    assert diff_view.schema() == {"a": "integer", "b": "integer", "computed2": "float"}

    sum_columns = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert sum_view.to_columns() == sum_columns

    diff_columns = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed2": [-4, -4, -4, -4],
    }
    assert diff_view.to_columns() == diff_columns
|
|
783
|
+
|
|
784
|
+
def test_view_expression_multiple_views_should_all_clear(self):
    """Clearing the table empties every view built on it while each view
    keeps its own schema.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sum_view = tbl.view(expressions={"computed": '"a" + "b"'})
    diff_view = tbl.view(expressions={"computed2": ' "a" - "b"'})

    sum_schema = {"a": "integer", "b": "integer", "computed": "float"}
    diff_schema = {"a": "integer", "b": "integer", "computed2": "float"}
    assert sum_view.schema() == sum_schema
    assert diff_view.schema() == diff_schema

    assert sum_view.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert diff_view.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed2": [-4, -4, -4, -4],
    }

    tbl.clear()

    # Schemas are unchanged by the clear; only the rows are gone.
    assert sum_view.schema() == sum_schema
    assert diff_view.schema() == diff_schema
    assert sum_view.to_columns() == {"a": [], "b": [], "computed": []}
    assert diff_view.to_columns() == {"a": [], "b": [], "computed2": []}
|
|
817
|
+
|
|
818
|
+
def test_view_expression_multiple_views_should_all_replace(self):
    """Replacing the table data recomputes the expression column of every
    view built on it.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sum_view = tbl.view(expressions={"computed": '"a" + "b"'})
    diff_view = tbl.view(expressions={"computed2": ' "a" - "b"'})

    assert sum_view.schema() == {"a": "integer", "b": "integer", "computed": "float"}
    assert diff_view.schema() == {"a": "integer", "b": "integer", "computed2": "float"}

    assert sum_view.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert diff_view.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed2": [-4, -4, -4, -4],
    }

    tbl.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]})

    assert sum_view.to_columns() == {
        "a": [10, 20, 30, 40],
        "b": [50, 60, 70, 80],
        "computed": [60, 80, 100, 120],
    }
    assert diff_view.to_columns() == {
        "a": [10, 20, 30, 40],
        "b": [50, 60, 70, 80],
        "computed2": [-40, -40, -40, -40],
    }
|
|
853
|
+
|
|
854
|
+
def test_view_expression_delete_and_create(self):
    """An expression name can be reused with a different formula once the
    view that first declared it has been deleted.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    expected_schema = {"a": "integer", "b": "integer", "computed": "float"}

    first = tbl.view(expressions={"computed": '"a" + "b"'})
    assert first.schema() == expected_schema
    assert first.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }

    first.delete()

    # Same alias, different expression, on a fresh view.
    second = tbl.view(expressions={"computed": ' "a" - "b"'})
    assert second.schema() == expected_schema
    assert second.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [-4, -4, -4, -4],
    }
|
|
877
|
+
|
|
878
|
+
def test_view_expression_delete_and_create_with_updates(self):
    """Expressions follow appended rows, both on the first view and on a
    second view created after the first one was deleted.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    concat_expr = "upper(concat('abc', 'def'))"

    first = tbl.view(
        expressions={
            "computed": ' "a" + "b"',
            concat_expr: concat_expr,
        }
    )
    assert first.schema() == {
        "a": "integer",
        "b": "integer",
        "computed": "float",
        concat_expr: "string",
    }

    tbl.update({"a": [5, 6], "b": [9, 10]})
    assert first.to_columns() == {
        "a": [1, 2, 3, 4, 5, 6],
        "b": [5, 6, 7, 8, 9, 10],
        "computed": [6, 8, 10, 12, 14, 16],
        concat_expr: ["ABCDEF"] * 6,
    }

    first.delete()
    second = tbl.view(expressions={"computed2": '"a" - "b"'})
    assert second.schema() == {"a": "integer", "b": "integer", "computed2": "float"}

    # The table has no index, so each update appends two more rows.
    for _ in range(2):
        tbl.update({"a": [5, 6], "b": [9, 10]})

    assert second.to_columns() == {
        "a": [1, 2, 3, 4, 5, 6, 5, 6, 5, 6],
        "b": [5, 6, 7, 8, 9, 10, 9, 10, 9, 10],
        "computed2": [-4] * 10,
    }
|
|
914
|
+
|
|
915
|
+
def test_view_expression_append(self):
    """Rows appended with `Table.update()` get their expression column
    computed as well.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    view = tbl.view(expressions={"computed": '"a" + "b"'})

    assert view.schema() == {"a": "integer", "b": "integer", "computed": "float"}

    initial = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert view.to_columns() == initial

    tbl.update({"a": [5, 6], "b": [9, 10]})

    appended = {
        "a": [1, 2, 3, 4, 5, 6],
        "b": [5, 6, 7, 8, 9, 10],
        "computed": [6, 8, 10, 12, 14, 16],
    }
    assert view.to_columns() == appended
|
|
938
|
+
|
|
939
|
+
def test_view_expression_delta_zero(self, util):
    """Appending rows to a table with an expression view extends the view's
    output, expression column included.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    view = tbl.view(expressions={"computed": '"a" + "b"'})

    assert view.schema() == {"a": "integer", "b": "integer", "computed": "float"}

    initial = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert view.to_columns() == initial

    # NOTE(review): `updater` is defined but never registered on the view
    # (no `on_update` call in this test), so the delta comparison below
    # never runs. Confirm whether registration was intended.
    def updater(port, delta):
        compare_delta(delta, {"a": [5, 6], "b": [9, 10]})

    tbl.update({"a": [5, 6], "b": [9, 10]})

    appended = {
        "a": [1, 2, 3, 4, 5, 6],
        "b": [5, 6, 7, 8, 9, 10],
        "computed": [6, 8, 10, 12, 14, 16],
    }
    assert view.to_columns() == appended
|
|
966
|
+
|
|
967
|
+
def test_view_delete_with_scope(self):
    """Smoke test: create an expression view on an empty indexed table
    without keeping a reference to it, then update the table.

    The test has no assertions; it passes as long as nothing raises.
    """
    schema = {"id": "integer", "msg": "string", "val": "float"}
    tbl = Table(schema, index="id")

    # The returned view is intentionally not bound to a name, so the only
    # reference to it is dropped as soon as this statement finishes.
    tbl.view(
        columns=["inverted"],
        expressions={"inverted": '1 / "val"'},
    )

    row = {"id": 1, "msg": "test", "val": 1.0}
    tbl.update([row])
|
|
991
|
+
|
|
992
|
+
def test_view_expression_with_custom_columns(self):
    """An expression column can be listed in `columns` next to a regular
    table column.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": '"a" + "b"'}
    view = tbl.view(columns=["computed", "b"], expressions=exprs)

    expected = {
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert view.to_columns() == expected
|
|
1004
|
+
|
|
1005
|
+
def test_view_expression_with_group_by(self):
    """An expression column can be used as the `group_by` key."""
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": '"a" + "b"'}
    view = tbl.view(group_by=["computed"], expressions=exprs)

    # First entry is the total row (empty row path), then one per group.
    expected = {
        "__ROW_PATH__": [[], [6], [8], [10], [12]],
        "a": [10, 1, 2, 3, 4],
        "b": [26, 5, 6, 7, 8],
        "computed": [36.0, 6.0, 8.0, 10.0, 12.0],
    }
    assert view.to_columns() == expected
|
|
1019
|
+
|
|
1020
|
+
def test_view_expression_with_group_by_clear(self):
    """Clearing the table under a view grouped by an expression leaves only
    the total row, with `None` in every column.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": '"a" + "b"'}
    view = tbl.view(group_by=["computed"], expressions=exprs)

    grouped = {
        "__ROW_PATH__": [[], [6], [8], [10], [12]],
        "a": [10, 1, 2, 3, 4],
        "b": [26, 5, 6, 7, 8],
        "computed": [36.0, 6.0, 8.0, 10.0, 12.0],
    }
    assert view.to_columns() == grouped

    tbl.clear()

    cleared = {
        "__ROW_PATH__": [[]],
        "a": [None],
        "b": [None],
        "computed": [None],
    }
    assert view.to_columns() == cleared
|
|
1045
|
+
|
|
1046
|
+
def test_view_expression_with_group_by_replace(self):
    """Replacing the table data regroups a view that is grouped by an
    expression column.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": '"a" + "b"'}
    view = tbl.view(group_by=["computed"], expressions=exprs)

    before = {
        "__ROW_PATH__": [[], [6], [8], [10], [12]],
        "a": [10, 1, 2, 3, 4],
        "b": [26, 5, 6, 7, 8],
        "computed": [36.0, 6.0, 8.0, 10.0, 12.0],
    }
    assert view.to_columns() == before

    tbl.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]})

    after = {
        "__ROW_PATH__": [[], [60], [80], [100], [120]],
        "a": [100, 10, 20, 30, 40],
        "b": [260, 50, 60, 70, 80],
        "computed": [360.0, 60.0, 80.0, 100.0, 120.0],
    }
    assert view.to_columns() == after
|
|
1071
|
+
|
|
1072
|
+
def test_view_expression_with_split_by(self):
    """An expression column can be used as the `split_by` key; output
    columns are named `<value>|<column>`.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": '"a" + "b"'}
    view = tbl.view(split_by=["computed"], expressions=exprs)

    # Each row only has values under the split matching its own sum.
    expected = {
        "6|a": [1, None, None, None],
        "6|b": [5, None, None, None],
        "6|computed": [6, None, None, None],
        "8|a": [None, 2, None, None],
        "8|b": [None, 6, None, None],
        "8|computed": [None, 8, None, None],
        "10|a": [None, None, 3, None],
        "10|b": [None, None, 7, None],
        "10|computed": [None, None, 10.0, None],
        "12|a": [None, None, None, 4],
        "12|b": [None, None, None, 8],
        "12|computed": [None, None, None, 12.0],
    }
    assert view.to_columns() == expected
|
|
1094
|
+
|
|
1095
|
+
def test_view_expression_with_row_split_by(self):
    """Split a view by an expression column and check the resulting
    `<value>|<column>` output.

    NOTE(review): despite the name, only `split_by` is configured here (no
    `group_by`), the same setup as `test_view_expression_with_split_by`.
    Confirm whether a combined group_by/split_by case was intended.
    """
    tbl = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    exprs = {"computed": '"a" + "b"'}
    view = tbl.view(split_by=["computed"], expressions=exprs)

    expected = {
        "6|a": [1, None, None, None],
        "6|b": [5, None, None, None],
        "6|computed": [6.0, None, None, None],
        "8|a": [None, 2, None, None],
        "8|b": [None, 6, None, None],
        "8|computed": [None, 8.0, None, None],
        "10|a": [None, None, 3, None],
        "10|b": [None, None, 7, None],
        "10|computed": [None, None, 10.0, None],
        "12|a": [None, None, None, 4],
        "12|b": [None, None, None, 8],
        "12|computed": [None, None, None, 12.0],
    }
    assert view.to_columns() == expected
|
|
1117
|
+
|
|
1118
|
+
def test_view_expression_with_sort(self):
    """A view can be sorted on an expression column (descending string
    length here).
    """
    tbl = Table({"a": ["a", "ab", "abc", "abcd"]})
    exprs = {"computed": 'length("a")'}
    view = tbl.view(sort=[["computed", "desc"]], expressions=exprs)

    expected = {
        "a": ["abcd", "abc", "ab", "a"],
        "computed": [4, 3, 2, 1],
    }
    assert view.to_columns() == expected
|
|
1127
|
+
|
|
1128
|
+
def test_view_expression_with_filter(self):
    """A view can be filtered on an expression column (string length of at
    least 3 here).
    """
    tbl = Table({"a": ["a", "ab", "abc", "abcd"]})
    exprs = {"computed": 'length("a")'}
    view = tbl.view(filter=[["computed", ">=", 3]], expressions=exprs)

    expected = {"a": ["abc", "abcd"], "computed": [3, 4]}
    assert view.to_columns() == expected
|
|
1134
|
+
|
|
1135
|
+
def test_view_day_of_week_date(self, util):
    """`day_of_week` on a date column yields a string column of numbered
    weekday names.
    """
    march_days = range(9, 14)
    tbl = Table({"a": [date(2020, 3, day) for day in march_days]})
    view = tbl.view(expressions={"bucket": 'day_of_week("a")'})

    assert view.schema() == {"a": "date", "bucket": "string"}

    expected = {
        "a": [util.to_timestamp(datetime(2020, 3, day)) for day in march_days],
        "bucket": [
            "2 Monday",
            "3 Tuesday",
            "4 Wednesday",
            "5 Thursday",
            "6 Friday",
        ],
    }
    assert view.to_columns() == expected
|
|
1149
|
+
|
|
1150
|
+
def test_view_day_of_week_datetime(self, util):
    """`day_of_week` on a datetime column yields a string column of
    numbered weekday names.
    """
    moments = [datetime(2020, 3, day, 12, 30) for day in range(9, 14)]
    tbl = Table({"a": moments})
    view = tbl.view(expressions={"bucket": 'day_of_week("a")'})

    assert view.schema() == {"a": "datetime", "bucket": "string"}

    expected = {
        "a": [
            util.to_timestamp(datetime(2020, 3, day, 12, 30))
            for day in range(9, 14)
        ],
        "bucket": [
            "2 Monday",
            "3 Tuesday",
            "4 Wednesday",
            "5 Thursday",
            "6 Friday",
        ],
    }
    assert view.to_columns() == expected
|
|
1166
|
+
|
|
1167
|
+
def test_view_month_of_year_date(self, util):
    """`month_of_year` on a date column yields a string column of
    zero-padded, numbered month names.
    """
    months = range(1, 13)
    tbl = Table({"a": [date(2020, month, 15) for month in months]})
    view = tbl.view(expressions={"bucket": 'month_of_year("a")'})

    assert view.schema() == {"a": "date", "bucket": "string"}

    expected = {
        "a": [util.to_timestamp(datetime(2020, month, 15)) for month in months],
        "bucket": [
            "01 January",
            "02 February",
            "03 March",
            "04 April",
            "05 May",
            "06 June",
            "07 July",
            "08 August",
            "09 September",
            "10 October",
            "11 November",
            "12 December",
        ],
    }
    assert view.to_columns() == expected
|
|
1188
|
+
|
|
1189
|
+
# XXX: these datetimes are being interpreted as dates!!
|
|
1190
|
+
# To get around this, an explicit schema is passed to `Table`.
|
|
1191
|
+
def test_view_month_of_year_datetime(self, util):
    """`month_of_year` on a datetime column yields a string column of
    zero-padded, numbered month names.
    """
    # The table is created from an explicit schema and filled afterwards.
    tbl = Table({"a": "datetime"})
    months = range(1, 13)
    tbl.update({"a": [datetime(2020, month, 15) for month in months]})

    view = tbl.view(expressions={"bucket": 'month_of_year("a")'})
    assert view.schema() == {"a": "datetime", "bucket": "string"}

    expected = {
        "a": [util.to_timestamp(datetime(2020, month, 15)) for month in months],
        "bucket": [
            "01 January",
            "02 February",
            "03 March",
            "04 April",
            "05 May",
            "06 June",
            "07 July",
            "08 August",
            "09 September",
            "10 October",
            "11 November",
            "12 December",
        ],
    }
    assert view.to_columns() == expected
|
|
1217
|
+
|
|
1218
|
+
# bucketing
|
|
1219
|
+
def test_view_day_bucket_date(self, util):
    """Bucketing a date column by day (`'D'`) gives a date column with the
    same values as the input.
    """
    days = [
        date(2020, 1, 1),
        date(2020, 1, 1),
        date(2020, 2, 29),
        date(2020, 3, 1),
    ]
    tbl = Table({"a": days})
    view = tbl.view(expressions={"bucket": "bucket(\"a\", 'D')"})

    assert view.schema() == {"a": "date", "bucket": "date"}

    expected = {
        "a": [
            util.to_timestamp(datetime(2020, 1, 1)),
            util.to_timestamp(datetime(2020, 1, 1)),
            util.to_timestamp(datetime(2020, 2, 29)),
            util.to_timestamp(datetime(2020, 3, 1)),
        ],
        "bucket": [
            util.to_timestamp(datetime(2020, 1, 1)),
            util.to_timestamp(datetime(2020, 1, 1)),
            util.to_timestamp(datetime(2020, 2, 29)),
            util.to_timestamp(datetime(2020, 3, 1)),
        ],
    }
    assert view.to_columns() == expected
|
|
1246
|
+
|
|
1247
|
+
def test_view_day_bucket_date_with_null(self, util):
    """Day-bucketing a date column keeps `None` entries as `None`."""
    days = [
        date(2020, 1, 1),
        None,
        date(2020, 2, 29),
        date(2020, 3, 15),
    ]
    tbl = Table({"a": days})
    view = tbl.view(expressions={"bucket": "bucket(\"a\", 'D')"})

    assert view.schema() == {"a": "date", "bucket": "date"}

    expected = {
        "a": [
            util.to_timestamp(datetime(2020, 1, 1)),
            None,
            util.to_timestamp(datetime(2020, 2, 29)),
            util.to_timestamp(datetime(2020, 3, 15)),
        ],
        "bucket": [
            util.to_timestamp(datetime(2020, 1, 1)),
            None,
            util.to_timestamp(datetime(2020, 2, 29)),
            util.to_timestamp(datetime(2020, 3, 15)),
        ],
    }
    assert view.to_columns() == expected
|
|
1274
|
+
|
|
1275
|
+
def test_view_day_bucket_datetime(self, util):
    """Day-bucketing a datetime column gives a date column holding the
    midnight of each input day.
    """
    moments = [
        datetime(2020, 1, 1, 5),
        datetime(2020, 1, 1, 23),
        datetime(2020, 2, 29, 1),
        datetime(2020, 3, 1, 0),
    ]
    tbl = Table({"a": moments})
    view = tbl.view(expressions={"bucket": "bucket(\"a\", 'D')"})

    assert view.schema() == {"a": "datetime", "bucket": "date"}

    expected = {
        "a": [
            util.to_timestamp(datetime(2020, 1, 1, 5)),
            util.to_timestamp(datetime(2020, 1, 1, 23)),
            util.to_timestamp(datetime(2020, 2, 29, 1)),
            util.to_timestamp(datetime(2020, 3, 1, 0)),
        ],
        "bucket": [
            util.to_timestamp(datetime(2020, 1, 1)),
            util.to_timestamp(datetime(2020, 1, 1)),
            util.to_timestamp(datetime(2020, 2, 29)),
            util.to_timestamp(datetime(2020, 3, 1)),
        ],
    }
    assert view.to_columns() == expected
|
|
1302
|
+
|
|
1303
|
+
def test_view_month_bucket_date(self, util):
    """Month-bucketing (`'M'`) a date column maps each date to the first
    day of its month.

    The table is created from a schema and populated with ISO date strings.
    """
    tbl = Table({"a": "date"})
    iso_days = [
        str(date(2020, 1, 1)),
        str(date(2020, 1, 28)),
        str(date(2020, 2, 29)),
        str(date(2020, 3, 15)),
    ]
    tbl.update({"a": iso_days})

    view = tbl.view(expressions={"bucket": "bucket(\"a\", 'M')"})
    assert view.schema() == {"a": "date", "bucket": "date"}

    def millis(moment):
        # Expected values here are computed directly from the local
        # timestamp rather than through the `util` fixture.
        return moment.timestamp() * 1000

    expected = {
        "a": [
            millis(datetime(2020, 1, 1)),
            millis(datetime(2020, 1, 28)),
            millis(datetime(2020, 2, 29)),
            millis(datetime(2020, 3, 15)),
        ],
        "bucket": [
            millis(datetime(2020, 1, 1)),
            millis(datetime(2020, 1, 1)),
            millis(datetime(2020, 2, 1)),
            millis(datetime(2020, 3, 1)),
        ],
    }
    assert view.to_columns() == expected
|
|
1331
|
+
|
|
1332
|
+
def test_view_month_bucket_date_with_null(self, util):
    """Month-bucketing a date column keeps `None` entries as `None` and maps
    the other dates to the first of their month.
    """
    days = [
        date(2020, 1, 1),
        None,
        date(2020, 2, 29),
        date(2020, 3, 15),
    ]
    tbl = Table({"a": days})
    view = tbl.view(expressions={"bucket": "bucket(\"a\", 'M')"})

    assert view.schema() == {"a": "date", "bucket": "date"}

    expected = {
        "a": [
            util.to_timestamp(datetime(2020, 1, 1)),
            None,
            util.to_timestamp(datetime(2020, 2, 29)),
            util.to_timestamp(datetime(2020, 3, 15)),
        ],
        "bucket": [
            util.to_timestamp(datetime(2020, 1, 1)),
            None,
            util.to_timestamp(datetime(2020, 2, 1)),
            util.to_timestamp(datetime(2020, 3, 1)),
        ],
    }
    assert view.to_columns() == expected
|
|
1359
|
+
|
|
1360
|
+
def test_view_month_bucket_datetime(self, util):
    """Month-bucketing a datetime column gives a date column holding the
    first day of each input's month.
    """
    tbl = Table({"a": "datetime"})
    moments = [
        datetime(2020, 1, 1),
        datetime(2020, 1, 28),
        datetime(2020, 2, 29),
        datetime(2020, 3, 15),
    ]
    tbl.update({"a": moments})

    view = tbl.view(expressions={"bucket": "bucket(\"a\", 'M')"})
    assert view.schema() == {"a": "datetime", "bucket": "date"}

    expected = {
        "a": [
            util.to_timestamp(datetime(2020, 1, 1)),
            util.to_timestamp(datetime(2020, 1, 28)),
            util.to_timestamp(datetime(2020, 2, 29)),
            util.to_timestamp(datetime(2020, 3, 15)),
        ],
        "bucket": [
            util.to_timestamp(datetime(2020, 1, 1)),
            util.to_timestamp(datetime(2020, 1, 1)),
            util.to_timestamp(datetime(2020, 2, 1)),
            util.to_timestamp(datetime(2020, 3, 1)),
        ],
    }
    assert view.to_columns() == expected
|
|
1388
|
+
|
|
1389
|
+
def test_view_month_bucket_datetime_with_null(self, util):
    """Month-bucketing a datetime column keeps `None` entries as `None`."""
    tbl = Table({"a": "datetime"})
    moments = [datetime(2020, 1, 1), None, None, datetime(2020, 3, 15)]
    tbl.update({"a": moments})

    view = tbl.view(expressions={"bucket": "bucket(\"a\", 'M')"})
    assert view.schema() == {"a": "datetime", "bucket": "date"}

    expected = {
        "a": [
            util.to_timestamp(datetime(2020, 1, 1)),
            None,
            None,
            util.to_timestamp(datetime(2020, 3, 15)),
        ],
        "bucket": [
            util.to_timestamp(datetime(2020, 1, 1)),
            None,
            None,
            util.to_timestamp(datetime(2020, 3, 1)),
        ],
    }
    assert view.to_columns() == expected
|
|
1412
|
+
|
|
1413
|
+
def test_view_integer_expression(self):
    """`integer(...)` yields an integer column for numeric literals and for
    integer, date and float column inputs.
    """
    tbl = Table({"x": "integer", "y": "date", "z": "float"})

    exprs = {
        "computed": "integer(2147483648)",
        "computed2": "integer(-2147483649)",
        "computed3": "integer(123.456)",
        "computed4": 'integer("x")',
        "computed5": 'integer("y")',
        "computed6": 'integer("z")',
    }
    view = tbl.view(expressions=exprs)

    # The view is created on the empty table; data arrives afterwards.
    tbl.update({"x": [12136582], "y": [date(2020, 6, 30)], "z": [1.23456]})

    assert view.expression_schema() == {
        "computed": "integer",
        "computed2": "integer",
        "computed3": "integer",
        "computed4": "integer",
        "computed5": "integer",
        "computed6": "integer",
    }

    columns = view.to_columns()

    # Literals just outside the signed 32-bit range come back unchanged.
    assert columns["computed"] == [2147483648]
    assert columns["computed2"] == [-2147483649]
    assert columns["computed3"] == [123]
    assert columns["computed4"] == [12136582]
    assert columns["computed5"] == [132384030]
    assert columns["computed6"] == [1]
|
|
1446
|
+
|
|
1447
|
+
def test_view_float_expression(self):
    """`float(...)` yields a float column for numeric literals and for
    integer, date, float and datetime column inputs.
    """
    tbl = Table({"w": "datetime", "x": "integer", "y": "date", "z": "float"})

    exprs = {
        "computed": "float(2147483648)",
        "computed2": "float(-2147483649)",
        "computed3": "float(123.456789123)",
        "computed4": 'float("x")',
        "computed5": 'float("y")',
        "computed6": 'float("z")',
        "computed7": 'float("w")',
    }
    view = tbl.view(expressions=exprs)

    moment = datetime(2018, 8, 12, 15, 32, 55)
    row = {"w": [moment], "x": [12136582], "y": [date(2020, 6, 30)], "z": [1.23456]}
    tbl.update(row)

    assert view.expression_schema() == {
        "computed": "float",
        "computed2": "float",
        "computed3": "float",
        "computed4": "float",
        "computed5": "float",
        "computed6": "float",
        "computed7": "float",
    }

    columns = view.to_columns()

    # Expected datetime value: local-time epoch milliseconds of `moment`.
    epoch_seconds = mktime(moment.timetuple()) + moment.microsecond / 1000000.0
    epoch_millis = int(epoch_seconds * 1000)

    assert columns["computed"] == [2147483648]
    assert columns["computed2"] == [-2147483649]
    assert columns["computed3"] == [123.456789123]
    assert columns["computed4"] == [12136582]
    assert columns["computed5"] == [132384030]
    assert columns["computed6"] == [1.23456]
    assert columns["computed7"] == [epoch_millis]
|
|
1490
|
+
|
|
1491
|
+
def test_view_date_expression(self, util):
    """`date(y, m, d)` yields a date column holding the given day."""
    tbl = Table({"x": [1]})
    exprs = {
        "computed": " date(2020, 5, 30)",
        "computed2": "date(1997, 8, 31)",
    }
    view = tbl.view(expressions=exprs)

    assert view.expression_schema() == {"computed": "date", "computed2": "date"}

    columns = view.to_columns()
    assert columns["computed"] == [util.to_timestamp(datetime(2020, 5, 30))]
    assert columns["computed2"] == [util.to_timestamp(datetime(1997, 8, 31))]
|
|
1503
|
+
|
|
1504
|
+
def test_view_datetime_expression(self, util):
    """`datetime(<epoch ms>)` yields a datetime column holding that
    instant.
    """
    tbl = Table({"x": [1]})

    moment = datetime(2015, 11, 29, 23, 59, 59)
    epoch_seconds = mktime(moment.timetuple()) + moment.microsecond / 1000000.0
    epoch_millis = int(epoch_seconds * 1000)

    view = tbl.view(expressions={"computed": f"datetime({epoch_millis})"})
    assert view.expression_schema() == {"computed": "datetime"}

    columns = view.to_columns()
    expected = [util.to_timestamp(datetime(2015, 11, 29, 23, 59, 59))]
    assert columns["computed"] == expected
|
|
1516
|
+
|
|
1517
|
+
def test_view_datetime_expression_roundtrip(self, util):
    """Converting a datetime column to float and back with
    `datetime(float("x"))` returns the original instant.
    """
    moment = datetime(2015, 11, 29, 23, 59, 59)
    tbl = Table({"x": [moment]})
    view = tbl.view(expressions={"computed": 'datetime(float("x"))'})

    assert view.expression_schema() == {"computed": "datetime"}

    columns = view.to_columns()
    expected = [util.to_timestamp(datetime(2015, 11, 29, 23, 59, 59))]
    assert columns["computed"] == expected
|
|
1525
|
+
|
|
1526
|
+
def test_view_string_expression(self):
    """Check `string(...)` applied to each column type and to a float literal.

    The view is created while the table holds only a schema; the two rows
    are added afterwards through `update`, and the assertions then read the
    expression columns for those rows.
    """
    table = Table(
        {
            "a": "date",
            "b": "datetime",
            "c": "integer",
            "d": "float",
            "e": "string",
            "f": "boolean",
        }
    )
    string_exprs = {
        "computed": 'string("a")',
        "computed2": 'string("b")',
        "computed3": 'string("c")',
        "computed4": 'string("d")',
        "computed5": 'string("e")',
        "computed6": 'string("f")',
        "computed7": "string(1234.5678)",
    }
    # Every expression is expected to report the "string" type.
    expected_schema = dict.fromkeys(string_exprs, "string")
    view = table.view(expressions=string_exprs)

    rows = {
        "a": [date(2020, 5, 30), date(2021, 7, 13)],
        "b": [
            datetime(2015, 11, 29, 23, 59, 59),
            datetime(2016, 11, 29, 23, 59, 59),
        ],
        "c": [12345678, 1293879852],
        "d": [1.2792013981, 19.218975981],
        "e": ["abcdefghijklmnop", "def"],
        "f": [False, True],
    }
    table.update(rows)

    assert view.expression_schema() == expected_schema

    columns = view.to_columns()
    assert columns["computed"] == ["2020-05-30", "2021-07-13"]
    assert columns["computed2"] == [
        "2015-11-29 23:59:59.000",
        "2016-11-29 23:59:59.000",
    ]
    assert columns["computed3"] == ["12345678", "1293879852"]
    assert columns["computed4"] == ["1.2792", "19.219"]
    assert columns["computed5"] == ["abcdefghijklmnop", "def"]
    assert columns["computed6"] == ["false", "true"]
    # The literal expression yields the same text on both rows.
    assert columns["computed7"] == ["1234.57", "1234.57"]
|
|
1583
|
+
|
|
1584
|
+
def test_view_expession_multicomment(self):
    """Check an expression with a full-line and a trailing `//` comment.

    The expression is passed as a list entry, and the assertions expect the
    complete expression text to be the name of the resulting column.
    """
    expression = "var x := 1 + 2;\n// def\nx + 100 // cdefghijk"
    table = Table({"a": [1, 2, 3, 4]})
    view = table.view(expressions=[expression])

    assert view.expression_schema() == {expression: "float"}
    assert view.to_columns() == {
        expression: [103, 103, 103, 103],
        "a": [1, 2, 3, 4],
    }
|
|
1594
|
+
|
|
1595
|
+
def test_view_regex_email(self):
    """Check `search`, `match_all` and `match` on random e-mail-like strings.

    One hundred strings shaped like local@host.ending are generated. Each
    expression column is compared row by row with the equivalent Python
    `re` call on the value read back from column "a".
    """
    row_count = 100
    endings = ["com", "net", "co.uk", "ie", "me", "io", "co"]
    local_alphabet = ascii_letters + "0123456789" + "._-"

    data = []
    for _ in range(row_count):
        local_part = randstr(30, local_alphabet)
        host = randstr(10)
        ending = choices(endings, k=1)[0]
        data.append(f"{local_part}@{host}.{ending}")

    table = Table({"a": data})
    expressions = {
        "address": "search(\"a\", '^([a-zA-Z0-9._-]+)@')",
        "domain": "search(\"a\", '@([a-zA-Z.]+)$')",
        "is_email?": "match_all(\"a\", '^([a-zA-Z0-9._-]+)@([a-zA-Z.]+)$')",
        "has_at?": "match(\"a\", '@')",
    }

    view = table.view(expressions=expressions)
    assert view.expression_schema() == {
        "address": "string",
        "domain": "string",
        "is_email?": "boolean",
        "has_at?": "boolean",
    }

    results = view.to_columns()
    address_re = re.compile(r"^([a-zA-Z0-9._-]+)@")
    domain_re = re.compile(r"@([a-zA-Z.]+)$")

    for row in range(row_count):
        source = results["a"][row]
        expected_address = address_re.match(source).group(1)
        expected_domain = domain_re.search(source).group(1)
        assert results["address"][row] == expected_address
        assert results["domain"][row] == expected_domain
        assert results["is_email?"][row]
        assert results["has_at?"][row]
|
|
1632
|
+
|
|
1633
|
+
def test_view_expression_number(self):
    """Check a multi-statement expression that strips separators from numbers.

    One thousand strings made of four 4-digit groups, joined by either "-"
    or " ", are generated. The "parsed" expression captures each group with
    `search` and concatenates them; the result is compared with the source
    string after removing the separators via Python `re.sub`.
    """
    row_count = 1000
    data = []
    for _ in range(row_count):
        separator = "-" if random() > 0.5 else " "
        groups = [randstr(4, "0123456789") for _ in range(4)]
        data.append(separator.join(groups))

    # One capture group per statement, selecting the 1st..4th digit group.
    parse_expr = """
var parts[4];
parts[0] := search("a", '^([0-9]{4})[ -][0-9]{4}[ -][0-9]{4}[ -][0-9]{4}');
parts[1] := search("a", '^[0-9]{4}[ -]([0-9]{4})[ -][0-9]{4}[ -][0-9]{4}');
parts[2] := search("a", '^[0-9]{4}[ -][0-9]{4}[ -]([0-9]{4})[ -][0-9]{4}');
parts[3] := search("a", '^[0-9]{4}[ -][0-9]{4}[ -][0-9]{4}[ -]([0-9]{4})');
concat(parts[0], parts[1], parts[2], parts[3])
"""
    table = Table({"a": data})
    view = table.view(
        expressions={
            "parsed": parse_expr,
            "is_number?": "match_all(\"a\", '^[0-9]{4}[ -][0-9]{4}[ -][0-9]{4}[ -][0-9]{4}')",
        }
    )
    assert view.expression_schema() == {"parsed": "string", "is_number?": "boolean"}

    results = view.to_columns()
    separator_re = re.compile(r"[ -]")
    for row in range(row_count):
        source = results["a"][row]
        digits_only = separator_re.sub("", source)
        assert results["parsed"][row] == digits_only
        assert results["is_number?"][row]
|
|
1676
|
+
|
|
1677
|
+
def test_view_expression_newlines(self):
    """Check `search` and `match` on strings containing newlines and tabs.

    Expressions c1 and c2 embed a real newline / tab character inside the
    pattern text, while c3 and c4 pass the two-character backslash-n escape
    to the regex. Rows whose input is None are expected to yield None.
    """
    column_a = [
        "abc\ndef",
        "\n\n\n\nabc\ndef",
        "abc\n\n\n\n\n\nabc\ndef\n\n\n\n",
        None,
        "def",
    ]
    column_b = [
        "hello\tworld",
        "\n\n\n\n\nhello\n\n\n\n\n\tworld",
        "\tworld",
        "world",
        None,
    ]
    table = Table({"a": column_a, "b": column_b})

    whitespace_exprs = {
        "c1": "search(\"a\", '(\ndef)')",
        "c2": "search(\"b\", '(\tworld)')",
        "c3": "match(\"a\", '\\n')",
        "c4": "match(\"b\", '\\n')",
    }
    view = table.view(expressions=whitespace_exprs)

    assert view.expression_schema() == {
        "c1": "string",
        "c2": "string",
        "c3": "boolean",
        "c4": "boolean",
    }

    columns = view.to_columns()
    assert columns["c1"] == ["\ndef", "\ndef", "\ndef", None, None]
    assert columns["c2"] == ["\tworld", "\tworld", "\tworld", None, None]
    assert columns["c3"] == [True, True, True, None, False]
    assert columns["c4"] == [False, True, False, False, None]
|
|
1718
|
+
|
|
1719
|
+
def test_view_regex_substring(self):
    """Check `substring` with literal and column inputs and edge offsets.

    Covers a start index only, start plus length, a start index beyond the
    end of the string, a length larger than the string, and a zero length,
    over a column that also contains an empty string and a None.
    """
    data = ["abc, def", "efg", "", None, "aaaaaaaaaaaaa"]
    row_count = len(data)
    table = Table({"x": data})

    substring_exprs = {
        "a": "substring('abcdef', 0)",
        "abc": "substring('abcdef', 3)",
        "b": 'substring("x", 0)',
        "c": 'substring("x", 5, 1)',
        "d": 'substring("x", 100)',
        "e": 'substring("x", 0, 10000)',
        "f": 'substring("x", 5, 0)',
    }
    view = table.view(expressions=substring_exprs)
    columns = view.to_columns()

    # Literal inputs give the same value on every row.
    assert columns["a"] == ["abcdef"] * row_count
    assert columns["abc"] == ["def"] * row_count
    # The empty string and None are both expected to come back as None.
    assert columns["b"] == [value or None for value in data]
    assert columns["c"] == ["d", None, None, None, "a"]
    assert columns["d"] == [None] * row_count
    assert columns["e"] == [None] * row_count
    assert columns["f"] == ["", None, None, None, ""]
|
|
1742
|
+
|
|
1743
|
+
# FIXME: // ending\nvar domain := search(\"a\", '@([a-zA-Z.]+)$'); length(domain) > 0 ? search(domain, '[.](.*)$') : null
|
|
1744
|
+
# is a broken expression without the newline after var domain
|
|
1745
|
+
def test_view_regex_email_substr(self):
    """Check `indexof` + `substring` and a branching `search` expression.

    One hundred strings shaped like local@host.ending are generated. The
    "address" expression slices column "a" using the indices that `indexof`
    writes into `vec`; the "ending" expression extracts the text after the
    first "." of the domain. Both are compared with Python `re` results.
    """
    row_count = 100
    endings = ["com", "net", "co.uk", "ie", "me", "io", "co"]
    local_alphabet = ascii_letters + "0123456789" + "._-"

    data = []
    for _ in range(row_count):
        local_part = randstr(30, local_alphabet)
        host = randstr(10)
        ending = choices(endings, k=1)[0]
        data.append(f"{local_part}@{host}.{ending}")

    table = Table({"a": data})
    expressions = {
        "address": 'var vec[2]; indexof("a", \'^([a-zA-Z0-9._-]+)@\', vec) ? substring("a", vec[0], vec[1] - vec[0] + 1) : null',
        "ending": """
var domain := search(\"a\", '@([a-zA-Z.]+)$');
var len := length(domain);
if (len > 0 and is_not_null(domain)) {
search(domain, '[.](.*)$');
} else {
'not found';
}""",
    }

    view = table.view(expressions=expressions)
    assert view.expression_schema() == {
        "address": "string",
        "ending": "string",
    }

    results = view.to_columns()
    address_re = re.compile(r"^([a-zA-Z0-9._-]+)@")
    domain_re = re.compile(r"@([a-zA-Z.]+)$")
    ending_re = re.compile(r"[.](.*)$")

    for row in range(row_count):
        source = results["a"][row]
        expected_address = address_re.match(source).group(1)
        expected_domain = domain_re.search(source).group(1)
        expected_ending = ending_re.search(expected_domain).group(1)
        assert results["address"][row] == expected_address
        assert results["ending"][row] == expected_ending
|
|
1784
|
+
|
|
1785
|
+
def test_view_expressions_replace(self):
    """Check `replace`, which is expected to substitute only the first match.

    Column "a" holds 1000 strings of four 4-digit groups joined by "-" or
    " "; column "b" holds the row index as a string. Each expression is
    named by its leading `//name` comment line:

    * w - literal input, literal replacement (only the first "-" removed)
    * x - trailing digit group replaced by the value of column "b"
    * y - pattern of lowercase letters never matches, input is unchanged
    * z - leading digit group replaced by a string variable

    Expected values are computed with `re.sub(..., count=1)`. `count` is
    passed by keyword because passing it positionally is deprecated since
    Python 3.13.
    """
    row_count = 1000

    def digits():
        return randstr(4, "0123456789")

    data = []
    for _ in range(row_count):
        separator = "-" if random() > 0.5 else " "
        data.append(separator.join(digits() for _ in range(4)))

    # XXX: This test was broken because it thought `b` was an integer column.
    table = Table({"a": "string", "b": "string"})
    table.update({"a": data, "b": [str(i) for i in range(row_count)]})
    expressions = [
        """//w
replace('abc-def-hijk', '-', '')""",
        """//x
replace("a", '[0-9]{4}$', "b")""",
        """//y
replace("a", '[a-z]{4}$', "b")""",
        """//z
var x := 'long string, very cool!'; replace("a", '^[0-9]{4}', x)""",
    ]
    expected_schema = {
        "w": "string",
        "x": "string",
        "y": "string",
        "z": "string",
    }

    validate = table.validate_expressions(expressions)
    assert validate["expression_schema"] == expected_schema

    view = table.view(expressions=expressions)
    assert view.expression_schema() == expected_schema

    results = view.to_columns()
    for i in range(row_count):
        source = results["a"][i]
        idx = results["b"][i]
        assert results["w"][i] == "abcdef-hijk"
        assert results["x"][i] == re.sub(r"[0-9]{4}$", idx, source, count=1)
        assert results["y"][i] == source
        assert results["z"][i] == re.sub(
            r"^[0-9]{4}", "long string, very cool!", source, count=1
        )
|
|
1846
|
+
|
|
1847
|
+
def test_view_replace_invalid(self):
    """Check that `replace` with a non-string replacement fails validation.

    Each expression passes an integer literal, `today()`, `null`, or a
    numeric variable as the replacement argument; none of them is expected
    to appear in the validated expression schema.
    """
    table = Table({"a": "string", "b": "string"})
    invalid_exprs = [
        """//v
replace('abc-def-hijk', '-', 123)""",
        """//w
replace('', '-', today())""",
        """//x
replace("a", '[0-9]{4}$', today())""",
        """//y
replace("a", '[a-z]{4}$', null)""",
        """//z
var x := 123; replace("a", '^[0-9]{4}', x)""",
    ]
    outcome = table.validate_expressions(invalid_exprs)
    assert outcome["expression_schema"] == {}
|
|
1863
|
+
|
|
1864
|
+
def test_view_expressions_replace_all(self):
    """Check `replace_all`, which is expected to substitute every match.

    Column "a" holds 1000 strings of four 4-digit groups joined by "-" or
    " "; column "b" holds the row index as a string. Each expression is
    named by its leading `//name` comment line, and expected values are
    computed with an unlimited Python `re.sub`.
    """
    row_count = 1000
    rows = []
    for _ in range(row_count):
        separator = "-" if random() > 0.5 else " "
        groups = [randstr(4, "0123456789") for _ in range(4)]
        rows.append(separator.join(groups))

    table = Table({"a": "string", "b": "string"})
    table.update({"a": rows, "b": [str(n) for n in range(row_count)]})
    expressions = [
        """//w
replace_all('abc-def-hijk', '-', '')""",
        """//x
replace_all("a", '[0-9]{4}$', "b")""",
        """//y
replace_all("a", '[a-z]{4}$', "b")""",
        """//z
var x := 'long string, very cool!'; replace_all("a", '^[0-9]{4}', x)""",
    ]

    validated = table.validate_expressions(expressions)
    assert validated["expression_schema"] == {
        "w": "string",
        "x": "string",
        "y": "string",
        "z": "string",
    }

    view = table.view(expressions=expressions)
    assert view.expression_schema() == {
        "w": "string",
        "x": "string",
        "y": "string",
        "z": "string",
    }

    results = view.to_columns()
    tail_group = re.compile(r"[0-9]{4}$")
    head_group = re.compile(r"^[0-9]{4}")

    for row in range(row_count):
        source = results["a"][row]
        idx = results["b"][row]
        # Both "-" separators of the literal input are removed.
        assert results["w"][row] == "abcdefhijk"
        assert results["x"][row] == tail_group.sub(idx, source)
        # The lowercase-letter pattern never matches digit data.
        assert results["y"][row] == source
        assert results["z"][row] == head_group.sub("long string, very cool!", source)
|
|
1924
|
+
|
|
1925
|
+
def test_view_replace_invalid_variation(self):
    """Check that `replace_all` with a non-string replacement fails validation.

    Each expression passes an integer literal, `today()`, `null`, or a
    numeric variable as the replacement argument; none of them is expected
    to appear in the validated expression schema.
    """
    table = Table({"a": "string", "b": "string"})
    invalid_exprs = [
        """//v
replace_all('abc-def-hijk', '-', 123)""",
        """//w
replace_all('', '-', today())""",
        """//x
replace_all("a", '[0-9]{4}$', today())""",
        """//y
replace_all("a", '[a-z]{4}$', null)""",
        """//z
var x := 123; replace_all("a", '^[0-9]{4}', x)""",
    ]
    outcome = table.validate_expressions(invalid_exprs)
    assert outcome["expression_schema"] == {}
|