pixeltable 0.2.4__py3-none-any.whl → 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +18 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +31 -50
- pixeltable/catalog/insertable_table.py +7 -6
- pixeltable/catalog/table.py +171 -57
- pixeltable/catalog/table_version.py +417 -140
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/dataframe.py +239 -121
- pixeltable/env.py +82 -16
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/in_memory_data_node.py +11 -7
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +9 -0
- pixeltable/exprs/comparison.py +3 -3
- pixeltable/exprs/data_row.py +5 -1
- pixeltable/exprs/expr.py +15 -7
- pixeltable/exprs/function_call.py +17 -15
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/literal.py +16 -4
- pixeltable/exprs/row_builder.py +15 -41
- pixeltable/exprs/similarity_expr.py +65 -0
- pixeltable/ext/__init__.py +5 -0
- pixeltable/ext/functions/yolox.py +92 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +18 -15
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +20 -3
- pixeltable/func/function.py +35 -4
- pixeltable/func/globals.py +24 -14
- pixeltable/func/signature.py +23 -27
- pixeltable/func/udf.py +13 -12
- pixeltable/functions/__init__.py +8 -8
- pixeltable/functions/eval.py +7 -8
- pixeltable/functions/huggingface.py +64 -17
- pixeltable/functions/openai.py +36 -3
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +21 -0
- pixeltable/functions/util.py +11 -0
- pixeltable/globals.py +425 -0
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +51 -0
- pixeltable/index/embedding_index.py +168 -0
- pixeltable/io/__init__.py +3 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +4 -0
- pixeltable/iterators/document.py +218 -97
- pixeltable/iterators/video.py +8 -9
- pixeltable/metadata/__init__.py +7 -3
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/schema.py +45 -22
- pixeltable/plan.py +15 -51
- pixeltable/store.py +38 -41
- pixeltable/tool/create_test_db_dump.py +39 -4
- pixeltable/type_system.py +47 -96
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/METADATA +14 -10
- pixeltable-0.2.6.dist-info/RECORD +119 -0
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -604
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/tests/conftest.py +0 -177
- pixeltable/tests/functions/test_fireworks.py +0 -42
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -152
- pixeltable/tests/functions/test_together.py +0 -111
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -370
- pixeltable/tests/test_dataframe.py +0 -439
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -805
- pixeltable/tests/test_function.py +0 -324
- pixeltable/tests/test_migration.py +0 -43
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -208
- pixeltable/tests/test_table.py +0 -1267
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -22
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -530
- pixeltable/tests/utils.py +0 -408
- pixeltable-0.2.4.dist-info/RECORD +0 -132
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.6.dist-info}/LICENSE +0 -0
|
@@ -1,324 +0,0 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
import pytest
|
|
5
|
-
|
|
6
|
-
import pixeltable as pxt
|
|
7
|
-
import pixeltable.exceptions as excs
|
|
8
|
-
from pixeltable import catalog
|
|
9
|
-
from pixeltable.func import Function, FunctionRegistry, Batch
|
|
10
|
-
from pixeltable.type_system import IntType, FloatType
|
|
11
|
-
from pixeltable.tests.utils import assert_resultset_eq
|
|
12
|
-
import pixeltable.func as func
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def dummy_fn(i: int) -> int:
|
|
16
|
-
return i
|
|
17
|
-
|
|
18
|
-
class TestFunction:
|
|
19
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType()])
|
|
20
|
-
def func(x: int) -> int:
|
|
21
|
-
return x + 1
|
|
22
|
-
|
|
23
|
-
@pxt.uda(name='agg', value_type=IntType(), update_types=[IntType()])
|
|
24
|
-
class Aggregator:
|
|
25
|
-
def __init__(self):
|
|
26
|
-
self.sum = 0
|
|
27
|
-
def update(self, val: int) -> None:
|
|
28
|
-
if val is not None:
|
|
29
|
-
self.sum += val
|
|
30
|
-
def value(self) -> int:
|
|
31
|
-
return self.sum
|
|
32
|
-
|
|
33
|
-
def test_serialize_anonymous(self, init_env) -> None:
|
|
34
|
-
d = self.func.as_dict()
|
|
35
|
-
FunctionRegistry.get().clear_cache()
|
|
36
|
-
deserialized = Function.from_dict(d)
|
|
37
|
-
# TODO: add Function.exec() and then use that
|
|
38
|
-
assert deserialized.py_fn(1) == 2
|
|
39
|
-
|
|
40
|
-
@pytest.mark.skip(reason='deprecated')
|
|
41
|
-
def test_create(self, test_client: pxt.Client) -> None:
|
|
42
|
-
cl = test_client
|
|
43
|
-
cl.create_function('test_fn', self.func)
|
|
44
|
-
assert self.func.md.fqn == 'test_fn'
|
|
45
|
-
FunctionRegistry.get().clear_cache()
|
|
46
|
-
cl = pxt.Client(reload=True)
|
|
47
|
-
_ = cl.list_functions()
|
|
48
|
-
fn2 = cl.get_function('test_fn')
|
|
49
|
-
assert fn2.md.fqn == 'test_fn'
|
|
50
|
-
assert fn2.py_fn(1) == 2
|
|
51
|
-
|
|
52
|
-
with pytest.raises(excs.Error):
|
|
53
|
-
cl.create_function('test_fn', self.func)
|
|
54
|
-
with pytest.raises(excs.Error):
|
|
55
|
-
cl.create_function('dir1.test_fn', self.func)
|
|
56
|
-
with pytest.raises(excs.Error):
|
|
57
|
-
library_fn = make_library_function(IntType(), [IntType()], __name__, 'dummy_fn')
|
|
58
|
-
cl.create_function('library_fn', library_fn)
|
|
59
|
-
|
|
60
|
-
@pytest.mark.skip(reason='deprecated')
|
|
61
|
-
def test_update(self, test_client: pxt.Client, test_tbl: catalog.Table) -> None:
|
|
62
|
-
cl = test_client
|
|
63
|
-
t = test_tbl
|
|
64
|
-
cl.create_function('test_fn', self.func)
|
|
65
|
-
res1 = t[self.func(t.c2)].show(0).to_pandas()
|
|
66
|
-
|
|
67
|
-
# load function from db and make sure it computes the same thing as before
|
|
68
|
-
FunctionRegistry.get().clear_cache()
|
|
69
|
-
cl = pxt.Client(reload=True)
|
|
70
|
-
fn = cl.get_function('test_fn')
|
|
71
|
-
res2 = t[fn(t.c2)].show(0).to_pandas()
|
|
72
|
-
assert res1.col_0.equals(res2.col_0)
|
|
73
|
-
fn.py_fn = lambda x: x + 2
|
|
74
|
-
cl.update_function('test_fn', fn)
|
|
75
|
-
assert self.func.md.fqn == fn.md.fqn # fqn doesn't change
|
|
76
|
-
|
|
77
|
-
FunctionRegistry.get().clear_cache()
|
|
78
|
-
cl = pxt.Client(reload=True)
|
|
79
|
-
fn = cl.get_function('test_fn')
|
|
80
|
-
assert self.func.md.fqn == fn.md.fqn # fqn doesn't change
|
|
81
|
-
res3 = t[fn(t.c2)].show(0).to_pandas()
|
|
82
|
-
assert (res2.col_0 + 1).equals(res3.col_0)
|
|
83
|
-
|
|
84
|
-
# signature changes
|
|
85
|
-
with pytest.raises(excs.Error):
|
|
86
|
-
cl.update_function('test_fn', make_function(FloatType(), [IntType()], fn.py_fn))
|
|
87
|
-
with pytest.raises(excs.Error):
|
|
88
|
-
cl.update_function('test_fn', make_function(IntType(), [FloatType()], fn.py_fn))
|
|
89
|
-
with pytest.raises(excs.Error):
|
|
90
|
-
cl.update_function('test_fn', self.agg)
|
|
91
|
-
|
|
92
|
-
@pytest.mark.skip(reason='deprecated')
|
|
93
|
-
def test_move(self, test_client: pxt.Client) -> None:
|
|
94
|
-
cl = test_client
|
|
95
|
-
cl.create_function('test_fn', self.func)
|
|
96
|
-
|
|
97
|
-
FunctionRegistry.get().clear_cache()
|
|
98
|
-
cl = pxt.Client(reload=True)
|
|
99
|
-
with pytest.raises(excs.Error):
|
|
100
|
-
cl.move('test_fn2', 'test_fn')
|
|
101
|
-
cl.move('test_fn', 'test_fn2')
|
|
102
|
-
func = cl.get_function('test_fn2')
|
|
103
|
-
assert func.py_fn(1) == 2
|
|
104
|
-
assert func.md.fqn == 'test_fn2'
|
|
105
|
-
|
|
106
|
-
with pytest.raises(excs.Error):
|
|
107
|
-
_ = cl.get_function('test_fn')
|
|
108
|
-
|
|
109
|
-
# move function between directories
|
|
110
|
-
cl.create_dir('functions')
|
|
111
|
-
cl.create_dir('functions2')
|
|
112
|
-
cl.create_function('functions.func1', self.func)
|
|
113
|
-
with pytest.raises(excs.Error):
|
|
114
|
-
cl.move('functions2.func1', 'functions.func1')
|
|
115
|
-
cl.move('functions.func1', 'functions2.func1')
|
|
116
|
-
func = cl.get_function('functions2.func1')
|
|
117
|
-
assert func.md.fqn == 'functions2.func1'
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
FunctionRegistry.get().clear_cache()
|
|
121
|
-
cl = pxt.Client(reload=True)
|
|
122
|
-
func = cl.get_function('functions2.func1')
|
|
123
|
-
assert func.py_fn(1) == 2
|
|
124
|
-
assert func.md.fqn == 'functions2.func1'
|
|
125
|
-
with pytest.raises(excs.Error):
|
|
126
|
-
_ = cl.get_function('functions.func1')
|
|
127
|
-
|
|
128
|
-
@pytest.mark.skip(reason='deprecated')
|
|
129
|
-
def test_drop(self, test_client: pxt.Client) -> None:
|
|
130
|
-
cl = test_client
|
|
131
|
-
cl.create_function('test_fn', self.func)
|
|
132
|
-
FunctionRegistry.get().clear_cache()
|
|
133
|
-
cl = pxt.Client(reload=True)
|
|
134
|
-
cl.drop_function('test_fn')
|
|
135
|
-
|
|
136
|
-
with pytest.raises(excs.Error):
|
|
137
|
-
_ = cl.get_function('test_fn')
|
|
138
|
-
|
|
139
|
-
def test_list(self, test_client: pxt.Client) -> None:
|
|
140
|
-
_ = FunctionRegistry.get().list_functions()
|
|
141
|
-
print(_)
|
|
142
|
-
|
|
143
|
-
def test_stored_udf(self, test_client: pxt.Client) -> None:
|
|
144
|
-
cl = test_client
|
|
145
|
-
t = cl.create_table('test', {'c1': pxt.IntType(), 'c2': pxt.FloatType()})
|
|
146
|
-
rows = [{'c1': i, 'c2': i + 0.5} for i in range(100)]
|
|
147
|
-
status = t.insert(rows)
|
|
148
|
-
assert status.num_rows == len(rows)
|
|
149
|
-
assert status.num_excs == 0
|
|
150
|
-
|
|
151
|
-
@pxt.udf(_force_stored=True)
|
|
152
|
-
def f1(a: int, b: float) -> float:
|
|
153
|
-
return a + b
|
|
154
|
-
t['f1'] = f1(t.c1, t.c2)
|
|
155
|
-
|
|
156
|
-
func.FunctionRegistry.get().clear_cache()
|
|
157
|
-
cl = pxt.Client(reload=True)
|
|
158
|
-
t = cl.get_table('test')
|
|
159
|
-
status = t.insert(rows)
|
|
160
|
-
assert status.num_rows == len(rows)
|
|
161
|
-
assert status.num_excs == 0
|
|
162
|
-
|
|
163
|
-
def test_call(self, test_tbl: catalog.Table) -> None:
|
|
164
|
-
t = test_tbl
|
|
165
|
-
|
|
166
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType(), FloatType(), FloatType(), FloatType()])
|
|
167
|
-
def f1(a: int, b: float, c: float = 0.0, d: float = 1.0) -> float:
|
|
168
|
-
return a + b + c + d
|
|
169
|
-
|
|
170
|
-
r0 = t[t.c2, t.c3].show(0).to_pandas()
|
|
171
|
-
# positional params with default args
|
|
172
|
-
r1 = t[f1(t.c2, t.c3)].show(0).to_pandas()['col_0']
|
|
173
|
-
assert np.all(r1 == r0.c2 + r0.c3 + 1.0)
|
|
174
|
-
# kw args only
|
|
175
|
-
r2 = t[f1(c=0.0, b=t.c3, a=t.c2)].show(0).to_pandas()['col_0']
|
|
176
|
-
assert np.all(r1 == r2)
|
|
177
|
-
# overriding default args
|
|
178
|
-
r3 = t[f1(d=0.0, c=1.0, b=t.c3, a=t.c2)].show(0).to_pandas()['col_0']
|
|
179
|
-
assert np.all(r2 == r3)
|
|
180
|
-
# overriding default with positional arg
|
|
181
|
-
r4 = t[f1(t.c2, t.c3, 0.0)].show(0).to_pandas()['col_0']
|
|
182
|
-
assert np.all(r3 == r4)
|
|
183
|
-
# overriding default with positional arg and kw arg
|
|
184
|
-
r5 = t[f1(t.c2, t.c3, 1.0, d=0.0)].show(0).to_pandas()['col_0']
|
|
185
|
-
assert np.all(r4 == r5)
|
|
186
|
-
# d is kwarg
|
|
187
|
-
r6 = t[f1(t.c2, d=1.0, b=t.c3)].show(0).to_pandas()['col_0']
|
|
188
|
-
assert np.all(r5 == r6)
|
|
189
|
-
# d is Expr kwarg
|
|
190
|
-
r6 = t[f1(1, d=t.c3, b=t.c3)].show(0).to_pandas()['col_0']
|
|
191
|
-
assert np.all(r5 == r6)
|
|
192
|
-
|
|
193
|
-
# test handling of Nones
|
|
194
|
-
@pxt.udf(
|
|
195
|
-
return_type=IntType(),
|
|
196
|
-
param_types=[IntType(nullable=True), FloatType(nullable=False), FloatType(nullable=True)])
|
|
197
|
-
def f2(a: int, b: float = 0.0, c: float = 1.0) -> float:
|
|
198
|
-
return (0.0 if a is None else a) + b + (0.0 if c is None else c)
|
|
199
|
-
r0 = t[f2(1, t.c3)].show(0).to_pandas()['col_0']
|
|
200
|
-
r1 = t[f2(None, t.c3, 2.0)].show(0).to_pandas()['col_0']
|
|
201
|
-
assert np.all(r0 == r1)
|
|
202
|
-
r2 = t[f2(2, t.c3, None)].show(0).to_pandas()['col_0']
|
|
203
|
-
assert np.all(r1 == r2)
|
|
204
|
-
# kwarg with None
|
|
205
|
-
r3 = t[f2(c=None, a=t.c2)].show(0).to_pandas()['col_0']
|
|
206
|
-
# kwarg with Expr
|
|
207
|
-
r4 = t[f2(c=t.c3, a=None)].show(0).to_pandas()['col_0']
|
|
208
|
-
assert np.all(r3 == r4)
|
|
209
|
-
|
|
210
|
-
with pytest.raises(TypeError) as exc_info:
|
|
211
|
-
_ = t[f1(t.c2, c=0.0)].show(0)
|
|
212
|
-
assert "'b'" in str(exc_info.value)
|
|
213
|
-
with pytest.raises(TypeError) as exc_info:
|
|
214
|
-
_ = t[f1(t.c2)].show(0)
|
|
215
|
-
assert "'b'" in str(exc_info.value)
|
|
216
|
-
with pytest.raises(TypeError) as exc_info:
|
|
217
|
-
_ = t[f1(c=1.0, a=t.c2)].show(0)
|
|
218
|
-
assert "'b'" in str(exc_info.value)
|
|
219
|
-
|
|
220
|
-
# bad default value
|
|
221
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
222
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType(), FloatType(), FloatType()])
|
|
223
|
-
def f1(a: int, b: float, c: str = '') -> float:
|
|
224
|
-
return a + b + c
|
|
225
|
-
assert 'default value' in str(exc_info.value).lower()
|
|
226
|
-
# missing param type
|
|
227
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
228
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType(), FloatType()])
|
|
229
|
-
def f1(a: int, b: float, c: str = '') -> float:
|
|
230
|
-
return a + b + c
|
|
231
|
-
assert 'missing type for parameter c' in str(exc_info.value).lower()
|
|
232
|
-
# bad parameter name
|
|
233
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
234
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType()])
|
|
235
|
-
def f1(group_by: int) -> int:
|
|
236
|
-
return group_by
|
|
237
|
-
assert 'reserved' in str(exc_info.value)
|
|
238
|
-
# bad parameter name
|
|
239
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
240
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType()])
|
|
241
|
-
def f1(order_by: int) -> int:
|
|
242
|
-
return order_by
|
|
243
|
-
assert 'reserved' in str(exc_info.value)
|
|
244
|
-
|
|
245
|
-
def test_expr_udf(self, test_tbl: catalog.Table) -> None:
|
|
246
|
-
t = test_tbl
|
|
247
|
-
@pxt.expr_udf
|
|
248
|
-
def times2(x: int) -> int:
|
|
249
|
-
return x + x
|
|
250
|
-
res1 = t.select(out=times2(t.c2)).order_by(t.c2).collect()
|
|
251
|
-
res2 = t.select(t.c2 * 2).order_by(t.c2).collect()
|
|
252
|
-
assert_resultset_eq(res1, res2)
|
|
253
|
-
|
|
254
|
-
with pytest.raises(TypeError) as exc_info:
|
|
255
|
-
_ = t.select(times2(y=t.c2)).collect()
|
|
256
|
-
assert 'missing a required argument' in str(exc_info.value).lower()
|
|
257
|
-
|
|
258
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
259
|
-
# parameter types cannot be inferred
|
|
260
|
-
@pxt.expr_udf
|
|
261
|
-
def add1(x, y) -> int:
|
|
262
|
-
return x + y
|
|
263
|
-
assert 'cannot infer pixeltable type' in str(exc_info.value).lower()
|
|
264
|
-
|
|
265
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
266
|
-
# return type cannot be inferred
|
|
267
|
-
@pxt.expr_udf
|
|
268
|
-
def add1(x: int, y: int):
|
|
269
|
-
return x + y
|
|
270
|
-
assert 'cannot infer pixeltable return type' in str(exc_info.value).lower()
|
|
271
|
-
|
|
272
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
273
|
-
# missing param types
|
|
274
|
-
@pxt.expr_udf(param_types=[IntType()])
|
|
275
|
-
def add1(x, y) -> int:
|
|
276
|
-
return x + y
|
|
277
|
-
assert 'missing type for parameter y' in str(exc_info.value).lower()
|
|
278
|
-
|
|
279
|
-
with pytest.raises(TypeError) as exc_info:
|
|
280
|
-
# signature has correct parameter kind
|
|
281
|
-
@pxt.expr_udf
|
|
282
|
-
def add1(*, x: int) -> int:
|
|
283
|
-
return x + 1
|
|
284
|
-
_ = t.select(add1(t.c2)).collect()
|
|
285
|
-
assert 'takes 0 positional arguments' in str(exc_info.value).lower()
|
|
286
|
-
|
|
287
|
-
@pxt.expr_udf
|
|
288
|
-
def add2(x: int, y: int = 1) -> int:
|
|
289
|
-
return x + y
|
|
290
|
-
res1 = t.select(out=add2(t.c2)).order_by(t.c2).collect()
|
|
291
|
-
|
|
292
|
-
# Test that various invalid udf definitions generate
|
|
293
|
-
# correct error messages.
|
|
294
|
-
def test_invalid_udfs(self):
|
|
295
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
296
|
-
@pxt.udf
|
|
297
|
-
def udf1(name: Batch[str]) -> str:
|
|
298
|
-
return ''
|
|
299
|
-
assert 'batched parameters in udf, but no `batch_size` given' in str(exc_info.value).lower()
|
|
300
|
-
|
|
301
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
302
|
-
@pxt.udf(batch_size=32)
|
|
303
|
-
def udf2(name: Batch[str]) -> str:
|
|
304
|
-
return ''
|
|
305
|
-
assert 'batch_size is specified; Python return type must be a `Batch`' in str(exc_info.value)
|
|
306
|
-
|
|
307
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
308
|
-
@pxt.udf
|
|
309
|
-
def udf3(name: str) -> Optional[np.ndarray]:
|
|
310
|
-
return None
|
|
311
|
-
assert 'cannot infer pixeltable return type' in str(exc_info.value).lower()
|
|
312
|
-
|
|
313
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
314
|
-
@pxt.udf
|
|
315
|
-
def udf4(array: np.ndarray) -> str:
|
|
316
|
-
return ''
|
|
317
|
-
assert 'cannot infer pixeltable type for parameter array' in str(exc_info.value).lower()
|
|
318
|
-
|
|
319
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
320
|
-
@pxt.udf
|
|
321
|
-
def udf5(name: str, untyped) -> str:
|
|
322
|
-
return ''
|
|
323
|
-
assert 'cannot infer pixeltable type for parameter untyped' in str(exc_info.value).lower()
|
|
324
|
-
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import glob
|
|
2
|
-
import logging
|
|
3
|
-
import os
|
|
4
|
-
import subprocess
|
|
5
|
-
|
|
6
|
-
import pgserver
|
|
7
|
-
import pytest
|
|
8
|
-
|
|
9
|
-
import pixeltable as pxt
|
|
10
|
-
from pixeltable.env import Env
|
|
11
|
-
from pixeltable.tests.conftest import clean_db
|
|
12
|
-
|
|
13
|
-
_logger = logging.getLogger('pixeltable')
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class TestMigration:
|
|
17
|
-
|
|
18
|
-
@pytest.mark.skip(reason='Suspended')
|
|
19
|
-
def test_db_migration(self, init_env) -> None:
|
|
20
|
-
env = Env.get()
|
|
21
|
-
pg_package_dir = os.path.dirname(pgserver.__file__)
|
|
22
|
-
pg_restore_binary = f'{pg_package_dir}/pginstall/bin/pg_restore'
|
|
23
|
-
_logger.info(f'Using pg_restore binary at: {pg_restore_binary}')
|
|
24
|
-
dump_files = glob.glob('pixeltable/tests/data/dbdumps/*.dump.gz')
|
|
25
|
-
dump_files.sort()
|
|
26
|
-
for dump_file in dump_files:
|
|
27
|
-
_logger.info(f'Testing migration from DB dump {dump_file}.')
|
|
28
|
-
_logger.info(f'DB URL: {env.db_url}')
|
|
29
|
-
clean_db(restore_tables=False)
|
|
30
|
-
with open(dump_file, 'rb') as dump:
|
|
31
|
-
gunzip_process = subprocess.Popen(
|
|
32
|
-
["gunzip", "-c"],
|
|
33
|
-
stdin=dump,
|
|
34
|
-
stdout=subprocess.PIPE
|
|
35
|
-
)
|
|
36
|
-
subprocess.run(
|
|
37
|
-
[pg_restore_binary, '-d', env.db_url, '-U', 'postgres'],
|
|
38
|
-
stdin=gunzip_process.stdout,
|
|
39
|
-
check=True
|
|
40
|
-
)
|
|
41
|
-
# TODO(aaron-siegel) This will test that the migration succeeds without raising any exceptions.
|
|
42
|
-
# We should also add some assertions to sanity-check the outcome.
|
|
43
|
-
_ = pxt.Client()
|
pixeltable/tests/test_nos.py
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
import pixeltable as pxt
|
|
4
|
-
from pixeltable.iterators import FrameIterator
|
|
5
|
-
from pixeltable.tests.utils import get_video_files, skip_test_if_not_installed
|
|
6
|
-
from pixeltable.type_system import ImageType, VideoType
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class TestNOS:
|
|
10
|
-
def test_basic(self, test_client: pxt.Client) -> None:
|
|
11
|
-
skip_test_if_not_installed('nos')
|
|
12
|
-
cl = test_client
|
|
13
|
-
video_t = cl.create_table('video_tbl', {'video': VideoType()})
|
|
14
|
-
# create frame view
|
|
15
|
-
args = {'video': video_t.video, 'fps': 1}
|
|
16
|
-
v = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
|
|
17
|
-
v.add_column(transform1=v.frame.rotate(30), stored=False)
|
|
18
|
-
from pixeltable.functions.nos.object_detection_2d import \
|
|
19
|
-
torchvision_fasterrcnn_mobilenet_v3_large_320_fpn as fasterrcnn
|
|
20
|
-
v.add_column(detections=fasterrcnn(v.transform1))
|
|
21
|
-
from pixeltable.functions.nos.image_embedding import openai_clip
|
|
22
|
-
v.add_column(embed=openai_clip(v.transform1.resize([224, 224])))
|
|
23
|
-
# add a stored column that isn't referenced in nos calls
|
|
24
|
-
v.add_column(transform2=v.frame.rotate(60), stored=True)
|
|
25
|
-
|
|
26
|
-
status = video_t.insert(video=get_video_files()[0])
|
|
27
|
-
pass
|
|
28
|
-
|
|
29
|
-
def test_exceptions(self, test_client: pxt.Client) -> None:
|
|
30
|
-
skip_test_if_not_installed('nos')
|
|
31
|
-
cl = test_client
|
|
32
|
-
video_t = cl.create_table('video_tbl', {'video': VideoType()})
|
|
33
|
-
# create frame view
|
|
34
|
-
args = {'video': video_t.video, 'fps': 1}
|
|
35
|
-
v = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
|
|
36
|
-
video_t.insert(video=get_video_files()[0])
|
|
37
|
-
|
|
38
|
-
v.add_column(frame_s=v.frame.resize([640, 480]))
|
|
39
|
-
# 'rotated' has exceptions
|
|
40
|
-
v.add_column(rotated=lambda frame_s, frame_idx: frame_s.rotate(int(360 / frame_idx)), type=ImageType())
|
|
41
|
-
from pixeltable.functions.nos.object_detection_2d import yolox_medium
|
|
42
|
-
v.add_column(detections=yolox_medium(v.rotated), stored=True)
|
|
43
|
-
assert v.where(v.detections.errortype != None).count() == 1
|
|
44
|
-
|
|
45
|
-
@pytest.mark.skip(reason='too slow')
|
|
46
|
-
def test_sd(self, test_client: pxt.Client) -> None:
|
|
47
|
-
skip_test_if_not_installed('nos')
|
|
48
|
-
"""Test model that mixes batched with scalar parameters"""
|
|
49
|
-
t = test_client.create_table('sd_test', {'prompt': pxt.StringType()})
|
|
50
|
-
t.insert(prompt='cat on a sofa')
|
|
51
|
-
from pixeltable.functions.nos.image_generation import stabilityai_stable_diffusion_2 as sd2
|
|
52
|
-
t.add_column(img=sd2(t.prompt, 1, 512, 512), stored=True)
|
|
53
|
-
img = t[t.img].show(1)[0, 0]
|
|
54
|
-
assert img.size == (512, 512)
|
|
@@ -1,208 +0,0 @@
|
|
|
1
|
-
from typing import Any, Dict
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
import pytest
|
|
5
|
-
|
|
6
|
-
import pixeltable as pxt
|
|
7
|
-
import pixeltable.exceptions as excs
|
|
8
|
-
from pixeltable.tests.utils import create_test_tbl, assert_resultset_eq
|
|
9
|
-
from pixeltable.type_system import IntType
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class TestSnapshot:
|
|
13
|
-
def run_basic_test(
|
|
14
|
-
self, cl: pxt.Client, tbl: pxt.Table, snap: pxt.Table, extra_items: Dict[str, Any], filter: Any,
|
|
15
|
-
reload_md: bool
|
|
16
|
-
) -> None:
|
|
17
|
-
tbl_path, snap_path = cl.get_path(tbl), cl.get_path(snap)
|
|
18
|
-
# run the initial query against the base table here, before reloading, otherwise the filter breaks
|
|
19
|
-
tbl_select_list = [tbl[col_name] for col_name in tbl.column_names()]
|
|
20
|
-
tbl_select_list.extend([value_expr for _, value_expr in extra_items.items()])
|
|
21
|
-
orig_resultset = tbl.select(*tbl_select_list).where(filter).order_by(tbl.c2).collect()
|
|
22
|
-
|
|
23
|
-
if reload_md:
|
|
24
|
-
# reload md
|
|
25
|
-
cl = pxt.Client(reload=True)
|
|
26
|
-
tbl = cl.get_table(tbl_path)
|
|
27
|
-
snap = cl.get_table(snap_path)
|
|
28
|
-
|
|
29
|
-
# view select list: base cols followed by view cols
|
|
30
|
-
snap_select_list = [snap[col_name] for col_name in snap.column_names()[len(extra_items):]]
|
|
31
|
-
snap_select_list.extend([snap[col_name] for col_name in extra_items.keys()])
|
|
32
|
-
snap_query = snap.select(*snap_select_list).order_by(snap.c2)
|
|
33
|
-
r1 = list(orig_resultset)
|
|
34
|
-
r2 = list(snap_query.collect())
|
|
35
|
-
assert_resultset_eq(snap_query.collect(), orig_resultset)
|
|
36
|
-
|
|
37
|
-
# adding data to a base table doesn't change the snapshot
|
|
38
|
-
rows = list(tbl.select(tbl.c1, tbl.c1n, tbl.c2, tbl.c3, tbl.c4, tbl.c5, tbl.c6, tbl.c7).collect())
|
|
39
|
-
status = tbl.insert(rows)
|
|
40
|
-
assert status.num_rows == len(rows)
|
|
41
|
-
assert_resultset_eq(snap_query.collect(), orig_resultset)
|
|
42
|
-
|
|
43
|
-
# update() doesn't affect the view
|
|
44
|
-
status = tbl.update({'c3': tbl.c3 + 1.0})
|
|
45
|
-
assert status.num_rows == tbl.count()
|
|
46
|
-
assert_resultset_eq(snap_query.collect(), orig_resultset)
|
|
47
|
-
|
|
48
|
-
# delete() doesn't affect the view
|
|
49
|
-
num_tbl_rows = tbl.count()
|
|
50
|
-
status = tbl.delete()
|
|
51
|
-
assert status.num_rows == num_tbl_rows
|
|
52
|
-
assert_resultset_eq(snap_query.collect(), orig_resultset)
|
|
53
|
-
|
|
54
|
-
tbl.revert() # undo delete()
|
|
55
|
-
tbl.revert() # undo update()
|
|
56
|
-
tbl.revert() # undo insert()
|
|
57
|
-
# can't revert a version referenced by a snapshot
|
|
58
|
-
with pytest.raises(excs.Error) as excinfo:
|
|
59
|
-
tbl.revert()
|
|
60
|
-
assert 'version is needed' in str(excinfo.value)
|
|
61
|
-
|
|
62
|
-
# can't drop a table with snapshots
|
|
63
|
-
with pytest.raises(excs.Error) as excinfo:
|
|
64
|
-
cl.drop_table(tbl_path)
|
|
65
|
-
assert snap_path in str(excinfo.value)
|
|
66
|
-
|
|
67
|
-
cl.drop_table(snap_path)
|
|
68
|
-
cl.drop_table(tbl_path)
|
|
69
|
-
|
|
70
|
-
def test_basic(self, test_client: pxt.Client) -> None:
|
|
71
|
-
cl = test_client
|
|
72
|
-
cl.create_dir('main')
|
|
73
|
-
cl.create_dir('snap')
|
|
74
|
-
tbl_path = 'main.tbl1'
|
|
75
|
-
snap_path = 'snap.snap1'
|
|
76
|
-
|
|
77
|
-
for reload_md in [False, True]:
|
|
78
|
-
for has_filter in [False, True]:
|
|
79
|
-
for has_cols in [False, True]:
|
|
80
|
-
cl = pxt.Client(reload=True)
|
|
81
|
-
tbl = create_test_tbl(name=tbl_path, client=cl)
|
|
82
|
-
schema = {
|
|
83
|
-
'v1': tbl.c3 * 2.0,
|
|
84
|
-
# include a lambda to make sure that is handled correctly
|
|
85
|
-
'v2': {'value': lambda c3: c3 * 2.0, 'type': pxt.FloatType()}
|
|
86
|
-
} if has_cols else {}
|
|
87
|
-
extra_items = {'v1': tbl.c3 * 2.0, 'v2': tbl.c3 * 2.0} if has_cols else {}
|
|
88
|
-
filter = tbl.c2 < 10 if has_filter else None
|
|
89
|
-
snap = cl.create_view(snap_path, tbl, schema=schema, filter=filter, is_snapshot=True)
|
|
90
|
-
self.run_basic_test(cl, tbl, snap, extra_items=extra_items, filter=filter, reload_md=reload_md)
|
|
91
|
-
|
|
92
|
-
def test_views_of_snapshots(self, test_client: pxt.Client) -> None:
|
|
93
|
-
cl = test_client
|
|
94
|
-
t = cl.create_table('tbl', {'a': IntType()})
|
|
95
|
-
rows = [{'a': 1}, {'a': 2}, {'a': 3}]
|
|
96
|
-
status = t.insert(rows)
|
|
97
|
-
assert status.num_rows == len(rows)
|
|
98
|
-
assert status.num_excs == 0
|
|
99
|
-
s1 = cl.create_view('s1', t, is_snapshot=True)
|
|
100
|
-
v1 = cl.create_view('v1', s1, is_snapshot=False)
|
|
101
|
-
s2 = cl.create_view('s2', v1, is_snapshot=True)
|
|
102
|
-
v2 = cl.create_view('v2', s2, is_snapshot=False)
|
|
103
|
-
|
|
104
|
-
def verify(s1: pxt.Table, s2: pxt.Table, v1: pxt.Table, v2: pxt.Table) -> None:
|
|
105
|
-
assert s1.count() == len(rows)
|
|
106
|
-
assert v1.count() == len(rows)
|
|
107
|
-
assert s2.count() == len(rows)
|
|
108
|
-
assert v2.count() == len(rows)
|
|
109
|
-
|
|
110
|
-
verify(s1, s2, v1, v2)
|
|
111
|
-
|
|
112
|
-
status = t.insert(rows)
|
|
113
|
-
assert status.num_rows == len(rows)
|
|
114
|
-
assert status.num_excs == 0
|
|
115
|
-
verify(s1, s2, v1, v2)
|
|
116
|
-
|
|
117
|
-
cl = pxt.Client(reload=True)
|
|
118
|
-
s1 = cl.get_table('s1')
|
|
119
|
-
s2 = cl.get_table('s2')
|
|
120
|
-
v1 = cl.get_table('v1')
|
|
121
|
-
v2 = cl.get_table('v2')
|
|
122
|
-
verify(s1, s2, v1, v2)
|
|
123
|
-
|
|
124
|
-
def test_snapshot_of_view_chain(self, test_client: pxt.Client) -> None:
|
|
125
|
-
cl = test_client
|
|
126
|
-
t = cl.create_table('tbl', {'a': IntType()})
|
|
127
|
-
rows = [{'a': 1}, {'a': 2}, {'a': 3}]
|
|
128
|
-
status = t.insert(rows)
|
|
129
|
-
assert status.num_rows == len(rows)
|
|
130
|
-
assert status.num_excs == 0
|
|
131
|
-
v1 = cl.create_view('v1', t, is_snapshot=False)
|
|
132
|
-
v2 = cl.create_view('v2', v1, is_snapshot=False)
|
|
133
|
-
s = cl.create_view('s', v2, is_snapshot=True)
|
|
134
|
-
|
|
135
|
-
def verify(v1: pxt.Table, v2: pxt.Table, s: pxt.Table) -> None:
|
|
136
|
-
assert v1.count() == t.count()
|
|
137
|
-
assert v2.count() == t.count()
|
|
138
|
-
assert s.count() == len(rows)
|
|
139
|
-
|
|
140
|
-
verify(v1, v2, s)
|
|
141
|
-
|
|
142
|
-
status = t.insert(rows)
|
|
143
|
-
assert status.num_rows == len(rows) * 3 # we also updated 2 views
|
|
144
|
-
assert status.num_excs == 0
|
|
145
|
-
verify(v1, v2, s)
|
|
146
|
-
|
|
147
|
-
cl = pxt.Client(reload=True)
|
|
148
|
-
v1 = cl.get_table('v1')
|
|
149
|
-
v2 = cl.get_table('v2')
|
|
150
|
-
s = cl.get_table('s')
|
|
151
|
-
verify(v1, v2, s)
|
|
152
|
-
|
|
153
|
-
def test_multiple_snapshot_paths(self, test_client: pxt.Client) -> None:
|
|
154
|
-
cl = test_client
|
|
155
|
-
t = create_test_tbl(cl)
|
|
156
|
-
c4 = t.select(t.c4).order_by(t.c2).collect().to_pandas()['c4']
|
|
157
|
-
orig_c3 = t.select(t.c3).collect().to_pandas()['c3']
|
|
158
|
-
v = cl.create_view('v', base=t, schema={'v1': t.c3 + 1})
|
|
159
|
-
s1 = cl.create_view('s1', v, is_snapshot=True)
|
|
160
|
-
t.drop_column('c4')
|
|
161
|
-
# s2 references the same view version as s1, but a different version of t (due to a schema change)
|
|
162
|
-
s2 = cl.create_view('s2', v, is_snapshot=True)
|
|
163
|
-
t.update({'c6': {'a': 17}})
|
|
164
|
-
# s3 references the same view version as s2, but a different version of t (due to a data change)
|
|
165
|
-
s3 = cl.create_view('s3', v, is_snapshot=True)
|
|
166
|
-
t.update({'c3': t.c3 + 1})
|
|
167
|
-
# s4 references different versions of t and v
|
|
168
|
-
s4 = cl.create_view('s4', v, is_snapshot=True)
|
|
169
|
-
|
|
170
|
-
def validate(t: pxt.Table, v: pxt.Table, s1: pxt.Table, s2: pxt.Table, s3: pxt.Table, s4: pxt.Table) -> None:
|
|
171
|
-
# c4 is only visible in s1
|
|
172
|
-
assert np.all(s1.select(s1.c4).collect().to_pandas()['c4'] == c4)
|
|
173
|
-
with pytest.raises(AttributeError):
|
|
174
|
-
_ = t.select(t.c4).collect()
|
|
175
|
-
with pytest.raises(AttributeError):
|
|
176
|
-
_ = v.select(v.c4).collect()
|
|
177
|
-
with pytest.raises(AttributeError):
|
|
178
|
-
_ = s2.select(s2.c4).collect()
|
|
179
|
-
with pytest.raises(AttributeError):
|
|
180
|
-
_ = s3.select(s3.c4).collect()
|
|
181
|
-
with pytest.raises(AttributeError):
|
|
182
|
-
_ = s4.select(s4.c4).collect()
|
|
183
|
-
|
|
184
|
-
# c3
|
|
185
|
-
assert np.all(t.select(t.c3).order_by(t.c2).collect().to_pandas()['c3'] == orig_c3 + 1)
|
|
186
|
-
assert np.all(s1.select(s1.c3).order_by(s1.c2).collect().to_pandas()['c3'] == orig_c3)
|
|
187
|
-
assert np.all(s2.select(s2.c3).order_by(s2.c2).collect().to_pandas()['c3'] == orig_c3)
|
|
188
|
-
assert np.all(s3.select(s3.c3).order_by(s3.c2).collect().to_pandas()['c3'] == orig_c3)
|
|
189
|
-
assert np.all(s4.select(s4.c3).order_by(s4.c2).collect().to_pandas()['c3'] == orig_c3 + 1)
|
|
190
|
-
|
|
191
|
-
# v1
|
|
192
|
-
assert np.all(
|
|
193
|
-
v.select(v.v1).order_by(v.c2).collect().to_pandas()['v1'] == \
|
|
194
|
-
t.select(t.c3).order_by(t.c2).collect().to_pandas()['c3'] + 1)
|
|
195
|
-
assert np.all(s1.select(s1.v1).order_by(s1.c2).collect().to_pandas()['v1'] == orig_c3 + 1)
|
|
196
|
-
assert np.all(s2.select(s2.v1).order_by(s2.c2).collect().to_pandas()['v1'] == orig_c3 + 1)
|
|
197
|
-
assert np.all(s3.select(s3.v1).order_by(s3.c2).collect().to_pandas()['v1'] == orig_c3 + 1)
|
|
198
|
-
assert np.all(
|
|
199
|
-
s4.select(s4.v1).order_by(s4.c2).collect().to_pandas()['v1'] == \
|
|
200
|
-
t.select(t.c3).order_by(t.c2).collect().to_pandas()['c3'] + 1)
|
|
201
|
-
|
|
202
|
-
validate(t, v, s1, s2, s3, s4)
|
|
203
|
-
|
|
204
|
-
# make sure it works after metadata reload
|
|
205
|
-
cl = pxt.Client(reload=True)
|
|
206
|
-
t, v = cl.get_table('test_tbl'), cl.get_table('v')
|
|
207
|
-
s1, s2, s3, s4 = cl.get_table('s1'), cl.get_table('s2'), cl.get_table('s3'), cl.get_table('s4')
|
|
208
|
-
validate(t, v, s1, s2, s3, s4)
|