pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +20 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +23 -7
- pixeltable/catalog/insertable_table.py +32 -19
- pixeltable/catalog/table.py +210 -20
- pixeltable/catalog/table_version.py +272 -111
- pixeltable/catalog/table_version_path.py +6 -1
- pixeltable/dataframe.py +184 -110
- pixeltable/datatransfer/__init__.py +1 -0
- pixeltable/datatransfer/label_studio.py +526 -0
- pixeltable/datatransfer/remote.py +113 -0
- pixeltable/env.py +213 -79
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +11 -2
- pixeltable/exprs/comparison.py +39 -1
- pixeltable/exprs/data_row.py +7 -0
- pixeltable/exprs/expr.py +26 -19
- pixeltable/exprs/function_call.py +17 -18
- pixeltable/exprs/globals.py +14 -2
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/row_builder.py +7 -1
- pixeltable/exprs/similarity_expr.py +67 -0
- pixeltable/ext/functions/whisperx.py +30 -0
- pixeltable/ext/functions/yolox.py +16 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +5 -2
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +14 -3
- pixeltable/func/function.py +35 -4
- pixeltable/func/signature.py +5 -15
- pixeltable/func/udf.py +8 -12
- pixeltable/functions/fireworks.py +9 -4
- pixeltable/functions/huggingface.py +48 -5
- pixeltable/functions/openai.py +49 -11
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +32 -6
- pixeltable/functions/util.py +0 -43
- pixeltable/functions/video.py +46 -8
- pixeltable/globals.py +443 -0
- pixeltable/index/__init__.py +1 -0
- pixeltable/index/base.py +9 -2
- pixeltable/index/btree.py +54 -0
- pixeltable/index/embedding_index.py +91 -15
- pixeltable/io/__init__.py +4 -0
- pixeltable/io/globals.py +59 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +8 -4
- pixeltable/iterators/document.py +225 -93
- pixeltable/iterators/video.py +16 -9
- pixeltable/metadata/__init__.py +8 -4
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/converters/convert_14.py +13 -0
- pixeltable/metadata/converters/convert_15.py +29 -0
- pixeltable/metadata/converters/util.py +63 -0
- pixeltable/metadata/schema.py +12 -6
- pixeltable/plan.py +11 -24
- pixeltable/store.py +16 -23
- pixeltable/tool/create_test_db_dump.py +49 -14
- pixeltable/type_system.py +27 -58
- pixeltable/utils/coco.py +94 -0
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- pixeltable-0.2.7.dist-info/METADATA +137 -0
- pixeltable-0.2.7.dist-info/RECORD +126 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -600
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/func/nos_function.py +0 -202
- pixeltable/tests/conftest.py +0 -171
- pixeltable/tests/ext/test_yolox.py +0 -21
- pixeltable/tests/functions/test_fireworks.py +0 -43
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -162
- pixeltable/tests/functions/test_together.py +0 -112
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -379
- pixeltable/tests/test_dataframe.py +0 -440
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -802
- pixeltable/tests/test_function.py +0 -332
- pixeltable/tests/test_index.py +0 -138
- pixeltable/tests/test_migration.py +0 -44
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -231
- pixeltable/tests/test_table.py +0 -1343
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -52
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -535
- pixeltable/tests/utils.py +0 -442
- pixeltable/utils/clip.py +0 -18
- pixeltable-0.2.5.dist-info/METADATA +0 -128
- pixeltable-0.2.5.dist-info/RECORD +0 -139
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
|
@@ -1,332 +0,0 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
import pytest
|
|
5
|
-
|
|
6
|
-
import pixeltable as pxt
|
|
7
|
-
import pixeltable.exceptions as excs
|
|
8
|
-
from pixeltable import catalog
|
|
9
|
-
from pixeltable.func import Function, FunctionRegistry, Batch
|
|
10
|
-
from pixeltable.type_system import IntType, FloatType
|
|
11
|
-
from pixeltable.tests.utils import assert_resultset_eq
|
|
12
|
-
import pixeltable.func as func
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def dummy_fn(i: int) -> int:
|
|
16
|
-
return i
|
|
17
|
-
|
|
18
|
-
class TestFunction:
|
|
19
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType()])
|
|
20
|
-
def func(x: int) -> int:
|
|
21
|
-
return x + 1
|
|
22
|
-
|
|
23
|
-
@pxt.uda(value_type=IntType(), update_types=[IntType()])
|
|
24
|
-
class agg:
|
|
25
|
-
def __init__(self):
|
|
26
|
-
self.sum = 0
|
|
27
|
-
def update(self, val: int) -> None:
|
|
28
|
-
if val is not None:
|
|
29
|
-
self.sum += val
|
|
30
|
-
def value(self) -> int:
|
|
31
|
-
return self.sum
|
|
32
|
-
|
|
33
|
-
def test_serialize_anonymous(self, init_env) -> None:
|
|
34
|
-
d = self.func.as_dict()
|
|
35
|
-
FunctionRegistry.get().clear_cache()
|
|
36
|
-
deserialized = Function.from_dict(d)
|
|
37
|
-
# TODO: add Function.exec() and then use that
|
|
38
|
-
assert deserialized.py_fn(1) == 2
|
|
39
|
-
|
|
40
|
-
@pytest.mark.skip(reason='deprecated')
|
|
41
|
-
def test_create(self, test_client: pxt.Client) -> None:
|
|
42
|
-
cl = test_client
|
|
43
|
-
cl.create_function('test_fn', self.func)
|
|
44
|
-
assert self.func.md.fqn == 'test_fn'
|
|
45
|
-
FunctionRegistry.get().clear_cache()
|
|
46
|
-
cl = pxt.Client(reload=True)
|
|
47
|
-
_ = cl.list_functions()
|
|
48
|
-
fn2 = cl.get_function('test_fn')
|
|
49
|
-
assert fn2.md.fqn == 'test_fn'
|
|
50
|
-
assert fn2.py_fn(1) == 2
|
|
51
|
-
|
|
52
|
-
with pytest.raises(excs.Error):
|
|
53
|
-
cl.create_function('test_fn', self.func)
|
|
54
|
-
with pytest.raises(excs.Error):
|
|
55
|
-
cl.create_function('dir1.test_fn', self.func)
|
|
56
|
-
with pytest.raises(excs.Error):
|
|
57
|
-
library_fn = make_library_function(IntType(), [IntType()], __name__, 'dummy_fn')
|
|
58
|
-
cl.create_function('library_fn', library_fn)
|
|
59
|
-
|
|
60
|
-
@pytest.mark.skip(reason='deprecated')
|
|
61
|
-
def test_update(self, test_client: pxt.Client, test_tbl: catalog.Table) -> None:
|
|
62
|
-
cl = test_client
|
|
63
|
-
t = test_tbl
|
|
64
|
-
cl.create_function('test_fn', self.func)
|
|
65
|
-
res1 = t[self.func(t.c2)].show(0).to_pandas()
|
|
66
|
-
|
|
67
|
-
# load function from db and make sure it computes the same thing as before
|
|
68
|
-
FunctionRegistry.get().clear_cache()
|
|
69
|
-
cl = pxt.Client(reload=True)
|
|
70
|
-
fn = cl.get_function('test_fn')
|
|
71
|
-
res2 = t[fn(t.c2)].show(0).to_pandas()
|
|
72
|
-
assert res1.col_0.equals(res2.col_0)
|
|
73
|
-
fn.py_fn = lambda x: x + 2
|
|
74
|
-
cl.update_function('test_fn', fn)
|
|
75
|
-
assert self.func.md.fqn == fn.md.fqn # fqn doesn't change
|
|
76
|
-
|
|
77
|
-
FunctionRegistry.get().clear_cache()
|
|
78
|
-
cl = pxt.Client(reload=True)
|
|
79
|
-
fn = cl.get_function('test_fn')
|
|
80
|
-
assert self.func.md.fqn == fn.md.fqn # fqn doesn't change
|
|
81
|
-
res3 = t[fn(t.c2)].show(0).to_pandas()
|
|
82
|
-
assert (res2.col_0 + 1).equals(res3.col_0)
|
|
83
|
-
|
|
84
|
-
# signature changes
|
|
85
|
-
with pytest.raises(excs.Error):
|
|
86
|
-
cl.update_function('test_fn', make_function(FloatType(), [IntType()], fn.py_fn))
|
|
87
|
-
with pytest.raises(excs.Error):
|
|
88
|
-
cl.update_function('test_fn', make_function(IntType(), [FloatType()], fn.py_fn))
|
|
89
|
-
with pytest.raises(excs.Error):
|
|
90
|
-
cl.update_function('test_fn', self.agg)
|
|
91
|
-
|
|
92
|
-
@pytest.mark.skip(reason='deprecated')
|
|
93
|
-
def test_move(self, test_client: pxt.Client) -> None:
|
|
94
|
-
cl = test_client
|
|
95
|
-
cl.create_function('test_fn', self.func)
|
|
96
|
-
|
|
97
|
-
FunctionRegistry.get().clear_cache()
|
|
98
|
-
cl = pxt.Client(reload=True)
|
|
99
|
-
with pytest.raises(excs.Error):
|
|
100
|
-
cl.move('test_fn2', 'test_fn')
|
|
101
|
-
cl.move('test_fn', 'test_fn2')
|
|
102
|
-
func = cl.get_function('test_fn2')
|
|
103
|
-
assert func.py_fn(1) == 2
|
|
104
|
-
assert func.md.fqn == 'test_fn2'
|
|
105
|
-
|
|
106
|
-
with pytest.raises(excs.Error):
|
|
107
|
-
_ = cl.get_function('test_fn')
|
|
108
|
-
|
|
109
|
-
# move function between directories
|
|
110
|
-
cl.create_dir('functions')
|
|
111
|
-
cl.create_dir('functions2')
|
|
112
|
-
cl.create_function('functions.func1', self.func)
|
|
113
|
-
with pytest.raises(excs.Error):
|
|
114
|
-
cl.move('functions2.func1', 'functions.func1')
|
|
115
|
-
cl.move('functions.func1', 'functions2.func1')
|
|
116
|
-
func = cl.get_function('functions2.func1')
|
|
117
|
-
assert func.md.fqn == 'functions2.func1'
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
FunctionRegistry.get().clear_cache()
|
|
121
|
-
cl = pxt.Client(reload=True)
|
|
122
|
-
func = cl.get_function('functions2.func1')
|
|
123
|
-
assert func.py_fn(1) == 2
|
|
124
|
-
assert func.md.fqn == 'functions2.func1'
|
|
125
|
-
with pytest.raises(excs.Error):
|
|
126
|
-
_ = cl.get_function('functions.func1')
|
|
127
|
-
|
|
128
|
-
@pytest.mark.skip(reason='deprecated')
|
|
129
|
-
def test_drop(self, test_client: pxt.Client) -> None:
|
|
130
|
-
cl = test_client
|
|
131
|
-
cl.create_function('test_fn', self.func)
|
|
132
|
-
FunctionRegistry.get().clear_cache()
|
|
133
|
-
cl = pxt.Client(reload=True)
|
|
134
|
-
cl.drop_function('test_fn')
|
|
135
|
-
|
|
136
|
-
with pytest.raises(excs.Error):
|
|
137
|
-
_ = cl.get_function('test_fn')
|
|
138
|
-
|
|
139
|
-
def test_list(self, test_client: pxt.Client) -> None:
|
|
140
|
-
_ = FunctionRegistry.get().list_functions()
|
|
141
|
-
print(_)
|
|
142
|
-
|
|
143
|
-
def test_stored_udf(self, test_client: pxt.Client) -> None:
|
|
144
|
-
cl = test_client
|
|
145
|
-
t = cl.create_table('test', {'c1': pxt.IntType(), 'c2': pxt.FloatType()})
|
|
146
|
-
rows = [{'c1': i, 'c2': i + 0.5} for i in range(100)]
|
|
147
|
-
status = t.insert(rows)
|
|
148
|
-
assert status.num_rows == len(rows)
|
|
149
|
-
assert status.num_excs == 0
|
|
150
|
-
|
|
151
|
-
@pxt.udf(_force_stored=True)
|
|
152
|
-
def f1(a: int, b: float) -> float:
|
|
153
|
-
return a + b
|
|
154
|
-
t['f1'] = f1(t.c1, t.c2)
|
|
155
|
-
|
|
156
|
-
func.FunctionRegistry.get().clear_cache()
|
|
157
|
-
cl = pxt.Client(reload=True)
|
|
158
|
-
t = cl.get_table('test')
|
|
159
|
-
status = t.insert(rows)
|
|
160
|
-
assert status.num_rows == len(rows)
|
|
161
|
-
assert status.num_excs == 0
|
|
162
|
-
|
|
163
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType(), FloatType(), FloatType(), FloatType()])
|
|
164
|
-
def f1(a: int, b: float, c: float = 0.0, d: float = 1.0) -> float:
|
|
165
|
-
return a + b + c + d
|
|
166
|
-
|
|
167
|
-
@pxt.udf(
|
|
168
|
-
return_type=IntType(),
|
|
169
|
-
param_types=[IntType(nullable=True), FloatType(nullable=False), FloatType(nullable=True)])
|
|
170
|
-
def f2(a: int, b: float = 0.0, c: float = 1.0) -> float:
|
|
171
|
-
return (0.0 if a is None else a) + b + (0.0 if c is None else c)
|
|
172
|
-
|
|
173
|
-
def test_call(self, test_tbl: catalog.Table) -> None:
|
|
174
|
-
t = test_tbl
|
|
175
|
-
|
|
176
|
-
r0 = t[t.c2, t.c3].show(0).to_pandas()
|
|
177
|
-
# positional params with default args
|
|
178
|
-
r1 = t[self.f1(t.c2, t.c3)].show(0).to_pandas()['col_0']
|
|
179
|
-
assert np.all(r1 == r0.c2 + r0.c3 + 1.0)
|
|
180
|
-
# kw args only
|
|
181
|
-
r2 = t[self.f1(c=0.0, b=t.c3, a=t.c2)].show(0).to_pandas()['col_0']
|
|
182
|
-
assert np.all(r1 == r2)
|
|
183
|
-
# overriding default args
|
|
184
|
-
r3 = t[self.f1(d=0.0, c=1.0, b=t.c3, a=t.c2)].show(0).to_pandas()['col_0']
|
|
185
|
-
assert np.all(r2 == r3)
|
|
186
|
-
# overriding default with positional arg
|
|
187
|
-
r4 = t[self.f1(t.c2, t.c3, 0.0)].show(0).to_pandas()['col_0']
|
|
188
|
-
assert np.all(r3 == r4)
|
|
189
|
-
# overriding default with positional arg and kw arg
|
|
190
|
-
r5 = t[self.f1(t.c2, t.c3, 1.0, d=0.0)].show(0).to_pandas()['col_0']
|
|
191
|
-
assert np.all(r4 == r5)
|
|
192
|
-
# d is kwarg
|
|
193
|
-
r6 = t[self.f1(t.c2, d=1.0, b=t.c3)].show(0).to_pandas()['col_0']
|
|
194
|
-
assert np.all(r5 == r6)
|
|
195
|
-
# d is Expr kwarg
|
|
196
|
-
r6 = t[self.f1(1, d=t.c3, b=t.c3)].show(0).to_pandas()['col_0']
|
|
197
|
-
assert np.all(r5 == r6)
|
|
198
|
-
|
|
199
|
-
# test handling of Nones
|
|
200
|
-
r0 = t[self.f2(1, t.c3)].show(0).to_pandas()['col_0']
|
|
201
|
-
r1 = t[self.f2(None, t.c3, 2.0)].show(0).to_pandas()['col_0']
|
|
202
|
-
assert np.all(r0 == r1)
|
|
203
|
-
r2 = t[self.f2(2, t.c3, None)].show(0).to_pandas()['col_0']
|
|
204
|
-
assert np.all(r1 == r2)
|
|
205
|
-
# kwarg with None
|
|
206
|
-
r3 = t[self.f2(c=None, a=t.c2)].show(0).to_pandas()['col_0']
|
|
207
|
-
# kwarg with Expr
|
|
208
|
-
r4 = t[self.f2(c=t.c3, a=None)].show(0).to_pandas()['col_0']
|
|
209
|
-
assert np.all(r3 == r4)
|
|
210
|
-
|
|
211
|
-
with pytest.raises(TypeError) as exc_info:
|
|
212
|
-
_ = t[self.f1(t.c2, c=0.0)].show(0)
|
|
213
|
-
assert "'b'" in str(exc_info.value)
|
|
214
|
-
with pytest.raises(TypeError) as exc_info:
|
|
215
|
-
_ = t[self.f1(t.c2)].show(0)
|
|
216
|
-
assert "'b'" in str(exc_info.value)
|
|
217
|
-
with pytest.raises(TypeError) as exc_info:
|
|
218
|
-
_ = t[self.f1(c=1.0, a=t.c2)].show(0)
|
|
219
|
-
assert "'b'" in str(exc_info.value)
|
|
220
|
-
|
|
221
|
-
# bad default value
|
|
222
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
223
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType(), FloatType(), FloatType()])
|
|
224
|
-
def f1(a: int, b: float, c: str = '') -> float:
|
|
225
|
-
return a + b + c
|
|
226
|
-
assert 'default value' in str(exc_info.value).lower()
|
|
227
|
-
# missing param type
|
|
228
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
229
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType(), FloatType()])
|
|
230
|
-
def f1(a: int, b: float, c: str = '') -> float:
|
|
231
|
-
return a + b + c
|
|
232
|
-
assert 'missing type for parameter c' in str(exc_info.value).lower()
|
|
233
|
-
# bad parameter name
|
|
234
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
235
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType()])
|
|
236
|
-
def f1(group_by: int) -> int:
|
|
237
|
-
return group_by
|
|
238
|
-
assert 'reserved' in str(exc_info.value)
|
|
239
|
-
# bad parameter name
|
|
240
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
241
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType()])
|
|
242
|
-
def f1(order_by: int) -> int:
|
|
243
|
-
return order_by
|
|
244
|
-
assert 'reserved' in str(exc_info.value)
|
|
245
|
-
|
|
246
|
-
@pxt.expr_udf
|
|
247
|
-
def add1(x: int) -> int:
|
|
248
|
-
return x + 1
|
|
249
|
-
|
|
250
|
-
@pxt.expr_udf
|
|
251
|
-
def add2(x: int, y: int):
|
|
252
|
-
return x + y
|
|
253
|
-
|
|
254
|
-
@pxt.expr_udf
|
|
255
|
-
def add2_with_default(x: int, y: int = 1) -> int:
|
|
256
|
-
return x + y
|
|
257
|
-
|
|
258
|
-
def test_expr_udf(self, test_tbl: catalog.Table) -> None:
|
|
259
|
-
t = test_tbl
|
|
260
|
-
|
|
261
|
-
res1 = t.select(out=self.add1(t.c2)).order_by(t.c2).collect()
|
|
262
|
-
res2 = t.select(t.c2 + 1).order_by(t.c2).collect()
|
|
263
|
-
assert_resultset_eq(res1, res2)
|
|
264
|
-
|
|
265
|
-
# return type inferred from expression
|
|
266
|
-
res1 = t.select(out=self.add2(t.c2, t.c2)).order_by(t.c2).collect()
|
|
267
|
-
res2 = t.select(t.c2 * 2).order_by(t.c2).collect()
|
|
268
|
-
assert_resultset_eq(res1, res2)
|
|
269
|
-
|
|
270
|
-
with pytest.raises(TypeError) as exc_info:
|
|
271
|
-
_ = t.select(self.add1(y=t.c2)).collect()
|
|
272
|
-
assert 'missing a required argument' in str(exc_info.value).lower()
|
|
273
|
-
|
|
274
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
275
|
-
# parameter types cannot be inferred
|
|
276
|
-
@pxt.expr_udf
|
|
277
|
-
def add1(x, y) -> int:
|
|
278
|
-
return x + y
|
|
279
|
-
assert 'cannot infer pixeltable type' in str(exc_info.value).lower()
|
|
280
|
-
|
|
281
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
282
|
-
# missing param types
|
|
283
|
-
@pxt.expr_udf(param_types=[IntType()])
|
|
284
|
-
def add1(x, y) -> int:
|
|
285
|
-
return x + y
|
|
286
|
-
assert 'missing type for parameter y' in str(exc_info.value).lower()
|
|
287
|
-
|
|
288
|
-
with pytest.raises(TypeError) as exc_info:
|
|
289
|
-
# signature has correct parameter kind
|
|
290
|
-
@pxt.expr_udf
|
|
291
|
-
def add1(*, x: int) -> int:
|
|
292
|
-
return x + y
|
|
293
|
-
_ = t.select(add1(t.c2)).collect()
|
|
294
|
-
assert 'takes 0 positional arguments' in str(exc_info.value).lower()
|
|
295
|
-
|
|
296
|
-
res1 = t.select(out=self.add2_with_default(t.c2)).order_by(t.c2).collect()
|
|
297
|
-
res2 = t.select(out=self.add2(t.c2, 1)).order_by(t.c2).collect()
|
|
298
|
-
assert_resultset_eq(res1, res2)
|
|
299
|
-
|
|
300
|
-
# Test that various invalid udf definitions generate
|
|
301
|
-
# correct error messages.
|
|
302
|
-
def test_invalid_udfs(self):
|
|
303
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
304
|
-
@pxt.udf
|
|
305
|
-
def udf1(name: Batch[str]) -> str:
|
|
306
|
-
return ''
|
|
307
|
-
assert 'batched parameters in udf, but no `batch_size` given' in str(exc_info.value).lower()
|
|
308
|
-
|
|
309
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
310
|
-
@pxt.udf(batch_size=32)
|
|
311
|
-
def udf2(name: Batch[str]) -> str:
|
|
312
|
-
return ''
|
|
313
|
-
assert 'batch_size is specified; Python return type must be a `Batch`' in str(exc_info.value)
|
|
314
|
-
|
|
315
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
316
|
-
@pxt.udf
|
|
317
|
-
def udf3(name: str) -> Optional[np.ndarray]:
|
|
318
|
-
return None
|
|
319
|
-
assert 'cannot infer pixeltable return type' in str(exc_info.value).lower()
|
|
320
|
-
|
|
321
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
322
|
-
@pxt.udf
|
|
323
|
-
def udf4(array: np.ndarray) -> str:
|
|
324
|
-
return ''
|
|
325
|
-
assert 'cannot infer pixeltable type for parameter array' in str(exc_info.value).lower()
|
|
326
|
-
|
|
327
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
328
|
-
@pxt.udf
|
|
329
|
-
def udf5(name: str, untyped) -> str:
|
|
330
|
-
return ''
|
|
331
|
-
assert 'cannot infer pixeltable type for parameter untyped' in str(exc_info.value).lower()
|
|
332
|
-
|
pixeltable/tests/test_index.py
DELETED
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
import PIL.Image
|
|
2
|
-
import numpy as np
|
|
3
|
-
import pytest
|
|
4
|
-
|
|
5
|
-
import pixeltable as pxt
|
|
6
|
-
from pixeltable.functions.huggingface import clip_image, clip_text
|
|
7
|
-
from pixeltable.tests.utils import text_embed, img_embed, skip_test_if_not_installed
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class TestIndex:
|
|
11
|
-
|
|
12
|
-
# wrong signature
|
|
13
|
-
@pxt.udf
|
|
14
|
-
def bad_embed(x: str) -> str:
|
|
15
|
-
return x
|
|
16
|
-
|
|
17
|
-
def test_embedding_basic(self, img_tbl: pxt.Table, test_tbl: pxt.Table) -> None:
|
|
18
|
-
skip_test_if_not_installed('transformers')
|
|
19
|
-
img_t = img_tbl
|
|
20
|
-
rows = list(img_t.select(img=img_t.img.fileurl, category=img_t.category, split=img_t.split).collect())
|
|
21
|
-
# create table with fewer rows to speed up testing
|
|
22
|
-
cl = pxt.Client()
|
|
23
|
-
schema = {
|
|
24
|
-
'img': pxt.ImageType(nullable=False),
|
|
25
|
-
'category': pxt.StringType(nullable=False),
|
|
26
|
-
'split': pxt.StringType(nullable=False),
|
|
27
|
-
}
|
|
28
|
-
tbl_name = 'index_test'
|
|
29
|
-
img_t = cl.create_table(tbl_name, schema=schema)
|
|
30
|
-
img_t.insert(rows[:30])
|
|
31
|
-
|
|
32
|
-
img_t.add_embedding_index('img', img_embed=img_embed, text_embed=text_embed)
|
|
33
|
-
|
|
34
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
35
|
-
# duplicate name
|
|
36
|
-
img_t.add_embedding_index('img', idx_name='idx0', img_embed=img_embed)
|
|
37
|
-
assert 'duplicate index name' in str(exc_info.value).lower()
|
|
38
|
-
|
|
39
|
-
img_t.add_embedding_index('category', text_embed=text_embed)
|
|
40
|
-
# revert() removes the index
|
|
41
|
-
img_t.revert()
|
|
42
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
43
|
-
img_t.drop_index(column_name='category')
|
|
44
|
-
assert 'does not have an index' in str(exc_info.value).lower()
|
|
45
|
-
|
|
46
|
-
rows = list(img_t.collect())
|
|
47
|
-
status = img_t.update({'split': 'other'}, where=img_t.split == 'test')
|
|
48
|
-
assert status.num_excs == 0
|
|
49
|
-
|
|
50
|
-
status = img_t.delete()
|
|
51
|
-
assert status.num_excs == 0
|
|
52
|
-
|
|
53
|
-
# revert delete()
|
|
54
|
-
img_t.revert()
|
|
55
|
-
# revert update()
|
|
56
|
-
img_t.revert()
|
|
57
|
-
|
|
58
|
-
# make sure we can still do DML after reloading the metadata
|
|
59
|
-
cl = pxt.Client(reload=True)
|
|
60
|
-
img_t = cl.get_table(tbl_name)
|
|
61
|
-
status = img_t.insert(rows)
|
|
62
|
-
assert status.num_excs == 0
|
|
63
|
-
|
|
64
|
-
status = img_t.update({'split': 'other'}, where=img_t.split == 'test')
|
|
65
|
-
assert status.num_excs == 0
|
|
66
|
-
|
|
67
|
-
status = img_t.delete()
|
|
68
|
-
assert status.num_excs == 0
|
|
69
|
-
|
|
70
|
-
# revert delete()
|
|
71
|
-
img_t.revert()
|
|
72
|
-
# revert update()
|
|
73
|
-
img_t.revert()
|
|
74
|
-
|
|
75
|
-
img_t.drop_index(idx_name='idx0')
|
|
76
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
77
|
-
img_t.drop_index(column_name='img')
|
|
78
|
-
assert 'does not have an index' in str(exc_info.value).lower()
|
|
79
|
-
|
|
80
|
-
# revert() makes the index reappear
|
|
81
|
-
img_t.revert()
|
|
82
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
83
|
-
img_t.add_embedding_index('img', idx_name='idx0', img_embed=img_embed)
|
|
84
|
-
assert 'duplicate index name' in str(exc_info.value).lower()
|
|
85
|
-
|
|
86
|
-
# dropping the indexed column also drops indices
|
|
87
|
-
img_t.drop_column('img')
|
|
88
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
89
|
-
img_t.drop_index(idx_name='idx0')
|
|
90
|
-
assert 'does not exist' in str(exc_info.value).lower()
|
|
91
|
-
|
|
92
|
-
def test_errors(self, img_tbl: pxt.Table, test_tbl: pxt.Table) -> None:
|
|
93
|
-
img_t = img_tbl
|
|
94
|
-
rows = list(img_t.select(img=img_t.img.fileurl, category=img_t.category, split=img_t.split).collect())
|
|
95
|
-
# create table with fewer rows to speed up testing
|
|
96
|
-
cl = pxt.Client()
|
|
97
|
-
schema = {
|
|
98
|
-
'img': pxt.ImageType(nullable=False),
|
|
99
|
-
'category': pxt.StringType(nullable=False),
|
|
100
|
-
'split': pxt.StringType(nullable=False),
|
|
101
|
-
}
|
|
102
|
-
tbl_name = 'index_test'
|
|
103
|
-
img_t = cl.create_table(tbl_name, schema=schema)
|
|
104
|
-
img_t.insert(rows[:30])
|
|
105
|
-
|
|
106
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
107
|
-
# unknown column
|
|
108
|
-
img_t.add_embedding_index('does_not_exist', idx_name='idx0', img_embed=img_embed)
|
|
109
|
-
assert 'column does_not_exist unknown' in str(exc_info.value).lower()
|
|
110
|
-
|
|
111
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
112
|
-
# wrong column type
|
|
113
|
-
test_tbl.add_embedding_index('c2', img_embed=img_embed)
|
|
114
|
-
assert 'requires string or image column' in str(exc_info.value).lower()
|
|
115
|
-
|
|
116
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
117
|
-
# missing embedding function
|
|
118
|
-
img_tbl.add_embedding_index('img', text_embed=text_embed)
|
|
119
|
-
assert 'image embedding function is required' in str(exc_info.value).lower()
|
|
120
|
-
|
|
121
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
122
|
-
# wrong signature
|
|
123
|
-
img_tbl.add_embedding_index('img', img_embed=clip_image)
|
|
124
|
-
assert 'but has signature' in str(exc_info.value).lower()
|
|
125
|
-
|
|
126
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
127
|
-
# missing embedding function
|
|
128
|
-
img_tbl.add_embedding_index('category', img_embed=img_embed)
|
|
129
|
-
assert 'text embedding function is required' in str(exc_info.value).lower()
|
|
130
|
-
|
|
131
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
132
|
-
# wrong signature
|
|
133
|
-
img_tbl.add_embedding_index('category', text_embed=clip_text)
|
|
134
|
-
assert 'but has signature' in str(exc_info.value).lower()
|
|
135
|
-
|
|
136
|
-
with pytest.raises(pxt.Error) as exc_info:
|
|
137
|
-
img_tbl.add_embedding_index('category', text_embed=self.bad_embed)
|
|
138
|
-
assert 'must return an array' in str(exc_info.value).lower()
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import glob
|
|
2
|
-
import logging
|
|
3
|
-
import os
|
|
4
|
-
import platform
|
|
5
|
-
import subprocess
|
|
6
|
-
|
|
7
|
-
import pgserver
|
|
8
|
-
import pytest
|
|
9
|
-
|
|
10
|
-
import pixeltable as pxt
|
|
11
|
-
from pixeltable.env import Env
|
|
12
|
-
from pixeltable.tests.conftest import clean_db
|
|
13
|
-
|
|
14
|
-
_logger = logging.getLogger('pixeltable')
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class TestMigration:
|
|
18
|
-
|
|
19
|
-
@pytest.mark.skipif(platform.system() == 'Windows', reason='Does not run on Windows')
|
|
20
|
-
def test_db_migration(self, init_env) -> None:
|
|
21
|
-
env = Env.get()
|
|
22
|
-
pg_package_dir = os.path.dirname(pgserver.__file__)
|
|
23
|
-
pg_restore_binary = f'{pg_package_dir}/pginstall/bin/pg_restore'
|
|
24
|
-
_logger.info(f'Using pg_restore binary at: {pg_restore_binary}')
|
|
25
|
-
dump_files = glob.glob('pixeltable/tests/data/dbdumps/*.dump.gz')
|
|
26
|
-
dump_files.sort()
|
|
27
|
-
for dump_file in dump_files:
|
|
28
|
-
_logger.info(f'Testing migration from DB dump {dump_file}.')
|
|
29
|
-
_logger.info(f'DB URL: {env.db_url}')
|
|
30
|
-
clean_db(restore_tables=False)
|
|
31
|
-
with open(dump_file, 'rb') as dump:
|
|
32
|
-
gunzip_process = subprocess.Popen(
|
|
33
|
-
["gunzip", "-c"],
|
|
34
|
-
stdin=dump,
|
|
35
|
-
stdout=subprocess.PIPE
|
|
36
|
-
)
|
|
37
|
-
subprocess.run(
|
|
38
|
-
[pg_restore_binary, '-d', env.db_url, '-U', 'postgres'],
|
|
39
|
-
stdin=gunzip_process.stdout,
|
|
40
|
-
check=True
|
|
41
|
-
)
|
|
42
|
-
# TODO(aaron-siegel) This will test that the migration succeeds without raising any exceptions.
|
|
43
|
-
# We should also add some assertions to sanity-check the outcome.
|
|
44
|
-
_ = pxt.Client()
|
pixeltable/tests/test_nos.py
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
import pixeltable as pxt
|
|
4
|
-
from pixeltable.iterators import FrameIterator
|
|
5
|
-
from pixeltable.tests.utils import get_video_files, skip_test_if_not_installed
|
|
6
|
-
from pixeltable.type_system import ImageType, VideoType
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class TestNOS:
|
|
10
|
-
def test_basic(self, test_client: pxt.Client) -> None:
|
|
11
|
-
skip_test_if_not_installed('nos')
|
|
12
|
-
cl = test_client
|
|
13
|
-
video_t = cl.create_table('video_tbl', {'video': VideoType()})
|
|
14
|
-
# create frame view
|
|
15
|
-
args = {'video': video_t.video, 'fps': 1}
|
|
16
|
-
v = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
|
|
17
|
-
v.add_column(transform1=v.frame.rotate(30), stored=False)
|
|
18
|
-
from pixeltable.functions.nos.object_detection_2d import \
|
|
19
|
-
torchvision_fasterrcnn_mobilenet_v3_large_320_fpn as fasterrcnn
|
|
20
|
-
v.add_column(detections=fasterrcnn(v.transform1))
|
|
21
|
-
from pixeltable.functions.nos.image_embedding import openai_clip
|
|
22
|
-
v.add_column(embed=openai_clip(v.transform1.resize([224, 224])))
|
|
23
|
-
# add a stored column that isn't referenced in nos calls
|
|
24
|
-
v.add_column(transform2=v.frame.rotate(60), stored=True)
|
|
25
|
-
|
|
26
|
-
status = video_t.insert(video=get_video_files()[0])
|
|
27
|
-
pass
|
|
28
|
-
|
|
29
|
-
def test_exceptions(self, test_client: pxt.Client) -> None:
|
|
30
|
-
skip_test_if_not_installed('nos')
|
|
31
|
-
cl = test_client
|
|
32
|
-
video_t = cl.create_table('video_tbl', {'video': VideoType()})
|
|
33
|
-
# create frame view
|
|
34
|
-
args = {'video': video_t.video, 'fps': 1}
|
|
35
|
-
v = cl.create_view('test_view', video_t, iterator_class=FrameIterator, iterator_args=args)
|
|
36
|
-
video_t.insert(video=get_video_files()[0])
|
|
37
|
-
|
|
38
|
-
v.add_column(frame_s=v.frame.resize([640, 480]))
|
|
39
|
-
# 'rotated' has exceptions
|
|
40
|
-
v.add_column(rotated=lambda frame_s, frame_idx: frame_s.rotate(int(360 / frame_idx)), type=ImageType())
|
|
41
|
-
from pixeltable.functions.nos.object_detection_2d import yolox_medium
|
|
42
|
-
v.add_column(detections=yolox_medium(v.rotated), stored=True)
|
|
43
|
-
assert v.where(v.detections.errortype != None).count() == 1
|
|
44
|
-
|
|
45
|
-
@pytest.mark.skip(reason='too slow')
|
|
46
|
-
def test_sd(self, test_client: pxt.Client) -> None:
|
|
47
|
-
skip_test_if_not_installed('nos')
|
|
48
|
-
"""Test model that mixes batched with scalar parameters"""
|
|
49
|
-
t = test_client.create_table('sd_test', {'prompt': pxt.StringType()})
|
|
50
|
-
t.insert(prompt='cat on a sofa')
|
|
51
|
-
from pixeltable.functions.nos.image_generation import stabilityai_stable_diffusion_2 as sd2
|
|
52
|
-
t.add_column(img=sd2(t.prompt, 1, 512, 512), stored=True)
|
|
53
|
-
img = t[t.img].show(1)[0, 0]
|
|
54
|
-
assert img.size == (512, 512)
|