pixeltable 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/catalog/column.py +25 -48
- pixeltable/catalog/insertable_table.py +7 -4
- pixeltable/catalog/table.py +163 -57
- pixeltable/catalog/table_version.py +416 -140
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/client.py +0 -4
- pixeltable/dataframe.py +65 -21
- pixeltable/env.py +16 -1
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/in_memory_data_node.py +11 -7
- pixeltable/exprs/comparison.py +3 -3
- pixeltable/exprs/data_row.py +5 -1
- pixeltable/exprs/literal.py +16 -4
- pixeltable/exprs/row_builder.py +8 -40
- pixeltable/ext/__init__.py +5 -0
- pixeltable/ext/functions/yolox.py +92 -0
- pixeltable/func/aggregate_function.py +15 -15
- pixeltable/func/expr_template_function.py +9 -1
- pixeltable/func/globals.py +24 -14
- pixeltable/func/signature.py +18 -12
- pixeltable/func/udf.py +7 -2
- pixeltable/functions/__init__.py +8 -8
- pixeltable/functions/eval.py +7 -8
- pixeltable/functions/huggingface.py +47 -19
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/util.py +11 -0
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +49 -0
- pixeltable/index/embedding_index.py +95 -0
- pixeltable/metadata/schema.py +45 -22
- pixeltable/plan.py +15 -34
- pixeltable/store.py +38 -41
- pixeltable/tests/conftest.py +5 -11
- pixeltable/tests/ext/test_yolox.py +21 -0
- pixeltable/tests/functions/test_fireworks.py +1 -0
- pixeltable/tests/functions/test_huggingface.py +2 -2
- pixeltable/tests/functions/test_openai.py +15 -5
- pixeltable/tests/functions/test_together.py +1 -0
- pixeltable/tests/test_component_view.py +14 -5
- pixeltable/tests/test_dataframe.py +19 -18
- pixeltable/tests/test_exprs.py +99 -102
- pixeltable/tests/test_function.py +51 -43
- pixeltable/tests/test_index.py +138 -0
- pixeltable/tests/test_migration.py +2 -1
- pixeltable/tests/test_snapshot.py +24 -1
- pixeltable/tests/test_table.py +101 -25
- pixeltable/tests/test_types.py +30 -0
- pixeltable/tests/test_video.py +16 -16
- pixeltable/tests/test_view.py +5 -0
- pixeltable/tests/utils.py +43 -9
- pixeltable/tool/create_test_db_dump.py +16 -0
- pixeltable/type_system.py +37 -45
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/METADATA +5 -4
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/RECORD +56 -49
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/WHEEL +0 -0
pixeltable/tests/test_exprs.py
CHANGED
|
@@ -22,6 +22,48 @@ from pixeltable.type_system import StringType, BoolType, IntType, ArrayType, Col
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class TestExprs:
|
|
25
|
+
@pxt.udf(return_type=FloatType(), param_types=[IntType(), IntType()])
|
|
26
|
+
def div_0_error(a: int, b: int) -> float:
|
|
27
|
+
return a / b
|
|
28
|
+
|
|
29
|
+
# function that does allow nulls
|
|
30
|
+
@pxt.udf(return_type=FloatType(nullable=True),
|
|
31
|
+
param_types=[FloatType(nullable=False), FloatType(nullable=True)])
|
|
32
|
+
def null_args_fn(a: int, b: int) -> int:
|
|
33
|
+
if b is None:
|
|
34
|
+
return a
|
|
35
|
+
return a + b
|
|
36
|
+
|
|
37
|
+
# error in agg.init()
|
|
38
|
+
@pxt.uda(update_types=[IntType()], value_type=IntType())
|
|
39
|
+
class init_exc(pxt.Aggregator):
|
|
40
|
+
def __init__(self):
|
|
41
|
+
self.sum = 1 / 0
|
|
42
|
+
def update(self, val):
|
|
43
|
+
pass
|
|
44
|
+
def value(self):
|
|
45
|
+
return 1
|
|
46
|
+
|
|
47
|
+
# error in agg.update()
|
|
48
|
+
@pxt.uda(update_types=[IntType()], value_type=IntType())
|
|
49
|
+
class update_exc(pxt.Aggregator):
|
|
50
|
+
def __init__(self):
|
|
51
|
+
self.sum = 0
|
|
52
|
+
def update(self, val):
|
|
53
|
+
self.sum += 1 / val
|
|
54
|
+
def value(self):
|
|
55
|
+
return 1
|
|
56
|
+
|
|
57
|
+
# error in agg.value()
|
|
58
|
+
@pxt.uda(update_types=[IntType()], value_type=IntType())
|
|
59
|
+
class value_exc(pxt.Aggregator):
|
|
60
|
+
def __init__(self):
|
|
61
|
+
self.sum = 0
|
|
62
|
+
def update(self, val):
|
|
63
|
+
self.sum += val
|
|
64
|
+
def value(self):
|
|
65
|
+
return 1 / self.sum
|
|
66
|
+
|
|
25
67
|
def test_basic(self, test_tbl: catalog.Table) -> None:
|
|
26
68
|
t = test_tbl
|
|
27
69
|
assert t['c1'].equals(t.c1)
|
|
@@ -62,13 +104,13 @@ class TestExprs:
|
|
|
62
104
|
_ = t.where((t.c1 == 'test string') or (t.c6.f1 > 50)).collect()
|
|
63
105
|
assert 'cannot be used in conjunction with python boolean operators' in str(exc_info.value).lower()
|
|
64
106
|
|
|
65
|
-
# compound predicates with Python functions
|
|
66
|
-
@
|
|
67
|
-
def udf(_: str) -> bool:
|
|
68
|
-
|
|
69
|
-
@
|
|
70
|
-
def udf2(_: int) -> bool:
|
|
71
|
-
|
|
107
|
+
# # compound predicates with Python functions
|
|
108
|
+
# @pt.udf(return_type=BoolType(), param_types=[StringType()])
|
|
109
|
+
# def udf(_: str) -> bool:
|
|
110
|
+
# return True
|
|
111
|
+
# @pt.udf(return_type=BoolType(), param_types=[IntType()])
|
|
112
|
+
# def udf2(_: int) -> bool:
|
|
113
|
+
# return True
|
|
72
114
|
|
|
73
115
|
# TODO: find a way to test this
|
|
74
116
|
# # & can be split
|
|
@@ -120,47 +162,21 @@ class TestExprs:
|
|
|
120
162
|
_ = t[(t.c6.f2 + 1) / (t.c2 - 10)].show()
|
|
121
163
|
|
|
122
164
|
# the same, but with an inline function
|
|
123
|
-
@pxt.udf(return_type=FloatType(), param_types=[IntType(), IntType()])
|
|
124
|
-
def f(a: int, b: int) -> float:
|
|
125
|
-
return a / b
|
|
126
165
|
with pytest.raises(excs.Error):
|
|
127
|
-
_ = t[
|
|
166
|
+
_ = t[self.div_0_error(t.c2 + 1, t.c2)].show()
|
|
128
167
|
|
|
129
168
|
# error in agg.init()
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
self.sum = 1 / 0
|
|
134
|
-
def update(self, val):
|
|
135
|
-
pass
|
|
136
|
-
def value(self):
|
|
137
|
-
return 1
|
|
138
|
-
with pytest.raises(excs.Error):
|
|
139
|
-
_ = t[agg(t.c2)].show()
|
|
169
|
+
with pytest.raises(excs.Error) as exc_info:
|
|
170
|
+
_ = t[self.init_exc(t.c2)].show()
|
|
171
|
+
assert 'division by zero' in str(exc_info.value)
|
|
140
172
|
|
|
141
173
|
# error in agg.update()
|
|
142
|
-
@pxt.uda(update_types=[IntType()], value_type=IntType(), name='agg')
|
|
143
|
-
class Aggregator(pxt.Aggregator):
|
|
144
|
-
def __init__(self):
|
|
145
|
-
self.sum = 0
|
|
146
|
-
def update(self, val):
|
|
147
|
-
self.sum += 1 / val
|
|
148
|
-
def value(self):
|
|
149
|
-
return 1
|
|
150
174
|
with pytest.raises(excs.Error):
|
|
151
|
-
_ = t[
|
|
175
|
+
_ = t[self.update_exc(t.c2 - 10)].show()
|
|
152
176
|
|
|
153
177
|
# error in agg.value()
|
|
154
|
-
@pxt.uda(update_types=[IntType()], value_type=IntType(), name='agg')
|
|
155
|
-
class Aggregator(pxt.Aggregator):
|
|
156
|
-
def __init__(self):
|
|
157
|
-
self.sum = 0
|
|
158
|
-
def update(self, val):
|
|
159
|
-
self.sum += val
|
|
160
|
-
def value(self):
|
|
161
|
-
return 1 / self.sum
|
|
162
178
|
with pytest.raises(excs.Error):
|
|
163
|
-
_ = t[t.c2 <= 2][
|
|
179
|
+
_ = t[t.c2 <= 2][self.value_exc(t.c2 - 1)].show()
|
|
164
180
|
|
|
165
181
|
def test_props(self, test_tbl: catalog.Table, img_tbl: catalog.Table) -> None:
|
|
166
182
|
t = test_tbl
|
|
@@ -221,14 +237,7 @@ class TestExprs:
|
|
|
221
237
|
|
|
222
238
|
# computed column that doesn't allow nulls
|
|
223
239
|
t.add_column(c3=lambda c1, c2: c1 + c2, type=FloatType(nullable=False))
|
|
224
|
-
|
|
225
|
-
@pxt.udf(return_type=FloatType(nullable=True),
|
|
226
|
-
param_types=[FloatType(nullable=False), FloatType(nullable=True)])
|
|
227
|
-
def f(a: int, b: int) -> int:
|
|
228
|
-
if b is None:
|
|
229
|
-
return a
|
|
230
|
-
return a + b
|
|
231
|
-
t.add_column(c4=f(t.c1, t.c2))
|
|
240
|
+
t.add_column(c4=self.null_args_fn(t.c1, t.c2))
|
|
232
241
|
|
|
233
242
|
# data that tests all combinations of nulls
|
|
234
243
|
data = [{'c1': 1.0, 'c2': 1.0}, {'c1': 1.0, 'c2': None}, {'c1': None, 'c2': 1.0}, {'c1': None, 'c2': None}]
|
|
@@ -513,9 +522,10 @@ class TestExprs:
|
|
|
513
522
|
][t.img, t.split].show()
|
|
514
523
|
print(result)
|
|
515
524
|
|
|
516
|
-
|
|
525
|
+
@pytest.mark.skip(reason='temporarily disabled')
|
|
526
|
+
def test_similarity(self, small_img_tbl) -> None:
|
|
517
527
|
skip_test_if_not_installed('nos')
|
|
518
|
-
t =
|
|
528
|
+
t = small_img_tbl
|
|
519
529
|
_ = t.show(30)
|
|
520
530
|
probe = t.select(t.img, t.category).show(1)
|
|
521
531
|
img = probe[0, 0]
|
|
@@ -656,68 +666,67 @@ class TestExprs:
|
|
|
656
666
|
# nested aggregates
|
|
657
667
|
_ = t[sum(count(t.c2))].group_by(t.c2 % 2).show()
|
|
658
668
|
|
|
669
|
+
@pxt.uda(
|
|
670
|
+
init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
|
|
671
|
+
allows_window=True, requires_order_by=False)
|
|
672
|
+
class window_agg:
|
|
673
|
+
def __init__(self, val: int = 0):
|
|
674
|
+
self.val = val
|
|
675
|
+
def update(self, ignore: int) -> None:
|
|
676
|
+
pass
|
|
677
|
+
def value(self) -> int:
|
|
678
|
+
return self.val
|
|
679
|
+
|
|
680
|
+
@pxt.uda(
|
|
681
|
+
init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
|
|
682
|
+
requires_order_by=True, allows_window=True)
|
|
683
|
+
class ordered_agg:
|
|
684
|
+
def __init__(self, val: int = 0):
|
|
685
|
+
self.val = val
|
|
686
|
+
def update(self, i: int) -> None:
|
|
687
|
+
pass
|
|
688
|
+
def value(self) -> int:
|
|
689
|
+
return self.val
|
|
690
|
+
|
|
691
|
+
@pxt.uda(
|
|
692
|
+
init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
|
|
693
|
+
requires_order_by=False, allows_window=False)
|
|
694
|
+
class std_agg:
|
|
695
|
+
def __init__(self, val: int = 0):
|
|
696
|
+
self.val = val
|
|
697
|
+
def update(self, i: int) -> None:
|
|
698
|
+
pass
|
|
699
|
+
def value(self) -> int:
|
|
700
|
+
return self.val
|
|
701
|
+
|
|
659
702
|
def test_udas(self, test_tbl: catalog.Table) -> None:
|
|
660
703
|
t = test_tbl
|
|
661
|
-
|
|
662
|
-
@pxt.uda(
|
|
663
|
-
name='window_agg', init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
|
|
664
|
-
allows_window=True, requires_order_by=False)
|
|
665
|
-
class WindowAgg:
|
|
666
|
-
def __init__(self, val: int = 0):
|
|
667
|
-
self.val = val
|
|
668
|
-
def update(self, ignore: int) -> None:
|
|
669
|
-
pass
|
|
670
|
-
def value(self) -> int:
|
|
671
|
-
return self.val
|
|
672
|
-
|
|
673
|
-
@pxt.uda(
|
|
674
|
-
name='ordered_agg', init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
|
|
675
|
-
requires_order_by=True, allows_window=True)
|
|
676
|
-
class WindowAgg:
|
|
677
|
-
def __init__(self, val: int = 0):
|
|
678
|
-
self.val = val
|
|
679
|
-
def update(self, i: int) -> None:
|
|
680
|
-
pass
|
|
681
|
-
def value(self) -> int:
|
|
682
|
-
return self.val
|
|
683
|
-
|
|
684
|
-
@pxt.uda(
|
|
685
|
-
name='std_agg', init_types=[IntType()], update_types=[IntType()], value_type=IntType(),
|
|
686
|
-
requires_order_by=False, allows_window=False)
|
|
687
|
-
class StdAgg:
|
|
688
|
-
def __init__(self, val: int = 0):
|
|
689
|
-
self.val = val
|
|
690
|
-
def update(self, i: int) -> None:
|
|
691
|
-
pass
|
|
692
|
-
def value(self) -> int:
|
|
693
|
-
return self.val
|
|
694
|
-
|
|
695
704
|
# init arg is passed along
|
|
696
|
-
assert t.select(out=window_agg(t.c2, order_by=t.c2)).collect()[0]['out'] == 0
|
|
697
|
-
assert t.select(out=window_agg(t.c2, val=1, order_by=t.c2)).collect()[0]['out'] == 1
|
|
705
|
+
assert t.select(out=self.window_agg(t.c2, order_by=t.c2)).collect()[0]['out'] == 0
|
|
706
|
+
assert t.select(out=self.window_agg(t.c2, val=1, order_by=t.c2)).collect()[0]['out'] == 1
|
|
698
707
|
|
|
699
708
|
with pytest.raises(excs.Error) as exc_info:
|
|
700
|
-
_ = t.select(window_agg(t.c2, val=t.c2, order_by=t.c2)).collect()
|
|
709
|
+
_ = t.select(self.window_agg(t.c2, val=t.c2, order_by=t.c2)).collect()
|
|
701
710
|
assert 'needs to be a constant' in str(exc_info.value)
|
|
702
711
|
|
|
703
712
|
with pytest.raises(excs.Error) as exc_info:
|
|
704
713
|
# ordering expression not a pixeltable expr
|
|
705
|
-
_ = t.select(ordered_agg(1, t.c2)).collect()
|
|
714
|
+
_ = t.select(self.ordered_agg(1, t.c2)).collect()
|
|
706
715
|
assert 'but instead is a' in str(exc_info.value).lower()
|
|
707
716
|
|
|
708
717
|
with pytest.raises(excs.Error) as exc_info:
|
|
709
718
|
# explicit order_by
|
|
710
|
-
_ = t.select(ordered_agg(t.c2, order_by=t.c2)).collect()
|
|
719
|
+
_ = t.select(self.ordered_agg(t.c2, order_by=t.c2)).collect()
|
|
711
720
|
assert 'order_by invalid' in str(exc_info.value).lower()
|
|
712
721
|
|
|
713
722
|
with pytest.raises(excs.Error) as exc_info:
|
|
714
723
|
# order_by for non-window function
|
|
715
|
-
_ = t.select(std_agg(t.c2, order_by=t.c2)).collect()
|
|
724
|
+
_ = t.select(self.std_agg(t.c2, order_by=t.c2)).collect()
|
|
716
725
|
assert 'does not allow windows' in str(exc_info.value).lower()
|
|
717
726
|
|
|
718
727
|
with pytest.raises(excs.Error) as exc_info:
|
|
719
728
|
# group_by for non-window function
|
|
720
|
-
_ = t.select(std_agg(t.c2, group_by=t.c4)).collect()
|
|
729
|
+
_ = t.select(self.std_agg(t.c2, group_by=t.c4)).collect()
|
|
721
730
|
assert 'group_by invalid' in str(exc_info.value).lower()
|
|
722
731
|
|
|
723
732
|
with pytest.raises(excs.Error) as exc_info:
|
|
@@ -768,18 +777,6 @@ class TestExprs:
|
|
|
768
777
|
return self.val
|
|
769
778
|
assert 'cannot have parameters with the same name: val' in str(exc_info.value)
|
|
770
779
|
|
|
771
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
772
|
-
# invalid name
|
|
773
|
-
@pxt.uda(name='not an identifier', init_types=[IntType()], update_types=[IntType()], value_type=IntType())
|
|
774
|
-
class WindowAgg:
|
|
775
|
-
def __init__(self, val: int = 0):
|
|
776
|
-
self.val = val
|
|
777
|
-
def update(self, i1: int, i2: int) -> None:
|
|
778
|
-
pass
|
|
779
|
-
def value(self) -> int:
|
|
780
|
-
return self.val
|
|
781
|
-
assert 'invalid name' in str(exc_info.value).lower()
|
|
782
|
-
|
|
783
780
|
with pytest.raises(excs.Error) as exc_info:
|
|
784
781
|
# reserved parameter name
|
|
785
782
|
@pxt.uda(init_types=[IntType()], update_types=[IntType()], value_type=IntType())
|
|
@@ -20,8 +20,8 @@ class TestFunction:
|
|
|
20
20
|
def func(x: int) -> int:
|
|
21
21
|
return x + 1
|
|
22
22
|
|
|
23
|
-
@pxt.uda(
|
|
24
|
-
class
|
|
23
|
+
@pxt.uda(value_type=IntType(), update_types=[IntType()])
|
|
24
|
+
class agg:
|
|
25
25
|
def __init__(self):
|
|
26
26
|
self.sum = 0
|
|
27
27
|
def update(self, val: int) -> None:
|
|
@@ -160,61 +160,62 @@ class TestFunction:
|
|
|
160
160
|
assert status.num_rows == len(rows)
|
|
161
161
|
assert status.num_excs == 0
|
|
162
162
|
|
|
163
|
+
@pxt.udf(return_type=IntType(), param_types=[IntType(), FloatType(), FloatType(), FloatType()])
|
|
164
|
+
def f1(a: int, b: float, c: float = 0.0, d: float = 1.0) -> float:
|
|
165
|
+
return a + b + c + d
|
|
166
|
+
|
|
167
|
+
@pxt.udf(
|
|
168
|
+
return_type=IntType(),
|
|
169
|
+
param_types=[IntType(nullable=True), FloatType(nullable=False), FloatType(nullable=True)])
|
|
170
|
+
def f2(a: int, b: float = 0.0, c: float = 1.0) -> float:
|
|
171
|
+
return (0.0 if a is None else a) + b + (0.0 if c is None else c)
|
|
172
|
+
|
|
163
173
|
def test_call(self, test_tbl: catalog.Table) -> None:
|
|
164
174
|
t = test_tbl
|
|
165
175
|
|
|
166
|
-
@pxt.udf(return_type=IntType(), param_types=[IntType(), FloatType(), FloatType(), FloatType()])
|
|
167
|
-
def f1(a: int, b: float, c: float = 0.0, d: float = 1.0) -> float:
|
|
168
|
-
return a + b + c + d
|
|
169
|
-
|
|
170
176
|
r0 = t[t.c2, t.c3].show(0).to_pandas()
|
|
171
177
|
# positional params with default args
|
|
172
|
-
r1 = t[f1(t.c2, t.c3)].show(0).to_pandas()['col_0']
|
|
178
|
+
r1 = t[self.f1(t.c2, t.c3)].show(0).to_pandas()['col_0']
|
|
173
179
|
assert np.all(r1 == r0.c2 + r0.c3 + 1.0)
|
|
174
180
|
# kw args only
|
|
175
|
-
r2 = t[f1(c=0.0, b=t.c3, a=t.c2)].show(0).to_pandas()['col_0']
|
|
181
|
+
r2 = t[self.f1(c=0.0, b=t.c3, a=t.c2)].show(0).to_pandas()['col_0']
|
|
176
182
|
assert np.all(r1 == r2)
|
|
177
183
|
# overriding default args
|
|
178
|
-
r3 = t[f1(d=0.0, c=1.0, b=t.c3, a=t.c2)].show(0).to_pandas()['col_0']
|
|
184
|
+
r3 = t[self.f1(d=0.0, c=1.0, b=t.c3, a=t.c2)].show(0).to_pandas()['col_0']
|
|
179
185
|
assert np.all(r2 == r3)
|
|
180
186
|
# overriding default with positional arg
|
|
181
|
-
r4 = t[f1(t.c2, t.c3, 0.0)].show(0).to_pandas()['col_0']
|
|
187
|
+
r4 = t[self.f1(t.c2, t.c3, 0.0)].show(0).to_pandas()['col_0']
|
|
182
188
|
assert np.all(r3 == r4)
|
|
183
189
|
# overriding default with positional arg and kw arg
|
|
184
|
-
r5 = t[f1(t.c2, t.c3, 1.0, d=0.0)].show(0).to_pandas()['col_0']
|
|
190
|
+
r5 = t[self.f1(t.c2, t.c3, 1.0, d=0.0)].show(0).to_pandas()['col_0']
|
|
185
191
|
assert np.all(r4 == r5)
|
|
186
192
|
# d is kwarg
|
|
187
|
-
r6 = t[f1(t.c2, d=1.0, b=t.c3)].show(0).to_pandas()['col_0']
|
|
193
|
+
r6 = t[self.f1(t.c2, d=1.0, b=t.c3)].show(0).to_pandas()['col_0']
|
|
188
194
|
assert np.all(r5 == r6)
|
|
189
195
|
# d is Expr kwarg
|
|
190
|
-
r6 = t[f1(1, d=t.c3, b=t.c3)].show(0).to_pandas()['col_0']
|
|
196
|
+
r6 = t[self.f1(1, d=t.c3, b=t.c3)].show(0).to_pandas()['col_0']
|
|
191
197
|
assert np.all(r5 == r6)
|
|
192
198
|
|
|
193
199
|
# test handling of Nones
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
param_types=[IntType(nullable=True), FloatType(nullable=False), FloatType(nullable=True)])
|
|
197
|
-
def f2(a: int, b: float = 0.0, c: float = 1.0) -> float:
|
|
198
|
-
return (0.0 if a is None else a) + b + (0.0 if c is None else c)
|
|
199
|
-
r0 = t[f2(1, t.c3)].show(0).to_pandas()['col_0']
|
|
200
|
-
r1 = t[f2(None, t.c3, 2.0)].show(0).to_pandas()['col_0']
|
|
200
|
+
r0 = t[self.f2(1, t.c3)].show(0).to_pandas()['col_0']
|
|
201
|
+
r1 = t[self.f2(None, t.c3, 2.0)].show(0).to_pandas()['col_0']
|
|
201
202
|
assert np.all(r0 == r1)
|
|
202
|
-
r2 = t[f2(2, t.c3, None)].show(0).to_pandas()['col_0']
|
|
203
|
+
r2 = t[self.f2(2, t.c3, None)].show(0).to_pandas()['col_0']
|
|
203
204
|
assert np.all(r1 == r2)
|
|
204
205
|
# kwarg with None
|
|
205
|
-
r3 = t[f2(c=None, a=t.c2)].show(0).to_pandas()['col_0']
|
|
206
|
+
r3 = t[self.f2(c=None, a=t.c2)].show(0).to_pandas()['col_0']
|
|
206
207
|
# kwarg with Expr
|
|
207
|
-
r4 = t[f2(c=t.c3, a=None)].show(0).to_pandas()['col_0']
|
|
208
|
+
r4 = t[self.f2(c=t.c3, a=None)].show(0).to_pandas()['col_0']
|
|
208
209
|
assert np.all(r3 == r4)
|
|
209
210
|
|
|
210
211
|
with pytest.raises(TypeError) as exc_info:
|
|
211
|
-
_ = t[f1(t.c2, c=0.0)].show(0)
|
|
212
|
+
_ = t[self.f1(t.c2, c=0.0)].show(0)
|
|
212
213
|
assert "'b'" in str(exc_info.value)
|
|
213
214
|
with pytest.raises(TypeError) as exc_info:
|
|
214
|
-
_ = t[f1(t.c2)].show(0)
|
|
215
|
+
_ = t[self.f1(t.c2)].show(0)
|
|
215
216
|
assert "'b'" in str(exc_info.value)
|
|
216
217
|
with pytest.raises(TypeError) as exc_info:
|
|
217
|
-
_ = t[f1(c=1.0, a=t.c2)].show(0)
|
|
218
|
+
_ = t[self.f1(c=1.0, a=t.c2)].show(0)
|
|
218
219
|
assert "'b'" in str(exc_info.value)
|
|
219
220
|
|
|
220
221
|
# bad default value
|
|
@@ -242,17 +243,32 @@ class TestFunction:
|
|
|
242
243
|
return order_by
|
|
243
244
|
assert 'reserved' in str(exc_info.value)
|
|
244
245
|
|
|
246
|
+
@pxt.expr_udf
|
|
247
|
+
def add1(x: int) -> int:
|
|
248
|
+
return x + 1
|
|
249
|
+
|
|
250
|
+
@pxt.expr_udf
|
|
251
|
+
def add2(x: int, y: int):
|
|
252
|
+
return x + y
|
|
253
|
+
|
|
254
|
+
@pxt.expr_udf
|
|
255
|
+
def add2_with_default(x: int, y: int = 1) -> int:
|
|
256
|
+
return x + y
|
|
257
|
+
|
|
245
258
|
def test_expr_udf(self, test_tbl: catalog.Table) -> None:
|
|
246
259
|
t = test_tbl
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
res1
|
|
260
|
+
|
|
261
|
+
res1 = t.select(out=self.add1(t.c2)).order_by(t.c2).collect()
|
|
262
|
+
res2 = t.select(t.c2 + 1).order_by(t.c2).collect()
|
|
263
|
+
assert_resultset_eq(res1, res2)
|
|
264
|
+
|
|
265
|
+
# return type inferred from expression
|
|
266
|
+
res1 = t.select(out=self.add2(t.c2, t.c2)).order_by(t.c2).collect()
|
|
251
267
|
res2 = t.select(t.c2 * 2).order_by(t.c2).collect()
|
|
252
268
|
assert_resultset_eq(res1, res2)
|
|
253
269
|
|
|
254
270
|
with pytest.raises(TypeError) as exc_info:
|
|
255
|
-
_ = t.select(
|
|
271
|
+
_ = t.select(self.add1(y=t.c2)).collect()
|
|
256
272
|
assert 'missing a required argument' in str(exc_info.value).lower()
|
|
257
273
|
|
|
258
274
|
with pytest.raises(excs.Error) as exc_info:
|
|
@@ -262,13 +278,6 @@ class TestFunction:
|
|
|
262
278
|
return x + y
|
|
263
279
|
assert 'cannot infer pixeltable type' in str(exc_info.value).lower()
|
|
264
280
|
|
|
265
|
-
with pytest.raises(excs.Error) as exc_info:
|
|
266
|
-
# return type cannot be inferred
|
|
267
|
-
@pxt.expr_udf
|
|
268
|
-
def add1(x: int, y: int):
|
|
269
|
-
return x + y
|
|
270
|
-
assert 'cannot infer pixeltable return type' in str(exc_info.value).lower()
|
|
271
|
-
|
|
272
281
|
with pytest.raises(excs.Error) as exc_info:
|
|
273
282
|
# missing param types
|
|
274
283
|
@pxt.expr_udf(param_types=[IntType()])
|
|
@@ -280,14 +289,13 @@ class TestFunction:
|
|
|
280
289
|
# signature has correct parameter kind
|
|
281
290
|
@pxt.expr_udf
|
|
282
291
|
def add1(*, x: int) -> int:
|
|
283
|
-
return x +
|
|
292
|
+
return x + y
|
|
284
293
|
_ = t.select(add1(t.c2)).collect()
|
|
285
294
|
assert 'takes 0 positional arguments' in str(exc_info.value).lower()
|
|
286
295
|
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
res1 = t.select(out=add2(t.c2)).order_by(t.c2).collect()
|
|
296
|
+
res1 = t.select(out=self.add2_with_default(t.c2)).order_by(t.c2).collect()
|
|
297
|
+
res2 = t.select(out=self.add2(t.c2, 1)).order_by(t.c2).collect()
|
|
298
|
+
assert_resultset_eq(res1, res2)
|
|
291
299
|
|
|
292
300
|
# Test that various invalid udf definitions generate
|
|
293
301
|
# correct error messages.
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import PIL.Image
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
import pixeltable as pxt
|
|
6
|
+
from pixeltable.functions.huggingface import clip_image, clip_text
|
|
7
|
+
from pixeltable.tests.utils import text_embed, img_embed, skip_test_if_not_installed
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestIndex:
|
|
11
|
+
|
|
12
|
+
# wrong signature
|
|
13
|
+
@pxt.udf
|
|
14
|
+
def bad_embed(x: str) -> str:
|
|
15
|
+
return x
|
|
16
|
+
|
|
17
|
+
def test_embedding_basic(self, img_tbl: pxt.Table, test_tbl: pxt.Table) -> None:
|
|
18
|
+
skip_test_if_not_installed('transformers')
|
|
19
|
+
img_t = img_tbl
|
|
20
|
+
rows = list(img_t.select(img=img_t.img.fileurl, category=img_t.category, split=img_t.split).collect())
|
|
21
|
+
# create table with fewer rows to speed up testing
|
|
22
|
+
cl = pxt.Client()
|
|
23
|
+
schema = {
|
|
24
|
+
'img': pxt.ImageType(nullable=False),
|
|
25
|
+
'category': pxt.StringType(nullable=False),
|
|
26
|
+
'split': pxt.StringType(nullable=False),
|
|
27
|
+
}
|
|
28
|
+
tbl_name = 'index_test'
|
|
29
|
+
img_t = cl.create_table(tbl_name, schema=schema)
|
|
30
|
+
img_t.insert(rows[:30])
|
|
31
|
+
|
|
32
|
+
img_t.add_embedding_index('img', img_embed=img_embed, text_embed=text_embed)
|
|
33
|
+
|
|
34
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
35
|
+
# duplicate name
|
|
36
|
+
img_t.add_embedding_index('img', idx_name='idx0', img_embed=img_embed)
|
|
37
|
+
assert 'duplicate index name' in str(exc_info.value).lower()
|
|
38
|
+
|
|
39
|
+
img_t.add_embedding_index('category', text_embed=text_embed)
|
|
40
|
+
# revert() removes the index
|
|
41
|
+
img_t.revert()
|
|
42
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
43
|
+
img_t.drop_index(column_name='category')
|
|
44
|
+
assert 'does not have an index' in str(exc_info.value).lower()
|
|
45
|
+
|
|
46
|
+
rows = list(img_t.collect())
|
|
47
|
+
status = img_t.update({'split': 'other'}, where=img_t.split == 'test')
|
|
48
|
+
assert status.num_excs == 0
|
|
49
|
+
|
|
50
|
+
status = img_t.delete()
|
|
51
|
+
assert status.num_excs == 0
|
|
52
|
+
|
|
53
|
+
# revert delete()
|
|
54
|
+
img_t.revert()
|
|
55
|
+
# revert update()
|
|
56
|
+
img_t.revert()
|
|
57
|
+
|
|
58
|
+
# make sure we can still do DML after reloading the metadata
|
|
59
|
+
cl = pxt.Client(reload=True)
|
|
60
|
+
img_t = cl.get_table(tbl_name)
|
|
61
|
+
status = img_t.insert(rows)
|
|
62
|
+
assert status.num_excs == 0
|
|
63
|
+
|
|
64
|
+
status = img_t.update({'split': 'other'}, where=img_t.split == 'test')
|
|
65
|
+
assert status.num_excs == 0
|
|
66
|
+
|
|
67
|
+
status = img_t.delete()
|
|
68
|
+
assert status.num_excs == 0
|
|
69
|
+
|
|
70
|
+
# revert delete()
|
|
71
|
+
img_t.revert()
|
|
72
|
+
# revert update()
|
|
73
|
+
img_t.revert()
|
|
74
|
+
|
|
75
|
+
img_t.drop_index(idx_name='idx0')
|
|
76
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
77
|
+
img_t.drop_index(column_name='img')
|
|
78
|
+
assert 'does not have an index' in str(exc_info.value).lower()
|
|
79
|
+
|
|
80
|
+
# revert() makes the index reappear
|
|
81
|
+
img_t.revert()
|
|
82
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
83
|
+
img_t.add_embedding_index('img', idx_name='idx0', img_embed=img_embed)
|
|
84
|
+
assert 'duplicate index name' in str(exc_info.value).lower()
|
|
85
|
+
|
|
86
|
+
# dropping the indexed column also drops indices
|
|
87
|
+
img_t.drop_column('img')
|
|
88
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
89
|
+
img_t.drop_index(idx_name='idx0')
|
|
90
|
+
assert 'does not exist' in str(exc_info.value).lower()
|
|
91
|
+
|
|
92
|
+
def test_errors(self, img_tbl: pxt.Table, test_tbl: pxt.Table) -> None:
|
|
93
|
+
img_t = img_tbl
|
|
94
|
+
rows = list(img_t.select(img=img_t.img.fileurl, category=img_t.category, split=img_t.split).collect())
|
|
95
|
+
# create table with fewer rows to speed up testing
|
|
96
|
+
cl = pxt.Client()
|
|
97
|
+
schema = {
|
|
98
|
+
'img': pxt.ImageType(nullable=False),
|
|
99
|
+
'category': pxt.StringType(nullable=False),
|
|
100
|
+
'split': pxt.StringType(nullable=False),
|
|
101
|
+
}
|
|
102
|
+
tbl_name = 'index_test'
|
|
103
|
+
img_t = cl.create_table(tbl_name, schema=schema)
|
|
104
|
+
img_t.insert(rows[:30])
|
|
105
|
+
|
|
106
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
107
|
+
# unknown column
|
|
108
|
+
img_t.add_embedding_index('does_not_exist', idx_name='idx0', img_embed=img_embed)
|
|
109
|
+
assert 'column does_not_exist unknown' in str(exc_info.value).lower()
|
|
110
|
+
|
|
111
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
112
|
+
# wrong column type
|
|
113
|
+
test_tbl.add_embedding_index('c2', img_embed=img_embed)
|
|
114
|
+
assert 'requires string or image column' in str(exc_info.value).lower()
|
|
115
|
+
|
|
116
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
117
|
+
# missing embedding function
|
|
118
|
+
img_tbl.add_embedding_index('img', text_embed=text_embed)
|
|
119
|
+
assert 'image embedding function is required' in str(exc_info.value).lower()
|
|
120
|
+
|
|
121
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
122
|
+
# wrong signature
|
|
123
|
+
img_tbl.add_embedding_index('img', img_embed=clip_image)
|
|
124
|
+
assert 'but has signature' in str(exc_info.value).lower()
|
|
125
|
+
|
|
126
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
127
|
+
# missing embedding function
|
|
128
|
+
img_tbl.add_embedding_index('category', img_embed=img_embed)
|
|
129
|
+
assert 'text embedding function is required' in str(exc_info.value).lower()
|
|
130
|
+
|
|
131
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
132
|
+
# wrong signature
|
|
133
|
+
img_tbl.add_embedding_index('category', text_embed=clip_text)
|
|
134
|
+
assert 'but has signature' in str(exc_info.value).lower()
|
|
135
|
+
|
|
136
|
+
with pytest.raises(pxt.Error) as exc_info:
|
|
137
|
+
img_tbl.add_embedding_index('category', text_embed=self.bad_embed)
|
|
138
|
+
assert 'must return an array' in str(exc_info.value).lower()
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import glob
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
|
+
import platform
|
|
4
5
|
import subprocess
|
|
5
6
|
|
|
6
7
|
import pgserver
|
|
@@ -15,7 +16,7 @@ _logger = logging.getLogger('pixeltable')
|
|
|
15
16
|
|
|
16
17
|
class TestMigration:
|
|
17
18
|
|
|
18
|
-
@pytest.mark.
|
|
19
|
+
@pytest.mark.skipif(platform.system() == 'Windows', reason='Does not run on Windows')
|
|
19
20
|
def test_db_migration(self, init_env) -> None:
|
|
20
21
|
env = Env.get()
|
|
21
22
|
pg_package_dir = os.path.dirname(pgserver.__file__)
|
|
@@ -5,7 +5,7 @@ import pytest
|
|
|
5
5
|
|
|
6
6
|
import pixeltable as pxt
|
|
7
7
|
import pixeltable.exceptions as excs
|
|
8
|
-
from pixeltable.tests.utils import create_test_tbl, assert_resultset_eq
|
|
8
|
+
from pixeltable.tests.utils import create_test_tbl, assert_resultset_eq, create_img_tbl, img_embed
|
|
9
9
|
from pixeltable.type_system import IntType
|
|
10
10
|
|
|
11
11
|
|
|
@@ -89,6 +89,29 @@ class TestSnapshot:
|
|
|
89
89
|
snap = cl.create_view(snap_path, tbl, schema=schema, filter=filter, is_snapshot=True)
|
|
90
90
|
self.run_basic_test(cl, tbl, snap, extra_items=extra_items, filter=filter, reload_md=reload_md)
|
|
91
91
|
|
|
92
|
+
def test_errors(self, test_client: pxt.Client) -> None:
|
|
93
|
+
cl = test_client
|
|
94
|
+
tbl = create_test_tbl(client=cl)
|
|
95
|
+
snap = cl.create_view('snap', tbl, is_snapshot=True)
|
|
96
|
+
|
|
97
|
+
with pytest.raises(pxt.Error) as excinfo:
|
|
98
|
+
_ = snap.update({'c3': snap.c3 + 1.0})
|
|
99
|
+
assert 'cannot update a snapshot' in str(excinfo.value).lower()
|
|
100
|
+
|
|
101
|
+
with pytest.raises(pxt.Error) as excinfo:
|
|
102
|
+
_ = snap.batch_update([{'c3': 1.0, 'c2': 1}])
|
|
103
|
+
assert 'cannot update a snapshot' in str(excinfo.value).lower()
|
|
104
|
+
|
|
105
|
+
with pytest.raises(pxt.Error) as excinfo:
|
|
106
|
+
_ = snap.revert()
|
|
107
|
+
assert 'cannot revert a snapshot' in str(excinfo.value).lower()
|
|
108
|
+
|
|
109
|
+
with pytest.raises(pxt.Error) as excinfo:
|
|
110
|
+
img_tbl = create_img_tbl(cl)
|
|
111
|
+
snap = cl.create_view('img_snap', img_tbl, is_snapshot=True)
|
|
112
|
+
snap.add_embedding_index('img', img_embed=img_embed)
|
|
113
|
+
assert 'cannot add an index to a snapshot' in str(excinfo.value).lower()
|
|
114
|
+
|
|
92
115
|
def test_views_of_snapshots(self, test_client: pxt.Client) -> None:
|
|
93
116
|
cl = test_client
|
|
94
117
|
t = cl.create_table('tbl', {'a': IntType()})
|