pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +20 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +23 -7
- pixeltable/catalog/insertable_table.py +32 -19
- pixeltable/catalog/table.py +210 -20
- pixeltable/catalog/table_version.py +272 -111
- pixeltable/catalog/table_version_path.py +6 -1
- pixeltable/dataframe.py +184 -110
- pixeltable/datatransfer/__init__.py +1 -0
- pixeltable/datatransfer/label_studio.py +526 -0
- pixeltable/datatransfer/remote.py +113 -0
- pixeltable/env.py +213 -79
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +11 -2
- pixeltable/exprs/comparison.py +39 -1
- pixeltable/exprs/data_row.py +7 -0
- pixeltable/exprs/expr.py +26 -19
- pixeltable/exprs/function_call.py +17 -18
- pixeltable/exprs/globals.py +14 -2
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/row_builder.py +7 -1
- pixeltable/exprs/similarity_expr.py +67 -0
- pixeltable/ext/functions/whisperx.py +30 -0
- pixeltable/ext/functions/yolox.py +16 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +5 -2
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +14 -3
- pixeltable/func/function.py +35 -4
- pixeltable/func/signature.py +5 -15
- pixeltable/func/udf.py +8 -12
- pixeltable/functions/fireworks.py +9 -4
- pixeltable/functions/huggingface.py +48 -5
- pixeltable/functions/openai.py +49 -11
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +32 -6
- pixeltable/functions/util.py +0 -43
- pixeltable/functions/video.py +46 -8
- pixeltable/globals.py +443 -0
- pixeltable/index/__init__.py +1 -0
- pixeltable/index/base.py +9 -2
- pixeltable/index/btree.py +54 -0
- pixeltable/index/embedding_index.py +91 -15
- pixeltable/io/__init__.py +4 -0
- pixeltable/io/globals.py +59 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +8 -4
- pixeltable/iterators/document.py +225 -93
- pixeltable/iterators/video.py +16 -9
- pixeltable/metadata/__init__.py +8 -4
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/converters/convert_14.py +13 -0
- pixeltable/metadata/converters/convert_15.py +29 -0
- pixeltable/metadata/converters/util.py +63 -0
- pixeltable/metadata/schema.py +12 -6
- pixeltable/plan.py +11 -24
- pixeltable/store.py +16 -23
- pixeltable/tool/create_test_db_dump.py +49 -14
- pixeltable/type_system.py +27 -58
- pixeltable/utils/coco.py +94 -0
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- pixeltable-0.2.7.dist-info/METADATA +137 -0
- pixeltable-0.2.7.dist-info/RECORD +126 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -600
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/func/nos_function.py +0 -202
- pixeltable/tests/conftest.py +0 -171
- pixeltable/tests/ext/test_yolox.py +0 -21
- pixeltable/tests/functions/test_fireworks.py +0 -43
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -162
- pixeltable/tests/functions/test_together.py +0 -112
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -379
- pixeltable/tests/test_dataframe.py +0 -440
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -802
- pixeltable/tests/test_function.py +0 -332
- pixeltable/tests/test_index.py +0 -138
- pixeltable/tests/test_migration.py +0 -44
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -231
- pixeltable/tests/test_table.py +0 -1343
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -52
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -535
- pixeltable/tests/utils.py +0 -442
- pixeltable/utils/clip.py +0 -18
- pixeltable-0.2.5.dist-info/METADATA +0 -128
- pixeltable-0.2.5.dist-info/RECORD +0 -139
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
|
@@ -1,231 +0,0 @@
|
|
|
1
|
-
from typing import Any, Dict
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
import pytest
|
|
5
|
-
|
|
6
|
-
import pixeltable as pxt
|
|
7
|
-
import pixeltable.exceptions as excs
|
|
8
|
-
from pixeltable.tests.utils import create_test_tbl, assert_resultset_eq, create_img_tbl, img_embed
|
|
9
|
-
from pixeltable.type_system import IntType
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class TestSnapshot:
    """Tests for snapshot views: isolation from base-table changes, rejected operations, and view/snapshot chains."""

    def run_basic_test(
        self, cl: pxt.Client, tbl: pxt.Table, snap: pxt.Table, extra_items: Dict[str, Any], filter: Any,
        reload_md: bool
    ) -> None:
        """Verify that `snap` keeps returning the data that `tbl` held when the snapshot was created.

        Args:
            cl: client used to resolve paths and to drop the tables at the end.
            tbl: base table of the snapshot.
            snap: snapshot created on top of `tbl`.
            extra_items: maps each additional snapshot column name to an expression over `tbl` that
                produces the values expected in that column.
            filter: predicate passed to `where()` on the base-table query; the caller passes the same
                value it used when creating the snapshot.
            reload_md: if True, re-create the client and re-fetch both tables before querying the snapshot.

        Drops both `snap` and `tbl` before returning.
        """
        tbl_path, snap_path = cl.get_path(tbl), cl.get_path(snap)
        # run the initial query against the base table here, before reloading, otherwise the filter breaks
        tbl_select_list = [tbl[col_name] for col_name in tbl.column_names()]
        tbl_select_list.extend(extra_items.values())
        orig_resultset = tbl.select(*tbl_select_list).where(filter).order_by(tbl.c2).collect()

        if reload_md:
            # reload md
            cl = pxt.Client(reload=True)
            tbl = cl.get_table(tbl_path)
            snap = cl.get_table(snap_path)

        # view select list: base cols followed by view cols
        snap_select_list = [snap[col_name] for col_name in snap.column_names()[len(extra_items):]]
        snap_select_list.extend(snap[col_name] for col_name in extra_items)
        snap_query = snap.select(*snap_select_list).order_by(snap.c2)
        assert_resultset_eq(snap_query.collect(), orig_resultset)

        # adding data to a base table doesn't change the snapshot
        rows = list(tbl.select(tbl.c1, tbl.c1n, tbl.c2, tbl.c3, tbl.c4, tbl.c5, tbl.c6, tbl.c7).collect())
        status = tbl.insert(rows)
        assert status.num_rows == len(rows)
        assert_resultset_eq(snap_query.collect(), orig_resultset)

        # update() doesn't affect the view
        status = tbl.update({'c3': tbl.c3 + 1.0})
        assert status.num_rows == tbl.count()
        assert_resultset_eq(snap_query.collect(), orig_resultset)

        # delete() doesn't affect the view
        num_tbl_rows = tbl.count()
        status = tbl.delete()
        assert status.num_rows == num_tbl_rows
        assert_resultset_eq(snap_query.collect(), orig_resultset)

        tbl.revert()  # undo delete()
        tbl.revert()  # undo update()
        tbl.revert()  # undo insert()
        # can't revert a version referenced by a snapshot
        with pytest.raises(excs.Error) as excinfo:
            tbl.revert()
        assert 'version is needed' in str(excinfo.value)

        # can't drop a table with snapshots
        with pytest.raises(excs.Error) as excinfo:
            cl.drop_table(tbl_path)
        assert snap_path in str(excinfo.value)

        cl.drop_table(snap_path)
        cl.drop_table(tbl_path)

    def test_basic(self, test_client: pxt.Client) -> None:
        """Run `run_basic_test` for every combination of metadata reload, filter, and extra snapshot columns."""
        cl = test_client
        cl.create_dir('main')
        cl.create_dir('snap')
        tbl_path = 'main.tbl1'
        snap_path = 'snap.snap1'

        for reload_md in [False, True]:
            for has_filter in [False, True]:
                for has_cols in [False, True]:
                    # start each combination from a freshly loaded client; run_basic_test drops both tables
                    cl = pxt.Client(reload=True)
                    tbl = create_test_tbl(name=tbl_path, client=cl)
                    schema = {
                        'v1': tbl.c3 * 2.0,
                        # include a lambda to make sure that is handled correctly
                        'v2': {'value': lambda c3: c3 * 2.0, 'type': pxt.FloatType()}
                    } if has_cols else {}
                    # expected values for the extra columns, expressed against the base table
                    extra_items = {'v1': tbl.c3 * 2.0, 'v2': tbl.c3 * 2.0} if has_cols else {}
                    row_filter = tbl.c2 < 10 if has_filter else None
                    snap = cl.create_view(snap_path, tbl, schema=schema, filter=row_filter, is_snapshot=True)
                    self.run_basic_test(
                        cl, tbl, snap, extra_items=extra_items, filter=row_filter, reload_md=reload_md)

    def test_errors(self, test_client: pxt.Client) -> None:
        """Check that update, batch_update, revert and add_embedding_index are rejected on a snapshot."""
        cl = test_client
        tbl = create_test_tbl(client=cl)
        snap = cl.create_view('snap', tbl, is_snapshot=True)

        with pytest.raises(pxt.Error) as excinfo:
            _ = snap.update({'c3': snap.c3 + 1.0})
        assert 'cannot update a snapshot' in str(excinfo.value).lower()

        with pytest.raises(pxt.Error) as excinfo:
            _ = snap.batch_update([{'c3': 1.0, 'c2': 1}])
        assert 'cannot update a snapshot' in str(excinfo.value).lower()

        with pytest.raises(pxt.Error) as excinfo:
            _ = snap.revert()
        assert 'cannot revert a snapshot' in str(excinfo.value).lower()

        # set up outside of pytest.raises(), so that an unexpected failure while creating the table or
        # the snapshot surfaces as itself instead of being captured as the "expected" error
        img_tbl = create_img_tbl(cl)
        img_snap = cl.create_view('img_snap', img_tbl, is_snapshot=True)
        with pytest.raises(pxt.Error) as excinfo:
            img_snap.add_embedding_index('img', img_embed=img_embed)
        assert 'cannot add an index to a snapshot' in str(excinfo.value).lower()

    def test_views_of_snapshots(self, test_client: pxt.Client) -> None:
        """Check that an alternating snapshot/view chain keeps its row count after base inserts and a reload."""
        cl = test_client
        t = cl.create_table('tbl', {'a': IntType()})
        rows = [{'a': 1}, {'a': 2}, {'a': 3}]
        status = t.insert(rows)
        assert status.num_rows == len(rows)
        assert status.num_excs == 0
        s1 = cl.create_view('s1', t, is_snapshot=True)
        v1 = cl.create_view('v1', s1, is_snapshot=False)
        s2 = cl.create_view('s2', v1, is_snapshot=True)
        v2 = cl.create_view('v2', s2, is_snapshot=False)

        def verify(s1: pxt.Table, s2: pxt.Table, v1: pxt.Table, v2: pxt.Table) -> None:
            # everything in the chain hangs off snapshot s1, so all counts stay at the initial row count
            assert s1.count() == len(rows)
            assert v1.count() == len(rows)
            assert s2.count() == len(rows)
            assert v2.count() == len(rows)

        verify(s1, s2, v1, v2)

        status = t.insert(rows)
        assert status.num_rows == len(rows)
        assert status.num_excs == 0
        verify(s1, s2, v1, v2)

        # same checks after a metadata reload
        cl = pxt.Client(reload=True)
        s1 = cl.get_table('s1')
        s2 = cl.get_table('s2')
        v1 = cl.get_table('v1')
        v2 = cl.get_table('v2')
        verify(s1, s2, v1, v2)

    def test_snapshot_of_view_chain(self, test_client: pxt.Client) -> None:
        """Check that a snapshot on top of two stacked views stays fixed while the views follow the base table."""
        cl = test_client
        t = cl.create_table('tbl', {'a': IntType()})
        rows = [{'a': 1}, {'a': 2}, {'a': 3}]
        status = t.insert(rows)
        assert status.num_rows == len(rows)
        assert status.num_excs == 0
        v1 = cl.create_view('v1', t, is_snapshot=False)
        v2 = cl.create_view('v2', v1, is_snapshot=False)
        s = cl.create_view('s', v2, is_snapshot=True)

        def verify(v1: pxt.Table, v2: pxt.Table, s: pxt.Table) -> None:
            # the views track the base table; the snapshot keeps the original row count
            assert v1.count() == t.count()
            assert v2.count() == t.count()
            assert s.count() == len(rows)

        verify(v1, v2, s)

        status = t.insert(rows)
        assert status.num_rows == len(rows) * 3  # we also updated 2 views
        assert status.num_excs == 0
        verify(v1, v2, s)

        # same checks after a metadata reload
        cl = pxt.Client(reload=True)
        v1 = cl.get_table('v1')
        v2 = cl.get_table('v2')
        s = cl.get_table('s')
        verify(v1, v2, s)

    def test_multiple_snapshot_paths(self, test_client: pxt.Client) -> None:
        """Check snapshots of one view that were taken at different versions of the base table and the view."""
        cl = test_client
        t = create_test_tbl(cl)
        c4 = t.select(t.c4).order_by(t.c2).collect().to_pandas()['c4']
        # order by c2, like every query this baseline is compared against below
        orig_c3 = t.select(t.c3).order_by(t.c2).collect().to_pandas()['c3']
        v = cl.create_view('v', base=t, schema={'v1': t.c3 + 1})
        s1 = cl.create_view('s1', v, is_snapshot=True)
        t.drop_column('c4')
        # s2 references the same view version as s1, but a different version of t (due to a schema change)
        s2 = cl.create_view('s2', v, is_snapshot=True)
        t.update({'c6': {'a': 17}})
        # s3 references the same view version as s2, but a different version of t (due to a data change)
        s3 = cl.create_view('s3', v, is_snapshot=True)
        t.update({'c3': t.c3 + 1})
        # s4 references different versions of t and v
        s4 = cl.create_view('s4', v, is_snapshot=True)

        def validate(t: pxt.Table, v: pxt.Table, s1: pxt.Table, s2: pxt.Table, s3: pxt.Table, s4: pxt.Table) -> None:
            # c4 is only visible in s1
            assert np.all(s1.select(s1.c4).collect().to_pandas()['c4'] == c4)
            with pytest.raises(AttributeError):
                _ = t.select(t.c4).collect()
            with pytest.raises(AttributeError):
                _ = v.select(v.c4).collect()
            with pytest.raises(AttributeError):
                _ = s2.select(s2.c4).collect()
            with pytest.raises(AttributeError):
                _ = s3.select(s3.c4).collect()
            with pytest.raises(AttributeError):
                _ = s4.select(s4.c4).collect()

            # c3
            assert np.all(t.select(t.c3).order_by(t.c2).collect().to_pandas()['c3'] == orig_c3 + 1)
            assert np.all(s1.select(s1.c3).order_by(s1.c2).collect().to_pandas()['c3'] == orig_c3)
            assert np.all(s2.select(s2.c3).order_by(s2.c2).collect().to_pandas()['c3'] == orig_c3)
            assert np.all(s3.select(s3.c3).order_by(s3.c2).collect().to_pandas()['c3'] == orig_c3)
            assert np.all(s4.select(s4.c3).order_by(s4.c2).collect().to_pandas()['c3'] == orig_c3 + 1)

            # v1
            assert np.all(
                v.select(v.v1).order_by(v.c2).collect().to_pandas()['v1'] == \
                t.select(t.c3).order_by(t.c2).collect().to_pandas()['c3'] + 1)
            assert np.all(s1.select(s1.v1).order_by(s1.c2).collect().to_pandas()['v1'] == orig_c3 + 1)
            assert np.all(s2.select(s2.v1).order_by(s2.c2).collect().to_pandas()['v1'] == orig_c3 + 1)
            assert np.all(s3.select(s3.v1).order_by(s3.c2).collect().to_pandas()['v1'] == orig_c3 + 1)
            assert np.all(
                s4.select(s4.v1).order_by(s4.c2).collect().to_pandas()['v1'] == \
                t.select(t.c3).order_by(t.c2).collect().to_pandas()['c3'] + 1)

        validate(t, v, s1, s2, s3, s4)

        # make sure it works after metadata reload
        cl = pxt.Client(reload=True)
        t, v = cl.get_table('test_tbl'), cl.get_table('v')
        s1, s2, s3, s4 = cl.get_table('s1'), cl.get_table('s2'), cl.get_table('s3'), cl.get_table('s4')
        validate(t, v, s1, s2, s3, s4)
|