pixeltable 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +20 -9
- pixeltable/__version__.py +3 -0
- pixeltable/catalog/column.py +23 -7
- pixeltable/catalog/insertable_table.py +32 -19
- pixeltable/catalog/table.py +210 -20
- pixeltable/catalog/table_version.py +272 -111
- pixeltable/catalog/table_version_path.py +6 -1
- pixeltable/dataframe.py +184 -110
- pixeltable/datatransfer/__init__.py +1 -0
- pixeltable/datatransfer/label_studio.py +526 -0
- pixeltable/datatransfer/remote.py +113 -0
- pixeltable/env.py +213 -79
- pixeltable/exec/__init__.py +2 -1
- pixeltable/exec/data_row_batch.py +6 -7
- pixeltable/exec/expr_eval_node.py +28 -28
- pixeltable/exec/sql_scan_node.py +7 -6
- pixeltable/exprs/__init__.py +4 -3
- pixeltable/exprs/column_ref.py +11 -2
- pixeltable/exprs/comparison.py +39 -1
- pixeltable/exprs/data_row.py +7 -0
- pixeltable/exprs/expr.py +26 -19
- pixeltable/exprs/function_call.py +17 -18
- pixeltable/exprs/globals.py +14 -2
- pixeltable/exprs/image_member_access.py +9 -28
- pixeltable/exprs/in_predicate.py +96 -0
- pixeltable/exprs/inline_array.py +13 -11
- pixeltable/exprs/inline_dict.py +15 -13
- pixeltable/exprs/row_builder.py +7 -1
- pixeltable/exprs/similarity_expr.py +67 -0
- pixeltable/ext/functions/whisperx.py +30 -0
- pixeltable/ext/functions/yolox.py +16 -0
- pixeltable/func/__init__.py +0 -2
- pixeltable/func/aggregate_function.py +5 -2
- pixeltable/func/callable_function.py +57 -13
- pixeltable/func/expr_template_function.py +14 -3
- pixeltable/func/function.py +35 -4
- pixeltable/func/signature.py +5 -15
- pixeltable/func/udf.py +8 -12
- pixeltable/functions/fireworks.py +9 -4
- pixeltable/functions/huggingface.py +48 -5
- pixeltable/functions/openai.py +49 -11
- pixeltable/functions/pil/image.py +61 -64
- pixeltable/functions/together.py +32 -6
- pixeltable/functions/util.py +0 -43
- pixeltable/functions/video.py +46 -8
- pixeltable/globals.py +443 -0
- pixeltable/index/__init__.py +1 -0
- pixeltable/index/base.py +9 -2
- pixeltable/index/btree.py +54 -0
- pixeltable/index/embedding_index.py +91 -15
- pixeltable/io/__init__.py +4 -0
- pixeltable/io/globals.py +59 -0
- pixeltable/{utils → io}/hf_datasets.py +48 -17
- pixeltable/io/pandas.py +148 -0
- pixeltable/{utils → io}/parquet.py +58 -33
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/iterators/base.py +8 -4
- pixeltable/iterators/document.py +225 -93
- pixeltable/iterators/video.py +16 -9
- pixeltable/metadata/__init__.py +8 -4
- pixeltable/metadata/converters/convert_12.py +3 -0
- pixeltable/metadata/converters/convert_13.py +41 -0
- pixeltable/metadata/converters/convert_14.py +13 -0
- pixeltable/metadata/converters/convert_15.py +29 -0
- pixeltable/metadata/converters/util.py +63 -0
- pixeltable/metadata/schema.py +12 -6
- pixeltable/plan.py +11 -24
- pixeltable/store.py +16 -23
- pixeltable/tool/create_test_db_dump.py +49 -14
- pixeltable/type_system.py +27 -58
- pixeltable/utils/coco.py +94 -0
- pixeltable/utils/documents.py +42 -12
- pixeltable/utils/http_server.py +70 -0
- pixeltable-0.2.7.dist-info/METADATA +137 -0
- pixeltable-0.2.7.dist-info/RECORD +126 -0
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +1 -1
- pixeltable/client.py +0 -600
- pixeltable/exprs/image_similarity_predicate.py +0 -58
- pixeltable/func/batched_function.py +0 -53
- pixeltable/func/nos_function.py +0 -202
- pixeltable/tests/conftest.py +0 -171
- pixeltable/tests/ext/test_yolox.py +0 -21
- pixeltable/tests/functions/test_fireworks.py +0 -43
- pixeltable/tests/functions/test_functions.py +0 -60
- pixeltable/tests/functions/test_huggingface.py +0 -158
- pixeltable/tests/functions/test_openai.py +0 -162
- pixeltable/tests/functions/test_together.py +0 -112
- pixeltable/tests/test_audio.py +0 -65
- pixeltable/tests/test_catalog.py +0 -27
- pixeltable/tests/test_client.py +0 -21
- pixeltable/tests/test_component_view.py +0 -379
- pixeltable/tests/test_dataframe.py +0 -440
- pixeltable/tests/test_dirs.py +0 -107
- pixeltable/tests/test_document.py +0 -120
- pixeltable/tests/test_exprs.py +0 -802
- pixeltable/tests/test_function.py +0 -332
- pixeltable/tests/test_index.py +0 -138
- pixeltable/tests/test_migration.py +0 -44
- pixeltable/tests/test_nos.py +0 -54
- pixeltable/tests/test_snapshot.py +0 -231
- pixeltable/tests/test_table.py +0 -1343
- pixeltable/tests/test_transactional_directory.py +0 -42
- pixeltable/tests/test_types.py +0 -52
- pixeltable/tests/test_video.py +0 -159
- pixeltable/tests/test_view.py +0 -535
- pixeltable/tests/utils.py +0 -442
- pixeltable/utils/clip.py +0 -18
- pixeltable-0.2.5.dist-info/METADATA +0 -128
- pixeltable-0.2.5.dist-info/RECORD +0 -139
- {pixeltable-0.2.5.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import pathlib
|
|
2
|
-
import tempfile
|
|
3
|
-
|
|
4
|
-
from pixeltable.utils.transactional_directory import transactional_directory
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class MyException(Exception):
|
|
8
|
-
pass
|
|
9
|
-
|
|
10
|
-
class TestTransactionalDirectory:
|
|
11
|
-
def test_success(self) -> None:
|
|
12
|
-
test_dir = pathlib.Path(tempfile.mkdtemp())
|
|
13
|
-
assert test_dir.exists()
|
|
14
|
-
final = test_dir / "test_success"
|
|
15
|
-
assert not final.exists()
|
|
16
|
-
with transactional_directory(final) as folder:
|
|
17
|
-
assert folder.exists()
|
|
18
|
-
(folder / "subfolder1").mkdir()
|
|
19
|
-
with (folder / "test.txt").open("w") as f:
|
|
20
|
-
f.write("test")
|
|
21
|
-
|
|
22
|
-
assert final.exists()
|
|
23
|
-
assert (final / "subfolder1").is_dir()
|
|
24
|
-
assert (final / "test.txt").read_text() == "test"
|
|
25
|
-
|
|
26
|
-
def test_failure(self) -> None:
|
|
27
|
-
test_dir = pathlib.Path(tempfile.mkdtemp())
|
|
28
|
-
assert test_dir.exists()
|
|
29
|
-
final = test_dir / "test_failure"
|
|
30
|
-
assert not final.exists()
|
|
31
|
-
|
|
32
|
-
try:
|
|
33
|
-
with transactional_directory(final) as folder:
|
|
34
|
-
assert folder.exists()
|
|
35
|
-
(folder / "subfolder1").mkdir()
|
|
36
|
-
with (folder / "test.txt").open("w") as f:
|
|
37
|
-
f.write("test")
|
|
38
|
-
raise MyException()
|
|
39
|
-
except MyException:
|
|
40
|
-
pass
|
|
41
|
-
|
|
42
|
-
assert not final.exists()
|
pixeltable/tests/test_types.py
DELETED
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
import datetime
|
|
2
|
-
from copy import copy
|
|
3
|
-
from typing import List, Dict, Optional
|
|
4
|
-
|
|
5
|
-
from pixeltable.type_system import \
|
|
6
|
-
ColumnType, StringType, IntType, BoolType, ImageType, InvalidType, FloatType, TimestampType, JsonType, ArrayType
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class TestTypes:
|
|
10
|
-
def test_serialize(self, init_env) -> None:
|
|
11
|
-
type_vals = [
|
|
12
|
-
InvalidType(), StringType(), IntType(), BoolType(), TimestampType(),
|
|
13
|
-
ImageType(height=100, width=200, mode='RGB'),
|
|
14
|
-
JsonType({
|
|
15
|
-
'a': StringType(), 'b': IntType(), 'c': FloatType(), 'd': BoolType(), 'e': TimestampType(),
|
|
16
|
-
'f': ImageType(height=100, width=200, mode='RGB'),
|
|
17
|
-
'g': JsonType({'f1': StringType(), 'f2': IntType()}),
|
|
18
|
-
'h': ArrayType((224, 224, 3), dtype=IntType()),
|
|
19
|
-
}),
|
|
20
|
-
ArrayType((224, 224, 3), dtype=IntType()),
|
|
21
|
-
]
|
|
22
|
-
|
|
23
|
-
for t in type_vals:
|
|
24
|
-
t_serialized = t.serialize()
|
|
25
|
-
t_deserialized = ColumnType.deserialize(t_serialized)
|
|
26
|
-
assert t == t_deserialized
|
|
27
|
-
|
|
28
|
-
def test_from_python_type(self) -> None:
|
|
29
|
-
test_cases = {
|
|
30
|
-
str: StringType(),
|
|
31
|
-
int: IntType(),
|
|
32
|
-
float: FloatType(),
|
|
33
|
-
bool: BoolType(),
|
|
34
|
-
datetime.date: TimestampType(),
|
|
35
|
-
datetime.datetime: TimestampType(),
|
|
36
|
-
list: JsonType(),
|
|
37
|
-
dict: JsonType(),
|
|
38
|
-
list[int]: JsonType(),
|
|
39
|
-
list[dict[str, int]]: JsonType(),
|
|
40
|
-
dict[int, str]: JsonType(),
|
|
41
|
-
dict[dict[str, int], list[int]]: JsonType(),
|
|
42
|
-
List: JsonType(),
|
|
43
|
-
Dict: JsonType(),
|
|
44
|
-
List[int]: JsonType(),
|
|
45
|
-
List[Dict[str, int]]: JsonType(),
|
|
46
|
-
Dict[int, str]: JsonType()
|
|
47
|
-
}
|
|
48
|
-
for py_type, pxt_type in test_cases.items():
|
|
49
|
-
assert ColumnType.from_python_type(py_type) == pxt_type
|
|
50
|
-
opt_pxt_type = copy(pxt_type)
|
|
51
|
-
opt_pxt_type.nullable = True
|
|
52
|
-
assert ColumnType.from_python_type(Optional[py_type]) == opt_pxt_type
|
pixeltable/tests/test_video.py
DELETED
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
from typing import Optional, List, Tuple
|
|
2
|
-
|
|
3
|
-
import PIL
|
|
4
|
-
import pytest
|
|
5
|
-
|
|
6
|
-
import pixeltable as pxt
|
|
7
|
-
from pixeltable import catalog
|
|
8
|
-
from pixeltable import exceptions as excs
|
|
9
|
-
from pixeltable.iterators import FrameIterator
|
|
10
|
-
from pixeltable.tests.utils import get_video_files
|
|
11
|
-
from pixeltable.tests.utils import skip_test_if_not_installed
|
|
12
|
-
from pixeltable.type_system import VideoType, ImageType
|
|
13
|
-
from pixeltable.utils.media_store import MediaStore
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class TestVideo:
|
|
17
|
-
def create_tbls(
|
|
18
|
-
self, cl: pxt.Client, base_name: str = 'video_tbl', view_name: str = 'frame_view'
|
|
19
|
-
) -> Tuple[catalog.InsertableTable, catalog.Table]:
|
|
20
|
-
cl.drop_table(view_name, ignore_errors=True)
|
|
21
|
-
cl.drop_table(base_name, ignore_errors=True)
|
|
22
|
-
base_t = cl.create_table(base_name, {'video': VideoType()})
|
|
23
|
-
args = {'video': base_t.video, 'fps': 1}
|
|
24
|
-
view_t = cl.create_view(view_name, base_t, iterator_class=FrameIterator, iterator_args=args)
|
|
25
|
-
return base_t, view_t
|
|
26
|
-
|
|
27
|
-
def create_and_insert(
|
|
28
|
-
self, cl: pxt.Client, stored: Optional[bool], paths: List[str]
|
|
29
|
-
) -> Tuple[catalog.InsertableTable, catalog.Table]:
|
|
30
|
-
base_t, view_t = self.create_tbls(cl)
|
|
31
|
-
|
|
32
|
-
view_t.add_column(transform=view_t.frame.rotate(90), stored=stored)
|
|
33
|
-
base_t.insert({'video': p} for p in paths)
|
|
34
|
-
total_num_rows = view_t.count()
|
|
35
|
-
result = view_t[view_t.frame_idx >= 5][view_t.frame_idx, view_t.frame, view_t.transform].show(0)
|
|
36
|
-
assert len(result) == total_num_rows - len(paths) * 5
|
|
37
|
-
result = view_t[view_t.frame_idx, view_t.frame, view_t.transform].show(3)
|
|
38
|
-
assert len(result) == 3
|
|
39
|
-
result = view_t[view_t.frame_idx, view_t.frame, view_t.transform].show(0)
|
|
40
|
-
assert len(result) == total_num_rows
|
|
41
|
-
return base_t, view_t
|
|
42
|
-
|
|
43
|
-
def test_basic(self, test_client: pxt.Client) -> None:
|
|
44
|
-
video_filepaths = get_video_files()
|
|
45
|
-
cl = test_client
|
|
46
|
-
|
|
47
|
-
# default case: computed images are not stored
|
|
48
|
-
_, view = self.create_and_insert(cl, None, video_filepaths)
|
|
49
|
-
assert MediaStore.count(view.get_id()) == 0
|
|
50
|
-
|
|
51
|
-
# computed images are explicitly not stored
|
|
52
|
-
_, view = self.create_and_insert(cl, False, video_filepaths)
|
|
53
|
-
assert MediaStore.count(view.get_id()) == 0
|
|
54
|
-
|
|
55
|
-
# computed images are stored
|
|
56
|
-
tbl, view = self.create_and_insert(cl, True, video_filepaths)
|
|
57
|
-
assert MediaStore.count(view.get_id()) == view.count()
|
|
58
|
-
|
|
59
|
-
# revert() also removes computed images
|
|
60
|
-
tbl.insert({'video': p} for p in video_filepaths)
|
|
61
|
-
tbl.revert()
|
|
62
|
-
assert MediaStore.count(view.get_id()) == view.count()
|
|
63
|
-
|
|
64
|
-
def test_query(self, test_client: pxt.client) -> None:
|
|
65
|
-
skip_test_if_not_installed('boto3')
|
|
66
|
-
video_filepaths = get_video_files()
|
|
67
|
-
cl = test_client
|
|
68
|
-
base_t, view_t = self.create_tbls(cl)
|
|
69
|
-
# also include an external file, to make sure that prefetching works
|
|
70
|
-
url = 's3://multimedia-commons/data/videos/mp4/ffe/ff3/ffeff3c6bf57504e7a6cecaff6aefbc9.mp4'
|
|
71
|
-
video_filepaths.append(url)
|
|
72
|
-
status = base_t.insert({'video': p} for p in video_filepaths)
|
|
73
|
-
assert status.num_excs == 0
|
|
74
|
-
# make sure that we can get the frames back
|
|
75
|
-
res = view_t.select(view_t.frame).collect().to_pandas()
|
|
76
|
-
assert res['frame'].notnull().all()
|
|
77
|
-
# make sure we can select a specific video
|
|
78
|
-
all_rows = view_t.select(url=view_t.video.fileurl).collect().to_pandas()
|
|
79
|
-
res = view_t.where(view_t.video == url).collect()
|
|
80
|
-
assert len(res) == len(all_rows[all_rows.url == url])
|
|
81
|
-
|
|
82
|
-
def test_fps(self, test_client: pxt.client) -> None:
|
|
83
|
-
cl = test_client
|
|
84
|
-
path = get_video_files()[0]
|
|
85
|
-
videos = cl.create_table('videos', {'video': VideoType()})
|
|
86
|
-
frames_1_0 = cl.create_view(
|
|
87
|
-
'frames_1_0', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1})
|
|
88
|
-
frames_0_5 = cl.create_view(
|
|
89
|
-
'frames_0_5', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1/2})
|
|
90
|
-
frames_0_33 = cl.create_view(
|
|
91
|
-
'frames_0_33', videos, iterator_class=FrameIterator, iterator_args={'video': videos.video, 'fps': 1/3})
|
|
92
|
-
videos.insert(video=path)
|
|
93
|
-
assert frames_0_5.count() == frames_1_0.count() // 2 or frames_0_5.count() == frames_1_0.count() // 2 + 1
|
|
94
|
-
assert frames_0_33.count() == frames_1_0.count() // 3 or frames_0_33.count() == frames_1_0.count() // 3 + 1
|
|
95
|
-
|
|
96
|
-
def test_computed_cols(self, test_client: pxt.client) -> None:
|
|
97
|
-
video_filepaths = get_video_files()
|
|
98
|
-
cl = test_client
|
|
99
|
-
base_t, view_t = self.create_tbls(cl)
|
|
100
|
-
# c2 and c4 depend directly on c1, c3 depends on it indirectly
|
|
101
|
-
view_t.add_column(c1=view_t.frame.resize([224, 224]))
|
|
102
|
-
view_t.add_column(c2=view_t.c1.rotate(10))
|
|
103
|
-
view_t.add_column(c3=view_t.c2.rotate(20))
|
|
104
|
-
view_t.add_column(c4=view_t.c1.rotate(30))
|
|
105
|
-
for name in ['c1', 'c2', 'c3', 'c4']:
|
|
106
|
-
assert not view_t.tbl_version_path.tbl_version.cols_by_name[name].is_stored
|
|
107
|
-
base_t.insert({'video': p} for p in video_filepaths)
|
|
108
|
-
_ = view_t[view_t.c1, view_t.c2, view_t.c3, view_t.c4].show(0)
|
|
109
|
-
|
|
110
|
-
# window function that simply passes through the frame
|
|
111
|
-
@pxt.uda(
|
|
112
|
-
update_types=[ImageType()], value_type=ImageType(),
|
|
113
|
-
requires_order_by=True, allows_std_agg=False, allows_window=True)
|
|
114
|
-
class agg_fn:
|
|
115
|
-
def __init__(self):
|
|
116
|
-
self.img = None
|
|
117
|
-
def update(self, frame: PIL.Image.Image) -> None:
|
|
118
|
-
self.img = frame
|
|
119
|
-
def value(self) -> PIL.Image.Image:
|
|
120
|
-
return self.img
|
|
121
|
-
|
|
122
|
-
def test_make_video(self, test_client: pxt.Client) -> None:
|
|
123
|
-
video_filepaths = get_video_files()
|
|
124
|
-
cl = test_client
|
|
125
|
-
base_t, view_t = self.create_tbls(cl)
|
|
126
|
-
base_t.insert({'video': p} for p in video_filepaths)
|
|
127
|
-
# reference to the frame col requires ordering by base, pos
|
|
128
|
-
from pixeltable.functions import make_video
|
|
129
|
-
_ = view_t.select(make_video(view_t.pos, view_t.frame)).group_by(base_t).show()
|
|
130
|
-
# the same without frame col
|
|
131
|
-
view_t.add_column(transformed=view_t.frame.rotate(30), stored=True)
|
|
132
|
-
_ = view_t.select(make_video(view_t.pos, view_t.transformed)).group_by(base_t).show()
|
|
133
|
-
|
|
134
|
-
with pytest.raises(excs.Error):
|
|
135
|
-
# make_video() doesn't allow windows
|
|
136
|
-
_ = view_t.select(make_video(view_t.pos, view_t.frame, group_by=base_t)).show()
|
|
137
|
-
with pytest.raises(excs.Error):
|
|
138
|
-
# make_video() requires ordering
|
|
139
|
-
_ = view_t.select(make_video(view_t.frame, order_by=view_t.pos)).show()
|
|
140
|
-
with pytest.raises(excs.Error):
|
|
141
|
-
# incompatible ordering requirements
|
|
142
|
-
_ = view_t.select(
|
|
143
|
-
make_video(view_t.pos, view_t.frame),
|
|
144
|
-
make_video(view_t.pos - 1, view_t.transformed)).group_by(base_t).show()
|
|
145
|
-
|
|
146
|
-
# make sure it works
|
|
147
|
-
_ = view_t.select(self.agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
|
|
148
|
-
status = view_t.add_column(agg=self.agg_fn(view_t.pos, view_t.frame, group_by=base_t))
|
|
149
|
-
assert status.num_excs == 0
|
|
150
|
-
_ = view_t.select(make_video(view_t.pos, view_t.agg)).group_by(base_t).show()
|
|
151
|
-
|
|
152
|
-
# image cols computed with a window function currently need to be stored
|
|
153
|
-
with pytest.raises(excs.Error):
|
|
154
|
-
view_t.add_column(agg2=self.agg_fn(view_t.pos, view_t.frame, group_by=base_t), stored=False)
|
|
155
|
-
|
|
156
|
-
# reload from store
|
|
157
|
-
cl = pxt.Client(reload=True)
|
|
158
|
-
base_t, view_t = cl.get_table(base_t.get_name()), cl.get_table(view_t.get_name())
|
|
159
|
-
_ = view_t.select(self.agg_fn(view_t.pos, view_t.frame, group_by=base_t)).show()
|