pixeltable 0.2.20__py3-none-any.whl → 0.2.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +7 -19
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +7 -7
- pixeltable/catalog/column.py +37 -11
- pixeltable/catalog/globals.py +21 -0
- pixeltable/catalog/insertable_table.py +6 -4
- pixeltable/catalog/table.py +227 -148
- pixeltable/catalog/table_version.py +66 -28
- pixeltable/catalog/table_version_path.py +0 -8
- pixeltable/catalog/view.py +18 -19
- pixeltable/dataframe.py +16 -32
- pixeltable/env.py +6 -1
- pixeltable/exec/__init__.py +1 -2
- pixeltable/exec/aggregation_node.py +27 -17
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/data_row_batch.py +9 -26
- pixeltable/exec/exec_node.py +36 -7
- pixeltable/exec/expr_eval_node.py +19 -11
- pixeltable/exec/in_memory_data_node.py +14 -11
- pixeltable/exec/sql_node.py +266 -138
- pixeltable/exprs/__init__.py +1 -0
- pixeltable/exprs/arithmetic_expr.py +3 -1
- pixeltable/exprs/array_slice.py +7 -7
- pixeltable/exprs/column_property_ref.py +37 -10
- pixeltable/exprs/column_ref.py +93 -14
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +8 -7
- pixeltable/exprs/data_row.py +56 -36
- pixeltable/exprs/expr.py +65 -63
- pixeltable/exprs/expr_dict.py +55 -0
- pixeltable/exprs/expr_set.py +26 -15
- pixeltable/exprs/function_call.py +53 -24
- pixeltable/exprs/globals.py +4 -1
- pixeltable/exprs/in_predicate.py +8 -7
- pixeltable/exprs/inline_expr.py +4 -4
- pixeltable/exprs/is_null.py +4 -4
- pixeltable/exprs/json_mapper.py +11 -12
- pixeltable/exprs/json_path.py +5 -10
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +5 -4
- pixeltable/exprs/object_ref.py +2 -1
- pixeltable/exprs/row_builder.py +88 -36
- pixeltable/exprs/rowid_ref.py +14 -13
- pixeltable/exprs/similarity_expr.py +12 -7
- pixeltable/exprs/sql_element_cache.py +12 -6
- pixeltable/exprs/type_cast.py +8 -6
- pixeltable/exprs/variable.py +5 -4
- pixeltable/ext/functions/whisperx.py +7 -2
- pixeltable/func/aggregate_function.py +1 -1
- pixeltable/func/callable_function.py +2 -2
- pixeltable/func/function.py +11 -10
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/query_template_function.py +11 -12
- pixeltable/func/signature.py +17 -15
- pixeltable/func/udf.py +0 -4
- pixeltable/functions/__init__.py +2 -2
- pixeltable/functions/audio.py +4 -6
- pixeltable/functions/globals.py +84 -42
- pixeltable/functions/huggingface.py +31 -34
- pixeltable/functions/image.py +59 -45
- pixeltable/functions/json.py +0 -1
- pixeltable/functions/llama_cpp.py +106 -0
- pixeltable/functions/mistralai.py +2 -2
- pixeltable/functions/ollama.py +147 -0
- pixeltable/functions/openai.py +22 -25
- pixeltable/functions/replicate.py +72 -0
- pixeltable/functions/string.py +59 -50
- pixeltable/functions/timestamp.py +20 -20
- pixeltable/functions/together.py +2 -2
- pixeltable/functions/video.py +11 -20
- pixeltable/functions/whisper.py +2 -20
- pixeltable/globals.py +65 -74
- pixeltable/index/base.py +2 -2
- pixeltable/index/btree.py +20 -7
- pixeltable/index/embedding_index.py +12 -14
- pixeltable/io/__init__.py +1 -2
- pixeltable/io/external_store.py +11 -5
- pixeltable/io/fiftyone.py +178 -0
- pixeltable/io/globals.py +98 -2
- pixeltable/io/hf_datasets.py +1 -1
- pixeltable/io/label_studio.py +6 -6
- pixeltable/io/parquet.py +14 -13
- pixeltable/iterators/base.py +3 -2
- pixeltable/iterators/document.py +10 -8
- pixeltable/iterators/video.py +126 -60
- pixeltable/metadata/__init__.py +4 -3
- pixeltable/metadata/converters/convert_14.py +4 -2
- pixeltable/metadata/converters/convert_15.py +1 -1
- pixeltable/metadata/converters/convert_19.py +1 -0
- pixeltable/metadata/converters/convert_20.py +1 -1
- pixeltable/metadata/converters/convert_21.py +34 -0
- pixeltable/metadata/converters/util.py +54 -12
- pixeltable/metadata/notes.py +1 -0
- pixeltable/metadata/schema.py +40 -21
- pixeltable/plan.py +149 -165
- pixeltable/py.typed +0 -0
- pixeltable/store.py +57 -37
- pixeltable/tool/create_test_db_dump.py +6 -6
- pixeltable/tool/create_test_video.py +1 -1
- pixeltable/tool/doc_plugins/griffe.py +3 -34
- pixeltable/tool/embed_udf.py +1 -1
- pixeltable/tool/mypy_plugin.py +55 -0
- pixeltable/type_system.py +260 -61
- pixeltable/utils/arrow.py +10 -9
- pixeltable/utils/coco.py +4 -4
- pixeltable/utils/documents.py +16 -2
- pixeltable/utils/filecache.py +9 -9
- pixeltable/utils/formatter.py +10 -11
- pixeltable/utils/http_server.py +2 -5
- pixeltable/utils/media_store.py +6 -6
- pixeltable/utils/pytorch.py +10 -11
- pixeltable/utils/sql.py +2 -1
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/METADATA +50 -13
- pixeltable-0.2.22.dist-info/RECORD +153 -0
- pixeltable/exec/media_validation_node.py +0 -43
- pixeltable/utils/help.py +0 -11
- pixeltable-0.2.20.dist-info/RECORD +0 -147
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/WHEEL +0 -0
- {pixeltable-0.2.20.dist-info → pixeltable-0.2.22.dist-info}/entry_points.txt +0 -0
pixeltable/utils/filecache.py
CHANGED
|
@@ -77,8 +77,8 @@ class FileCache:
|
|
|
77
77
|
evicted_working_set_keys: set[str]
|
|
78
78
|
new_redownload_witnessed: bool # whether a new re-download has occurred since the last time a warning was issued
|
|
79
79
|
|
|
80
|
-
|
|
81
|
-
|
|
80
|
+
FileCacheColumnStats = namedtuple('FileCacheColumnStats', ('tbl_id', 'col_id', 'num_files', 'total_size'))
|
|
81
|
+
FileCacheStats = namedtuple(
|
|
82
82
|
'FileCacheStats',
|
|
83
83
|
('total_size', 'num_requests', 'num_hits', 'num_evictions', 'column_stats')
|
|
84
84
|
)
|
|
@@ -96,7 +96,7 @@ class FileCache:
|
|
|
96
96
|
def __init__(self):
|
|
97
97
|
self.cache = OrderedDict()
|
|
98
98
|
self.total_size = 0
|
|
99
|
-
self.capacity_bytes = Env.get()._file_cache_size_g * (1 << 30)
|
|
99
|
+
self.capacity_bytes = int(Env.get()._file_cache_size_g * (1 << 30))
|
|
100
100
|
self.num_requests = 0
|
|
101
101
|
self.num_hits = 0
|
|
102
102
|
self.num_evictions = 0
|
|
@@ -174,7 +174,7 @@ class FileCache:
|
|
|
174
174
|
path = entry.path
|
|
175
175
|
path.touch(exist_ok=True)
|
|
176
176
|
file_info = os.stat(str(path))
|
|
177
|
-
entry.last_used = file_info.st_mtime
|
|
177
|
+
entry.last_used = datetime.fromtimestamp(file_info.st_mtime)
|
|
178
178
|
self.cache.move_to_end(key, last=True)
|
|
179
179
|
self.num_hits += 1
|
|
180
180
|
self.keys_retrieved.add(key)
|
|
@@ -195,7 +195,7 @@ class FileCache:
|
|
|
195
195
|
self.evicted_working_set_keys.add(key)
|
|
196
196
|
self.new_redownload_witnessed = True
|
|
197
197
|
self.keys_retrieved.add(key)
|
|
198
|
-
entry = CacheEntry(key, tbl_id, col_id, file_info.st_size, file_info.st_mtime, path.suffix)
|
|
198
|
+
entry = CacheEntry(key, tbl_id, col_id, file_info.st_size, datetime.fromtimestamp(file_info.st_mtime), path.suffix)
|
|
199
199
|
self.cache[key] = entry
|
|
200
200
|
self.total_size += entry.size
|
|
201
201
|
new_path = entry.path
|
|
@@ -223,19 +223,19 @@ class FileCache:
|
|
|
223
223
|
self.capacity_bytes = capacity_bytes
|
|
224
224
|
self.ensure_capacity(0) # evict entries if necessary
|
|
225
225
|
|
|
226
|
-
def stats(self) ->
|
|
226
|
+
def stats(self) -> FileCacheStats:
|
|
227
227
|
# collect column stats
|
|
228
228
|
# (tbl_id, col_id) -> (num_files, total_size)
|
|
229
|
-
d: dict[tuple[
|
|
229
|
+
d: dict[tuple[UUID, int], list[int]] = defaultdict(lambda: [0, 0])
|
|
230
230
|
for entry in self.cache.values():
|
|
231
231
|
t = d[(entry.tbl_id, entry.col_id)]
|
|
232
232
|
t[0] += 1
|
|
233
233
|
t[1] += entry.size
|
|
234
234
|
col_stats = [
|
|
235
|
-
self.
|
|
235
|
+
self.FileCacheColumnStats(tbl_id, col_id, num_files, size) for (tbl_id, col_id), (num_files, size) in d.items()
|
|
236
236
|
]
|
|
237
237
|
col_stats.sort(key=lambda e: e[3], reverse=True)
|
|
238
|
-
return self.
|
|
238
|
+
return self.FileCacheStats(self.total_size, self.num_requests, self.num_hits, self.num_evictions, col_stats)
|
|
239
239
|
|
|
240
240
|
def debug_print(self) -> None:
|
|
241
241
|
for entry in self.cache.values():
|
pixeltable/utils/formatter.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import html
|
|
3
|
+
import io
|
|
3
4
|
import json
|
|
4
5
|
import logging
|
|
5
6
|
import mimetypes
|
|
6
7
|
from typing import Any, Callable, Optional
|
|
7
8
|
|
|
9
|
+
import av # type: ignore[import-untyped]
|
|
10
|
+
import numpy as np
|
|
8
11
|
import PIL
|
|
9
12
|
import PIL.Image as Image
|
|
10
|
-
import cv2
|
|
11
|
-
import numpy as np
|
|
12
13
|
|
|
13
|
-
import io
|
|
14
14
|
import pixeltable.type_system as ts
|
|
15
15
|
from pixeltable.utils.http_server import get_file_uri
|
|
16
16
|
|
|
@@ -159,17 +159,16 @@ class Formatter:
|
|
|
159
159
|
# the video itself is not accessible.
|
|
160
160
|
# TODO(aaron-siegel): If the video is backed by a concrete external URL,
|
|
161
161
|
# should we link to that instead?
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)
|
|
167
|
-
thumb = PIL.Image.fromarray(img_array)
|
|
162
|
+
with av.open(file_path) as container:
|
|
163
|
+
try:
|
|
164
|
+
thumb = next(container.decode(video=0)).to_image()
|
|
165
|
+
assert isinstance(thumb, Image.Image)
|
|
168
166
|
with io.BytesIO() as buffer:
|
|
169
167
|
thumb.save(buffer, 'jpeg')
|
|
170
168
|
thumb_base64 = base64.b64encode(buffer.getvalue()).decode()
|
|
171
169
|
thumb_tag = f'poster="data:image/jpeg;base64,{thumb_base64}"'
|
|
172
|
-
|
|
170
|
+
except Exception:
|
|
171
|
+
pass
|
|
173
172
|
if self.__num_rows > 1:
|
|
174
173
|
width = 320
|
|
175
174
|
elif self.__num_cols > 1:
|
|
@@ -201,7 +200,7 @@ class Formatter:
|
|
|
201
200
|
# try generating a thumbnail for different types and use that if successful
|
|
202
201
|
if file_path.lower().endswith('.pdf'):
|
|
203
202
|
try:
|
|
204
|
-
import fitz
|
|
203
|
+
import fitz # type: ignore[import-untyped]
|
|
205
204
|
|
|
206
205
|
doc = fitz.open(file_path)
|
|
207
206
|
p = doc.get_page_pixmap(0)
|
pixeltable/utils/http_server.py
CHANGED
|
@@ -1,11 +1,8 @@
|
|
|
1
1
|
import http
|
|
2
2
|
import http.server
|
|
3
3
|
import logging
|
|
4
|
-
import urllib
|
|
5
|
-
import posixpath
|
|
6
4
|
import pathlib
|
|
7
|
-
import
|
|
8
|
-
import string
|
|
5
|
+
import urllib
|
|
9
6
|
|
|
10
7
|
_logger = logging.getLogger('pixeltable.http.server')
|
|
11
8
|
|
|
@@ -43,7 +40,7 @@ class AbsolutePathHandler(http.server.SimpleHTTPRequestHandler):
|
|
|
43
40
|
def log_message(self, format, *args) -> None:
|
|
44
41
|
"""override logging to stderr in http.server.BaseHTTPRequestHandler"""
|
|
45
42
|
message = format % args
|
|
46
|
-
_logger.info(message.translate(self._control_char_table))
|
|
43
|
+
_logger.info(message.translate(self._control_char_table)) # type: ignore[attr-defined]
|
|
47
44
|
|
|
48
45
|
|
|
49
46
|
class LoggingHTTPServer(http.server.ThreadingHTTPServer):
|
pixeltable/utils/media_store.py
CHANGED
|
@@ -3,9 +3,9 @@ import os
|
|
|
3
3
|
import re
|
|
4
4
|
import shutil
|
|
5
5
|
import uuid
|
|
6
|
-
from typing import Optional, List, Tuple, Dict
|
|
7
|
-
from pathlib import Path
|
|
8
6
|
from collections import defaultdict
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
9
|
from uuid import UUID
|
|
10
10
|
|
|
11
11
|
from pixeltable.env import Env
|
|
@@ -46,8 +46,8 @@ class MediaStore:
|
|
|
46
46
|
else:
|
|
47
47
|
# Remove only the elements for the specified version.
|
|
48
48
|
paths = glob.glob(str(Env.get().media_dir / tbl_id.hex) + f'/**/{tbl_id.hex}_*_{version}_*', recursive=True)
|
|
49
|
-
for
|
|
50
|
-
os.remove(
|
|
49
|
+
for p in paths:
|
|
50
|
+
os.remove(p)
|
|
51
51
|
|
|
52
52
|
@classmethod
|
|
53
53
|
def count(cls, tbl_id: UUID) -> int:
|
|
@@ -58,10 +58,10 @@ class MediaStore:
|
|
|
58
58
|
return len(paths)
|
|
59
59
|
|
|
60
60
|
@classmethod
|
|
61
|
-
def stats(cls) ->
|
|
61
|
+
def stats(cls) -> list[tuple[UUID, int, int, int]]:
|
|
62
62
|
paths = glob.glob(str(Env.get().media_dir) + "/**", recursive=True)
|
|
63
63
|
# key: (tbl_id, col_id), value: (num_files, size)
|
|
64
|
-
d:
|
|
64
|
+
d: dict[tuple[UUID, int], list[int]] = defaultdict(lambda: [0, 0])
|
|
65
65
|
for p in paths:
|
|
66
66
|
if not os.path.isdir(p):
|
|
67
67
|
matched = re.match(cls.pattern, Path(p).name)
|
pixeltable/utils/pytorch.py
CHANGED
|
@@ -2,13 +2,13 @@ import datetime
|
|
|
2
2
|
import io
|
|
3
3
|
import json
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from typing import Any,
|
|
5
|
+
from typing import Any, Iterator, Sequence
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
8
|
import PIL.Image
|
|
9
|
-
import pyarrow as pa
|
|
10
9
|
import torch
|
|
11
10
|
import torch.utils.data
|
|
11
|
+
import torchvision # type: ignore[import-untyped]
|
|
12
12
|
from pyarrow import parquet
|
|
13
13
|
|
|
14
14
|
from pixeltable.type_system import ColumnType
|
|
@@ -41,7 +41,7 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
|
|
|
41
41
|
with column_type_path.open() as f:
|
|
42
42
|
column_types = json.load(f)
|
|
43
43
|
self.column_types = {k: ColumnType.from_dict(v) for k, v in column_types.items()}
|
|
44
|
-
self.part_metadata = parquet.ParquetDataset(path).files
|
|
44
|
+
self.part_metadata: list = parquet.ParquetDataset(str(path)).files
|
|
45
45
|
|
|
46
46
|
def _unmarshall(self, k: str, v: Any) -> Any:
|
|
47
47
|
if self.column_types[k].is_image_type():
|
|
@@ -54,7 +54,6 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
|
|
|
54
54
|
return arr
|
|
55
55
|
|
|
56
56
|
assert self.image_format == "pt"
|
|
57
|
-
import torchvision
|
|
58
57
|
|
|
59
58
|
# use arr instead of im in ToTensor() to guarantee array input
|
|
60
59
|
# to torch.from_numpy is writable. Using im is a suspected cause of
|
|
@@ -77,17 +76,17 @@ class PixeltablePytorchDataset(torch.utils.data.IterableDataset):
|
|
|
77
76
|
assert not isinstance(v, np.ndarray) # all array outputs should be handled above
|
|
78
77
|
return v
|
|
79
78
|
|
|
80
|
-
def __iter__(self) -> Iterator[
|
|
81
|
-
|
|
79
|
+
def __iter__(self) -> Iterator[dict[str, Any]]:
|
|
80
|
+
from pixeltable.utils import arrow
|
|
81
|
+
|
|
82
82
|
worker_info = torch.utils.data.get_worker_info()
|
|
83
83
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
part_list = [ i for i in part_list if (i % worker_info.num_workers) == worker_info.id ]
|
|
84
|
+
part_list: Sequence[int] = range(len(self.part_metadata))
|
|
85
|
+
if worker_info is not None:
|
|
86
|
+
part_list = [i for i in part_list if (i % worker_info.num_workers) == worker_info.id]
|
|
88
87
|
|
|
89
88
|
for part_no in part_list:
|
|
90
89
|
pqf = parquet.ParquetFile(self.part_metadata[part_no])
|
|
91
90
|
for batch in pqf.iter_batches():
|
|
92
91
|
for tup in arrow.iter_tuples(batch):
|
|
93
|
-
yield {k: self._unmarshall(k, v) for k, v in tup.items()}
|
|
92
|
+
yield {k: self._unmarshall(k, v) for k, v in tup.items()}
|
pixeltable/utils/sql.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
|
+
from sqlalchemy.dialects import postgresql
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
def log_stmt(logger: logging.Logger, stmt) -> None:
|
|
7
|
-
logger.debug(f'executing {str(stmt.compile(dialect=
|
|
8
|
+
logger.debug(f'executing {str(stmt.compile(dialect=postgresql.dialect()))}')
|
|
8
9
|
|
|
9
10
|
def log_explain(logger: logging.Logger, stmt: sql.sql.ClauseElement, conn: sql.engine.Connection) -> None:
|
|
10
11
|
try:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.22
|
|
4
4
|
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
5
|
Author: Pixeltable, Inc.
|
|
6
6
|
Author-email: contact@pixeltable.com
|
|
@@ -16,9 +16,9 @@ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
|
|
|
16
16
|
Requires-Dist: ftfy (>=6.2.0,<7.0.0)
|
|
17
17
|
Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
|
|
18
18
|
Requires-Dist: jmespath (>=1.0.1,<2.0.0)
|
|
19
|
+
Requires-Dist: lxml (>=5.0)
|
|
19
20
|
Requires-Dist: more-itertools (>=10.2,<11.0)
|
|
20
21
|
Requires-Dist: numpy (>=1.25,<2.0)
|
|
21
|
-
Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
22
22
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
23
23
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
24
24
|
Requires-Dist: pillow (>=9.3.0)
|
|
@@ -39,15 +39,19 @@ Description-Content-Type: text/markdown
|
|
|
39
39
|
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="50%" />
|
|
40
40
|
<br></br>
|
|
41
41
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
](https://opensource.org/licenses/Apache-2.0)
|
|
45
|
+

|
|
46
|
+

|
|
45
47
|
<br>
|
|
46
48
|
[](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml)
|
|
47
49
|
[](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml)
|
|
48
|
-
[](https://pypi.org/project/pixeltable/)
|
|
51
|
+
[](https://app.fossa.com/projects/git%2Bgithub.com%2Fpixeltable%2Fpixeltable?ref=badge_shield&issueType=security)
|
|
52
|
+
<a target="_blank" href="https://huggingface.co/Pixeltable"> <img src="https://img.shields.io/badge/🤗-HF Space-FF7D04" alt="Visit our Hugging Face space"/></a>
|
|
49
53
|
|
|
50
|
-
[Installation](https://pixeltable.
|
|
54
|
+
[Installation](https://docs.pixeltable.com/docs/installation) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#-code-samples) | [Computer Vision](https://docs.pixeltable.com/docs/object-detection-in-videos) | [LLM](https://docs.pixeltable.com/docs/document-indexing-and-rag)
|
|
51
55
|
</div>
|
|
52
56
|
|
|
53
57
|
Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to **store**, **transform**, **index**, and **iterate** on data for their ML workflows.
|
|
@@ -73,8 +77,9 @@ Learn how to create tables, populate them with data, and enhance them with built
|
|
|
73
77
|
|:----------|:-----------------|:-------------------------|:---------------------------------:|
|
|
74
78
|
| 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
75
79
|
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
76
|
-
|
|
|
77
|
-
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/
|
|
80
|
+
| Incremental Prompt Engineering | <a target="_blank" href="https://colab.research.google.com/github/mistralai/cookbook/blob/main/third_party/Pixeltable/incremental_prompt_engineering_and_model_comparison.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Github"/> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
81
|
+
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Documentation-013056" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
82
|
+
| Multimodal Application | <a target="_blank" href="https://huggingface.co/spaces/Pixeltable/Multimodal-Powerhouse"> <img src="https://img.shields.io/badge/Hugging Face-FF7D04" alt="Visit our documentation"/></a> | Document Indexing and RAG | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/rag-demo.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
78
83
|
|
|
79
84
|
## 🧱 Code Samples
|
|
80
85
|
|
|
@@ -82,7 +87,7 @@ Learn how to create tables, populate them with data, and enhance them with built
|
|
|
82
87
|
```python
|
|
83
88
|
import pixeltable as pxt
|
|
84
89
|
|
|
85
|
-
v = pxt.create_table('external_data.videos', {'video': pxt.
|
|
90
|
+
v = pxt.create_table('external_data.videos', {'video': pxt.Video})
|
|
86
91
|
|
|
87
92
|
prefix = 's3://multimedia-commons/'
|
|
88
93
|
paths = [
|
|
@@ -100,7 +105,7 @@ import pixeltable as pxt
|
|
|
100
105
|
from pixeltable.functions import huggingface
|
|
101
106
|
|
|
102
107
|
# Create a table to store data persistently
|
|
103
|
-
t = pxt.create_table('image', {'image': pxt.
|
|
108
|
+
t = pxt.create_table('image', {'image': pxt.Image})
|
|
104
109
|
|
|
105
110
|
# Insert some images
|
|
106
111
|
prefix = 'https://upload.wikimedia.org/wikipedia/commons'
|
|
@@ -157,7 +162,7 @@ Learn how to leverage Pixeltable for [Model analytics](https://pixeltable.readme
|
|
|
157
162
|
|
|
158
163
|
### Working with inference services
|
|
159
164
|
```python
|
|
160
|
-
chat_table = pxt.create_table('together_demo.chat', {'input': pxt.
|
|
165
|
+
chat_table = pxt.create_table('together_demo.chat', {'input': pxt.String})
|
|
161
166
|
|
|
162
167
|
# The chat-completions API expects JSON-formatted input:
|
|
163
168
|
messages = [{'role': 'user', 'content': chat_table.input}]
|
|
@@ -193,7 +198,7 @@ from pixeltable.functions.huggingface import clip_image, clip_text
|
|
|
193
198
|
from pixeltable.iterators import FrameIterator
|
|
194
199
|
import PIL.Image
|
|
195
200
|
|
|
196
|
-
video_table = pxt.create_table('videos', {'video': pxt.
|
|
201
|
+
video_table = pxt.create_table('videos', {'video': pxt.Video})
|
|
197
202
|
|
|
198
203
|
video_table.insert([{'video': '/video.mp4'}])
|
|
199
204
|
|
|
@@ -224,6 +229,38 @@ frames_view.order_by(sim, asc=False).limit(5).select(frames_view.frame, sim=sim)
|
|
|
224
229
|
```
|
|
225
230
|
Learn how to work with [Embedding and Vector Indexes](https://docs.pixeltable.com/docs/embedding-vector-indexes).
|
|
226
231
|
|
|
232
|
+
## 🔄 AI Stack Comparison
|
|
233
|
+
|
|
234
|
+
### 🎯 Computer Vision Workflows
|
|
235
|
+
|
|
236
|
+
| Requirement | Traditional | Pixeltable |
|
|
237
|
+
|-------------|---------------------|------------|
|
|
238
|
+
| Frame Extraction | ffmpeg + custom code | Automatic via FrameIterator |
|
|
239
|
+
| Object Detection | Multiple scripts + caching | Single computed column |
|
|
240
|
+
| Video Indexing | Custom pipelines + Vector DB | Native similarity search |
|
|
241
|
+
| Annotation Management | Separate tools + custom code | Label Studio integration |
|
|
242
|
+
| Model Evaluation | Custom metrics pipeline | Built-in mAP computation |
|
|
243
|
+
|
|
244
|
+
### 🤖 LLM Workflows
|
|
245
|
+
|
|
246
|
+
| Requirement | Traditional | Pixeltable |
|
|
247
|
+
|-------------|---------------------|------------|
|
|
248
|
+
| Document Chunking | Tool + custom code | Native DocumentSplitter |
|
|
249
|
+
| Embedding Generation | Separate pipeline + caching | Computed columns |
|
|
250
|
+
| Vector Search | External vector DB | Built-in vector indexing |
|
|
251
|
+
| Prompt Management | Custom tracking solution | Version-controlled columns |
|
|
252
|
+
| Chain Management | Tool + custom code | Computed column DAGs |
|
|
253
|
+
|
|
254
|
+
### 🎨 Multimodal Workflows
|
|
255
|
+
|
|
256
|
+
| Requirement | Traditional | Pixeltable |
|
|
257
|
+
|-------------|---------------------|------------|
|
|
258
|
+
| Data Types | Multiple storage systems | Unified table interface |
|
|
259
|
+
| Cross-Modal Search | Complex integration | Native similarity support |
|
|
260
|
+
| Pipeline Orchestration | Multiple tools (Airflow, etc.) | Single declarative interface |
|
|
261
|
+
| Asset Management | Custom tracking system | Automatic lineage |
|
|
262
|
+
| Quality Control | Multiple validation tools | Computed validation columns |
|
|
263
|
+
|
|
227
264
|
## ❓ FAQ
|
|
228
265
|
|
|
229
266
|
### What is Pixeltable?
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
pixeltable/__init__.py,sha256=r-kbRRoAp3rD61EEZbmgrMfo3X0TMRNl16hXMgHVHRg,1374
|
|
2
|
+
pixeltable/__version__.py,sha256=8hdYkuW_Aobhb48yBMyzCJUcf2bpelFP4zORLc4rC_o,114
|
|
3
|
+
pixeltable/catalog/__init__.py,sha256=Ar6_F_6C7tkznIlCPBHVHDop5YssBDjKQr2NPQ21QCI,484
|
|
4
|
+
pixeltable/catalog/catalog.py,sha256=tyDyI5wQw7vV6_FChrp9qgGCRClcjiSdW3eygYT0p9s,7849
|
|
5
|
+
pixeltable/catalog/column.py,sha256=zIWQp0TWQleXsgRSWHJjjoshI-WNhiXcgo8zJopsooM,10508
|
|
6
|
+
pixeltable/catalog/dir.py,sha256=fG_BQM-fLuABpTstMVH-9dvZPx7kqi3sgTQgKveVXJI,922
|
|
7
|
+
pixeltable/catalog/globals.py,sha256=2P9cEfgnYQ19MFS4OsbnA8ZQkWLZPqJmmknDYWCXjWw,2415
|
|
8
|
+
pixeltable/catalog/insertable_table.py,sha256=bzwHsUU_KZPhXs1jzzJzmTonzV5gCfXJR2hXTRT3Ejs,6964
|
|
9
|
+
pixeltable/catalog/named_function.py,sha256=W8vikP_3jMJ9pQQsksO2EfQAlaVxuQHBlo65M4924dc,1150
|
|
10
|
+
pixeltable/catalog/path.py,sha256=QgccEi_QOfaKt8YsR2zLtd_z7z7QQkU_1kprJFi2SPQ,1677
|
|
11
|
+
pixeltable/catalog/path_dict.py,sha256=4b9_Ax7Q8tkmoCYPaKNedpQkU17pE0oGDd2XB53eNZA,5979
|
|
12
|
+
pixeltable/catalog/schema_object.py,sha256=qhpeeUPOYT5doDbsyUNBcPm5QzAQPCAsikqh1PQ6d1k,2226
|
|
13
|
+
pixeltable/catalog/table.py,sha256=Vzsr1MU_9HvfIUwrS4gnV5ZCysDdQX2lMlWfmLuAa5U,45884
|
|
14
|
+
pixeltable/catalog/table_version.py,sha256=ulN1x9DfPpJkAIAcYJmJj0-TcyPt5qv8NHVO8VQdxPc,59021
|
|
15
|
+
pixeltable/catalog/table_version_path.py,sha256=YpnNh8QYyoMQCuAZPr1SGovSFD-70lcvZasY4dh_-xM,5778
|
|
16
|
+
pixeltable/catalog/view.py,sha256=vctwgzihDoKh3CSj2iGT0KrFeB18RL68AX5t9JCdXYg,10690
|
|
17
|
+
pixeltable/dataframe.py,sha256=F14t9HLoZeBKXIfUP9wkWHHKRgwXa2wAB9Nh0jsEhF8,34001
|
|
18
|
+
pixeltable/env.py,sha256=2JSDM0m87T5HtuOWxCXRL9h9iK7u-kOfigRf-3ZxBhU,30425
|
|
19
|
+
pixeltable/exceptions.py,sha256=NuFY2WtkQpLfLHT_J70kOw9Tr0kEDkkgo-u7As4Gaq4,410
|
|
20
|
+
pixeltable/exec/__init__.py,sha256=QmaKzkZ704nbJ1BiAFwjYvIX3R2_EHzpO8R7HxcEY-g,481
|
|
21
|
+
pixeltable/exec/aggregation_node.py,sha256=0LdoPp_CR_UbcS60XkDw66SqlrQnw6Dy6KDWqi4PJ6k,4005
|
|
22
|
+
pixeltable/exec/cache_prefetch_node.py,sha256=c1y2m6Zqbq6fRfUyD2hAxzKrkNnRSXeA8K5ehaIMpa0,5242
|
|
23
|
+
pixeltable/exec/component_iteration_node.py,sha256=ABuXGbDRQWLGuaBnfK7bvOxCrz81vMMiAvXHHI8SX4c,4930
|
|
24
|
+
pixeltable/exec/data_row_batch.py,sha256=RM56YaYcr4JeNdw4npVT329IcWlel2ff-o1UDp69k3Y,2931
|
|
25
|
+
pixeltable/exec/exec_context.py,sha256=0rg5V8HzSy-BvqmSbGr-U4aJ4eOZg2JN0x6zjYQGtBc,1090
|
|
26
|
+
pixeltable/exec/exec_node.py,sha256=FrIZzBMaKlWvEMfkgSnGu9GK_EEGhWYcJLT__FI6WKY,3254
|
|
27
|
+
pixeltable/exec/expr_eval_node.py,sha256=PNscxhG4o208UBi0d9pwsoUDdkPiCoH1xM8RlTaTlHY,11896
|
|
28
|
+
pixeltable/exec/in_memory_data_node.py,sha256=uK3puLZDzUSJUJwGwkTMYoPqNIvE67jtQodhE2sCz2M,3452
|
|
29
|
+
pixeltable/exec/row_update_node.py,sha256=b8yuKtkmI2Q6U-7svKbkjdM3Z85ddZoJgJgUa17j-YE,2773
|
|
30
|
+
pixeltable/exec/sql_node.py,sha256=9huPLpMqq4J5JhSh5vtt-T4K1Tu3EXJ9omv6mMi-zD4,17616
|
|
31
|
+
pixeltable/exprs/__init__.py,sha256=toKJm6y1_2fD8Fo-ld5feD-nMN-yZGMWwMHYyb8vsrg,967
|
|
32
|
+
pixeltable/exprs/arithmetic_expr.py,sha256=F97EF0y-OW8edIB_Trg5bJmCV_nif7QBnVGL4Zv7Xiw,5997
|
|
33
|
+
pixeltable/exprs/array_slice.py,sha256=HgB9BV8a9Qg6Y-t7PRjFszr40jDArensZ75-wDyFpcQ,2179
|
|
34
|
+
pixeltable/exprs/column_property_ref.py,sha256=rCYNv9o_UMVhIz-U0-bhTxLf4c7_CTYWH6VB9LLo0F8,3787
|
|
35
|
+
pixeltable/exprs/column_ref.py,sha256=YjqxAqBVHDc0pNHEJmbQNGq7eqOiloy_Umr4qW6Gb1c,9626
|
|
36
|
+
pixeltable/exprs/comparison.py,sha256=K5WWA2FvnZuts2VcLVGKtUyAOol67NgqGoeNiuhdyKU,4757
|
|
37
|
+
pixeltable/exprs/compound_predicate.py,sha256=H35SB053aCHigtPp_JYrOTLRC2AGDQK3NAkOYOBPDQg,3845
|
|
38
|
+
pixeltable/exprs/data_row.py,sha256=rLtKxlst9mK6684A5y-nsjBcalyKEcKAWcYCtNpK10w,9852
|
|
39
|
+
pixeltable/exprs/expr.py,sha256=QlVslNfAOPxPVvsNiEv_k_S2QWp4-8dc3eocT6zclNU,29986
|
|
40
|
+
pixeltable/exprs/expr_dict.py,sha256=xkvo_iVPOLMq3WkBZQ2FOoXqYoebuV6XGlidPJxdOkY,1588
|
|
41
|
+
pixeltable/exprs/expr_set.py,sha256=kAV-PbudbAlW0dqhKqfstXnxKbK_FWKXyrzkpvDrpjQ,2330
|
|
42
|
+
pixeltable/exprs/function_call.py,sha256=Sp23zxpzA4f7ztd71sn_B0sgos1hyMPKRR8wICzxOZ4,21365
|
|
43
|
+
pixeltable/exprs/globals.py,sha256=5pwn5vdi-EEpYBpPty658YV45myY7W0iFIfTH7QIzak,2032
|
|
44
|
+
pixeltable/exprs/in_predicate.py,sha256=eR6qFf_0lsqM0jPRI9IxTsDlXM6Bt9RBY9a05bZ6bDs,3782
|
|
45
|
+
pixeltable/exprs/inline_expr.py,sha256=OF-yLIoU7LfHqFdbZkZnC1oQ2_X26WyHbfxbxrdZr-k,7104
|
|
46
|
+
pixeltable/exprs/is_null.py,sha256=x9-CijQf1JuUioUAv1_8OvP9nK9Ahjc1wJfgE1QEOL8,1091
|
|
47
|
+
pixeltable/exprs/json_mapper.py,sha256=OJ8ohAZhuLxTGtmmbB8voVG9AzUuCHLHuNBi-3hJZEg,4544
|
|
48
|
+
pixeltable/exprs/json_path.py,sha256=CiX5hXWNL8caNFajVXn3PGxkINK7rAfHDIfbNIkWT04,6753
|
|
49
|
+
pixeltable/exprs/literal.py,sha256=TTKb0gw6qck9D61SwVDuBrLrBrGwEhkCB-m0ILFpWFk,3764
|
|
50
|
+
pixeltable/exprs/method_ref.py,sha256=x9rQzlRQlVnbTpBQoV0COFsiOPstJcOifXl0lJC-roI,2614
|
|
51
|
+
pixeltable/exprs/object_ref.py,sha256=GVg6uxZnKwFVTC0kouJq-wMFP-gUPb_ld_syHrsGMdE,1283
|
|
52
|
+
pixeltable/exprs/row_builder.py,sha256=7f-h4y8xv0ktkk6GYqGrMJvLSwkVYtMPHsBBIskmQLw,18435
|
|
53
|
+
pixeltable/exprs/rowid_ref.py,sha256=hjGrbk9zHH3H-00uFAopyRvLTeQeB2e44kAJUAxiy3k,4400
|
|
54
|
+
pixeltable/exprs/similarity_expr.py,sha256=snOseawC4ySvyHo8TCqbh_bDxIrENfIzO_0lXlzL-BA,4016
|
|
55
|
+
pixeltable/exprs/sql_element_cache.py,sha256=DRW5Aa0WQZ-yMf5anlUg-_Rmq3Oz3G6Us1X_KseMC68,1229
|
|
56
|
+
pixeltable/exprs/type_cast.py,sha256=BTnhgoll7LVZdOU_282QlzGZ4EEMTzPYcNdDcFOfkTs,1837
|
|
57
|
+
pixeltable/exprs/variable.py,sha256=VATAmLaPrQipv2AXqg-m6FYMLNGyhPtys8c5Et8Ba0g,1414
|
|
58
|
+
pixeltable/ext/__init__.py,sha256=iO0J_Jfnv38F5y40sDAW54gpXjIyZgOGgoWQJAwjQec,423
|
|
59
|
+
pixeltable/ext/functions/__init__.py,sha256=hIjPEKC5E5uJOXlQqUyhP9yn9ZqWOCJAlj0kXWDlhlE,159
|
|
60
|
+
pixeltable/ext/functions/whisperx.py,sha256=jojjNhazcYiAh1scwUl-erhIDRr4kOTkcLrjy0xcp6g,2325
|
|
61
|
+
pixeltable/ext/functions/yolox.py,sha256=k-pQTelv4Tea3AXvDB7Kc7YCIa1uexjVGqxETP0B_hc,5351
|
|
62
|
+
pixeltable/func/__init__.py,sha256=WjftUGyNkd6bF_qSxqZ5Gd7Elf8oExb3dUlpydhdFTo,407
|
|
63
|
+
pixeltable/func/aggregate_function.py,sha256=x3BW-lRlMFmStpOwmMXDKK0Zoli_wBffXKw5wDqb-80,9495
|
|
64
|
+
pixeltable/func/callable_function.py,sha256=PO5Mn5WL2cd7y5LcKr_K0AaYHf2-1NfuXP2IPOfsiVs,4933
|
|
65
|
+
pixeltable/func/expr_template_function.py,sha256=0krKY-0i-aTZdgAS8wZkq4NQc-EVtAA7-a0zh2vHmTI,4059
|
|
66
|
+
pixeltable/func/function.py,sha256=BLJbahyKteGemp0EcG7joTrgrG2hnIfwHkuKoLZm4mo,6941
|
|
67
|
+
pixeltable/func/function_registry.py,sha256=fBXe7NKyk8_JzZz6fsS0LF-WHTdMnmIP_XzrICuj9fA,12328
|
|
68
|
+
pixeltable/func/globals.py,sha256=sEwn6lGgHMp6VQORb_P5qRd_-Q2_bUSqvqM9-XPN_ec,1483
|
|
69
|
+
pixeltable/func/query_template_function.py,sha256=pGqwtWiPsEmo7phVoJJODiuD1Sh0gZoW4BpKnZV9cRE,3537
|
|
70
|
+
pixeltable/func/signature.py,sha256=vBKs3igtijTQGK7rEGTxBiOznKo6Tj9Ut6OrSfiVcA0,8609
|
|
71
|
+
pixeltable/func/udf.py,sha256=yn3D2vTn7eJ_rCB8MoJp2MAphufOddOP89sxnBsbyfg,7634
|
|
72
|
+
pixeltable/functions/__init__.py,sha256=EtR9M3ewYpmtHeshNULqZVBd87bNeKSFAdpOuWCMl6o,389
|
|
73
|
+
pixeltable/functions/anthropic.py,sha256=P1E5o4-8QP1LTIUsWVgo_wMJ4WOnxtXUUXuFWUagChU,3032
|
|
74
|
+
pixeltable/functions/audio.py,sha256=7213nTnqKJ6vM9kalaoJ283OwX5SGEJN10vDhaRNZ6E,644
|
|
75
|
+
pixeltable/functions/fireworks.py,sha256=qwFC_eIaDs-glxyJ_IVXaNGkpgPzeRsQ_SdpzueBxq0,2605
|
|
76
|
+
pixeltable/functions/globals.py,sha256=pCFX2a_N87SwG9GxyPjSOC3TVMowMB6XIHSWKfFOuGE,3917
|
|
77
|
+
pixeltable/functions/huggingface.py,sha256=Zvj4REfXX7cB0trdiDAemkqpWxrsq2sZyrSULgqJU7U,16043
|
|
78
|
+
pixeltable/functions/image.py,sha256=3Qm4ybAT_o4YUl3bzhEXy8dKOwgZ7RCUV-ky-dbL_jc,13836
|
|
79
|
+
pixeltable/functions/json.py,sha256=ehCnBA5WiIl-crV9PFVgmxrsWsiO8FpRs9LDwcSpLa4,879
|
|
80
|
+
pixeltable/functions/llama_cpp.py,sha256=2t8o2m6jsEFFmVcVl3Fjyoy6Ogxbt-uYJ6a_jcrpEso,3838
|
|
81
|
+
pixeltable/functions/mistralai.py,sha256=qdcAiUQcbTd6gTHiUCVcH2bQUh-gFunudGbqYpwxzdo,5565
|
|
82
|
+
pixeltable/functions/ollama.py,sha256=eZh461HvChjlr0CvQyd93m7qrv889PAoM-Z1IKierY0,4335
|
|
83
|
+
pixeltable/functions/openai.py,sha256=YCV1b6IV_zROjQQl3R-r40WeCjIWuOGqTeRgcQzM-WE,15579
|
|
84
|
+
pixeltable/functions/replicate.py,sha256=j8ZedScOMInmHWmriQSUOviw6tp8gQr-W6n21PNNL2g,2188
|
|
85
|
+
pixeltable/functions/string.py,sha256=VqzhVildxTt_XblW89Kl5Zd6MVOU71eaX2LTMY5jkUg,20366
|
|
86
|
+
pixeltable/functions/timestamp.py,sha256=Q5l2iEscrS3ZfKAa4R940bSM_x4FsmF-PF2i-wQ_4_c,9096
|
|
87
|
+
pixeltable/functions/together.py,sha256=llha20aaTuCo2dewcAUkbT-QbDNXTajwoBLlSiDRPoo,9436
|
|
88
|
+
pixeltable/functions/util.py,sha256=F2iiIL7UfhYdCVzdCa3efYqWbaeLKFrbycKnuPkG57M,650
|
|
89
|
+
pixeltable/functions/video.py,sha256=yW1Lwqu4_jYXp1aAOUThKB5-_Qxy-In_vTgB5cuW7Lg,6809
|
|
90
|
+
pixeltable/functions/vision.py,sha256=K_E1Q-n2plPuFoOPlbKWRMiJp9dPgftIJ2T_o3TNL3I,15594
|
|
91
|
+
pixeltable/functions/whisper.py,sha256=f2wqRd0n9jSBqRZN3W93UaetiAHtbsK0j9jXR2j2kkQ,2913
|
|
92
|
+
pixeltable/globals.py,sha256=qO6-RBZ9j7nPGqAkUSyJpl8EQd7DLDswjCLMP3_EW80,17799
|
|
93
|
+
pixeltable/index/__init__.py,sha256=XBwetNQQwnz0fiKwonOKhyy_U32l_cjt77kNvEIdjWs,102
|
|
94
|
+
pixeltable/index/base.py,sha256=zo0YvJI3oXiK6hZJztB36ZftKKhLfO75Zq3t-PeQA6M,1556
|
|
95
|
+
pixeltable/index/btree.py,sha256=JFerLyyLoBaD0FSF_jJ6iJFBVa-z_et--KdNR02xjRg,2264
|
|
96
|
+
pixeltable/index/embedding_index.py,sha256=g5bajq8Dn82o8ZlkDwPT5kBUClR4ZCuH6dttwCW6KWI,7793
|
|
97
|
+
pixeltable/io/__init__.py,sha256=PHqbiEJXFtCzTsia7LmsHLfBIkA41tzII3n9L4UkfJ8,547
|
|
98
|
+
pixeltable/io/external_store.py,sha256=H1jt7MDn464QRgBvU-PmcPcFlo3EZBCG7fKWEZXOfyc,16676
|
|
99
|
+
pixeltable/io/fiftyone.py,sha256=hH-FahW6BuMQY8lGa2atnNnJto_pK8kWrP_y_EMsq6g,6965
|
|
100
|
+
pixeltable/io/globals.py,sha256=9S9wnlIAuhZq7eC_GklTM_UX0UATK9fEagk8-SRCeXQ,17794
|
|
101
|
+
pixeltable/io/hf_datasets.py,sha256=o5fqm2CJAjhFd3z-NYGxN0jM1tfrp4szuUX0TGnyNRY,8316
|
|
102
|
+
pixeltable/io/label_studio.py,sha256=7KTro1H-AlVbwWuoYwU-mxH3zejZWTpQbz56uX-Wnjs,31078
|
|
103
|
+
pixeltable/io/pandas.py,sha256=7eHg7wnAfRA9eBk4iC0iSSVTKOM59Ne4pXokKWdt3dY,9793
|
|
104
|
+
pixeltable/io/parquet.py,sha256=4bAQNCahtLGuHRF669kLGx2MhOFmuwTkUYYHLC-qKcs,7862
|
|
105
|
+
pixeltable/iterators/__init__.py,sha256=sjldFckkT8aVRiKgEP6faeAK2NQBdzbmpwAeRhI1FkM,366
|
|
106
|
+
pixeltable/iterators/base.py,sha256=ZC0ZvXL4iw6AmT8cu-Mdx-T2UG9nmJYV1C6LK4efAfw,1669
|
|
107
|
+
pixeltable/iterators/document.py,sha256=AsvEmZ5RGRi3AFGCrH2_-UNx5rTCFA-0WmMYQBTQI20,19679
|
|
108
|
+
pixeltable/iterators/string.py,sha256=NG_fWc_GAITDfzl6MvrDOMrSoMcZdMZf6hPQztCSatE,1305
|
|
109
|
+
pixeltable/iterators/video.py,sha256=CKx6jHFW-393r4xN-ulVDDZFETSkhhMwT7bPX2NHVU8,9442
|
|
110
|
+
pixeltable/metadata/__init__.py,sha256=CI0ZWcxsCbXEWhdbbByNJFdSmvIBrMEbf_vqrVb0b-Q,2209
|
|
111
|
+
pixeltable/metadata/converters/convert_10.py,sha256=J1_r7LNNAWTdb042AwqFpJ4sEB-i4qhUdk5iOjcZk34,719
|
|
112
|
+
pixeltable/metadata/converters/convert_12.py,sha256=Ci-qyZW1gqci-8wnjeOB5afdq7KTuN-hVSV9OqSPx8g,162
|
|
113
|
+
pixeltable/metadata/converters/convert_13.py,sha256=yFR6lD3pOrZ46ZQBFKYvxiIYa7rRxh46Bsq7yiCBNak,1356
|
|
114
|
+
pixeltable/metadata/converters/convert_14.py,sha256=9e_JNm3a35Gs4dvFFaV-_jtCqp3ud6OEOqMIhTQmasE,428
|
|
115
|
+
pixeltable/metadata/converters/convert_15.py,sha256=jMfL5wGil0-gZeIfmCbgtR3LSVNyOcxVp6YRhBECqY4,1741
|
|
116
|
+
pixeltable/metadata/converters/convert_16.py,sha256=SvcWOYgLwRw_gLTnLbCSI9f2cpdkXazYOmmtJUOOzv4,476
|
|
117
|
+
pixeltable/metadata/converters/convert_17.py,sha256=vJg4y2lg53WSj9OSntWsdUiCr6yRgMQm0eFbs_Geqjg,861
|
|
118
|
+
pixeltable/metadata/converters/convert_18.py,sha256=NxSroQntVLgmvXfae1f0-jYJIhM2W7dhRY3I7g0RxPY,1482
|
|
119
|
+
pixeltable/metadata/converters/convert_19.py,sha256=QgUDjNoH87KZg_f3cx4k0ZR67NqWRhZQKIIojbqxSkg,2080
|
|
120
|
+
pixeltable/metadata/converters/convert_20.py,sha256=NLMeke9QUGqIJUe5MNqKmVdaLs6pPFrjyigImhogaT4,2768
|
|
121
|
+
pixeltable/metadata/converters/convert_21.py,sha256=YTztkbqOC2zQcTWrXfhrP8diUbfxy5DHwsu_IT-bBok,1115
|
|
122
|
+
pixeltable/metadata/converters/util.py,sha256=nycZk_UecJcrVZsIyxQrz5ngbke8-yfY-_UcERuzhPk,5983
|
|
123
|
+
pixeltable/metadata/notes.py,sha256=HJmeA9fo37iREFIhlbGbxWnwbwIDcvdqvJO-BavIvxE,597
|
|
124
|
+
pixeltable/metadata/schema.py,sha256=CnEsMqLn4hzLDaAr5lyd-NqiOUFQdvhxdCoXMR4Qcs4,9352
|
|
125
|
+
pixeltable/plan.py,sha256=LN5R-0gEmGX9kwm67syCqvrLuPaWSiT3pUfMJoudkLY,37442
|
|
126
|
+
pixeltable/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
127
|
+
pixeltable/store.py,sha256=IG33KEu0eHGD2CGMBVV-HmPj7xGUMCLmLGdJn49_4Cs,21990
|
|
128
|
+
pixeltable/tool/create_test_db_dump.py,sha256=axTlPFqodZJ5p8kRVmya3PgghqBaF8oYh7a4vbusmcA,11986
|
|
129
|
+
pixeltable/tool/create_test_video.py,sha256=4cQmqoKjn3juy7Ilty75gWBygqBxTZ1E9XPlrsC0Ssk,2931
|
|
130
|
+
pixeltable/tool/doc_plugins/griffe.py,sha256=J5zxyEUchfR3mkWmhx4Vjl_iSodL_pHiuOyD2eczbNU,2182
|
|
131
|
+
pixeltable/tool/doc_plugins/mkdocstrings.py,sha256=afq7XOaSC5WRmugkh-FMFMK8PqOgIlDIsJdD8cuPhtE,207
|
|
132
|
+
pixeltable/tool/doc_plugins/templates/material/udf.html.jinja,sha256=R-7Q57nmDd5BUea-F1-MjwjK3pq7uBHXNoSo8_QjZG4,4890
|
|
133
|
+
pixeltable/tool/embed_udf.py,sha256=EXvfvuzZm0uTgH-aAATSrKV8ixCU8OMwpzXlJMg845Y,299
|
|
134
|
+
pixeltable/tool/mypy_plugin.py,sha256=__oTFElirrK25jCX1z_asD_gxGnGxtD2TaU6r1if-Ic,1784
|
|
135
|
+
pixeltable/type_system.py,sha256=y_fbms9LyC8lFCdXiewMJKqkzwiYBJE9ZoQbrQxJv2c,39980
|
|
136
|
+
pixeltable/utils/__init__.py,sha256=UYlrf6TIWJT0g-Hac0b34-dEk478B5Qx8dGco34YlIk,439
|
|
137
|
+
pixeltable/utils/arrow.py,sha256=UQkMxyU4G_ikUF9OnEHcaBXHjVqdPXHSY4JXGmuARhA,3776
|
|
138
|
+
pixeltable/utils/coco.py,sha256=bqdSFHw8lulYDIGuTdV-7XSuuaW6KNkcrH0jD60-iJI,7302
|
|
139
|
+
pixeltable/utils/code.py,sha256=AOw1u2r8_DQXpX-lxJhyHWARGrCRDXOJHFVgKOi54Uc,1231
|
|
140
|
+
pixeltable/utils/documents.py,sha256=B984nVigJgHZ5Rm-zX4LLuHuMnfmz-zr24bbAsc_y3w,2511
|
|
141
|
+
pixeltable/utils/filecache.py,sha256=6HKQdItqSSTQvj2HkSJulyhfBedi4PgC7umwxXGOVG8,10637
|
|
142
|
+
pixeltable/utils/formatter.py,sha256=fVgcaoWFTvpc_2G3MnU8QGUjHaQkj4vmScNI1s_SwBg,9209
|
|
143
|
+
pixeltable/utils/http_server.py,sha256=xYPTvmYrkUpKfOaLDq08D-eHswkcgDf4qAt76ZFH6lM,2411
|
|
144
|
+
pixeltable/utils/media_store.py,sha256=YwvTjbVqC_aLbDvLuqnDSL8xeIVMZcmzp0ANuM6uMbw,3092
|
|
145
|
+
pixeltable/utils/pytorch.py,sha256=6RvOCjy_QV4gc-aht-3d0zoASkuv-warfpl87vgmuKw,3450
|
|
146
|
+
pixeltable/utils/s3.py,sha256=DBfXp0SYubhiKckdAD7PsiVBX_YfVP8Rcu6DCG_3SaQ,433
|
|
147
|
+
pixeltable/utils/sql.py,sha256=j_tj0h4ffm-DhUIJbvGphxrVyBKlNTwDKqWGhRQ5_PY,795
|
|
148
|
+
pixeltable/utils/transactional_directory.py,sha256=UGzCrGtLR3hEEf8sYGuWBzLVFAEQml3vdIavigWeTBM,1349
|
|
149
|
+
pixeltable-0.2.22.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
150
|
+
pixeltable-0.2.22.dist-info/METADATA,sha256=8vkY__cKJFa_AXNwYLfo0IQYXgsMRPMI2_ZeG1DRM_U,17406
|
|
151
|
+
pixeltable-0.2.22.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
152
|
+
pixeltable-0.2.22.dist-info/entry_points.txt,sha256=TNI1Gb5vPwFrTdw6TimSYjO8FeK8c_HuPr28vcf7o_I,108
|
|
153
|
+
pixeltable-0.2.22.dist-info/RECORD,,
|
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
from typing import Iterable, Optional
|
|
3
|
-
|
|
4
|
-
from .data_row_batch import DataRowBatch
|
|
5
|
-
from .exec_node import ExecNode
|
|
6
|
-
import pixeltable.exprs as exprs
|
|
7
|
-
import pixeltable.exceptions as excs
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class MediaValidationNode(ExecNode):
    """Validation of selected media slots

    Records exceptions in the rows of the input batch. Unless ``self.ctx.ignore_errors`` is set,
    the first validation error is also re-raised after it has been recorded.
    """

    def __init__(
        self, row_builder: exprs.RowBuilder, media_slots: Iterable[exprs.ColumnSlotIdx],
        input: Optional[ExecNode]
    ):
        """
        Args:
            row_builder: used to record validation exceptions on rows
            media_slots: the (slot index, column) pairs to validate; every column must have a media type
            input: the node that supplies the row batches to validate
        """
        super().__init__(row_builder, [], [], input)
        self.row_builder = row_builder
        self.input = input
        # media_slots is only promised to be an Iterable, yet it is traversed here and again on every
        # __next__() call; materialize it once so that a one-shot iterator (e.g., a generator) is not
        # left exhausted, which would silently disable validation
        self.media_slots = list(media_slots)
        for slot in self.media_slots:
            assert slot.col.col_type.is_media_type()

    def __next__(self) -> DataRowBatch:
        """Fetch the next batch from the input and validate the media file of each selected slot.

        Returns:
            the input batch, with validation errors recorded in the affected rows

        Raises:
            excs.Error: a validation error, if self.ctx.ignore_errors is not set
        """
        assert self.input is not None
        row_batch = next(self.input)
        for row in row_batch:
            for slot in self.media_slots:
                slot_idx = slot.slot_idx
                if row.has_exc(slot_idx):
                    # this slot already carries an exception
                    continue
                assert row.has_val[slot_idx]
                path = row.file_paths[slot_idx]
                if path is None:
                    # no file path recorded for this slot
                    continue

                try:
                    slot.col.col_type.validate_media(path)
                except excs.Error as exc:
                    # record the error in the row before deciding whether to propagate it
                    self.row_builder.set_exc(row, slot_idx, exc)
                    if not self.ctx.ignore_errors:
                        raise

        return row_batch
|
pixeltable/utils/help.py
DELETED