pixeltable 0.3.10__py3-none-any.whl → 0.3.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +2 -2
- pixeltable/__version__.py +2 -2
- pixeltable/catalog/__init__.py +2 -1
- pixeltable/catalog/catalog.py +370 -93
- pixeltable/catalog/column.py +6 -4
- pixeltable/catalog/dir.py +5 -5
- pixeltable/catalog/globals.py +14 -16
- pixeltable/catalog/insertable_table.py +6 -8
- pixeltable/catalog/path.py +14 -7
- pixeltable/catalog/table.py +72 -62
- pixeltable/catalog/table_version.py +137 -107
- pixeltable/catalog/table_version_handle.py +3 -0
- pixeltable/catalog/table_version_path.py +1 -1
- pixeltable/catalog/view.py +10 -14
- pixeltable/dataframe.py +5 -3
- pixeltable/env.py +108 -42
- pixeltable/exec/__init__.py +2 -0
- pixeltable/exec/aggregation_node.py +6 -8
- pixeltable/exec/cache_prefetch_node.py +4 -7
- pixeltable/exec/component_iteration_node.py +1 -3
- pixeltable/exec/data_row_batch.py +1 -2
- pixeltable/exec/exec_context.py +1 -1
- pixeltable/exec/exec_node.py +1 -2
- pixeltable/exec/expr_eval/__init__.py +2 -0
- pixeltable/exec/expr_eval/evaluators.py +137 -20
- pixeltable/exec/expr_eval/expr_eval_node.py +43 -64
- pixeltable/exec/expr_eval/globals.py +68 -7
- pixeltable/exec/expr_eval/schedulers.py +25 -23
- pixeltable/exec/in_memory_data_node.py +8 -6
- pixeltable/exec/row_update_node.py +3 -4
- pixeltable/exec/sql_node.py +16 -18
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/column_property_ref.py +1 -1
- pixeltable/exprs/column_ref.py +3 -3
- pixeltable/exprs/compound_predicate.py +1 -1
- pixeltable/exprs/data_row.py +17 -1
- pixeltable/exprs/expr.py +12 -12
- pixeltable/exprs/function_call.py +34 -2
- pixeltable/exprs/json_mapper.py +95 -48
- pixeltable/exprs/json_path.py +4 -9
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +33 -6
- pixeltable/exprs/similarity_expr.py +1 -1
- pixeltable/exprs/sql_element_cache.py +1 -1
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/ext/__init__.py +1 -1
- pixeltable/ext/functions/__init__.py +1 -1
- pixeltable/ext/functions/whisperx.py +1 -1
- pixeltable/ext/functions/yolox.py +1 -1
- pixeltable/func/__init__.py +1 -1
- pixeltable/func/aggregate_function.py +2 -2
- pixeltable/func/callable_function.py +3 -6
- pixeltable/func/expr_template_function.py +24 -4
- pixeltable/func/function.py +7 -9
- pixeltable/func/function_registry.py +1 -1
- pixeltable/func/query_template_function.py +87 -4
- pixeltable/func/signature.py +1 -1
- pixeltable/func/tools.py +1 -1
- pixeltable/func/udf.py +2 -2
- pixeltable/functions/__init__.py +1 -1
- pixeltable/functions/anthropic.py +2 -2
- pixeltable/functions/audio.py +1 -1
- pixeltable/functions/deepseek.py +1 -1
- pixeltable/functions/fireworks.py +1 -1
- pixeltable/functions/globals.py +6 -6
- pixeltable/functions/huggingface.py +1 -1
- pixeltable/functions/image.py +1 -1
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +1 -1
- pixeltable/functions/math.py +1 -1
- pixeltable/functions/mistralai.py +1 -1
- pixeltable/functions/ollama.py +1 -1
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/replicate.py +1 -1
- pixeltable/functions/string.py +1 -1
- pixeltable/functions/timestamp.py +1 -1
- pixeltable/functions/together.py +1 -1
- pixeltable/functions/util.py +1 -1
- pixeltable/functions/video.py +2 -2
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +7 -2
- pixeltable/index/embedding_index.py +12 -1
- pixeltable/io/__init__.py +5 -3
- pixeltable/io/fiftyone.py +6 -7
- pixeltable/io/label_studio.py +21 -20
- pixeltable/io/pandas.py +6 -5
- pixeltable/iterators/__init__.py +1 -1
- pixeltable/metadata/__init__.py +6 -4
- pixeltable/metadata/converters/convert_24.py +3 -3
- pixeltable/metadata/converters/convert_25.py +1 -1
- pixeltable/metadata/converters/convert_29.py +1 -1
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/notes.py +3 -0
- pixeltable/metadata/schema.py +26 -1
- pixeltable/plan.py +2 -3
- pixeltable/share/packager.py +8 -24
- pixeltable/share/publish.py +20 -9
- pixeltable/store.py +9 -6
- pixeltable/type_system.py +19 -7
- pixeltable/utils/console_output.py +3 -2
- pixeltable/utils/coroutine.py +3 -3
- pixeltable/utils/dbms.py +66 -0
- pixeltable/utils/documents.py +61 -67
- pixeltable/utils/exception_handler.py +59 -0
- pixeltable/utils/filecache.py +1 -1
- pixeltable/utils/http_server.py +3 -2
- pixeltable/utils/pytorch.py +1 -1
- pixeltable/utils/sql.py +1 -1
- pixeltable-0.3.12.dist-info/METADATA +436 -0
- pixeltable-0.3.12.dist-info/RECORD +183 -0
- pixeltable/catalog/path_dict.py +0 -169
- pixeltable-0.3.10.dist-info/METADATA +0 -382
- pixeltable-0.3.10.dist-info/RECORD +0 -179
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/LICENSE +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/WHEEL +0 -0
- {pixeltable-0.3.10.dist-info → pixeltable-0.3.12.dist-info}/entry_points.txt +0 -0
pixeltable/catalog/path_dict.py
DELETED
|
@@ -1,169 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import copy
|
|
4
|
-
import logging
|
|
5
|
-
from typing import Optional
|
|
6
|
-
from uuid import UUID
|
|
7
|
-
|
|
8
|
-
import sqlalchemy.orm as orm
|
|
9
|
-
|
|
10
|
-
from pixeltable import exceptions as excs
|
|
11
|
-
from pixeltable.env import Env
|
|
12
|
-
from pixeltable.metadata import schema
|
|
13
|
-
|
|
14
|
-
from .dir import Dir
|
|
15
|
-
from .path import Path
|
|
16
|
-
from .schema_object import SchemaObject
|
|
17
|
-
|
|
18
|
-
_logger = logging.getLogger('pixeltable')
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class PathDict:
|
|
22
|
-
"""Keep track of all paths in a Db instance"""
|
|
23
|
-
|
|
24
|
-
def __init__(self):
|
|
25
|
-
self.dir_contents: dict[UUID, dict[str, SchemaObject]] = {}
|
|
26
|
-
self.schema_objs: dict[UUID, SchemaObject] = {}
|
|
27
|
-
|
|
28
|
-
# load dirs
|
|
29
|
-
with orm.Session(Env.get().engine, future=True) as session:
|
|
30
|
-
_ = [dir_record for dir_record in session.query(schema.Dir).all()]
|
|
31
|
-
self.schema_objs = {
|
|
32
|
-
dir_record.id: Dir(dir_record.id, dir_record.parent_id, schema.DirMd(**dir_record.md).name)
|
|
33
|
-
for dir_record in session.query(schema.Dir).all()
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
# identify root dir
|
|
37
|
-
root_dirs = [dir for dir in self.schema_objs.values() if dir._dir_id is None]
|
|
38
|
-
assert len(root_dirs) == 1
|
|
39
|
-
self.root_dir = root_dirs[0]
|
|
40
|
-
|
|
41
|
-
# build dir_contents
|
|
42
|
-
def record_dir(dir: SchemaObject) -> None:
|
|
43
|
-
assert isinstance(dir, Dir)
|
|
44
|
-
if dir._id in self.dir_contents:
|
|
45
|
-
return
|
|
46
|
-
else:
|
|
47
|
-
self.dir_contents[dir._id] = {}
|
|
48
|
-
if dir._dir_id is not None:
|
|
49
|
-
record_dir(self.schema_objs[dir._dir_id])
|
|
50
|
-
self.dir_contents[dir._dir_id][dir._name] = dir
|
|
51
|
-
|
|
52
|
-
for dir in self.schema_objs.values():
|
|
53
|
-
record_dir(dir)
|
|
54
|
-
|
|
55
|
-
def _resolve_path(self, path: Path) -> SchemaObject:
|
|
56
|
-
"""Resolve the path to a SchemaObject.
|
|
57
|
-
|
|
58
|
-
Args:
|
|
59
|
-
path: path to resolve
|
|
60
|
-
|
|
61
|
-
Returns:
|
|
62
|
-
SchemaObject at the path.
|
|
63
|
-
|
|
64
|
-
Raises:
|
|
65
|
-
Error if path is invalid or does not exist.
|
|
66
|
-
"""
|
|
67
|
-
schema_obj = self.get_object(path)
|
|
68
|
-
if schema_obj is None:
|
|
69
|
-
raise excs.Error(f'No such path: {str(path)}')
|
|
70
|
-
return schema_obj
|
|
71
|
-
|
|
72
|
-
def get_object(self, path: Path) -> Optional[SchemaObject]:
|
|
73
|
-
"""Get the object at the given path, if any.
|
|
74
|
-
|
|
75
|
-
Args:
|
|
76
|
-
path: path to object
|
|
77
|
-
|
|
78
|
-
Returns:
|
|
79
|
-
SchemaObject at the path if it exists, None otherwise.
|
|
80
|
-
|
|
81
|
-
Raises:
|
|
82
|
-
Error if path is invalid.
|
|
83
|
-
"""
|
|
84
|
-
if path.is_root:
|
|
85
|
-
return self.root_dir
|
|
86
|
-
dir = self.root_dir
|
|
87
|
-
for i, component in enumerate(path.components):
|
|
88
|
-
if component not in self.dir_contents[dir._id]:
|
|
89
|
-
if i == len(path.components) - 1:
|
|
90
|
-
return None
|
|
91
|
-
raise excs.Error(f'No such path: {".".join(path.components[: i + 1])}')
|
|
92
|
-
schema_obj = self.dir_contents[dir._id][component]
|
|
93
|
-
if i < len(path.components) - 1:
|
|
94
|
-
if not isinstance(schema_obj, Dir):
|
|
95
|
-
raise excs.Error(f'Not a directory: {".".join(path.components[: i + 1])}')
|
|
96
|
-
dir = schema_obj
|
|
97
|
-
return schema_obj
|
|
98
|
-
|
|
99
|
-
def __getitem__(self, path: Path) -> SchemaObject:
|
|
100
|
-
return self._resolve_path(path)
|
|
101
|
-
|
|
102
|
-
def get_schema_obj(self, id: UUID) -> Optional[SchemaObject]:
|
|
103
|
-
return self.schema_objs.get(id)
|
|
104
|
-
|
|
105
|
-
def add_schema_obj(self, dir_id: UUID, name: str, val: SchemaObject) -> None:
|
|
106
|
-
self.dir_contents[dir_id][name] = val
|
|
107
|
-
self.schema_objs[val._id] = val
|
|
108
|
-
|
|
109
|
-
def __setitem__(self, path: Path, val: SchemaObject) -> None:
|
|
110
|
-
parent_dir = self._resolve_path(path.parent)
|
|
111
|
-
assert path.name not in self.dir_contents[parent_dir._id]
|
|
112
|
-
self.schema_objs[val._id] = val
|
|
113
|
-
self.dir_contents[parent_dir._id][path.name] = val
|
|
114
|
-
if isinstance(val, Dir):
|
|
115
|
-
self.dir_contents[val._id] = {}
|
|
116
|
-
|
|
117
|
-
def __delitem__(self, path: Path) -> None:
|
|
118
|
-
parent_dir = self._resolve_path(path.parent)
|
|
119
|
-
assert path.name in self.dir_contents[parent_dir._id]
|
|
120
|
-
obj = self.dir_contents[parent_dir._id][path.name]
|
|
121
|
-
del self.dir_contents[parent_dir._id][path.name]
|
|
122
|
-
if isinstance(obj, Dir):
|
|
123
|
-
del self.dir_contents[obj._id]
|
|
124
|
-
del self.schema_objs[obj._id]
|
|
125
|
-
|
|
126
|
-
def move(self, from_path: Path, to_path: Path) -> None:
|
|
127
|
-
from_dir = self._resolve_path(from_path.parent)
|
|
128
|
-
assert isinstance(from_dir, Dir)
|
|
129
|
-
assert from_path.name in self.dir_contents[from_dir._id]
|
|
130
|
-
obj = self.dir_contents[from_dir._id][from_path.name]
|
|
131
|
-
del self.dir_contents[from_dir._id][from_path.name]
|
|
132
|
-
to_dir = self._resolve_path(to_path.parent)
|
|
133
|
-
assert to_path.name not in self.dir_contents[to_dir._id]
|
|
134
|
-
self.dir_contents[to_dir._id][to_path.name] = obj
|
|
135
|
-
|
|
136
|
-
def check_is_valid(self, path: Path, expected: Optional[type[SchemaObject]]) -> None:
|
|
137
|
-
"""Check that path is valid and that the object at path has the expected type.
|
|
138
|
-
|
|
139
|
-
Args:
|
|
140
|
-
path: path to check
|
|
141
|
-
expected: expected type of object at path or None if object should not exist
|
|
142
|
-
|
|
143
|
-
Raises:
|
|
144
|
-
Error if path is invalid or object at path has wrong type
|
|
145
|
-
"""
|
|
146
|
-
# check for existence
|
|
147
|
-
obj = self.get_object(path)
|
|
148
|
-
if expected is not None:
|
|
149
|
-
if obj is None:
|
|
150
|
-
raise excs.Error(f'No such path: {str(path)}')
|
|
151
|
-
if not isinstance(obj, expected):
|
|
152
|
-
raise excs.Error(
|
|
153
|
-
f'{str(path)} needs to be a {expected._display_name()} but is a {type(obj)._display_name()}'
|
|
154
|
-
)
|
|
155
|
-
if expected is None and obj is not None:
|
|
156
|
-
raise excs.Error(f"{type(obj)._display_name()} '{str(path)}' already exists")
|
|
157
|
-
|
|
158
|
-
def get_children(self, parent: Path, child_type: Optional[type[SchemaObject]], recursive: bool) -> list[Path]:
|
|
159
|
-
dir = self._resolve_path(parent)
|
|
160
|
-
if not isinstance(dir, Dir):
|
|
161
|
-
raise excs.Error(f'{str(parent)} is a {type(dir)._display_name()}, not a directory')
|
|
162
|
-
matches = [
|
|
163
|
-
obj for obj in self.dir_contents[dir._id].values() if child_type is None or isinstance(obj, child_type)
|
|
164
|
-
]
|
|
165
|
-
result = [copy.copy(parent).append(obj._name) for obj in matches]
|
|
166
|
-
if recursive:
|
|
167
|
-
for dir in [obj for obj in self.dir_contents[dir._id].values() if isinstance(obj, Dir)]:
|
|
168
|
-
result.extend(self.get_children(copy.copy(parent).append(dir._name), child_type, recursive))
|
|
169
|
-
return result
|
|
@@ -1,382 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: pixeltable
|
|
3
|
-
Version: 0.3.10
|
|
4
|
-
Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
|
|
5
|
-
License: Apache-2.0
|
|
6
|
-
Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai
|
|
7
|
-
Author: Pixeltable, Inc.
|
|
8
|
-
Author-email: <contact@pixeltable.com>
|
|
9
|
-
Requires-Python: >=3.9,<4.0
|
|
10
|
-
Classifier: Intended Audience :: Developers
|
|
11
|
-
Classifier: Intended Audience :: Science/Research
|
|
12
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
|
-
Classifier: Operating System :: MacOS
|
|
14
|
-
Classifier: Operating System :: Microsoft :: Windows
|
|
15
|
-
Classifier: Operating System :: POSIX :: Linux
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
-
Classifier: Topic :: Database
|
|
22
|
-
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
|
-
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
24
|
-
Requires-Dist: av (>=10.0.0)
|
|
25
|
-
Requires-Dist: beautifulsoup4 (>=4.0.0)
|
|
26
|
-
Requires-Dist: cloudpickle (>=2.2.1)
|
|
27
|
-
Requires-Dist: ftfy (>=6.2.0)
|
|
28
|
-
Requires-Dist: httpcore (>=1.0.3)
|
|
29
|
-
Requires-Dist: httpx (>=0.27)
|
|
30
|
-
Requires-Dist: jinja2 (>=3.1.3)
|
|
31
|
-
Requires-Dist: jmespath (>=1.0.1)
|
|
32
|
-
Requires-Dist: jsonschema (>=4.1.0)
|
|
33
|
-
Requires-Dist: lxml (>=5.0)
|
|
34
|
-
Requires-Dist: more-itertools (>=10.2)
|
|
35
|
-
Requires-Dist: nest_asyncio (>=1.5)
|
|
36
|
-
Requires-Dist: numpy (>=1.25,<2.0)
|
|
37
|
-
Requires-Dist: pandas (>=2.0,<3.0)
|
|
38
|
-
Requires-Dist: pgvector (>=0.2.1)
|
|
39
|
-
Requires-Dist: pillow (>=9.3.0)
|
|
40
|
-
Requires-Dist: pixeltable-pgserver (==0.3.1)
|
|
41
|
-
Requires-Dist: psutil (>=5.9.5)
|
|
42
|
-
Requires-Dist: psycopg[binary] (>=3.1.18)
|
|
43
|
-
Requires-Dist: puremagic (>=1.20)
|
|
44
|
-
Requires-Dist: pyarrow (>=13.0.0)
|
|
45
|
-
Requires-Dist: pydantic (>=2.7.4)
|
|
46
|
-
Requires-Dist: pyiceberg (>=0.6.0)
|
|
47
|
-
Requires-Dist: pymupdf (>=1.24.1)
|
|
48
|
-
Requires-Dist: pyyaml (>=6.0.1)
|
|
49
|
-
Requires-Dist: requests (>=2.31.0)
|
|
50
|
-
Requires-Dist: sqlalchemy (>=2.0.23)
|
|
51
|
-
Requires-Dist: tenacity (>=8.2)
|
|
52
|
-
Requires-Dist: toml (>=0.10)
|
|
53
|
-
Requires-Dist: tqdm (>=4.64)
|
|
54
|
-
Project-URL: Documentation, https://docs.pixeltable.com/
|
|
55
|
-
Project-URL: Homepage, https://pixeltable.com/
|
|
56
|
-
Project-URL: Repository, https://github.com/pixeltable/pixeltable
|
|
57
|
-
Description-Content-Type: text/markdown
|
|
58
|
-
|
|
59
|
-
<div align="center">
|
|
60
|
-
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/resources/pixeltable-logo-large.png"
|
|
61
|
-
alt="Pixeltable" width="50%" />
|
|
62
|
-
<br>
|
|
63
|
-
|
|
64
|
-
<h2>Build Multimodal AI Apps with Declarative Data Infrastructure</h2>
|
|
65
|
-
|
|
66
|
-
[](https://opensource.org/licenses/Apache-2.0)
|
|
67
|
-

|
|
68
|
-

|
|
69
|
-
<br>
|
|
70
|
-
[](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml)
|
|
71
|
-
[](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml)
|
|
72
|
-
[](https://pypi.org/project/pixeltable/)
|
|
73
|
-
[](https://discord.gg/QPyqFYx2UN)
|
|
74
|
-
<a target="_blank" href="https://huggingface.co/Pixeltable">
|
|
75
|
-
<img src="https://img.shields.io/badge/🤗-HF Space-FF7D04" alt="Visit our Hugging Face space"/>
|
|
76
|
-
</a>
|
|
77
|
-
|
|
78
|
-
[Installation](https://docs.pixeltable.com/docs/installation) |
|
|
79
|
-
[Documentation](https://docs.pixeltable.com/docs/overview/quick-start) |
|
|
80
|
-
[API Reference](https://pixeltable.github.io/pixeltable/) |
|
|
81
|
-
[Code Samples](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#-code-samples) |
|
|
82
|
-
[Computer Vision](https://docs.pixeltable.com/docs/object-detection-in-videos) |
|
|
83
|
-
[LLM](https://docs.pixeltable.com/docs/document-indexing-and-rag)
|
|
84
|
-
</div>
|
|
85
|
-
|
|
86
|
-
## π What is Pixeltable?
|
|
87
|
-
Pixeltable is a declarative data infrastructure for building multimodal AI applications, enabling incremental storage, transformation, indexing, and orchestration of your data.
|
|
88
|
-
- **Data Ingestion**: Unified interface for all [data types](https://docs.pixeltable.com/docs/datastore/bringing-data) (images, videos, audio, documents, URLs, blob storage, structured data)
|
|
89
|
-
- **Data Transformation**: [Chunking](https://docs.pixeltable.com/docs/datastore/views), [embedding](https://docs.pixeltable.com/docs/datastore/embedding-index), and processing with declarative [computed columns](https://docs.pixeltable.com/docs/datastore/computed-columns)
|
|
90
|
-
- **Indexing & Storage**: Type-safe tables with [built-in vector indexing](https://docs.pixeltable.com/docs/cookbooks/search/website)
|
|
91
|
-
- **Query & Retrieval**: [Queries](https://docs.pixeltable.com/docs/datastore/filtering-and-selecting) combining filtering, sorting, and similarity search
|
|
92
|
-
- **Inference & Generation**: [Integration](https://docs.pixeltable.com/docs/integrations/frameworks#cloud-llm-providers) with AI models (OpenAI, Anthropic, PyTorch, YOLOX, DETR, Together, Hugging Face and more...)
|
|
93
|
-
|
|
94
|
-
All with your [custom functions (UDFs)](https://docs.pixeltable.com/docs/datastore/custom-functions), and built-in caching, versioning, lineage tracking, and incremental computation.
|
|
95
|
-
|
|
96
|
-
## 💾 Installation
|
|
97
|
-
|
|
98
|
-
```bash
|
|
99
|
-
pip install pixeltable
|
|
100
|
-
```
|
|
101
|
-
|
|
102
|
-
**Pixeltable is persistent. Unlike in-memory Python libraries such as Pandas, Pixeltable is a database.**
|
|
103
|
-
|
|
104
|
-
## 💡 Getting Started
|
|
105
|
-
|
|
106
|
-
Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations.
|
|
107
|
-
|
|
108
|
-
| Topic | Notebook | Topic | Notebook |
|
|
109
|
-
|:----------|:-----------------|:-------------------------|:---------------------------------:|
|
|
110
|
-
| 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> |
|
|
111
|
-
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/feature-guides/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> |
|
|
112
|
-
| Incremental Prompt Engineering | <a target="_blank" href="https://colab.research.google.com/github/mistralai/cookbook/blob/main/third_party/Pixeltable/incremental_prompt_engineering_and_model_comparison.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Github"/> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/feature-guides/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> |
|
|
113
|
-
| Integrating with Label Studio | <a target="_blank" href="https://docs.pixeltable.com/docs/cookbooks/vision/label-studio"> <img src="https://img.shields.io/badge/π Documentation-013056" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> |
|
|
114
|
-
| Multimodal Application | <a target="_blank" href="https://huggingface.co/spaces/Pixeltable/Multimodal-Powerhouse"> <img src="https://img.shields.io/badge/🤗-Gradio App-FF7D04" alt="Visit our Hugging Face Space"/></a> | Document Indexing and RAG | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/rag-demo.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> |
|
|
115
|
-
| Context-Aware Discord Bot | <a target="_blank" href="https://github.com/pixeltable/pixeltable/blob/main/docs/sample-apps/context-aware-discord-bot"> <img src="https://img.shields.io/badge/%F0%9F%92%AC-Discord Bot-%235865F2.svg" alt="Visit our documentation"/></a> | Image/Text Similarity Search | <a target="_blank" href="https://github.com/pixeltable/pixeltable/tree/main/docs/sample-apps/text-and-image-similarity-search-nextjs-fastapi"> <img src="https://img.shields.io/badge/🖥️-Next.js + FastAPI-black.svg" alt="Open In Colab"/> </a> |
|
|
116
|
-
|
|
117
|
-
## 🧱 Code Samples
|
|
118
|
-
|
|
119
|
-
### Import media data into Pixeltable (videos, images, audio...)
|
|
120
|
-
|
|
121
|
-
```python
|
|
122
|
-
import pixeltable as pxt
|
|
123
|
-
|
|
124
|
-
v = pxt.create_table('external_data.videos', {'video': pxt.Video})
|
|
125
|
-
|
|
126
|
-
prefix = 's3://multimedia-commons/'
|
|
127
|
-
paths = [
|
|
128
|
-
'data/videos/mp4/ffe/ffb/ffeffbef41bbc269810b2a1a888de.mp4',
|
|
129
|
-
'data/videos/mp4/ffe/feb/ffefebb41485539f964760e6115fbc44.mp4',
|
|
130
|
-
'data/videos/mp4/ffe/f73/ffef7384d698b5f70d411c696247169.mp4'
|
|
131
|
-
]
|
|
132
|
-
v.insert({'video': prefix + p} for p in paths)
|
|
133
|
-
```
|
|
134
|
-
|
|
135
|
-
Learn how to [work with data in Pixeltable](https://docs.pixeltable.com/docs/datastore/tables-and-operations).
|
|
136
|
-
|
|
137
|
-
### Object detection in images using DETR model
|
|
138
|
-
|
|
139
|
-
```python
|
|
140
|
-
import pixeltable as pxt
|
|
141
|
-
from pixeltable.functions import huggingface
|
|
142
|
-
|
|
143
|
-
# Create a table to store data persistently
|
|
144
|
-
t = pxt.create_table('image', {'image': pxt.Image})
|
|
145
|
-
|
|
146
|
-
# Insert some images
|
|
147
|
-
prefix = 'https://upload.wikimedia.org/wikipedia/commons'
|
|
148
|
-
paths = [
|
|
149
|
-
'/1/15/Cat_August_2010-4.jpg',
|
|
150
|
-
'/e/e1/Example_of_a_Dog.jpg',
|
|
151
|
-
'/thumb/b/bf/Bird_Diversity_2013.png/300px-Bird_Diversity_2013.png'
|
|
152
|
-
]
|
|
153
|
-
t.insert({'image': prefix + p} for p in paths)
|
|
154
|
-
|
|
155
|
-
# Add a computed column for image classification
|
|
156
|
-
t.add_computed_column(classification=huggingface.detr_for_object_detection(
|
|
157
|
-
t.image,
|
|
158
|
-
model_id='facebook/detr-resnet-50'
|
|
159
|
-
))
|
|
160
|
-
|
|
161
|
-
# Retrieve the rows where cats have been identified
|
|
162
|
-
t.select(animal = t.image,
|
|
163
|
-
classification = t.classification.label_text[0]) \
|
|
164
|
-
.where(t.classification.label_text[0]=='cat').head()
|
|
165
|
-
```
|
|
166
|
-
|
|
167
|
-
Learn about computed columns and object detection:
|
|
168
|
-
[Comparing object detection models](https://docs.pixeltable.com/docs/examples/use-cases#multimodal-processing).
|
|
169
|
-
|
|
170
|
-
### Extend Pixeltable's capabilities with user-defined functions
|
|
171
|
-
|
|
172
|
-
```python
|
|
173
|
-
@pxt.udf
|
|
174
|
-
def draw_boxes(img: PIL.Image.Image, boxes: list[list[float]]) -> PIL.Image.Image:
|
|
175
|
-
result = img.copy() # Create a copy of `img`
|
|
176
|
-
d = PIL.ImageDraw.Draw(result)
|
|
177
|
-
for box in boxes:
|
|
178
|
-
d.rectangle(box, width=3) # Draw bounding box rectangles on the copied image
|
|
179
|
-
return result
|
|
180
|
-
```
|
|
181
|
-
|
|
182
|
-
Learn more about user-defined functions:
|
|
183
|
-
[UDFs in Pixeltable](https://docs.pixeltable.com/docs/datastore/custom-functions).
|
|
184
|
-
|
|
185
|
-
### Automate data operations with views, e.g., split documents into chunks
|
|
186
|
-
|
|
187
|
-
```python
|
|
188
|
-
# In this example, the view is defined by iteration over the chunks of a DocumentSplitter
|
|
189
|
-
chunks_table = pxt.create_view(
|
|
190
|
-
'rag_demo.chunks',
|
|
191
|
-
documents_table,
|
|
192
|
-
iterator=DocumentSplitter.create(
|
|
193
|
-
document=documents_table.document,
|
|
194
|
-
separators='token_limit', limit=300)
|
|
195
|
-
)
|
|
196
|
-
```
|
|
197
|
-
|
|
198
|
-
Learn how to leverage views to build your
|
|
199
|
-
[RAG workflow](https://docs.pixeltable.com/docs/cookbooks/chat/memory).
|
|
200
|
-
|
|
201
|
-
### Evaluate model performance
|
|
202
|
-
|
|
203
|
-
```python
|
|
204
|
-
# The computation of the mAP metric can become a query over the evaluation output
|
|
205
|
-
frames_view.select(mean_ap(frames_view.eval_yolox_tiny), mean_ap(frames_view.eval_yolox_m)).show()
|
|
206
|
-
```
|
|
207
|
-
|
|
208
|
-
Learn how to leverage Pixeltable for [Model analytics](https://github.com/pixeltable/pixeltable/blob/main/docs/notebooks/use-cases/object-detection-in-videos.ipynb).
|
|
209
|
-
|
|
210
|
-
### Working with inference services
|
|
211
|
-
|
|
212
|
-
```python
|
|
213
|
-
chat_table = pxt.create_table('together_demo.chat', {'input': pxt.String})
|
|
214
|
-
|
|
215
|
-
# The chat-completions API expects JSON-formatted input:
|
|
216
|
-
messages = [{'role': 'user', 'content': chat_table.input}]
|
|
217
|
-
|
|
218
|
-
# This example shows how additional parameters from the Together API can be used in Pixeltable
|
|
219
|
-
chat_table.add_computed_column(
|
|
220
|
-
output=chat_completions(
|
|
221
|
-
messages=messages,
|
|
222
|
-
model='mistralai/Mixtral-8x7B-Instruct-v0.1',
|
|
223
|
-
max_tokens=300,
|
|
224
|
-
stop=['\n'],
|
|
225
|
-
temperature=0.7,
|
|
226
|
-
top_p=0.9,
|
|
227
|
-
top_k=40,
|
|
228
|
-
repetition_penalty=1.1,
|
|
229
|
-
logprobs=1,
|
|
230
|
-
echo=True
|
|
231
|
-
)
|
|
232
|
-
)
|
|
233
|
-
chat_table.add_computed_column(
|
|
234
|
-
response=chat_table.output.choices[0].message.content
|
|
235
|
-
)
|
|
236
|
-
|
|
237
|
-
# Start a conversation
|
|
238
|
-
chat_table.insert([
|
|
239
|
-
{'input': 'How many species of felids have been classified?'},
|
|
240
|
-
{'input': 'Can you make me a coffee?'}
|
|
241
|
-
])
|
|
242
|
-
chat_table.select(chat_table.input, chat_table.response).head()
|
|
243
|
-
```
|
|
244
|
-
|
|
245
|
-
Learn how to interact with inference services such as [Together AI](https://github.com/pixeltable/pixeltable/blob/main/docs/notebooks/integrations/working-with-together.ipynb) in Pixeltable.
|
|
246
|
-
|
|
247
|
-
### Text and image similarity search on video frames with embedding indexes
|
|
248
|
-
|
|
249
|
-
```python
|
|
250
|
-
import pixeltable as pxt
|
|
251
|
-
from pixeltable.functions.huggingface import clip
|
|
252
|
-
from pixeltable.iterators import FrameIterator
|
|
253
|
-
import PIL.Image
|
|
254
|
-
|
|
255
|
-
video_table = pxt.create_table('videos', {'video': pxt.Video})
|
|
256
|
-
|
|
257
|
-
video_table.insert([{'video': '/video.mp4'}])
|
|
258
|
-
|
|
259
|
-
frames_view = pxt.create_view(
|
|
260
|
-
'frames', video_table, iterator=FrameIterator.create(video=video_table.video))
|
|
261
|
-
|
|
262
|
-
# Create an index on the 'frame' column that allows text and image search
|
|
263
|
-
frames_view.add_embedding_index('frame', embed=clip.using('openai/clip-vit-base-patch32'))
|
|
264
|
-
|
|
265
|
-
# Now we will retrieve images based on a sample image
|
|
266
|
-
sample_image = '/image.jpeg'
|
|
267
|
-
sim = frames_view.frame.similarity(sample_image)
|
|
268
|
-
frames_view.order_by(sim, asc=False).limit(5).select(frames_view.frame, sim=sim).collect()
|
|
269
|
-
|
|
270
|
-
# Now we will retrieve images based on a string
|
|
271
|
-
sample_text = 'red truck'
|
|
272
|
-
sim = frames_view.frame.similarity(sample_text)
|
|
273
|
-
frames_view.order_by(sim, asc=False).limit(5).select(frames_view.frame, sim=sim).collect()
|
|
274
|
-
```
|
|
275
|
-
|
|
276
|
-
Learn how to work with [Embedding and Vector Indexes](https://docs.pixeltable.com/docs/datastore/embedding-index).
|
|
277
|
-
|
|
278
|
-
## π AI Stack Comparison
|
|
279
|
-
|
|
280
|
-
### 🎯 Computer Vision Workflows
|
|
281
|
-
|
|
282
|
-
| Requirement | Traditional | Pixeltable |
|
|
283
|
-
|-------------|---------------------|------------|
|
|
284
|
-
| Frame Extraction | ffmpeg + custom code | Automatic via FrameIterator |
|
|
285
|
-
| Object Detection | Multiple scripts + caching | Single computed column |
|
|
286
|
-
| Video Indexing | Custom pipelines + Vector DB | Native similarity search |
|
|
287
|
-
| Annotation Management | Separate tools + custom code | Label Studio integration |
|
|
288
|
-
| Model Evaluation | Custom metrics pipeline | Built-in mAP computation |
|
|
289
|
-
|
|
290
|
-
### 🤖 LLM Workflows
|
|
291
|
-
|
|
292
|
-
| Requirement | Traditional | Pixeltable |
|
|
293
|
-
|-------------|---------------------|------------|
|
|
294
|
-
| Document Chunking | Tool + custom code | Native DocumentSplitter |
|
|
295
|
-
| Embedding Generation | Separate pipeline + caching | Computed columns |
|
|
296
|
-
| Vector Search | External vector DB | Built-in vector indexing |
|
|
297
|
-
| Prompt Management | Custom tracking solution | Version-controlled columns |
|
|
298
|
-
| Chain Management | Tool + custom code | Computed column DAGs |
|
|
299
|
-
|
|
300
|
-
### 🎨 Multimodal Workflows
|
|
301
|
-
|
|
302
|
-
| Requirement | Traditional | Pixeltable |
|
|
303
|
-
|-------------|---------------------|------------|
|
|
304
|
-
| Data Types | Multiple storage systems | Unified table interface |
|
|
305
|
-
| Cross-Modal Search | Complex integration | Native similarity support |
|
|
306
|
-
| Pipeline Orchestration | Multiple tools (Airflow, etc.) | Single declarative interface |
|
|
307
|
-
| Asset Management | Custom tracking system | Automatic lineage |
|
|
308
|
-
| Quality Control | Multiple validation tools | Computed validation columns |
|
|
309
|
-
|
|
310
|
-
## ❓ FAQ
|
|
311
|
-
|
|
312
|
-
### What problems does Pixeltable solve?
|
|
313
|
-
|
|
314
|
-
Today's solutions for AI app development require extensive custom coding and infrastructure plumbing.
|
|
315
|
-
Tracking lineage and versions between and across data transformations, models, and deployments is cumbersome.
|
|
316
|
-
Pixeltable lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without
|
|
317
|
-
dealing with the customary data plumbing.
|
|
318
|
-
|
|
319
|
-
### What does Pixeltable provide me with? Pixeltable provides:
|
|
320
|
-
|
|
321
|
-
- Data storage and versioning
|
|
322
|
-
- Combined Data and Model Lineage
|
|
323
|
-
- Indexing (e.g. embedding vectors) and Data Retrieval
|
|
324
|
-
- Orchestration of multimodal workloads
|
|
325
|
-
- Incremental updates
|
|
326
|
-
- Code is automatically production-ready
|
|
327
|
-
|
|
328
|
-
### Why should you use Pixeltable?
|
|
329
|
-
|
|
330
|
-
- **It gives you transparency and reproducibility**
|
|
331
|
-
- All generated data is automatically recorded and versioned
|
|
332
|
-
- You will never need to re-run a workload because you lost track of the input data
|
|
333
|
-
- **It saves you money**
|
|
334
|
-
- All data changes are automatically incremental
|
|
335
|
-
- You never need to re-run pipelines from scratch because you’re adding data
|
|
336
|
-
- **It integrates with any existing Python code or libraries**
|
|
337
|
-
- Bring your ever-changing code and workloads
|
|
338
|
-
- You choose the models, tools, and AI practices (e.g., your embedding model for a vector index);
|
|
339
|
-
Pixeltable orchestrates the data
|
|
340
|
-
|
|
341
|
-
### What is Pixeltable not providing?
|
|
342
|
-
|
|
343
|
-
- Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for
|
|
344
|
-
your specific needs.
|
|
345
|
-
- We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data
|
|
346
|
-
infrastructure and orchestration.
|
|
347
|
-
|
|
348
|
-
> [!TIP]
|
|
349
|
-
> Check out the [Integrations](https://docs.pixeltable.com/docs/integrations/frameworks) section, and feel free to submit
|
|
350
|
-
> a request for additional ones.
|
|
351
|
-
|
|
352
|
-
## 🤝 Contributing to Pixeltable
|
|
353
|
-
|
|
354
|
-
We're excited to welcome contributions from the community! Here's how you can get involved:
|
|
355
|
-
|
|
356
|
-
### π Report Issues
|
|
357
|
-
|
|
358
|
-
- Found a bug? [Open an issue](https://github.com/pixeltable/pixeltable/issues)
|
|
359
|
-
- Include steps to reproduce and environment details
|
|
360
|
-
|
|
361
|
-
### 💡 Submit Changes
|
|
362
|
-
|
|
363
|
-
- Fork the repository
|
|
364
|
-
- Create a feature branch
|
|
365
|
-
- Submit a [pull request](https://github.com/pixeltable/pixeltable/pulls)
|
|
366
|
-
- See our [Contributing Guide](CONTRIBUTING.md) for detailed instructions
|
|
367
|
-
|
|
368
|
-
### 💬 Join the Discussion
|
|
369
|
-
|
|
370
|
-
- Have questions? Start a [Discussion](https://github.com/pixeltable/pixeltable/discussions)
|
|
371
|
-
- Share your Pixeltable projects and use cases
|
|
372
|
-
- Help others in the community
|
|
373
|
-
|
|
374
|
-
### π Improve Documentation
|
|
375
|
-
|
|
376
|
-
- Suggest examples and tutorials
|
|
377
|
-
- Propose improvements
|
|
378
|
-
|
|
379
|
-
## 🏢 License
|
|
380
|
-
|
|
381
|
-
This library is licensed under the Apache 2.0 License.
|
|
382
|
-
|