pixeltable 0.2.3__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- {pixeltable-0.2.3 → pixeltable-0.2.5}/PKG-INFO +35 -28
- {pixeltable-0.2.3 → pixeltable-0.2.5}/README.md +30 -24
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/column.py +26 -49
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/insertable_table.py +7 -4
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/table.py +163 -57
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/table_version.py +416 -140
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/table_version_path.py +2 -2
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/client.py +72 -6
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/dataframe.py +65 -21
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/env.py +52 -53
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/cache_prefetch_node.py +1 -1
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/in_memory_data_node.py +11 -7
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/comparison.py +3 -3
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/data_row.py +5 -1
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/literal.py +16 -4
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/row_builder.py +8 -40
- pixeltable-0.2.5/pixeltable/ext/__init__.py +5 -0
- pixeltable-0.2.5/pixeltable/ext/functions/yolox.py +92 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/aggregate_function.py +15 -15
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/expr_template_function.py +9 -1
- pixeltable-0.2.5/pixeltable/func/globals.py +46 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/signature.py +18 -12
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/udf.py +7 -2
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/__init__.py +9 -9
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/eval.py +7 -8
- pixeltable-0.2.5/pixeltable/functions/fireworks.py +34 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/huggingface.py +47 -19
- pixeltable-0.2.5/pixeltable/functions/openai.py +256 -0
- pixeltable-0.2.5/pixeltable/functions/together.py +122 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/util.py +11 -0
- pixeltable-0.2.5/pixeltable/index/__init__.py +2 -0
- pixeltable-0.2.5/pixeltable/index/base.py +49 -0
- pixeltable-0.2.5/pixeltable/index/embedding_index.py +95 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/metadata/schema.py +45 -22
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/plan.py +15 -34
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/store.py +38 -41
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/conftest.py +8 -14
- pixeltable-0.2.5/pixeltable/tests/ext/test_yolox.py +21 -0
- pixeltable-0.2.5/pixeltable/tests/functions/test_fireworks.py +43 -0
- pixeltable-0.2.5/pixeltable/tests/functions/test_functions.py +60 -0
- pixeltable-0.2.3/pixeltable/tests/test_functions.py → pixeltable-0.2.5/pixeltable/tests/functions/test_huggingface.py +7 -143
- pixeltable-0.2.5/pixeltable/tests/functions/test_openai.py +162 -0
- pixeltable-0.2.5/pixeltable/tests/functions/test_together.py +112 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_component_view.py +14 -5
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_dataframe.py +23 -22
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_exprs.py +99 -102
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_function.py +51 -43
- pixeltable-0.2.5/pixeltable/tests/test_index.py +138 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_migration.py +2 -1
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_snapshot.py +24 -1
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_table.py +205 -26
- pixeltable-0.2.5/pixeltable/tests/test_types.py +52 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_video.py +16 -16
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_view.py +5 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/utils.py +171 -14
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tool/create_test_db_dump.py +16 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/type_system.py +77 -128
- pixeltable-0.2.5/pixeltable/utils/arrow.py +98 -0
- pixeltable-0.2.5/pixeltable/utils/hf_datasets.py +157 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/parquet.py +68 -27
- pixeltable-0.2.5/pixeltable/utils/pytorch.py +91 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pyproject.toml +15 -6
- pixeltable-0.2.3/pixeltable/func/globals.py +0 -36
- pixeltable-0.2.3/pixeltable/functions/fireworks.py +0 -61
- pixeltable-0.2.3/pixeltable/functions/openai.py +0 -88
- pixeltable-0.2.3/pixeltable/functions/together.py +0 -27
- pixeltable-0.2.3/pixeltable/tests/test_types.py +0 -22
- pixeltable-0.2.3/pixeltable/utils/pytorch.py +0 -172
- {pixeltable-0.2.3 → pixeltable-0.2.5}/LICENSE +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/__init__.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/__init__.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/catalog.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/dir.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/globals.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/named_function.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/path.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/path_dict.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/schema_object.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/catalog/view.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exceptions.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/__init__.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/aggregation_node.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/component_iteration_node.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/data_row_batch.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/exec_context.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/exec_node.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/expr_eval_node.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/media_validation_node.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exec/sql_scan_node.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/__init__.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/arithmetic_expr.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/array_slice.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/column_property_ref.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/column_ref.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/compound_predicate.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/expr.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/expr_set.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/function_call.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/globals.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/image_member_access.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/image_similarity_predicate.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/inline_array.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/inline_dict.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/is_null.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/json_mapper.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/json_path.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/object_ref.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/predicate.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/rowid_ref.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/type_cast.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/exprs/variable.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/__init__.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/batched_function.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/callable_function.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/function.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/function_registry.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/func/nos_function.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/image.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/pil/image.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/string.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/functions/video.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/iterators/__init__.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/iterators/base.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/iterators/document.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/iterators/video.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/metadata/__init__.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/metadata/converters/convert_10.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_audio.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_catalog.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_client.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_dirs.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_document.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_nos.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tests/test_transactional_directory.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/tool/create_test_video.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/__init__.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/clip.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/coco.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/documents.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/filecache.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/help.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/media_store.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/s3.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/sql.py +0 -0
- {pixeltable-0.2.3 → pixeltable-0.2.5}/pixeltable/utils/transactional_directory.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
5
|
Author: Marcel Kornacker
|
|
6
6
|
Author-email: marcelk@gmail.com
|
|
@@ -15,12 +15,12 @@ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
|
|
|
15
15
|
Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
|
|
16
16
|
Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
|
|
17
17
|
Requires-Dist: jmespath (>=1.0.1,<2.0.0)
|
|
18
|
-
Requires-Dist: numpy (>=1.26
|
|
18
|
+
Requires-Dist: numpy (>=1.26)
|
|
19
19
|
Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
20
20
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
21
|
-
Requires-Dist: pgserver (==0.
|
|
21
|
+
Requires-Dist: pgserver (==0.1.2)
|
|
22
22
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
23
|
-
Requires-Dist: pillow (>=
|
|
23
|
+
Requires-Dist: pillow (>=10.0)
|
|
24
24
|
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
25
25
|
Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
|
|
26
26
|
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
@@ -28,38 +28,44 @@ Requires-Dist: regex (>=2022.10.31,<2023.0.0)
|
|
|
28
28
|
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
29
29
|
Requires-Dist: sqlalchemy-utils (>=0.41.1,<0.42.0)
|
|
30
30
|
Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
|
|
31
|
+
Requires-Dist: tenacity (>=8.2,<9.0)
|
|
31
32
|
Requires-Dist: tqdm (>=4.64.1,<5.0.0)
|
|
32
33
|
Description-Content-Type: text/markdown
|
|
33
34
|
|
|
35
|
+
<div align="center">
|
|
34
36
|
<img src="docs/pixeltable-banner.png" width="45%"/>
|
|
35
37
|
|
|
36
|
-
#
|
|
38
|
+
# Unifying Data, Models, and Orchestration for AI Products
|
|
37
39
|
|
|
38
40
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
39
41
|
|
|
40
42
|

|
|
41
43
|
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
data plumbing.
|
|
44
|
+
[Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.github.io/pixeltable/)
|
|
45
|
+
</div>
|
|
45
46
|
|
|
46
|
-
|
|
47
|
-
It brings together data storage, versioning, and indexing with orchestration and model
|
|
48
|
-
versioning under a declarative table interface, with transformations, model inference,
|
|
49
|
-
and custom logic represented as computed columns.
|
|
47
|
+
Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
|
|
50
48
|
|
|
51
|
-
##
|
|
49
|
+
## What problems does Pixeltable solve?
|
|
50
|
+
|
|
51
|
+
Today’s solutions for AI app development require extensive custom coding and infrastructure
|
|
52
|
+
plumbing. Tracking lineage and versions between and across data transformations, models, and
|
|
53
|
+
deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
|
|
54
|
+
a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
|
|
55
|
+
building and productionizing AI applications.
|
|
56
|
+
|
|
57
|
+
## ⚡Quick Start
|
|
58
|
+
Learn the basics of Pixeltable through interactive examples. View the notebooks on Google Colab or Kaggle, for free.
|
|
52
59
|
|
|
53
|
-
|
|
54
|
-
|
|
60
|
+
### Pixeltable Basics
|
|
61
|
+
In this tutorial, we'll survey how to create tables, populate them with data, and enhance them with built-in and user-defined transformations and AI operations.
|
|
55
62
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
</a>
|
|
63
|
+
[](https://kaggle.com/kernels/welcome?src=https://github.com/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb)
|
|
64
|
+
<a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
59
65
|
|
|
60
|
-
## Installation
|
|
61
66
|
|
|
62
|
-
|
|
67
|
+
## 💾 Installation
|
|
68
|
+
Pixeltable works with Python 3.9, 3.10, 3.11, or 3.12 running on Linux, MacOS, or Windows.
|
|
63
69
|
|
|
64
70
|
```
|
|
65
71
|
pip install pixeltable
|
|
@@ -78,14 +84,6 @@ guide. Then, check out the
|
|
|
78
84
|
[Pixeltable Basics](https://pixeltable.github.io/pixeltable/tutorials/pixeltable-basics/)
|
|
79
85
|
tutorial for a tour of its most important features.
|
|
80
86
|
|
|
81
|
-
## What problems does Pixeltable solve?
|
|
82
|
-
|
|
83
|
-
Today’s solutions for AI app development require extensive custom coding and infrastructure
|
|
84
|
-
plumbing. Tracking lineage and versions between and across data transformations, models, and
|
|
85
|
-
deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
|
|
86
|
-
a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
|
|
87
|
-
building and productionizing AI applications.
|
|
88
|
-
|
|
89
87
|
## Why should you use Pixeltable?
|
|
90
88
|
|
|
91
89
|
- It gives you transparency and reproducibility
|
|
@@ -119,3 +117,12 @@ get cost projections before adding new data and new augmentations.
|
|
|
119
117
|
* Rely on Pixeltable's automatic versioning and snapshot functionality to protect against regressions
|
|
120
118
|
and to ensure reproducibility.
|
|
121
119
|
|
|
120
|
+
## Contributions & Feedback
|
|
121
|
+
|
|
122
|
+
Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
|
|
123
|
+
</br>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
|
|
124
|
+
|
|
125
|
+
## :classical_building: License
|
|
126
|
+
|
|
127
|
+
This library is licensed under the Apache 2.0 License.
|
|
128
|
+
|
|
@@ -1,32 +1,37 @@
|
|
|
1
|
+
<div align="center">
|
|
1
2
|
<img src="docs/pixeltable-banner.png" width="45%"/>
|
|
2
3
|
|
|
3
|
-
#
|
|
4
|
+
# Unifying Data, Models, and Orchestration for AI Products
|
|
4
5
|
|
|
5
6
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
6
7
|
|
|
7
8
|

|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
data plumbing.
|
|
10
|
+
[Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.github.io/pixeltable/)
|
|
11
|
+
</div>
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
It brings together data storage, versioning, and indexing with orchestration and model
|
|
15
|
-
versioning under a declarative table interface, with transformations, model inference,
|
|
16
|
-
and custom logic represented as computed columns.
|
|
13
|
+
Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
|
|
17
14
|
|
|
18
|
-
##
|
|
15
|
+
## What problems does Pixeltable solve?
|
|
16
|
+
|
|
17
|
+
Today’s solutions for AI app development require extensive custom coding and infrastructure
|
|
18
|
+
plumbing. Tracking lineage and versions between and across data transformations, models, and
|
|
19
|
+
deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
|
|
20
|
+
a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
|
|
21
|
+
building and productionizing AI applications.
|
|
22
|
+
|
|
23
|
+
## ⚡Quick Start
|
|
24
|
+
Learn the basics of Pixeltable through interactive examples. View the notebooks on Google Colab or Kaggle, for free.
|
|
19
25
|
|
|
20
|
-
|
|
21
|
-
|
|
26
|
+
### Pixeltable Basics
|
|
27
|
+
In this tutorial, we'll survey how to create tables, populate them with data, and enhance them with built-in and user-defined transformations and AI operations.
|
|
22
28
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
</a>
|
|
29
|
+
[](https://kaggle.com/kernels/welcome?src=https://github.com/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb)
|
|
30
|
+
<a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
26
31
|
|
|
27
|
-
## Installation
|
|
28
32
|
|
|
29
|
-
|
|
33
|
+
## 💾 Installation
|
|
34
|
+
Pixeltable works with Python 3.9, 3.10, 3.11, or 3.12 running on Linux, MacOS, or Windows.
|
|
30
35
|
|
|
31
36
|
```
|
|
32
37
|
pip install pixeltable
|
|
@@ -45,14 +50,6 @@ guide. Then, check out the
|
|
|
45
50
|
[Pixeltable Basics](https://pixeltable.github.io/pixeltable/tutorials/pixeltable-basics/)
|
|
46
51
|
tutorial for a tour of its most important features.
|
|
47
52
|
|
|
48
|
-
## What problems does Pixeltable solve?
|
|
49
|
-
|
|
50
|
-
Today’s solutions for AI app development require extensive custom coding and infrastructure
|
|
51
|
-
plumbing. Tracking lineage and versions between and across data transformations, models, and
|
|
52
|
-
deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
|
|
53
|
-
a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
|
|
54
|
-
building and productionizing AI applications.
|
|
55
|
-
|
|
56
53
|
## Why should you use Pixeltable?
|
|
57
54
|
|
|
58
55
|
- It gives you transparency and reproducibility
|
|
@@ -85,3 +82,12 @@ storage.
|
|
|
85
82
|
get cost projections before adding new data and new augmentations.
|
|
86
83
|
* Rely on Pixeltable's automatic versioning and snapshot functionality to protect against regressions
|
|
87
84
|
and to ensure reproducibility.
|
|
85
|
+
|
|
86
|
+
## Contributions & Feedback
|
|
87
|
+
|
|
88
|
+
Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
|
|
89
|
+
</br>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
|
|
90
|
+
|
|
91
|
+
## :classical_building: License
|
|
92
|
+
|
|
93
|
+
This library is licensed under the Apache 2.0 License.
|
|
@@ -4,10 +4,8 @@ import logging
|
|
|
4
4
|
from typing import Optional, Union, Callable, Set
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
|
-
from pgvector.sqlalchemy import Vector
|
|
8
7
|
|
|
9
8
|
from pixeltable import exceptions as excs
|
|
10
|
-
from pixeltable.metadata import schema
|
|
11
9
|
from pixeltable.type_system import ColumnType, StringType
|
|
12
10
|
from .globals import is_valid_identifier
|
|
13
11
|
|
|
@@ -20,48 +18,42 @@ class Column:
|
|
|
20
18
|
table/view.
|
|
21
19
|
"""
|
|
22
20
|
def __init__(
|
|
23
|
-
self, name: str, col_type: Optional[ColumnType] = None,
|
|
21
|
+
self, name: Optional[str], col_type: Optional[ColumnType] = None,
|
|
24
22
|
computed_with: Optional[Union['Expr', Callable]] = None,
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
23
|
+
is_pk: bool = False, stored: Optional[bool] = None,
|
|
24
|
+
col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
|
|
25
|
+
schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None
|
|
26
|
+
):
|
|
29
27
|
"""Column constructor.
|
|
30
28
|
|
|
31
29
|
Args:
|
|
32
|
-
name: column name
|
|
30
|
+
name: column name; None for system columns (eg, index columns)
|
|
33
31
|
col_type: column type; can be None if the type can be derived from ``computed_with``
|
|
34
32
|
computed_with: a callable or an Expr object that computes the column value
|
|
35
|
-
|
|
33
|
+
is_pk: if True, this column is part of the primary key
|
|
36
34
|
stored: determines whether a computed column is present in the stored table or recomputed on demand
|
|
37
|
-
indexed: if True, this column has a nearest neighbor index (only valid for image columns)
|
|
38
35
|
col_id: column ID (only used internally)
|
|
39
36
|
|
|
40
37
|
Computed columns: those have a non-None ``computed_with`` argument
|
|
41
|
-
|
|
42
38
|
- when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
|
|
43
39
|
col_type is None
|
|
44
40
|
- when loaded from md store: ``computed_with`` is set and col_type is set
|
|
45
41
|
|
|
46
42
|
``computed_with`` is a Callable:
|
|
47
|
-
|
|
48
43
|
- the callable's parameter names must correspond to existing columns in the table for which this Column
|
|
49
44
|
is being used
|
|
50
45
|
- ``col_type`` needs to be set to the callable's return type
|
|
51
46
|
|
|
52
47
|
``stored`` (only valid for computed image columns):
|
|
53
|
-
|
|
54
48
|
- if True: the column is present in the stored table
|
|
55
49
|
- if False: the column is not present in the stored table and recomputed during a query
|
|
56
50
|
- if None: the system chooses for you (at present, this is always False, but this may change in the future)
|
|
57
|
-
|
|
58
|
-
indexed: only valid for image columns; if true, maintains an NN index for this column
|
|
59
51
|
"""
|
|
60
|
-
if not is_valid_identifier(name):
|
|
52
|
+
if name is not None and not is_valid_identifier(name):
|
|
61
53
|
raise excs.Error(f"Invalid column name: '{name}'")
|
|
62
54
|
self.name = name
|
|
63
55
|
if col_type is None and computed_with is None:
|
|
64
|
-
raise excs.Error(f'Column {name}
|
|
56
|
+
raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
|
|
65
57
|
|
|
66
58
|
self.value_expr: Optional['Expr'] = None
|
|
67
59
|
self.compute_func: Optional[Callable] = None
|
|
@@ -90,35 +82,20 @@ class Column:
|
|
|
90
82
|
self.stored = stored
|
|
91
83
|
self.dependent_cols: Set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
|
|
92
84
|
self.id = col_id
|
|
93
|
-
self.
|
|
85
|
+
self.is_pk = is_pk
|
|
86
|
+
self.schema_version_add = schema_version_add
|
|
87
|
+
self.schema_version_drop = schema_version_drop
|
|
94
88
|
|
|
95
89
|
# column in the stored table for the values of this Column
|
|
96
90
|
self.sa_col: Optional[sql.schema.Column] = None
|
|
91
|
+
self.sa_col_type = sa_col_type
|
|
97
92
|
|
|
98
93
|
# computed cols also have storage columns for the exception string and type
|
|
99
94
|
self.sa_errormsg_col: Optional[sql.schema.Column] = None
|
|
100
95
|
self.sa_errortype_col: Optional[sql.schema.Column] = None
|
|
101
|
-
# indexed columns also have a column for the embeddings
|
|
102
|
-
self.sa_idx_col: Optional[sql.schema.Column] = None
|
|
103
96
|
from .table_version import TableVersion
|
|
104
97
|
self.tbl: Optional[TableVersion] = None # set by owning TableVersion
|
|
105
98
|
|
|
106
|
-
if indexed and not self.col_type.is_image_type():
|
|
107
|
-
raise excs.Error(f'Column {name}: indexed=True requires ImageType')
|
|
108
|
-
self.is_indexed = indexed
|
|
109
|
-
|
|
110
|
-
@classmethod
|
|
111
|
-
def from_md(cls, col_id: int, md: schema.SchemaColumn, tbl: 'TableVersion') -> Column:
|
|
112
|
-
"""Construct a Column from metadata.
|
|
113
|
-
|
|
114
|
-
Leaves out value_expr, because that requires TableVersion.cols to be complete.
|
|
115
|
-
"""
|
|
116
|
-
col = cls(
|
|
117
|
-
md.name, col_type=ColumnType.from_dict(md.col_type), primary_key=md.is_pk,
|
|
118
|
-
stored=md.stored, indexed=md.is_indexed, col_id=col_id)
|
|
119
|
-
col.tbl = tbl
|
|
120
|
-
return col
|
|
121
|
-
|
|
122
99
|
def __hash__(self) -> int:
|
|
123
100
|
assert self.tbl is not None
|
|
124
101
|
return hash((self.tbl.id, self.id))
|
|
@@ -167,26 +144,26 @@ class Column:
|
|
|
167
144
|
"""
|
|
168
145
|
assert self.is_stored
|
|
169
146
|
# all storage columns are nullable (we deal with null errors in Pixeltable directly)
|
|
170
|
-
self.sa_col = sql.Column(
|
|
147
|
+
self.sa_col = sql.Column(
|
|
148
|
+
self.store_name(), self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type,
|
|
149
|
+
nullable=True)
|
|
171
150
|
if self.is_computed or self.col_type.is_media_type():
|
|
172
|
-
self.sa_errormsg_col = sql.Column(self.
|
|
173
|
-
self.sa_errortype_col = sql.Column(self.
|
|
174
|
-
if self.is_indexed:
|
|
175
|
-
self.sa_idx_col = sql.Column(self.index_storage_name(), Vector(512), nullable=True)
|
|
151
|
+
self.sa_errormsg_col = sql.Column(self.errormsg_store_name(), StringType().to_sa_type(), nullable=True)
|
|
152
|
+
self.sa_errortype_col = sql.Column(self.errortype_store_name(), StringType().to_sa_type(), nullable=True)
|
|
176
153
|
|
|
177
|
-
def storage_name(self) -> str:
|
|
154
|
+
def get_sa_col_type(self) -> sql.sqltypes.TypeEngine:
|
|
155
|
+
return self.col_type.to_sa_type() if self.sa_col_type is None else self.sa_col_type
|
|
156
|
+
|
|
157
|
+
def store_name(self) -> str:
|
|
178
158
|
assert self.id is not None
|
|
179
159
|
assert self.is_stored
|
|
180
160
|
return f'col_{self.id}'
|
|
181
161
|
|
|
182
|
-
def errormsg_storage_name(self) -> str:
|
|
183
|
-
return f'{self.storage_name()}_errormsg'
|
|
184
|
-
|
|
185
|
-
def errortype_storage_name(self) -> str:
|
|
186
|
-
return f'{self.storage_name()}_errortype'
|
|
162
|
+
def errormsg_store_name(self) -> str:
|
|
163
|
+
return f'{self.store_name()}_errormsg'
|
|
187
164
|
|
|
188
|
-
def
|
|
189
|
-
return f'{self.
|
|
165
|
+
def errortype_store_name(self) -> str:
|
|
166
|
+
return f'{self.store_name()}_errortype'
|
|
190
167
|
|
|
191
168
|
def __str__(self) -> str:
|
|
192
169
|
return f'{self.name}: {self.col_type}'
|
|
@@ -11,14 +11,17 @@ import pixeltable.type_system as ts
|
|
|
11
11
|
from pixeltable import exceptions as excs
|
|
12
12
|
from pixeltable.env import Env
|
|
13
13
|
from .catalog import Catalog
|
|
14
|
+
from .globals import UpdateStatus
|
|
14
15
|
from .table import Table
|
|
15
16
|
from .table_version import TableVersion
|
|
16
17
|
from .table_version_path import TableVersionPath
|
|
17
18
|
|
|
18
19
|
_logger = logging.getLogger('pixeltable')
|
|
19
20
|
|
|
21
|
+
|
|
20
22
|
class InsertableTable(Table):
|
|
21
23
|
"""A `Table` that allows inserting and deleting rows."""
|
|
24
|
+
|
|
22
25
|
def __init__(self, dir_id: UUID, tbl_version: TableVersion):
|
|
23
26
|
tbl_version_path = TableVersionPath(tbl_version)
|
|
24
27
|
super().__init__(tbl_version.id, dir_id, tbl_version.name, tbl_version_path)
|
|
@@ -42,7 +45,7 @@ class InsertableTable(Table):
|
|
|
42
45
|
col = columns[column_names.index(pk_col)]
|
|
43
46
|
if col.col_type.nullable:
|
|
44
47
|
raise excs.Error(f'Primary key column {pk_col} cannot be nullable')
|
|
45
|
-
col.
|
|
48
|
+
col.is_pk = True
|
|
46
49
|
|
|
47
50
|
with orm.Session(Env.get().engine, future=True) as session:
|
|
48
51
|
_, tbl_version = TableVersion.create(session, dir_id, name, columns, num_retained_versions, comment)
|
|
@@ -62,7 +65,7 @@ class InsertableTable(Table):
|
|
|
62
65
|
@overload
|
|
63
66
|
def insert(self, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any): ...
|
|
64
67
|
|
|
65
|
-
def insert(self, *args, **kwargs) ->
|
|
68
|
+
def insert(self, *args, **kwargs) -> UpdateStatus:
|
|
66
69
|
"""Insert rows into table.
|
|
67
70
|
|
|
68
71
|
To insert multiple rows at a time:
|
|
@@ -161,7 +164,7 @@ class InsertableTable(Table):
|
|
|
161
164
|
msg = str(e)
|
|
162
165
|
raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}')
|
|
163
166
|
|
|
164
|
-
def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) ->
|
|
167
|
+
def delete(self, where: Optional['pixeltable.exprs.Predicate'] = None) -> UpdateStatus:
|
|
165
168
|
"""Delete rows in this table.
|
|
166
169
|
|
|
167
170
|
Args:
|
|
@@ -181,7 +184,7 @@ class InsertableTable(Table):
|
|
|
181
184
|
if where is not None:
|
|
182
185
|
if not isinstance(where, Predicate):
|
|
183
186
|
raise excs.Error(f"'where' argument must be a Predicate, got {type(where)}")
|
|
184
|
-
analysis_info = Planner.analyze(self.
|
|
187
|
+
analysis_info = Planner.analyze(self.tbl_version_path, where)
|
|
185
188
|
if analysis_info.similarity_clause is not None:
|
|
186
189
|
raise excs.Error('nearest() cannot be used with delete()')
|
|
187
190
|
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|