pixeltable 0.2.12__tar.gz → 0.2.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable-0.2.13/PKG-INFO +206 -0
- pixeltable-0.2.13/README.md +168 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/__version__.py +2 -2
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/insertable_table.py +2 -2
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/table.py +5 -5
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/table_version.py +12 -14
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/view.py +2 -2
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/dataframe.py +7 -6
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exec/expr_eval_node.py +8 -1
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exec/sql_scan_node.py +1 -1
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/__init__.py +0 -1
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/comparison.py +5 -5
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/compound_predicate.py +12 -12
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/expr.py +32 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/in_predicate.py +3 -3
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/is_null.py +5 -5
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/func/aggregate_function.py +10 -4
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/func/callable_function.py +4 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/func/function_registry.py +2 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/functions/globals.py +36 -1
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/functions/huggingface.py +62 -4
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/functions/image.py +17 -0
- pixeltable-0.2.13/pixeltable/functions/string.py +636 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/functions/video.py +26 -8
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/globals.py +3 -3
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/io/globals.py +53 -4
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/io/label_studio.py +42 -2
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/io/pandas.py +18 -7
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/plan.py +6 -6
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/tool/create_test_db_dump.py +1 -1
- pixeltable-0.2.13/pixeltable/tool/doc_plugins/griffe.py +77 -0
- pixeltable-0.2.13/pixeltable/tool/doc_plugins/mkdocstrings.py +6 -0
- pixeltable-0.2.13/pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +135 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/s3.py +1 -1
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pyproject.toml +4 -1
- pixeltable-0.2.12/PKG-INFO +0 -137
- pixeltable-0.2.12/README.md +0 -99
- pixeltable-0.2.12/pixeltable/exprs/predicate.py +0 -44
- pixeltable-0.2.12/pixeltable/functions/string.py +0 -21
- {pixeltable-0.2.12 → pixeltable-0.2.13}/LICENSE +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/__init__.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/__init__.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/catalog.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/column.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/dir.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/globals.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/named_function.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/path.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/path_dict.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/schema_object.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/catalog/table_version_path.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/env.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exceptions.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exec/__init__.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exec/aggregation_node.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exec/cache_prefetch_node.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exec/component_iteration_node.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exec/data_row_batch.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exec/exec_context.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exec/exec_node.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exec/in_memory_data_node.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exec/media_validation_node.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/arithmetic_expr.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/array_slice.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/column_property_ref.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/column_ref.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/data_row.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/expr_set.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/function_call.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/globals.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/image_member_access.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/inline_array.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/inline_dict.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/json_mapper.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/json_path.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/literal.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/object_ref.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/row_builder.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/rowid_ref.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/similarity_expr.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/type_cast.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/exprs/variable.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/ext/__init__.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/ext/functions/whisperx.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/ext/functions/yolox.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/func/__init__.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/func/expr_template_function.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/func/function.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/func/globals.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/func/query_template_function.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/func/signature.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/func/udf.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/functions/__init__.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/functions/eval.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/functions/fireworks.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/functions/openai.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/functions/together.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/functions/util.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/functions/whisper.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/index/__init__.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/index/base.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/index/btree.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/index/embedding_index.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/io/__init__.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/io/external_store.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/io/hf_datasets.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/io/parquet.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/iterators/__init__.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/iterators/base.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/iterators/document.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/iterators/string.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/iterators/video.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/metadata/__init__.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/metadata/converters/convert_10.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/metadata/converters/convert_12.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/metadata/converters/convert_13.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/metadata/converters/convert_14.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/metadata/converters/convert_15.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/metadata/converters/convert_16.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/metadata/converters/convert_17.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/metadata/converters/util.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/metadata/schema.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/store.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/tool/create_test_video.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/tool/embed_udf.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/type_system.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/__init__.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/arrow.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/coco.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/code.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/documents.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/filecache.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/formatter.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/help.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/http_server.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/media_store.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/pytorch.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/sql.py +0 -0
- {pixeltable-0.2.12 → pixeltable-0.2.13}/pixeltable/utils/transactional_directory.py +0 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pixeltable
|
|
3
|
+
Version: 0.2.13
|
|
4
|
+
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
|
+
Author: Pixeltable, Inc.
|
|
6
|
+
Author-email: contact@pixeltable.com
|
|
7
|
+
Requires-Python: >=3.9,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Dist: av (>=10.0.0)
|
|
14
|
+
Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
|
|
15
|
+
Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
|
|
16
|
+
Requires-Dist: ftfy (>=6.2.0,<7.0.0)
|
|
17
|
+
Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
|
|
18
|
+
Requires-Dist: jmespath (>=1.0.1,<2.0.0)
|
|
19
|
+
Requires-Dist: mistune (>=3.0.2,<4.0.0)
|
|
20
|
+
Requires-Dist: more-itertools (>=10.2,<11.0)
|
|
21
|
+
Requires-Dist: numpy (>=1.25)
|
|
22
|
+
Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
23
|
+
Requires-Dist: pandas (>=2.0,<3.0)
|
|
24
|
+
Requires-Dist: pgserver (==0.1.4)
|
|
25
|
+
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
26
|
+
Requires-Dist: pillow (>=9.3.0)
|
|
27
|
+
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
28
|
+
Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
|
|
29
|
+
Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
|
|
30
|
+
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
31
|
+
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
32
|
+
Requires-Dist: setuptools (==69.1.1)
|
|
33
|
+
Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
|
|
34
|
+
Requires-Dist: tenacity (>=8.2,<9.0)
|
|
35
|
+
Requires-Dist: tqdm (>=4.64)
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
<div align="center">
|
|
39
|
+
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
|
|
40
|
+
|
|
41
|
+
# Unifying Data, Models, and Orchestration for AI Products
|
|
42
|
+
|
|
43
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
44
|
+

|
|
45
|
+
[]()
|
|
46
|
+
[](https://github.com/pixeltable/pixeltable/actions)
|
|
47
|
+
[](https://pypi.org/project/pixeltable/)
|
|
48
|
+
|
|
49
|
+
[Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
|
|
50
|
+
</div>
|
|
51
|
+
|
|
52
|
+
Pixeltable is a Python library that lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
|
|
53
|
+
|
|
54
|
+
### What problems does Pixeltable solve?
|
|
55
|
+
|
|
56
|
+
Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome.
|
|
57
|
+
|
|
58
|
+
## 💾 Installation
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
pip install pixeltable
|
|
62
|
+
```
|
|
63
|
+
> [!IMPORTANT]
|
|
64
|
+
> Pixeltable is persistent. Unlike in-memory Python libraries such as Pandas, Pixeltable is a database. When working locally or against a hosted version of Pixeltable, use [get_table](https://pixeltable.github.io/pixeltable/api/pixeltable/#pixeltable.get_table) at any time to retrieve an existing table.
|
|
65
|
+
|
|
66
|
+
## 💡 Getting Started
|
|
67
|
+
Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
|
|
68
|
+
|
|
69
|
+
| Topic | Notebook | Topic | Notebook |
|
|
70
|
+
|:----------|:-----------------|:-------------------------|:---------------------------------:|
|
|
71
|
+
| 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
72
|
+
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
73
|
+
| Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
74
|
+
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label%20Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
75
|
+
|
|
76
|
+
## 🧱 Code Samples
|
|
77
|
+
|
|
78
|
+
### Import media data into Pixeltable (videos, images, audio...)
|
|
79
|
+
```python
|
|
80
|
+
import pixeltable as pxt
|
|
81
|
+
|
|
82
|
+
v = pxt.create_table('external_data.videos', {'video': pxt.VideoType()})
|
|
83
|
+
|
|
84
|
+
prefix = 's3://multimedia-commons/'
|
|
85
|
+
paths = [
|
|
86
|
+
'data/videos/mp4/ffe/ffb/ffeffbef41bbc269810b2a1a888de.mp4',
|
|
87
|
+
'data/videos/mp4/ffe/feb/ffefebb41485539f964760e6115fbc44.mp4',
|
|
88
|
+
'data/videos/mp4/ffe/f73/ffef7384d698b5f70d411c696247169.mp4'
|
|
89
|
+
]
|
|
90
|
+
v.insert({'video': prefix + p} for p in paths)
|
|
91
|
+
```
|
|
92
|
+
Learn how to [work with data in Pixeltable](https://pixeltable.readme.io/docs/working-with-external-files).
|
|
93
|
+
|
|
94
|
+
### Add an object detection model to your workflow
|
|
95
|
+
```python
|
|
96
|
+
table['detections'] = huggingface.detr_for_object_detection(table.input_image, model_id='facebook/detr-resnet-50')
|
|
97
|
+
```
|
|
98
|
+
Learn about computed columns and object detection: [Comparing object detection models](https://pixeltable.readme.io/docs/object-detection-in-videos).
|
|
99
|
+
|
|
100
|
+
### Extend Pixeltable's capabilities with user-defined functions
|
|
101
|
+
```python
|
|
102
|
+
@pxt.udf
|
|
103
|
+
def draw_boxes(img: PIL.Image.Image, boxes: list[list[float]]) -> PIL.Image.Image:
|
|
104
|
+
result = img.copy() # Create a copy of `img`
|
|
105
|
+
d = PIL.ImageDraw.Draw(result)
|
|
106
|
+
for box in boxes:
|
|
107
|
+
d.rectangle(box, width=3) # Draw bounding box rectangles on the copied image
|
|
108
|
+
return result
|
|
109
|
+
```
|
|
110
|
+
Learn more about user-defined functions: [UDFs in Pixeltable](https://pixeltable.readme.io/docs/user-defined-functions-udfs).
|
|
111
|
+
|
|
112
|
+
### Automate data operations with views
|
|
113
|
+
```python
|
|
114
|
+
# In this example, the view is defined by iteration over the chunks of a DocumentSplitter.
|
|
115
|
+
chunks_table = pxt.create_view(
|
|
116
|
+
'rag_demo.chunks',
|
|
117
|
+
documents_table,
|
|
118
|
+
iterator=DocumentSplitter.create(
|
|
119
|
+
document=documents_table.document,
|
|
120
|
+
separators='token_limit', limit=300)
|
|
121
|
+
)
|
|
122
|
+
```
|
|
123
|
+
Learn how to leverage views to build your [RAG workflow](https://pixeltable.readme.io/docs/document-indexing-and-rag).
|
|
124
|
+
|
|
125
|
+
### Evaluate model performance
|
|
126
|
+
```python
|
|
127
|
+
# The computation of the mAP metric can simply become a query over the evaluation output, aggregated with the mean_ap() function.
|
|
128
|
+
frames_view.select(mean_ap(frames_view.eval_yolox_tiny), mean_ap(frames_view.eval_yolox_m)).show()
|
|
129
|
+
```
|
|
130
|
+
Learn how to leverage Pixeltable for [Model analytics](https://pixeltable.readme.io/docs/object-detection-in-videos).
|
|
131
|
+
|
|
132
|
+
### Working with inference services
|
|
133
|
+
```python
|
|
134
|
+
chat_table = pxt.create_table('together_demo.chat', {'input': pxt.StringType()})
|
|
135
|
+
|
|
136
|
+
# The chat-completions API expects JSON-formatted input:
|
|
137
|
+
messages = [{'role': 'user', 'content': chat_table.input}]
|
|
138
|
+
|
|
139
|
+
# This example shows how additional parameters from the Together API can be used in Pixeltable to customize the model behavior.
|
|
140
|
+
chat_table['output'] = chat_completions(
|
|
141
|
+
messages=messages,
|
|
142
|
+
model='mistralai/Mixtral-8x7B-Instruct-v0.1',
|
|
143
|
+
max_tokens=300,
|
|
144
|
+
stop=['\n'],
|
|
145
|
+
temperature=0.7,
|
|
146
|
+
top_p=0.9,
|
|
147
|
+
top_k=40,
|
|
148
|
+
repetition_penalty=1.1,
|
|
149
|
+
logprobs=1,
|
|
150
|
+
echo=True
|
|
151
|
+
)
|
|
152
|
+
chat_table['response'] = chat_table.output.choices[0].message.content
|
|
153
|
+
|
|
154
|
+
# Start a conversation
|
|
155
|
+
chat_table.insert([
|
|
156
|
+
{'input': 'How many species of felids have been classified?'},
|
|
157
|
+
{'input': 'Can you make me a coffee?'}
|
|
158
|
+
])
|
|
159
|
+
chat_table.select(chat_table.input, chat_table.response).head()
|
|
160
|
+
```
|
|
161
|
+
Learn how to interact with inference services such as [Together AI](https://pixeltable.readme.io/docs/together-ai) in Pixeltable.
|
|
162
|
+
|
|
163
|
+
## ❓ FAQ
|
|
164
|
+
|
|
165
|
+
### What is Pixeltable?
|
|
166
|
+
|
|
167
|
+
Pixeltable unifies data storage, versioning, and indexing with orchestration and model versioning under a declarative table interface, with transformations, model inference, and custom logic represented as computed columns.
|
|
168
|
+
|
|
169
|
+
### What does Pixeltable provide me with? Pixeltable provides:
|
|
170
|
+
|
|
171
|
+
- Data storage and versioning
|
|
172
|
+
- Combined Data and Model Lineage
|
|
173
|
+
- Indexing (e.g. embedding vectors) and Data Retrieval
|
|
174
|
+
- Orchestration of multimodal workloads
|
|
175
|
+
- Incremental updates
|
|
176
|
+
- Code is automatically production-ready
|
|
177
|
+
|
|
178
|
+
### Why should you use Pixeltable?
|
|
179
|
+
|
|
180
|
+
- **It gives you transparency and reproducibility**
|
|
181
|
+
- All generated data is automatically recorded and versioned
|
|
182
|
+
- You will never need to re-run a workload because you lost track of the input data
|
|
183
|
+
- **It saves you money**
|
|
184
|
+
- All data changes are automatically incremental
|
|
185
|
+
- You never need to re-run pipelines from scratch because you’re adding data
|
|
186
|
+
- **It integrates with any existing Python code or libraries**
|
|
187
|
+
- Bring your ever-changing code and workloads
|
|
188
|
+
- You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
|
|
189
|
+
|
|
190
|
+
### What does Pixeltable not provide?
|
|
191
|
+
|
|
192
|
+
- Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
|
|
193
|
+
- We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
|
|
194
|
+
|
|
195
|
+
> [!TIP]
|
|
196
|
+
> Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
|
|
197
|
+
|
|
198
|
+
## 🐛 Contributions & Feedback
|
|
199
|
+
|
|
200
|
+
Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
|
|
201
|
+
<br>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
|
|
202
|
+
|
|
203
|
+
## 🏛️ License
|
|
204
|
+
|
|
205
|
+
This library is licensed under the Apache 2.0 License.
|
|
206
|
+
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
|
|
3
|
+
|
|
4
|
+
# Unifying Data, Models, and Orchestration for AI Products
|
|
5
|
+
|
|
6
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
7
|
+

|
|
8
|
+
[]()
|
|
9
|
+
[](https://github.com/pixeltable/pixeltable/actions)
|
|
10
|
+
[](https://pypi.org/project/pixeltable/)
|
|
11
|
+
|
|
12
|
+
[Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
|
|
13
|
+
</div>
|
|
14
|
+
|
|
15
|
+
Pixeltable is a Python library that lets ML Engineers and Data Scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
|
|
16
|
+
|
|
17
|
+
### What problems does Pixeltable solve?
|
|
18
|
+
|
|
19
|
+
Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome.
|
|
20
|
+
|
|
21
|
+
## 💾 Installation
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pip install pixeltable
|
|
25
|
+
```
|
|
26
|
+
> [!IMPORTANT]
|
|
27
|
+
> Pixeltable is persistent. Unlike in-memory Python libraries such as Pandas, Pixeltable is a database. When working locally or against a hosted version of Pixeltable, use [get_table](https://pixeltable.github.io/pixeltable/api/pixeltable/#pixeltable.get_table) at any time to retrieve an existing table.
|
|
28
|
+
|
|
29
|
+
## 💡 Getting Started
|
|
30
|
+
Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
|
|
31
|
+
|
|
32
|
+
| Topic | Notebook | Topic | Notebook |
|
|
33
|
+
|:----------|:-----------------|:-------------------------|:---------------------------------:|
|
|
34
|
+
| 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
35
|
+
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
36
|
+
| Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
37
|
+
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label%20Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
38
|
+
|
|
39
|
+
## 🧱 Code Samples
|
|
40
|
+
|
|
41
|
+
### Import media data into Pixeltable (videos, images, audio...)
|
|
42
|
+
```python
|
|
43
|
+
import pixeltable as pxt
|
|
44
|
+
|
|
45
|
+
v = pxt.create_table('external_data.videos', {'video': pxt.VideoType()})
|
|
46
|
+
|
|
47
|
+
prefix = 's3://multimedia-commons/'
|
|
48
|
+
paths = [
|
|
49
|
+
'data/videos/mp4/ffe/ffb/ffeffbef41bbc269810b2a1a888de.mp4',
|
|
50
|
+
'data/videos/mp4/ffe/feb/ffefebb41485539f964760e6115fbc44.mp4',
|
|
51
|
+
'data/videos/mp4/ffe/f73/ffef7384d698b5f70d411c696247169.mp4'
|
|
52
|
+
]
|
|
53
|
+
v.insert({'video': prefix + p} for p in paths)
|
|
54
|
+
```
|
|
55
|
+
Learn how to [work with data in Pixeltable](https://pixeltable.readme.io/docs/working-with-external-files).
|
|
56
|
+
|
|
57
|
+
### Add an object detection model to your workflow
|
|
58
|
+
```python
|
|
59
|
+
table['detections'] = huggingface.detr_for_object_detection(table.input_image, model_id='facebook/detr-resnet-50')
|
|
60
|
+
```
|
|
61
|
+
Learn about computed columns and object detection: [Comparing object detection models](https://pixeltable.readme.io/docs/object-detection-in-videos).
|
|
62
|
+
|
|
63
|
+
### Extend Pixeltable's capabilities with user-defined functions
|
|
64
|
+
```python
|
|
65
|
+
@pxt.udf
|
|
66
|
+
def draw_boxes(img: PIL.Image.Image, boxes: list[list[float]]) -> PIL.Image.Image:
|
|
67
|
+
result = img.copy() # Create a copy of `img`
|
|
68
|
+
d = PIL.ImageDraw.Draw(result)
|
|
69
|
+
for box in boxes:
|
|
70
|
+
d.rectangle(box, width=3) # Draw bounding box rectangles on the copied image
|
|
71
|
+
return result
|
|
72
|
+
```
|
|
73
|
+
Learn more about user-defined functions: [UDFs in Pixeltable](https://pixeltable.readme.io/docs/user-defined-functions-udfs).
|
|
74
|
+
|
|
75
|
+
### Automate data operations with views
|
|
76
|
+
```python
|
|
77
|
+
# In this example, the view is defined by iteration over the chunks of a DocumentSplitter.
|
|
78
|
+
chunks_table = pxt.create_view(
|
|
79
|
+
'rag_demo.chunks',
|
|
80
|
+
documents_table,
|
|
81
|
+
iterator=DocumentSplitter.create(
|
|
82
|
+
document=documents_table.document,
|
|
83
|
+
separators='token_limit', limit=300)
|
|
84
|
+
)
|
|
85
|
+
```
|
|
86
|
+
Learn how to leverage views to build your [RAG workflow](https://pixeltable.readme.io/docs/document-indexing-and-rag).
|
|
87
|
+
|
|
88
|
+
### Evaluate model performance
|
|
89
|
+
```python
|
|
90
|
+
# The computation of the mAP metric can simply become a query over the evaluation output, aggregated with the mean_ap() function.
|
|
91
|
+
frames_view.select(mean_ap(frames_view.eval_yolox_tiny), mean_ap(frames_view.eval_yolox_m)).show()
|
|
92
|
+
```
|
|
93
|
+
Learn how to leverage Pixeltable for [Model analytics](https://pixeltable.readme.io/docs/object-detection-in-videos).
|
|
94
|
+
|
|
95
|
+
### Working with inference services
|
|
96
|
+
```python
|
|
97
|
+
chat_table = pxt.create_table('together_demo.chat', {'input': pxt.StringType()})
|
|
98
|
+
|
|
99
|
+
# The chat-completions API expects JSON-formatted input:
|
|
100
|
+
messages = [{'role': 'user', 'content': chat_table.input}]
|
|
101
|
+
|
|
102
|
+
# This example shows how additional parameters from the Together API can be used in Pixeltable to customize the model behavior.
|
|
103
|
+
chat_table['output'] = chat_completions(
|
|
104
|
+
messages=messages,
|
|
105
|
+
model='mistralai/Mixtral-8x7B-Instruct-v0.1',
|
|
106
|
+
max_tokens=300,
|
|
107
|
+
stop=['\n'],
|
|
108
|
+
temperature=0.7,
|
|
109
|
+
top_p=0.9,
|
|
110
|
+
top_k=40,
|
|
111
|
+
repetition_penalty=1.1,
|
|
112
|
+
logprobs=1,
|
|
113
|
+
echo=True
|
|
114
|
+
)
|
|
115
|
+
chat_table['response'] = chat_table.output.choices[0].message.content
|
|
116
|
+
|
|
117
|
+
# Start a conversation
|
|
118
|
+
chat_table.insert([
|
|
119
|
+
{'input': 'How many species of felids have been classified?'},
|
|
120
|
+
{'input': 'Can you make me a coffee?'}
|
|
121
|
+
])
|
|
122
|
+
chat_table.select(chat_table.input, chat_table.response).head()
|
|
123
|
+
```
|
|
124
|
+
Learn how to interact with inference services such as [Together AI](https://pixeltable.readme.io/docs/together-ai) in Pixeltable.
|
|
125
|
+
|
|
126
|
+
## ❓ FAQ
|
|
127
|
+
|
|
128
|
+
### What is Pixeltable?
|
|
129
|
+
|
|
130
|
+
Pixeltable unifies data storage, versioning, and indexing with orchestration and model versioning under a declarative table interface, with transformations, model inference, and custom logic represented as computed columns.
|
|
131
|
+
|
|
132
|
+
### What does Pixeltable provide me with? Pixeltable provides:
|
|
133
|
+
|
|
134
|
+
- Data storage and versioning
|
|
135
|
+
- Combined Data and Model Lineage
|
|
136
|
+
- Indexing (e.g. embedding vectors) and Data Retrieval
|
|
137
|
+
- Orchestration of multimodal workloads
|
|
138
|
+
- Incremental updates
|
|
139
|
+
- Code is automatically production-ready
|
|
140
|
+
|
|
141
|
+
### Why should you use Pixeltable?
|
|
142
|
+
|
|
143
|
+
- **It gives you transparency and reproducibility**
|
|
144
|
+
- All generated data is automatically recorded and versioned
|
|
145
|
+
- You will never need to re-run a workload because you lost track of the input data
|
|
146
|
+
- **It saves you money**
|
|
147
|
+
- All data changes are automatically incremental
|
|
148
|
+
- You never need to re-run pipelines from scratch because you’re adding data
|
|
149
|
+
- **It integrates with any existing Python code or libraries**
|
|
150
|
+
- Bring your ever-changing code and workloads
|
|
151
|
+
- You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
|
|
152
|
+
|
|
153
|
+
### What does Pixeltable not provide?
|
|
154
|
+
|
|
155
|
+
- Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
|
|
156
|
+
- We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
|
|
157
|
+
|
|
158
|
+
> [!TIP]
|
|
159
|
+
> Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
|
|
160
|
+
|
|
161
|
+
## 🐛 Contributions & Feedback
|
|
162
|
+
|
|
163
|
+
Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
|
|
164
|
+
<br>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
|
|
165
|
+
|
|
166
|
+
## 🏛️ License
|
|
167
|
+
|
|
168
|
+
This library is licensed under the Apache 2.0 License.
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.12"
|
|
3
|
-
__version_tuple__ = (0, 2, 12)
|
|
2
|
+
__version__ = "0.2.13"
|
|
3
|
+
__version_tuple__ = (0, 2, 13)
|
|
@@ -129,11 +129,11 @@ class InsertableTable(Table):
|
|
|
129
129
|
msg = str(e)
|
|
130
130
|
raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}')
|
|
131
131
|
|
|
132
|
-
def delete(self, where: Optional['pixeltable.exprs.
|
|
132
|
+
def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
|
|
133
133
|
"""Delete rows in this table.
|
|
134
134
|
|
|
135
135
|
Args:
|
|
136
|
-
where: a
|
|
136
|
+
where: a predicate to filter rows to delete.
|
|
137
137
|
|
|
138
138
|
Examples:
|
|
139
139
|
Delete all rows in a table:
|
|
@@ -113,7 +113,7 @@ class Table(SchemaObject):
|
|
|
113
113
|
from pixeltable.dataframe import DataFrame
|
|
114
114
|
return DataFrame(self._tbl_version_path).select(*items, **named_items)
|
|
115
115
|
|
|
116
|
-
def where(self, pred: 'exprs.
|
|
116
|
+
def where(self, pred: 'exprs.Expr') -> 'pixeltable.dataframe.DataFrame':
|
|
117
117
|
"""Return a DataFrame for this table.
|
|
118
118
|
"""
|
|
119
119
|
# local import: avoid circular imports
|
|
@@ -716,13 +716,13 @@ class Table(SchemaObject):
|
|
|
716
716
|
raise NotImplementedError
|
|
717
717
|
|
|
718
718
|
def update(
|
|
719
|
-
self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.
|
|
719
|
+
self, value_spec: dict[str, Any], where: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True
|
|
720
720
|
) -> UpdateStatus:
|
|
721
721
|
"""Update rows in this table.
|
|
722
722
|
|
|
723
723
|
Args:
|
|
724
724
|
value_spec: a dictionary mapping column names to literal values or Pixeltable expressions.
|
|
725
|
-
where: a
|
|
725
|
+
where: a predicate to filter rows to update.
|
|
726
726
|
cascade: if True, also update all computed columns that transitively depend on the updated columns.
|
|
727
727
|
|
|
728
728
|
Examples:
|
|
@@ -786,11 +786,11 @@ class Table(SchemaObject):
|
|
|
786
786
|
row_updates.append(col_vals)
|
|
787
787
|
return self._tbl_version.batch_update(row_updates, rowids, cascade)
|
|
788
788
|
|
|
789
|
-
def delete(self, where: Optional['pixeltable.exprs.
|
|
789
|
+
def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
|
|
790
790
|
"""Delete rows in this table.
|
|
791
791
|
|
|
792
792
|
Args:
|
|
793
|
-
where: a
|
|
793
|
+
where: a predicate to filter rows to delete.
|
|
794
794
|
|
|
795
795
|
Examples:
|
|
796
796
|
Delete all rows in a table:
|
|
@@ -678,12 +678,12 @@ class TableVersion:
|
|
|
678
678
|
return result
|
|
679
679
|
|
|
680
680
|
def update(
|
|
681
|
-
self, value_spec: dict[str, Any], where: Optional['exprs.
|
|
681
|
+
self, value_spec: dict[str, Any], where: Optional['exprs.Expr'] = None, cascade: bool = True
|
|
682
682
|
) -> UpdateStatus:
|
|
683
683
|
"""Update rows in this TableVersionPath.
|
|
684
684
|
Args:
|
|
685
685
|
value_spec: a list of (column, value) pairs specifying the columns to update and their new values.
|
|
686
|
-
where: a
|
|
686
|
+
where: a predicate to filter rows to update.
|
|
687
687
|
cascade: if True, also update all computed columns that transitively depend on the updated columns,
|
|
688
688
|
including within views.
|
|
689
689
|
"""
|
|
@@ -694,8 +694,8 @@ class TableVersion:
|
|
|
694
694
|
|
|
695
695
|
update_spec = self._validate_update_spec(value_spec, allow_pk=False, allow_exprs=True)
|
|
696
696
|
if where is not None:
|
|
697
|
-
if not isinstance(where, exprs.
|
|
698
|
-
raise excs.Error(f"'where' argument must be a
|
|
697
|
+
if not isinstance(where, exprs.Expr):
|
|
698
|
+
raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
|
|
699
699
|
analysis_info = Planner.analyze(self.path, where)
|
|
700
700
|
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
701
701
|
if analysis_info.filter is not None:
|
|
@@ -757,7 +757,7 @@ class TableVersion:
|
|
|
757
757
|
|
|
758
758
|
def _update(
|
|
759
759
|
self, conn: sql.engine.Connection, update_targets: dict[Column, 'pixeltable.exprs.Expr'],
|
|
760
|
-
where_clause: Optional['pixeltable.exprs.
|
|
760
|
+
where_clause: Optional['pixeltable.exprs.Expr'] = None, cascade: bool = True,
|
|
761
761
|
show_progress: bool = True
|
|
762
762
|
) -> UpdateStatus:
|
|
763
763
|
from pixeltable.plan import Planner
|
|
@@ -789,8 +789,6 @@ class TableVersion:
|
|
|
789
789
|
raise excs.Error(f'Column {col_name} is computed and cannot be updated')
|
|
790
790
|
if col.is_pk and not allow_pk:
|
|
791
791
|
raise excs.Error(f'Column {col_name} is a primary key column and cannot be updated')
|
|
792
|
-
if col.col_type.is_media_type():
|
|
793
|
-
raise excs.Error(f'Column {col_name} has type image/video/audio/document and cannot be updated')
|
|
794
792
|
|
|
795
793
|
# make sure that the value is compatible with the column type
|
|
796
794
|
try:
|
|
@@ -848,17 +846,17 @@ class TableVersion:
|
|
|
848
846
|
result.cols_with_excs = list(dict.fromkeys(result.cols_with_excs).keys()) # remove duplicates
|
|
849
847
|
return result
|
|
850
848
|
|
|
851
|
-
def delete(self, where: Optional['exprs.
|
|
849
|
+
def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
|
|
852
850
|
"""Delete rows in this table.
|
|
853
851
|
Args:
|
|
854
|
-
where: a
|
|
852
|
+
where: a predicate to filter rows to delete.
|
|
855
853
|
"""
|
|
856
854
|
assert self.is_insertable()
|
|
857
|
-
from pixeltable.exprs import
|
|
855
|
+
from pixeltable.exprs import Expr
|
|
858
856
|
from pixeltable.plan import Planner
|
|
859
857
|
if where is not None:
|
|
860
|
-
if not isinstance(where,
|
|
861
|
-
raise excs.Error(f"'where' argument must be a
|
|
858
|
+
if not isinstance(where, Expr):
|
|
859
|
+
raise excs.Error(f"'where' argument must be a predicate, got {type(where)}")
|
|
862
860
|
analysis_info = Planner.analyze(self.path, where)
|
|
863
861
|
# for now we require that the updated rows can be identified via SQL, rather than via a Python filter
|
|
864
862
|
if analysis_info.filter is not None:
|
|
@@ -872,11 +870,11 @@ class TableVersion:
|
|
|
872
870
|
return status
|
|
873
871
|
|
|
874
872
|
def propagate_delete(
|
|
875
|
-
self, where: Optional['exprs.
|
|
873
|
+
self, where: Optional['exprs.Expr'], base_versions: List[Optional[int]],
|
|
876
874
|
conn: sql.engine.Connection, timestamp: float) -> int:
|
|
877
875
|
"""Delete rows in this table and propagate to views.
|
|
878
876
|
Args:
|
|
879
|
-
where: a
|
|
877
|
+
where: a predicate to filter rows to delete.
|
|
880
878
|
Returns:
|
|
881
879
|
number of deleted rows
|
|
882
880
|
"""
|
|
@@ -51,7 +51,7 @@ class View(Table):
|
|
|
51
51
|
@classmethod
|
|
52
52
|
def create(
|
|
53
53
|
cls, dir_id: UUID, name: str, base: TableVersionPath, schema: Dict[str, Any],
|
|
54
|
-
predicate: 'pxt.exprs.
|
|
54
|
+
predicate: 'pxt.exprs.Expr', is_snapshot: bool, num_retained_versions: int, comment: str,
|
|
55
55
|
iterator_cls: Optional[Type[ComponentIterator]], iterator_args: Optional[Dict]
|
|
56
56
|
) -> View:
|
|
57
57
|
columns = cls._create_columns(schema)
|
|
@@ -213,5 +213,5 @@ class View(Table):
|
|
|
213
213
|
) -> UpdateStatus:
|
|
214
214
|
raise excs.Error(f'{self.display_name()} {self._name!r}: cannot insert into view')
|
|
215
215
|
|
|
216
|
-
def delete(self, where: Optional['pixeltable.exprs.
|
|
216
|
+
def delete(self, where: Optional['pixeltable.exprs.Expr'] = None) -> UpdateStatus:
|
|
217
217
|
raise excs.Error(f'{self.display_name()} {self._name!r}: cannot delete from view')
|
|
@@ -153,7 +153,7 @@ class DataFrame:
|
|
|
153
153
|
self,
|
|
154
154
|
tbl: catalog.TableVersionPath,
|
|
155
155
|
select_list: Optional[List[Tuple[exprs.Expr, Optional[str]]]] = None,
|
|
156
|
-
where_clause: Optional[exprs.
|
|
156
|
+
where_clause: Optional[exprs.Expr] = None,
|
|
157
157
|
group_by_clause: Optional[List[exprs.Expr]] = None,
|
|
158
158
|
grouping_tbl: Optional[catalog.TableVersion] = None,
|
|
159
159
|
order_by_clause: Optional[List[Tuple[exprs.Expr, bool]]] = None, # List[(expr, asc)]
|
|
@@ -530,7 +530,11 @@ class DataFrame:
|
|
|
530
530
|
limit=self.limit_val,
|
|
531
531
|
)
|
|
532
532
|
|
|
533
|
-
def where(self, pred: exprs.
|
|
533
|
+
def where(self, pred: exprs.Expr) -> DataFrame:
|
|
534
|
+
if not isinstance(pred, exprs.Expr):
|
|
535
|
+
raise excs.Error(f'Where() requires a Pixeltable expression, but instead got {type(pred)}')
|
|
536
|
+
if not pred.col_type.is_bool_type():
|
|
537
|
+
raise excs.Error(f'Where(): expression needs to return bool, but instead returns {pred.col_type}')
|
|
534
538
|
return DataFrame(
|
|
535
539
|
self.tbl,
|
|
536
540
|
select_list=self.select_list,
|
|
@@ -628,12 +632,9 @@ class DataFrame:
|
|
|
628
632
|
def __getitem__(self, index: object) -> DataFrame:
|
|
629
633
|
"""
|
|
630
634
|
Allowed:
|
|
631
|
-
- [<Predicate>]: filter operation
|
|
632
635
|
- [List[Expr]]/[Tuple[Expr]]: setting the select list
|
|
633
636
|
- [Expr]: setting a single-col select list
|
|
634
637
|
"""
|
|
635
|
-
if isinstance(index, exprs.Predicate):
|
|
636
|
-
return self.where(index)
|
|
637
638
|
if isinstance(index, tuple):
|
|
638
639
|
index = list(index)
|
|
639
640
|
if isinstance(index, exprs.Expr):
|
|
@@ -668,7 +669,7 @@ class DataFrame:
|
|
|
668
669
|
tbl = catalog.TableVersionPath.from_dict(d['tbl'])
|
|
669
670
|
select_list = [(exprs.Expr.from_dict(e), name) for e, name in d['select_list']] \
|
|
670
671
|
if d['select_list'] is not None else None
|
|
671
|
-
where_clause = exprs.
|
|
672
|
+
where_clause = exprs.Expr.from_dict(d['where_clause']) \
|
|
672
673
|
if d['where_clause'] is not None else None
|
|
673
674
|
group_by_clause = [exprs.Expr.from_dict(e) for e in d['group_by_clause']] \
|
|
674
675
|
if d['group_by_clause'] is not None else None
|
|
@@ -50,7 +50,14 @@ class ExprEvalNode(ExecNode):
|
|
|
50
50
|
|
|
51
51
|
def _open(self) -> None:
|
|
52
52
|
warnings.simplefilter("ignore", category=TqdmWarning)
|
|
53
|
-
|
|
53
|
+
# This is a temporary hack. When B-tree indices on string columns were implemented (via computed columns
|
|
54
|
+
# that invoke the `BtreeIndex.str_filter` udf), it resulted in frivolous progress bars appearing on every
|
|
55
|
+
# insertion. This special-cases the `str_filter` call to suppress the corresponding progress bar.
|
|
56
|
+
# TODO(aaron-siegel) Remove this hack once we clean up progress bars more generally.
|
|
57
|
+
is_str_filter_node = all(
|
|
58
|
+
isinstance(expr, exprs.FunctionCall) and expr.fn.name == 'str_filter' for expr in self.output_exprs
|
|
59
|
+
)
|
|
60
|
+
if self.ctx.show_pbar and not is_str_filter_node:
|
|
54
61
|
self.pbar = tqdm(
|
|
55
62
|
total=len(self.target_exprs) * self.ctx.num_rows,
|
|
56
63
|
desc='Computing cells',
|
|
@@ -19,7 +19,7 @@ class SqlScanNode(ExecNode):
|
|
|
19
19
|
def __init__(
|
|
20
20
|
self, tbl: catalog.TableVersionPath, row_builder: exprs.RowBuilder,
|
|
21
21
|
select_list: Iterable[exprs.Expr],
|
|
22
|
-
where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.
|
|
22
|
+
where_clause: Optional[exprs.Expr] = None, filter: Optional[exprs.Expr] = None,
|
|
23
23
|
order_by_items: Optional[List[Tuple[exprs.Expr, bool]]] = None,
|
|
24
24
|
limit: int = 0, set_pk: bool = False, exact_version_only: Optional[List[catalog.TableVersion]] = None
|
|
25
25
|
):
|
|
@@ -17,7 +17,6 @@ from .json_mapper import JsonMapper
|
|
|
17
17
|
from .json_path import RELATIVE_PATH_ROOT, JsonPath
|
|
18
18
|
from .literal import Literal
|
|
19
19
|
from .object_ref import ObjectRef
|
|
20
|
-
from .predicate import Predicate
|
|
21
20
|
from .row_builder import RowBuilder, ColumnSlotIdx, ExecProfile
|
|
22
21
|
from .rowid_ref import RowidRef
|
|
23
22
|
from .similarity_expr import SimilarityExpr
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Optional, List, Any, Dict
|
|
3
|
+
from typing import Optional, List, Any, Dict
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
6
6
|
|
|
@@ -9,15 +9,15 @@ from .data_row import DataRow
|
|
|
9
9
|
from .expr import Expr
|
|
10
10
|
from .globals import ComparisonOperator
|
|
11
11
|
from .literal import Literal
|
|
12
|
-
from .predicate import Predicate
|
|
13
12
|
from .row_builder import RowBuilder
|
|
14
13
|
import pixeltable.exceptions as excs
|
|
15
14
|
import pixeltable.index as index
|
|
15
|
+
import pixeltable.type_system as ts
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
class Comparison(
|
|
18
|
+
class Comparison(Expr):
|
|
19
19
|
def __init__(self, operator: ComparisonOperator, op1: Expr, op2: Expr):
|
|
20
|
-
super().__init__()
|
|
20
|
+
super().__init__(ts.BoolType())
|
|
21
21
|
self.operator = operator
|
|
22
22
|
|
|
23
23
|
# if this is a comparison of a column to a literal (ie, could be used as a search argument in an index lookup),
|
|
@@ -50,7 +50,7 @@ class Comparison(Predicate):
|
|
|
50
50
|
def _equals(self, other: Comparison) -> bool:
|
|
51
51
|
return self.operator == other.operator
|
|
52
52
|
|
|
53
|
-
def _id_attrs(self) ->
|
|
53
|
+
def _id_attrs(self) -> list[tuple[str, Any]]:
|
|
54
54
|
return super()._id_attrs() + [('operator', self.operator.value)]
|
|
55
55
|
|
|
56
56
|
@property
|