pixeltable 0.2.6__tar.gz → 0.2.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable-0.2.8/PKG-INFO +137 -0
- pixeltable-0.2.8/README.md +99 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/__init__.py +3 -1
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/__version__.py +2 -2
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/column.py +8 -2
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/insertable_table.py +32 -17
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/table.py +167 -12
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/table_version.py +185 -106
- pixeltable-0.2.8/pixeltable/datatransfer/__init__.py +1 -0
- pixeltable-0.2.8/pixeltable/datatransfer/label_studio.py +452 -0
- pixeltable-0.2.8/pixeltable/datatransfer/remote.py +85 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/env.py +148 -69
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/column_ref.py +2 -2
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/comparison.py +39 -1
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/data_row.py +7 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/expr.py +11 -12
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/function_call.py +0 -3
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/globals.py +14 -2
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/similarity_expr.py +5 -3
- pixeltable-0.2.8/pixeltable/ext/functions/whisperx.py +30 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/ext/functions/yolox.py +16 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/aggregate_function.py +2 -2
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/expr_template_function.py +3 -1
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/udf.py +2 -2
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/fireworks.py +9 -4
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/huggingface.py +25 -1
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/openai.py +15 -10
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/together.py +11 -6
- pixeltable-0.2.8/pixeltable/functions/util.py +9 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/video.py +46 -8
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/globals.py +20 -2
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/index/__init__.py +1 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/index/base.py +6 -1
- pixeltable-0.2.8/pixeltable/index/btree.py +54 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/index/embedding_index.py +4 -1
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/io/__init__.py +1 -0
- pixeltable-0.2.8/pixeltable/io/globals.py +58 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/iterators/base.py +4 -4
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/iterators/document.py +26 -15
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/iterators/video.py +9 -1
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/metadata/__init__.py +2 -2
- pixeltable-0.2.8/pixeltable/metadata/converters/convert_14.py +13 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/metadata/schema.py +9 -6
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/plan.py +9 -5
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/store.py +14 -21
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/tool/create_test_db_dump.py +14 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/type_system.py +14 -4
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/coco.py +94 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pyproject.toml +27 -14
- pixeltable-0.2.6/PKG-INFO +0 -131
- pixeltable-0.2.6/README.md +0 -93
- pixeltable-0.2.6/pixeltable/func/nos_function.py +0 -202
- pixeltable-0.2.6/pixeltable/functions/util.py +0 -52
- pixeltable-0.2.6/pixeltable/utils/clip.py +0 -18
- {pixeltable-0.2.6 → pixeltable-0.2.8}/LICENSE +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/__init__.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/catalog.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/dir.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/globals.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/named_function.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/path.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/path_dict.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/schema_object.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/table_version_path.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/view.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/dataframe.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exceptions.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/__init__.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/aggregation_node.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/cache_prefetch_node.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/component_iteration_node.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/data_row_batch.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/exec_context.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/exec_node.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/expr_eval_node.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/in_memory_data_node.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/media_validation_node.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/sql_scan_node.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/__init__.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/arithmetic_expr.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/array_slice.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/column_property_ref.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/compound_predicate.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/expr_set.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/image_member_access.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/in_predicate.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/inline_array.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/inline_dict.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/is_null.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/json_mapper.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/json_path.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/literal.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/object_ref.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/predicate.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/row_builder.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/rowid_ref.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/type_cast.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/variable.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/ext/__init__.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/__init__.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/callable_function.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/function.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/function_registry.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/globals.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/signature.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/__init__.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/eval.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/image.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/pil/image.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/string.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/io/hf_datasets.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/io/pandas.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/io/parquet.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/iterators/__init__.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/metadata/converters/convert_10.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/metadata/converters/convert_12.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/metadata/converters/convert_13.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/tool/create_test_video.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/__init__.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/arrow.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/documents.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/filecache.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/help.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/http_server.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/media_store.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/pytorch.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/s3.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/sql.py +0 -0
- {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/transactional_directory.py +0 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pixeltable
|
|
3
|
+
Version: 0.2.8
|
|
4
|
+
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
|
+
Author: Marcel Kornacker
|
|
6
|
+
Author-email: marcelk@gmail.com
|
|
7
|
+
Requires-Python: >=3.9,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Dist: av (>=10.0.0)
|
|
14
|
+
Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
|
|
15
|
+
Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
|
|
16
|
+
Requires-Dist: ftfy (>=6.2.0,<7.0.0)
|
|
17
|
+
Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
|
|
18
|
+
Requires-Dist: jmespath (>=1.0.1,<2.0.0)
|
|
19
|
+
Requires-Dist: mistune (>=3.0.2,<4.0.0)
|
|
20
|
+
Requires-Dist: more-itertools (>=10.2,<11.0)
|
|
21
|
+
Requires-Dist: numpy (>=1.25)
|
|
22
|
+
Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
23
|
+
Requires-Dist: pandas (>=2.0,<3.0)
|
|
24
|
+
Requires-Dist: pgserver (==0.1.4)
|
|
25
|
+
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
26
|
+
Requires-Dist: pillow (>=9.3.0)
|
|
27
|
+
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
28
|
+
Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
|
|
29
|
+
Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
|
|
30
|
+
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
31
|
+
Requires-Dist: requests (>=2.31.0,<3.0.0)
|
|
32
|
+
Requires-Dist: setuptools (==69.1.1)
|
|
33
|
+
Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
|
|
34
|
+
Requires-Dist: tenacity (>=8.2,<9.0)
|
|
35
|
+
Requires-Dist: tqdm (>=4.64)
|
|
36
|
+
Description-Content-Type: text/markdown
|
|
37
|
+
|
|
38
|
+
<div align="center">
|
|
39
|
+
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
|
|
40
|
+
|
|
41
|
+
# Unifying Data, Models, and Orchestration for AI Products
|
|
42
|
+
|
|
43
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
44
|
+

|
|
45
|
+
[]()
|
|
46
|
+
[](https://github.com/pixeltable/pixeltable/actions)
|
|
47
|
+
[](https://pypi.org/project/pixeltable/)
|
|
48
|
+
|
|
49
|
+
[Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
|
|
50
|
+
</div>
|
|
51
|
+
|
|
52
|
+
Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
|
|
53
|
+
|
|
54
|
+
## What problems does Pixeltable solve?
|
|
55
|
+
|
|
56
|
+
Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome. With Pixeltable you can store, transform, index, and iterate on your data within the same table interface, whether it's text, images, embeddings, or even video. Built-in lineage and versioning ensure transparency and reproducibility, while the development-to-production mirror streamlines deployment.
|
|
57
|
+
|
|
58
|
+
## 💾 Installation
|
|
59
|
+
|
|
60
|
+
```python
|
|
61
|
+
%pip install pixeltable
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
To verify that it's working:
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
import pixeltable as pxt
|
|
68
|
+
pxt.init()
|
|
69
|
+
```
|
|
70
|
+
> [!NOTE]
|
|
71
|
+
> Check out the [Pixeltable Basics](https://pixeltable.readme.io/docs/pixeltable-basics) tutorial for a tour of its most important features.
|
|
72
|
+
|
|
73
|
+
## 💡 Get Started
|
|
74
|
+
Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
|
|
75
|
+
|
|
76
|
+
| Topic | Notebook | API |
|
|
77
|
+
|:--------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|
|
|
78
|
+
| Get Started | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [](https://pixeltable.github.io/pixeltable/api/pixeltable/) |
|
|
79
|
+
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
|
|
80
|
+
| Comparing Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#frame-extraction-for-video-data) |
|
|
81
|
+
| Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
|
|
82
|
+
| Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#inserting-data-into-a-table) |
|
|
83
|
+
|
|
84
|
+
## ❓ FAQ
|
|
85
|
+
|
|
86
|
+
### What does Pixeltable provide me with? Pixeltable provides:
|
|
87
|
+
|
|
88
|
+
- Data storage and versioning
|
|
89
|
+
- Combined Data and Model Lineage
|
|
90
|
+
- Indexing (e.g. embedding vectors) and Data Retrieval
|
|
91
|
+
- Orchestration of multimodal workloads
|
|
92
|
+
- Incremental updates
|
|
93
|
+
- Code is automatically production-ready
|
|
94
|
+
|
|
95
|
+
### Why should you use Pixeltable?
|
|
96
|
+
|
|
97
|
+
- **It gives you transparency and reproducibility**
|
|
98
|
+
- All generated data is automatically recorded and versioned
|
|
99
|
+
- You will never need to re-run a workload because you lost track of the input data
|
|
100
|
+
- **It saves you money**
|
|
101
|
+
- All data changes are automatically incremental
|
|
102
|
+
- You never need to re-run pipelines from scratch because you’re adding data
|
|
103
|
+
- **It integrates with any existing Python code or libraries**
|
|
104
|
+
- Bring your ever-changing code and workloads
|
|
105
|
+
- You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
|
|
106
|
+
|
|
107
|
+
### What is Pixeltable not providing?
|
|
108
|
+
|
|
109
|
+
- Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
|
|
110
|
+
- We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
|
|
111
|
+
|
|
112
|
+
> [!TIP]
|
|
113
|
+
> Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
|
|
114
|
+
|
|
115
|
+
## 📙 Example Use Cases
|
|
116
|
+
|
|
117
|
+
- **Interact with video data at the frame level** without having to think about frame extraction, intermediate file storage, or storage space explosion.
|
|
118
|
+
- **Augment your data incrementally and interactively with built-in functions and UDFs**, such as image transformations, model inference, and visualizations, without having to think about data pipelines, incremental updates, or capturing function output.
|
|
119
|
+
- **Interact with all the data relevant to your AI application** (video, images, documents, audio, structured data, JSON) through a simple dataframe-style API directly in Python. This includes:
|
|
120
|
+
- similarity search on embeddings, supported by high-dimensional vector indexing;
|
|
121
|
+
- path expressions and transformations on JSON data;
|
|
122
|
+
- PIL and OpenCV image operations;
|
|
123
|
+
- assembling frames into videos.
|
|
124
|
+
- **Perform keyword and image similarity search at the video frame level** without having to worry about frame storage.
|
|
125
|
+
- **Access all Pixeltable-resident data directly as a PyTorch dataset** in your training scripts.
|
|
126
|
+
- **Understand the compute and storage costs of your data at the granularity** of individual augmentations and get cost projections before adding new data and new augmentations.
|
|
127
|
+
- **Rely on Pixeltable's automatic versioning and snapshot functionality** to protect against regressions and to ensure reproducibility.
|
|
128
|
+
|
|
129
|
+
## 🐛 Contributions & Feedback
|
|
130
|
+
|
|
131
|
+
Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
|
|
132
|
+
<br/>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
|
|
133
|
+
|
|
134
|
+
## :classical_building: License
|
|
135
|
+
|
|
136
|
+
This library is licensed under the Apache 2.0 License.
|
|
137
|
+
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
|
|
3
|
+
|
|
4
|
+
# Unifying Data, Models, and Orchestration for AI Products
|
|
5
|
+
|
|
6
|
+
[](https://opensource.org/licenses/Apache-2.0)
|
|
7
|
+

|
|
8
|
+
[]()
|
|
9
|
+
[](https://github.com/pixeltable/pixeltable/actions)
|
|
10
|
+
[](https://pypi.org/project/pixeltable/)
|
|
11
|
+
|
|
12
|
+
[Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
|
|
13
|
+
</div>
|
|
14
|
+
|
|
15
|
+
Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
|
|
16
|
+
|
|
17
|
+
## What problems does Pixeltable solve?
|
|
18
|
+
|
|
19
|
+
Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome. With Pixeltable you can store, transform, index, and iterate on your data within the same table interface, whether it's text, images, embeddings, or even video. Built-in lineage and versioning ensure transparency and reproducibility, while the development-to-production mirror streamlines deployment.
|
|
20
|
+
|
|
21
|
+
## 💾 Installation
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
%pip install pixeltable
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
To verify that it's working:
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
import pixeltable as pxt
|
|
31
|
+
pxt.init()
|
|
32
|
+
```
|
|
33
|
+
> [!NOTE]
|
|
34
|
+
> Check out the [Pixeltable Basics](https://pixeltable.readme.io/docs/pixeltable-basics) tutorial for a tour of its most important features.
|
|
35
|
+
|
|
36
|
+
## 💡 Get Started
|
|
37
|
+
Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
|
|
38
|
+
|
|
39
|
+
| Topic | Notebook | API |
|
|
40
|
+
|:--------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|
|
|
41
|
+
| Get Started | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [](https://pixeltable.github.io/pixeltable/api/pixeltable/) |
|
|
42
|
+
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
|
|
43
|
+
| Comparing Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#frame-extraction-for-video-data) |
|
|
44
|
+
| Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
|
|
45
|
+
| Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#inserting-data-into-a-table) |
|
|
46
|
+
|
|
47
|
+
## ❓ FAQ
|
|
48
|
+
|
|
49
|
+
### What does Pixeltable provide me with? Pixeltable provides:
|
|
50
|
+
|
|
51
|
+
- Data storage and versioning
|
|
52
|
+
- Combined Data and Model Lineage
|
|
53
|
+
- Indexing (e.g. embedding vectors) and Data Retrieval
|
|
54
|
+
- Orchestration of multimodal workloads
|
|
55
|
+
- Incremental updates
|
|
56
|
+
- Code is automatically production-ready
|
|
57
|
+
|
|
58
|
+
### Why should you use Pixeltable?
|
|
59
|
+
|
|
60
|
+
- **It gives you transparency and reproducibility**
|
|
61
|
+
- All generated data is automatically recorded and versioned
|
|
62
|
+
- You will never need to re-run a workload because you lost track of the input data
|
|
63
|
+
- **It saves you money**
|
|
64
|
+
- All data changes are automatically incremental
|
|
65
|
+
- You never need to re-run pipelines from scratch because you’re adding data
|
|
66
|
+
- **It integrates with any existing Python code or libraries**
|
|
67
|
+
- Bring your ever-changing code and workloads
|
|
68
|
+
- You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
|
|
69
|
+
|
|
70
|
+
### What is Pixeltable not providing?
|
|
71
|
+
|
|
72
|
+
- Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
|
|
73
|
+
- We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
|
|
74
|
+
|
|
75
|
+
> [!TIP]
|
|
76
|
+
> Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
|
|
77
|
+
|
|
78
|
+
## 📙 Example Use Cases
|
|
79
|
+
|
|
80
|
+
- **Interact with video data at the frame level** without having to think about frame extraction, intermediate file storage, or storage space explosion.
|
|
81
|
+
- **Augment your data incrementally and interactively with built-in functions and UDFs**, such as image transformations, model inference, and visualizations, without having to think about data pipelines, incremental updates, or capturing function output.
|
|
82
|
+
- **Interact with all the data relevant to your AI application** (video, images, documents, audio, structured data, JSON) through a simple dataframe-style API directly in Python. This includes:
|
|
83
|
+
- similarity search on embeddings, supported by high-dimensional vector indexing;
|
|
84
|
+
- path expressions and transformations on JSON data;
|
|
85
|
+
- PIL and OpenCV image operations;
|
|
86
|
+
- assembling frames into videos.
|
|
87
|
+
- **Perform keyword and image similarity search at the video frame level** without having to worry about frame storage.
|
|
88
|
+
- **Access all Pixeltable-resident data directly as a PyTorch dataset** in your training scripts.
|
|
89
|
+
- **Understand the compute and storage costs of your data at the granularity** of individual augmentations and get cost projections before adding new data and new augmentations.
|
|
90
|
+
- **Rely on Pixeltable's automatic versioning and snapshot functionality** to protect against regressions and to ensure reproducibility.
|
|
91
|
+
|
|
92
|
+
## 🐛 Contributions & Feedback
|
|
93
|
+
|
|
94
|
+
Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
|
|
95
|
+
<br/>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
|
|
96
|
+
|
|
97
|
+
## :classical_building: License
|
|
98
|
+
|
|
99
|
+
This library is licensed under the Apache 2.0 License.
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
from .catalog import Column, Table, InsertableTable, View
|
|
2
2
|
from .dataframe import DataFrame
|
|
3
|
+
from .datatransfer import Remote
|
|
4
|
+
from .catalog import Column, Table, InsertableTable, View
|
|
3
5
|
from .exceptions import Error, Error
|
|
4
6
|
from .exprs import RELATIVE_PATH_ROOT
|
|
5
7
|
from .func import Function, udf, uda, Aggregator, expr_udf
|
|
@@ -21,7 +23,7 @@ from .type_system import (
|
|
|
21
23
|
from .utils.help import help
|
|
22
24
|
|
|
23
25
|
# noinspection PyUnresolvedReferences
|
|
24
|
-
from . import functions, io
|
|
26
|
+
from . import functions, io, iterators
|
|
25
27
|
from .__version__ import __version__, __version_tuple__
|
|
26
28
|
|
|
27
29
|
__all__ = [
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.
|
|
3
|
-
__version_tuple__ = (0, 2,
|
|
2
|
+
__version__ = "0.2.8"
|
|
3
|
+
__version_tuple__ = (0, 2, 8)
|
|
@@ -22,7 +22,8 @@ class Column:
|
|
|
22
22
|
computed_with: Optional[Union['Expr', Callable]] = None,
|
|
23
23
|
is_pk: bool = False, stored: Optional[bool] = None,
|
|
24
24
|
col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
|
|
25
|
-
schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None
|
|
25
|
+
schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
|
|
26
|
+
records_errors: Optional[bool] = None
|
|
26
27
|
):
|
|
27
28
|
"""Column constructor.
|
|
28
29
|
|
|
@@ -80,12 +81,14 @@ class Column:
|
|
|
80
81
|
assert self.col_type is not None
|
|
81
82
|
|
|
82
83
|
self.stored = stored
|
|
83
|
-
self.dependent_cols:
|
|
84
|
+
self.dependent_cols: set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
|
|
84
85
|
self.id = col_id
|
|
85
86
|
self.is_pk = is_pk
|
|
86
87
|
self.schema_version_add = schema_version_add
|
|
87
88
|
self.schema_version_drop = schema_version_drop
|
|
88
89
|
|
|
90
|
+
self._records_errors = records_errors
|
|
91
|
+
|
|
89
92
|
# column in the stored table for the values of this Column
|
|
90
93
|
self.sa_col: Optional[sql.schema.Column] = None
|
|
91
94
|
self.sa_col_type = sa_col_type
|
|
@@ -131,6 +134,9 @@ class Column:
|
|
|
131
134
|
@property
|
|
132
135
|
def records_errors(self) -> bool:
|
|
133
136
|
"""True if this column also stores error information."""
|
|
137
|
+
# default: record errors for computed and media columns
|
|
138
|
+
if self._records_errors is not None:
|
|
139
|
+
return self._records_errors
|
|
134
140
|
return self.is_stored and (self.is_computed or self.col_type.is_media_type())
|
|
135
141
|
|
|
136
142
|
def source(self) -> None:
|
|
@@ -60,25 +60,29 @@ class InsertableTable(Table):
|
|
|
60
60
|
return tbl
|
|
61
61
|
|
|
62
62
|
@overload
|
|
63
|
-
def insert(
|
|
63
|
+
def insert(
|
|
64
|
+
self, rows: Iterable[Dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True
|
|
65
|
+
) -> UpdateStatus: ...
|
|
64
66
|
|
|
65
67
|
@overload
|
|
66
|
-
def insert(self, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any): ...
|
|
68
|
+
def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
|
|
67
69
|
|
|
68
|
-
def insert(
|
|
69
|
-
|
|
70
|
+
def insert(
|
|
71
|
+
self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
|
|
72
|
+
fail_on_exception: bool = True, **kwargs: Any
|
|
73
|
+
) -> UpdateStatus:
|
|
74
|
+
"""Inserts rows into this table. There are two mutually exclusive call patterns:
|
|
70
75
|
|
|
71
76
|
To insert multiple rows at a time:
|
|
72
|
-
|
|
73
|
-
``insert(rows: List[Dict[str, Any]], print_stats: bool = False, fail_on_exception: bool = True)``
|
|
77
|
+
``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
|
|
74
78
|
|
|
75
79
|
To insert just a single row, you can use the more convenient syntax:
|
|
76
|
-
``insert(print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
|
|
80
|
+
``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
|
|
77
81
|
|
|
78
82
|
Args:
|
|
79
83
|
rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
|
|
80
84
|
names to values.
|
|
81
|
-
kwargs: (if inserting a single row)
|
|
85
|
+
kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
|
|
82
86
|
print_stats: If ``True``, print statistics about the cost of computed columns.
|
|
83
87
|
fail_on_exception:
|
|
84
88
|
Determines how exceptions in computed columns and invalid media files (e.g., corrupt images)
|
|
@@ -102,16 +106,27 @@ class InsertableTable(Table):
|
|
|
102
106
|
|
|
103
107
|
>>> tbl.insert(a=1, b=1, c=1)
|
|
104
108
|
"""
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
109
|
+
# The commented code is the intended implementation, with signature (*args, **kwargs).
|
|
110
|
+
# That signature cannot be used currently, due to a present limitation in mkdocs.
|
|
111
|
+
# See: https://github.com/mkdocstrings/mkdocstrings/issues/669
|
|
112
|
+
|
|
113
|
+
# print_stats = kwargs.pop('print_stats', False)
|
|
114
|
+
# fail_on_exception = kwargs.pop('fail_on_exception', True)
|
|
115
|
+
# if len(args) > 0:
|
|
116
|
+
# # There's a positional argument; this means `rows` is expressed as a
|
|
117
|
+
# # list of dicts (multi-insert)
|
|
118
|
+
# rows = list(args[0])
|
|
119
|
+
# else:
|
|
120
|
+
# # No positional argument; this means we're inserting a single row
|
|
121
|
+
# # using kwargs syntax
|
|
122
|
+
# rows = [kwargs]
|
|
123
|
+
|
|
124
|
+
if rows is None:
|
|
114
125
|
rows = [kwargs]
|
|
126
|
+
else:
|
|
127
|
+
rows = list(rows)
|
|
128
|
+
if len(kwargs) > 0:
|
|
129
|
+
raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
|
|
115
130
|
|
|
116
131
|
if not isinstance(rows, list):
|
|
117
132
|
raise excs.Error('rows must be a list of dictionaries')
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import itertools
|
|
3
4
|
import json
|
|
4
5
|
import logging
|
|
5
6
|
from pathlib import Path
|
|
6
|
-
from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple, Iterable
|
|
7
|
+
from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple, Iterable, Type
|
|
7
8
|
from uuid import UUID
|
|
8
9
|
|
|
9
10
|
import pandas as pd
|
|
@@ -16,6 +17,7 @@ import pixeltable.exceptions as excs
|
|
|
16
17
|
import pixeltable.exprs as exprs
|
|
17
18
|
import pixeltable.metadata.schema as schema
|
|
18
19
|
import pixeltable.type_system as ts
|
|
20
|
+
import pixeltable.index as index
|
|
19
21
|
from .column import Column
|
|
20
22
|
from .globals import is_valid_identifier, is_system_column_name, UpdateStatus
|
|
21
23
|
from .schema_object import SchemaObject
|
|
@@ -102,27 +104,26 @@ class Table(SchemaObject):
|
|
|
102
104
|
from pixeltable.dataframe import DataFrame
|
|
103
105
|
return DataFrame(self.tbl_version_path).group_by(*items)
|
|
104
106
|
|
|
105
|
-
def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':
    """Return rows from this table.

    Obtains this table's DataFrame via ``df()`` and returns the result of its ``collect()``.
    """
    frame = self.df()
    return frame.collect()
|
|
109
110
|
|
|
110
111
|
def show(self, *args, **kwargs) -> 'pixeltable.dataframe.DataFrameResultSet':
    """Return rows from this table.

    All positional and keyword arguments are forwarded unchanged to the ``show()`` method of
    this table's DataFrame.
    """
    frame = self.df()
    return frame.show(*args, **kwargs)
|
|
116
117
|
|
|
117
118
|
def head(self, *args, **kwargs) -> 'pixeltable.dataframe.DataFrameResultSet':
    """Return the first n rows inserted into this table.

    All positional and keyword arguments are forwarded unchanged to the ``head()`` method of
    this table's DataFrame.
    """
    frame = self.df()
    return frame.head(*args, **kwargs)
|
|
122
123
|
|
|
123
124
|
def tail(self, *args, **kwargs) -> 'pixeltable.dataframe.DataFrameResultSet':
    """Return the last n rows inserted into this table.

    All positional and keyword arguments are forwarded unchanged to the ``tail()`` method of
    this table's DataFrame.
    """
    frame = self.df()
    return frame.tail(*args, **kwargs)
|
|
128
129
|
|
|
@@ -514,6 +515,24 @@ class Table(SchemaObject):
|
|
|
514
515
|
status = self.tbl_version_path.tbl_version.add_index(col, idx_name=idx_name, idx=idx)
|
|
515
516
|
# TODO: how to deal with exceptions here? drop the index and raise?
|
|
516
517
|
|
|
518
|
+
def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
    """Drop an embedding index from the table.

    The work is delegated to ``_drop_index()``, which is told to consider only indices of
    class ``index.EmbeddingIndex``.

    Args:
        column_name: The name of the column whose embedding index to drop. Invalid if the column has multiple
            embedding indices.
        idx_name: The name of the index to drop.

    Raises:
        Error: If the index does not exist.

    Examples:
        Drop embedding index on the ``img`` column:

        >>> tbl.drop_embedding_index(column_name='img')
    """
    embedding_idx_class = index.EmbeddingIndex
    self._drop_index(column_name=column_name, idx_name=idx_name, _idx_class=embedding_idx_class)
|
|
535
|
+
|
|
517
536
|
def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
|
|
518
537
|
"""Drop an index from the table.
|
|
519
538
|
|
|
@@ -529,6 +548,12 @@ class Table(SchemaObject):
|
|
|
529
548
|
|
|
530
549
|
>>> tbl.drop_index(column_name='img')
|
|
531
550
|
"""
|
|
551
|
+
self._drop_index(column_name=column_name, idx_name=idx_name)
|
|
552
|
+
|
|
553
|
+
def _drop_index(
|
|
554
|
+
self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None,
|
|
555
|
+
_idx_class: Optional[Type[index.IndexBase]] = None
|
|
556
|
+
) -> None:
|
|
532
557
|
if self.tbl_version_path.is_snapshot():
|
|
533
558
|
raise excs.Error('Cannot drop an index from a snapshot')
|
|
534
559
|
self._check_is_dropped()
|
|
@@ -547,12 +572,14 @@ class Table(SchemaObject):
|
|
|
547
572
|
if col.tbl.id != tbl_version.id:
|
|
548
573
|
raise excs.Error(
|
|
549
574
|
f'Column {column_name}: cannot drop index from column that belongs to base ({col.tbl.name})')
|
|
550
|
-
|
|
551
|
-
if
|
|
575
|
+
idx_info = [info for info in tbl_version.idxs_by_name.values() if info.col.id == col.id]
|
|
576
|
+
if _idx_class is not None:
|
|
577
|
+
idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
|
|
578
|
+
if len(idx_info) == 0:
|
|
552
579
|
raise excs.Error(f'Column {column_name} does not have an index')
|
|
553
|
-
if len(
|
|
580
|
+
if len(idx_info) > 1:
|
|
554
581
|
raise excs.Error(f'Column {column_name} has multiple indices; specify idx_name instead')
|
|
555
|
-
idx_id =
|
|
582
|
+
idx_id = idx_info[0].id
|
|
556
583
|
self.tbl_version_path.tbl_version.drop_index(idx_id)
|
|
557
584
|
|
|
558
585
|
def update(
|
|
@@ -682,7 +709,6 @@ class Table(SchemaObject):
|
|
|
682
709
|
|
|
683
710
|
return update_targets
|
|
684
711
|
|
|
685
|
-
|
|
686
712
|
def revert(self) -> None:
|
|
687
713
|
"""Reverts the table to the previous version.
|
|
688
714
|
|
|
@@ -693,3 +719,132 @@ class Table(SchemaObject):
|
|
|
693
719
|
raise excs.Error('Cannot revert a snapshot')
|
|
694
720
|
self._check_is_dropped()
|
|
695
721
|
self.tbl_version_path.tbl_version.revert()
|
|
722
|
+
|
|
723
|
+
def _link(
    self,
    remote: 'pixeltable.datatransfer.Remote',
    col_mapping: Optional[dict[str, str]] = None
) -> None:
    """
    Links the specified `Remote` to this table. Once a remote is linked, it can be synchronized with
    this `Table` by calling [`Table.sync()`]. A record of the link
    is stored in table metadata and will persist across sessions.

    Args:
        remote (pixeltable.datatransfer.Remote): The `Remote` to link to this table.
        col_mapping: An optional mapping of columns from this `Table` to columns in the `Remote`.
            If omitted, every export and import column of the remote is mapped to the table
            column of the same name.

    Raises:
        Error: If a `Remote` is already linked to this table. The dropped-table check and
            `_validate_remote()` may raise as well.
    """
    # TODO(aaron-siegel): Refactor `col_mapping`
    # Run the dropped-table check before reading any table metadata, as `unlink()` does.
    self._check_is_dropped()
    if len(self._get_remotes()) > 0:
        raise excs.Error('Linking more than one `Remote` to a table is not currently supported.')
    export_cols = remote.get_export_columns()
    import_cols = remote.get_import_columns()
    # Remember whether the caller supplied the mapping; validation words its errors differently.
    is_col_mapping_user_specified = col_mapping is not None
    if col_mapping is None:
        # Use the identity mapping by default if `col_mapping` is not specified
        col_mapping = {col: col for col in itertools.chain(export_cols, import_cols)}
    self._validate_remote(export_cols, import_cols, col_mapping, is_col_mapping_user_specified)
    self.tbl_version_path.tbl_version.link(remote, col_mapping)
    print(f'Linked remote {remote} to table `{self.get_name()}`.')
|
|
750
|
+
|
|
751
|
+
def unlink(self) -> None:
    """
    Unlinks this table's `Remote`s.

    Raises:
        Error: If no `Remote` is linked to this table.
    """
    self._check_is_dropped()
    remotes = self._get_remotes()
    assert len(remotes) <= 1

    if not remotes:
        # Without this guard the `next()` call below would leak a bare `StopIteration`.
        raise excs.Error(f'Table `{self.get_name()}` has no linked `Remote`.')
    remote = next(iter(remotes))
    self.tbl_version_path.tbl_version.unlink(remote)
    # TODO: Provide an option to auto-delete the project
    print(f'Unlinked remote {remote} from table `{self.get_name()}`.')
|
|
763
|
+
|
|
764
|
+
def _validate_remote(
    self,
    export_cols: dict[str, ts.ColumnType],
    import_cols: dict[str, ts.ColumnType],
    col_mapping: Optional[dict[str, str]],
    is_col_mapping_user_specified: bool
):
    """
    Checks that `col_mapping` is consistent with this table and with the remote's columns.

    Validation runs in two passes over `col_mapping`: first all names, then all types, so a
    name problem anywhere in the mapping is reported before any type problem.

    Args:
        export_cols: Remote columns that receive exported table data, keyed by name.
        import_cols: Remote columns whose data is imported into the table, keyed by name.
        col_mapping: Mapping from table column names to remote column names.
        is_col_mapping_user_specified: Whether the caller supplied `col_mapping`; only selects
            the wording of the error raised for an unknown table column.

    Raises:
        Error: If a mapped table column or remote column does not exist, if the types of a
            mapped pair are incompatible, or if a computed column is mapped to an import column.
    """
    # Pass 1: validate names
    known_cols = self.column_names()
    for t_col, r_col in col_mapping.items():
        if t_col not in known_cols:
            if not is_col_mapping_user_specified:
                raise excs.Error(
                    f'Column `{t_col}` does not exist in Table `{self.get_name()}`. Either add a column `{t_col}`, '
                    f'or specify a `col_mapping` to associate a different column with the remote field `{r_col}`.'
                )
            raise excs.Error(
                f'Column name `{t_col}` appears as a key in `col_mapping`, but Table `{self.get_name()}` '
                'contains no such column.'
            )
        if not (r_col in export_cols or r_col in import_cols):
            raise excs.Error(
                f'Column name `{r_col}` appears as a value in `col_mapping`, but the remote '
                f'configuration has no column `{r_col}`.'
            )

    # Pass 2: validate column specs
    type_by_col = self.column_types()
    for t_col, r_col in col_mapping.items():
        t_col_type = type_by_col[t_col]
        # Export direction: the table column must be assignable to the remote column
        if r_col in export_cols and not export_cols[r_col].is_supertype_of(t_col_type):
            raise excs.Error(
                f'Column `{t_col}` cannot be exported to remote column `{r_col}` (incompatible types)'
            )
        if r_col not in import_cols:
            continue
        # Import direction: the remote column must be assignable to the table column
        if self.tbl_version_path.get_column(t_col).is_computed:
            raise excs.Error(
                f'Column `{t_col}` is a computed column, which cannot be populated from a remote column'
            )
        if not t_col_type.is_supertype_of(import_cols[r_col]):
            raise excs.Error(
                f'Column `{t_col}` cannot be imported from remote column `{r_col}` (incompatible types)'
            )
|
|
812
|
+
|
|
813
|
+
def _get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
    """
    Gets a `dict` of all `Remote`s linked to this table.

    The result is whatever this table's version returns from `get_remotes()`: per the return
    annotation, each `Remote` keyed to its column mapping.
    """
    tbl_version = self.tbl_version_path.tbl_version
    return tbl_version.get_remotes()
|
|
818
|
+
|
|
819
|
+
def sync(
    self,
    *,
    export_data: bool = True,
    import_data: bool = True
):
    """
    Synchronizes this table with its linked `Remote`s.

    Args:
        export_data: If `True`, data from this table will be exported to the external store during synchronization.
        import_data: If `True`, data from the external store will be imported to this table during synchronization.

    Raises:
        Error: If `export_data` (or `import_data`) is `True` but none of the remote's export
            (or import) columns appears in the column mapping of the link.
    """
    # Same dropped-table guard that `_link()` and `unlink()` apply.
    self._check_is_dropped()
    remotes = self._get_remotes()
    assert len(remotes) <= 1

    # Validate every remote up front, so that nothing is synchronized if a check fails.
    for remote, col_mapping in remotes.items():
        r_cols = set(col_mapping.values())
        # Validate export/import: at least one mapped remote column is needed per direction
        if export_data and r_cols.isdisjoint(remote.get_export_columns()):
            raise excs.Error(
                f'Attempted to sync with export_data=True, but there are no columns to export: {remote}'
            )
        if import_data and r_cols.isdisjoint(remote.get_import_columns()):
            raise excs.Error(
                f'Attempted to sync with import_data=True, but there are no columns to import: {remote}'
            )

    for remote, col_mapping in remotes.items():
        remote.sync(self, col_mapping, export_data=export_data, import_data=import_data)
|