pixeltable 0.1.2__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable-0.2.1/LICENSE +18 -0
- pixeltable-0.2.1/PKG-INFO +119 -0
- pixeltable-0.2.1/README.md +87 -0
- pixeltable-0.2.1/pixeltable/__init__.py +44 -0
- pixeltable-0.2.1/pixeltable/catalog/__init__.py +13 -0
- pixeltable-0.2.1/pixeltable/catalog/catalog.py +159 -0
- pixeltable-0.2.1/pixeltable/catalog/column.py +200 -0
- pixeltable-0.2.1/pixeltable/catalog/dir.py +32 -0
- pixeltable-0.2.1/pixeltable/catalog/globals.py +33 -0
- pixeltable-0.2.1/pixeltable/catalog/insertable_table.py +191 -0
- pixeltable-0.2.1/pixeltable/catalog/named_function.py +36 -0
- pixeltable-0.2.1/pixeltable/catalog/path.py +58 -0
- pixeltable-0.2.1/pixeltable/catalog/path_dict.py +139 -0
- pixeltable-0.2.1/pixeltable/catalog/schema_object.py +39 -0
- pixeltable-0.2.1/pixeltable/catalog/table.py +581 -0
- pixeltable-0.2.1/pixeltable/catalog/table_version.py +749 -0
- pixeltable-0.2.1/pixeltable/catalog/table_version_path.py +133 -0
- pixeltable-0.2.1/pixeltable/catalog/view.py +203 -0
- pixeltable-0.2.1/pixeltable/client.py +534 -0
- pixeltable-0.2.1/pixeltable/dataframe.py +631 -0
- pixeltable-0.2.1/pixeltable/env.py +414 -0
- pixeltable-0.2.1/pixeltable/exceptions.py +17 -0
- pixeltable-0.2.1/pixeltable/exec/__init__.py +9 -0
- pixeltable-0.2.1/pixeltable/exec/aggregation_node.py +78 -0
- pixeltable-0.2.1/pixeltable/exec/cache_prefetch_node.py +113 -0
- pixeltable-0.2.1/pixeltable/exec/component_iteration_node.py +79 -0
- pixeltable-0.2.1/pixeltable/exec/data_row_batch.py +95 -0
- pixeltable-0.2.1/pixeltable/exec/exec_context.py +22 -0
- pixeltable-0.2.1/pixeltable/exec/exec_node.py +61 -0
- pixeltable-0.2.1/pixeltable/exec/expr_eval_node.py +217 -0
- pixeltable-0.2.1/pixeltable/exec/in_memory_data_node.py +69 -0
- pixeltable-0.2.1/pixeltable/exec/media_validation_node.py +43 -0
- pixeltable-0.2.1/pixeltable/exec/sql_scan_node.py +225 -0
- pixeltable-0.2.1/pixeltable/exprs/__init__.py +24 -0
- pixeltable-0.2.1/pixeltable/exprs/arithmetic_expr.py +102 -0
- pixeltable-0.2.1/pixeltable/exprs/array_slice.py +71 -0
- pixeltable-0.2.1/pixeltable/exprs/column_property_ref.py +77 -0
- pixeltable-0.2.1/pixeltable/exprs/column_ref.py +105 -0
- pixeltable-0.2.1/pixeltable/exprs/comparison.py +77 -0
- pixeltable-0.2.1/pixeltable/exprs/compound_predicate.py +98 -0
- pixeltable-0.2.1/pixeltable/exprs/data_row.py +187 -0
- pixeltable-0.2.1/pixeltable/exprs/expr.py +586 -0
- pixeltable-0.2.1/pixeltable/exprs/expr_set.py +39 -0
- pixeltable-0.2.1/pixeltable/exprs/function_call.py +380 -0
- pixeltable-0.2.1/pixeltable/exprs/globals.py +69 -0
- pixeltable-0.2.1/pixeltable/exprs/image_member_access.py +115 -0
- pixeltable-0.2.1/pixeltable/exprs/image_similarity_predicate.py +58 -0
- pixeltable-0.2.1/pixeltable/exprs/inline_array.py +107 -0
- pixeltable-0.2.1/pixeltable/exprs/inline_dict.py +101 -0
- pixeltable-0.2.1/pixeltable/exprs/is_null.py +38 -0
- pixeltable-0.2.1/pixeltable/exprs/json_mapper.py +121 -0
- pixeltable-0.2.1/pixeltable/exprs/json_path.py +159 -0
- pixeltable-0.2.1/pixeltable/exprs/literal.py +54 -0
- pixeltable-0.2.1/pixeltable/exprs/object_ref.py +41 -0
- pixeltable-0.2.1/pixeltable/exprs/predicate.py +44 -0
- pixeltable-0.2.1/pixeltable/exprs/row_builder.py +355 -0
- pixeltable-0.2.1/pixeltable/exprs/rowid_ref.py +94 -0
- pixeltable-0.2.1/pixeltable/exprs/type_cast.py +53 -0
- pixeltable-0.2.1/pixeltable/exprs/variable.py +45 -0
- pixeltable-0.2.1/pixeltable/func/__init__.py +9 -0
- pixeltable-0.2.1/pixeltable/func/aggregate_function.py +194 -0
- pixeltable-0.2.1/pixeltable/func/batched_function.py +53 -0
- pixeltable-0.2.1/pixeltable/func/callable_function.py +69 -0
- pixeltable-0.2.1/pixeltable/func/expr_template_function.py +82 -0
- pixeltable-0.2.1/pixeltable/func/function.py +110 -0
- pixeltable-0.2.1/pixeltable/func/function_registry.py +227 -0
- pixeltable-0.2.1/pixeltable/func/globals.py +36 -0
- pixeltable-0.2.1/pixeltable/func/nos_function.py +202 -0
- pixeltable-0.2.1/pixeltable/func/signature.py +166 -0
- pixeltable-0.2.1/pixeltable/func/udf.py +163 -0
- pixeltable-0.2.1/pixeltable/functions/__init__.py +95 -0
- pixeltable-0.2.1/pixeltable/functions/eval.py +216 -0
- pixeltable-0.2.1/pixeltable/functions/fireworks.py +61 -0
- pixeltable-0.2.1/pixeltable/functions/huggingface.py +120 -0
- pixeltable-0.2.1/pixeltable/functions/image.py +16 -0
- pixeltable-0.2.1/pixeltable/functions/openai.py +88 -0
- pixeltable-0.2.1/pixeltable/functions/pil/image.py +150 -0
- pixeltable-0.2.1/pixeltable/functions/string.py +13 -0
- pixeltable-0.2.1/pixeltable/functions/together.py +27 -0
- pixeltable-0.2.1/pixeltable/functions/util.py +41 -0
- pixeltable-0.2.1/pixeltable/functions/video.py +62 -0
- pixeltable-0.2.1/pixeltable/iterators/__init__.py +3 -0
- pixeltable-0.2.1/pixeltable/iterators/base.py +48 -0
- pixeltable-0.2.1/pixeltable/iterators/document.py +311 -0
- pixeltable-0.2.1/pixeltable/iterators/video.py +89 -0
- pixeltable-0.2.1/pixeltable/metadata/__init__.py +54 -0
- pixeltable-0.2.1/pixeltable/metadata/converters/convert_10.py +18 -0
- pixeltable-0.2.1/pixeltable/metadata/schema.py +211 -0
- pixeltable-0.2.1/pixeltable/plan.py +656 -0
- pixeltable-0.2.1/pixeltable/store.py +422 -0
- pixeltable-0.2.1/pixeltable/tests/conftest.py +175 -0
- pixeltable-0.2.1/pixeltable/tests/test_audio.py +65 -0
- pixeltable-0.2.1/pixeltable/tests/test_catalog.py +27 -0
- pixeltable-0.2.1/pixeltable/tests/test_client.py +21 -0
- pixeltable-0.2.1/pixeltable/tests/test_component_view.py +372 -0
- pixeltable-0.2.1/pixeltable/tests/test_dataframe.py +433 -0
- pixeltable-0.2.1/pixeltable/tests/test_dirs.py +107 -0
- pixeltable-0.2.1/pixeltable/tests/test_document.py +117 -0
- pixeltable-0.2.1/pixeltable/tests/test_exprs.py +804 -0
- pixeltable-0.2.1/pixeltable/tests/test_function.py +324 -0
- pixeltable-0.2.1/pixeltable/tests/test_functions.py +293 -0
- pixeltable-0.2.1/pixeltable/tests/test_migration.py +43 -0
- pixeltable-0.2.1/pixeltable/tests/test_nos.py +54 -0
- pixeltable-0.2.1/pixeltable/tests/test_snapshot.py +208 -0
- pixeltable-0.2.1/pixeltable/tests/test_table.py +1158 -0
- pixeltable-0.2.1/pixeltable/tests/test_transactional_directory.py +42 -0
- {pixeltable-0.1.2 → pixeltable-0.2.1}/pixeltable/tests/test_types.py +5 -11
- pixeltable-0.2.1/pixeltable/tests/test_video.py +157 -0
- pixeltable-0.2.1/pixeltable/tests/test_view.py +530 -0
- pixeltable-0.2.1/pixeltable/tests/utils.py +274 -0
- pixeltable-0.2.1/pixeltable/tool/create_test_db_dump.py +149 -0
- pixeltable-0.2.1/pixeltable/type_system.py +938 -0
- pixeltable-0.2.1/pixeltable/utils/__init__.py +17 -0
- pixeltable-0.2.1/pixeltable/utils/clip.py +18 -0
- pixeltable-0.2.1/pixeltable/utils/coco.py +136 -0
- pixeltable-0.2.1/pixeltable/utils/documents.py +39 -0
- pixeltable-0.2.1/pixeltable/utils/filecache.py +195 -0
- pixeltable-0.2.1/pixeltable/utils/help.py +11 -0
- pixeltable-0.2.1/pixeltable/utils/media_store.py +76 -0
- pixeltable-0.2.1/pixeltable/utils/parquet.py +126 -0
- pixeltable-0.2.1/pixeltable/utils/pytorch.py +172 -0
- pixeltable-0.2.1/pixeltable/utils/s3.py +13 -0
- pixeltable-0.2.1/pixeltable/utils/sql.py +17 -0
- pixeltable-0.2.1/pixeltable/utils/transactional_directory.py +35 -0
- pixeltable-0.2.1/pyproject.toml +122 -0
- pixeltable-0.1.2/LICENSE +0 -201
- pixeltable-0.1.2/PKG-INFO +0 -89
- pixeltable-0.1.2/README.md +0 -61
- pixeltable-0.1.2/pixeltable/__init__.py +0 -27
- pixeltable-0.1.2/pixeltable/catalog.py +0 -1421
- pixeltable-0.1.2/pixeltable/client.py +0 -45
- pixeltable-0.1.2/pixeltable/dataframe.py +0 -440
- pixeltable-0.1.2/pixeltable/env.py +0 -89
- pixeltable-0.1.2/pixeltable/exceptions.py +0 -26
- pixeltable-0.1.2/pixeltable/exprs.py +0 -1745
- pixeltable-0.1.2/pixeltable/function.py +0 -269
- pixeltable-0.1.2/pixeltable/functions/__init__.py +0 -146
- pixeltable-0.1.2/pixeltable/functions/clip.py +0 -10
- pixeltable-0.1.2/pixeltable/functions/pil/__init__.py +0 -23
- pixeltable-0.1.2/pixeltable/functions/pil/image.py +0 -9
- pixeltable-0.1.2/pixeltable/functions/tf.py +0 -21
- pixeltable-0.1.2/pixeltable/index.py +0 -57
- pixeltable-0.1.2/pixeltable/store.py +0 -191
- pixeltable-0.1.2/pixeltable/tests/conftest.py +0 -118
- pixeltable-0.1.2/pixeltable/tests/test_client.py +0 -21
- pixeltable-0.1.2/pixeltable/tests/test_dict.py +0 -24
- pixeltable-0.1.2/pixeltable/tests/test_dirs.py +0 -91
- pixeltable-0.1.2/pixeltable/tests/test_exprs.py +0 -348
- pixeltable-0.1.2/pixeltable/tests/test_function.py +0 -94
- pixeltable-0.1.2/pixeltable/tests/test_functions.py +0 -11
- pixeltable-0.1.2/pixeltable/tests/test_table.py +0 -330
- pixeltable-0.1.2/pixeltable/tests/test_tf.py +0 -69
- pixeltable-0.1.2/pixeltable/tests/test_video.py +0 -42
- pixeltable-0.1.2/pixeltable/tests/utils.py +0 -133
- pixeltable-0.1.2/pixeltable/tf.py +0 -33
- pixeltable-0.1.2/pixeltable/type_system.py +0 -581
- pixeltable-0.1.2/pixeltable/utils/__init__.py +0 -46
- pixeltable-0.1.2/pixeltable/utils/clip.py +0 -21
- pixeltable-0.1.2/pixeltable/utils/tf.py +0 -33
- pixeltable-0.1.2/pixeltable/utils/video.py +0 -32
- pixeltable-0.1.2/pyproject.toml +0 -48
- pixeltable-0.1.2/setup.py +0 -47
pixeltable-0.2.1/LICENSE
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
Copyright 2023 Marcel Kornacker
|
|
7
|
+
|
|
8
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
you may not use this file except in compliance with the License.
|
|
10
|
+
You may obtain a copy of the License at
|
|
11
|
+
|
|
12
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
13
|
+
|
|
14
|
+
Unless required by applicable law or agreed to in writing, software
|
|
15
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
16
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
17
|
+
See the License for the specific language governing permissions and
|
|
18
|
+
limitations under the License.
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: pixeltable
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
|
+
Author: Marcel Kornacker
|
|
6
|
+
Author-email: marcelk@gmail.com
|
|
7
|
+
Requires-Python: >=3.9,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Dist: av (>=10.0.0)
|
|
14
|
+
Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
|
|
15
|
+
Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
|
|
16
|
+
Requires-Dist: jmespath (>=1.0.1,<2.0.0)
|
|
17
|
+
Requires-Dist: numpy (>=1.24.1,<2.0.0)
|
|
18
|
+
Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
19
|
+
Requires-Dist: pandas (>=1.5.3,<2.0.0)
|
|
20
|
+
Requires-Dist: pgserver (==0.0.5)
|
|
21
|
+
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
22
|
+
Requires-Dist: pillow (>=9.4.0,<10.0.0)
|
|
23
|
+
Requires-Dist: psutil (>=5.9.5,<6.0.0)
|
|
24
|
+
Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
|
|
25
|
+
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
26
|
+
Requires-Dist: regex (>=2022.10.31,<2023.0.0)
|
|
27
|
+
Requires-Dist: sqlalchemy-utils (>=0.41.1,<0.42.0)
|
|
28
|
+
Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
|
|
29
|
+
Requires-Dist: tqdm (>=4.64.1,<5.0.0)
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
<img src="docs/pixeltable-banner.png" width="45%"/>
|
|
33
|
+
|
|
34
|
+
# Pixeltable: The Multimodal AI Data Plane
|
|
35
|
+
|
|
36
|
+
[License: Apache 2.0](https://opensource.org/licenses/Apache-2.0)
|
|
37
|
+
|
|
38
|
+

|
|
39
|
+
|
|
40
|
+
Pixeltable is a Python library that lets AI engineers and data scientists focus on
|
|
41
|
+
exploration, modeling, and app development without having to deal with the customary
|
|
42
|
+
data plumbing.
|
|
43
|
+
|
|
44
|
+
**Pixeltable redefines data infrastructure and workflow orchestration for AI development.**
|
|
45
|
+
It brings together data storage, versioning, and indexing with orchestration and model
|
|
46
|
+
versioning under a declarative table interface, with transformations, model inference,
|
|
47
|
+
and custom logic represented as computed columns.
|
|
48
|
+
<!--
|
|
49
|
+
## Quick Start
|
|
50
|
+
|
|
51
|
+
If you just want to play around with Pixeltable to see what it's capable of, the easiest way is to run
|
|
52
|
+
the Pixeltable Basics tutorial in colab:
|
|
53
|
+
|
|
54
|
+
<a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/docs/tutorials/pixeltable-basics.ipynb">
|
|
55
|
+
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
56
|
+
</a>
|
|
57
|
+
-->
|
|
58
|
+
## Installation
|
|
59
|
+
|
|
60
|
+
Pixeltable works with Python 3.9, 3.10, or 3.11 running on Linux or macOS.
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
pip install pixeltable
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
To verify that it's working:
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
import pixeltable as pxt
|
|
70
|
+
cl = pxt.Client()
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
For more detailed installation instructions, see the
|
|
74
|
+
[Getting Started with Pixeltable](https://pixeltable.github.io/pixeltable/getting-started/)
|
|
75
|
+
guide. Then, check out the
|
|
76
|
+
[Pixeltable Basics](https://pixeltable.github.io/pixeltable/tutorials/pixeltable-basics/)
|
|
77
|
+
tutorial for a tour of its most important features.
|
|
78
|
+
|
|
79
|
+
## What problems does Pixeltable solve?
|
|
80
|
+
|
|
81
|
+
Today’s solutions for AI app development require extensive custom coding and infrastructure
|
|
82
|
+
plumbing. Tracking lineage and versions between and across data transformations, models, and
|
|
83
|
+
deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
|
|
84
|
+
a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
|
|
85
|
+
building and productionizing AI applications.
|
|
86
|
+
|
|
87
|
+
## Why should you use Pixeltable?
|
|
88
|
+
|
|
89
|
+
- It gives you transparency and reproducibility
|
|
90
|
+
- All generated data is automatically recorded and versioned
|
|
91
|
+
- You will never need to re-run a workload because you lost track of the input data
|
|
92
|
+
- It saves you money
|
|
93
|
+
- All data changes are automatically incremental
|
|
94
|
+
- You never need to re-run pipelines from scratch because you’re adding data
|
|
95
|
+
- It integrates with any existing Python code or libraries
|
|
96
|
+
- Bring your ever-changing code and workloads
|
|
97
|
+
- You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
|
|
98
|
+
|
|
99
|
+
## Example Use Cases
|
|
100
|
+
|
|
101
|
+
* Interact with video data at the frame level without having to think about frame extraction,
|
|
102
|
+
intermediate file storage, or storage space explosion.
|
|
103
|
+
* Augment your data incrementally and interactively with built-in functions and UDFs, such as
|
|
104
|
+
image transformations, model inference, and visualizations, without having to think about data pipelines,
|
|
105
|
+
incremental updates, or capturing function output.
|
|
106
|
+
* Interact with all the data relevant to your AI application (video, images, documents, audio, structured data, JSON) through
|
|
107
|
+
a simple dataframe-style API directly in Python. This includes:
|
|
108
|
+
* similarity search on embeddings, supported by high-dimensional vector indexing
|
|
109
|
+
* path expressions and transformations on JSON data
|
|
110
|
+
* PIL and OpenCV image operations
|
|
111
|
+
* assembling frames into videos
|
|
112
|
+
* Perform keyword and image similarity search at the video frame level without having to worry about frame
|
|
113
|
+
storage.
|
|
114
|
+
* Access all Pixeltable-resident data directly as a PyTorch dataset in your training scripts.
|
|
115
|
+
* Understand the compute and storage costs of your data at the granularity of individual augmentations and
|
|
116
|
+
get cost projections before adding new data and new augmentations.
|
|
117
|
+
* Rely on Pixeltable's automatic versioning and snapshot functionality to protect against regressions
|
|
118
|
+
and to ensure reproducibility.
|
|
119
|
+
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
<img src="docs/pixeltable-banner.png" width="45%"/>
|
|
2
|
+
|
|
3
|
+
# Pixeltable: The Multimodal AI Data Plane
|
|
4
|
+
|
|
5
|
+
[License: Apache 2.0](https://opensource.org/licenses/Apache-2.0)
|
|
6
|
+
|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
Pixeltable is a Python library that lets AI engineers and data scientists focus on
|
|
10
|
+
exploration, modeling, and app development without having to deal with the customary
|
|
11
|
+
data plumbing.
|
|
12
|
+
|
|
13
|
+
**Pixeltable redefines data infrastructure and workflow orchestration for AI development.**
|
|
14
|
+
It brings together data storage, versioning, and indexing with orchestration and model
|
|
15
|
+
versioning under a declarative table interface, with transformations, model inference,
|
|
16
|
+
and custom logic represented as computed columns.
|
|
17
|
+
<!--
|
|
18
|
+
## Quick Start
|
|
19
|
+
|
|
20
|
+
If you just want to play around with Pixeltable to see what it's capable of, the easiest way is to run
|
|
21
|
+
the Pixeltable Basics tutorial in colab:
|
|
22
|
+
|
|
23
|
+
<a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/docs/tutorials/pixeltable-basics.ipynb">
|
|
24
|
+
<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
|
|
25
|
+
</a>
|
|
26
|
+
-->
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
Pixeltable works with Python 3.9, 3.10, or 3.11 running on Linux or macOS.
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
pip install pixeltable
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
To verify that it's working:
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
import pixeltable as pxt
|
|
39
|
+
cl = pxt.Client()
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
For more detailed installation instructions, see the
|
|
43
|
+
[Getting Started with Pixeltable](https://pixeltable.github.io/pixeltable/getting-started/)
|
|
44
|
+
guide. Then, check out the
|
|
45
|
+
[Pixeltable Basics](https://pixeltable.github.io/pixeltable/tutorials/pixeltable-basics/)
|
|
46
|
+
tutorial for a tour of its most important features.
|
|
47
|
+
|
|
48
|
+
## What problems does Pixeltable solve?
|
|
49
|
+
|
|
50
|
+
Today’s solutions for AI app development require extensive custom coding and infrastructure
|
|
51
|
+
plumbing. Tracking lineage and versions between and across data transformations, models, and
|
|
52
|
+
deployment is cumbersome. Pixeltable is a replacement for traditional data plumbing, providing
|
|
53
|
+
a unified plane for data, models, and orchestration. It removes the data plumbing overhead in
|
|
54
|
+
building and productionizing AI applications.
|
|
55
|
+
|
|
56
|
+
## Why should you use Pixeltable?
|
|
57
|
+
|
|
58
|
+
- It gives you transparency and reproducibility
|
|
59
|
+
- All generated data is automatically recorded and versioned
|
|
60
|
+
- You will never need to re-run a workload because you lost track of the input data
|
|
61
|
+
- It saves you money
|
|
62
|
+
- All data changes are automatically incremental
|
|
63
|
+
- You never need to re-run pipelines from scratch because you’re adding data
|
|
64
|
+
- It integrates with any existing Python code or libraries
|
|
65
|
+
- Bring your ever-changing code and workloads
|
|
66
|
+
- You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
|
|
67
|
+
|
|
68
|
+
## Example Use Cases
|
|
69
|
+
|
|
70
|
+
* Interact with video data at the frame level without having to think about frame extraction,
|
|
71
|
+
intermediate file storage, or storage space explosion.
|
|
72
|
+
* Augment your data incrementally and interactively with built-in functions and UDFs, such as
|
|
73
|
+
image transformations, model inference, and visualizations, without having to think about data pipelines,
|
|
74
|
+
incremental updates, or capturing function output.
|
|
75
|
+
* Interact with all the data relevant to your AI application (video, images, documents, audio, structured data, JSON) through
|
|
76
|
+
a simple dataframe-style API directly in Python. This includes:
|
|
77
|
+
* similarity search on embeddings, supported by high-dimensional vector indexing
|
|
78
|
+
* path expressions and transformations on JSON data
|
|
79
|
+
* PIL and OpenCV image operations
|
|
80
|
+
* assembling frames into videos
|
|
81
|
+
* Perform keyword and image similarity search at the video frame level without having to worry about frame
|
|
82
|
+
storage.
|
|
83
|
+
* Access all Pixeltable-resident data directly as a PyTorch dataset in your training scripts.
|
|
84
|
+
* Understand the compute and storage costs of your data at the granularity of individual augmentations and
|
|
85
|
+
get cost projections before adding new data and new augmentations.
|
|
86
|
+
* Rely on Pixeltable's automatic versioning and snapshot functionality to protect against regressions
|
|
87
|
+
and to ensure reproducibility.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Public API of the pixeltable package: re-exports the user-facing classes,
# decorators and column types so that callers can write `import pixeltable as pxt`.
from .catalog import Column, Table, InsertableTable, View
from .client import Client
from .dataframe import DataFrame
# `Error` was previously listed twice in this import; one name is sufficient.
from .exceptions import Error
from .exprs import RELATIVE_PATH_ROOT
from .func import Function, udf, uda, Aggregator, expr_udf
from .type_system import (
    ColumnType, StringType, IntType, FloatType, BoolType, TimestampType, JsonType, ArrayType, ImageType, VideoType,
    AudioType, DocumentType,
)
from .utils.help import help
# imported for its side effects / to expose `pixeltable.functions`; not referenced below
# noinspection PyUnresolvedReferences
from . import functions

# Names exported by `from pixeltable import *`.
__all__ = [
    'Client',
    'DataFrame',
    'Column',
    'Table',
    'InsertableTable',
    'View',
    'Error',
    'ColumnType',
    'StringType',
    'IntType',
    'FloatType',
    'BoolType',
    'TimestampType',
    'JsonType',
    'RELATIVE_PATH_ROOT',
    'ArrayType',
    'ImageType',
    'VideoType',
    'AudioType',
    'DocumentType',
    'Function',
    'help',
    'udf',
    'Aggregator',
    'uda',
    'expr_udf',
]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from .catalog import Catalog
|
|
2
|
+
from .column import Column
|
|
3
|
+
from .table_version_path import TableVersionPath
|
|
4
|
+
from .table_version import TableVersion
|
|
5
|
+
from .schema_object import SchemaObject
|
|
6
|
+
from .named_function import NamedFunction
|
|
7
|
+
from .dir import Dir
|
|
8
|
+
from .table import Table
|
|
9
|
+
from .insertable_table import InsertableTable
|
|
10
|
+
from .view import View
|
|
11
|
+
from .path import Path
|
|
12
|
+
from .path_dict import PathDict
|
|
13
|
+
from .globals import is_valid_identifier, is_valid_path
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from typing import Optional, List, Any, Dict, Tuple
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
import dataclasses
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
import sqlalchemy as sql
|
|
8
|
+
import sqlalchemy.orm as orm
|
|
9
|
+
|
|
10
|
+
from .table_version import TableVersion
|
|
11
|
+
from .table_version_path import TableVersionPath
|
|
12
|
+
from .table import Table
|
|
13
|
+
from .named_function import NamedFunction
|
|
14
|
+
from .path_dict import PathDict
|
|
15
|
+
import pixeltable.env as env
|
|
16
|
+
import pixeltable.metadata.schema as schema
|
|
17
|
+
|
|
18
|
+
_logger = logging.getLogger('pixeltable')
|
|
19
|
+
|
|
20
|
+
class Catalog:
    """A repository of catalog objects.

    Process-wide singleton: obtain it via ``Catalog.get()``, which creates the
    instance on first use and loads all tables/views from the metadata store.
    """
    _instance: Optional[Catalog] = None

    @classmethod
    def get(cls) -> Catalog:
        """Return the singleton, creating and loading it on first use.

        Raises:
            Any exception raised while loading table metadata. In that case the
            singleton is discarded, so that the next call retries the load
            instead of returning a partially-loaded catalog.
        """
        if cls._instance is None:
            # NOTE(review): the instance is published before loading, presumably because
            # objects created during loading call Catalog.get() themselves - confirm
            # before reordering these statements.
            cls._instance = cls()
            try:
                with orm.Session(env.Env.get().engine, future=True) as session:
                    cls._instance._load_table_versions(session)
                    #cls._instance._load_functions(session)
            except Exception:
                # don't leave a half-initialized catalog behind
                cls._instance = None
                raise
        return cls._instance

    @classmethod
    def clear(cls) -> None:
        """Remove the instance. Used for testing."""
        cls._instance = None

    def __init__(self) -> None:
        """Create empty lookup structures and make sure the stored catalog has a root directory."""
        # key: [id, version]
        # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
        # - snapshot versions: records the version of the snapshot
        self.tbl_versions: Dict[Tuple[UUID, Optional[int]], TableVersion] = {}

        self.tbls: Dict[UUID, Table] = {}  # don't use a defaultdict here, it doesn't cooperate with the debugger
        # key: table id; value: the views created on top of that table
        self.tbl_dependents: Dict[UUID, List[Table]] = {}

        self._init_store()
        self.paths = PathDict()  # do this after _init_store()

    def _init_store(self) -> None:
        """One-time initialization of the stored catalog. Idempotent."""
        with orm.Session(env.Env.get().engine, future=True) as session:
            if session.query(sql.func.count(schema.Dir.id)).scalar() > 0:
                # at least one directory record exists: nothing to do
                return
            # create a top-level directory, so that every schema object has a directory
            dir_md = schema.DirMd(name='')
            dir_record = schema.Dir(parent_id=None, md=dataclasses.asdict(dir_md))
            session.add(dir_record)
            session.flush()
            session.commit()
            _logger.info('Initialized catalog')

    def _load_snapshot_version(
            self, tbl_id: UUID, version: int, base: Optional[TableVersion], session: orm.Session
    ) -> TableVersion:
        """Load one specific (snapshot) version of a table from the metadata store.

        Args:
            tbl_id: id of the table
            version: version number to load
            base: the TableVersion to use as the base of the returned object
            session: open session used for the lookup

        Returns:
            A TableVersion constructed with ``is_snapshot=True``.

        Raises:
            Whatever ``Query.one()`` raises when the lookup does not match exactly one row.
        """
        # select the table record together with the schema version that was current at 'version'
        q = session.query(schema.Table, schema.TableSchemaVersion) \
            .select_from(schema.Table) \
            .join(schema.TableVersion) \
            .join(schema.TableSchemaVersion) \
            .where(schema.Table.id == tbl_id) \
            .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = {version}")) \
            .where(sql.text((
                f"({schema.TableVersion.__table__}.md->>'schema_version')::int = "
                f"{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}")))
        tbl_record, schema_version_record = q.one()
        tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
        schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
        # we ignore tbl_record.base_tbl_id/base_snapshot_id and use 'base' instead: if the base is a snapshot
        # we'd have to look that up first
        return TableVersion(tbl_record.id, tbl_md, version, schema_version_md, is_snapshot=True, base=base)

    def _load_table_versions(self, session: orm.Session) -> None:
        """Load all tables and views from the metadata store.

        Populates ``self.tbls``, ``self.tbl_dependents`` and ``self.paths``.

        Args:
            session: open session used for the metadata queries
        """
        # local imports, presumably to avoid a circular import at module load time
        from .insertable_table import InsertableTable
        from .view import View

        # load tables/views;
        # do this in ascending order of creation ts so that we can resolve base references in one pass
        q = session.query(schema.Table, schema.TableSchemaVersion) \
            .select_from(schema.Table) \
            .join(schema.TableVersion) \
            .join(schema.TableSchemaVersion) \
            .where(sql.text(f"({schema.TableVersion.__table__}.md->>'version')::int = 0")) \
            .where(sql.text((
                f"({schema.Table.__table__}.md->>'current_schema_version')::int = "
                f"{schema.TableSchemaVersion.__table__}.{schema.TableSchemaVersion.schema_version.name}"))) \
            .order_by(sql.text(f"({schema.TableVersion.__table__}.md->>'created_at')::float"))

        for tbl_record, schema_version_record in q.all():
            tbl_md = schema.md_from_dict(schema.TableMd, tbl_record.md)
            schema_version_md = schema.md_from_dict(schema.TableSchemaVersionMd, schema_version_record.md)
            view_md = tbl_md.view_md

            if view_md is not None:
                assert len(view_md.base_versions) > 0
                # construct a TableVersionPath for the view
                refd_versions = [(UUID(tbl_id), version) for tbl_id, version in view_md.base_versions]
                base_path: Optional[TableVersionPath] = None
                base: Optional[TableVersion] = None
                # go through the versions in reverse order, so we can construct TableVersionPaths
                for base_id, version in refd_versions[::-1]:
                    base_version = self.tbl_versions.get((base_id, version), None)
                    if base_version is None:
                        # if this is a reference to a mutable table, we should have loaded it already
                        assert version is not None
                        base_version = self._load_snapshot_version(base_id, version, base, session)
                    base_path = TableVersionPath(base_version, base=base_path)
                    base = base_version
                assert base_path is not None

                base_tbl = self.tbls[base_path.tbl_version.id]
                is_snapshot = view_md.is_snapshot
                snapshot_only = is_snapshot and view_md.predicate is None and len(schema_version_md.columns) == 0
                if snapshot_only:
                    # this is a pure snapshot, without a physical table backing it
                    view_path = base_path
                else:
                    tbl_version = TableVersion(
                        tbl_record.id, tbl_md, tbl_md.current_version, schema_version_md, is_snapshot=is_snapshot,
                        base=base_path.tbl_version if is_snapshot else None,
                        base_path=base_path if not is_snapshot else None)
                    view_path = TableVersionPath(tbl_version, base=base_path)

                tbl = View(
                    tbl_record.id, tbl_record.dir_id, tbl_md.name, view_path, base_tbl,
                    snapshot_only=snapshot_only)
                # register the view with its base
                self.tbl_dependents[base_tbl._id].append(tbl)

            else:
                tbl_version = TableVersion(tbl_record.id, tbl_md, tbl_md.current_version, schema_version_md)
                tbl = InsertableTable(tbl_record.dir_id, tbl_version)

            self.tbls[tbl._id] = tbl
            self.tbl_dependents[tbl._id] = []
            self.paths.add_schema_obj(tbl._dir_id, tbl_md.name, tbl)

    # def _load_functions(self, session: orm.Session) -> None:
    #     # load Function metadata; doesn't load the actual callable, which can be large and is only done on-demand by the
    #     # FunctionRegistry
    #     q = session.query(schema.Function.id, schema.Function.dir_id, schema.Function.md) \
    #         .where(sql.text(f"({schema.Function.__table__}.md->>'name')::text IS NOT NULL"))
    #     for id, dir_id, md in q.all():
    #         assert 'name' in md
    #         name = md['name']
    #         assert name is not None
    #         named_fn = NamedFunction(id, dir_id, name)
    #         self.paths.add_schema_obj(dir_id, name, named_fn)
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Optional, Union, Callable, Set
|
|
5
|
+
|
|
6
|
+
import sqlalchemy as sql
|
|
7
|
+
from pgvector.sqlalchemy import Vector
|
|
8
|
+
|
|
9
|
+
from pixeltable import exceptions as excs
|
|
10
|
+
from pixeltable.metadata import schema
|
|
11
|
+
from pixeltable.type_system import ColumnType, StringType
|
|
12
|
+
from .globals import is_valid_identifier
|
|
13
|
+
|
|
14
|
+
_logger = logging.getLogger('pixeltable')
|
|
15
|
+
|
|
16
|
+
class Column:
    """Representation of a column in the schema of a Table/DataFrame.

    A Column contains all the metadata necessary for executing queries and updates against a particular version of a
    table/view.
    """
    def __init__(
            self, name: str, col_type: Optional['ColumnType'] = None,
            computed_with: Optional[Union['Expr', Callable]] = None,
            primary_key: bool = False, stored: Optional[bool] = None,
            indexed: bool = False,
            # these parameters aren't set by users
            col_id: Optional[int] = None):
        """Column constructor.

        Args:
            name: column name; must be a valid identifier
            col_type: column type; can be None if the type can be derived from ``computed_with``
            computed_with: a callable or an Expr object that computes the column value
            primary_key: if True, this column is part of the primary key
            stored: determines whether a computed column is present in the stored table or recomputed on demand
            indexed: if True, this column has a nearest neighbor index (only valid for image columns)
            col_id: column ID (only used internally)

        Raises:
            excs.Error: if ``name`` is not a valid identifier, if neither ``col_type`` nor ``computed_with`` is
                given, if ``computed_with`` is neither a Pixeltable expression nor a callable, if ``computed_with``
                is a callable and ``col_type`` is missing, or if ``indexed`` is set for a non-image column.

        Computed columns: those have a non-None ``computed_with`` argument

        - when constructed by the user: ``computed_with`` was constructed explicitly and is passed in;
          col_type is None
        - when loaded from md store: ``computed_with`` is set and col_type is set

        ``computed_with`` is a Callable:

        - the callable's parameter names must correspond to existing columns in the table for which this Column
          is being used
        - ``col_type`` needs to be set to the callable's return type

        ``stored`` (only valid for computed image columns):

        - if True: the column is present in the stored table
        - if False: the column is not present in the stored table and recomputed during a query
        - if None: the system chooses for you (at present, this is always False, but this may change in the future)

        indexed: only valid for image columns; if true, maintains an NN index for this column
        """
        if not is_valid_identifier(name):
            raise excs.Error(f"Invalid column name: '{name}'")
        self.name = name
        if col_type is None and computed_with is None:
            raise excs.Error(f'Column {name}: col_type is required if computed_with is not specified')

        # exactly one of these is set for a computed column; both stay None for a plain column
        self.value_expr: Optional['Expr'] = None
        self.compute_func: Optional[Callable] = None
        # NOTE(review): imported locally rather than at module level, presumably to avoid a circular import
        from pixeltable import exprs
        if computed_with is not None:
            value_expr = exprs.Expr.from_object(computed_with)
            if value_expr is None:
                # computed_with couldn't be converted to an Expr, so it needs to be a Callable
                if not callable(computed_with):
                    raise excs.Error(
                        f'Column {name}: computed_with needs to be either a Pixeltable expression or a Callable, '
                        f'but it is a {type(computed_with)}')
                if col_type is None:
                    raise excs.Error(f'Column {name}: col_type is required if computed_with is a Callable')
                # we need to turn the computed_with function into an Expr, but this requires resolving
                # column name references and for that we need to wait until we're assigned to a Table
                self.compute_func = computed_with
            else:
                # work on a private copy of the expression and default the column type to the expression's type
                self.value_expr = value_expr.copy()
                self.col_type = self.value_expr.col_type

        # an explicitly supplied type takes precedence over the one derived from the expression
        if col_type is not None:
            self.col_type = col_type
        assert self.col_type is not None

        self.stored = stored
        self.dependent_cols: Set[Column] = set()  # cols with value_exprs that reference us; set by TableVersion
        self.id = col_id
        self.primary_key = primary_key

        # column in the stored table for the values of this Column
        self.sa_col: Optional[sql.schema.Column] = None

        # computed cols also have storage columns for the exception string and type
        self.sa_errormsg_col: Optional[sql.schema.Column] = None
        self.sa_errortype_col: Optional[sql.schema.Column] = None
        # indexed columns also have a column for the embeddings
        self.sa_idx_col: Optional[sql.schema.Column] = None
        from .table_version import TableVersion
        self.tbl: Optional[TableVersion] = None  # set by owning TableVersion

        if indexed and not self.col_type.is_image_type():
            raise excs.Error(f'Column {name}: indexed=True requires ImageType')
        self.is_indexed = indexed

    @classmethod
    def from_md(cls, col_id: int, md: 'schema.SchemaColumn', tbl: 'TableVersion') -> 'Column':
        """Construct a Column from metadata.

        Leaves out value_expr, because that requires TableVersion.cols to be complete.

        Args:
            col_id: ID of the column
            md: stored column metadata (name, type, primary-key/stored/indexed flags)
            tbl: the TableVersion that owns the new column

        Returns:
            The new Column, with ``tbl`` already assigned.
        """
        col = cls(
            md.name, col_type=ColumnType.from_dict(md.col_type), primary_key=md.is_pk,
            stored=md.stored, indexed=md.is_indexed, col_id=col_id)
        col.tbl = tbl
        return col

    def __hash__(self) -> int:
        """Hash on (owning table id, column id), consistent with ``__eq__``."""
        assert self.tbl is not None
        return hash((self.tbl.id, self.id))

    def check_value_expr(self) -> None:
        """Validate the value expression against the ``stored`` setting.

        Raises:
            excs.Error: if the column was explicitly declared ``stored=False`` but is computed with a window function.
        """
        assert self.value_expr is not None
        # only an explicit stored=False is rejected; stored=None means the choice is left to the system
        if self.stored is False and self.is_computed and self.has_window_fn_call():
            raise excs.Error(
                f'Column {self.name}: stored={self.stored} not supported for columns computed with window functions:'
                f'\n{self.value_expr}')

    def has_window_fn_call(self) -> bool:
        """Returns True if the value expression contains at least one window function call."""
        if self.value_expr is None:
            return False
        from pixeltable import exprs
        window_calls = self.value_expr.subexprs(
            filter=lambda e: isinstance(e, exprs.FunctionCall) and e.is_window_fn_call)
        # stop at the first match; count elements instead of relying on the truthiness of the exprs themselves
        return any(True for _ in window_calls)

    @property
    def is_computed(self) -> bool:
        """True if the column's values are produced by a compute function or a value expression."""
        return self.compute_func is not None or self.value_expr is not None

    @property
    def is_stored(self) -> bool:
        """Returns True if column is materialized in the stored table."""
        assert self.stored is not None
        return self.stored

    @property
    def records_errors(self) -> bool:
        """True if this column also stores error information."""
        return self.is_stored and (self.is_computed or self.col_type.is_media_type())

    def source(self) -> None:
        """
        If this is a computed col and the top-level expr is a function call, print the source, if possible.
        """
        if self.value_expr is None:
            return
        from pixeltable import exprs
        if not isinstance(self.value_expr, exprs.FunctionCall):
            return
        self.value_expr.fn.source()

    def create_sa_cols(self) -> None:
        """
        These need to be recreated for every new table schema version.
        """
        assert self.is_stored
        # all storage columns are nullable (we deal with null errors in Pixeltable directly)
        self.sa_col = sql.Column(self.storage_name(), self.col_type.to_sa_type(), nullable=True)
        # same condition as records_errors (is_stored was asserted above)
        if self.is_computed or self.col_type.is_media_type():
            self.sa_errormsg_col = sql.Column(self.errormsg_storage_name(), StringType().to_sa_type(), nullable=True)
            self.sa_errortype_col = sql.Column(self.errortype_storage_name(), StringType().to_sa_type(), nullable=True)
        if self.is_indexed:
            # NOTE(review): 512 is presumably the dimensionality of the stored embeddings -- confirm
            self.sa_idx_col = sql.Column(self.index_storage_name(), Vector(512), nullable=True)

    def storage_name(self) -> str:
        """Name of the column in the stored table that holds this column's values."""
        assert self.id is not None
        assert self.is_stored
        return f'col_{self.id}'

    def errormsg_storage_name(self) -> str:
        """Name of the storage column holding the error message."""
        return f'{self.storage_name()}_errormsg'

    def errortype_storage_name(self) -> str:
        """Name of the storage column holding the error type."""
        return f'{self.storage_name()}_errortype'

    def index_storage_name(self) -> str:
        """Name of the storage column holding the index data."""
        return f'{self.storage_name()}_idx_0'

    def __str__(self) -> str:
        return f'{self.name}: {self.col_type}'

    def __eq__(self, other: object) -> bool:
        """Columns are equal if they have the same column id and belong to a table with the same id."""
        if not isinstance(other, Column):
            return False
        assert self.tbl is not None
        assert other.tbl is not None
        return self.tbl.id == other.tbl.id and self.id == other.id
|
|
200
|
+
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import logging
|
|
5
|
+
from uuid import UUID
|
|
6
|
+
|
|
7
|
+
import sqlalchemy as sql
|
|
8
|
+
|
|
9
|
+
from .schema_object import SchemaObject
|
|
10
|
+
from pixeltable.env import Env
|
|
11
|
+
from pixeltable.metadata import schema
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
_logger = logging.getLogger('pixeltable')
|
|
15
|
+
|
|
16
|
+
class Dir(SchemaObject):
    """Schema object representing a directory."""

    def __init__(self, id: UUID, parent_id: UUID, name: str):
        """Create a directory with the given id and name under the directory ``parent_id``."""
        super().__init__(id, name, parent_id)

    @classmethod
    def display_name(cls) -> str:
        """Returns the user-facing name for this kind of schema object."""
        return 'directory'

    def move(self, new_name: str, new_dir_id: UUID) -> None:
        """Rename/relocate this directory and write the change to the metadata store.

        Args:
            new_name: the directory's new name
            new_dir_id: id of the new parent directory
        """
        super().move(new_name, new_dir_id)
        with Env.get().engine.begin() as conn:
            new_md = dataclasses.asdict(schema.DirMd(name=new_name))
            # NOTE(review): relies on super().move() having updated self._dir_id to new_dir_id -- confirm
            updated_values = {schema.Dir.parent_id: self._dir_id, schema.Dir.md: new_md}
            stmt = sql.update(schema.Dir.__table__).values(updated_values).where(schema.Dir.id == self._id)
            conn.execute(stmt)
|
|
32
|
+
|