pixeltable 0.2.21__tar.gz → 0.2.22__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- {pixeltable-0.2.21 → pixeltable-0.2.22}/PKG-INFO +46 -10
- {pixeltable-0.2.21 → pixeltable-0.2.22}/README.md +44 -8
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/__version__.py +2 -2
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/__init__.py +1 -1
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/column.py +37 -11
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/globals.py +18 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/insertable_table.py +6 -4
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/table.py +19 -3
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/table_version.py +34 -14
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/view.py +16 -17
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/dataframe.py +7 -8
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/env.py +5 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/__init__.py +0 -1
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/aggregation_node.py +6 -3
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/cache_prefetch_node.py +1 -1
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/data_row_batch.py +2 -19
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/exec_node.py +2 -1
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/expr_eval_node.py +17 -10
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/in_memory_data_node.py +6 -3
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/sql_node.py +24 -25
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/arithmetic_expr.py +3 -1
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/array_slice.py +7 -7
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/column_property_ref.py +37 -10
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/column_ref.py +93 -14
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/comparison.py +5 -5
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/compound_predicate.py +8 -7
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/data_row.py +27 -18
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/expr.py +53 -52
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/expr_set.py +5 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/function_call.py +32 -16
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/globals.py +4 -1
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/in_predicate.py +8 -7
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/inline_expr.py +4 -4
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/is_null.py +4 -4
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/json_mapper.py +11 -12
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/json_path.py +5 -10
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/literal.py +5 -5
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/method_ref.py +5 -4
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/object_ref.py +2 -1
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/row_builder.py +88 -36
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/rowid_ref.py +12 -11
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/similarity_expr.py +12 -7
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/sql_element_cache.py +7 -5
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/type_cast.py +8 -6
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/variable.py +5 -4
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/aggregate_function.py +1 -1
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/function.py +11 -10
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/__init__.py +2 -2
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/globals.py +5 -7
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/huggingface.py +19 -20
- pixeltable-0.2.22/pixeltable/functions/llama_cpp.py +106 -0
- pixeltable-0.2.22/pixeltable/functions/ollama.py +147 -0
- pixeltable-0.2.22/pixeltable/functions/replicate.py +72 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/string.py +9 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/globals.py +12 -20
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/index/btree.py +16 -3
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/index/embedding_index.py +4 -4
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/__init__.py +1 -2
- pixeltable-0.2.22/pixeltable/io/fiftyone.py +178 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/globals.py +96 -2
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/iterators/base.py +3 -2
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/iterators/document.py +1 -1
- pixeltable-0.2.22/pixeltable/iterators/video.py +194 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/__init__.py +1 -1
- pixeltable-0.2.22/pixeltable/metadata/converters/convert_21.py +34 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/util.py +45 -4
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/notes.py +1 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/schema.py +8 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/plan.py +16 -14
- pixeltable-0.2.22/pixeltable/py.typed +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/store.py +7 -2
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/create_test_video.py +1 -1
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/embed_udf.py +1 -1
- pixeltable-0.2.22/pixeltable/tool/mypy_plugin.py +55 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/type_system.py +17 -1
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/documents.py +15 -1
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/formatter.py +9 -10
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pyproject.toml +12 -8
- pixeltable-0.2.21/pixeltable/exec/media_validation_node.py +0 -43
- pixeltable-0.2.21/pixeltable/iterators/video.py +0 -137
- pixeltable-0.2.21/pixeltable/tool/mypy_plugin.py +0 -32
- {pixeltable-0.2.21 → pixeltable-0.2.22}/LICENSE +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/__init__.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/catalog.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/dir.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/named_function.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/path.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/path_dict.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/schema_object.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/table_version_path.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exceptions.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/component_iteration_node.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/exec_context.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/row_update_node.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/__init__.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/expr_dict.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/ext/__init__.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/ext/functions/__init__.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/ext/functions/whisperx.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/ext/functions/yolox.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/__init__.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/callable_function.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/expr_template_function.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/function_registry.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/globals.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/query_template_function.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/signature.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/udf.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/anthropic.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/audio.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/fireworks.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/image.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/json.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/mistralai.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/openai.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/timestamp.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/together.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/util.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/video.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/vision.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/whisper.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/index/__init__.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/index/base.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/external_store.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/hf_datasets.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/label_studio.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/pandas.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/parquet.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/iterators/__init__.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/iterators/string.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_10.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_12.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_13.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_14.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_15.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_16.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_17.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_18.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_19.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_20.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/create_test_db_dump.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/doc_plugins/griffe.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/doc_plugins/mkdocstrings.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/__init__.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/arrow.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/coco.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/code.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/filecache.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/http_server.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/media_store.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/pytorch.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/s3.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/sql.py +0 -0
- {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/transactional_directory.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: pixeltable
|
|
3
|
-
Version: 0.2.21
|
|
3
|
+
Version: 0.2.22
|
|
4
4
|
Summary: Pixeltable: The Multimodal AI Data Plane
|
|
5
5
|
Author: Pixeltable, Inc.
|
|
6
6
|
Author-email: contact@pixeltable.com
|
|
@@ -16,9 +16,9 @@ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
|
|
|
16
16
|
Requires-Dist: ftfy (>=6.2.0,<7.0.0)
|
|
17
17
|
Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
|
|
18
18
|
Requires-Dist: jmespath (>=1.0.1,<2.0.0)
|
|
19
|
+
Requires-Dist: lxml (>=5.0)
|
|
19
20
|
Requires-Dist: more-itertools (>=10.2,<11.0)
|
|
20
21
|
Requires-Dist: numpy (>=1.25,<2.0)
|
|
21
|
-
Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
|
|
22
22
|
Requires-Dist: pandas (>=2.0,<3.0)
|
|
23
23
|
Requires-Dist: pgvector (>=0.2.1,<0.3.0)
|
|
24
24
|
Requires-Dist: pillow (>=9.3.0)
|
|
@@ -39,6 +39,8 @@ Description-Content-Type: text/markdown
|
|
|
39
39
|
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="50%" />
|
|
40
40
|
<br></br>
|
|
41
41
|
|
|
42
|
+
<h2>AI Data Infrastructure — Declarative, Multimodal, and Incremental</h2>
|
|
43
|
+
|
|
42
44
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
43
45
|

|
|
44
46
|

|
|
@@ -46,9 +48,10 @@ Description-Content-Type: text/markdown
|
|
|
46
48
|
[](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml)
|
|
47
49
|
[](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml)
|
|
48
50
|
[](https://pypi.org/project/pixeltable/)
|
|
49
|
-
|
|
51
|
+
[](https://app.fossa.com/projects/git%2Bgithub.com%2Fpixeltable%2Fpixeltable?ref=badge_shield&issueType=security)
|
|
52
|
+
<a target="_blank" href="https://huggingface.co/Pixeltable"> <img src="https://img.shields.io/badge/🤗-HF Space-FF7D04" alt="Visit our Hugging Face space"/></a>
|
|
50
53
|
|
|
51
|
-
[Installation](https://pixeltable.
|
|
54
|
+
[Installation](https://docs.pixeltable.com/docs/installation) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#-code-samples) | [Computer Vision](https://docs.pixeltable.com/docs/object-detection-in-videos) | [LLM](https://docs.pixeltable.com/docs/document-indexing-and-rag)
|
|
52
55
|
</div>
|
|
53
56
|
|
|
54
57
|
Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to **store**, **transform**, **index**, and **iterate** on data for their ML workflows.
|
|
@@ -74,8 +77,9 @@ Learn how to create tables, populate them with data, and enhance them with built
|
|
|
74
77
|
|:----------|:-----------------|:-------------------------|:---------------------------------:|
|
|
75
78
|
| 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
76
79
|
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
77
|
-
|
|
|
78
|
-
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/
|
|
80
|
+
| Incremental Prompt Engineering | <a target="_blank" href="https://colab.research.google.com/github/mistralai/cookbook/blob/main/third_party/Pixeltable/incremental_prompt_engineering_and_model_comparison.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
81
|
+
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Documentation-013056" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
82
|
+
| Multimodal Application | <a target="_blank" href="https://huggingface.co/spaces/Pixeltable/Multimodal-Powerhouse"> <img src="https://img.shields.io/badge/Hugging Face-FF7D04" alt="Visit our Hugging Face space"/></a> | Document Indexing and RAG | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/rag-demo.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
79
83
|
|
|
80
84
|
## 🧱 Code Samples
|
|
81
85
|
|
|
@@ -83,7 +87,7 @@ Learn how to create tables, populate them with data, and enhance them with built
|
|
|
83
87
|
```python
|
|
84
88
|
import pixeltable as pxt
|
|
85
89
|
|
|
86
|
-
v = pxt.create_table('external_data.videos', {'video': pxt.
|
|
90
|
+
v = pxt.create_table('external_data.videos', {'video': pxt.Video})
|
|
87
91
|
|
|
88
92
|
prefix = 's3://multimedia-commons/'
|
|
89
93
|
paths = [
|
|
@@ -101,7 +105,7 @@ import pixeltable as pxt
|
|
|
101
105
|
from pixeltable.functions import huggingface
|
|
102
106
|
|
|
103
107
|
# Create a table to store data persistently
|
|
104
|
-
t = pxt.create_table('image', {'image': pxt.
|
|
108
|
+
t = pxt.create_table('image', {'image': pxt.Image})
|
|
105
109
|
|
|
106
110
|
# Insert some images
|
|
107
111
|
prefix = 'https://upload.wikimedia.org/wikipedia/commons'
|
|
@@ -158,7 +162,7 @@ Learn how to leverage Pixeltable for [Model analytics](https://pixeltable.readme
|
|
|
158
162
|
|
|
159
163
|
### Working with inference services
|
|
160
164
|
```python
|
|
161
|
-
chat_table = pxt.create_table('together_demo.chat', {'input': pxt.
|
|
165
|
+
chat_table = pxt.create_table('together_demo.chat', {'input': pxt.String})
|
|
162
166
|
|
|
163
167
|
# The chat-completions API expects JSON-formatted input:
|
|
164
168
|
messages = [{'role': 'user', 'content': chat_table.input}]
|
|
@@ -194,7 +198,7 @@ from pixeltable.functions.huggingface import clip_image, clip_text
|
|
|
194
198
|
from pixeltable.iterators import FrameIterator
|
|
195
199
|
import PIL.Image
|
|
196
200
|
|
|
197
|
-
video_table = pxt.create_table('videos', {'video': pxt.
|
|
201
|
+
video_table = pxt.create_table('videos', {'video': pxt.Video})
|
|
198
202
|
|
|
199
203
|
video_table.insert([{'video': '/video.mp4'}])
|
|
200
204
|
|
|
@@ -225,6 +229,38 @@ frames_view.order_by(sim, asc=False).limit(5).select(frames_view.frame, sim=sim)
|
|
|
225
229
|
```
|
|
226
230
|
Learn how to work with [Embedding and Vector Indexes](https://docs.pixeltable.com/docs/embedding-vector-indexes).
|
|
227
231
|
|
|
232
|
+
## 🔄 AI Stack Comparison
|
|
233
|
+
|
|
234
|
+
### 🎯 Computer Vision Workflows
|
|
235
|
+
|
|
236
|
+
| Requirement | Traditional | Pixeltable |
|
|
237
|
+
|-------------|---------------------|------------|
|
|
238
|
+
| Frame Extraction | ffmpeg + custom code | Automatic via FrameIterator |
|
|
239
|
+
| Object Detection | Multiple scripts + caching | Single computed column |
|
|
240
|
+
| Video Indexing | Custom pipelines + Vector DB | Native similarity search |
|
|
241
|
+
| Annotation Management | Separate tools + custom code | Label Studio integration |
|
|
242
|
+
| Model Evaluation | Custom metrics pipeline | Built-in mAP computation |
|
|
243
|
+
|
|
244
|
+
### 🤖 LLM Workflows
|
|
245
|
+
|
|
246
|
+
| Requirement | Traditional | Pixeltable |
|
|
247
|
+
|-------------|---------------------|------------|
|
|
248
|
+
| Document Chunking | Tool + custom code | Native DocumentSplitter |
|
|
249
|
+
| Embedding Generation | Separate pipeline + caching | Computed columns |
|
|
250
|
+
| Vector Search | External vector DB | Built-in vector indexing |
|
|
251
|
+
| Prompt Management | Custom tracking solution | Version-controlled columns |
|
|
252
|
+
| Chain Management | Tool + custom code | Computed column DAGs |
|
|
253
|
+
|
|
254
|
+
### 🎨 Multimodal Workflows
|
|
255
|
+
|
|
256
|
+
| Requirement | Traditional | Pixeltable |
|
|
257
|
+
|-------------|---------------------|------------|
|
|
258
|
+
| Data Types | Multiple storage systems | Unified table interface |
|
|
259
|
+
| Cross-Modal Search | Complex integration | Native similarity support |
|
|
260
|
+
| Pipeline Orchestration | Multiple tools (Airflow, etc.) | Single declarative interface |
|
|
261
|
+
| Asset Management | Custom tracking system | Automatic lineage |
|
|
262
|
+
| Quality Control | Multiple validation tools | Computed validation columns |
|
|
263
|
+
|
|
228
264
|
## ❓ FAQ
|
|
229
265
|
|
|
230
266
|
### What is Pixeltable?
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
<img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="50%" />
|
|
3
3
|
<br></br>
|
|
4
4
|
|
|
5
|
+
<h2>AI Data Infrastructure — Declarative, Multimodal, and Incremental</h2>
|
|
6
|
+
|
|
5
7
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
6
8
|

|
|
7
9
|

|
|
@@ -9,9 +11,10 @@
|
|
|
9
11
|
[](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml)
|
|
10
12
|
[](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml)
|
|
11
13
|
[](https://pypi.org/project/pixeltable/)
|
|
12
|
-
|
|
14
|
+
[](https://app.fossa.com/projects/git%2Bgithub.com%2Fpixeltable%2Fpixeltable?ref=badge_shield&issueType=security)
|
|
15
|
+
<a target="_blank" href="https://huggingface.co/Pixeltable"> <img src="https://img.shields.io/badge/🤗-HF Space-FF7D04" alt="Visit our Hugging Face space"/></a>
|
|
13
16
|
|
|
14
|
-
[Installation](https://pixeltable.
|
|
17
|
+
[Installation](https://docs.pixeltable.com/docs/installation) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#-code-samples) | [Computer Vision](https://docs.pixeltable.com/docs/object-detection-in-videos) | [LLM](https://docs.pixeltable.com/docs/document-indexing-and-rag)
|
|
15
18
|
</div>
|
|
16
19
|
|
|
17
20
|
Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to **store**, **transform**, **index**, and **iterate** on data for their ML workflows.
|
|
@@ -37,8 +40,9 @@ Learn how to create tables, populate them with data, and enhance them with built
|
|
|
37
40
|
|:----------|:-----------------|:-------------------------|:---------------------------------:|
|
|
38
41
|
| 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
39
42
|
| User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
40
|
-
|
|
|
41
|
-
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/
|
|
43
|
+
| Incremental Prompt Engineering | <a target="_blank" href="https://colab.research.google.com/github/mistralai/cookbook/blob/main/third_party/Pixeltable/incremental_prompt_engineering_and_model_comparison.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
44
|
+
| Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Documentation-013056" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
45
|
+
| Multimodal Application | <a target="_blank" href="https://huggingface.co/spaces/Pixeltable/Multimodal-Powerhouse"> <img src="https://img.shields.io/badge/Hugging Face-FF7D04" alt="Visit our Hugging Face space"/></a> | Document Indexing and RAG | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/rag-demo.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
|
|
42
46
|
|
|
43
47
|
## 🧱 Code Samples
|
|
44
48
|
|
|
@@ -46,7 +50,7 @@ Learn how to create tables, populate them with data, and enhance them with built
|
|
|
46
50
|
```python
|
|
47
51
|
import pixeltable as pxt
|
|
48
52
|
|
|
49
|
-
v = pxt.create_table('external_data.videos', {'video': pxt.
|
|
53
|
+
v = pxt.create_table('external_data.videos', {'video': pxt.Video})
|
|
50
54
|
|
|
51
55
|
prefix = 's3://multimedia-commons/'
|
|
52
56
|
paths = [
|
|
@@ -64,7 +68,7 @@ import pixeltable as pxt
|
|
|
64
68
|
from pixeltable.functions import huggingface
|
|
65
69
|
|
|
66
70
|
# Create a table to store data persistently
|
|
67
|
-
t = pxt.create_table('image', {'image': pxt.
|
|
71
|
+
t = pxt.create_table('image', {'image': pxt.Image})
|
|
68
72
|
|
|
69
73
|
# Insert some images
|
|
70
74
|
prefix = 'https://upload.wikimedia.org/wikipedia/commons'
|
|
@@ -121,7 +125,7 @@ Learn how to leverage Pixeltable for [Model analytics](https://pixeltable.readme
|
|
|
121
125
|
|
|
122
126
|
### Working with inference services
|
|
123
127
|
```python
|
|
124
|
-
chat_table = pxt.create_table('together_demo.chat', {'input': pxt.
|
|
128
|
+
chat_table = pxt.create_table('together_demo.chat', {'input': pxt.String})
|
|
125
129
|
|
|
126
130
|
# The chat-completions API expects JSON-formatted input:
|
|
127
131
|
messages = [{'role': 'user', 'content': chat_table.input}]
|
|
@@ -157,7 +161,7 @@ from pixeltable.functions.huggingface import clip_image, clip_text
|
|
|
157
161
|
from pixeltable.iterators import FrameIterator
|
|
158
162
|
import PIL.Image
|
|
159
163
|
|
|
160
|
-
video_table = pxt.create_table('videos', {'video': pxt.
|
|
164
|
+
video_table = pxt.create_table('videos', {'video': pxt.Video})
|
|
161
165
|
|
|
162
166
|
video_table.insert([{'video': '/video.mp4'}])
|
|
163
167
|
|
|
@@ -188,6 +192,38 @@ frames_view.order_by(sim, asc=False).limit(5).select(frames_view.frame, sim=sim)
|
|
|
188
192
|
```
|
|
189
193
|
Learn how to work with [Embedding and Vector Indexes](https://docs.pixeltable.com/docs/embedding-vector-indexes).
|
|
190
194
|
|
|
195
|
+
## 🔄 AI Stack Comparison
|
|
196
|
+
|
|
197
|
+
### 🎯 Computer Vision Workflows
|
|
198
|
+
|
|
199
|
+
| Requirement | Traditional | Pixeltable |
|
|
200
|
+
|-------------|---------------------|------------|
|
|
201
|
+
| Frame Extraction | ffmpeg + custom code | Automatic via FrameIterator |
|
|
202
|
+
| Object Detection | Multiple scripts + caching | Single computed column |
|
|
203
|
+
| Video Indexing | Custom pipelines + Vector DB | Native similarity search |
|
|
204
|
+
| Annotation Management | Separate tools + custom code | Label Studio integration |
|
|
205
|
+
| Model Evaluation | Custom metrics pipeline | Built-in mAP computation |
|
|
206
|
+
|
|
207
|
+
### 🤖 LLM Workflows
|
|
208
|
+
|
|
209
|
+
| Requirement | Traditional | Pixeltable |
|
|
210
|
+
|-------------|---------------------|------------|
|
|
211
|
+
| Document Chunking | Tool + custom code | Native DocumentSplitter |
|
|
212
|
+
| Embedding Generation | Separate pipeline + caching | Computed columns |
|
|
213
|
+
| Vector Search | External vector DB | Built-in vector indexing |
|
|
214
|
+
| Prompt Management | Custom tracking solution | Version-controlled columns |
|
|
215
|
+
| Chain Management | Tool + custom code | Computed column DAGs |
|
|
216
|
+
|
|
217
|
+
### 🎨 Multimodal Workflows
|
|
218
|
+
|
|
219
|
+
| Requirement | Traditional | Pixeltable |
|
|
220
|
+
|-------------|---------------------|------------|
|
|
221
|
+
| Data Types | Multiple storage systems | Unified table interface |
|
|
222
|
+
| Cross-Modal Search | Complex integration | Native similarity support |
|
|
223
|
+
| Pipeline Orchestration | Multiple tools (Airflow, etc.) | Single declarative interface |
|
|
224
|
+
| Asset Management | Custom tracking system | Automatic lineage |
|
|
225
|
+
| Quality Control | Multiple validation tools | Computed validation columns |
|
|
226
|
+
|
|
191
227
|
## ❓ FAQ
|
|
192
228
|
|
|
193
229
|
### What is Pixeltable?
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
# These version placeholders will be replaced during build.
|
|
2
|
-
__version__ = "0.2.21"
|
|
3
|
-
__version_tuple__ = (0, 2, 21)
|
|
2
|
+
__version__ = "0.2.22"
|
|
3
|
+
__version_tuple__ = (0, 2, 22)
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from .catalog import Catalog
|
|
2
2
|
from .column import Column
|
|
3
3
|
from .dir import Dir
|
|
4
|
-
from .globals import UpdateStatus, is_valid_identifier, is_valid_path
|
|
4
|
+
from .globals import UpdateStatus, is_valid_identifier, is_valid_path, MediaValidation
|
|
5
5
|
from .insertable_table import InsertableTable
|
|
6
6
|
from .named_function import NamedFunction
|
|
7
7
|
from .path import Path
|
|
@@ -8,24 +8,43 @@ import sqlalchemy as sql
|
|
|
8
8
|
import pixeltable.exceptions as excs
|
|
9
9
|
import pixeltable.type_system as ts
|
|
10
10
|
from pixeltable import exprs
|
|
11
|
-
|
|
12
|
-
from .globals import is_valid_identifier
|
|
11
|
+
from .globals import is_valid_identifier, MediaValidation
|
|
13
12
|
|
|
14
13
|
if TYPE_CHECKING:
|
|
15
14
|
from .table_version import TableVersion
|
|
16
15
|
|
|
17
16
|
_logger = logging.getLogger('pixeltable')
|
|
18
17
|
|
|
18
|
+
|
|
19
19
|
class Column:
|
|
20
20
|
"""Representation of a column in the schema of a Table/DataFrame.
|
|
21
21
|
|
|
22
22
|
A Column contains all the metadata necessary for executing queries and updates against a particular version of a
|
|
23
23
|
table/view.
|
|
24
24
|
"""
|
|
25
|
+
name: str
|
|
26
|
+
id: Optional[int]
|
|
27
|
+
col_type: ts.ColumnType
|
|
28
|
+
stored: bool
|
|
29
|
+
is_pk: bool
|
|
30
|
+
_media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
|
|
31
|
+
schema_version_add: Optional[int]
|
|
32
|
+
schema_version_drop: Optional[int]
|
|
33
|
+
_records_errors: Optional[bool]
|
|
34
|
+
sa_col: Optional[sql.schema.Column]
|
|
35
|
+
sa_col_type: Optional[sql.sqltypes.TypeEngine]
|
|
36
|
+
sa_errormsg_col: Optional[sql.schema.Column]
|
|
37
|
+
sa_errortype_col: Optional[sql.schema.Column]
|
|
38
|
+
compute_func: Optional[Callable]
|
|
39
|
+
_value_expr: Optional[exprs.Expr]
|
|
40
|
+
value_expr_dict: Optional[dict[str, Any]]
|
|
41
|
+
dependent_cols: set[Column]
|
|
42
|
+
tbl: Optional[TableVersion]
|
|
43
|
+
|
|
25
44
|
def __init__(
|
|
26
45
|
self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
|
|
27
46
|
computed_with: Optional[Union[exprs.Expr, Callable]] = None,
|
|
28
|
-
is_pk: bool = False, stored: bool = True,
|
|
47
|
+
is_pk: bool = False, stored: bool = True, media_validation: Optional[MediaValidation] = None,
|
|
29
48
|
col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
|
|
30
49
|
schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
|
|
31
50
|
records_errors: Optional[bool] = None, value_expr_dict: Optional[dict[str, Any]] = None,
|
|
@@ -61,8 +80,8 @@ class Column:
|
|
|
61
80
|
if col_type is None and computed_with is None:
|
|
62
81
|
raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
|
|
63
82
|
|
|
64
|
-
self._value_expr
|
|
65
|
-
self.compute_func
|
|
83
|
+
self._value_expr = None
|
|
84
|
+
self.compute_func = None
|
|
66
85
|
self.value_expr_dict = value_expr_dict
|
|
67
86
|
if computed_with is not None:
|
|
68
87
|
value_expr = exprs.Expr.from_object(computed_with)
|
|
@@ -86,24 +105,24 @@ class Column:
|
|
|
86
105
|
assert self.col_type is not None
|
|
87
106
|
|
|
88
107
|
self.stored = stored
|
|
89
|
-
self.dependent_cols
|
|
108
|
+
self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
|
|
90
109
|
self.id = col_id
|
|
91
110
|
self.is_pk = is_pk
|
|
111
|
+
self._media_validation = media_validation
|
|
92
112
|
self.schema_version_add = schema_version_add
|
|
93
113
|
self.schema_version_drop = schema_version_drop
|
|
94
114
|
|
|
95
115
|
self._records_errors = records_errors
|
|
96
116
|
|
|
97
117
|
# column in the stored table for the values of this Column
|
|
98
|
-
self.sa_col
|
|
118
|
+
self.sa_col = None
|
|
99
119
|
self.sa_col_type = sa_col_type
|
|
100
120
|
|
|
101
121
|
# computed cols also have storage columns for the exception string and type
|
|
102
|
-
self.sa_errormsg_col
|
|
103
|
-
self.sa_errortype_col
|
|
122
|
+
self.sa_errormsg_col = None
|
|
123
|
+
self.sa_errortype_col = None
|
|
104
124
|
|
|
105
|
-
|
|
106
|
-
self.tbl: Optional[TableVersion] = None # set by owning TableVersion
|
|
125
|
+
self.tbl = None # set by owning TableVersion
|
|
107
126
|
|
|
108
127
|
@property
|
|
109
128
|
def value_expr(self) -> Optional[exprs.Expr]:
|
|
@@ -160,6 +179,13 @@ class Column:
|
|
|
160
179
|
assert self.tbl is not None
|
|
161
180
|
return f'{self.tbl.name}.{self.name}'
|
|
162
181
|
|
|
182
|
+
@property
|
|
183
|
+
def media_validation(self) -> MediaValidation:
|
|
184
|
+
if self._media_validation is not None:
|
|
185
|
+
return self._media_validation
|
|
186
|
+
assert self.tbl is not None
|
|
187
|
+
return self.tbl.media_validation
|
|
188
|
+
|
|
163
189
|
def source(self) -> None:
|
|
164
190
|
"""
|
|
165
191
|
If this is a computed col and the top-level expr is a function call, print the source, if possible.
|
|
@@ -1,8 +1,12 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
1
2
|
import dataclasses
|
|
3
|
+
import enum
|
|
2
4
|
import itertools
|
|
3
5
|
import logging
|
|
4
6
|
from typing import Optional
|
|
5
7
|
|
|
8
|
+
import pixeltable.exceptions as excs
|
|
9
|
+
|
|
6
10
|
_logger = logging.getLogger('pixeltable')
|
|
7
11
|
|
|
8
12
|
# name of the position column in a component view
|
|
@@ -34,6 +38,20 @@ class UpdateStatus:
|
|
|
34
38
|
self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
|
|
35
39
|
return self
|
|
36
40
|
|
|
41
|
+
|
|
42
|
+
class MediaValidation(enum.Enum):
|
|
43
|
+
ON_READ = 0
|
|
44
|
+
ON_WRITE = 1
|
|
45
|
+
|
|
46
|
+
@classmethod
|
|
47
|
+
def validated(cls, name: str, error_prefix: str) -> MediaValidation:
|
|
48
|
+
try:
|
|
49
|
+
return cls[name.upper()]
|
|
50
|
+
except KeyError:
|
|
51
|
+
val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__.keys())
|
|
52
|
+
raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]')
|
|
53
|
+
|
|
54
|
+
|
|
37
55
|
def is_valid_identifier(name: str) -> bool:
|
|
38
56
|
return name.isidentifier() and not name.startswith('_')
|
|
39
57
|
|
|
@@ -13,7 +13,7 @@ from pixeltable.env import Env
|
|
|
13
13
|
from pixeltable.utils.filecache import FileCache
|
|
14
14
|
|
|
15
15
|
from .catalog import Catalog
|
|
16
|
-
from .globals import UpdateStatus
|
|
16
|
+
from .globals import UpdateStatus, MediaValidation
|
|
17
17
|
from .table import Table
|
|
18
18
|
from .table_version import TableVersion
|
|
19
19
|
from .table_version_path import TableVersionPath
|
|
@@ -35,8 +35,8 @@ class InsertableTable(Table):
|
|
|
35
35
|
# MODULE-LOCAL, NOT PUBLIC
|
|
36
36
|
@classmethod
|
|
37
37
|
def _create(
|
|
38
|
-
|
|
39
|
-
|
|
38
|
+
cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame],
|
|
39
|
+
primary_key: List[str], num_retained_versions: int, comment: str, media_validation: MediaValidation
|
|
40
40
|
) -> InsertableTable:
|
|
41
41
|
columns = cls._create_columns(schema)
|
|
42
42
|
cls._verify_schema(columns)
|
|
@@ -50,7 +50,9 @@ class InsertableTable(Table):
|
|
|
50
50
|
col.is_pk = True
|
|
51
51
|
|
|
52
52
|
with orm.Session(Env.get().engine, future=True) as session:
|
|
53
|
-
_, tbl_version = TableVersion.create(
|
|
53
|
+
_, tbl_version = TableVersion.create(
|
|
54
|
+
session, dir_id, name, columns, num_retained_versions=num_retained_versions, comment=comment,
|
|
55
|
+
media_validation=media_validation)
|
|
54
56
|
tbl = cls(dir_id, tbl_version)
|
|
55
57
|
# TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
|
|
56
58
|
# when the table metadata gets updated. Once we have a notion of user-defined transactions in
|
|
@@ -24,7 +24,7 @@ import pixeltable.type_system as ts
|
|
|
24
24
|
from pixeltable.utils.filecache import FileCache
|
|
25
25
|
|
|
26
26
|
from .column import Column
|
|
27
|
-
from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier
|
|
27
|
+
from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier, MediaValidation
|
|
28
28
|
from .schema_object import SchemaObject
|
|
29
29
|
from .table_version import TableVersion
|
|
30
30
|
from .table_version_path import TableVersionPath
|
|
@@ -91,6 +91,7 @@ class Table(SchemaObject):
|
|
|
91
91
|
'num_retained_versions': 10,
|
|
92
92
|
'is_view': False,
|
|
93
93
|
'is_snapshot': False,
|
|
94
|
+
'media_validation': 'on_write',
|
|
94
95
|
}
|
|
95
96
|
```
|
|
96
97
|
"""
|
|
@@ -101,6 +102,7 @@ class Table(SchemaObject):
|
|
|
101
102
|
md['schema_version'] = self._tbl_version.schema_version
|
|
102
103
|
md['comment'] = self._comment
|
|
103
104
|
md['num_retained_versions'] = self._num_retained_versions
|
|
105
|
+
md['media_validation'] = self._media_validation.name.lower()
|
|
104
106
|
return md
|
|
105
107
|
|
|
106
108
|
@property
|
|
@@ -244,6 +246,10 @@ class Table(SchemaObject):
|
|
|
244
246
|
def _num_retained_versions(self):
|
|
245
247
|
return self._tbl_version.num_retained_versions
|
|
246
248
|
|
|
249
|
+
@property
|
|
250
|
+
def _media_validation(self) -> MediaValidation:
|
|
251
|
+
return self._tbl_version.media_validation
|
|
252
|
+
|
|
247
253
|
def _description(self) -> pd.DataFrame:
|
|
248
254
|
cols = self._tbl_version_path.columns()
|
|
249
255
|
df = pd.DataFrame({
|
|
@@ -422,7 +428,7 @@ class Table(SchemaObject):
|
|
|
422
428
|
(on account of containing Python Callables or Exprs).
|
|
423
429
|
"""
|
|
424
430
|
assert isinstance(spec, dict)
|
|
425
|
-
valid_keys = {'type', 'value', 'stored'}
|
|
431
|
+
valid_keys = {'type', 'value', 'stored', 'media_validation'}
|
|
426
432
|
has_type = False
|
|
427
433
|
for k in spec.keys():
|
|
428
434
|
if k not in valid_keys:
|
|
@@ -449,6 +455,9 @@ class Table(SchemaObject):
|
|
|
449
455
|
if 'type' in spec:
|
|
450
456
|
raise excs.Error(f'Column {name}: "type" is redundant if value is a Pixeltable expression')
|
|
451
457
|
|
|
458
|
+
if 'media_validation' in spec:
|
|
459
|
+
_ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
|
|
460
|
+
|
|
452
461
|
if 'stored' in spec and not isinstance(spec['stored'], bool):
|
|
453
462
|
raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
|
|
454
463
|
if not has_type:
|
|
@@ -462,6 +471,7 @@ class Table(SchemaObject):
|
|
|
462
471
|
col_type: Optional[ts.ColumnType] = None
|
|
463
472
|
value_expr: Optional[exprs.Expr] = None
|
|
464
473
|
primary_key: Optional[bool] = None
|
|
474
|
+
media_validation: Optional[catalog.MediaValidation] = None
|
|
465
475
|
stored = True
|
|
466
476
|
|
|
467
477
|
if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
|
|
@@ -484,9 +494,15 @@ class Table(SchemaObject):
|
|
|
484
494
|
value_expr = value_expr.copy()
|
|
485
495
|
stored = spec.get('stored', True)
|
|
486
496
|
primary_key = spec.get('primary_key')
|
|
497
|
+
media_validation_str = spec.get('media_validation')
|
|
498
|
+
media_validation = (
|
|
499
|
+
catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None
|
|
500
|
+
else None
|
|
501
|
+
)
|
|
487
502
|
|
|
488
503
|
column = Column(
|
|
489
|
-
name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key
|
|
504
|
+
name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key,
|
|
505
|
+
media_validation=media_validation)
|
|
490
506
|
columns.append(column)
|
|
491
507
|
return columns
|
|
492
508
|
|
|
@@ -26,7 +26,7 @@ from pixeltable.utils.media_store import MediaStore
|
|
|
26
26
|
|
|
27
27
|
from ..func.globals import resolve_symbol
|
|
28
28
|
from .column import Column
|
|
29
|
-
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, UpdateStatus, is_valid_identifier
|
|
29
|
+
from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, UpdateStatus, is_valid_identifier, MediaValidation
|
|
30
30
|
|
|
31
31
|
if TYPE_CHECKING:
|
|
32
32
|
from pixeltable import exec, store
|
|
@@ -53,6 +53,7 @@ class TableVersion:
|
|
|
53
53
|
name: str
|
|
54
54
|
version: int
|
|
55
55
|
comment: str
|
|
56
|
+
media_validation: MediaValidation
|
|
56
57
|
num_retained_versions: int
|
|
57
58
|
schema_version: int
|
|
58
59
|
view_md: Optional[schema.ViewMd]
|
|
@@ -109,6 +110,7 @@ class TableVersion:
|
|
|
109
110
|
self.view_md = tbl_md.view_md # save this as-is, it's needed for _create_md()
|
|
110
111
|
is_view = tbl_md.view_md is not None
|
|
111
112
|
self.is_snapshot = (is_view and tbl_md.view_md.is_snapshot) or bool(is_snapshot)
|
|
113
|
+
self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
|
|
112
114
|
# a mutable TableVersion doesn't have a static version
|
|
113
115
|
self.effective_version = self.version if self.is_snapshot else None
|
|
114
116
|
|
|
@@ -182,7 +184,7 @@ class TableVersion:
|
|
|
182
184
|
@classmethod
|
|
183
185
|
def create(
|
|
184
186
|
cls, session: orm.Session, dir_id: UUID, name: str, cols: list[Column], num_retained_versions: int,
|
|
185
|
-
comment: str, base_path: Optional[pxt.catalog.TableVersionPath] = None,
|
|
187
|
+
comment: str, media_validation: MediaValidation, base_path: Optional[pxt.catalog.TableVersionPath] = None,
|
|
186
188
|
view_md: Optional[schema.ViewMd] = None
|
|
187
189
|
) -> tuple[UUID, Optional[TableVersion]]:
|
|
188
190
|
# assign ids
|
|
@@ -214,11 +216,17 @@ class TableVersion:
|
|
|
214
216
|
tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md))
|
|
215
217
|
|
|
216
218
|
# create schema.TableSchemaVersion
|
|
217
|
-
schema_col_md
|
|
219
|
+
schema_col_md: dict[int, schema.SchemaColumn] = {}
|
|
220
|
+
for pos, col in enumerate(cols):
|
|
221
|
+
md = schema.SchemaColumn(
|
|
222
|
+
pos=pos, name=col.name,
|
|
223
|
+
media_validation=col._media_validation.name.lower() if col._media_validation is not None else None)
|
|
224
|
+
schema_col_md[col.id] = md
|
|
218
225
|
|
|
219
226
|
schema_version_md = schema.TableSchemaVersionMd(
|
|
220
227
|
schema_version=0, preceding_schema_version=None, columns=schema_col_md,
|
|
221
|
-
num_retained_versions=num_retained_versions, comment=comment
|
|
228
|
+
num_retained_versions=num_retained_versions, comment=comment,
|
|
229
|
+
media_validation=media_validation.name.lower())
|
|
222
230
|
schema_version_record = schema.TableSchemaVersion(
|
|
223
231
|
tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md))
|
|
224
232
|
|
|
@@ -285,10 +293,15 @@ class TableVersion:
|
|
|
285
293
|
self.cols_by_name = {}
|
|
286
294
|
self.cols_by_id = {}
|
|
287
295
|
for col_md in tbl_md.column_md.values():
|
|
288
|
-
|
|
296
|
+
schema_col_md = schema_version_md.columns[col_md.id] if col_md.id in schema_version_md.columns else None
|
|
297
|
+
col_name = schema_col_md.name if schema_col_md is not None else None
|
|
298
|
+
media_val = (
|
|
299
|
+
MediaValidation[schema_col_md.media_validation.upper()]
|
|
300
|
+
if schema_col_md is not None and schema_col_md.media_validation is not None else None
|
|
301
|
+
)
|
|
289
302
|
col = Column(
|
|
290
303
|
col_id=col_md.id, name=col_name, col_type=ts.ColumnType.from_dict(col_md.col_type),
|
|
291
|
-
is_pk=col_md.is_pk, stored=col_md.stored,
|
|
304
|
+
is_pk=col_md.is_pk, stored=col_md.stored, media_validation=media_val,
|
|
292
305
|
schema_version_add=col_md.schema_version_add, schema_version_drop=col_md.schema_version_drop,
|
|
293
306
|
value_expr_dict=col_md.value_expr)
|
|
294
307
|
col.tbl = self
|
|
@@ -349,7 +362,8 @@ class TableVersion:
|
|
|
349
362
|
self.store_tbl = StoreTable(self)
|
|
350
363
|
|
|
351
364
|
def _update_md(
|
|
352
|
-
|
|
365
|
+
self, timestamp: float, conn: sql.engine.Connection, update_tbl_version: bool = True,
|
|
366
|
+
preceding_schema_version: Optional[int] = None
|
|
353
367
|
) -> None:
|
|
354
368
|
"""Writes table metadata to the database.
|
|
355
369
|
|
|
@@ -710,20 +724,22 @@ class TableVersion:
|
|
|
710
724
|
|
|
711
725
|
if conn is None:
|
|
712
726
|
with Env.get().engine.begin() as conn:
|
|
713
|
-
return self._insert(
|
|
727
|
+
return self._insert(
|
|
728
|
+
plan, conn, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
|
|
714
729
|
else:
|
|
715
|
-
return self._insert(
|
|
730
|
+
return self._insert(
|
|
731
|
+
plan, conn, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
|
|
716
732
|
|
|
717
733
|
def _insert(
|
|
718
734
|
self, exec_plan: 'exec.ExecNode', conn: sql.engine.Connection, timestamp: float, *,
|
|
719
|
-
rowids: Optional[Iterator[int]] = None, print_stats: bool = False,
|
|
735
|
+
rowids: Optional[Iterator[int]] = None, print_stats: bool = False, abort_on_exc: bool = False
|
|
720
736
|
) -> UpdateStatus:
|
|
721
737
|
"""Insert rows produced by exec_plan and propagate to views"""
|
|
722
738
|
# we're creating a new version
|
|
723
739
|
self.version += 1
|
|
724
740
|
result = UpdateStatus()
|
|
725
741
|
num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
|
|
726
|
-
exec_plan, conn, v_min=self.version, rowids=rowids)
|
|
742
|
+
exec_plan, conn, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc)
|
|
727
743
|
result.num_rows = num_rows
|
|
728
744
|
result.num_excs = num_excs
|
|
729
745
|
result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
|
|
@@ -1203,7 +1219,8 @@ class TableVersion:
|
|
|
1203
1219
|
name=self.name, current_version=self.version, current_schema_version=self.schema_version,
|
|
1204
1220
|
next_col_id=self.next_col_id, next_idx_id=self.next_idx_id, next_row_id=self.next_rowid,
|
|
1205
1221
|
column_md=self._create_column_md(self.cols), index_md=self.idx_md,
|
|
1206
|
-
external_stores=self._create_stores_md(self.external_stores.values()), view_md=self.view_md
|
|
1222
|
+
external_stores=self._create_stores_md(self.external_stores.values()), view_md=self.view_md,
|
|
1223
|
+
)
|
|
1207
1224
|
|
|
1208
1225
|
def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
|
|
1209
1226
|
return schema.TableVersionMd(created_at=timestamp, version=self.version, schema_version=self.schema_version)
|
|
@@ -1211,11 +1228,14 @@ class TableVersion:
|
|
|
1211
1228
|
def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
|
|
1212
1229
|
column_md: dict[int, schema.SchemaColumn] = {}
|
|
1213
1230
|
for pos, col in enumerate(self.cols_by_name.values()):
|
|
1214
|
-
column_md[col.id] = schema.SchemaColumn(
|
|
1231
|
+
column_md[col.id] = schema.SchemaColumn(
|
|
1232
|
+
pos=pos, name=col.name,
|
|
1233
|
+
media_validation=col._media_validation.name.lower() if col._media_validation is not None else None)
|
|
1215
1234
|
# preceding_schema_version to be set by the caller
|
|
1216
1235
|
return schema.TableSchemaVersionMd(
|
|
1217
1236
|
schema_version=self.schema_version, preceding_schema_version=preceding_schema_version,
|
|
1218
|
-
columns=column_md, num_retained_versions=self.num_retained_versions, comment=self.comment
|
|
1237
|
+
columns=column_md, num_retained_versions=self.num_retained_versions, comment=self.comment,
|
|
1238
|
+
media_validation=self.media_validation.name.lower())
|
|
1219
1239
|
|
|
1220
1240
|
def as_dict(self) -> dict:
|
|
1221
1241
|
return {'id': str(self.id), 'effective_version': self.effective_version}
|