pixeltable 0.2.21__tar.gz → 0.2.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (155) hide show
  1. {pixeltable-0.2.21 → pixeltable-0.2.22}/PKG-INFO +46 -10
  2. {pixeltable-0.2.21 → pixeltable-0.2.22}/README.md +44 -8
  3. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/__version__.py +2 -2
  4. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/__init__.py +1 -1
  5. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/column.py +37 -11
  6. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/globals.py +18 -0
  7. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/insertable_table.py +6 -4
  8. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/table.py +19 -3
  9. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/table_version.py +34 -14
  10. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/view.py +16 -17
  11. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/dataframe.py +7 -8
  12. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/env.py +5 -0
  13. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/__init__.py +0 -1
  14. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/aggregation_node.py +6 -3
  15. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/cache_prefetch_node.py +1 -1
  16. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/data_row_batch.py +2 -19
  17. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/exec_node.py +2 -1
  18. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/expr_eval_node.py +17 -10
  19. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/in_memory_data_node.py +6 -3
  20. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/sql_node.py +24 -25
  21. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/arithmetic_expr.py +3 -1
  22. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/array_slice.py +7 -7
  23. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/column_property_ref.py +37 -10
  24. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/column_ref.py +93 -14
  25. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/comparison.py +5 -5
  26. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/compound_predicate.py +8 -7
  27. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/data_row.py +27 -18
  28. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/expr.py +53 -52
  29. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/expr_set.py +5 -0
  30. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/function_call.py +32 -16
  31. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/globals.py +4 -1
  32. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/in_predicate.py +8 -7
  33. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/inline_expr.py +4 -4
  34. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/is_null.py +4 -4
  35. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/json_mapper.py +11 -12
  36. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/json_path.py +5 -10
  37. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/literal.py +5 -5
  38. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/method_ref.py +5 -4
  39. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/object_ref.py +2 -1
  40. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/row_builder.py +88 -36
  41. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/rowid_ref.py +12 -11
  42. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/similarity_expr.py +12 -7
  43. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/sql_element_cache.py +7 -5
  44. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/type_cast.py +8 -6
  45. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/variable.py +5 -4
  46. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/aggregate_function.py +1 -1
  47. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/function.py +11 -10
  48. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/__init__.py +2 -2
  49. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/globals.py +5 -7
  50. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/huggingface.py +19 -20
  51. pixeltable-0.2.22/pixeltable/functions/llama_cpp.py +106 -0
  52. pixeltable-0.2.22/pixeltable/functions/ollama.py +147 -0
  53. pixeltable-0.2.22/pixeltable/functions/replicate.py +72 -0
  54. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/string.py +9 -0
  55. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/globals.py +12 -20
  56. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/index/btree.py +16 -3
  57. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/index/embedding_index.py +4 -4
  58. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/__init__.py +1 -2
  59. pixeltable-0.2.22/pixeltable/io/fiftyone.py +178 -0
  60. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/globals.py +96 -2
  61. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/iterators/base.py +3 -2
  62. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/iterators/document.py +1 -1
  63. pixeltable-0.2.22/pixeltable/iterators/video.py +194 -0
  64. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/__init__.py +1 -1
  65. pixeltable-0.2.22/pixeltable/metadata/converters/convert_21.py +34 -0
  66. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/util.py +45 -4
  67. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/notes.py +1 -0
  68. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/schema.py +8 -0
  69. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/plan.py +16 -14
  70. pixeltable-0.2.22/pixeltable/py.typed +0 -0
  71. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/store.py +7 -2
  72. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/create_test_video.py +1 -1
  73. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/embed_udf.py +1 -1
  74. pixeltable-0.2.22/pixeltable/tool/mypy_plugin.py +55 -0
  75. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/type_system.py +17 -1
  76. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/documents.py +15 -1
  77. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/formatter.py +9 -10
  78. {pixeltable-0.2.21 → pixeltable-0.2.22}/pyproject.toml +12 -8
  79. pixeltable-0.2.21/pixeltable/exec/media_validation_node.py +0 -43
  80. pixeltable-0.2.21/pixeltable/iterators/video.py +0 -137
  81. pixeltable-0.2.21/pixeltable/tool/mypy_plugin.py +0 -32
  82. {pixeltable-0.2.21 → pixeltable-0.2.22}/LICENSE +0 -0
  83. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/__init__.py +0 -0
  84. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/catalog.py +0 -0
  85. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/dir.py +0 -0
  86. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/named_function.py +0 -0
  87. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/path.py +0 -0
  88. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/path_dict.py +0 -0
  89. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/schema_object.py +0 -0
  90. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/catalog/table_version_path.py +0 -0
  91. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exceptions.py +0 -0
  92. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/component_iteration_node.py +0 -0
  93. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/exec_context.py +0 -0
  94. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exec/row_update_node.py +0 -0
  95. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/__init__.py +0 -0
  96. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/exprs/expr_dict.py +0 -0
  97. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/ext/__init__.py +0 -0
  98. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/ext/functions/__init__.py +0 -0
  99. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/ext/functions/whisperx.py +0 -0
  100. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/ext/functions/yolox.py +0 -0
  101. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/__init__.py +0 -0
  102. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/callable_function.py +0 -0
  103. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/expr_template_function.py +0 -0
  104. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/function_registry.py +0 -0
  105. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/globals.py +0 -0
  106. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/query_template_function.py +0 -0
  107. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/signature.py +0 -0
  108. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/func/udf.py +0 -0
  109. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/anthropic.py +0 -0
  110. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/audio.py +0 -0
  111. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/fireworks.py +0 -0
  112. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/image.py +0 -0
  113. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/json.py +0 -0
  114. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/mistralai.py +0 -0
  115. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/openai.py +0 -0
  116. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/timestamp.py +0 -0
  117. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/together.py +0 -0
  118. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/util.py +0 -0
  119. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/video.py +0 -0
  120. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/vision.py +0 -0
  121. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/functions/whisper.py +0 -0
  122. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/index/__init__.py +0 -0
  123. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/index/base.py +0 -0
  124. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/external_store.py +0 -0
  125. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/hf_datasets.py +0 -0
  126. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/label_studio.py +0 -0
  127. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/pandas.py +0 -0
  128. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/io/parquet.py +0 -0
  129. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/iterators/__init__.py +0 -0
  130. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/iterators/string.py +0 -0
  131. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_10.py +0 -0
  132. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_12.py +0 -0
  133. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_13.py +0 -0
  134. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_14.py +0 -0
  135. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_15.py +0 -0
  136. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_16.py +0 -0
  137. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_17.py +0 -0
  138. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_18.py +0 -0
  139. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_19.py +0 -0
  140. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/metadata/converters/convert_20.py +0 -0
  141. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/create_test_db_dump.py +0 -0
  142. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/doc_plugins/griffe.py +0 -0
  143. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/doc_plugins/mkdocstrings.py +0 -0
  144. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -0
  145. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/__init__.py +0 -0
  146. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/arrow.py +0 -0
  147. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/coco.py +0 -0
  148. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/code.py +0 -0
  149. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/filecache.py +0 -0
  150. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/http_server.py +0 -0
  151. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/media_store.py +0 -0
  152. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/pytorch.py +0 -0
  153. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/s3.py +0 -0
  154. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/sql.py +0 -0
  155. {pixeltable-0.2.21 → pixeltable-0.2.22}/pixeltable/utils/transactional_directory.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pixeltable
3
- Version: 0.2.21
3
+ Version: 0.2.22
4
4
  Summary: Pixeltable: The Multimodal AI Data Plane
5
5
  Author: Pixeltable, Inc.
6
6
  Author-email: contact@pixeltable.com
@@ -16,9 +16,9 @@ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
16
16
  Requires-Dist: ftfy (>=6.2.0,<7.0.0)
17
17
  Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
18
18
  Requires-Dist: jmespath (>=1.0.1,<2.0.0)
19
+ Requires-Dist: lxml (>=5.0)
19
20
  Requires-Dist: more-itertools (>=10.2,<11.0)
20
21
  Requires-Dist: numpy (>=1.25,<2.0)
21
- Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
22
22
  Requires-Dist: pandas (>=2.0,<3.0)
23
23
  Requires-Dist: pgvector (>=0.2.1,<0.3.0)
24
24
  Requires-Dist: pillow (>=9.3.0)
@@ -39,6 +39,8 @@ Description-Content-Type: text/markdown
39
39
  <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="50%" />
40
40
  <br></br>
41
41
 
42
+ <h2>AI Data Infrastructure — Declarative, Multimodal, and Incremental</h2>
43
+
42
44
  [![License](https://img.shields.io/badge/License-Apache%202.0-0530AD.svg)](https://opensource.org/licenses/Apache-2.0)
43
45
  ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white&)
44
46
  ![Platform Support](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-E5DDD4)
@@ -46,9 +48,10 @@ Description-Content-Type: text/markdown
46
48
  [![tests status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml)
47
49
  [![tests status](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml)
48
50
  [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=4D148C)](https://pypi.org/project/pixeltable/)
49
- <a target="_blank" href="https://huggingface.co/Pixeltable"> <img src="https://img.shields.io/badge/🤗-HF Space-F25022" alt="Visit our Hugging Face space"/></a>
51
+ [![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fpixeltable%2Fpixeltable.svg?type=shield&issueType=security)](https://app.fossa.com/projects/git%2Bgithub.com%2Fpixeltable%2Fpixeltable?ref=badge_shield&issueType=security)
52
+ <a target="_blank" href="https://huggingface.co/Pixeltable"> <img src="https://img.shields.io/badge/🤗-HF Space-FF7D04" alt="Visit our Hugging Face space"/></a>
50
53
 
51
- [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#-code-samples) | [Computer Vision](https://docs.pixeltable.com/docs/object-detection-in-videos) | [LLM](https://docs.pixeltable.com/docs/document-indexing-and-rag)
54
+ [Installation](https://docs.pixeltable.com/docs/installation) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#-code-samples) | [Computer Vision](https://docs.pixeltable.com/docs/object-detection-in-videos) | [LLM](https://docs.pixeltable.com/docs/document-indexing-and-rag)
52
55
  </div>
53
56
 
54
57
  Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to **store**, **transform**, **index**, and **iterate** on data for their ML workflows.
@@ -74,8 +77,9 @@ Learn how to create tables, populate them with data, and enhance them with built
74
77
  |:----------|:-----------------|:-------------------------|:---------------------------------:|
75
78
  | 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
76
79
  | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
77
- | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
78
- | Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
80
+ | Incremental Prompt Engineering | <a target="_blank" href="https://colab.research.google.com/github/mistralai/cookbook/blob/main/third_party/Pixeltable/incremental_prompt_engineering_and_model_comparison.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
81
+ | Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Documentation-013056" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
82
+ | Multimodal Application | <a target="_blank" href="https://huggingface.co/spaces/Pixeltable/Multimodal-Powerhouse"> <img src="https://img.shields.io/badge/Hugging Face-FF7D04" alt="Visit our Hugging Face space"/></a> | Document Indexing and RAG | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/rag-demo.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
79
83
 
80
84
  ## 🧱 Code Samples
81
85
 
@@ -83,7 +87,7 @@ Learn how to create tables, populate them with data, and enhance them with built
83
87
  ```python
84
88
  import pixeltable as pxt
85
89
 
86
- v = pxt.create_table('external_data.videos', {'video': pxt.VideoType()})
90
+ v = pxt.create_table('external_data.videos', {'video': pxt.Video})
87
91
 
88
92
  prefix = 's3://multimedia-commons/'
89
93
  paths = [
@@ -101,7 +105,7 @@ import pixeltable as pxt
101
105
  from pixeltable.functions import huggingface
102
106
 
103
107
  # Create a table to store data persistently
104
- t = pxt.create_table('image', {'image': pxt.ImageType()})
108
+ t = pxt.create_table('image', {'image': pxt.Image})
105
109
 
106
110
  # Insert some images
107
111
  prefix = 'https://upload.wikimedia.org/wikipedia/commons'
@@ -158,7 +162,7 @@ Learn how to leverage Pixeltable for [Model analytics](https://pixeltable.readme
158
162
 
159
163
  ### Working with inference services
160
164
  ```python
161
- chat_table = pxt.create_table('together_demo.chat', {'input': pxt.StringType()})
165
+ chat_table = pxt.create_table('together_demo.chat', {'input': pxt.String})
162
166
 
163
167
  # The chat-completions API expects JSON-formatted input:
164
168
  messages = [{'role': 'user', 'content': chat_table.input}]
@@ -194,7 +198,7 @@ from pixeltable.functions.huggingface import clip_image, clip_text
194
198
  from pixeltable.iterators import FrameIterator
195
199
  import PIL.Image
196
200
 
197
- video_table = pxt.create_table('videos', {'video': pxt.VideoType()})
201
+ video_table = pxt.create_table('videos', {'video': pxt.Video})
198
202
 
199
203
  video_table.insert([{'video': '/video.mp4'}])
200
204
 
@@ -225,6 +229,38 @@ frames_view.order_by(sim, asc=False).limit(5).select(frames_view.frame, sim=sim)
225
229
  ```
226
230
  Learn how to work with [Embedding and Vector Indexes](https://docs.pixeltable.com/docs/embedding-vector-indexes).
227
231
 
232
+ ## 🔄 AI Stack Comparison
233
+
234
+ ### 🎯 Computer Vision Workflows
235
+
236
+ | Requirement | Traditional | Pixeltable |
237
+ |-------------|---------------------|------------|
238
+ | Frame Extraction | ffmpeg + custom code | Automatic via FrameIterator |
239
+ | Object Detection | Multiple scripts + caching | Single computed column |
240
+ | Video Indexing | Custom pipelines + Vector DB | Native similarity search |
241
+ | Annotation Management | Separate tools + custom code | Label Studio integration |
242
+ | Model Evaluation | Custom metrics pipeline | Built-in mAP computation |
243
+
244
+ ### 🤖 LLM Workflows
245
+
246
+ | Requirement | Traditional | Pixeltable |
247
+ |-------------|---------------------|------------|
248
+ | Document Chunking | Tool + custom code | Native DocumentSplitter |
249
+ | Embedding Generation | Separate pipeline + caching | Computed columns |
250
+ | Vector Search | External vector DB | Built-in vector indexing |
251
+ | Prompt Management | Custom tracking solution | Version-controlled columns |
252
+ | Chain Management | Tool + custom code | Computed column DAGs |
253
+
254
+ ### 🎨 Multimodal Workflows
255
+
256
+ | Requirement | Traditional | Pixeltable |
257
+ |-------------|---------------------|------------|
258
+ | Data Types | Multiple storage systems | Unified table interface |
259
+ | Cross-Modal Search | Complex integration | Native similarity support |
260
+ | Pipeline Orchestration | Multiple tools (Airflow, etc.) | Single declarative interface |
261
+ | Asset Management | Custom tracking system | Automatic lineage |
262
+ | Quality Control | Multiple validation tools | Computed validation columns |
263
+
228
264
  ## ❓ FAQ
229
265
 
230
266
  ### What is Pixeltable?
@@ -2,6 +2,8 @@
2
2
  <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" width="50%" />
3
3
  <br></br>
4
4
 
5
+ <h2>AI Data Infrastructure — Declarative, Multimodal, and Incremental</h2>
6
+
5
7
  [![License](https://img.shields.io/badge/License-Apache%202.0-0530AD.svg)](https://opensource.org/licenses/Apache-2.0)
6
8
  ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white&)
7
9
  ![Platform Support](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-E5DDD4)
@@ -9,9 +11,10 @@
9
11
  [![tests status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml)
10
12
  [![tests status](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml)
11
13
  [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=4D148C)](https://pypi.org/project/pixeltable/)
12
- <a target="_blank" href="https://huggingface.co/Pixeltable"> <img src="https://img.shields.io/badge/🤗-HF Space-F25022" alt="Visit our Hugging Face space"/></a>
14
+ [![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fpixeltable%2Fpixeltable.svg?type=shield&issueType=security)](https://app.fossa.com/projects/git%2Bgithub.com%2Fpixeltable%2Fpixeltable?ref=badge_shield&issueType=security)
15
+ <a target="_blank" href="https://huggingface.co/Pixeltable"> <img src="https://img.shields.io/badge/🤗-HF Space-FF7D04" alt="Visit our Hugging Face space"/></a>
13
16
 
14
- [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#-code-samples) | [Computer Vision](https://docs.pixeltable.com/docs/object-detection-in-videos) | [LLM](https://docs.pixeltable.com/docs/document-indexing-and-rag)
17
+ [Installation](https://docs.pixeltable.com/docs/installation) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://github.com/pixeltable/pixeltable?tab=readme-ov-file#-code-samples) | [Computer Vision](https://docs.pixeltable.com/docs/object-detection-in-videos) | [LLM](https://docs.pixeltable.com/docs/document-indexing-and-rag)
15
18
  </div>
16
19
 
17
20
  Pixeltable is a Python library providing a declarative interface for multimodal data (text, images, audio, video). It features built-in versioning, lineage tracking, and incremental updates, enabling users to **store**, **transform**, **index**, and **iterate** on data for their ML workflows.
@@ -37,8 +40,9 @@ Learn how to create tables, populate them with data, and enhance them with built
37
40
  |:----------|:-----------------|:-------------------------|:---------------------------------:|
38
41
  | 10-Minute Tour of Pixeltable | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Tables and Data Operations | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
39
42
  | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
40
- | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
41
- | Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Docs-Label Studio-blue" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
43
+ | Incremental Prompt Engineering | <a target="_blank" href="https://colab.research.google.com/github/mistralai/cookbook/blob/main/third_party/Pixeltable/incremental_prompt_engineering_and_model_comparison.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
44
+ | Integrating with Label Studio | <a target="_blank" href="https://pixeltable.readme.io/docs/label-studio"> <img src="https://img.shields.io/badge/Documentation-013056" alt="Visit our documentation"/></a> | Audio/Video Transcript Indexing | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
45
+ | Multimodal Application | <a target="_blank" href="https://huggingface.co/spaces/Pixeltable/Multimodal-Powerhouse"> <img src="https://img.shields.io/badge/Hugging Face-FF7D04" alt="Visit our Hugging Face space"/></a> | Document Indexing and RAG | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/release/tutorials/rag-demo.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>
42
46
 
43
47
  ## 🧱 Code Samples
44
48
 
@@ -46,7 +50,7 @@ Learn how to create tables, populate them with data, and enhance them with built
46
50
  ```python
47
51
  import pixeltable as pxt
48
52
 
49
- v = pxt.create_table('external_data.videos', {'video': pxt.VideoType()})
53
+ v = pxt.create_table('external_data.videos', {'video': pxt.Video})
50
54
 
51
55
  prefix = 's3://multimedia-commons/'
52
56
  paths = [
@@ -64,7 +68,7 @@ import pixeltable as pxt
64
68
  from pixeltable.functions import huggingface
65
69
 
66
70
  # Create a table to store data persistently
67
- t = pxt.create_table('image', {'image': pxt.ImageType()})
71
+ t = pxt.create_table('image', {'image': pxt.Image})
68
72
 
69
73
  # Insert some images
70
74
  prefix = 'https://upload.wikimedia.org/wikipedia/commons'
@@ -121,7 +125,7 @@ Learn how to leverage Pixeltable for [Model analytics](https://pixeltable.readme
121
125
 
122
126
  ### Working with inference services
123
127
  ```python
124
- chat_table = pxt.create_table('together_demo.chat', {'input': pxt.StringType()})
128
+ chat_table = pxt.create_table('together_demo.chat', {'input': pxt.String})
125
129
 
126
130
  # The chat-completions API expects JSON-formatted input:
127
131
  messages = [{'role': 'user', 'content': chat_table.input}]
@@ -157,7 +161,7 @@ from pixeltable.functions.huggingface import clip_image, clip_text
157
161
  from pixeltable.iterators import FrameIterator
158
162
  import PIL.Image
159
163
 
160
- video_table = pxt.create_table('videos', {'video': pxt.VideoType()})
164
+ video_table = pxt.create_table('videos', {'video': pxt.Video})
161
165
 
162
166
  video_table.insert([{'video': '/video.mp4'}])
163
167
 
@@ -188,6 +192,38 @@ frames_view.order_by(sim, asc=False).limit(5).select(frames_view.frame, sim=sim)
188
192
  ```
189
193
  Learn how to work with [Embedding and Vector Indexes](https://docs.pixeltable.com/docs/embedding-vector-indexes).
190
194
 
195
+ ## 🔄 AI Stack Comparison
196
+
197
+ ### 🎯 Computer Vision Workflows
198
+
199
+ | Requirement | Traditional | Pixeltable |
200
+ |-------------|---------------------|------------|
201
+ | Frame Extraction | ffmpeg + custom code | Automatic via FrameIterator |
202
+ | Object Detection | Multiple scripts + caching | Single computed column |
203
+ | Video Indexing | Custom pipelines + Vector DB | Native similarity search |
204
+ | Annotation Management | Separate tools + custom code | Label Studio integration |
205
+ | Model Evaluation | Custom metrics pipeline | Built-in mAP computation |
206
+
207
+ ### 🤖 LLM Workflows
208
+
209
+ | Requirement | Traditional | Pixeltable |
210
+ |-------------|---------------------|------------|
211
+ | Document Chunking | Tool + custom code | Native DocumentSplitter |
212
+ | Embedding Generation | Separate pipeline + caching | Computed columns |
213
+ | Vector Search | External vector DB | Built-in vector indexing |
214
+ | Prompt Management | Custom tracking solution | Version-controlled columns |
215
+ | Chain Management | Tool + custom code | Computed column DAGs |
216
+
217
+ ### 🎨 Multimodal Workflows
218
+
219
+ | Requirement | Traditional | Pixeltable |
220
+ |-------------|---------------------|------------|
221
+ | Data Types | Multiple storage systems | Unified table interface |
222
+ | Cross-Modal Search | Complex integration | Native similarity support |
223
+ | Pipeline Orchestration | Multiple tools (Airflow, etc.) | Single declarative interface |
224
+ | Asset Management | Custom tracking system | Automatic lineage |
225
+ | Quality Control | Multiple validation tools | Computed validation columns |
226
+
191
227
  ## ❓ FAQ
192
228
 
193
229
  ### What is Pixeltable?
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.21"
3
- __version_tuple__ = (0, 2, 21)
2
+ __version__ = "0.2.22"
3
+ __version_tuple__ = (0, 2, 22)
@@ -1,7 +1,7 @@
1
1
  from .catalog import Catalog
2
2
  from .column import Column
3
3
  from .dir import Dir
4
- from .globals import UpdateStatus, is_valid_identifier, is_valid_path
4
+ from .globals import UpdateStatus, is_valid_identifier, is_valid_path, MediaValidation
5
5
  from .insertable_table import InsertableTable
6
6
  from .named_function import NamedFunction
7
7
  from .path import Path
@@ -8,24 +8,43 @@ import sqlalchemy as sql
8
8
  import pixeltable.exceptions as excs
9
9
  import pixeltable.type_system as ts
10
10
  from pixeltable import exprs
11
-
12
- from .globals import is_valid_identifier
11
+ from .globals import is_valid_identifier, MediaValidation
13
12
 
14
13
  if TYPE_CHECKING:
15
14
  from .table_version import TableVersion
16
15
 
17
16
  _logger = logging.getLogger('pixeltable')
18
17
 
18
+
19
19
  class Column:
20
20
  """Representation of a column in the schema of a Table/DataFrame.
21
21
 
22
22
  A Column contains all the metadata necessary for executing queries and updates against a particular version of a
23
23
  table/view.
24
24
  """
25
+ name: str
26
+ id: Optional[int]
27
+ col_type: ts.ColumnType
28
+ stored: bool
29
+ is_pk: bool
30
+ _media_validation: Optional[MediaValidation] # if not set, TableVersion.media_validation applies
31
+ schema_version_add: Optional[int]
32
+ schema_version_drop: Optional[int]
33
+ _records_errors: Optional[bool]
34
+ sa_col: Optional[sql.schema.Column]
35
+ sa_col_type: Optional[sql.sqltypes.TypeEngine]
36
+ sa_errormsg_col: Optional[sql.schema.Column]
37
+ sa_errortype_col: Optional[sql.schema.Column]
38
+ compute_func: Optional[Callable]
39
+ _value_expr: Optional[exprs.Expr]
40
+ value_expr_dict: Optional[dict[str, Any]]
41
+ dependent_cols: set[Column]
42
+ tbl: Optional[TableVersion]
43
+
25
44
  def __init__(
26
45
  self, name: Optional[str], col_type: Optional[ts.ColumnType] = None,
27
46
  computed_with: Optional[Union[exprs.Expr, Callable]] = None,
28
- is_pk: bool = False, stored: bool = True,
47
+ is_pk: bool = False, stored: bool = True, media_validation: Optional[MediaValidation] = None,
29
48
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
30
49
  schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
31
50
  records_errors: Optional[bool] = None, value_expr_dict: Optional[dict[str, Any]] = None,
@@ -61,8 +80,8 @@ class Column:
61
80
  if col_type is None and computed_with is None:
62
81
  raise excs.Error(f'Column `{name}`: col_type is required if computed_with is not specified')
63
82
 
64
- self._value_expr: Optional[exprs.Expr] = None
65
- self.compute_func: Optional[Callable] = None
83
+ self._value_expr = None
84
+ self.compute_func = None
66
85
  self.value_expr_dict = value_expr_dict
67
86
  if computed_with is not None:
68
87
  value_expr = exprs.Expr.from_object(computed_with)
@@ -86,24 +105,24 @@ class Column:
86
105
  assert self.col_type is not None
87
106
 
88
107
  self.stored = stored
89
- self.dependent_cols: set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
108
+ self.dependent_cols = set() # cols with value_exprs that reference us; set by TableVersion
90
109
  self.id = col_id
91
110
  self.is_pk = is_pk
111
+ self._media_validation = media_validation
92
112
  self.schema_version_add = schema_version_add
93
113
  self.schema_version_drop = schema_version_drop
94
114
 
95
115
  self._records_errors = records_errors
96
116
 
97
117
  # column in the stored table for the values of this Column
98
- self.sa_col: Optional[sql.schema.Column] = None
118
+ self.sa_col = None
99
119
  self.sa_col_type = sa_col_type
100
120
 
101
121
  # computed cols also have storage columns for the exception string and type
102
- self.sa_errormsg_col: Optional[sql.schema.Column] = None
103
- self.sa_errortype_col: Optional[sql.schema.Column] = None
122
+ self.sa_errormsg_col = None
123
+ self.sa_errortype_col = None
104
124
 
105
- from .table_version import TableVersion
106
- self.tbl: Optional[TableVersion] = None # set by owning TableVersion
125
+ self.tbl = None # set by owning TableVersion
107
126
 
108
127
  @property
109
128
  def value_expr(self) -> Optional[exprs.Expr]:
@@ -160,6 +179,13 @@ class Column:
160
179
  assert self.tbl is not None
161
180
  return f'{self.tbl.name}.{self.name}'
162
181
 
182
+ @property
183
+ def media_validation(self) -> MediaValidation:
184
+ if self._media_validation is not None:
185
+ return self._media_validation
186
+ assert self.tbl is not None
187
+ return self.tbl.media_validation
188
+
163
189
  def source(self) -> None:
164
190
  """
165
191
  If this is a computed col and the top-level expr is a function call, print the source, if possible.
@@ -1,8 +1,12 @@
1
+ from __future__ import annotations
1
2
  import dataclasses
3
+ import enum
2
4
  import itertools
3
5
  import logging
4
6
  from typing import Optional
5
7
 
8
+ import pixeltable.exceptions as excs
9
+
6
10
  _logger = logging.getLogger('pixeltable')
7
11
 
8
12
  # name of the position column in a component view
@@ -34,6 +38,20 @@ class UpdateStatus:
34
38
  self.cols_with_excs = list(dict.fromkeys(self.cols_with_excs + other.cols_with_excs))
35
39
  return self
36
40
 
41
+
42
+ class MediaValidation(enum.Enum):
43
+ ON_READ = 0
44
+ ON_WRITE = 1
45
+
46
+ @classmethod
47
+ def validated(cls, name: str, error_prefix: str) -> MediaValidation:
48
+ try:
49
+ return cls[name.upper()]
50
+ except KeyError:
51
+ val_strs = ', '.join(f'{s.lower()!r}' for s in cls.__members__.keys())
52
+ raise excs.Error(f'{error_prefix} must be one of: [{val_strs}]')
53
+
54
+
37
55
  def is_valid_identifier(name: str) -> bool:
38
56
  return name.isidentifier() and not name.startswith('_')
39
57
 
@@ -13,7 +13,7 @@ from pixeltable.env import Env
13
13
  from pixeltable.utils.filecache import FileCache
14
14
 
15
15
  from .catalog import Catalog
16
- from .globals import UpdateStatus
16
+ from .globals import UpdateStatus, MediaValidation
17
17
  from .table import Table
18
18
  from .table_version import TableVersion
19
19
  from .table_version_path import TableVersionPath
@@ -35,8 +35,8 @@ class InsertableTable(Table):
35
35
  # MODULE-LOCAL, NOT PUBLIC
36
36
  @classmethod
37
37
  def _create(
38
- cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame], primary_key: List[str],
39
- num_retained_versions: int, comment: str
38
+ cls, dir_id: UUID, name: str, schema: dict[str, ts.ColumnType], df: Optional[pxt.DataFrame],
39
+ primary_key: List[str], num_retained_versions: int, comment: str, media_validation: MediaValidation
40
40
  ) -> InsertableTable:
41
41
  columns = cls._create_columns(schema)
42
42
  cls._verify_schema(columns)
@@ -50,7 +50,9 @@ class InsertableTable(Table):
50
50
  col.is_pk = True
51
51
 
52
52
  with orm.Session(Env.get().engine, future=True) as session:
53
- _, tbl_version = TableVersion.create(session, dir_id, name, columns, num_retained_versions, comment)
53
+ _, tbl_version = TableVersion.create(
54
+ session, dir_id, name, columns, num_retained_versions=num_retained_versions, comment=comment,
55
+ media_validation=media_validation)
54
56
  tbl = cls(dir_id, tbl_version)
55
57
  # TODO We need to commit before doing the insertion, in order to avoid a primary key (version) collision
56
58
  # when the table metadata gets updated. Once we have a notion of user-defined transactions in
@@ -24,7 +24,7 @@ import pixeltable.type_system as ts
24
24
  from pixeltable.utils.filecache import FileCache
25
25
 
26
26
  from .column import Column
27
- from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier
27
+ from .globals import _ROWID_COLUMN_NAME, UpdateStatus, is_system_column_name, is_valid_identifier, MediaValidation
28
28
  from .schema_object import SchemaObject
29
29
  from .table_version import TableVersion
30
30
  from .table_version_path import TableVersionPath
@@ -91,6 +91,7 @@ class Table(SchemaObject):
91
91
  'num_retained_versions': 10,
92
92
  'is_view': False,
93
93
  'is_snapshot': False,
94
+ 'media_validation': 'on_write',
94
95
  }
95
96
  ```
96
97
  """
@@ -101,6 +102,7 @@ class Table(SchemaObject):
101
102
  md['schema_version'] = self._tbl_version.schema_version
102
103
  md['comment'] = self._comment
103
104
  md['num_retained_versions'] = self._num_retained_versions
105
+ md['media_validation'] = self._media_validation.name.lower()
104
106
  return md
105
107
 
106
108
  @property
@@ -244,6 +246,10 @@ class Table(SchemaObject):
244
246
  def _num_retained_versions(self):
245
247
  return self._tbl_version.num_retained_versions
246
248
 
249
+ @property
250
+ def _media_validation(self) -> MediaValidation:
251
+ return self._tbl_version.media_validation
252
+
247
253
  def _description(self) -> pd.DataFrame:
248
254
  cols = self._tbl_version_path.columns()
249
255
  df = pd.DataFrame({
@@ -422,7 +428,7 @@ class Table(SchemaObject):
422
428
  (on account of containing Python Callables or Exprs).
423
429
  """
424
430
  assert isinstance(spec, dict)
425
- valid_keys = {'type', 'value', 'stored'}
431
+ valid_keys = {'type', 'value', 'stored', 'media_validation'}
426
432
  has_type = False
427
433
  for k in spec.keys():
428
434
  if k not in valid_keys:
@@ -449,6 +455,9 @@ class Table(SchemaObject):
449
455
  if 'type' in spec:
450
456
  raise excs.Error(f'Column {name}: "type" is redundant if value is a Pixeltable expression')
451
457
 
458
+ if 'media_validation' in spec:
459
+ _ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
460
+
452
461
  if 'stored' in spec and not isinstance(spec['stored'], bool):
453
462
  raise excs.Error(f'Column {name}: "stored" must be a bool, got {spec["stored"]}')
454
463
  if not has_type:
@@ -462,6 +471,7 @@ class Table(SchemaObject):
462
471
  col_type: Optional[ts.ColumnType] = None
463
472
  value_expr: Optional[exprs.Expr] = None
464
473
  primary_key: Optional[bool] = None
474
+ media_validation: Optional[catalog.MediaValidation] = None
465
475
  stored = True
466
476
 
467
477
  if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
@@ -484,9 +494,15 @@ class Table(SchemaObject):
484
494
  value_expr = value_expr.copy()
485
495
  stored = spec.get('stored', True)
486
496
  primary_key = spec.get('primary_key')
497
+ media_validation_str = spec.get('media_validation')
498
+ media_validation = (
499
+ catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None
500
+ else None
501
+ )
487
502
 
488
503
  column = Column(
489
- name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key)
504
+ name, col_type=col_type, computed_with=value_expr, stored=stored, is_pk=primary_key,
505
+ media_validation=media_validation)
490
506
  columns.append(column)
491
507
  return columns
492
508
 
@@ -26,7 +26,7 @@ from pixeltable.utils.media_store import MediaStore
26
26
 
27
27
  from ..func.globals import resolve_symbol
28
28
  from .column import Column
29
- from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, UpdateStatus, is_valid_identifier
29
+ from .globals import _POS_COLUMN_NAME, _ROWID_COLUMN_NAME, UpdateStatus, is_valid_identifier, MediaValidation
30
30
 
31
31
  if TYPE_CHECKING:
32
32
  from pixeltable import exec, store
@@ -53,6 +53,7 @@ class TableVersion:
53
53
  name: str
54
54
  version: int
55
55
  comment: str
56
+ media_validation: MediaValidation
56
57
  num_retained_versions: int
57
58
  schema_version: int
58
59
  view_md: Optional[schema.ViewMd]
@@ -109,6 +110,7 @@ class TableVersion:
109
110
  self.view_md = tbl_md.view_md # save this as-is, it's needed for _create_md()
110
111
  is_view = tbl_md.view_md is not None
111
112
  self.is_snapshot = (is_view and tbl_md.view_md.is_snapshot) or bool(is_snapshot)
113
+ self.media_validation = MediaValidation[schema_version_md.media_validation.upper()]
112
114
  # a mutable TableVersion doesn't have a static version
113
115
  self.effective_version = self.version if self.is_snapshot else None
114
116
 
@@ -182,7 +184,7 @@ class TableVersion:
182
184
  @classmethod
183
185
  def create(
184
186
  cls, session: orm.Session, dir_id: UUID, name: str, cols: list[Column], num_retained_versions: int,
185
- comment: str, base_path: Optional[pxt.catalog.TableVersionPath] = None,
187
+ comment: str, media_validation: MediaValidation, base_path: Optional[pxt.catalog.TableVersionPath] = None,
186
188
  view_md: Optional[schema.ViewMd] = None
187
189
  ) -> tuple[UUID, Optional[TableVersion]]:
188
190
  # assign ids
@@ -214,11 +216,17 @@ class TableVersion:
214
216
  tbl_id=tbl_record.id, version=0, md=dataclasses.asdict(table_version_md))
215
217
 
216
218
  # create schema.TableSchemaVersion
217
- schema_col_md = {col.id: schema.SchemaColumn(pos=pos, name=col.name) for pos, col in enumerate(cols)}
219
+ schema_col_md: dict[int, schema.SchemaColumn] = {}
220
+ for pos, col in enumerate(cols):
221
+ md = schema.SchemaColumn(
222
+ pos=pos, name=col.name,
223
+ media_validation=col._media_validation.name.lower() if col._media_validation is not None else None)
224
+ schema_col_md[col.id] = md
218
225
 
219
226
  schema_version_md = schema.TableSchemaVersionMd(
220
227
  schema_version=0, preceding_schema_version=None, columns=schema_col_md,
221
- num_retained_versions=num_retained_versions, comment=comment)
228
+ num_retained_versions=num_retained_versions, comment=comment,
229
+ media_validation=media_validation.name.lower())
222
230
  schema_version_record = schema.TableSchemaVersion(
223
231
  tbl_id=tbl_record.id, schema_version=0, md=dataclasses.asdict(schema_version_md))
224
232
 
@@ -285,10 +293,15 @@ class TableVersion:
285
293
  self.cols_by_name = {}
286
294
  self.cols_by_id = {}
287
295
  for col_md in tbl_md.column_md.values():
288
- col_name = schema_version_md.columns[col_md.id].name if col_md.id in schema_version_md.columns else None
296
+ schema_col_md = schema_version_md.columns[col_md.id] if col_md.id in schema_version_md.columns else None
297
+ col_name = schema_col_md.name if schema_col_md is not None else None
298
+ media_val = (
299
+ MediaValidation[schema_col_md.media_validation.upper()]
300
+ if schema_col_md is not None and schema_col_md.media_validation is not None else None
301
+ )
289
302
  col = Column(
290
303
  col_id=col_md.id, name=col_name, col_type=ts.ColumnType.from_dict(col_md.col_type),
291
- is_pk=col_md.is_pk, stored=col_md.stored,
304
+ is_pk=col_md.is_pk, stored=col_md.stored, media_validation=media_val,
292
305
  schema_version_add=col_md.schema_version_add, schema_version_drop=col_md.schema_version_drop,
293
306
  value_expr_dict=col_md.value_expr)
294
307
  col.tbl = self
@@ -349,7 +362,8 @@ class TableVersion:
349
362
  self.store_tbl = StoreTable(self)
350
363
 
351
364
  def _update_md(
352
- self, timestamp: float, conn: sql.engine.Connection, update_tbl_version: bool = True, preceding_schema_version: Optional[int] = None
365
+ self, timestamp: float, conn: sql.engine.Connection, update_tbl_version: bool = True,
366
+ preceding_schema_version: Optional[int] = None
353
367
  ) -> None:
354
368
  """Writes table metadata to the database.
355
369
 
@@ -710,20 +724,22 @@ class TableVersion:
710
724
 
711
725
  if conn is None:
712
726
  with Env.get().engine.begin() as conn:
713
- return self._insert(plan, conn, time.time(), print_stats=print_stats, rowids=rowids())
727
+ return self._insert(
728
+ plan, conn, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
714
729
  else:
715
- return self._insert(plan, conn, time.time(), print_stats=print_stats, rowids=rowids())
730
+ return self._insert(
731
+ plan, conn, time.time(), print_stats=print_stats, rowids=rowids(), abort_on_exc=fail_on_exception)
716
732
 
717
733
  def _insert(
718
734
  self, exec_plan: 'exec.ExecNode', conn: sql.engine.Connection, timestamp: float, *,
719
- rowids: Optional[Iterator[int]] = None, print_stats: bool = False,
735
+ rowids: Optional[Iterator[int]] = None, print_stats: bool = False, abort_on_exc: bool = False
720
736
  ) -> UpdateStatus:
721
737
  """Insert rows produced by exec_plan and propagate to views"""
722
738
  # we're creating a new version
723
739
  self.version += 1
724
740
  result = UpdateStatus()
725
741
  num_rows, num_excs, cols_with_excs = self.store_tbl.insert_rows(
726
- exec_plan, conn, v_min=self.version, rowids=rowids)
742
+ exec_plan, conn, v_min=self.version, rowids=rowids, abort_on_exc=abort_on_exc)
727
743
  result.num_rows = num_rows
728
744
  result.num_excs = num_excs
729
745
  result.num_computed_values += exec_plan.ctx.num_computed_exprs * num_rows
@@ -1203,7 +1219,8 @@ class TableVersion:
1203
1219
  name=self.name, current_version=self.version, current_schema_version=self.schema_version,
1204
1220
  next_col_id=self.next_col_id, next_idx_id=self.next_idx_id, next_row_id=self.next_rowid,
1205
1221
  column_md=self._create_column_md(self.cols), index_md=self.idx_md,
1206
- external_stores=self._create_stores_md(self.external_stores.values()), view_md=self.view_md)
1222
+ external_stores=self._create_stores_md(self.external_stores.values()), view_md=self.view_md,
1223
+ )
1207
1224
 
1208
1225
  def _create_version_md(self, timestamp: float) -> schema.TableVersionMd:
1209
1226
  return schema.TableVersionMd(created_at=timestamp, version=self.version, schema_version=self.schema_version)
@@ -1211,11 +1228,14 @@ class TableVersion:
1211
1228
  def _create_schema_version_md(self, preceding_schema_version: int) -> schema.TableSchemaVersionMd:
1212
1229
  column_md: dict[int, schema.SchemaColumn] = {}
1213
1230
  for pos, col in enumerate(self.cols_by_name.values()):
1214
- column_md[col.id] = schema.SchemaColumn(pos=pos, name=col.name)
1231
+ column_md[col.id] = schema.SchemaColumn(
1232
+ pos=pos, name=col.name,
1233
+ media_validation=col._media_validation.name.lower() if col._media_validation is not None else None)
1215
1234
  # preceding_schema_version to be set by the caller
1216
1235
  return schema.TableSchemaVersionMd(
1217
1236
  schema_version=self.schema_version, preceding_schema_version=preceding_schema_version,
1218
- columns=column_md, num_retained_versions=self.num_retained_versions, comment=self.comment)
1237
+ columns=column_md, num_retained_versions=self.num_retained_versions, comment=self.comment,
1238
+ media_validation=self.media_validation.name.lower())
1219
1239
 
1220
1240
  def as_dict(self) -> dict:
1221
1241
  return {'id': str(self.id), 'effective_version': self.effective_version}