pixeltable 0.2.6__tar.gz → 0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (129) hide show
  1. pixeltable-0.2.8/PKG-INFO +137 -0
  2. pixeltable-0.2.8/README.md +99 -0
  3. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/__init__.py +3 -1
  4. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/__version__.py +2 -2
  5. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/column.py +8 -2
  6. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/insertable_table.py +32 -17
  7. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/table.py +167 -12
  8. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/table_version.py +185 -106
  9. pixeltable-0.2.8/pixeltable/datatransfer/__init__.py +1 -0
  10. pixeltable-0.2.8/pixeltable/datatransfer/label_studio.py +452 -0
  11. pixeltable-0.2.8/pixeltable/datatransfer/remote.py +85 -0
  12. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/env.py +148 -69
  13. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/column_ref.py +2 -2
  14. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/comparison.py +39 -1
  15. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/data_row.py +7 -0
  16. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/expr.py +11 -12
  17. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/function_call.py +0 -3
  18. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/globals.py +14 -2
  19. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/similarity_expr.py +5 -3
  20. pixeltable-0.2.8/pixeltable/ext/functions/whisperx.py +30 -0
  21. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/ext/functions/yolox.py +16 -0
  22. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/aggregate_function.py +2 -2
  23. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/expr_template_function.py +3 -1
  24. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/udf.py +2 -2
  25. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/fireworks.py +9 -4
  26. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/huggingface.py +25 -1
  27. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/openai.py +15 -10
  28. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/together.py +11 -6
  29. pixeltable-0.2.8/pixeltable/functions/util.py +9 -0
  30. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/video.py +46 -8
  31. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/globals.py +20 -2
  32. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/index/__init__.py +1 -0
  33. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/index/base.py +6 -1
  34. pixeltable-0.2.8/pixeltable/index/btree.py +54 -0
  35. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/index/embedding_index.py +4 -1
  36. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/io/__init__.py +1 -0
  37. pixeltable-0.2.8/pixeltable/io/globals.py +58 -0
  38. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/iterators/base.py +4 -4
  39. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/iterators/document.py +26 -15
  40. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/iterators/video.py +9 -1
  41. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/metadata/__init__.py +2 -2
  42. pixeltable-0.2.8/pixeltable/metadata/converters/convert_14.py +13 -0
  43. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/metadata/schema.py +9 -6
  44. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/plan.py +9 -5
  45. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/store.py +14 -21
  46. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/tool/create_test_db_dump.py +14 -0
  47. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/type_system.py +14 -4
  48. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/coco.py +94 -0
  49. {pixeltable-0.2.6 → pixeltable-0.2.8}/pyproject.toml +27 -14
  50. pixeltable-0.2.6/PKG-INFO +0 -131
  51. pixeltable-0.2.6/README.md +0 -93
  52. pixeltable-0.2.6/pixeltable/func/nos_function.py +0 -202
  53. pixeltable-0.2.6/pixeltable/functions/util.py +0 -52
  54. pixeltable-0.2.6/pixeltable/utils/clip.py +0 -18
  55. {pixeltable-0.2.6 → pixeltable-0.2.8}/LICENSE +0 -0
  56. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/__init__.py +0 -0
  57. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/catalog.py +0 -0
  58. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/dir.py +0 -0
  59. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/globals.py +0 -0
  60. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/named_function.py +0 -0
  61. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/path.py +0 -0
  62. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/path_dict.py +0 -0
  63. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/schema_object.py +0 -0
  64. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/table_version_path.py +0 -0
  65. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/catalog/view.py +0 -0
  66. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/dataframe.py +0 -0
  67. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exceptions.py +0 -0
  68. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/__init__.py +0 -0
  69. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/aggregation_node.py +0 -0
  70. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/cache_prefetch_node.py +0 -0
  71. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/component_iteration_node.py +0 -0
  72. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/data_row_batch.py +0 -0
  73. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/exec_context.py +0 -0
  74. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/exec_node.py +0 -0
  75. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/expr_eval_node.py +0 -0
  76. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/in_memory_data_node.py +0 -0
  77. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/media_validation_node.py +0 -0
  78. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exec/sql_scan_node.py +0 -0
  79. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/__init__.py +0 -0
  80. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/arithmetic_expr.py +0 -0
  81. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/array_slice.py +0 -0
  82. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/column_property_ref.py +0 -0
  83. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/compound_predicate.py +0 -0
  84. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/expr_set.py +0 -0
  85. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/image_member_access.py +0 -0
  86. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/in_predicate.py +0 -0
  87. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/inline_array.py +0 -0
  88. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/inline_dict.py +0 -0
  89. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/is_null.py +0 -0
  90. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/json_mapper.py +0 -0
  91. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/json_path.py +0 -0
  92. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/literal.py +0 -0
  93. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/object_ref.py +0 -0
  94. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/predicate.py +0 -0
  95. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/row_builder.py +0 -0
  96. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/rowid_ref.py +0 -0
  97. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/type_cast.py +0 -0
  98. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/exprs/variable.py +0 -0
  99. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/ext/__init__.py +0 -0
  100. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/__init__.py +0 -0
  101. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/callable_function.py +0 -0
  102. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/function.py +0 -0
  103. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/function_registry.py +0 -0
  104. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/globals.py +0 -0
  105. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/func/signature.py +0 -0
  106. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/__init__.py +0 -0
  107. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/eval.py +0 -0
  108. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/image.py +0 -0
  109. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/pil/image.py +0 -0
  110. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/functions/string.py +0 -0
  111. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/io/hf_datasets.py +0 -0
  112. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/io/pandas.py +0 -0
  113. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/io/parquet.py +0 -0
  114. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/iterators/__init__.py +0 -0
  115. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/metadata/converters/convert_10.py +0 -0
  116. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/metadata/converters/convert_12.py +0 -0
  117. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/metadata/converters/convert_13.py +0 -0
  118. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/tool/create_test_video.py +0 -0
  119. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/__init__.py +0 -0
  120. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/arrow.py +0 -0
  121. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/documents.py +0 -0
  122. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/filecache.py +0 -0
  123. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/help.py +0 -0
  124. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/http_server.py +0 -0
  125. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/media_store.py +0 -0
  126. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/pytorch.py +0 -0
  127. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/s3.py +0 -0
  128. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/sql.py +0 -0
  129. {pixeltable-0.2.6 → pixeltable-0.2.8}/pixeltable/utils/transactional_directory.py +0 -0
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.1
2
+ Name: pixeltable
3
+ Version: 0.2.8
4
+ Summary: Pixeltable: The Multimodal AI Data Plane
5
+ Author: Marcel Kornacker
6
+ Author-email: marcelk@gmail.com
7
+ Requires-Python: >=3.9,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: av (>=10.0.0)
14
+ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
15
+ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
16
+ Requires-Dist: ftfy (>=6.2.0,<7.0.0)
17
+ Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
18
+ Requires-Dist: jmespath (>=1.0.1,<2.0.0)
19
+ Requires-Dist: mistune (>=3.0.2,<4.0.0)
20
+ Requires-Dist: more-itertools (>=10.2,<11.0)
21
+ Requires-Dist: numpy (>=1.25)
22
+ Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
23
+ Requires-Dist: pandas (>=2.0,<3.0)
24
+ Requires-Dist: pgserver (==0.1.4)
25
+ Requires-Dist: pgvector (>=0.2.1,<0.3.0)
26
+ Requires-Dist: pillow (>=9.3.0)
27
+ Requires-Dist: psutil (>=5.9.5,<6.0.0)
28
+ Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
29
+ Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
30
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
31
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
32
+ Requires-Dist: setuptools (==69.1.1)
33
+ Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
34
+ Requires-Dist: tenacity (>=8.2,<9.0)
35
+ Requires-Dist: tqdm (>=4.64)
36
+ Description-Content-Type: text/markdown
37
+
38
+ <div align="center">
39
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
40
+
41
+ # Unifying Data, Models, and Orchestration for AI Products
42
+
43
+ [![License](https://img.shields.io/badge/License-Apache%202.0-darkblue.svg)](https://opensource.org/licenses/Apache-2.0)
44
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white)
45
+ [![Platform Support](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-8A2BE2)]()
46
+ [![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions)
47
+ [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=darkorange)](https://pypi.org/project/pixeltable/)
48
+
49
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
50
+ </div>
51
+
52
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
53
+
54
+ ## What problems does Pixeltable solve?
55
+
56
+ Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome. With Pixeltable you can store, transform, index, and iterate on your data within the same table interface, whether it's text, images, embeddings, or even video. Built-in lineage and versioning ensure transparency and reproducibility, while the development-to-production mirror streamlines deployment.
57
+
58
+ ## 💾 Installation
59
+
60
+ ```python
61
+ %pip install pixeltable
62
+ ```
63
+
64
+ To verify that it's working:
65
+
66
+ ```python
67
+ import pixeltable as pxt
68
+ pxt.init()
69
+ ```
70
+ > [!NOTE]
71
+ > Check out the [Pixeltable Basics](https://pixeltable.readme.io/docs/pixeltable-basics) tutorial for a tour of its most important features.
72
+
73
+ ## 💡 Get Started
74
+ Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
75
+
76
+ | Topic | Notebook | API |
77
+ |:--------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|
78
+ | Get Started | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/pixeltable/) |
79
+ | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
80
+ | Comparing Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#frame-extraction-for-video-data) |
81
+ | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
82
+ | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#inserting-data-into-a-table) |
83
+
84
+ ## ❓ FAQ
85
+
86
+ ### What does Pixeltable provide me with? Pixeltable provides:
87
+
88
+ - Data storage and versioning
89
+ - Combined Data and Model Lineage
90
+ - Indexing (e.g. embedding vectors) and Data Retrieval
91
+ - Orchestration of multimodal workloads
92
+ - Incremental updates
93
+ - Code is automatically production-ready
94
+
95
+ ### Why should you use Pixeltable?
96
+
97
+ - **It gives you transparency and reproducibility**
98
+ - All generated data is automatically recorded and versioned
99
+ - You will never need to re-run a workload because you lost track of the input data
100
+ - **It saves you money**
101
+ - All data changes are automatically incremental
102
+ - You never need to re-run pipelines from scratch because you’re adding data
103
+ - **It integrates with any existing Python code or libraries**
104
+ - Bring your ever-changing code and workloads
105
+ - You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
106
+
107
+ ### What does Pixeltable not provide?
108
+
109
+ - Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
110
+ - We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
111
+
112
+ > [!TIP]
113
+ > Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
114
+
115
+ ## 📙 Example Use Cases
116
+
117
+ - **Interact with video data at the frame level** without having to think about frame extraction, intermediate file storage, or storage space explosion.
118
+ - **Augment your data incrementally and interactively with built-in functions and UDFs**, such as image transformations, model inference, and visualizations, without having to think about data pipelines, incremental updates, or capturing function output.
119
+ - **Interact with all the data relevant to your AI application** (video, images, documents, audio, structured data, JSON) through a simple dataframe-style API directly in Python. This includes:
120
+ - similarity search on embeddings, supported by high-dimensional vector indexing;
121
+ - path expressions and transformations on JSON data;
122
+ - PIL and OpenCV image operations;
123
+ - assembling frames into videos.
124
+ - **Perform keyword and image similarity search at the video frame level** without having to worry about frame storage.
125
+ - **Access all Pixeltable-resident data directly as a PyTorch dataset** in your training scripts.
126
+ - **Understand the compute and storage costs of your data at the granularity** of individual augmentations and get cost projections before adding new data and new augmentations.
127
+ - **Rely on Pixeltable's automatic versioning and snapshot functionality** to protect against regressions and to ensure reproducibility.
128
+
129
+ ## 🐛 Contributions & Feedback
130
+
131
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
132
+ <br>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
133
+
134
+ ## :classical_building: License
135
+
136
+ This library is licensed under the Apache 2.0 License.
137
+
@@ -0,0 +1,99 @@
1
+ <div align="center">
2
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
3
+
4
+ # Unifying Data, Models, and Orchestration for AI Products
5
+
6
+ [![License](https://img.shields.io/badge/License-Apache%202.0-darkblue.svg)](https://opensource.org/licenses/Apache-2.0)
7
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white)
8
+ [![Platform Support](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-8A2BE2)]()
9
+ [![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions)
10
+ [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=darkorange)](https://pypi.org/project/pixeltable/)
11
+
12
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
13
+ </div>
14
+
15
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
16
+
17
+ ## What problems does Pixeltable solve?
18
+
19
+ Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome. With Pixeltable you can store, transform, index, and iterate on your data within the same table interface, whether it's text, images, embeddings, or even video. Built-in lineage and versioning ensure transparency and reproducibility, while the development-to-production mirror streamlines deployment.
20
+
21
+ ## 💾 Installation
22
+
23
+ ```python
24
+ %pip install pixeltable
25
+ ```
26
+
27
+ To verify that it's working:
28
+
29
+ ```python
30
+ import pixeltable as pxt
31
+ pxt.init()
32
+ ```
33
+ > [!NOTE]
34
+ > Check out the [Pixeltable Basics](https://pixeltable.readme.io/docs/pixeltable-basics) tutorial for a tour of its most important features.
35
+
36
+ ## 💡 Get Started
37
+ Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
38
+
39
+ | Topic | Notebook | API |
40
+ |:--------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|
41
+ | Get Started | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/pixeltable/) |
42
+ | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
43
+ | Comparing Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#frame-extraction-for-video-data) |
44
+ | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
45
+ | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#inserting-data-into-a-table) |
46
+
47
+ ## ❓ FAQ
48
+
49
+ ### What does Pixeltable provide me with? Pixeltable provides:
50
+
51
+ - Data storage and versioning
52
+ - Combined Data and Model Lineage
53
+ - Indexing (e.g. embedding vectors) and Data Retrieval
54
+ - Orchestration of multimodal workloads
55
+ - Incremental updates
56
+ - Code is automatically production-ready
57
+
58
+ ### Why should you use Pixeltable?
59
+
60
+ - **It gives you transparency and reproducibility**
61
+ - All generated data is automatically recorded and versioned
62
+ - You will never need to re-run a workload because you lost track of the input data
63
+ - **It saves you money**
64
+ - All data changes are automatically incremental
65
+ - You never need to re-run pipelines from scratch because you’re adding data
66
+ - **It integrates with any existing Python code or libraries**
67
+ - Bring your ever-changing code and workloads
68
+ - You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
69
+
70
+ ### What does Pixeltable not provide?
71
+
72
+ - Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
73
+ - We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
74
+
75
+ > [!TIP]
76
+ > Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
77
+
78
+ ## 📙 Example Use Cases
79
+
80
+ - **Interact with video data at the frame level** without having to think about frame extraction, intermediate file storage, or storage space explosion.
81
+ - **Augment your data incrementally and interactively with built-in functions and UDFs**, such as image transformations, model inference, and visualizations, without having to think about data pipelines, incremental updates, or capturing function output.
82
+ - **Interact with all the data relevant to your AI application** (video, images, documents, audio, structured data, JSON) through a simple dataframe-style API directly in Python. This includes:
83
+ - similarity search on embeddings, supported by high-dimensional vector indexing;
84
+ - path expressions and transformations on JSON data;
85
+ - PIL and OpenCV image operations;
86
+ - assembling frames into videos.
87
+ - **Perform keyword and image similarity search at the video frame level** without having to worry about frame storage.
88
+ - **Access all Pixeltable-resident data directly as a PyTorch dataset** in your training scripts.
89
+ - **Understand the compute and storage costs of your data at the granularity** of individual augmentations and get cost projections before adding new data and new augmentations.
90
+ - **Rely on Pixeltable's automatic versioning and snapshot functionality** to protect against regressions and to ensure reproducibility.
91
+
92
+ ## 🐛 Contributions & Feedback
93
+
94
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
95
+ <br>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
96
+
97
+ ## :classical_building: License
98
+
99
+ This library is licensed under the Apache 2.0 License.
@@ -1,5 +1,7 @@
1
1
  from .catalog import Column, Table, InsertableTable, View
2
2
  from .dataframe import DataFrame
3
+ from .datatransfer import Remote
4
+ from .catalog import Column, Table, InsertableTable, View
3
5
  from .exceptions import Error, Error
4
6
  from .exprs import RELATIVE_PATH_ROOT
5
7
  from .func import Function, udf, uda, Aggregator, expr_udf
@@ -21,7 +23,7 @@ from .type_system import (
21
23
  from .utils.help import help
22
24
 
23
25
  # noinspection PyUnresolvedReferences
24
- from . import functions, io
26
+ from . import functions, io, iterators
25
27
  from .__version__ import __version__, __version_tuple__
26
28
 
27
29
  __all__ = [
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.6"
3
- __version_tuple__ = (0, 2, 6)
2
+ __version__ = "0.2.8"
3
+ __version_tuple__ = (0, 2, 8)
@@ -22,7 +22,8 @@ class Column:
22
22
  computed_with: Optional[Union['Expr', Callable]] = None,
23
23
  is_pk: bool = False, stored: Optional[bool] = None,
24
24
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
25
- schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None
25
+ schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
26
+ records_errors: Optional[bool] = None
26
27
  ):
27
28
  """Column constructor.
28
29
 
@@ -80,12 +81,14 @@ class Column:
80
81
  assert self.col_type is not None
81
82
 
82
83
  self.stored = stored
83
- self.dependent_cols: Set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
84
+ self.dependent_cols: set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
84
85
  self.id = col_id
85
86
  self.is_pk = is_pk
86
87
  self.schema_version_add = schema_version_add
87
88
  self.schema_version_drop = schema_version_drop
88
89
 
90
+ self._records_errors = records_errors
91
+
89
92
  # column in the stored table for the values of this Column
90
93
  self.sa_col: Optional[sql.schema.Column] = None
91
94
  self.sa_col_type = sa_col_type
@@ -131,6 +134,9 @@ class Column:
131
134
  @property
132
135
  def records_errors(self) -> bool:
133
136
  """True if this column also stores error information."""
137
+ # default: record errors for computed and media columns
138
+ if self._records_errors is not None:
139
+ return self._records_errors
134
140
  return self.is_stored and (self.is_computed or self.col_type.is_media_type())
135
141
 
136
142
  def source(self) -> None:
@@ -60,25 +60,29 @@ class InsertableTable(Table):
60
60
  return tbl
61
61
 
62
62
  @overload
63
- def insert(self, rows: Iterable[Dict[str, Any]], /, print_stats: bool = False, fail_on_exception: bool = True): ...
63
+ def insert(
64
+ self, rows: Iterable[Dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True
65
+ ) -> UpdateStatus: ...
64
66
 
65
67
  @overload
66
- def insert(self, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any): ...
68
+ def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
67
69
 
68
- def insert(self, *args, **kwargs) -> UpdateStatus:
69
- """Insert rows into table.
70
+ def insert(
71
+ self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
72
+ fail_on_exception: bool = True, **kwargs: Any
73
+ ) -> UpdateStatus:
74
+ """Inserts rows into this table. There are two mutually exclusive call patterns:
70
75
 
71
76
  To insert multiple rows at a time:
72
-
73
- ``insert(rows: List[Dict[str, Any]], print_stats: bool = False, fail_on_exception: bool = True)``
77
+ ``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
74
78
 
75
79
  To insert just a single row, you can use the more convenient syntax:
76
- ``insert(print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
80
+ ``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
77
81
 
78
82
  Args:
79
83
  rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
80
84
  names to values.
81
- kwargs: (if inserting a single row) keyword-argument pairs representing column names and values.
85
+ kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
82
86
  print_stats: If ``True``, print statistics about the cost of computed columns.
83
87
  fail_on_exception:
84
88
  Determines how exceptions in computed columns and invalid media files (e.g., corrupt images)
@@ -102,16 +106,27 @@ class InsertableTable(Table):
102
106
 
103
107
  >>> tbl.insert(a=1, b=1, c=1)
104
108
  """
105
- print_stats = kwargs.pop('print_stats', False)
106
- fail_on_exception = kwargs.pop('fail_on_exception', True)
107
- if len(args) > 0:
108
- # There's a positional argument; this means `rows` is expressed as a
109
- # list of dicts (multi-insert)
110
- rows = list(args[0])
111
- else:
112
- # No positional argument; this means we're inserting a single row
113
- # using kwargs syntax
109
+ # The commented code is the intended implementation, with signature (*args, **kwargs).
110
+ # That signature cannot be used currently, due to a present limitation in mkdocs.
111
+ # See: https://github.com/mkdocstrings/mkdocstrings/issues/669
112
+
113
+ # print_stats = kwargs.pop('print_stats', False)
114
+ # fail_on_exception = kwargs.pop('fail_on_exception', True)
115
+ # if len(args) > 0:
116
+ # # There's a positional argument; this means `rows` is expressed as a
117
+ # # list of dicts (multi-insert)
118
+ # rows = list(args[0])
119
+ # else:
120
+ # # No positional argument; this means we're inserting a single row
121
+ # # using kwargs syntax
122
+ # rows = [kwargs]
123
+
124
+ if rows is None:
114
125
  rows = [kwargs]
126
+ else:
127
+ rows = list(rows)
128
+ if len(kwargs) > 0:
129
+ raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
115
130
 
116
131
  if not isinstance(rows, list):
117
132
  raise excs.Error('rows must be a list of dictionaries')
@@ -1,9 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import itertools
3
4
  import json
4
5
  import logging
5
6
  from pathlib import Path
6
- from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple, Iterable
7
+ from typing import Union, Any, List, Dict, Optional, Callable, Set, Tuple, Iterable, Type
7
8
  from uuid import UUID
8
9
 
9
10
  import pandas as pd
@@ -16,6 +17,7 @@ import pixeltable.exceptions as excs
16
17
  import pixeltable.exprs as exprs
17
18
  import pixeltable.metadata.schema as schema
18
19
  import pixeltable.type_system as ts
20
+ import pixeltable.index as index
19
21
  from .column import Column
20
22
  from .globals import is_valid_identifier, is_system_column_name, UpdateStatus
21
23
  from .schema_object import SchemaObject
@@ -102,27 +104,26 @@ class Table(SchemaObject):
102
104
  from pixeltable.dataframe import DataFrame
103
105
  return DataFrame(self.tbl_version_path).group_by(*items)
104
106
 
105
- def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet': # type: ignore[name-defined, no-untyped-def]
106
- """Return rows from this table.
107
- """
107
+ def collect(self) -> 'pixeltable.dataframe.DataFrameResultSet':
108
+ """Return rows from this table."""
108
109
  return self.df().collect()
109
110
 
110
111
  def show(
111
112
  self, *args, **kwargs
112
- ) -> 'pixeltable.dataframe.DataFrameResultSet': # type: ignore[name-defined, no-untyped-def]
113
+ ) -> 'pixeltable.dataframe.DataFrameResultSet':
113
114
  """Return rows from this table.
114
115
  """
115
116
  return self.df().show(*args, **kwargs)
116
117
 
117
118
  def head(
118
119
  self, *args, **kwargs
119
- ) -> 'pixeltable.dataframe.DataFrameResultSet': # type: ignore[name-defined, no-untyped-def]
120
+ ) -> 'pixeltable.dataframe.DataFrameResultSet':
120
121
  """Return the first n rows inserted into this table."""
121
122
  return self.df().head(*args, **kwargs)
122
123
 
123
124
  def tail(
124
125
  self, *args, **kwargs
125
- ) -> 'pixeltable.dataframe.DataFrameResultSet': # type: ignore[name-defined, no-untyped-def]
126
+ ) -> 'pixeltable.dataframe.DataFrameResultSet':
126
127
  """Return the last n rows inserted into this table."""
127
128
  return self.df().tail(*args, **kwargs)
128
129
 
@@ -514,6 +515,24 @@ class Table(SchemaObject):
514
515
  status = self.tbl_version_path.tbl_version.add_index(col, idx_name=idx_name, idx=idx)
515
516
  # TODO: how to deal with exceptions here? drop the index and raise?
516
517
 
518
+ def drop_embedding_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
519
+ """Drop an embedding index from the table.
520
+
521
+ Args:
522
+ column_name: The name of the column whose embedding index to drop. Invalid if the column has multiple
523
+ embedding indices.
524
+ idx_name: The name of the index to drop.
525
+
526
+ Raises:
527
+ Error: If the index does not exist.
528
+
529
+ Examples:
530
+ Drop embedding index on the ``img`` column:
531
+
532
+ >>> tbl.drop_embedding_index(column_name='img')
533
+ """
534
+ self._drop_index(column_name=column_name, idx_name=idx_name, _idx_class=index.EmbeddingIndex)
535
+
517
536
  def drop_index(self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None) -> None:
518
537
  """Drop an index from the table.
519
538
 
@@ -529,6 +548,12 @@ class Table(SchemaObject):
529
548
 
530
549
  >>> tbl.drop_index(column_name='img')
531
550
  """
551
+ self._drop_index(column_name=column_name, idx_name=idx_name)
552
+
553
+ def _drop_index(
554
+ self, *, column_name: Optional[str] = None, idx_name: Optional[str] = None,
555
+ _idx_class: Optional[Type[index.IndexBase]] = None
556
+ ) -> None:
532
557
  if self.tbl_version_path.is_snapshot():
533
558
  raise excs.Error('Cannot drop an index from a snapshot')
534
559
  self._check_is_dropped()
@@ -547,12 +572,14 @@ class Table(SchemaObject):
547
572
  if col.tbl.id != tbl_version.id:
548
573
  raise excs.Error(
549
574
  f'Column {column_name}: cannot drop index from column that belongs to base ({col.tbl.name})')
550
- idx_ids = [info.id for info in tbl_version.idxs_by_name.values() if info.col.id == col.id]
551
- if len(idx_ids) == 0:
575
+ idx_info = [info for info in tbl_version.idxs_by_name.values() if info.col.id == col.id]
576
+ if _idx_class is not None:
577
+ idx_info = [info for info in idx_info if isinstance(info.idx, _idx_class)]
578
+ if len(idx_info) == 0:
552
579
  raise excs.Error(f'Column {column_name} does not have an index')
553
- if len(idx_ids) > 1:
580
+ if len(idx_info) > 1:
554
581
  raise excs.Error(f'Column {column_name} has multiple indices; specify idx_name instead')
555
- idx_id = idx_ids[0]
582
+ idx_id = idx_info[0].id
556
583
  self.tbl_version_path.tbl_version.drop_index(idx_id)
557
584
 
558
585
  def update(
@@ -682,7 +709,6 @@ class Table(SchemaObject):
682
709
 
683
710
  return update_targets
684
711
 
685
-
686
712
  def revert(self) -> None:
687
713
  """Reverts the table to the previous version.
688
714
 
@@ -693,3 +719,132 @@ class Table(SchemaObject):
693
719
  raise excs.Error('Cannot revert a snapshot')
694
720
  self._check_is_dropped()
695
721
  self.tbl_version_path.tbl_version.revert()
722
+
723
+ def _link(
724
+ self,
725
+ remote: 'pixeltable.datatransfer.Remote',
726
+ col_mapping: Optional[dict[str, str]] = None
727
+ ) -> None:
728
+ """
729
+ Links the specified `Remote` to this table. Once a remote is linked, it can be synchronized with
730
+ this `Table` by calling [`Table.sync()`]. A record of the link
731
+ is stored in table metadata and will persist across sessions.
732
+
733
+ Args:
734
+ remote (pixeltable.datatransfer.Remote): The `Remote` to link to this table.
735
+ col_mapping: An optional mapping of columns from this `Table` to columns in the `Remote`.
736
+ """
737
+ # TODO(aaron-siegel): Refactor `col_mapping`
738
+ if len(self._get_remotes()) > 0:
739
+ raise excs.Error('Linking more than one `Remote` to a table is not currently supported.')
740
+ self._check_is_dropped()
741
+ export_cols = remote.get_export_columns()
742
+ import_cols = remote.get_import_columns()
743
+ is_col_mapping_user_specified = col_mapping is not None
744
+ if col_mapping is None:
745
+ # Use the identity mapping by default if `col_mapping` is not specified
746
+ col_mapping = {col: col for col in itertools.chain(export_cols.keys(), import_cols.keys())}
747
+ self._validate_remote(export_cols, import_cols, col_mapping, is_col_mapping_user_specified)
748
+ self.tbl_version_path.tbl_version.link(remote, col_mapping)
749
+ print(f'Linked remote {remote} to table `{self.get_name()}`.')
750
+
751
+ def unlink(self) -> None:
752
+ """
753
+ Unlinks this table's `Remote`s.
754
+ """
755
+ self._check_is_dropped()
756
+ remotes = self._get_remotes()
757
+ assert len(remotes) <= 1
758
+
759
+ remote = next(iter(remotes.keys()))
760
+ self.tbl_version_path.tbl_version.unlink(remote)
761
+ # TODO: Provide an option to auto-delete the project
762
+ print(f'Unlinked remote {remote} from table `{self.get_name()}`.')
763
+
764
+ def _validate_remote(
765
+ self,
766
+ export_cols: dict[str, ts.ColumnType],
767
+ import_cols: dict[str, ts.ColumnType],
768
+ col_mapping: Optional[dict[str, str]],
769
+ is_col_mapping_user_specified: bool
770
+ ):
771
+ # Validate names
772
+ t_cols = self.column_names()
773
+ for t_col, r_col in col_mapping.items():
774
+ if t_col not in t_cols:
775
+ if is_col_mapping_user_specified:
776
+ raise excs.Error(
777
+ f'Column name `{t_col}` appears as a key in `col_mapping`, but Table `{self.get_name()}` '
778
+ 'contains no such column.'
779
+ )
780
+ else:
781
+ raise excs.Error(
782
+ f'Column `{t_col}` does not exist in Table `{self.get_name()}`. Either add a column `{t_col}`, '
783
+ f'or specify a `col_mapping` to associate a different column with the remote field `{r_col}`.'
784
+ )
785
+ if r_col not in export_cols and r_col not in import_cols:
786
+ raise excs.Error(
787
+ f'Column name `{r_col}` appears as a value in `col_mapping`, but the remote '
788
+ f'configuration has no column `{r_col}`.'
789
+ )
790
+ # Validate column specs
791
+ t_col_types = self.column_types()
792
+ for t_col, r_col in col_mapping.items():
793
+ t_col_type = t_col_types[t_col]
794
+ if r_col in export_cols:
795
+ # Validate that the table column can be assigned to the remote column
796
+ r_col_type = export_cols[r_col]
797
+ if not r_col_type.is_supertype_of(t_col_type):
798
+ raise excs.Error(
799
+ f'Column `{t_col}` cannot be exported to remote column `{r_col}` (incompatible types)'
800
+ )
801
+ if r_col in import_cols:
802
+ # Validate that the remote column can be assigned to the table column
803
+ if self.tbl_version_path.get_column(t_col).is_computed:
804
+ raise excs.Error(
805
+ f'Column `{t_col}` is a computed column, which cannot be populated from a remote column'
806
+ )
807
+ r_col_type = import_cols[r_col]
808
+ if not t_col_type.is_supertype_of(r_col_type):
809
+ raise excs.Error(
810
+ f'Column `{t_col}` cannot be imported from remote column `{r_col}` (incompatible types)'
811
+ )
812
+
813
+ def _get_remotes(self) -> dict[pixeltable.datatransfer.Remote, dict[str, str]]:
814
+ """
815
+ Gets a `dict` of all `Remote`s linked to this table.
816
+ """
817
+ return self.tbl_version_path.tbl_version.get_remotes()
818
+
819
+ def sync(
820
+ self,
821
+ *,
822
+ export_data: bool = True,
823
+ import_data: bool = True
824
+ ):
825
+ """
826
+ Synchronizes this table with its linked `Remote`s.
827
+
828
+ Args:
829
+ export_data: If `True`, data from this table will be exported to the external store during synchronization.
830
+ import_data: If `True`, data from the external store will be imported to this table during synchronization.
831
+ """
832
+ remotes = self._get_remotes()
833
+ assert len(remotes) <= 1
834
+
835
+ # Validation
836
+ for remote in remotes:
837
+ col_mapping = remotes[remote]
838
+ r_cols = set(col_mapping.values())
839
+ # Validate export/import
840
+ if export_data and not any(col in r_cols for col in remote.get_export_columns()):
841
+ raise excs.Error(
842
+ f'Attempted to sync with export_data=True, but there are no columns to export: {remote}'
843
+ )
844
+ if import_data and not any(col in r_cols for col in remote.get_import_columns()):
845
+ raise excs.Error(
846
+ f'Attempted to sync with import_data=True, but there are no columns to import: {remote}'
847
+ )
848
+
849
+ for remote in remotes:
850
+ remote.sync(self, remotes[remote], export_data=export_data, import_data=import_data)