pixeltable 0.2.6__tar.gz → 0.2.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (131) hide show
  1. pixeltable-0.2.7/PKG-INFO +137 -0
  2. pixeltable-0.2.7/README.md +99 -0
  3. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/__init__.py +3 -1
  4. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/__version__.py +2 -2
  5. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/column.py +14 -2
  6. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/insertable_table.py +32 -17
  7. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/table.py +194 -12
  8. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/table_version.py +270 -110
  9. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/table_version_path.py +6 -1
  10. pixeltable-0.2.7/pixeltable/datatransfer/__init__.py +1 -0
  11. pixeltable-0.2.7/pixeltable/datatransfer/label_studio.py +526 -0
  12. pixeltable-0.2.7/pixeltable/datatransfer/remote.py +113 -0
  13. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/env.py +156 -73
  14. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/column_ref.py +2 -2
  15. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/comparison.py +39 -1
  16. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/data_row.py +7 -0
  17. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/expr.py +11 -12
  18. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/function_call.py +0 -3
  19. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/globals.py +14 -2
  20. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/similarity_expr.py +5 -3
  21. pixeltable-0.2.7/pixeltable/ext/functions/whisperx.py +30 -0
  22. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/ext/functions/yolox.py +16 -0
  23. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/func/aggregate_function.py +2 -2
  24. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/func/expr_template_function.py +3 -1
  25. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/func/udf.py +2 -2
  26. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/functions/fireworks.py +9 -4
  27. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/functions/huggingface.py +25 -1
  28. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/functions/openai.py +15 -10
  29. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/functions/together.py +11 -6
  30. pixeltable-0.2.7/pixeltable/functions/util.py +9 -0
  31. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/functions/video.py +46 -8
  32. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/globals.py +20 -2
  33. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/index/__init__.py +1 -0
  34. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/index/base.py +6 -1
  35. pixeltable-0.2.7/pixeltable/index/btree.py +54 -0
  36. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/index/embedding_index.py +4 -1
  37. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/io/__init__.py +1 -0
  38. pixeltable-0.2.7/pixeltable/io/globals.py +59 -0
  39. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/iterators/base.py +4 -4
  40. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/iterators/document.py +26 -15
  41. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/iterators/video.py +9 -1
  42. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/metadata/__init__.py +2 -2
  43. pixeltable-0.2.7/pixeltable/metadata/converters/convert_14.py +13 -0
  44. pixeltable-0.2.7/pixeltable/metadata/converters/convert_15.py +29 -0
  45. pixeltable-0.2.7/pixeltable/metadata/converters/util.py +63 -0
  46. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/metadata/schema.py +12 -6
  47. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/plan.py +9 -5
  48. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/store.py +14 -21
  49. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/tool/create_test_db_dump.py +16 -0
  50. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/type_system.py +14 -4
  51. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/coco.py +94 -0
  52. {pixeltable-0.2.6 → pixeltable-0.2.7}/pyproject.toml +27 -14
  53. pixeltable-0.2.6/PKG-INFO +0 -131
  54. pixeltable-0.2.6/README.md +0 -93
  55. pixeltable-0.2.6/pixeltable/func/nos_function.py +0 -202
  56. pixeltable-0.2.6/pixeltable/functions/util.py +0 -52
  57. pixeltable-0.2.6/pixeltable/utils/clip.py +0 -18
  58. {pixeltable-0.2.6 → pixeltable-0.2.7}/LICENSE +0 -0
  59. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/__init__.py +0 -0
  60. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/catalog.py +0 -0
  61. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/dir.py +0 -0
  62. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/globals.py +0 -0
  63. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/named_function.py +0 -0
  64. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/path.py +0 -0
  65. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/path_dict.py +0 -0
  66. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/schema_object.py +0 -0
  67. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/catalog/view.py +0 -0
  68. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/dataframe.py +0 -0
  69. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exceptions.py +0 -0
  70. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exec/__init__.py +0 -0
  71. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exec/aggregation_node.py +0 -0
  72. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exec/cache_prefetch_node.py +0 -0
  73. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exec/component_iteration_node.py +0 -0
  74. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exec/data_row_batch.py +0 -0
  75. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exec/exec_context.py +0 -0
  76. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exec/exec_node.py +0 -0
  77. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exec/expr_eval_node.py +0 -0
  78. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exec/in_memory_data_node.py +0 -0
  79. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exec/media_validation_node.py +0 -0
  80. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exec/sql_scan_node.py +0 -0
  81. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/__init__.py +0 -0
  82. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/arithmetic_expr.py +0 -0
  83. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/array_slice.py +0 -0
  84. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/column_property_ref.py +0 -0
  85. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/compound_predicate.py +0 -0
  86. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/expr_set.py +0 -0
  87. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/image_member_access.py +0 -0
  88. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/in_predicate.py +0 -0
  89. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/inline_array.py +0 -0
  90. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/inline_dict.py +0 -0
  91. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/is_null.py +0 -0
  92. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/json_mapper.py +0 -0
  93. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/json_path.py +0 -0
  94. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/literal.py +0 -0
  95. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/object_ref.py +0 -0
  96. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/predicate.py +0 -0
  97. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/row_builder.py +0 -0
  98. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/rowid_ref.py +0 -0
  99. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/type_cast.py +0 -0
  100. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/exprs/variable.py +0 -0
  101. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/ext/__init__.py +0 -0
  102. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/func/__init__.py +0 -0
  103. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/func/callable_function.py +0 -0
  104. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/func/function.py +0 -0
  105. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/func/function_registry.py +0 -0
  106. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/func/globals.py +0 -0
  107. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/func/signature.py +0 -0
  108. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/functions/__init__.py +0 -0
  109. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/functions/eval.py +0 -0
  110. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/functions/image.py +0 -0
  111. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/functions/pil/image.py +0 -0
  112. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/functions/string.py +0 -0
  113. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/io/hf_datasets.py +0 -0
  114. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/io/pandas.py +0 -0
  115. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/io/parquet.py +0 -0
  116. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/iterators/__init__.py +0 -0
  117. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/metadata/converters/convert_10.py +0 -0
  118. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/metadata/converters/convert_12.py +0 -0
  119. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/metadata/converters/convert_13.py +0 -0
  120. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/tool/create_test_video.py +0 -0
  121. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/__init__.py +0 -0
  122. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/arrow.py +0 -0
  123. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/documents.py +0 -0
  124. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/filecache.py +0 -0
  125. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/help.py +0 -0
  126. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/http_server.py +0 -0
  127. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/media_store.py +0 -0
  128. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/pytorch.py +0 -0
  129. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/s3.py +0 -0
  130. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/sql.py +0 -0
  131. {pixeltable-0.2.6 → pixeltable-0.2.7}/pixeltable/utils/transactional_directory.py +0 -0
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.1
2
+ Name: pixeltable
3
+ Version: 0.2.7
4
+ Summary: Pixeltable: The Multimodal AI Data Plane
5
+ Author: Marcel Kornacker
6
+ Author-email: marcelk@gmail.com
7
+ Requires-Python: >=3.9,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: av (>=10.0.0)
14
+ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
15
+ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
16
+ Requires-Dist: ftfy (>=6.2.0,<7.0.0)
17
+ Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
18
+ Requires-Dist: jmespath (>=1.0.1,<2.0.0)
19
+ Requires-Dist: mistune (>=3.0.2,<4.0.0)
20
+ Requires-Dist: more-itertools (>=10.2,<11.0)
21
+ Requires-Dist: numpy (>=1.25)
22
+ Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
23
+ Requires-Dist: pandas (>=2.0,<3.0)
24
+ Requires-Dist: pgserver (==0.1.3)
25
+ Requires-Dist: pgvector (>=0.2.1,<0.3.0)
26
+ Requires-Dist: pillow (>=9.3.0)
27
+ Requires-Dist: psutil (>=5.9.5,<6.0.0)
28
+ Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
29
+ Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
30
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
31
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
32
+ Requires-Dist: setuptools (==69.1.1)
33
+ Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
34
+ Requires-Dist: tenacity (>=8.2,<9.0)
35
+ Requires-Dist: tqdm (>=4.64)
36
+ Description-Content-Type: text/markdown
37
+
38
+ <div align="center">
39
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
40
+
41
+ # Unifying Data, Models, and Orchestration for AI Products
42
+
43
+ [![License](https://img.shields.io/badge/License-Apache%202.0-darkblue.svg)](https://opensource.org/licenses/Apache-2.0)
44
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white)
45
+ [![Platform Support](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-8A2BE2)]()
46
+ [![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions)
47
+ [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=darkorange)](https://pypi.org/project/pixeltable/)
48
+
49
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
50
+ </div>
51
+
52
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
53
+
54
+ ## What problems does Pixeltable solve?
55
+
56
+ Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome. With Pixeltable you can store, transform, index, and iterate on your data within the same table interface, whether it's text, images, embeddings, or even video. Built-in lineage and versioning ensure transparency and reproducibility, while the development-to-production mirror streamlines deployment.
57
+
58
+ ## 💾 Installation
59
+
60
+ ```python
61
+ %pip install pixeltable
62
+ ```
63
+
64
+ To verify that it's working:
65
+
66
+ ```python
67
+ import pixeltable as pxt
68
+ pxt.init()
69
+ ```
70
+ > [!NOTE]
71
+ > Check out the [Pixeltable Basics](https://pixeltable.readme.io/docs/pixeltable-basics) tutorial for a tour of its most important features.
72
+
73
+ ## 💡 Get Started
74
+ Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
75
+
76
+ | Topic | Notebook | API |
77
+ |:--------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|
78
+ | Get Started | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/pixeltable/) |
79
+ | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
80
+ | Comparing Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#frame-extraction-for-video-data) |
81
+ | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
82
+ | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#inserting-data-into-a-table) |
83
+
84
+ ## ❓ FAQ
85
+
86
+ ### What does Pixeltable provide me with? Pixeltable provides:
87
+
88
+ - Data storage and versioning
89
+ - Combined Data and Model Lineage
90
+ - Indexing (e.g. embedding vectors) and Data Retrieval
91
+ - Orchestration of multimodal workloads
92
+ - Incremental updates
93
+ - Code is automatically production-ready
94
+
95
+ ### Why should you use Pixeltable?
96
+
97
+ - **It gives you transparency and reproducibility**
98
+ - All generated data is automatically recorded and versioned
99
+ - You will never need to re-run a workload because you lost track of the input data
100
+ - **It saves you money**
101
+ - All data changes are automatically incremental
102
+ - You never need to re-run pipelines from scratch because you’re adding data
103
+ - **It integrates with any existing Python code or libraries**
104
+ - Bring your ever-changing code and workloads
105
+ - You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
106
+
107
+ ### What is Pixeltable not providing?
108
+
109
+ - Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
110
+ - We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
111
+
112
+ > [!TIP]
113
+ > Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
114
+
115
+ ## 📙 Example Use Cases
116
+
117
+ - **Interact with video data at the frame level** without having to think about frame extraction, intermediate file storage, or storage space explosion.
118
+ - **Augment your data incrementally and interactively with built-in functions and UDFs**, such as image transformations, model inference, and visualizations, without having to think about data pipelines, incremental updates, or capturing function output.
119
+ - **Interact with all the data relevant to your AI application** (video, images, documents, audio, structured data, JSON) through a simple dataframe-style API directly in Python. This includes:
120
+ - similarity search on embeddings, supported by high-dimensional vector indexing;
121
+ - path expressions and transformations on JSON data;
122
+ - PIL and OpenCV image operations;
123
+ - assembling frames into videos.
124
+ - **Perform keyword and image similarity search at the video frame level** without having to worry about frame storage.
125
+ - **Access all Pixeltable-resident data directly as a PyTorch dataset** in your training scripts.
126
+ - **Understand the compute and storage costs of your data at the granularity** of individual augmentations and get cost projections before adding new data and new augmentations.
127
+ - **Rely on Pixeltable's automatic versioning and snapshot functionality** to protect against regressions and to ensure reproducibility.
128
+
129
+ ## 🐛 Contributions & Feedback
130
+
131
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
132
+ <br>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
133
+
134
+ ## :classical_building: License
135
+
136
+ This library is licensed under the Apache 2.0 License.
137
+
@@ -0,0 +1,99 @@
1
+ <div align="center">
2
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
3
+
4
+ # Unifying Data, Models, and Orchestration for AI Products
5
+
6
+ [![License](https://img.shields.io/badge/License-Apache%202.0-darkblue.svg)](https://opensource.org/licenses/Apache-2.0)
7
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white)
8
+ [![Platform Support](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-8A2BE2)]()
9
+ [![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions)
10
+ [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=darkorange)](https://pypi.org/project/pixeltable/)
11
+
12
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
13
+ </div>
14
+
15
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
16
+
17
+ ## What problems does Pixeltable solve?
18
+
19
+ Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome. With Pixeltable you can store, transform, index, and iterate on your data within the same table interface, whether it's text, images, embeddings, or even video. Built-in lineage and versioning ensure transparency and reproducibility, while the development-to-production mirror streamlines deployment.
20
+
21
+ ## 💾 Installation
22
+
23
+ ```python
24
+ %pip install pixeltable
25
+ ```
26
+
27
+ To verify that it's working:
28
+
29
+ ```python
30
+ import pixeltable as pxt
31
+ pxt.init()
32
+ ```
33
+ > [!NOTE]
34
+ > Check out the [Pixeltable Basics](https://pixeltable.readme.io/docs/pixeltable-basics) tutorial for a tour of its most important features.
35
+
36
+ ## 💡 Get Started
37
+ Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
38
+
39
+ | Topic | Notebook | API |
40
+ |:--------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|
41
+ | Get Started | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/pixeltable/) |
42
+ | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
43
+ | Comparing Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#frame-extraction-for-video-data) |
44
+ | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
45
+ | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#inserting-data-into-a-table) |
46
+
47
+ ## ❓ FAQ
48
+
49
+ ### What does Pixeltable provide me with? Pixeltable provides:
50
+
51
+ - Data storage and versioning
52
+ - Combined Data and Model Lineage
53
+ - Indexing (e.g. embedding vectors) and Data Retrieval
54
+ - Orchestration of multimodal workloads
55
+ - Incremental updates
56
+ - Code is automatically production-ready
57
+
58
+ ### Why should you use Pixeltable?
59
+
60
+ - **It gives you transparency and reproducibility**
61
+ - All generated data is automatically recorded and versioned
62
+ - You will never need to re-run a workload because you lost track of the input data
63
+ - **It saves you money**
64
+ - All data changes are automatically incremental
65
+ - You never need to re-run pipelines from scratch because you’re adding data
66
+ - **It integrates with any existing Python code or libraries**
67
+ - Bring your ever-changing code and workloads
68
+ - You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
69
+
70
+ ### What is Pixeltable not providing?
71
+
72
+ - Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
73
+ - We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
74
+
75
+ > [!TIP]
76
+ > Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
77
+
78
+ ## 📙 Example Use Cases
79
+
80
+ - **Interact with video data at the frame level** without having to think about frame extraction, intermediate file storage, or storage space explosion.
81
+ - **Augment your data incrementally and interactively with built-in functions and UDFs**, such as image transformations, model inference, and visualizations, without having to think about data pipelines, incremental updates, or capturing function output.
82
+ - **Interact with all the data relevant to your AI application** (video, images, documents, audio, structured data, JSON) through a simple dataframe-style API directly in Python. This includes:
83
+ - similarity search on embeddings, supported by high-dimensional vector indexing;
84
+ - path expressions and transformations on JSON data;
85
+ - PIL and OpenCV image operations;
86
+ - assembling frames into videos.
87
+ - **Perform keyword and image similarity search at the video frame level** without having to worry about frame storage.
88
+ - **Access all Pixeltable-resident data directly as a PyTorch dataset** in your training scripts.
89
+ - **Understand the compute and storage costs of your data at the granularity** of individual augmentations and get cost projections before adding new data and new augmentations.
90
+ - **Rely on Pixeltable's automatic versioning and snapshot functionality** to protect against regressions and to ensure reproducibility.
91
+
92
+ ## 🐛 Contributions & Feedback
93
+
94
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
95
+ <br>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
96
+
97
+ ## :classical_building: License
98
+
99
+ This library is licensed under the Apache 2.0 License.
@@ -1,5 +1,7 @@
1
1
  from .catalog import Column, Table, InsertableTable, View
2
2
  from .dataframe import DataFrame
3
+ from .datatransfer import Remote
4
+ from .catalog import Column, Table, InsertableTable, View
3
5
  from .exceptions import Error, Error
4
6
  from .exprs import RELATIVE_PATH_ROOT
5
7
  from .func import Function, udf, uda, Aggregator, expr_udf
@@ -21,7 +23,7 @@ from .type_system import (
21
23
  from .utils.help import help
22
24
 
23
25
  # noinspection PyUnresolvedReferences
24
- from . import functions, io
26
+ from . import functions, io, iterators
25
27
  from .__version__ import __version__, __version_tuple__
26
28
 
27
29
  __all__ = [
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.6"
3
- __version_tuple__ = (0, 2, 6)
2
+ __version__ = "0.2.7"
3
+ __version_tuple__ = (0, 2, 7)
@@ -22,7 +22,8 @@ class Column:
22
22
  computed_with: Optional[Union['Expr', Callable]] = None,
23
23
  is_pk: bool = False, stored: Optional[bool] = None,
24
24
  col_id: Optional[int] = None, schema_version_add: Optional[int] = None,
25
- schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None
25
+ schema_version_drop: Optional[int] = None, sa_col_type: Optional[sql.sqltypes.TypeEngine] = None,
26
+ records_errors: Optional[bool] = None
26
27
  ):
27
28
  """Column constructor.
28
29
 
@@ -80,12 +81,19 @@ class Column:
80
81
  assert self.col_type is not None
81
82
 
82
83
  self.stored = stored
83
- self.dependent_cols: Set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
84
+ self.dependent_cols: set[Column] = set() # cols with value_exprs that reference us; set by TableVersion
84
85
  self.id = col_id
85
86
  self.is_pk = is_pk
86
87
  self.schema_version_add = schema_version_add
87
88
  self.schema_version_drop = schema_version_drop
88
89
 
90
+ # stored_proxy may be set later if this is a non-stored column.
91
+ # if col1.stored_proxy == col2, then also col1 == col2.proxy_base.
92
+ self.stored_proxy: Optional[Column] = None
93
+ self.proxy_base: Optional[Column] = None
94
+
95
+ self._records_errors = records_errors
96
+
89
97
  # column in the stored table for the values of this Column
90
98
  self.sa_col: Optional[sql.schema.Column] = None
91
99
  self.sa_col_type = sa_col_type
@@ -93,6 +101,7 @@ class Column:
93
101
  # computed cols also have storage columns for the exception string and type
94
102
  self.sa_errormsg_col: Optional[sql.schema.Column] = None
95
103
  self.sa_errortype_col: Optional[sql.schema.Column] = None
104
+
96
105
  from .table_version import TableVersion
97
106
  self.tbl: Optional[TableVersion] = None # set by owning TableVersion
98
107
 
@@ -131,6 +140,9 @@ class Column:
131
140
  @property
132
141
  def records_errors(self) -> bool:
133
142
  """True if this column also stores error information."""
143
+ # default: record errors for computed and media columns
144
+ if self._records_errors is not None:
145
+ return self._records_errors
134
146
  return self.is_stored and (self.is_computed or self.col_type.is_media_type())
135
147
 
136
148
  def source(self) -> None:
@@ -60,25 +60,29 @@ class InsertableTable(Table):
60
60
  return tbl
61
61
 
62
62
  @overload
63
- def insert(self, rows: Iterable[Dict[str, Any]], /, print_stats: bool = False, fail_on_exception: bool = True): ...
63
+ def insert(
64
+ self, rows: Iterable[Dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True
65
+ ) -> UpdateStatus: ...
64
66
 
65
67
  @overload
66
- def insert(self, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any): ...
68
+ def insert(self, *, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any) -> UpdateStatus: ...
67
69
 
68
- def insert(self, *args, **kwargs) -> UpdateStatus:
69
- """Insert rows into table.
70
+ def insert(
71
+ self, rows: Optional[Iterable[dict[str, Any]]] = None, /, *, print_stats: bool = False,
72
+ fail_on_exception: bool = True, **kwargs: Any
73
+ ) -> UpdateStatus:
74
+ """Inserts rows into this table. There are two mutually exclusive call patterns:
70
75
 
71
76
  To insert multiple rows at a time:
72
-
73
- ``insert(rows: List[Dict[str, Any]], print_stats: bool = False, fail_on_exception: bool = True)``
77
+ ``insert(rows: Iterable[dict[str, Any]], /, *, print_stats: bool = False, fail_on_exception: bool = True)``
74
78
 
75
79
  To insert just a single row, you can use the more convenient syntax:
76
- ``insert(print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
80
+ ``insert(*, print_stats: bool = False, fail_on_exception: bool = True, **kwargs: Any)``
77
81
 
78
82
  Args:
79
83
  rows: (if inserting multiple rows) A list of rows to insert, each of which is a dictionary mapping column
80
84
  names to values.
81
- kwargs: (if inserting a single row) keyword-argument pairs representing column names and values.
85
+ kwargs: (if inserting a single row) Keyword-argument pairs representing column names and values.
82
86
  print_stats: If ``True``, print statistics about the cost of computed columns.
83
87
  fail_on_exception:
84
88
  Determines how exceptions in computed columns and invalid media files (e.g., corrupt images)
@@ -102,16 +106,27 @@ class InsertableTable(Table):
102
106
 
103
107
  >>> tbl.insert(a=1, b=1, c=1)
104
108
  """
105
- print_stats = kwargs.pop('print_stats', False)
106
- fail_on_exception = kwargs.pop('fail_on_exception', True)
107
- if len(args) > 0:
108
- # There's a positional argument; this means `rows` is expressed as a
109
- # list of dicts (multi-insert)
110
- rows = list(args[0])
111
- else:
112
- # No positional argument; this means we're inserting a single row
113
- # using kwargs syntax
109
+ # The commented code is the intended implementation, with signature (*args, **kwargs).
110
+ # That signature cannot be used currently, due to a present limitation in mkdocs.
111
+ # See: https://github.com/mkdocstrings/mkdocstrings/issues/669
112
+
113
+ # print_stats = kwargs.pop('print_stats', False)
114
+ # fail_on_exception = kwargs.pop('fail_on_exception', True)
115
+ # if len(args) > 0:
116
+ # # There's a positional argument; this means `rows` is expressed as a
117
+ # # list of dicts (multi-insert)
118
+ # rows = list(args[0])
119
+ # else:
120
+ # # No positional argument; this means we're inserting a single row
121
+ # # using kwargs syntax
122
+ # rows = [kwargs]
123
+
124
+ if rows is None:
114
125
  rows = [kwargs]
126
+ else:
127
+ rows = list(rows)
128
+ if len(kwargs) > 0:
129
+ raise excs.Error('`kwargs` cannot be specified unless `rows is None`.')
115
130
 
116
131
  if not isinstance(rows, list):
117
132
  raise excs.Error('rows must be a list of dictionaries')