pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable has been flagged as potentially problematic. See the registry listing for more details.

Files changed (202) hide show
  1. pixeltable/__init__.py +23 -5
  2. pixeltable/_version.py +1 -0
  3. pixeltable/catalog/__init__.py +5 -3
  4. pixeltable/catalog/catalog.py +1318 -404
  5. pixeltable/catalog/column.py +186 -115
  6. pixeltable/catalog/dir.py +1 -2
  7. pixeltable/catalog/globals.py +11 -43
  8. pixeltable/catalog/insertable_table.py +167 -79
  9. pixeltable/catalog/path.py +61 -23
  10. pixeltable/catalog/schema_object.py +9 -10
  11. pixeltable/catalog/table.py +626 -308
  12. pixeltable/catalog/table_metadata.py +101 -0
  13. pixeltable/catalog/table_version.py +713 -569
  14. pixeltable/catalog/table_version_handle.py +37 -6
  15. pixeltable/catalog/table_version_path.py +42 -29
  16. pixeltable/catalog/tbl_ops.py +50 -0
  17. pixeltable/catalog/update_status.py +191 -0
  18. pixeltable/catalog/view.py +108 -94
  19. pixeltable/config.py +128 -22
  20. pixeltable/dataframe.py +188 -100
  21. pixeltable/env.py +407 -136
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +3 -0
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +231 -0
  27. pixeltable/exec/cell_reconstruction_node.py +135 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +7 -6
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +190 -30
  37. pixeltable/exec/globals.py +32 -0
  38. pixeltable/exec/in_memory_data_node.py +18 -18
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +206 -101
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +34 -30
  46. pixeltable/exprs/column_ref.py +92 -96
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +152 -55
  50. pixeltable/exprs/expr.py +62 -43
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +75 -37
  54. pixeltable/exprs/globals.py +1 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +10 -27
  57. pixeltable/exprs/is_null.py +1 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +5 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +127 -53
  64. pixeltable/exprs/rowid_ref.py +8 -12
  65. pixeltable/exprs/similarity_expr.py +50 -25
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +10 -10
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +20 -18
  78. pixeltable/func/signature.py +43 -16
  79. pixeltable/func/tools.py +23 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +6 -0
  82. pixeltable/functions/anthropic.py +93 -33
  83. pixeltable/functions/audio.py +114 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +1 -1
  86. pixeltable/functions/deepseek.py +20 -9
  87. pixeltable/functions/fireworks.py +2 -2
  88. pixeltable/functions/gemini.py +28 -11
  89. pixeltable/functions/globals.py +13 -13
  90. pixeltable/functions/groq.py +108 -0
  91. pixeltable/functions/huggingface.py +1046 -23
  92. pixeltable/functions/image.py +9 -18
  93. pixeltable/functions/llama_cpp.py +23 -8
  94. pixeltable/functions/math.py +3 -4
  95. pixeltable/functions/mistralai.py +4 -15
  96. pixeltable/functions/ollama.py +16 -9
  97. pixeltable/functions/openai.py +104 -82
  98. pixeltable/functions/openrouter.py +143 -0
  99. pixeltable/functions/replicate.py +2 -2
  100. pixeltable/functions/reve.py +250 -0
  101. pixeltable/functions/string.py +21 -28
  102. pixeltable/functions/timestamp.py +13 -14
  103. pixeltable/functions/together.py +4 -6
  104. pixeltable/functions/twelvelabs.py +92 -0
  105. pixeltable/functions/util.py +6 -1
  106. pixeltable/functions/video.py +1388 -106
  107. pixeltable/functions/vision.py +7 -7
  108. pixeltable/functions/whisper.py +15 -7
  109. pixeltable/functions/whisperx.py +179 -0
  110. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  111. pixeltable/globals.py +332 -105
  112. pixeltable/index/base.py +13 -22
  113. pixeltable/index/btree.py +23 -22
  114. pixeltable/index/embedding_index.py +32 -44
  115. pixeltable/io/__init__.py +4 -2
  116. pixeltable/io/datarows.py +7 -6
  117. pixeltable/io/external_store.py +49 -77
  118. pixeltable/io/fiftyone.py +11 -11
  119. pixeltable/io/globals.py +29 -28
  120. pixeltable/io/hf_datasets.py +17 -9
  121. pixeltable/io/label_studio.py +70 -66
  122. pixeltable/io/lancedb.py +3 -0
  123. pixeltable/io/pandas.py +12 -11
  124. pixeltable/io/parquet.py +13 -93
  125. pixeltable/io/table_data_conduit.py +71 -47
  126. pixeltable/io/utils.py +3 -3
  127. pixeltable/iterators/__init__.py +2 -1
  128. pixeltable/iterators/audio.py +21 -11
  129. pixeltable/iterators/document.py +116 -55
  130. pixeltable/iterators/image.py +5 -2
  131. pixeltable/iterators/video.py +293 -13
  132. pixeltable/metadata/__init__.py +4 -2
  133. pixeltable/metadata/converters/convert_18.py +2 -2
  134. pixeltable/metadata/converters/convert_19.py +2 -2
  135. pixeltable/metadata/converters/convert_20.py +2 -2
  136. pixeltable/metadata/converters/convert_21.py +2 -2
  137. pixeltable/metadata/converters/convert_22.py +2 -2
  138. pixeltable/metadata/converters/convert_24.py +2 -2
  139. pixeltable/metadata/converters/convert_25.py +2 -2
  140. pixeltable/metadata/converters/convert_26.py +2 -2
  141. pixeltable/metadata/converters/convert_29.py +4 -4
  142. pixeltable/metadata/converters/convert_34.py +2 -2
  143. pixeltable/metadata/converters/convert_36.py +2 -2
  144. pixeltable/metadata/converters/convert_37.py +15 -0
  145. pixeltable/metadata/converters/convert_38.py +39 -0
  146. pixeltable/metadata/converters/convert_39.py +124 -0
  147. pixeltable/metadata/converters/convert_40.py +73 -0
  148. pixeltable/metadata/converters/util.py +13 -12
  149. pixeltable/metadata/notes.py +4 -0
  150. pixeltable/metadata/schema.py +79 -42
  151. pixeltable/metadata/utils.py +74 -0
  152. pixeltable/mypy/__init__.py +3 -0
  153. pixeltable/mypy/mypy_plugin.py +123 -0
  154. pixeltable/plan.py +274 -223
  155. pixeltable/share/__init__.py +1 -1
  156. pixeltable/share/packager.py +259 -129
  157. pixeltable/share/protocol/__init__.py +34 -0
  158. pixeltable/share/protocol/common.py +170 -0
  159. pixeltable/share/protocol/operation_types.py +33 -0
  160. pixeltable/share/protocol/replica.py +109 -0
  161. pixeltable/share/publish.py +213 -57
  162. pixeltable/store.py +238 -175
  163. pixeltable/type_system.py +104 -63
  164. pixeltable/utils/__init__.py +2 -3
  165. pixeltable/utils/arrow.py +108 -13
  166. pixeltable/utils/av.py +298 -0
  167. pixeltable/utils/azure_store.py +305 -0
  168. pixeltable/utils/code.py +3 -3
  169. pixeltable/utils/console_output.py +4 -1
  170. pixeltable/utils/coroutine.py +6 -23
  171. pixeltable/utils/dbms.py +31 -5
  172. pixeltable/utils/description_helper.py +4 -5
  173. pixeltable/utils/documents.py +5 -6
  174. pixeltable/utils/exception_handler.py +7 -30
  175. pixeltable/utils/filecache.py +6 -6
  176. pixeltable/utils/formatter.py +4 -6
  177. pixeltable/utils/gcs_store.py +283 -0
  178. pixeltable/utils/http_server.py +2 -3
  179. pixeltable/utils/iceberg.py +1 -2
  180. pixeltable/utils/image.py +17 -0
  181. pixeltable/utils/lancedb.py +88 -0
  182. pixeltable/utils/local_store.py +316 -0
  183. pixeltable/utils/misc.py +5 -0
  184. pixeltable/utils/object_stores.py +528 -0
  185. pixeltable/utils/pydantic.py +60 -0
  186. pixeltable/utils/pytorch.py +5 -6
  187. pixeltable/utils/s3_store.py +392 -0
  188. pixeltable-0.4.20.dist-info/METADATA +587 -0
  189. pixeltable-0.4.20.dist-info/RECORD +218 -0
  190. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
  191. pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
  192. pixeltable/__version__.py +0 -3
  193. pixeltable/ext/__init__.py +0 -17
  194. pixeltable/ext/functions/__init__.py +0 -11
  195. pixeltable/ext/functions/whisperx.py +0 -77
  196. pixeltable/utils/media_store.py +0 -77
  197. pixeltable/utils/s3.py +0 -17
  198. pixeltable/utils/sample.py +0 -25
  199. pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
  200. pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
  201. pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
  202. {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,587 @@
1
+ Metadata-Version: 2.4
2
+ Name: pixeltable
3
+ Version: 0.4.20
4
+ Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
5
+ Project-URL: homepage, https://pixeltable.com/
6
+ Project-URL: repository, https://github.com/pixeltable/pixeltable
7
+ Project-URL: documentation, https://docs.pixeltable.com/
8
+ Author-email: "Pixeltable, Inc." <contact@pixeltable.com>
9
+ License-Expression: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: ai,artificial-intelligence,chatbot,computer-vision,data-science,database,feature-engineering,feature-store,genai,llm,machine-learning,ml,mlops,multimodal,vector-database
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: Apache Software License
15
+ Classifier: Operating System :: MacOS
16
+ Classifier: Operating System :: Microsoft :: Windows
17
+ Classifier: Operating System :: POSIX :: Linux
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Topic :: Database
23
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
+ Requires-Python: >=3.10
26
+ Requires-Dist: aiohttp>=3.9
27
+ Requires-Dist: av>=10.0.0
28
+ Requires-Dist: beautifulsoup4>=4.10
29
+ Requires-Dist: cloudpickle>=2.2.1
30
+ Requires-Dist: ftfy>=6.2.0
31
+ Requires-Dist: httpcore>=1.0.3
32
+ Requires-Dist: httpx>=0.27
33
+ Requires-Dist: jinja2>=3.1.3
34
+ Requires-Dist: jmespath>=1.0.1
35
+ Requires-Dist: jsonschema>=4.1.0
36
+ Requires-Dist: lxml>=5.1
37
+ Requires-Dist: more-itertools>=10.2
38
+ Requires-Dist: nest-asyncio>=1.5
39
+ Requires-Dist: numpy>=1.25
40
+ Requires-Dist: pandas>=2.0
41
+ Requires-Dist: pgvector>=0.4.0
42
+ Requires-Dist: pillow-heif>=0.15.0
43
+ Requires-Dist: pillow>=9.3.0
44
+ Requires-Dist: pixeltable-pgserver==0.4.0
45
+ Requires-Dist: psutil>=5.9.5
46
+ Requires-Dist: psycopg[binary]>=3.1.18
47
+ Requires-Dist: puremagic>=1.20
48
+ Requires-Dist: pyarrow>=19
49
+ Requires-Dist: pydantic>=2.7.4
50
+ Requires-Dist: pymupdf>=1.24.1
51
+ Requires-Dist: pyyaml>=6.0.1
52
+ Requires-Dist: requests>=2.31.0
53
+ Requires-Dist: sqlalchemy>=2.0.23
54
+ Requires-Dist: tenacity>=8.2
55
+ Requires-Dist: toml>=0.10
56
+ Requires-Dist: tqdm>=4.64
57
+ Requires-Dist: tzlocal>=5.0
58
+ Description-Content-Type: text/markdown
59
+
60
+ <picture class="github-only">
61
+ <source media="(prefers-color-scheme: light)" srcset="https://github.com/user-attachments/assets/e9bf82b2-cace-4bd8-9523-b65495eb8131">
62
+ <source media="(prefers-color-scheme: dark)" srcset="https://github.com/user-attachments/assets/c5ab123e-806c-49bf-93e7-151353719b16">
63
+ <img alt="Pixeltable Logo" src="https://github.com/user-attachments/assets/e9bf82b2-cace-4bd8-9523-b65495eb8131" width="40%">
64
+ </picture>
65
+
66
+ <div>
67
+ <br>
68
+ </div>
69
+
70
+ The only open source Python library providing declarative data infrastructure for building multimodal AI applications, enabling incremental storage, transformation, indexing, retrieval, and orchestration of data.
71
+
72
+ [![License](https://img.shields.io/badge/License-Apache%202.0-0530AD.svg)](https://opensource.org/licenses/Apache-2.0)
73
+ [![tests status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml)
74
+ [![nightly status](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml)
75
+ [![stress-tests status](https://github.com/pixeltable/pixeltable/actions/workflows/stress-tests.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/stress-tests.yml)
76
+ [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=4D148C)](https://pypi.org/project/pixeltable/)
77
+ [![My Discord (1306431018890166272)](https://img.shields.io/badge/💬-Discord-%235865F2.svg)](https://discord.gg/QPyqFYx2UN)
78
+
79
+ [**Quick Start**](https://docs.pixeltable.com/overview/quick-start) |
80
+ [**Documentation**](https://docs.pixeltable.com/) |
81
+ [**API Reference**](https://pixeltable.github.io/pixeltable/) |
82
+ [**Sample Apps**](https://github.com/pixeltable/pixeltable/tree/main/docs/sample-apps) |
83
+ [**Discord Community**](https://discord.gg/QPyqFYx2UN)
84
+
85
+ ---
86
+
87
+ ## Installation
88
+
89
+ ```bash
90
+ pip install pixeltable
91
+ ```
92
+ Pixeltable replaces the complex multi-system architecture typically needed for AI applications (databases, file storage, vector DBs, APIs, orchestration) with a single declarative table interface that natively handles multimodal data like images, videos, and documents.
93
+
94
+ ## Demo
95
+
96
+ https://github.com/user-attachments/assets/b50fd6df-5169-4881-9dbe-1b6e5d06cede
97
+
98
+ ## Quick Start
99
+
100
+ With Pixeltable, you define your *entire* data processing and AI workflow declaratively using
101
+ **[computed columns](https://docs.pixeltable.com/datastore/computed-columns)** on
102
+ **[tables](https://docs.pixeltable.com/datastore/tables-and-operations)**.
103
+ Focus on your application logic, not the data plumbing.
104
+
105
+ ```python
106
+
107
+ # Installation
108
+ # pip install -qU torch transformers openai pixeltable
109
+
110
+ # Basic setup
111
+ import pixeltable as pxt
112
+
113
+ # Table with multimodal column types (Image, Video, Audio, Document)
114
+ t = pxt.create_table('images', {'input_image': pxt.Image})
115
+
116
+ # Computed columns: define transformation logic once, runs on all data
117
+ from pixeltable.functions import huggingface
118
+
119
+ # Object detection with automatic model management
120
+ t.add_computed_column(
121
+ detections=huggingface.detr_for_object_detection(
122
+ t.input_image,
123
+ model_id='facebook/detr-resnet-50'
124
+ )
125
+ )
126
+
127
+ # Extract specific fields from detection results
128
+ t.add_computed_column(detections_text=t.detections.label_text)
129
+
130
+ # OpenAI Vision API integration with built-in rate limiting and async management
131
+ from pixeltable.functions import openai
132
+
133
+ t.add_computed_column(
134
+ vision=openai.vision(
135
+ prompt="Describe what's in this image.",
136
+ image=t.input_image,
137
+ model='gpt-4o-mini'
138
+ )
139
+ )
140
+
141
+ # Insert data directly from an external URL
142
+ # Automatically triggers computation of all computed columns
143
+ t.insert(input_image='https://raw.github.com/pixeltable/pixeltable/release/docs/resources/images/000000000025.jpg')
144
+
145
+ # Query - All data, metadata, and computed results are persistently stored
146
+ # Structured and unstructured data are returned side-by-side
147
+ results = t.select(
148
+ t.input_image,
149
+ t.detections_text,
150
+ t.vision
151
+ ).collect()
152
+ ```
153
+
154
+ ## What Happened?
155
+
156
+ * **Data Ingestion & Storage:** References [files](https://docs.pixeltable.com/datastore/bringing-data)
157
+ (images, videos, audio, docs) in place, handles structured data.
158
+ * **Transformation & Processing:** Applies *any* Python function ([UDFs](https://docs.pixeltable.com/datastore/custom-functions))
159
+ or built-in operations ([chunking, frame extraction](https://docs.pixeltable.com/datastore/iterators)) automatically.
160
+ * **AI Model Integration:** Runs inference ([embeddings](https://docs.pixeltable.com/datastore/vector-database),
161
+ [object detection](https://docs.pixeltable.com/examples/vision/yolox),
162
+ [LLMs](https://docs.pixeltable.com/integrations/frameworks#cloud-llm-providers)) as part of the data pipeline.
163
+ * **Indexing & Retrieval:** Creates and manages vector indexes for fast
164
+ [semantic search](https://docs.pixeltable.com/datastore/vector-database#phase-3%3A-query)
165
+ alongside traditional filtering.
166
+ * **Incremental Computation:** Only [recomputes](https://docs.pixeltable.com/overview/quick-start) what's
167
+ necessary when data or code changes, saving time and cost.
168
+ * **Versioning & Lineage:** Automatically tracks data and schema changes for reproducibility. See below for an example
169
+ that uses "time travel" to query an older version of a table.
170
+
171
+ Pixeltable can ingest data from local storage or directly from a URL. When external media files are referenced by URL,
172
+ as in the `insert` statement above, Pixeltable caches them locally before processing. See the
173
+ [Working with External Files](https://github.com/pixeltable/pixeltable/blob/main/docs/notebooks/feature-guides/working-with-external-files.ipynb)
174
+ notebook for more details.
175
+
176
+ ## Where Did My Data Go?
177
+
178
+ Pixeltable workloads generate various outputs, including both structured outputs (such as bounding boxes for detected
179
+ objects) and/or unstructured outputs (such as generated images or video). By default, everything resides in your
180
+ Pixeltable user directory at `~/.pixeltable`. Structured data is stored in a Postgres instance in `~/.pixeltable`.
181
+ Generated media (images, video, audio, documents) are stored outside the Postgres database, in separate flat files in
182
+ `~/.pixeltable/media`. Those media files are referenced by URL in the database, and Pixeltable provides the "glue" for
183
+ a unified table interface over both structured and unstructured data.
184
+
185
+ In general, the user is not expected to interact directly with the data in `~/.pixeltable`; the data store is fully
186
+ managed by Pixeltable and is intended to be accessed through the Pixeltable Python SDK.
187
+
188
+ ## Key Principles
189
+
190
+ **[Unified Multimodal Interface:](https://docs.pixeltable.com/datastore/tables-and-operations)** `pxt.Image`,
191
+ `pxt.Video`, `pxt.Audio`, `pxt.Document`, etc. – manage diverse data consistently.
192
+
193
+ ```python
194
+ t = pxt.create_table(
195
+ 'media',
196
+ {
197
+ 'img': pxt.Image,
198
+ 'video': pxt.Video
199
+ }
200
+ )
201
+ ```
202
+
203
+ **[Declarative Computed Columns:](https://docs.pixeltable.com/datastore/computed-columns)** Define processing
204
+ steps once; they run automatically on new/updated data.
205
+
206
+ ```python
207
+ t.add_computed_column(
208
+ classification=huggingface.vit_for_image_classification(
209
+ t.img
210
+ )
211
+ )
212
+ ```
213
+
214
+ **[Built-in Vector Search:](https://docs.pixeltable.com/datastore/vector-database)** Add embedding indexes and
215
+ perform similarity searches directly on tables/views.
216
+
217
+ ```python
218
+ t.add_embedding_index(
219
+ 'img',
220
+ embedding=clip.using(
221
+ model_id='openai/clip-vit-base-patch32'
222
+ )
223
+ )
224
+
225
+ sim = t.img.similarity("cat playing with yarn")
226
+ ```
227
+
228
+ **[Incremental View Maintenance:](https://docs.pixeltable.com/datastore/views)** Create virtual tables using iterators
229
+ for efficient processing without data duplication.
230
+
231
+ ```python
232
+ # Document chunking with overlap & metadata and many more options to build your own iterator
233
+ chunks = pxt.create_view('chunks', docs,
234
+ iterator=DocumentSplitter.create(
235
+ document=docs.doc,
236
+ separators='sentence,token_limit',
237
+ overlap=50, limit=500
238
+ ))
239
+
240
+ # Video frame extraction
241
+ frames = pxt.create_view('frames', videos,
242
+ iterator=FrameIterator.create(video=videos.video, fps=0.5))
243
+ ```
244
+
245
+ **[Seamless AI Integration:](https://docs.pixeltable.com/integrations/frameworks)** Built-in functions for
246
+ OpenAI, Anthropic, Hugging Face, CLIP, YOLOX, and more.
247
+
248
+ ```python
249
+ # LLM integration (OpenAI, Anthropic, etc.)
250
+ t.add_computed_column(
251
+ response=openai.chat_completions(
252
+ messages=[{"role": "user", "content": t.prompt}], model='gpt-4o-mini'
253
+ )
254
+ )
255
+
256
+ # Computer vision (YOLOX object detection)
257
+ t.add_computed_column(
258
+ detections=yolox(t.image, model_id='yolox_s', threshold=0.5)
259
+ )
260
+
261
+ # Embedding models (Hugging Face, CLIP)
262
+ t.add_computed_column(
263
+ embeddings=huggingface.sentence_transformer(
264
+ t.text, model_id='all-MiniLM-L6-v2'
265
+ )
266
+ )
267
+ ```
268
+
269
+ **[Bring Your Own Code:](https://docs.pixeltable.com/datastore/custom-functions)** Extend Pixeltable with UDFs, batch processing, and custom aggregators.
270
+
271
+ ```python
272
+ @pxt.udf
273
+ def format_prompt(context: list, question: str) -> str:
274
+ return f"Context: {context}\nQuestion: {question}"
275
+ ```
276
+
277
+ **[Agentic Workflows / Tool Calling:](https://docs.pixeltable.com/examples/chat/tools)** Register `@pxt.udf`,
278
+ `@pxt.query` functions, or **MCP tools** as tools.
279
+
280
+ ```python
281
+ # Example tools: UDFs, Query functions, and MCP tools
282
+ mcp_tools = pxt.mcp_udfs('http://localhost:8000/mcp') # Load from MCP server
283
+ tools = pxt.tools(get_weather_udf, search_context_query, *mcp_tools)
284
+
285
+ # LLM decides which tool to call; Pixeltable executes it
286
+ t.add_computed_column(
287
+ tool_output=invoke_tools(tools, t.llm_tool_choice)
288
+ )
289
+ ```
290
+
291
+ **[Data Persistence:](https://docs.pixeltable.com/datastore/tables-and-operations#data-operations)** All data,
292
+ metadata, and computed results are automatically stored and versioned.
293
+
294
+ ```python
295
+ t = pxt.get_table('my_table') # Get a handle to an existing table
296
+ t.select(t.account, t.balance).collect() # Query its contents
297
+ t.revert() # Undo the last modification to the table and restore its previous state
298
+ ```
299
+
300
+ **[Time Travel:](https://docs.pixeltable.com/datastore/tables-and-operations#data-operations)** By default,
301
+ Pixeltable preserves the full change history of each table, and any prior version can be selected and queried.
302
+
303
+ ```python
304
+ t.history() # Display a human-readable list of all prior versions of the table
305
+ old_version = pxt.get_table('my_table:472') # Get a handle to a specific table version
306
+ old_version.select(old_version.account, old_version.balance).collect() # Query the older version
307
+ ```
308
+
309
+ **[SQL-like Python Querying:](https://docs.pixeltable.com/datastore/filtering-and-selecting)** Familiar syntax
310
+ combined with powerful AI capabilities.
311
+
312
+ ```python
313
+ results = (
314
+ t.where(t.score > 0.8)
315
+ .order_by(t.timestamp)
316
+ .select(t.image, score=t.score)
317
+ .limit(10)
318
+ .collect()
319
+ )
320
+ ```
321
+
322
+ **[I/O & Integration:](https://pixeltable.github.io/pixeltable/pixeltable/io/)** Export to multiple
323
+ formats and integrate with ML/AI tools ecosystem.
324
+
325
+ ```python
326
+ # Export to analytics/ML formats
327
+ pxt.export_parquet(table, 'data.parquet', partition_size_bytes=100_000_000)
328
+ pxt.export_lancedb(table, 'vector_db')
329
+
330
+ # DataFrame conversions
331
+ results = table.select(table.image, table.labels).collect()
332
+ df = results.to_pandas() # → pandas DataFrame
333
+ models = results.to_pydantic(MyModel) # → Pydantic models
334
+
335
+ # Specialized ML dataset formats
336
+ coco_path = table.to_coco_dataset() # → COCO annotations
337
+ pytorch_ds = table.to_pytorch_dataset('pt') # → PyTorch DataLoader ready
338
+
339
+ # ML tool integrations
340
+ pxt.create_label_studio_project(table, label_config) # Annotation
341
+ pxt.export_images_as_fo_dataset(table, table.image) # FiftyOne
342
+ ```
343
+
344
+ ## Key Examples
345
+
346
+ *(See the [Full Quick Start](https://docs.pixeltable.com/overview/quick-start) or
347
+ [Notebook Gallery](#notebook-gallery) for more details)*
348
+
349
+ **1. Multimodal Data Store and Data Transformation (Computed Column):**
350
+
351
+ ```bash
352
+ pip install pixeltable
353
+ ```
354
+
355
+ ```python
356
+ import pixeltable as pxt
357
+
358
+ # Create a table
359
+ t = pxt.create_table(
360
+ 'films',
361
+ {'name': pxt.String, 'revenue': pxt.Float, 'budget': pxt.Float},
362
+ if_exists="replace"
363
+ )
364
+
365
+ t.insert([
366
+ {'name': 'Inside Out', 'revenue': 800.5, 'budget': 200.0},
367
+ {'name': 'Toy Story', 'revenue': 1073.4, 'budget': 200.0}
368
+ ])
369
+
370
+ # Add a computed column for profit - runs automatically!
371
+ t.add_computed_column(profit=(t.revenue - t.budget), if_exists="replace")
372
+
373
+ # Query the results
374
+ print(t.select(t.name, t.profit).collect())
375
+ # Output includes the automatically computed 'profit' column
376
+ ```
377
+
378
+ **2. Object Detection with [YOLOX](https://github.com/pixeltable/pixeltable-yolox):**
379
+
380
+ ```bash
381
+ pip install pixeltable pixeltable-yolox
382
+ ```
383
+
384
+ ```python
385
+ import PIL
386
+ import pixeltable as pxt
387
+ from yolox.models import Yolox
388
+ from yolox.data.datasets import COCO_CLASSES
389
+
390
+ t = pxt.create_table('image', {'image': pxt.Image}, if_exists='replace')
391
+
392
+ # Insert some images
393
+ prefix = 'https://upload.wikimedia.org/wikipedia/commons'
394
+ paths = [
395
+ '/1/15/Cat_August_2010-4.jpg',
396
+ '/e/e1/Example_of_a_Dog.jpg',
397
+ '/thumb/b/bf/Bird_Diversity_2013.png/300px-Bird_Diversity_2013.png'
398
+ ]
399
+ t.insert({'image': prefix + p} for p in paths)
400
+
401
+ @pxt.udf
402
+ def detect(image: PIL.Image.Image) -> list[str]:
403
+ model = Yolox.from_pretrained("yolox_s")
404
+ result = model([image])
405
+ coco_labels = [COCO_CLASSES[label] for label in result[0]["labels"]]
406
+ return coco_labels
407
+
408
+ t.add_computed_column(classification=detect(t.image))
409
+
410
+ print(t.select().collect())
411
+ ```
412
+
413
+ **3. Image Similarity Search (CLIP Embedding Index):**
414
+
415
+ ```bash
416
+ pip install pixeltable sentence-transformers
417
+ ```
418
+
419
+ ```python
420
+ import pixeltable as pxt
421
+ from pixeltable.functions.huggingface import clip
422
+
423
+ # Create image table and add sample images
424
+ images = pxt.create_table('my_images', {'img': pxt.Image}, if_exists='replace')
425
+ images.insert([
426
+ {'img': 'https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg/1920px-Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg'},
427
+ {'img': 'https://upload.wikimedia.org/wikipedia/commons/d/d5/Retriever_in_water.jpg'}
428
+ ])
429
+
430
+ # Add CLIP embedding index for similarity search
431
+ images.add_embedding_index(
432
+ 'img',
433
+ embedding=clip.using(model_id='openai/clip-vit-base-patch32')
434
+ )
435
+
436
+ # Text-based image search
437
+ query_text = "a dog playing fetch"
438
+ sim_text = images.img.similarity(query_text)
439
+ results_text = images.order_by(sim_text, asc=False).limit(3).select(
440
+ image=images.img, similarity=sim_text
441
+ ).collect()
442
+ print("--- Text Query Results ---")
443
+ print(results_text)
444
+
445
+ # Image-based image search
446
+ query_image_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/7/7a/Huskiesatrest.jpg/2880px-Huskiesatrest.jpg'
447
+ sim_image = images.img.similarity(query_image_url)
448
+ results_image = images.order_by(sim_image, asc=False).limit(3).select(
449
+ image=images.img, similarity=sim_image
450
+ ).collect()
451
+ print("--- Image URL Query Results ---")
452
+ print(results_image)
453
+ ```
454
+
455
+ **4. Multimodal/Incremental RAG Workflow (Document Chunking & LLM Call):**
456
+
457
+ ```bash
458
+ pip install pixeltable openai spacy sentence-transformers
459
+ ```
460
+
461
+ ```bash
462
+ python -m spacy download en_core_web_sm
463
+ ```
464
+
465
+ ```python
466
+ import pixeltable as pxt
467
+ import pixeltable.functions as pxtf
468
+ from pixeltable.functions import openai, huggingface
469
+ from pixeltable.iterators import DocumentSplitter
470
+
471
+ # Manage your tables by directories
472
+ directory = "my_docs"
473
+ pxt.drop_dir(directory, if_not_exists="ignore", force=True)
474
+ pxt.create_dir(directory)
475
+
476
+ # Create a document table and add a PDF
477
+ docs = pxt.create_table(f'{directory}.docs', {'doc': pxt.Document})
478
+ docs.insert([{'doc': 'https://github.com/pixeltable/pixeltable/raw/release/docs/resources/rag-demo/Jefferson-Amazon.pdf'}])
479
+
480
+ # Create chunks view with sentence-based splitting
481
+ chunks = pxt.create_view(
482
+ 'doc_chunks',
483
+ docs,
484
+ iterator=DocumentSplitter.create(document=docs.doc, separators='sentence')
485
+ )
486
+
487
+ # Explicitly create the embedding function object
488
+ embed_model = huggingface.sentence_transformer.using(model_id='all-MiniLM-L6-v2')
489
+ # Add embedding index using the function object
490
+ chunks.add_embedding_index('text', string_embed=embed_model)
491
+
492
+ # Define query function for retrieval - Returns a DataFrame expression
493
+ @pxt.query
494
+ def get_relevant_context(query_text: str, limit: int = 3):
495
+ sim = chunks.text.similarity(query_text)
496
+ # Return a list of strings (text of relevant chunks)
497
+ return chunks.order_by(sim, asc=False).limit(limit).select(chunks.text)
498
+
499
+ # Build a simple Q&A table
500
+ qa = pxt.create_table(f'{directory}.qa_system', {'prompt': pxt.String})
501
+
502
+ # 1. Add retrieved context (now a list of strings)
503
+ qa.add_computed_column(context=get_relevant_context(qa.prompt))
504
+
505
+ # 2. Format the prompt with context
506
+ qa.add_computed_column(
507
+ final_prompt=pxtf.string.format(
508
+ """
509
+ PASSAGES:
510
+ {0}
511
+
512
+ QUESTION:
513
+ {1}
514
+ """,
515
+ qa.context,
516
+ qa.prompt
517
+ )
518
+ )
519
+
520
+ # 3. Generate the answer using the well-formatted prompt column
521
+ qa.add_computed_column(
522
+ answer=openai.chat_completions(
523
+ model='gpt-4o-mini',
524
+ messages=[{
525
+ 'role': 'user',
526
+ 'content': qa.final_prompt
527
+ }]
528
+ ).choices[0].message.content
529
+ )
530
+
531
+ # Ask a question and get the answer
532
+ qa.insert([{'prompt': 'What can you tell me about Amazon?'}])
533
+ print("--- Final Answer ---")
534
+ print(qa.select(qa.answer).collect())
535
+ ```
536
+
537
+ ## Notebook Gallery
538
+
539
+ Explore Pixeltable's capabilities interactively:
540
+
541
+ | Topic | Notebook | Topic | Notebook |
542
+ |:----------|:-----------------|:-------------------------|:---------------------------------:|
543
+ | **Fundamentals** | | **Integrations** | |
544
+ | 10-Min Tour | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | OpenAI | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/integrations/working-with-openai.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> |
545
+ | Tables & Ops | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Anthropic | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/integrations/working-with-anthropic.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> |
546
+ | UDFs | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/feature-guides/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Together AI | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/integrations/working-with-together.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> |
547
+ | Embedding Index | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/feature-guides/embedding-indexes.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Label Studio | <a target="_blank" href="https://docs.pixeltable.com/examples/vision/label-studio"> <img src="https://img.shields.io/badge/📚%20Docs-013056" alt="Visit Docs"/></a> |
548
+ | External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/feature-guides/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Mistral | <a target="_blank" href="https://colab.research.google.com/github/mistralai/cookbook/blob/main/third_party/Pixeltable/incremental_prompt_engineering_and_model_comparison.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> |
549
+ | **Use Cases** | | **Sample Apps** | |
550
+ | RAG Demo | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/rag-demo.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Multimodal Agent | <a target="_blank" href="https://huggingface.co/spaces/Pixeltable/Multimodal-Powerhouse"> <img src="https://img.shields.io/badge/🤗%20Demo-FF7D04" alt="HF Space"/></a> |
551
+ | Object Detection | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Image/Text Search | <a target="_blank" href="https://github.com/pixeltable/pixeltable/tree/main/docs/sample-apps/text-and-image-similarity-search-nextjs-fastapi"> <img src="https://img.shields.io/badge/🖥️%20App-black.svg" alt="GitHub App"/> </a> |
552
+ | Audio Transcription | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Discord Bot | <a target="_blank" href="https://github.com/pixeltable/pixeltable/blob/main/docs/sample-apps/context-aware-discord-bot"> <img src="https://img.shields.io/badge/%F0%9F%92%AC%20Bot-%235865F2.svg" alt="GitHub App"/></a> |
553
+
554
+ ## Maintaining Production-Ready Multimodal AI Apps is Still Too Hard
555
+
556
+ Building robust AI applications, especially [multimodal](https://docs.pixeltable.com/datastore/bringing-data) ones,
557
+ requires stitching together numerous tools:
558
+
559
+ * ETL pipelines for data loading and transformation.
560
+ * Vector databases for semantic search.
561
+ * Feature stores for ML models.
562
+ * Orchestrators for scheduling.
563
+ * Model serving infrastructure for inference.
564
+ * Separate systems for parallelization, caching, versioning, and lineage tracking.
565
+
566
+ This complex "data plumbing" slows down development, increases costs, and makes applications brittle and hard to reproduce.
567
+
568
+ ## Roadmap (2025)
569
+
570
+ ### Cloud Infrastructure and Deployment
571
+
572
+ We're working on a hosted Pixeltable service that will:
573
+
574
+ * Enable Multimodal Data Sharing of Pixeltable Tables and Views | [Waitlist](https://www.pixeltable.com/waitlist)
575
+ * Provide a persistent cloud instance
576
+ * Turn Pixeltable workflows (Tables, Queries, UDFs) into API endpoints/[MCP Servers](https://github.com/pixeltable/pixeltable-mcp-server)
577
+
578
+ ## Contributing
579
+
580
+ We love contributions! Whether it's reporting bugs, suggesting features, improving documentation, or submitting code
581
+ changes, please check out our [Contributing Guide](CONTRIBUTING.md) and join the
582
+ [Discussions](https://github.com/pixeltable/pixeltable/discussions) or our
583
+ [Discord Server](https://discord.gg/QPyqFYx2UN).
584
+
585
+ ## License
586
+
587
+ Pixeltable is licensed under the Apache 2.0 License.