pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245)
  1. pixeltable/__init__.py +83 -19
  2. pixeltable/_query.py +1444 -0
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +7 -4
  5. pixeltable/catalog/catalog.py +2394 -119
  6. pixeltable/catalog/column.py +225 -104
  7. pixeltable/catalog/dir.py +38 -9
  8. pixeltable/catalog/globals.py +53 -34
  9. pixeltable/catalog/insertable_table.py +265 -115
  10. pixeltable/catalog/path.py +80 -17
  11. pixeltable/catalog/schema_object.py +28 -43
  12. pixeltable/catalog/table.py +1270 -677
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +1270 -751
  15. pixeltable/catalog/table_version_handle.py +109 -0
  16. pixeltable/catalog/table_version_path.py +137 -42
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +251 -134
  20. pixeltable/config.py +215 -0
  21. pixeltable/env.py +736 -285
  22. pixeltable/exceptions.py +26 -2
  23. pixeltable/exec/__init__.py +7 -2
  24. pixeltable/exec/aggregation_node.py +39 -21
  25. pixeltable/exec/cache_prefetch_node.py +87 -109
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +25 -28
  29. pixeltable/exec/data_row_batch.py +11 -46
  30. pixeltable/exec/exec_context.py +26 -11
  31. pixeltable/exec/exec_node.py +35 -27
  32. pixeltable/exec/expr_eval/__init__.py +3 -0
  33. pixeltable/exec/expr_eval/evaluators.py +365 -0
  34. pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
  35. pixeltable/exec/expr_eval/globals.py +200 -0
  36. pixeltable/exec/expr_eval/row_buffer.py +74 -0
  37. pixeltable/exec/expr_eval/schedulers.py +413 -0
  38. pixeltable/exec/globals.py +35 -0
  39. pixeltable/exec/in_memory_data_node.py +35 -27
  40. pixeltable/exec/object_store_save_node.py +293 -0
  41. pixeltable/exec/row_update_node.py +44 -29
  42. pixeltable/exec/sql_node.py +414 -115
  43. pixeltable/exprs/__init__.py +8 -5
  44. pixeltable/exprs/arithmetic_expr.py +79 -45
  45. pixeltable/exprs/array_slice.py +5 -5
  46. pixeltable/exprs/column_property_ref.py +40 -26
  47. pixeltable/exprs/column_ref.py +254 -61
  48. pixeltable/exprs/comparison.py +14 -9
  49. pixeltable/exprs/compound_predicate.py +9 -10
  50. pixeltable/exprs/data_row.py +213 -72
  51. pixeltable/exprs/expr.py +270 -104
  52. pixeltable/exprs/expr_dict.py +6 -5
  53. pixeltable/exprs/expr_set.py +20 -11
  54. pixeltable/exprs/function_call.py +383 -284
  55. pixeltable/exprs/globals.py +18 -5
  56. pixeltable/exprs/in_predicate.py +7 -7
  57. pixeltable/exprs/inline_expr.py +37 -37
  58. pixeltable/exprs/is_null.py +8 -4
  59. pixeltable/exprs/json_mapper.py +120 -54
  60. pixeltable/exprs/json_path.py +90 -60
  61. pixeltable/exprs/literal.py +61 -16
  62. pixeltable/exprs/method_ref.py +7 -6
  63. pixeltable/exprs/object_ref.py +19 -8
  64. pixeltable/exprs/row_builder.py +238 -75
  65. pixeltable/exprs/rowid_ref.py +53 -15
  66. pixeltable/exprs/similarity_expr.py +65 -50
  67. pixeltable/exprs/sql_element_cache.py +5 -5
  68. pixeltable/exprs/string_op.py +107 -0
  69. pixeltable/exprs/type_cast.py +25 -13
  70. pixeltable/exprs/variable.py +2 -2
  71. pixeltable/func/__init__.py +9 -5
  72. pixeltable/func/aggregate_function.py +197 -92
  73. pixeltable/func/callable_function.py +119 -35
  74. pixeltable/func/expr_template_function.py +101 -48
  75. pixeltable/func/function.py +375 -62
  76. pixeltable/func/function_registry.py +20 -19
  77. pixeltable/func/globals.py +6 -5
  78. pixeltable/func/mcp.py +74 -0
  79. pixeltable/func/query_template_function.py +151 -35
  80. pixeltable/func/signature.py +178 -49
  81. pixeltable/func/tools.py +164 -0
  82. pixeltable/func/udf.py +176 -53
  83. pixeltable/functions/__init__.py +44 -4
  84. pixeltable/functions/anthropic.py +226 -47
  85. pixeltable/functions/audio.py +148 -11
  86. pixeltable/functions/bedrock.py +137 -0
  87. pixeltable/functions/date.py +188 -0
  88. pixeltable/functions/deepseek.py +113 -0
  89. pixeltable/functions/document.py +81 -0
  90. pixeltable/functions/fal.py +76 -0
  91. pixeltable/functions/fireworks.py +72 -20
  92. pixeltable/functions/gemini.py +249 -0
  93. pixeltable/functions/globals.py +208 -53
  94. pixeltable/functions/groq.py +108 -0
  95. pixeltable/functions/huggingface.py +1088 -95
  96. pixeltable/functions/image.py +155 -84
  97. pixeltable/functions/json.py +8 -11
  98. pixeltable/functions/llama_cpp.py +31 -19
  99. pixeltable/functions/math.py +169 -0
  100. pixeltable/functions/mistralai.py +50 -75
  101. pixeltable/functions/net.py +70 -0
  102. pixeltable/functions/ollama.py +29 -36
  103. pixeltable/functions/openai.py +548 -160
  104. pixeltable/functions/openrouter.py +143 -0
  105. pixeltable/functions/replicate.py +15 -14
  106. pixeltable/functions/reve.py +250 -0
  107. pixeltable/functions/string.py +310 -85
  108. pixeltable/functions/timestamp.py +37 -19
  109. pixeltable/functions/together.py +77 -120
  110. pixeltable/functions/twelvelabs.py +188 -0
  111. pixeltable/functions/util.py +7 -2
  112. pixeltable/functions/uuid.py +30 -0
  113. pixeltable/functions/video.py +1528 -117
  114. pixeltable/functions/vision.py +26 -26
  115. pixeltable/functions/voyageai.py +289 -0
  116. pixeltable/functions/whisper.py +19 -10
  117. pixeltable/functions/whisperx.py +179 -0
  118. pixeltable/functions/yolox.py +112 -0
  119. pixeltable/globals.py +716 -236
  120. pixeltable/index/__init__.py +3 -1
  121. pixeltable/index/base.py +17 -21
  122. pixeltable/index/btree.py +32 -22
  123. pixeltable/index/embedding_index.py +155 -92
  124. pixeltable/io/__init__.py +12 -7
  125. pixeltable/io/datarows.py +140 -0
  126. pixeltable/io/external_store.py +83 -125
  127. pixeltable/io/fiftyone.py +24 -33
  128. pixeltable/io/globals.py +47 -182
  129. pixeltable/io/hf_datasets.py +96 -127
  130. pixeltable/io/label_studio.py +171 -156
  131. pixeltable/io/lancedb.py +3 -0
  132. pixeltable/io/pandas.py +136 -115
  133. pixeltable/io/parquet.py +40 -153
  134. pixeltable/io/table_data_conduit.py +702 -0
  135. pixeltable/io/utils.py +100 -0
  136. pixeltable/iterators/__init__.py +8 -4
  137. pixeltable/iterators/audio.py +207 -0
  138. pixeltable/iterators/base.py +9 -3
  139. pixeltable/iterators/document.py +144 -87
  140. pixeltable/iterators/image.py +17 -38
  141. pixeltable/iterators/string.py +15 -12
  142. pixeltable/iterators/video.py +523 -127
  143. pixeltable/metadata/__init__.py +33 -8
  144. pixeltable/metadata/converters/convert_10.py +2 -3
  145. pixeltable/metadata/converters/convert_13.py +2 -2
  146. pixeltable/metadata/converters/convert_15.py +15 -11
  147. pixeltable/metadata/converters/convert_16.py +4 -5
  148. pixeltable/metadata/converters/convert_17.py +4 -5
  149. pixeltable/metadata/converters/convert_18.py +4 -6
  150. pixeltable/metadata/converters/convert_19.py +6 -9
  151. pixeltable/metadata/converters/convert_20.py +3 -6
  152. pixeltable/metadata/converters/convert_21.py +6 -8
  153. pixeltable/metadata/converters/convert_22.py +3 -2
  154. pixeltable/metadata/converters/convert_23.py +33 -0
  155. pixeltable/metadata/converters/convert_24.py +55 -0
  156. pixeltable/metadata/converters/convert_25.py +19 -0
  157. pixeltable/metadata/converters/convert_26.py +23 -0
  158. pixeltable/metadata/converters/convert_27.py +29 -0
  159. pixeltable/metadata/converters/convert_28.py +13 -0
  160. pixeltable/metadata/converters/convert_29.py +110 -0
  161. pixeltable/metadata/converters/convert_30.py +63 -0
  162. pixeltable/metadata/converters/convert_31.py +11 -0
  163. pixeltable/metadata/converters/convert_32.py +15 -0
  164. pixeltable/metadata/converters/convert_33.py +17 -0
  165. pixeltable/metadata/converters/convert_34.py +21 -0
  166. pixeltable/metadata/converters/convert_35.py +9 -0
  167. pixeltable/metadata/converters/convert_36.py +38 -0
  168. pixeltable/metadata/converters/convert_37.py +15 -0
  169. pixeltable/metadata/converters/convert_38.py +39 -0
  170. pixeltable/metadata/converters/convert_39.py +124 -0
  171. pixeltable/metadata/converters/convert_40.py +73 -0
  172. pixeltable/metadata/converters/convert_41.py +12 -0
  173. pixeltable/metadata/converters/convert_42.py +9 -0
  174. pixeltable/metadata/converters/convert_43.py +44 -0
  175. pixeltable/metadata/converters/util.py +44 -18
  176. pixeltable/metadata/notes.py +21 -0
  177. pixeltable/metadata/schema.py +185 -42
  178. pixeltable/metadata/utils.py +74 -0
  179. pixeltable/mypy/__init__.py +3 -0
  180. pixeltable/mypy/mypy_plugin.py +123 -0
  181. pixeltable/plan.py +616 -225
  182. pixeltable/share/__init__.py +3 -0
  183. pixeltable/share/packager.py +797 -0
  184. pixeltable/share/protocol/__init__.py +33 -0
  185. pixeltable/share/protocol/common.py +165 -0
  186. pixeltable/share/protocol/operation_types.py +33 -0
  187. pixeltable/share/protocol/replica.py +119 -0
  188. pixeltable/share/publish.py +349 -0
  189. pixeltable/store.py +398 -232
  190. pixeltable/type_system.py +730 -267
  191. pixeltable/utils/__init__.py +40 -0
  192. pixeltable/utils/arrow.py +201 -29
  193. pixeltable/utils/av.py +298 -0
  194. pixeltable/utils/azure_store.py +346 -0
  195. pixeltable/utils/coco.py +26 -27
  196. pixeltable/utils/code.py +4 -4
  197. pixeltable/utils/console_output.py +46 -0
  198. pixeltable/utils/coroutine.py +24 -0
  199. pixeltable/utils/dbms.py +92 -0
  200. pixeltable/utils/description_helper.py +11 -12
  201. pixeltable/utils/documents.py +60 -61
  202. pixeltable/utils/exception_handler.py +36 -0
  203. pixeltable/utils/filecache.py +38 -22
  204. pixeltable/utils/formatter.py +88 -51
  205. pixeltable/utils/gcs_store.py +295 -0
  206. pixeltable/utils/http.py +133 -0
  207. pixeltable/utils/http_server.py +14 -13
  208. pixeltable/utils/iceberg.py +13 -0
  209. pixeltable/utils/image.py +17 -0
  210. pixeltable/utils/lancedb.py +90 -0
  211. pixeltable/utils/local_store.py +322 -0
  212. pixeltable/utils/misc.py +5 -0
  213. pixeltable/utils/object_stores.py +573 -0
  214. pixeltable/utils/pydantic.py +60 -0
  215. pixeltable/utils/pytorch.py +20 -20
  216. pixeltable/utils/s3_store.py +527 -0
  217. pixeltable/utils/sql.py +32 -5
  218. pixeltable/utils/system.py +30 -0
  219. pixeltable/utils/transactional_directory.py +4 -3
  220. pixeltable-0.5.7.dist-info/METADATA +579 -0
  221. pixeltable-0.5.7.dist-info/RECORD +227 -0
  222. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  223. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  224. pixeltable/__version__.py +0 -3
  225. pixeltable/catalog/named_function.py +0 -36
  226. pixeltable/catalog/path_dict.py +0 -141
  227. pixeltable/dataframe.py +0 -894
  228. pixeltable/exec/expr_eval_node.py +0 -232
  229. pixeltable/ext/__init__.py +0 -14
  230. pixeltable/ext/functions/__init__.py +0 -8
  231. pixeltable/ext/functions/whisperx.py +0 -77
  232. pixeltable/ext/functions/yolox.py +0 -157
  233. pixeltable/tool/create_test_db_dump.py +0 -311
  234. pixeltable/tool/create_test_video.py +0 -81
  235. pixeltable/tool/doc_plugins/griffe.py +0 -50
  236. pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
  237. pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
  238. pixeltable/tool/embed_udf.py +0 -9
  239. pixeltable/tool/mypy_plugin.py +0 -55
  240. pixeltable/utils/media_store.py +0 -76
  241. pixeltable/utils/s3.py +0 -16
  242. pixeltable-0.2.26.dist-info/METADATA +0 -400
  243. pixeltable-0.2.26.dist-info/RECORD +0 -156
  244. pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
  245. {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/utils/transactional_directory.py
@@ -14,7 +14,8 @@ def transactional_directory(folder_path: Path) -> Generator[Path, Any, Any]:
 
     Yields:
        A pathlib.Path to a hidden temporary folder, which can be used to accumulate changes.
-       If everything succeeds, the changes are committed via an atomic move operation upon exiting the 'with' block (os.replace)
+       If everything succeeds, the changes are committed via an atomic move operation upon
+       exiting the 'with' block (os.replace)
        If an exception occurred, no changes are visible in the original folder.
 
     Example:
@@ -24,9 +25,9 @@ def transactional_directory(folder_path: Path) -> Generator[Path, Any, Any]:
        (temp_folder / "subfolder2").mkdir()
    """
    if folder_path.exists():
-       raise excs.Error(f"Folder {folder_path} already exists")
+       raise excs.Error(f'Folder {folder_path} already exists')
 
-   tmp_folder = folder_path.parent / f".tmp_{folder_path.name}"
+   tmp_folder = folder_path.parent / f'.tmp_{folder_path.name}'
    # Remove the temporary folder if it already exists, eg if the previous run crashed
    shutil.rmtree(str(tmp_folder), ignore_errors=True)
    tmp_folder.mkdir(parents=True)
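The hunk above only reflows the docstring and normalizes quoting. For orientation, here is a minimal usage sketch of this context manager, inferred from its signature and docstring; the target path and file names below are placeholders, not taken from the package:

```python
from pathlib import Path

from pixeltable.utils.transactional_directory import transactional_directory

target = Path('/tmp/pxt_staging')  # per the docstring, the target folder must not exist yet

with transactional_directory(target) as temp_folder:
    # accumulate changes inside the hidden temporary folder ('.tmp_pxt_staging')
    (temp_folder / 'subfolder1').mkdir()
    (temp_folder / 'notes.txt').write_text('hello')
# on a clean exit, the temp folder is atomically renamed onto `target` (os.replace);
# if the block raises, nothing becomes visible at `target`
```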
pixeltable-0.5.7.dist-info/METADATA
@@ -0,0 +1,579 @@
+ Metadata-Version: 2.4
+ Name: pixeltable
+ Version: 0.5.7
+ Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
+ Project-URL: homepage, https://pixeltable.com/
+ Project-URL: repository, https://github.com/pixeltable/pixeltable
+ Project-URL: documentation, https://docs.pixeltable.com/
+ Author-email: "Pixeltable, Inc." <contact@pixeltable.com>
+ License-Expression: Apache-2.0
+ License-File: LICENSE
+ Keywords: ai,artificial-intelligence,chatbot,computer-vision,data-science,database,feature-engineering,feature-store,genai,llm,machine-learning,ml,mlops,multimodal,vector-database
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: License :: OSI Approved :: Apache Software License
+ Classifier: Operating System :: MacOS
+ Classifier: Operating System :: Microsoft :: Windows
+ Classifier: Operating System :: POSIX :: Linux
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Database
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Requires-Python: >=3.10
+ Requires-Dist: aiohttp>=3.10
+ Requires-Dist: av>=10.0.0
+ Requires-Dist: beautifulsoup4>=4.10
+ Requires-Dist: cloudpickle>=2.2.1
+ Requires-Dist: deprecated>=1.2.15
+ Requires-Dist: ftfy>=6.2.0
+ Requires-Dist: httpcore>=1.0.3
+ Requires-Dist: httpx>=0.27
+ Requires-Dist: jinja2>=3.1.3
+ Requires-Dist: jmespath>=1.0.1
+ Requires-Dist: jsonschema>=4.1.0
+ Requires-Dist: lxml>=5.1
+ Requires-Dist: more-itertools>=10.2
+ Requires-Dist: nest-asyncio>=1.5
+ Requires-Dist: numpy>=1.25
+ Requires-Dist: pandas>=2.0
+ Requires-Dist: pgvector>=0.4.0
+ Requires-Dist: pillow-heif>=0.15.0
+ Requires-Dist: pillow>=9.3.0
+ Requires-Dist: pixeltable-pgserver==0.4.0
+ Requires-Dist: psutil>=5.9.5
+ Requires-Dist: psycopg[binary]>=3.1.18
+ Requires-Dist: puremagic>=1.20
+ Requires-Dist: pyarrow>=19
+ Requires-Dist: pydantic>=2.7.4
+ Requires-Dist: pypdfium2>=4.30.0
+ Requires-Dist: pyyaml>=6.0.1
+ Requires-Dist: requests>=2.31.0
+ Requires-Dist: sqlalchemy>=2.0.23
+ Requires-Dist: tenacity>=8.2
+ Requires-Dist: toml>=0.10
+ Requires-Dist: tqdm>=4.64
+ Requires-Dist: tzlocal>=5.0
+ Description-Content-Type: text/markdown
+
+ <picture class="github-only">
+ <source media="(prefers-color-scheme: light)" srcset="https://github.com/user-attachments/assets/e9bf82b2-cace-4bd8-9523-b65495eb8131">
+ <source media="(prefers-color-scheme: dark)" srcset="https://github.com/user-attachments/assets/c5ab123e-806c-49bf-93e7-151353719b16">
+ <img alt="Pixeltable Logo" src="https://github.com/user-attachments/assets/e9bf82b2-cace-4bd8-9523-b65495eb8131" width="40%">
+ </picture>
+
+ <div>
+ <br>
+ </div>
+
+ The only open source Python library providing declarative data infrastructure for building multimodal AI applications, enabling incremental storage, transformation, indexing, retrieval, and orchestration of data.
+
+ [![License](https://img.shields.io/badge/License-Apache%202.0-0530AD.svg)](https://opensource.org/licenses/Apache-2.0)
+ [![tests status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml)
+ [![nightly status](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/nightly.yml)
+ [![stress-tests status](https://github.com/pixeltable/pixeltable/actions/workflows/stress-tests.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions/workflows/stress-tests.yml)
+ [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=4D148C)](https://pypi.org/project/pixeltable/)
+ [![My Discord (1306431018890166272)](https://img.shields.io/badge/💬-Discord-%235865F2.svg)](https://discord.gg/QPyqFYx2UN)
+
+ [**Quick Start**](https://docs.pixeltable.com/overview/quick-start) |
+ [**Documentation**](https://docs.pixeltable.com/) |
+ [**API Reference**](https://docs.pixeltable.com/sdk/latest/pixeltable) |
+ [**Sample Apps**](https://github.com/pixeltable/pixeltable/tree/main/docs/sample-apps) |
+ [**Discord Community**](https://discord.gg/QPyqFYx2UN)
+
+ ---
+
+ ## Installation
+
+ ```bash
+ pip install pixeltable
+ ```
+ Pixeltable replaces the complex multi-system architecture typically needed for AI applications (databases, file storage, vector DBs, APIs, orchestration) with a single declarative table interface that natively handles multimodal data like images, videos, and documents.
+
+ ## Demo
+
+ https://github.com/user-attachments/assets/b50fd6df-5169-4881-9dbe-1b6e5d06cede
+
+ ## Quick Start
+
+ With Pixeltable, you define your *entire* data processing and AI workflow declaratively using
+ **[computed columns](https://docs.pixeltable.com/tutorials/computed-columns)** on
+ **[tables](https://docs.pixeltable.com/tutorials/tables-and-data-operations)**.
+ Focus on your application logic, not the data plumbing.
+
+ ```python
+
+ # Installation
+ # pip install -qU torch transformers openai pixeltable
+
+ # Basic setup
+ import pixeltable as pxt
+
+ # Table with multimodal column types (Image, Video, Audio, Document)
+ t = pxt.create_table('images', {'input_image': pxt.Image})
+
+ # Computed columns: define transformation logic once, runs on all data
+ from pixeltable.functions import huggingface
+
+ # Object detection with automatic model management
+ t.add_computed_column(
+     detections=huggingface.detr_for_object_detection(
+         t.input_image,
+         model_id='facebook/detr-resnet-50'
+     )
+ )
+
+ # Extract specific fields from detection results
+ t.add_computed_column(detections_text=t.detections.label_text)
+
+ # OpenAI Vision API integration with built-in rate limiting and async management
+ from pixeltable.functions import openai
+
+ t.add_computed_column(
+     vision=openai.vision(
+         prompt="Describe what's in this image.",
+         image=t.input_image,
+         model='gpt-4o-mini'
+     )
+ )
+
+ # Insert data directly from an external URL
+ # Automatically triggers computation of all computed columns
+ t.insert(input_image='https://raw.github.com/pixeltable/pixeltable/release/docs/resources/images/000000000025.jpg')
+
+ # Query - All data, metadata, and computed results are persistently stored
+ # Structured and unstructured data are returned side-by-side
+ results = t.select(
+     t.input_image,
+     t.detections_text,
+     t.vision
+ ).collect()
+ ```
+
+ ## What Happened?
+
+ * **Data Ingestion & Storage:** References [files](https://docs.pixeltable.com/platform/external-files)
+   (images, videos, audio, docs) in place, handles structured data.
+ * **Transformation & Processing:** Applies *any* Python function ([UDFs](https://docs.pixeltable.com/platform/udfs-in-pixeltable))
+   or built-in operations ([chunking, frame extraction](https://docs.pixeltable.com/platform/iterators)) automatically.
+ * **AI Model Integration:** Runs inference ([embeddings](https://docs.pixeltable.com/platform/embedding-indexes),
+   [object detection](https://docs.pixeltable.com/howto/cookbooks/images/img-detect-objects),
+   [LLMs](https://docs.pixeltable.com/integrations/frameworks#cloud-llm-providers)) as part of the data pipeline.
+ * **Indexing & Retrieval:** Creates and manages vector indexes for fast
+   [semantic search](https://docs.pixeltable.com/platform/embedding-indexes)
+   alongside traditional filtering.
+ * **Incremental Computation:** Only [recomputes](https://docs.pixeltable.com/overview/quick-start) what's
+   necessary when data or code changes, saving time and cost.
+ * **Versioning & Lineage:** Automatically tracks data and schema changes for reproducibility. See below for an example
+   that uses "time travel" to query an older version of a table.
+
+ Pixeltable can ingest data from local storage or directly from a URL. When external media files are referenced by URL,
+ as in the `insert` statement above, Pixeltable caches them locally before processing. See the
+ [Working with External Files](https://github.com/pixeltable/pixeltable/blob/main/docs/notebooks/feature-guides/working-with-external-files.ipynb)
+ notebook for more details.
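To make the incremental-computation point above concrete, here is a small hedged sketch that builds on the Quick Start table `t`; the URL below is a placeholder, not from the README:

```python
# Hypothetical follow-up insert: only the new row runs through the computed
# columns (detections, detections_text, vision); results already stored for
# earlier rows are reused rather than recomputed.
t.insert(input_image='https://example.com/another_image.jpg')

# Each mutation also creates a new table version, which can later be inspected
# or rolled back (see t.history() and t.revert() further down in this README).
```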
+
+ ## Where Did My Data Go?
+
+ Pixeltable workloads generate various outputs, including both structured outputs (such as bounding boxes for detected
+ objects) and/or unstructured outputs (such as generated images or video). By default, everything resides in your
+ Pixeltable user directory at `~/.pixeltable`. Structured data is stored in a Postgres instance in `~/.pixeltable`.
+ Generated media (images, video, audio, documents) are stored outside the Postgres database, in separate flat files in
+ `~/.pixeltable/media`. Those media files are referenced by URL in the database, and Pixeltable provides the "glue" for
+ a unified table interface over both structured and unstructured data.
+
+ In general, the user is not expected to interact directly with the data in `~/.pixeltable`; the data store is fully
+ managed by Pixeltable and is intended to be accessed through the Pixeltable Python SDK.
+
+ ## Key Principles
+
+ **[Unified Multimodal Interface:](https://docs.pixeltable.com/platform/type-system)** `pxt.Image`,
+ `pxt.Video`, `pxt.Audio`, `pxt.Document`, etc. – manage diverse data consistently.
+
+ ```python
+ t = pxt.create_table(
+     'media',
+     {
+         'img': pxt.Image,
+         'video': pxt.Video
+     }
+ )
+ ```
+
+ **[Declarative Computed Columns:](https://docs.pixeltable.com/tutorials/computed-columns)** Define processing
+ steps once; they run automatically on new/updated data.
+
+ ```python
+ t.add_computed_column(
+     classification=huggingface.vit_for_image_classification(
+         t.image
+     )
+ )
+ ```
+
+ **[Built-in Vector Search:](https://docs.pixeltable.com/platform/embedding-indexes)** Add embedding indexes and
+ perform similarity searches directly on tables/views.
+
+ ```python
+ t.add_embedding_index(
+     'img',
+     embedding=clip.using(
+         model_id='openai/clip-vit-base-patch32'
+     )
+ )
+
+ sim = t.img.similarity(string="cat playing with yarn")
+ ```
+
+ **[Incremental View Maintenance:](https://docs.pixeltable.com/platform/views)** Create virtual tables using iterators
+ for efficient processing without data duplication.
+
+ ```python
+ # Document chunking with overlap & metadata and many more options to build your own iterator
+ chunks = pxt.create_view('chunks', docs,
+     iterator=DocumentSplitter.create(
+         document=docs.doc,
+         separators='sentence,token_limit',
+         overlap=50, limit=500
+     ))
+
+ # Video frame extraction
+ frames = pxt.create_view('frames', videos,
+     iterator=FrameIterator.create(video=videos.video, fps=0.5))
+ ```
+
+ **[Seamless AI Integration:](https://docs.pixeltable.com/integrations/frameworks)** Built-in functions for
+ OpenAI, Anthropic, Hugging Face, CLIP, YOLOX, and more.
+
+ ```python
+ # LLM integration (OpenAI, Anthropic, etc.)
+ t.add_computed_column(
+     response=openai.chat_completions(
+         messages=[{"role": "user", "content": t.prompt}], model='gpt-4o-mini'
+     )
+ )
+
+ # Computer vision (YOLOX object detection)
+ t.add_computed_column(
+     detections=yolox(t.image, model_id='yolox_s', threshold=0.5)
+ )
+
+ # Embedding models (Hugging Face, CLIP)
+ t.add_computed_column(
+     embeddings=huggingface.sentence_transformer(
+         t.text, model_id='all-MiniLM-L6-v2'
+     )
+ )
+ ```
+
+ **[Bring Your Own Code:](https://docs.pixeltable.com/platform/udfs-in-pixeltable)** Extend Pixeltable with UDFs, batch processing, and custom aggregators.
+
+ ```python
+ @pxt.udf
+ def format_prompt(context: list, question: str) -> str:
+     return f"Context: {context}\nQuestion: {question}"
+ ```
+
+ **[Agentic Workflows / Tool Calling:](https://docs.pixeltable.com/howto/cookbooks/agents/llm-tool-calling)** Register `@pxt.udf`,
+ `@pxt.query` functions, or **MCP tools** as tools.
+
+ ```python
+ # Example tools: UDFs, Query functions, and MCP tools
+ mcp_tools = pxt.mcp_udfs('http://localhost:8000/mcp')  # Load from MCP server
+ tools = pxt.tools(get_weather_udf, search_context_query, *mcp_tools)
+
+ # LLM decides which tool to call; Pixeltable executes it
+ t.add_computed_column(
+     tool_output=invoke_tools(tools, t.llm_tool_choice)
+ )
+ ```
+
+ **[Data Persistence:](https://docs.pixeltable.com/tutorials/tables-and-data-operations)** All data,
+ metadata, and computed results are automatically stored and versioned.
+
+ ```python
+ t = pxt.get_table('my_table')  # Get a handle to an existing table
+ t.select(t.account, t.balance).collect()  # Query its contents
+ t.revert()  # Undo the last modification to the table and restore its previous state
+ ```
+
+ **[Time Travel:](https://docs.pixeltable.com/platform/version-control)** By default,
+ Pixeltable preserves the full change history of each table, and any prior version can be selected and queried.
+
+ ```python
+ t.history()  # Display a human-readable list of all prior versions of the table
+ old_version = pxt.get_table('my_table:472')  # Get a handle to a specific table version
+ old_version.select(t.account, t.balance).collect()  # Query the older version
+ ```
+
+ **[SQL-like Python Querying:](https://docs.pixeltable.com/tutorials/queries-and-expressions)** Familiar syntax
+ combined with powerful AI capabilities.
+
+ ```python
+ results = (
+     t.where(t.score > 0.8)
+     .order_by(t.timestamp)
+     .select(t.image, score=t.score)
+     .limit(10)
+     .collect()
+ )
+ ```
+
+ **[I/O & Integration:](https://pixeltable.github.io/pixeltable/pixeltable/io/)** Export to multiple
+ formats and integrate with ML/AI tools ecosystem.
+
+ ```python
+ # Export to analytics/ML formats
+ pxt.export_parquet(table, 'data.parquet', partition_size_bytes=100_000_000)
+ pxt.export_lancedb(table, 'vector_db')
+
+ # DataFrame conversions
+ results = table.select(table.image, table.labels).collect()
+ df = results.to_pandas()  # → pandas DataFrame
+ models = results.to_pydantic(MyModel)  # → Pydantic models
+
+ # Specialized ML dataset formats
+ coco_path = table.to_coco_dataset()  # → COCO annotations
+ pytorch_ds = table.to_pytorch_dataset('pt')  # → PyTorch DataLoader ready
+
+ # ML tool integrations
+ pxt.create_label_studio_project(table, label_config)  # Annotation
+ pxt.export_images_as_fo_dataset(table, table.image)  # FiftyOne
+ ```
+
+ ## Key Examples
+
+ *(See the [Full Quick Start](https://docs.pixeltable.com/overview/quick-start) or
+ [Notebook Gallery](#notebook-gallery) for more details)*
+
+ **1. Multimodal Data Store and Data Transformation (Computed Column):**
+
+ ```bash
+ pip install pixeltable
+ ```
+
+ ```python
+ import pixeltable as pxt
+
+ # Create a table
+ t = pxt.create_table(
+     'films',
+     {'name': pxt.String, 'revenue': pxt.Float, 'budget': pxt.Float},
+     if_exists="replace"
+ )
+
+ t.insert([
+     {'name': 'Inside Out', 'revenue': 800.5, 'budget': 200.0},
+     {'name': 'Toy Story', 'revenue': 1073.4, 'budget': 200.0}
+ ])
+
+ # Add a computed column for profit - runs automatically!
+ t.add_computed_column(profit=(t.revenue - t.budget), if_exists="replace")
+
+ # Query the results
+ print(t.select(t.name, t.profit).collect())
+ # Output includes the automatically computed 'profit' column
+ ```
+
+ **2. Object Detection with [YOLOX](https://github.com/pixeltable/pixeltable-yolox):**
+
+ ```bash
+ pip install pixeltable pixeltable-yolox
+ ```
+
+ ```python
+ import PIL
+ import pixeltable as pxt
+ from yolox.models import Yolox
+ from yolox.data.datasets import COCO_CLASSES
+
+ t = pxt.create_table('image', {'image': pxt.Image}, if_exists='replace')
+
+ # Insert some images
+ prefix = 'https://upload.wikimedia.org/wikipedia/commons'
+ paths = [
+     '/1/15/Cat_August_2010-4.jpg',
+     '/e/e1/Example_of_a_Dog.jpg',
+     '/thumb/b/bf/Bird_Diversity_2013.png/300px-Bird_Diversity_2013.png'
+ ]
+ t.insert({'image': prefix + p} for p in paths)
+
+ @pxt.udf
+ def detect(image: PIL.Image.Image) -> list[str]:
+     model = Yolox.from_pretrained("yolox_s")
+     result = model([image])
+     coco_labels = [COCO_CLASSES[label] for label in result[0]["labels"]]
+     return coco_labels
+
+ t.add_computed_column(classification=detect(t.image))
+
+ print(t.select().collect())
+ ```
+
+ **3. Image Similarity Search (CLIP Embedding Index):**
+
+ ```bash
+ pip install pixeltable sentence-transformers
+ ```
+
+ ```python
+ import pixeltable as pxt
+ from pixeltable.functions.huggingface import clip
+
+ # Create image table and add sample images
+ images = pxt.create_table('my_images', {'img': pxt.Image}, if_exists='replace')
+ images.insert([
+     {'img': 'https://upload.wikimedia.org/wikipedia/commons/thumb/6/68/Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg/1920px-Orange_tabby_cat_sitting_on_fallen_leaves-Hisashi-01A.jpg'},
+     {'img': 'https://upload.wikimedia.org/wikipedia/commons/d/d5/Retriever_in_water.jpg'}
+ ])
+
+ # Add CLIP embedding index for similarity search
+ images.add_embedding_index(
+     'img',
+     embedding=clip.using(model_id='openai/clip-vit-base-patch32')
+ )
+
+ # Text-based image search
+ query_text = "a dog playing fetch"
+ sim_text = images.img.similarity(string=query_text)
+ results_text = images.order_by(sim_text, asc=False).limit(3).select(
+     image=images.img, similarity=sim_text
+ ).collect()
+ print("--- Text Query Results ---")
+ print(results_text)
+ ```
+
+ **4. Multimodal/Incremental RAG Workflow (Document Chunking & LLM Call):**
+
+ ```bash
+ pip install pixeltable openai spacy sentence-transformers
+ ```
+
+ ```bash
+ python -m spacy download en_core_web_sm
+ ```
+
+ ```python
+ import pixeltable as pxt
+ import pixeltable.functions as pxtf
+ from pixeltable.functions import openai, huggingface
+ from pixeltable.iterators import DocumentSplitter
+
+ # Manage your tables by directories
+ directory = "my_docs"
+ pxt.drop_dir(directory, if_not_exists="ignore", force=True)
+ pxt.create_dir("my_docs")
+
+ # Create a document table and add a PDF
+ docs = pxt.create_table(f'{directory}.docs', {'doc': pxt.Document})
+ docs.insert([{'doc': 'https://github.com/pixeltable/pixeltable/raw/release/docs/resources/rag-demo/Jefferson-Amazon.pdf'}])
+
+ # Create chunks view with sentence-based splitting
+ chunks = pxt.create_view(
+     'doc_chunks',
+     docs,
+     iterator=DocumentSplitter.create(document=docs.doc, separators='sentence')
+ )
+
+ # Explicitly create the embedding function object
+ embed_model = huggingface.sentence_transformer.using(model_id='all-MiniLM-L6-v2')
+ # Add embedding index using the function object
+ chunks.add_embedding_index('text', string_embed=embed_model)
+
+ # Define query function for retrieval - Returns a Query expression
+ @pxt.query
+ def get_relevant_context(query_text: str, limit: int = 3):
+     sim = chunks.text.similarity(string=query_text)
+     # Return a list of strings (text of relevant chunks)
+     return chunks.order_by(sim, asc=False).limit(limit).select(chunks.text)
+
+ # Build a simple Q&A table
+ qa = pxt.create_table(f'{directory}.qa_system', {'prompt': pxt.String})
+
+ # 1. Add retrieved context (now a list of strings)
+ qa.add_computed_column(context=get_relevant_context(qa.prompt))
+
+ # 2. Format the prompt with context
+ qa.add_computed_column(
+     final_prompt=pxtf.string.format(
+         """
+         PASSAGES:
+         {0}
+
+         QUESTION:
+         {1}
+         """,
+         qa.context,
+         qa.prompt
+     )
+ )
+
+ # 3. Generate the answer using the well-formatted prompt column
+ qa.add_computed_column(
+     answer=openai.chat_completions(
+         model='gpt-4o-mini',
+         messages=[{
+             'role': 'user',
+             'content': qa.final_prompt
+         }]
+     ).choices[0].message.content
+ )
+
+ # Ask a question and get the answer
+ qa.insert([{'prompt': 'What can you tell me about Amazon?'}])
+ print("--- Final Answer ---")
+ print(qa.select(qa.answer).collect())
+ ```
+
+ ## Notebook Gallery
+
+ Explore Pixeltable's capabilities interactively:
+
+ | Topic | Notebook | Topic | Notebook |
+ |:----------|:-----------------|:-------------------------|:---------------------------------:|
+ | **Fundamentals** | | **Integrations** | |
+ | 10-Min Tour | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | OpenAI | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/integrations/working-with-openai.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> |
+ | Tables & Ops | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/fundamentals/tables-and-data-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Anthropic | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/integrations/working-with-anthropic.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> |
+ | UDFs | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/feature-guides/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Together AI | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/integrations/working-with-together.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> |
+ | Embedding Index | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/feature-guides/embedding-indexes.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Label Studio | <a target="_blank" href="https://docs.pixeltable.com/examples/vision/label-studio"> <img src="https://img.shields.io/badge/📚%20Docs-013056" alt="Visit Docs"/></a> |
+ | External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/feature-guides/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Mistral | <a target="_blank" href="https://colab.research.google.com/github/mistralai/cookbook/blob/main/third_party/Pixeltable/incremental_prompt_engineering_and_model_comparison.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Github"/> |
+ | **Use Cases** | | **Sample Apps** | |
+ | RAG Demo | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/rag-demo.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | Multimodal Agent | <a target="_blank" href="https://huggingface.co/spaces/Pixeltable/Multimodal-Powerhouse"> <img src="https://img.shields.io/badge/🤗%20Demo-FF7D04" alt="HF Space"/></a> |
+ | Object Detection | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Image/Text Search | <a target="_blank" href="https://github.com/pixeltable/pixeltable/tree/main/docs/sample-apps/text-and-image-similarity-search-nextjs-fastapi"> <img src="https://img.shields.io/badge/🖥️%20App-black.svg" alt="GitHub App"/> |
+ | Audio Transcription | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | Discord Bot | <a target="_blank" href="https://github.com/pixeltable/pixeltable/blob/main/docs/sample-apps/context-aware-discord-bot"> <img src="https://img.shields.io/badge/%F0%9F%92%AC%20Bot-%235865F2.svg" alt="GitHub App"/></a> |
+
+ ## Maintaining Production-Ready Multimodal AI Apps is Still Too Hard
+
+ Building robust AI applications, especially [multimodal](https://docs.pixeltable.com/platform/type-system) ones,
+ requires stitching together numerous tools:
+
+ * ETL pipelines for data loading and transformation.
+ * Vector databases for semantic search.
+ * Feature stores for ML models.
+ * Orchestrators for scheduling.
+ * Model serving infrastructure for inference.
+ * Separate systems for parallelization, caching, versioning, and lineage tracking.
+
+ This complex "data plumbing" slows down development, increases costs, and makes applications brittle and hard to reproduce.
+
+ ## Roadmap (2025)
+
+ ### Cloud Infrastructure and Deployment
+
+ We're working on a hosted Pixeltable service that will:
+
+ * Enable Multimodal Data Sharing of Pixeltable Tables and Views | [Waitlist](https://www.pixeltable.com/waitlist)
+ * Provide a persistent cloud instance
+ * Turn Pixeltable workflows (Tables, Queries, UDFs) into API endpoints/[MCP Servers](https://github.com/pixeltable/pixeltable-mcp-server)
+
+ ## Contributing
+
+ We love contributions! Whether it's reporting bugs, suggesting features, improving documentation, or submitting code
+ changes, please check out our [Contributing Guide](CONTRIBUTING.md) and join the
+ [Discussions](https://github.com/pixeltable/pixeltable/discussions) or our
+ [Discord Server](https://discord.gg/QPyqFYx2UN).
+
+ ## License
+
+ Pixeltable is licensed under the Apache 2.0 License.