pixeltable 0.4.5__py3-none-any.whl → 0.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (55) hide show
  1. pixeltable/__init__.py +4 -2
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/__init__.py +1 -1
  4. pixeltable/catalog/catalog.py +3 -3
  5. pixeltable/catalog/column.py +49 -0
  6. pixeltable/catalog/insertable_table.py +0 -7
  7. pixeltable/catalog/schema_object.py +1 -14
  8. pixeltable/catalog/table.py +139 -53
  9. pixeltable/catalog/table_version.py +30 -138
  10. pixeltable/catalog/view.py +2 -1
  11. pixeltable/dataframe.py +2 -3
  12. pixeltable/env.py +43 -5
  13. pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
  14. pixeltable/exec/expr_eval/schedulers.py +36 -15
  15. pixeltable/exprs/array_slice.py +2 -2
  16. pixeltable/exprs/data_row.py +13 -0
  17. pixeltable/exprs/expr.py +9 -9
  18. pixeltable/exprs/function_call.py +2 -2
  19. pixeltable/exprs/globals.py +1 -2
  20. pixeltable/exprs/json_path.py +3 -3
  21. pixeltable/exprs/row_builder.py +14 -16
  22. pixeltable/exprs/string_op.py +3 -3
  23. pixeltable/func/query_template_function.py +2 -2
  24. pixeltable/func/signature.py +30 -3
  25. pixeltable/func/tools.py +2 -2
  26. pixeltable/functions/anthropic.py +75 -25
  27. pixeltable/functions/globals.py +2 -2
  28. pixeltable/functions/llama_cpp.py +9 -1
  29. pixeltable/functions/openai.py +74 -54
  30. pixeltable/functions/video.py +54 -1
  31. pixeltable/functions/vision.py +2 -2
  32. pixeltable/globals.py +74 -12
  33. pixeltable/io/datarows.py +3 -3
  34. pixeltable/io/fiftyone.py +4 -4
  35. pixeltable/io/globals.py +3 -3
  36. pixeltable/io/hf_datasets.py +4 -4
  37. pixeltable/io/pandas.py +6 -6
  38. pixeltable/io/parquet.py +3 -3
  39. pixeltable/io/table_data_conduit.py +2 -2
  40. pixeltable/io/utils.py +2 -2
  41. pixeltable/iterators/document.py +2 -2
  42. pixeltable/iterators/video.py +49 -9
  43. pixeltable/share/packager.py +45 -36
  44. pixeltable/store.py +5 -25
  45. pixeltable/type_system.py +5 -8
  46. pixeltable/utils/__init__.py +2 -2
  47. pixeltable/utils/arrow.py +5 -5
  48. pixeltable/utils/description_helper.py +3 -3
  49. pixeltable/utils/iceberg.py +1 -2
  50. {pixeltable-0.4.5.dist-info → pixeltable-0.4.7.dist-info}/METADATA +109 -59
  51. {pixeltable-0.4.5.dist-info → pixeltable-0.4.7.dist-info}/RECORD +64 -64
  52. {pixeltable-0.4.5.dist-info → pixeltable-0.4.7.dist-info}/WHEEL +1 -1
  53. pixeltable-0.4.7.dist-info/entry_points.txt +2 -0
  54. pixeltable-0.4.5.dist-info/entry_points.txt +0 -3
  55. {pixeltable-0.4.5.dist-info → pixeltable-0.4.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/store.py CHANGED
@@ -4,7 +4,7 @@ import abc
4
4
  import logging
5
5
  import sys
6
6
  import warnings
7
- from typing import Any, Iterable, Iterator, Optional, Union
7
+ from typing import Any, Iterable, Iterator, Optional
8
8
 
9
9
  import more_itertools
10
10
  import psycopg
@@ -17,7 +17,6 @@ from pixeltable.env import Env
17
17
  from pixeltable.exec import ExecNode
18
18
  from pixeltable.metadata import schema
19
19
  from pixeltable.utils.exception_handler import run_cleanup
20
- from pixeltable.utils.media_store import MediaStore
21
20
  from pixeltable.utils.sql import log_explain, log_stmt
22
21
 
23
22
  _logger = logging.getLogger('pixeltable')
@@ -123,21 +122,6 @@ class StoreBase:
123
122
  def _storage_name(self) -> str:
124
123
  """Return the name of the data store table"""
125
124
 
126
- def _move_tmp_media_file(self, file_url: Optional[str], col: catalog.Column) -> str:
127
- src_path = MediaStore.resolve_tmp_url(file_url)
128
- if src_path is None:
129
- return file_url
130
- assert col.tbl.id == self.tbl_version.id # Ensure the column belongs to the same table as this store
131
- new_file_url = MediaStore.relocate_local_media_file(src_path, col)
132
- return new_file_url
133
-
134
- def _move_tmp_media_files(
135
- self, table_row: list[Any], media_cols_by_sql_idx: dict[int, catalog.Column], v_min: int
136
- ) -> None:
137
- """Move tmp media files that we generated to a permanent location"""
138
- for n, col in media_cols_by_sql_idx.items():
139
- table_row[n] = self._move_tmp_media_file(table_row[n], col)
140
-
141
125
  def count(self) -> int:
142
126
  """Return the number of rows visible in self.tbl_version"""
143
127
  stmt = (
@@ -235,7 +219,6 @@ class StoreBase:
235
219
  # create temp table to store output of exec_plan, with the same primary key as the store table
236
220
  tmp_name = f'temp_{self._storage_name()}'
237
221
  tmp_pk_cols = tuple(sql.Column(col.name, col.type, primary_key=True) for col in self.pk_columns())
238
- tmp_val_col_sql_idx = len(tmp_pk_cols)
239
222
  tmp_val_col = sql.Column(col.sa_col.name, col.sa_col.type)
240
223
  tmp_cols = [*tmp_pk_cols, tmp_val_col]
241
224
  # add error columns if the store column records errors
@@ -262,9 +245,7 @@ class StoreBase:
262
245
  if abort_on_exc and row.has_exc():
263
246
  exc = row.get_first_exc()
264
247
  raise excs.Error(f'Error while evaluating computed column {col.name!r}:\n{exc}') from exc
265
- table_row, num_row_exc = row_builder.create_table_row(row, None, row.pk)
266
- if col.col_type.is_media_type():
267
- table_row[tmp_val_col_sql_idx] = self._move_tmp_media_file(table_row[tmp_val_col_sql_idx], col)
248
+ table_row, num_row_exc = row_builder.create_store_table_row(row, None, row.pk)
268
249
  num_excs += num_row_exc
269
250
  batch_table_rows.append(tuple(table_row))
270
251
 
@@ -317,7 +298,7 @@ class StoreBase:
317
298
  progress_bar: Optional[tqdm] = None # create this only after we started executing
318
299
  row_builder = exec_plan.row_builder
319
300
 
320
- store_col_names, media_cols_by_idx = row_builder.store_column_names()
301
+ store_col_names = row_builder.store_column_names()
321
302
 
322
303
  try:
323
304
  table_rows: list[tuple[Any]] = []
@@ -337,7 +318,7 @@ class StoreBase:
337
318
  rowid = (next(rowids),) if rowids is not None else row.pk[:-1]
338
319
  pk = (*rowid, v_min)
339
320
  assert len(pk) == len(self._pk_cols)
340
- table_row, num_row_exc = row_builder.create_table_row(row, cols_with_excs, pk)
321
+ table_row, num_row_exc = row_builder.create_store_table_row(row, cols_with_excs, pk)
341
322
  num_excs += num_row_exc
342
323
 
343
324
  if show_progress:
@@ -351,7 +332,6 @@ class StoreBase:
351
332
  )
352
333
  progress_bar.update(1)
353
334
 
354
- self._move_tmp_media_files(table_row, media_cols_by_idx, v_min)
355
335
  batch_table_rows.append(tuple(table_row))
356
336
 
357
337
  table_rows.extend(batch_table_rows)
@@ -427,7 +407,7 @@ class StoreBase:
427
407
  base_versions_clause = (
428
408
  sql.true() if len(base_versions) == 0 else self.base._versions_clause(base_versions, match_on_vmin)
429
409
  )
430
- set_clause: dict[sql.Column, Union[int, sql.Column]] = {self.v_max_col: current_version}
410
+ set_clause: dict[sql.Column, int | sql.Column] = {self.v_max_col: current_version}
431
411
  for index_info in self.tbl_version.get().idxs_by_name.values():
432
412
  # copy value column to undo column
433
413
  set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
pixeltable/type_system.py CHANGED
@@ -292,7 +292,7 @@ class ColumnType:
292
292
 
293
293
  @classmethod
294
294
  def from_python_type(
295
- cls, t: Union[type, _GenericAlias], nullable_default: bool = False, allow_builtin_types: bool = True
295
+ cls, t: type | _GenericAlias, nullable_default: bool = False, allow_builtin_types: bool = True
296
296
  ) -> Optional[ColumnType]:
297
297
  """
298
298
  Convert a Python type into a Pixeltable `ColumnType` instance.
@@ -311,7 +311,7 @@ class ColumnType:
311
311
  if origin in (typing.Union, types.UnionType):
312
312
  # Check if `t` has the form Optional[T].
313
313
  if len(type_args) == 2 and type(None) in type_args:
314
- # `t` is a type of the form Optional[T] (equivalently, Union[T, None] or Union[None, T]).
314
+ # `t` is a type of the form Optional[T] (equivalently, T | None or None | T).
315
315
  # We treat it as the underlying type but with nullable=True.
316
316
  underlying_py_type = type_args[0] if type_args[1] is type(None) else type_args[1]
317
317
  underlying = cls.from_python_type(underlying_py_type, allow_builtin_types=allow_builtin_types)
@@ -361,10 +361,7 @@ class ColumnType:
361
361
 
362
362
  @classmethod
363
363
  def normalize_type(
364
- cls,
365
- t: Union[ColumnType, type, _AnnotatedAlias],
366
- nullable_default: bool = False,
367
- allow_builtin_types: bool = True,
364
+ cls, t: ColumnType | type | _AnnotatedAlias, nullable_default: bool = False, allow_builtin_types: bool = True
368
365
  ) -> ColumnType:
369
366
  """
370
367
  Convert any type recognizable by Pixeltable to its corresponding ColumnType.
@@ -389,7 +386,7 @@ class ColumnType:
389
386
  ]
390
387
 
391
388
  @classmethod
392
- def __raise_exc_for_invalid_type(cls, t: Union[type, _AnnotatedAlias]) -> None:
389
+ def __raise_exc_for_invalid_type(cls, t: type | _AnnotatedAlias) -> None:
393
390
  for builtin_type, suggestion in cls.__TYPE_SUGGESTIONS:
394
391
  if t is builtin_type or (isinstance(t, type) and issubclass(t, builtin_type)):
395
392
  name = t.__name__ if t.__module__ == 'builtins' else f'{t.__module__}.{t.__name__}'
@@ -405,7 +402,7 @@ class ColumnType:
405
402
  return cls.from_python_type(py_type) if py_type is not None else None
406
403
 
407
404
  @classmethod
408
- def __json_schema_to_py_type(cls, schema: dict[str, Any]) -> Union[type, _GenericAlias, None]:
405
+ def __json_schema_to_py_type(cls, schema: dict[str, Any]) -> type | _GenericAlias | None:
409
406
  if 'type' in schema:
410
407
  if schema['type'] == 'null':
411
408
  return type(None)
@@ -2,7 +2,7 @@ import hashlib
2
2
  import urllib.parse
3
3
  import urllib.request
4
4
  from pathlib import Path
5
- from typing import Optional, Union
5
+ from typing import Optional
6
6
 
7
7
 
8
8
  def print_perf_counter_delta(delta: float) -> str:
@@ -24,7 +24,7 @@ def print_perf_counter_delta(delta: float) -> str:
24
24
  return f'{delta:.2f} s'
25
25
 
26
26
 
27
- def sha256sum(path: Union[Path, str]) -> str:
27
+ def sha256sum(path: Path | str) -> str:
28
28
  """
29
29
  Compute the SHA256 hash of a file.
30
30
  """
pixeltable/utils/arrow.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import datetime
2
- from typing import Any, Iterator, Optional, Union
2
+ from typing import Any, Iterator, Optional
3
3
 
4
4
  import numpy as np
5
5
  import pyarrow as pa
@@ -88,11 +88,11 @@ def to_arrow_schema(pixeltable_schema: dict[str, Any]) -> pa.Schema:
88
88
  return pa.schema((name, to_arrow_type(typ)) for name, typ in pixeltable_schema.items()) # type: ignore[misc]
89
89
 
90
90
 
91
- def to_pydict(batch: Union[pa.Table, pa.RecordBatch]) -> dict[str, Union[list, np.ndarray]]:
91
+ def to_pydict(batch: pa.Table | pa.RecordBatch) -> dict[str, list | np.ndarray]:
92
92
  """Convert a RecordBatch to a dictionary of lists, unlike pa.lib.RecordBatch.to_pydict,
93
93
  this function will not convert numpy arrays to lists, and will preserve the original numpy dtype.
94
94
  """
95
- out: dict[str, Union[list, np.ndarray]] = {}
95
+ out: dict[str, list | np.ndarray] = {}
96
96
  for k, name in enumerate(batch.schema.names):
97
97
  col = batch.column(k)
98
98
  if isinstance(col.type, pa.FixedShapeTensorType):
@@ -105,7 +105,7 @@ def to_pydict(batch: Union[pa.Table, pa.RecordBatch]) -> dict[str, Union[list, n
105
105
  return out
106
106
 
107
107
 
108
- def iter_tuples(batch: Union[pa.Table, pa.RecordBatch]) -> Iterator[dict[str, Any]]:
108
+ def iter_tuples(batch: pa.Table | pa.RecordBatch) -> Iterator[dict[str, Any]]:
109
109
  """Convert a RecordBatch to an iterator of dictionaries. also works with pa.Table and pa.RowGroup"""
110
110
  pydict = to_pydict(batch)
111
111
  assert len(pydict) > 0, 'empty record batch'
@@ -145,7 +145,7 @@ def _ar_val_to_pxt_val(val: Any, pxt_type: ts.ColumnType) -> Any:
145
145
 
146
146
 
147
147
  def iter_tuples2(
148
- batch: Union[pa.Table, pa.RecordBatch], col_mapping: Optional[dict[str, str]], schema: dict[str, ts.ColumnType]
148
+ batch: pa.Table | pa.RecordBatch, col_mapping: Optional[dict[str, str]], schema: dict[str, ts.ColumnType]
149
149
  ) -> Iterator[dict[str, Any]]:
150
150
  """Convert a RecordBatch to an iterator of dictionaries. also works with pa.Table and pa.RowGroup"""
151
151
  pydict = to_pydict(batch)
@@ -1,5 +1,5 @@
1
1
  import dataclasses
2
- from typing import Optional, Union
2
+ from typing import Optional
3
3
 
4
4
  import pandas as pd
5
5
  from pandas.io.formats.style import Styler
@@ -7,7 +7,7 @@ from pandas.io.formats.style import Styler
7
7
 
8
8
  @dataclasses.dataclass
9
9
  class _Descriptor:
10
- body: Union[str, pd.DataFrame]
10
+ body: str | pd.DataFrame
11
11
  # The remaining fields only affect the behavior if `body` is a pd.DataFrame.
12
12
  show_index: bool
13
13
  show_header: bool
@@ -33,7 +33,7 @@ class DescriptionHelper:
33
33
 
34
34
  def append(
35
35
  self,
36
- descriptor: Union[str, pd.DataFrame],
36
+ descriptor: str | pd.DataFrame,
37
37
  show_index: bool = False,
38
38
  show_header: bool = True,
39
39
  styler: Optional[Styler] = None,
@@ -1,10 +1,9 @@
1
1
  from pathlib import Path
2
- from typing import Union
3
2
 
4
3
  from pyiceberg.catalog.sql import SqlCatalog
5
4
 
6
5
 
7
- def sqlite_catalog(warehouse_path: Union[str, Path], name: str = 'pixeltable') -> SqlCatalog:
6
+ def sqlite_catalog(warehouse_path: str | Path, name: str = 'pixeltable') -> SqlCatalog:
8
7
  """
9
8
  Instantiate a sqlite Iceberg catalog at the specified path. If no catalog exists, one will be created.
10
9
  """
@@ -1,12 +1,14 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: pixeltable
3
- Version: 0.4.5
3
+ Version: 0.4.7
4
4
  Summary: AI Data Infrastructure: Declarative, Multimodal, and Incremental
5
- License: Apache-2.0
6
- Keywords: data-science,machine-learning,database,ai,computer-vision,chatbot,ml,artificial-intelligence,feature-engineering,multimodal,mlops,feature-store,vector-database,llm,genai
7
- Author: Pixeltable, Inc.
8
- Author-email: contact@pixeltable.com>
9
- Requires-Python: >=3.10
5
+ Project-URL: homepage, https://pixeltable.com/
6
+ Project-URL: repository, https://github.com/pixeltable/pixeltable
7
+ Project-URL: documentation, https://docs.pixeltable.com/
8
+ Author-email: "Pixeltable, Inc." <contact@pixeltable.com>
9
+ License-Expression: Apache-2.0
10
+ License-File: LICENSE
11
+ Keywords: ai,artificial-intelligence,chatbot,computer-vision,data-science,database,feature-engineering,feature-store,genai,llm,machine-learning,ml,mlops,multimodal,vector-database
10
12
  Classifier: Intended Audience :: Developers
11
13
  Classifier: Intended Audience :: Science/Research
12
14
  Classifier: License :: OSI Approved :: Apache Software License
@@ -20,39 +22,37 @@ Classifier: Programming Language :: Python :: 3.13
20
22
  Classifier: Topic :: Database
21
23
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
24
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
23
- Requires-Dist: av (>=10.0.0)
24
- Requires-Dist: beautifulsoup4 (>=4.0.0)
25
- Requires-Dist: cloudpickle (>=2.2.1)
26
- Requires-Dist: ftfy (>=6.2.0)
27
- Requires-Dist: httpcore (>=1.0.3)
28
- Requires-Dist: httpx (>=0.27)
29
- Requires-Dist: jinja2 (>=3.1.3)
30
- Requires-Dist: jmespath (>=1.0.1)
31
- Requires-Dist: jsonschema (>=4.1.0)
32
- Requires-Dist: lxml (>=5.0)
33
- Requires-Dist: more-itertools (>=10.2)
34
- Requires-Dist: nest_asyncio (>=1.5)
35
- Requires-Dist: numpy (>=1.25)
36
- Requires-Dist: pandas (>=2.0,<3.0)
37
- Requires-Dist: pgvector (>=0.2.1)
38
- Requires-Dist: pillow (>=9.3.0)
39
- Requires-Dist: pillow-heif (>=0.15.0)
40
- Requires-Dist: pixeltable-pgserver (==0.3.1)
41
- Requires-Dist: psutil (>=5.9.5)
42
- Requires-Dist: psycopg[binary] (>=3.1.18)
43
- Requires-Dist: puremagic (>=1.20)
44
- Requires-Dist: pyarrow (>=13.0.0)
45
- Requires-Dist: pydantic (>=2.7.4)
46
- Requires-Dist: pymupdf (>=1.24.1)
47
- Requires-Dist: pyyaml (>=6.0.1)
48
- Requires-Dist: requests (>=2.31.0)
49
- Requires-Dist: sqlalchemy (>=2.0.23)
50
- Requires-Dist: tenacity (>=8.2)
51
- Requires-Dist: toml (>=0.10)
52
- Requires-Dist: tqdm (>=4.64)
53
- Project-URL: Documentation, https://docs.pixeltable.com/
54
- Project-URL: Homepage, https://pixeltable.com/
55
- Project-URL: Repository, https://github.com/pixeltable/pixeltable
25
+ Requires-Python: >=3.10
26
+ Requires-Dist: av>=10.0.0
27
+ Requires-Dist: beautifulsoup4>=4.10
28
+ Requires-Dist: cloudpickle>=2.2.1
29
+ Requires-Dist: ftfy>=6.2.0
30
+ Requires-Dist: httpcore>=1.0.3
31
+ Requires-Dist: httpx>=0.27
32
+ Requires-Dist: jinja2>=3.1.3
33
+ Requires-Dist: jmespath>=1.0.1
34
+ Requires-Dist: jsonschema>=4.1.0
35
+ Requires-Dist: lxml>=5.1
36
+ Requires-Dist: more-itertools>=10.2
37
+ Requires-Dist: nest-asyncio>=1.5
38
+ Requires-Dist: numpy>=1.25
39
+ Requires-Dist: pandas>=2.0
40
+ Requires-Dist: pgvector>=0.2.1
41
+ Requires-Dist: pillow-heif>=0.15.0
42
+ Requires-Dist: pillow>=9.3.0
43
+ Requires-Dist: pixeltable-pgserver==0.3.1
44
+ Requires-Dist: psutil>=5.9.5
45
+ Requires-Dist: psycopg[binary]>=3.1.18
46
+ Requires-Dist: puremagic>=1.20
47
+ Requires-Dist: pyarrow>=13.0.0
48
+ Requires-Dist: pydantic>=2.7.4
49
+ Requires-Dist: pymupdf>=1.24.1
50
+ Requires-Dist: pyyaml>=6.0.1
51
+ Requires-Dist: requests>=2.31.0
52
+ Requires-Dist: sqlalchemy>=2.0.23
53
+ Requires-Dist: tenacity>=8.2
54
+ Requires-Dist: toml>=0.10
55
+ Requires-Dist: tqdm>=4.64
56
56
  Description-Content-Type: text/markdown
57
57
 
58
58
  <div align="center">
@@ -62,6 +62,8 @@ Description-Content-Type: text/markdown
62
62
 
63
63
  <h2>Declarative Data Infrastructure for Multimodal AI Apps</h2>
64
64
 
65
+ Pixeltable is the only Python library providing incremental storage, transformation, indexing, and orchestration of multimodal data.
66
+
65
67
  [![License](https://img.shields.io/badge/License-Apache%202.0-0530AD.svg)](https://opensource.org/licenses/Apache-2.0)
66
68
  ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white&)
67
69
  ![Platform Support](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-E5DDD4)
@@ -82,31 +84,68 @@ Description-Content-Type: text/markdown
82
84
 
83
85
  ---
84
86
 
85
- Pixeltable is the only Python framework that provides incremental storage, transformation, indexing, and orchestration of your multimodal data.
87
+ ## 💾 Installation
86
88
 
87
- ## 😩 Maintaining Production-Ready Multimodal AI Apps is Still Too Hard
89
+ ```bash
90
+ pip install pixeltable
91
+ ```
88
92
 
89
- Building robust AI applications, especially [multimodal](https://docs.pixeltable.com/docs/datastore/bringing-data) ones, requires stitching together numerous tools:
90
- * ETL pipelines for data loading and transformation.
91
- * Vector databases for semantic search.
92
- * Feature stores for ML models.
93
- * Orchestrators for scheduling.
94
- * Model serving infrastructure for inference.
95
- * Separate systems for parallelization, caching, versioning, and lineage tracking.
93
+ **Pixeltable unifies multimodal data storage, retrieval, and orchestration.** It stores metadata and computed results persistently, typically in a `.pixeltable` directory in your workspace. See [configuration](https://docs.pixeltable.com/docs/overview/configuration) options for your setup. All media (videos, images, audio) resides in external files, and Pixeltable stores references to those files. Files can be local or remote (e.g. S3). For remote files, Pixeltable caches the [files locally on access](https://github.com/pixeltable/pixeltable/blob/main/docs/notebooks/feature-guides/working-with-external-files.ipynb).
96
94
 
97
- This complex "data plumbing" slows down development, increases costs, and makes applications brittle and hard to reproduce.
95
+ https://github.com/user-attachments/assets/b50fd6df-5169-4881-9dbe-1b6e5d06cede
98
96
 
99
- ## 💾 Installation
97
+ ## Quick Start
98
+
99
+ With Pixeltable, you define your *entire* data processing and AI workflow declaratively using **[computed columns](https://docs.pixeltable.com/docs/datastore/computed-columns)** on **[tables](https://docs.pixeltable.com/docs/datastore/tables-and-operations)**.
100
100
 
101
101
  ```python
102
- pip install pixeltable
103
- ```
104
102
 
105
- **Pixeltable is a database.** It stores metadata and computed results persistently, typically in a `.pixeltable` directory in your workspace. See [configuration](https://docs.pixeltable.com/docs/overview/configuration) options for your setup.
103
+ # Installation
104
+ pip install -qU torch transformers openai pixeltable
105
+
106
+ # Basic setup
107
+ import pixeltable as pxt
108
+
109
+ # Table with multimodal column types (Image, Video, Audio, Document)
110
+ t = pxt.create_table('images', {'input_image': pxt.Image})
111
+
112
+ # Computed columns: define transformation logic once, runs on all data
113
+ from pixeltable.functions import huggingface
106
114
 
107
- ## What is Pixeltable?
115
+ # Object detection with automatic model management
116
+ t.add_computed_column(
117
+ detections=huggingface.detr_for_object_detection(
118
+ t.input_image,
119
+ model_id='facebook/detr-resnet-50'
120
+ )
121
+ )
122
+
123
+ # Extract specific fields from detection results
124
+ t.add_computed_column(detections_text=t.detections.label_text)
125
+
126
+ # OpenAI Vision API integration with built-in rate limiting and async management
127
+ from pixeltable.functions import openai
128
+
129
+ t.add_computed_column(
130
+ vision=openai.vision(
131
+ prompt="Describe what's in this image.",
132
+ image=t.input_image,
133
+ model='gpt-4o-mini'
134
+ )
135
+ )
136
+
137
+ # Insert data - automatically triggers computation of all computed columns
138
+ t.insert(input_image='https://raw.github.com/pixeltable/pixeltable/release/docs/resources/images/000000000025.jpg')
139
+
140
+ # Query - All data, metadata, and computed results are persistently stored
141
+ results = t.select(
142
+ t.input_image,
143
+ t.detections_text,
144
+ t.vision
145
+ ).collect()
146
+ ```
108
147
 
109
- With Pixeltable, you define your *entire* data processing and AI workflow declaratively using **[computed columns](https://docs.pixeltable.com/docs/datastore/computed-columns)** on **[tables](https://docs.pixeltable.com/docs/datastore/tables-and-operations)**. Pixeltable's engine then automatically handles:
148
+ ## What Happened?
110
149
 
111
150
  * **Data Ingestion & Storage:** References [files](https://docs.pixeltable.com/docs/datastore/bringing-data) (images, videos, audio, docs) in place, handles structured data.
112
151
  * **Transformation & Processing:** Applies *any* Python function ([UDFs](https://docs.pixeltable.com/docs/datastore/custom-functions)) or built-in operations ([chunking, frame extraction](https://docs.pixeltable.com/docs/datastore/iterators)) automatically.
@@ -118,7 +157,7 @@ With Pixeltable, you define your *entire* data processing and AI workflow declar
118
157
  **Focus on your application logic, not the infrastructure.**
119
158
 
120
159
 
121
- ## 🚀 Key Features
160
+ ## ⚖️ Key Principles
122
161
 
123
162
  * **[Unified Multimodal Interface:](https://docs.pixeltable.com/docs/datastore/tables-and-operations)** `pxt.Image`, `pxt.Video`, `pxt.Audio`, `pxt.Document`, etc. – manage diverse data consistently.
124
163
  ```python
@@ -416,12 +455,24 @@ Explore Pixeltable's capabilities interactively:
416
455
  | Object Detection | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | Image/Text Search | <a target="_blank" href="https://github.com/pixeltable/pixeltable/tree/main/docs/sample-apps/text-and-image-similarity-search-nextjs-fastapi"> <img src="https://img.shields.io/badge/🖥️%20App-black.svg" alt="GitHub App"/> |
417
456
  | Audio Transcription | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/release/docs/notebooks/use-cases/audio-transcriptions.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> | Discord Bot | <a target="_blank" href="https://github.com/pixeltable/pixeltable/blob/main/docs/sample-apps/context-aware-discord-bot"> <img src="https://img.shields.io/badge/%F0%9F%92%AC%20Bot-%235865F2.svg" alt="GitHub App"/></a> |
418
457
 
458
+ ## 🚨 Maintaining Production-Ready Multimodal AI Apps is Still Too Hard
459
+
460
+ Building robust AI applications, especially [multimodal](https://docs.pixeltable.com/docs/datastore/bringing-data) ones, requires stitching together numerous tools:
461
+ * ETL pipelines for data loading and transformation.
462
+ * Vector databases for semantic search.
463
+ * Feature stores for ML models.
464
+ * Orchestrators for scheduling.
465
+ * Model serving infrastructure for inference.
466
+ * Separate systems for parallelization, caching, versioning, and lineage tracking.
467
+
468
+ This complex "data plumbing" slows down development, increases costs, and makes applications brittle and hard to reproduce.
469
+
419
470
  ## 🔮 Roadmap (2025)
420
471
 
421
472
  ### Cloud Infrastructure and Deployment
422
473
  We're working on a hosted Pixeltable service that will:
423
474
 
424
- - Enable Multimodal Data Sharing of Pixeltable Tables and Views
475
+ - Enable Multimodal Data Sharing of Pixeltable Tables and Views | [Waitlist](https://www.pixeltable.com/waitlist)
425
476
  - Provide a persistent cloud instance
426
477
  - Turn Pixeltable workflows (Tables, Queries, UDFs) into API endpoints/[MCP Servers](https://github.com/pixeltable/pixeltable-mcp-server)
427
478
 
@@ -432,4 +483,3 @@ We love contributions! Whether it's reporting bugs, suggesting features, improvi
432
483
  ## 🏢 License
433
484
 
434
485
  Pixeltable is licensed under the Apache 2.0 License.
435
-