pixeltable 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (53) hide show
  1. pixeltable/__init__.py +3 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/column.py +8 -2
  4. pixeltable/catalog/insertable_table.py +32 -17
  5. pixeltable/catalog/table.py +167 -12
  6. pixeltable/catalog/table_version.py +185 -106
  7. pixeltable/datatransfer/__init__.py +1 -0
  8. pixeltable/datatransfer/label_studio.py +452 -0
  9. pixeltable/datatransfer/remote.py +85 -0
  10. pixeltable/env.py +148 -69
  11. pixeltable/exprs/column_ref.py +2 -2
  12. pixeltable/exprs/comparison.py +39 -1
  13. pixeltable/exprs/data_row.py +7 -0
  14. pixeltable/exprs/expr.py +11 -12
  15. pixeltable/exprs/function_call.py +0 -3
  16. pixeltable/exprs/globals.py +14 -2
  17. pixeltable/exprs/similarity_expr.py +5 -3
  18. pixeltable/ext/functions/whisperx.py +30 -0
  19. pixeltable/ext/functions/yolox.py +16 -0
  20. pixeltable/func/aggregate_function.py +2 -2
  21. pixeltable/func/expr_template_function.py +3 -1
  22. pixeltable/func/udf.py +2 -2
  23. pixeltable/functions/fireworks.py +9 -4
  24. pixeltable/functions/huggingface.py +25 -1
  25. pixeltable/functions/openai.py +15 -10
  26. pixeltable/functions/together.py +11 -6
  27. pixeltable/functions/util.py +0 -43
  28. pixeltable/functions/video.py +46 -8
  29. pixeltable/globals.py +20 -2
  30. pixeltable/index/__init__.py +1 -0
  31. pixeltable/index/base.py +6 -1
  32. pixeltable/index/btree.py +54 -0
  33. pixeltable/index/embedding_index.py +4 -1
  34. pixeltable/io/__init__.py +1 -0
  35. pixeltable/io/globals.py +58 -0
  36. pixeltable/iterators/base.py +4 -4
  37. pixeltable/iterators/document.py +26 -15
  38. pixeltable/iterators/video.py +9 -1
  39. pixeltable/metadata/__init__.py +2 -2
  40. pixeltable/metadata/converters/convert_14.py +13 -0
  41. pixeltable/metadata/schema.py +9 -6
  42. pixeltable/plan.py +9 -5
  43. pixeltable/store.py +14 -21
  44. pixeltable/tool/create_test_db_dump.py +14 -0
  45. pixeltable/type_system.py +14 -4
  46. pixeltable/utils/coco.py +94 -0
  47. pixeltable-0.2.8.dist-info/METADATA +137 -0
  48. {pixeltable-0.2.6.dist-info → pixeltable-0.2.8.dist-info}/RECORD +50 -45
  49. pixeltable/func/nos_function.py +0 -202
  50. pixeltable/utils/clip.py +0 -18
  51. pixeltable-0.2.6.dist-info/METADATA +0 -131
  52. {pixeltable-0.2.6.dist-info → pixeltable-0.2.8.dist-info}/LICENSE +0 -0
  53. {pixeltable-0.2.6.dist-info → pixeltable-0.2.8.dist-info}/WHEEL +0 -0
@@ -10,11 +10,11 @@ import sqlalchemy.orm as orm
10
10
  from .schema import SystemInfo, SystemInfoMd
11
11
 
12
12
  # current version of the metadata; this is incremented whenever the metadata schema changes
13
- VERSION = 14
13
+ VERSION = 15
14
14
 
15
15
 
16
16
  def create_system_info(engine: sql.engine.Engine) -> None:
17
- """Create the systemmetadata record"""
17
+ """Create the system metadata record"""
18
18
  system_md = SystemInfoMd(schema_version=VERSION)
19
19
  record = SystemInfo(md=dataclasses.asdict(system_md))
20
20
  with orm.Session(engine, future=True) as session:
@@ -0,0 +1,13 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata.schema import Table
4
+ from pixeltable.metadata import register_converter
5
+
6
+
7
+ def convert_14(engine: sql.engine.Engine) -> None:
8
+ default_remotes = {'remotes': []}
9
+ with engine.begin() as conn:
10
+ conn.execute(sql.update(Table).where(Table.md['remotes'] == None).values(md=Table.md.concat(default_remotes)))
11
+
12
+
13
+ register_converter(14, convert_14)
@@ -1,12 +1,11 @@
1
- from typing import Optional, List, get_type_hints, Type, Any, TypeVar, Tuple, Union
2
- import platform
3
- import uuid
4
1
  import dataclasses
2
+ import uuid
3
+ from typing import Optional, List, get_type_hints, Type, Any, TypeVar, Tuple, Union
5
4
 
6
5
  import sqlalchemy as sql
7
- from sqlalchemy import Integer, String, Boolean, BigInteger, LargeBinary
6
+ from sqlalchemy import ForeignKey
7
+ from sqlalchemy import Integer, BigInteger, LargeBinary
8
8
  from sqlalchemy.dialects.postgresql import UUID, JSONB
9
- from sqlalchemy import ForeignKey, UniqueConstraint, ForeignKeyConstraint
10
9
  from sqlalchemy.orm import declarative_base
11
10
 
12
11
  Base = declarative_base()
@@ -143,6 +142,10 @@ class TableMd:
143
142
  # - every row is assigned a unique and immutable rowid on insertion
144
143
  next_row_id: int
145
144
 
145
+ # Metadata format for remotes:
146
+ # {'class': 'pixeltable.datatransfer.LabelStudioProject', 'md': {'project_id': 3}}
147
+ remotes: list[dict[str, Any]]
148
+
146
149
  column_md: dict[int, ColumnMd] # col_id -> ColumnMd
147
150
  index_md: dict[int, IndexMd] # index_id -> IndexMd
148
151
  view_md: Optional[ViewMd]
@@ -160,7 +163,7 @@ class Table(Base):
160
163
 
161
164
  MAX_VERSION = 9223372036854775807 # 2^63 - 1
162
165
 
163
- id = sql.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, nullable=False)
166
+ id = sql.Column(UUID(as_uuid=True), primary_key=True, nullable=False)
164
167
  dir_id = sql.Column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=False)
165
168
  md = sql.Column(JSONB, nullable=False) # TableMd
166
169
 
pixeltable/plan.py CHANGED
@@ -251,7 +251,7 @@ class Planner:
251
251
  Returns:
252
252
  - root node of the plan
253
253
  - list of qualified column names that are getting updated
254
- - list of columns that are being recomputed
254
+ - list of user-visible columns that are being recomputed
255
255
  """
256
256
  # retrieve all stored cols and all target exprs
257
257
  assert isinstance(tbl, catalog.TableVersionPath)
@@ -260,7 +260,10 @@ class Planner:
260
260
  if len(recompute_targets) > 0:
261
261
  recomputed_cols = recompute_targets.copy()
262
262
  else:
263
- recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else {}
263
+ recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
264
+ # regardless of cascade, we need to update all indices on any updated column
265
+ idx_val_cols = target.get_idx_val_columns(updated_cols)
266
+ recomputed_cols.update(idx_val_cols)
264
267
  # we only need to recompute stored columns (unstored ones are substituted away)
265
268
  recomputed_cols = {c for c in recomputed_cols if c.is_stored}
266
269
  recomputed_base_cols = {col for col in recomputed_cols if col.tbl == target}
@@ -273,8 +276,8 @@ class Planner:
273
276
  recomputed_exprs = \
274
277
  [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols]
275
278
  # recomputed cols reference the new values of the updated cols
276
- for col, e in update_targets.items():
277
- exprs.Expr.list_substitute(recomputed_exprs, exprs.ColumnRef(col), e)
279
+ spec = {exprs.ColumnRef(col): e for col, e in update_targets.items()}
280
+ exprs.Expr.list_substitute(recomputed_exprs, spec)
278
281
  select_list.extend(recomputed_exprs)
279
282
 
280
283
  # we need to retrieve the PK columns of the existing rows
@@ -282,7 +285,8 @@ class Planner:
282
285
  all_base_cols = copied_cols + updated_cols + list(recomputed_base_cols) # same order as select_list
283
286
  # update row builder with column information
284
287
  [plan.row_builder.add_table_column(col, select_list[i].slot_idx) for i, col in enumerate(all_base_cols)]
285
- return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + list(recomputed_cols)], list(recomputed_cols)
288
+ recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
289
+ return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
286
290
 
287
291
  @classmethod
288
292
  def create_view_update_plan(
pixeltable/store.py CHANGED
@@ -66,7 +66,6 @@ class StoreBase:
66
66
  """Create self.sa_tbl from self.tbl_version."""
67
67
  system_cols = self._create_system_columns()
68
68
  all_cols = system_cols.copy()
69
- idxs: List[sql.Index] = []
70
69
  for col in [c for c in self.tbl_version.cols if c.is_stored]:
71
70
  # re-create sql.Column for each column, regardless of whether it already has sa_col set: it was bound
72
71
  # to the last sql.Table version we created and cannot be reused
@@ -76,26 +75,18 @@ class StoreBase:
76
75
  all_cols.append(col.sa_errormsg_col)
77
76
  all_cols.append(col.sa_errortype_col)
78
77
 
79
- # we create an index for:
80
- # - scalar columns (except for strings, because long strings can't be used for B-tree indices)
81
- # - non-computed video and image columns (they will contain external paths/urls that users might want to
82
- # filter on)
83
- if (col.col_type.is_scalar_type() and not col.col_type.is_string_type()) \
84
- or (col.col_type.is_media_type() and not col.is_computed):
85
- # index names need to be unique within the Postgres instance
86
- idx_name = f'idx_{col.id}_{self.tbl_version.id.hex}'
87
- idxs.append(sql.Index(idx_name, col.sa_col))
88
-
89
78
  if self.sa_tbl is not None:
90
79
  # if we're called in response to a schema change, we need to remove the old table first
91
80
  self.sa_md.remove(self.sa_tbl)
92
81
 
82
+ idxs: List[sql.Index] = []
93
83
  # index for all system columns:
94
84
  # - base x view joins can be executed as merge joins
95
85
  # - speeds up ORDER BY rowid DESC
96
86
  # - allows filtering for a particular table version in index scan
97
87
  idx_name = f'sys_cols_idx_{self.tbl_version.id.hex}'
98
88
  idxs.append(sql.Index(idx_name, *system_cols))
89
+
99
90
  # v_min/v_max indices: speeds up base table scans needed to propagate a base table insert or delete
100
91
  idx_name = f'vmin_idx_{self.tbl_version.id.hex}'
101
92
  idxs.append(sql.Index(idx_name, self.v_min_col, postgresql_using='brin'))
@@ -264,7 +255,8 @@ class StoreBase:
264
255
  return num_excs
265
256
 
266
257
  def insert_rows(
267
- self, exec_plan: ExecNode, conn: sql.engine.Connection, v_min: Optional[int] = None
258
+ self, exec_plan: ExecNode, conn: sql.engine.Connection, v_min: Optional[int] = None,
259
+ show_progress: bool = True
268
260
  ) -> Tuple[int, int, Set[int]]:
269
261
  """Insert rows into the store table and update the catalog table's md
270
262
  Returns:
@@ -293,15 +285,16 @@ class StoreBase:
293
285
  self._create_table_row(row, row_builder, media_cols, cols_with_excs, v_min=v_min)
294
286
  num_excs += num_row_exc
295
287
  table_rows.append(table_row)
296
- if progress_bar is None:
297
- warnings.simplefilter("ignore", category=TqdmWarning)
298
- progress_bar = tqdm(
299
- desc=f'Inserting rows into `{self.tbl_version.name}`',
300
- unit=' rows',
301
- ncols=100,
302
- file=sys.stdout
303
- )
304
- progress_bar.update(1)
288
+ if show_progress:
289
+ if progress_bar is None:
290
+ warnings.simplefilter("ignore", category=TqdmWarning)
291
+ progress_bar = tqdm(
292
+ desc=f'Inserting rows into `{self.tbl_version.name}`',
293
+ unit=' rows',
294
+ ncols=100,
295
+ file=sys.stdout
296
+ )
297
+ progress_bar.update(1)
305
298
  self._move_tmp_media_files(table_rows, media_cols, v_min)
306
299
  conn.execute(sql.insert(self.sa_tbl), table_rows)
307
300
  if progress_bar is not None:
@@ -162,6 +162,20 @@ class Dumper:
162
162
  # astype
163
163
  v['astype'] = t.c1.astype(pxt.FloatType())
164
164
 
165
+ # Add remotes
166
+ from pixeltable.datatransfer.remote import MockRemote
167
+ v.link_remote(
168
+ MockRemote({'int_field': pxt.IntType()}, {'str_field': pxt.StringType()}),
169
+ col_mapping={'test_udf': 'int_field', 'c1': 'str_field'}
170
+ )
171
+ # We're just trying to test metadata here, so reach "under the covers" and link a fake
172
+ # Label Studio project without validation (so we don't need a real Label Studio server)
173
+ from pixeltable.datatransfer.label_studio import LabelStudioProject
174
+ v.tbl_version_path.tbl_version.link(
175
+ LabelStudioProject(4171780),
176
+ col_mapping={'str_format': 'str_format'}
177
+ )
178
+
165
179
 
166
180
  @pxt.udf(_force_stored=True)
167
181
  def test_udf_stored(n: int) -> int:
pixeltable/type_system.py CHANGED
@@ -7,7 +7,7 @@ import json
7
7
  import typing
8
8
  import urllib.parse
9
9
  import urllib.request
10
- from copy import copy
10
+ from copy import deepcopy
11
11
  from pathlib import Path
12
12
  from typing import Any, Optional, Tuple, Dict, Callable, List, Union, Sequence, Mapping
13
13
 
@@ -82,7 +82,11 @@ class ColumnType:
82
82
 
83
83
  def __init__(self, t: Type, nullable: bool = False):
84
84
  self._type = t
85
- self.nullable = nullable
85
+ self._nullable = nullable
86
+
87
+ @property
88
+ def nullable(self) -> bool:
89
+ return self._nullable
86
90
 
87
91
  @property
88
92
  def type_enum(self) -> Type:
@@ -91,6 +95,12 @@ class ColumnType:
91
95
  def serialize(self) -> str:
92
96
  return json.dumps(self.as_dict())
93
97
 
98
+ def copy(self, nullable: Optional[bool] = None) -> ColumnType:
99
+ result = deepcopy(self)
100
+ if nullable is not None:
101
+ result._nullable = nullable
102
+ return result
103
+
94
104
  @classmethod
95
105
  def serialize_list(cls, type_list: List[ColumnType]) -> str:
96
106
  return json.dumps([t.as_dict() for t in type_list])
@@ -177,7 +187,7 @@ class ColumnType:
177
187
  if type(self) != type(other):
178
188
  return False
179
189
  for member_var in vars(self).keys():
180
- if member_var == 'nullable':
190
+ if member_var == '_nullable':
181
191
  continue
182
192
  if getattr(self, member_var) != getattr(other, member_var):
183
193
  return False
@@ -250,7 +260,7 @@ class ColumnType:
250
260
  # We treat it as the underlying type but with nullable=True.
251
261
  underlying = cls.from_python_type(union_args[0])
252
262
  if underlying is not None:
253
- underlying.nullable = True
263
+ underlying._nullable = True
254
264
  return underlying
255
265
  else:
256
266
  # Discard type parameters to ensure that parameterized types such as `list[T]`
pixeltable/utils/coco.py CHANGED
@@ -134,3 +134,97 @@ def write_coco_dataset(df: 'pixeltable.DataFrame', dest_path: Path) -> Path:
134
134
  json.dump(result, f)
135
135
  return output_path
136
136
 
137
+
138
+ COCO_2017_CATEGORIES = {
139
+ 0: 'N/A',
140
+ 1: 'person',
141
+ 2: 'bicycle',
142
+ 3: 'car',
143
+ 4: 'motorcycle',
144
+ 5: 'airplane',
145
+ 6: 'bus',
146
+ 7: 'train',
147
+ 8: 'truck',
148
+ 9: 'boat',
149
+ 10: 'traffic light',
150
+ 11: 'fire hydrant',
151
+ 12: 'N/A',
152
+ 13: 'stop sign',
153
+ 14: 'parking meter',
154
+ 15: 'bench',
155
+ 16: 'bird',
156
+ 17: 'cat',
157
+ 18: 'dog',
158
+ 19: 'horse',
159
+ 20: 'sheep',
160
+ 21: 'cow',
161
+ 22: 'elephant',
162
+ 23: 'bear',
163
+ 24: 'zebra',
164
+ 25: 'giraffe',
165
+ 26: 'N/A',
166
+ 27: 'backpack',
167
+ 28: 'umbrella',
168
+ 29: 'N/A',
169
+ 30: 'N/A',
170
+ 31: 'handbag',
171
+ 32: 'tie',
172
+ 33: 'suitcase',
173
+ 34: 'frisbee',
174
+ 35: 'skis',
175
+ 36: 'snowboard',
176
+ 37: 'sports ball',
177
+ 38: 'kite',
178
+ 39: 'baseball bat',
179
+ 40: 'baseball glove',
180
+ 41: 'skateboard',
181
+ 42: 'surfboard',
182
+ 43: 'tennis racket',
183
+ 44: 'bottle',
184
+ 45: 'N/A',
185
+ 46: 'wine glass',
186
+ 47: 'cup',
187
+ 48: 'fork',
188
+ 49: 'knife',
189
+ 50: 'spoon',
190
+ 51: 'bowl',
191
+ 52: 'banana',
192
+ 53: 'apple',
193
+ 54: 'sandwich',
194
+ 55: 'orange',
195
+ 56: 'broccoli',
196
+ 57: 'carrot',
197
+ 58: 'hot dog',
198
+ 59: 'pizza',
199
+ 60: 'donut',
200
+ 61: 'cake',
201
+ 62: 'chair',
202
+ 63: 'couch',
203
+ 64: 'potted plant',
204
+ 65: 'bed',
205
+ 66: 'N/A',
206
+ 67: 'dining table',
207
+ 68: 'N/A',
208
+ 69: 'N/A',
209
+ 70: 'toilet',
210
+ 71: 'N/A',
211
+ 72: 'tv',
212
+ 73: 'laptop',
213
+ 74: 'mouse',
214
+ 75: 'remote',
215
+ 76: 'keyboard',
216
+ 77: 'cell phone',
217
+ 78: 'microwave',
218
+ 79: 'oven',
219
+ 80: 'toaster',
220
+ 81: 'sink',
221
+ 82: 'refrigerator',
222
+ 83: 'N/A',
223
+ 84: 'book',
224
+ 85: 'clock',
225
+ 86: 'vase',
226
+ 87: 'scissors',
227
+ 88: 'teddy bear',
228
+ 89: 'hair drier',
229
+ 90: 'toothbrush'
230
+ }
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.1
2
+ Name: pixeltable
3
+ Version: 0.2.8
4
+ Summary: Pixeltable: The Multimodal AI Data Plane
5
+ Author: Marcel Kornacker
6
+ Author-email: marcelk@gmail.com
7
+ Requires-Python: >=3.9,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: av (>=10.0.0)
14
+ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
15
+ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
16
+ Requires-Dist: ftfy (>=6.2.0,<7.0.0)
17
+ Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
18
+ Requires-Dist: jmespath (>=1.0.1,<2.0.0)
19
+ Requires-Dist: mistune (>=3.0.2,<4.0.0)
20
+ Requires-Dist: more-itertools (>=10.2,<11.0)
21
+ Requires-Dist: numpy (>=1.25)
22
+ Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
23
+ Requires-Dist: pandas (>=2.0,<3.0)
24
+ Requires-Dist: pgserver (==0.1.4)
25
+ Requires-Dist: pgvector (>=0.2.1,<0.3.0)
26
+ Requires-Dist: pillow (>=9.3.0)
27
+ Requires-Dist: psutil (>=5.9.5,<6.0.0)
28
+ Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
29
+ Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
30
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
31
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
32
+ Requires-Dist: setuptools (==69.1.1)
33
+ Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
34
+ Requires-Dist: tenacity (>=8.2,<9.0)
35
+ Requires-Dist: tqdm (>=4.64)
36
+ Description-Content-Type: text/markdown
37
+
38
+ <div align="center">
39
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
40
+
41
+ # Unifying Data, Models, and Orchestration for AI Products
42
+
43
+ [![License](https://img.shields.io/badge/License-Apache%202.0-darkblue.svg)](https://opensource.org/licenses/Apache-2.0)
44
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white)
45
+ [![Platform Support](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-8A2BE2)]()
46
+ [![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions)
47
+ [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=darkorange)](https://pypi.org/project/pixeltable/)
48
+
49
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
50
+ </div>
51
+
52
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
53
+
54
+ ## What problems does Pixeltable solve?
55
+
56
+ Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome. With Pixeltable you can store, transform, index, and iterate on your data within the same table interface, whether it's text, images, embeddings, or even video. Built-in lineage and versioning ensure transparency and reproducibility, while the development-to-production mirror streamlines deployment.
57
+
58
+ ## 💾 Installation
59
+
60
+ ```python
61
+ %pip install pixeltable
62
+ ```
63
+
64
+ To verify that it's working:
65
+
66
+ ```python
67
+ import pixeltable as pxt
68
+ pxt.init()
69
+ ```
70
+ > [!NOTE]
71
+ > Check out the [Pixeltable Basics](https://pixeltable.readme.io/docs/pixeltable-basics) tutorial for a tour of its most important features.
72
+
73
+ ## 💡 Get Started
74
+ Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
75
+
76
+ | Topic | Notebook | API |
77
+ |:--------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|
78
+ | Get Started | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/pixeltable/) |
79
+ | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
80
+ | Comparing Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#frame-extraction-for-video-data) |
81
+ | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
82
+ | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#inserting-data-into-a-table) |
83
+
84
+ ## ❓ FAQ
85
+
86
+ ### What does Pixeltable provide me with?
87
+
88
+ - Data storage and versioning
89
+ - Combined Data and Model Lineage
90
+ - Indexing (e.g. embedding vectors) and Data Retrieval
91
+ - Orchestration of multimodal workloads
92
+ - Incremental updates
93
+ - Code is automatically production-ready
94
+
95
+ ### Why should you use Pixeltable?
96
+
97
+ - **It gives you transparency and reproducibility**
98
+ - All generated data is automatically recorded and versioned
99
+ - You will never need to re-run a workload because you lost track of the input data
100
+ - **It saves you money**
101
+ - All data changes are automatically incremental
102
+ - You never need to re-run pipelines from scratch because you’re adding data
103
+ - **It integrates with any existing Python code or libraries**
104
+ - Bring your ever-changing code and workloads
105
+ - You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
106
+
107
+ ### What is Pixeltable not providing?
108
+
109
+ - Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
110
+ - We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
111
+
112
+ > [!TIP]
113
+ > Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
114
+
115
+ ## 📙 Example Use Cases
116
+
117
+ - **Interact with video data at the frame level** without having to think about frame extraction, intermediate file storage, or storage space explosion.
118
+ - **Augment your data incrementally and interactively with built-in functions and UDFs**, such as image transformations, model inference, and visualizations, without having to think about data pipelines, incremental updates, or capturing function output.
119
+ - **Interact with all the data relevant to your AI application** (video, images, documents, audio, structured data, JSON) through a simple dataframe-style API directly in Python. This includes:
120
+ - similarity search on embeddings, supported by high-dimensional vector indexing;
121
+ - path expressions and transformations on JSON data;
122
+ - PIL and OpenCV image operations;
123
+ - assembling frames into videos.
124
+ - **Perform keyword and image similarity search at the video frame level** without having to worry about frame storage.
125
+ - **Access all Pixeltable-resident data directly as a PyTorch dataset** in your training scripts.
126
+ - **Understand the compute and storage costs of your data at the granularity** of individual augmentations and get cost projections before adding new data and new augmentations.
127
+ - **Rely on Pixeltable's automatic versioning and snapshot functionality** to protect against regressions and to ensure reproducibility.
128
+
129
+ ## 🐛 Contributions & Feedback
130
+
131
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
132
+ <br>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
133
+
134
+ ## :classical_building: License
135
+
136
+ This library is licensed under the Apache 2.0 License.
137
+