pixeltable 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

Files changed (56) hide show
  1. pixeltable/__init__.py +3 -1
  2. pixeltable/__version__.py +2 -2
  3. pixeltable/catalog/column.py +14 -2
  4. pixeltable/catalog/insertable_table.py +32 -17
  5. pixeltable/catalog/table.py +194 -12
  6. pixeltable/catalog/table_version.py +270 -110
  7. pixeltable/catalog/table_version_path.py +6 -1
  8. pixeltable/datatransfer/__init__.py +1 -0
  9. pixeltable/datatransfer/label_studio.py +526 -0
  10. pixeltable/datatransfer/remote.py +113 -0
  11. pixeltable/env.py +156 -73
  12. pixeltable/exprs/column_ref.py +2 -2
  13. pixeltable/exprs/comparison.py +39 -1
  14. pixeltable/exprs/data_row.py +7 -0
  15. pixeltable/exprs/expr.py +11 -12
  16. pixeltable/exprs/function_call.py +0 -3
  17. pixeltable/exprs/globals.py +14 -2
  18. pixeltable/exprs/similarity_expr.py +5 -3
  19. pixeltable/ext/functions/whisperx.py +30 -0
  20. pixeltable/ext/functions/yolox.py +16 -0
  21. pixeltable/func/aggregate_function.py +2 -2
  22. pixeltable/func/expr_template_function.py +3 -1
  23. pixeltable/func/udf.py +2 -2
  24. pixeltable/functions/fireworks.py +9 -4
  25. pixeltable/functions/huggingface.py +25 -1
  26. pixeltable/functions/openai.py +15 -10
  27. pixeltable/functions/together.py +11 -6
  28. pixeltable/functions/util.py +0 -43
  29. pixeltable/functions/video.py +46 -8
  30. pixeltable/globals.py +20 -2
  31. pixeltable/index/__init__.py +1 -0
  32. pixeltable/index/base.py +6 -1
  33. pixeltable/index/btree.py +54 -0
  34. pixeltable/index/embedding_index.py +4 -1
  35. pixeltable/io/__init__.py +1 -0
  36. pixeltable/io/globals.py +59 -0
  37. pixeltable/iterators/base.py +4 -4
  38. pixeltable/iterators/document.py +26 -15
  39. pixeltable/iterators/video.py +9 -1
  40. pixeltable/metadata/__init__.py +2 -2
  41. pixeltable/metadata/converters/convert_14.py +13 -0
  42. pixeltable/metadata/converters/convert_15.py +29 -0
  43. pixeltable/metadata/converters/util.py +63 -0
  44. pixeltable/metadata/schema.py +12 -6
  45. pixeltable/plan.py +9 -5
  46. pixeltable/store.py +14 -21
  47. pixeltable/tool/create_test_db_dump.py +16 -0
  48. pixeltable/type_system.py +14 -4
  49. pixeltable/utils/coco.py +94 -0
  50. pixeltable-0.2.7.dist-info/METADATA +137 -0
  51. {pixeltable-0.2.6.dist-info → pixeltable-0.2.7.dist-info}/RECORD +53 -46
  52. pixeltable/func/nos_function.py +0 -202
  53. pixeltable/utils/clip.py +0 -18
  54. pixeltable-0.2.6.dist-info/METADATA +0 -131
  55. {pixeltable-0.2.6.dist-info → pixeltable-0.2.7.dist-info}/LICENSE +0 -0
  56. {pixeltable-0.2.6.dist-info → pixeltable-0.2.7.dist-info}/WHEEL +0 -0
@@ -6,14 +6,22 @@ from typing import Dict, Any, List, Tuple
6
6
  import PIL.Image
7
7
  import cv2
8
8
 
9
- from pixeltable import exprs
10
9
  from pixeltable.exceptions import Error
11
10
  from pixeltable.type_system import ColumnType, VideoType, ImageType, IntType, FloatType
12
11
  from .base import ComponentIterator
13
12
 
14
13
  _logger = logging.getLogger('pixeltable')
15
14
 
15
+
16
16
  class FrameIterator(ComponentIterator):
17
+ """Iterator over frames of a video.
18
+
19
+ Args:
20
+ video: URL or file of the video to use for frame extraction
21
+ fps: number of frames to extract per second of video. This may be a fractional value, such as 0.5.
22
+ If set to 0.0, then the native framerate of the video will be used (all frames will be extracted).
23
+ Default: 0.0
24
+ """
17
25
  def __init__(self, video: str, *, fps: float = 0.0):
18
26
  video_path = Path(video)
19
27
  assert video_path.exists() and video_path.is_file()
@@ -10,11 +10,11 @@ import sqlalchemy.orm as orm
10
10
  from .schema import SystemInfo, SystemInfoMd
11
11
 
12
12
  # current version of the metadata; this is incremented whenever the metadata schema changes
13
- VERSION = 14
13
+ VERSION = 16
14
14
 
15
15
 
16
16
  def create_system_info(engine: sql.engine.Engine) -> None:
17
- """Create the systemmetadata record"""
17
+ """Create the system metadata record"""
18
18
  system_md = SystemInfoMd(schema_version=VERSION)
19
19
  record = SystemInfo(md=dataclasses.asdict(system_md))
20
20
  with orm.Session(engine, future=True) as session:
@@ -0,0 +1,13 @@
1
+ import sqlalchemy as sql
2
+
3
+ from pixeltable.metadata.schema import Table
4
+ from pixeltable.metadata import register_converter
5
+
6
+
7
def convert_14(engine: sql.engine.Engine) -> None:
    """Upgrade table metadata stored at schema version 14.

    Every Table record whose metadata has no 'remotes' entry gets an empty
    'remotes' list merged into its metadata.

    Args:
        engine: SQLAlchemy engine connected to the metadata store.
    """
    # `== None` is intentional here: it is a SQLAlchemy column comparison that builds a SQL
    # expression, not a Python identity test.
    lacks_remotes = Table.md['remotes'] == None
    backfill_stmt = (
        sql.update(Table)
        .where(lacks_remotes)
        .values(md=Table.md.concat({'remotes': []}))
    )
    with engine.begin() as conn:
        conn.execute(backfill_stmt)
11
+
12
+
13
+ register_converter(14, convert_14)
@@ -0,0 +1,29 @@
1
+ import uuid
2
+
3
+ import sqlalchemy as sql
4
+
5
+ from pixeltable.metadata import register_converter
6
+ from pixeltable.metadata.converters.util import convert_table_md
7
+
8
+
9
def convert_15(engine: sql.engine.Engine) -> None:
    """Upgrade table metadata stored at schema version 15.

    Delegates to convert_table_md(), passing update_column_md() as the column metadata
    updater and update_remote_md() as the remote metadata updater.

    Args:
        engine: SQLAlchemy engine connected to the metadata store.
    """
    updaters = {
        'column_md_updater': update_column_md,
        'remote_md_updater': update_remote_md,
    }
    convert_table_md(engine, **updaters)
11
+
12
+
13
def update_column_md(column_md: dict) -> None:
    """Set the 'proxy_base' entry of a column metadata dict to None, in place.

    Args:
        column_md: metadata dict of a single column; modified in place.
    """
    column_md.update(proxy_base=None)
15
+
16
+
17
def update_remote_md(remote_md: dict) -> None:
    """Rewrite the metadata dict of a single remote, in place.

    The separate 'module' and 'class' entries are merged into one fully qualified
    'class' entry ('<module>.<class>') and 'module' is removed. Depending on the
    resulting class, a class-specific entry is added to the nested 'remote_md' dict:

    - MockRemote: a generated 'name' of the form 'remote_<uuid4>'
    - LabelStudioProject: 'media_import_method' set to 'post'

    Args:
        remote_md: metadata dict of one remote; must contain 'module', 'class' and
            'remote_md' entries.

    Raises:
        AssertionError: if the fully qualified class is not one of the known remotes.
        KeyError: if a required entry is missing.
    """
    qualified_class = f'{remote_md["module"]}.{remote_md["class"]}'
    remote_md['class'] = qualified_class
    del remote_md['module']
    if qualified_class == 'pixeltable.datatransfer.remote.MockRemote':
        remote_md['remote_md']['name'] = f'remote_{uuid.uuid4()}'
    elif qualified_class == 'pixeltable.datatransfer.label_studio.LabelStudioProject':
        # 'post' is the media_import_method for legacy LabelStudioProject remotes
        remote_md['remote_md']['media_import_method'] = 'post'
    else:
        # Raised explicitly rather than via `assert False`: assert statements are removed
        # under `python -O`, which would let unknown remote classes pass silently.
        raise AssertionError(qualified_class)
27
+
28
+
29
+ register_converter(15, convert_15)
@@ -0,0 +1,63 @@
1
+ import copy
2
+ import logging
3
+ from typing import Any, Callable, Optional
4
+
5
+ import sqlalchemy as sql
6
+
7
+ from pixeltable.metadata.schema import Table
8
+
9
+ __logger = logging.getLogger('pixeltable')
10
+
11
+
12
def convert_table_md(
    engine: sql.engine.Engine,
    column_md_updater: Optional[Callable[[dict], None]] = None,
    remote_md_updater: Optional[Callable[[dict], None]] = None,
    substitution_fn: Optional[Callable[[Any, Any], Optional[tuple[Any, Any]]]] = None
) -> None:
    """Apply the given converters to the metadata of every Table record.

    For each record, a deep copy of its metadata dict is transformed by whichever of the
    optional converters were supplied, in this order: column updater, remote updater,
    substitution function. The record is written back only if the result differs from
    the stored metadata. All work happens inside a single `engine.begin()` block.

    Args:
        engine: SQLAlchemy engine connected to the metadata store.
        column_md_updater: called on each column's metadata dict; mutates it in place.
        remote_md_updater: called on each remote's metadata dict; mutates it in place.
        substitution_fn: called with every (key, value) pair found in nested dicts; returns
            a replacement (key, value) tuple, or None to leave the pair as is.
    """
    with engine.begin() as conn:
        for record in conn.execute(sql.select(Table)):
            # positional access: element 0 is the table id, element 2 the metadata dict
            tbl_id, stored_md = record[0], record[2]
            assert isinstance(stored_md, dict)
            # transform a deep copy so the stored version stays intact for the change check
            new_md = copy.deepcopy(stored_md)
            if column_md_updater is not None:
                __update_column_md(new_md, column_md_updater)
            if remote_md_updater is not None:
                __update_remote_md(new_md, remote_md_updater)
            if substitution_fn is not None:
                new_md = __substitute_md_rec(new_md, substitution_fn)
            if new_md == stored_md:
                # nothing changed: skip the write
                continue
            __logger.info(f'Updating schema for table: {tbl_id}')
            conn.execute(sql.update(Table).where(Table.id == tbl_id).values(md=new_md))
33
+
34
+
35
def __update_column_md(table_md: dict, column_md_updater: Callable[[dict], None]) -> None:
    """Invoke column_md_updater on every value of table_md['column_md'].

    Args:
        table_md: table metadata dict; its 'column_md' entry must be a dict.
        column_md_updater: callback applied to each column metadata dict in turn.
    """
    per_column = table_md['column_md']
    assert isinstance(per_column, dict)
    for entry in per_column.values():
        column_md_updater(entry)
40
+
41
+
42
def __update_remote_md(table_md: dict, remote_md_updater: Callable[[dict], None]) -> None:
    """Invoke remote_md_updater on every element of table_md['remotes'].

    Args:
        table_md: table metadata dict; its 'remotes' entry must be a list.
        remote_md_updater: callback applied to each remote metadata dict in turn.
    """
    remote_entries = table_md['remotes']
    assert isinstance(remote_entries, list)
    for entry in remote_entries:
        remote_md_updater(entry)
47
+
48
+
49
def __substitute_md_rec(md: Any, substitution_fn: Callable[[Any, Any], Optional[tuple[Any, Any]]]) -> Any:
    """Return a copy of a nested dict/list structure with substitutions applied.

    substitution_fn is called for every (key, value) pair of every dict encountered.
    If it returns a (key, value) tuple, that pair replaces the original one and the
    replacement value is not descended into. If it returns None, the key is kept and
    the value is processed recursively. Lists are processed element by element; any
    other value is returned unchanged.

    Args:
        md: the structure to process.
        substitution_fn: maps a (key, value) pair to a replacement pair, or None.

    Returns:
        The rebuilt structure; dicts and lists are new objects.
    """
    if isinstance(md, list):
        return [__substitute_md_rec(item, substitution_fn) for item in md]
    if not isinstance(md, dict):
        return md
    rebuilt = {}
    for key, value in md.items():
        replacement = substitution_fn(key, value)
        if replacement is None:
            rebuilt[key] = __substitute_md_rec(value, substitution_fn)
        else:
            new_key, new_value = replacement
            rebuilt[new_key] = new_value
    return rebuilt
@@ -1,12 +1,11 @@
1
- from typing import Optional, List, get_type_hints, Type, Any, TypeVar, Tuple, Union
2
- import platform
3
- import uuid
4
1
  import dataclasses
2
+ import uuid
3
+ from typing import Optional, List, get_type_hints, Type, Any, TypeVar, Tuple, Union
5
4
 
6
5
  import sqlalchemy as sql
7
- from sqlalchemy import Integer, String, Boolean, BigInteger, LargeBinary
6
+ from sqlalchemy import ForeignKey
7
+ from sqlalchemy import Integer, BigInteger, LargeBinary
8
8
  from sqlalchemy.dialects.postgresql import UUID, JSONB
9
- from sqlalchemy import ForeignKey, UniqueConstraint, ForeignKeyConstraint
10
9
  from sqlalchemy.orm import declarative_base
11
10
 
12
11
  Base = declarative_base()
@@ -93,6 +92,9 @@ class ColumnMd:
93
92
  # if True, the column is present in the stored table
94
93
  stored: Optional[bool]
95
94
 
95
+ # if specified, the column is a stored proxy of another column
96
+ proxy_base: Optional[int]
97
+
96
98
 
97
99
  @dataclasses.dataclass
98
100
  class IndexMd:
@@ -143,6 +145,10 @@ class TableMd:
143
145
  # - every row is assigned a unique and immutable rowid on insertion
144
146
  next_row_id: int
145
147
 
148
+ # Metadata format for remotes:
149
+ # {'class': 'pixeltable.datatransfer.label_studio.LabelStudioProject', 'remote_md': {'project_id': 3}}
150
+ remotes: list[dict[str, Any]]
151
+
146
152
  column_md: dict[int, ColumnMd] # col_id -> ColumnMd
147
153
  index_md: dict[int, IndexMd] # index_id -> IndexMd
148
154
  view_md: Optional[ViewMd]
@@ -160,7 +166,7 @@ class Table(Base):
160
166
 
161
167
  MAX_VERSION = 9223372036854775807 # 2^63 - 1
162
168
 
163
- id = sql.Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, nullable=False)
169
+ id = sql.Column(UUID(as_uuid=True), primary_key=True, nullable=False)
164
170
  dir_id = sql.Column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=False)
165
171
  md = sql.Column(JSONB, nullable=False) # TableMd
166
172
 
pixeltable/plan.py CHANGED
@@ -251,7 +251,7 @@ class Planner:
251
251
  Returns:
252
252
  - root node of the plan
253
253
  - list of qualified column names that are getting updated
254
- - list of columns that are being recomputed
254
+ - list of user-visible columns that are being recomputed
255
255
  """
256
256
  # retrieve all stored cols and all target exprs
257
257
  assert isinstance(tbl, catalog.TableVersionPath)
@@ -260,7 +260,10 @@ class Planner:
260
260
  if len(recompute_targets) > 0:
261
261
  recomputed_cols = recompute_targets.copy()
262
262
  else:
263
- recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else {}
263
+ recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
264
+ # regardless of cascade, we need to update all indices on any updated column
265
+ idx_val_cols = target.get_idx_val_columns(updated_cols)
266
+ recomputed_cols.update(idx_val_cols)
264
267
  # we only need to recompute stored columns (unstored ones are substituted away)
265
268
  recomputed_cols = {c for c in recomputed_cols if c.is_stored}
266
269
  recomputed_base_cols = {col for col in recomputed_cols if col.tbl == target}
@@ -273,8 +276,8 @@ class Planner:
273
276
  recomputed_exprs = \
274
277
  [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols]
275
278
  # recomputed cols reference the new values of the updated cols
276
- for col, e in update_targets.items():
277
- exprs.Expr.list_substitute(recomputed_exprs, exprs.ColumnRef(col), e)
279
+ spec = {exprs.ColumnRef(col): e for col, e in update_targets.items()}
280
+ exprs.Expr.list_substitute(recomputed_exprs, spec)
278
281
  select_list.extend(recomputed_exprs)
279
282
 
280
283
  # we need to retrieve the PK columns of the existing rows
@@ -282,7 +285,8 @@ class Planner:
282
285
  all_base_cols = copied_cols + updated_cols + list(recomputed_base_cols) # same order as select_list
283
286
  # update row builder with column information
284
287
  [plan.row_builder.add_table_column(col, select_list[i].slot_idx) for i, col in enumerate(all_base_cols)]
285
- return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + list(recomputed_cols)], list(recomputed_cols)
288
+ recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
289
+ return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
286
290
 
287
291
  @classmethod
288
292
  def create_view_update_plan(
pixeltable/store.py CHANGED
@@ -66,7 +66,6 @@ class StoreBase:
66
66
  """Create self.sa_tbl from self.tbl_version."""
67
67
  system_cols = self._create_system_columns()
68
68
  all_cols = system_cols.copy()
69
- idxs: List[sql.Index] = []
70
69
  for col in [c for c in self.tbl_version.cols if c.is_stored]:
71
70
  # re-create sql.Column for each column, regardless of whether it already has sa_col set: it was bound
72
71
  # to the last sql.Table version we created and cannot be reused
@@ -76,26 +75,18 @@ class StoreBase:
76
75
  all_cols.append(col.sa_errormsg_col)
77
76
  all_cols.append(col.sa_errortype_col)
78
77
 
79
- # we create an index for:
80
- # - scalar columns (except for strings, because long strings can't be used for B-tree indices)
81
- # - non-computed video and image columns (they will contain external paths/urls that users might want to
82
- # filter on)
83
- if (col.col_type.is_scalar_type() and not col.col_type.is_string_type()) \
84
- or (col.col_type.is_media_type() and not col.is_computed):
85
- # index names need to be unique within the Postgres instance
86
- idx_name = f'idx_{col.id}_{self.tbl_version.id.hex}'
87
- idxs.append(sql.Index(idx_name, col.sa_col))
88
-
89
78
  if self.sa_tbl is not None:
90
79
  # if we're called in response to a schema change, we need to remove the old table first
91
80
  self.sa_md.remove(self.sa_tbl)
92
81
 
82
+ idxs: List[sql.Index] = []
93
83
  # index for all system columns:
94
84
  # - base x view joins can be executed as merge joins
95
85
  # - speeds up ORDER BY rowid DESC
96
86
  # - allows filtering for a particular table version in index scan
97
87
  idx_name = f'sys_cols_idx_{self.tbl_version.id.hex}'
98
88
  idxs.append(sql.Index(idx_name, *system_cols))
89
+
99
90
  # v_min/v_max indices: speeds up base table scans needed to propagate a base table insert or delete
100
91
  idx_name = f'vmin_idx_{self.tbl_version.id.hex}'
101
92
  idxs.append(sql.Index(idx_name, self.v_min_col, postgresql_using='brin'))
@@ -264,7 +255,8 @@ class StoreBase:
264
255
  return num_excs
265
256
 
266
257
  def insert_rows(
267
- self, exec_plan: ExecNode, conn: sql.engine.Connection, v_min: Optional[int] = None
258
+ self, exec_plan: ExecNode, conn: sql.engine.Connection, v_min: Optional[int] = None,
259
+ show_progress: bool = True
268
260
  ) -> Tuple[int, int, Set[int]]:
269
261
  """Insert rows into the store table and update the catalog table's md
270
262
  Returns:
@@ -293,15 +285,16 @@ class StoreBase:
293
285
  self._create_table_row(row, row_builder, media_cols, cols_with_excs, v_min=v_min)
294
286
  num_excs += num_row_exc
295
287
  table_rows.append(table_row)
296
- if progress_bar is None:
297
- warnings.simplefilter("ignore", category=TqdmWarning)
298
- progress_bar = tqdm(
299
- desc=f'Inserting rows into `{self.tbl_version.name}`',
300
- unit=' rows',
301
- ncols=100,
302
- file=sys.stdout
303
- )
304
- progress_bar.update(1)
288
+ if show_progress:
289
+ if progress_bar is None:
290
+ warnings.simplefilter("ignore", category=TqdmWarning)
291
+ progress_bar = tqdm(
292
+ desc=f'Inserting rows into `{self.tbl_version.name}`',
293
+ unit=' rows',
294
+ ncols=100,
295
+ file=sys.stdout
296
+ )
297
+ progress_bar.update(1)
305
298
  self._move_tmp_media_files(table_rows, media_cols, v_min)
306
299
  conn.execute(sql.insert(self.sa_tbl), table_rows)
307
300
  if progress_bar is not None:
@@ -30,6 +30,8 @@ class Dumper:
30
30
  os.environ['PIXELTABLE_DB'] = db_name
31
31
  os.environ['PIXELTABLE_PGDATA'] = str(shared_home / 'pgdata')
32
32
 
33
+ Env._init_env(reinit_db=True)
34
+
33
35
  Env.get().configure_logging(level=logging.DEBUG, to_stdout=True)
34
36
 
35
37
  def dump_db(self) -> None:
@@ -162,6 +164,20 @@ class Dumper:
162
164
  # astype
163
165
  v['astype'] = t.c1.astype(pxt.FloatType())
164
166
 
167
+ # Add remotes
168
+ from pixeltable.datatransfer.remote import MockRemote
169
+ v.link(
170
+ MockRemote('remote', {'int_field': pxt.IntType()}, {'str_field': pxt.StringType()}),
171
+ col_mapping={'test_udf': 'int_field', 'c1': 'str_field'}
172
+ )
173
+ # We're just trying to test metadata here, so reach "under the covers" and link a fake
174
+ # Label Studio project without validation (so we don't need a real Label Studio server)
175
+ from pixeltable.datatransfer.label_studio import LabelStudioProject
176
+ v.tbl_version_path.tbl_version.link(
177
+ LabelStudioProject(4171780, media_import_method='file'),
178
+ col_mapping={'str_format': 'str_format'}
179
+ )
180
+
165
181
 
166
182
  @pxt.udf(_force_stored=True)
167
183
  def test_udf_stored(n: int) -> int:
pixeltable/type_system.py CHANGED
@@ -7,7 +7,7 @@ import json
7
7
  import typing
8
8
  import urllib.parse
9
9
  import urllib.request
10
- from copy import copy
10
+ from copy import deepcopy
11
11
  from pathlib import Path
12
12
  from typing import Any, Optional, Tuple, Dict, Callable, List, Union, Sequence, Mapping
13
13
 
@@ -82,7 +82,11 @@ class ColumnType:
82
82
 
83
83
  def __init__(self, t: Type, nullable: bool = False):
84
84
  self._type = t
85
- self.nullable = nullable
85
+ self._nullable = nullable
86
+
87
+ @property
88
+ def nullable(self) -> bool:
89
+ return self._nullable
86
90
 
87
91
  @property
88
92
  def type_enum(self) -> Type:
@@ -91,6 +95,12 @@ class ColumnType:
91
95
  def serialize(self) -> str:
92
96
  return json.dumps(self.as_dict())
93
97
 
98
+ def copy(self, nullable: Optional[bool] = None) -> ColumnType:
99
+ result = deepcopy(self)
100
+ if nullable is not None:
101
+ result._nullable = nullable
102
+ return result
103
+
94
104
  @classmethod
95
105
  def serialize_list(cls, type_list: List[ColumnType]) -> str:
96
106
  return json.dumps([t.as_dict() for t in type_list])
@@ -177,7 +187,7 @@ class ColumnType:
177
187
  if type(self) != type(other):
178
188
  return False
179
189
  for member_var in vars(self).keys():
180
- if member_var == 'nullable':
190
+ if member_var == '_nullable':
181
191
  continue
182
192
  if getattr(self, member_var) != getattr(other, member_var):
183
193
  return False
@@ -250,7 +260,7 @@ class ColumnType:
250
260
  # We treat it as the underlying type but with nullable=True.
251
261
  underlying = cls.from_python_type(union_args[0])
252
262
  if underlying is not None:
253
- underlying.nullable = True
263
+ underlying._nullable = True
254
264
  return underlying
255
265
  else:
256
266
  # Discard type parameters to ensure that parameterized types such as `list[T]`
pixeltable/utils/coco.py CHANGED
@@ -134,3 +134,97 @@ def write_coco_dataset(df: 'pixeltable.DataFrame', dest_path: Path) -> Path:
134
134
  json.dump(result, f)
135
135
  return output_path
136
136
 
137
+
138
# Maps integer category ids (0 through 90) to category names.
# Ids mapped to 'N/A' are placeholders without a real category name
# (presumably ids that are unassigned in the COCO 2017 label set; confirm against the dataset).
COCO_2017_CATEGORIES = {
    0: 'N/A',
    1: 'person',
    2: 'bicycle',
    3: 'car',
    4: 'motorcycle',
    5: 'airplane',
    6: 'bus',
    7: 'train',
    8: 'truck',
    9: 'boat',
    10: 'traffic light',
    11: 'fire hydrant',
    12: 'N/A',
    13: 'stop sign',
    14: 'parking meter',
    15: 'bench',
    16: 'bird',
    17: 'cat',
    18: 'dog',
    19: 'horse',
    20: 'sheep',
    21: 'cow',
    22: 'elephant',
    23: 'bear',
    24: 'zebra',
    25: 'giraffe',
    26: 'N/A',
    27: 'backpack',
    28: 'umbrella',
    29: 'N/A',
    30: 'N/A',
    31: 'handbag',
    32: 'tie',
    33: 'suitcase',
    34: 'frisbee',
    35: 'skis',
    36: 'snowboard',
    37: 'sports ball',
    38: 'kite',
    39: 'baseball bat',
    40: 'baseball glove',
    41: 'skateboard',
    42: 'surfboard',
    43: 'tennis racket',
    44: 'bottle',
    45: 'N/A',
    46: 'wine glass',
    47: 'cup',
    48: 'fork',
    49: 'knife',
    50: 'spoon',
    51: 'bowl',
    52: 'banana',
    53: 'apple',
    54: 'sandwich',
    55: 'orange',
    56: 'broccoli',
    57: 'carrot',
    58: 'hot dog',
    59: 'pizza',
    60: 'donut',
    61: 'cake',
    62: 'chair',
    63: 'couch',
    64: 'potted plant',
    65: 'bed',
    66: 'N/A',
    67: 'dining table',
    68: 'N/A',
    69: 'N/A',
    70: 'toilet',
    71: 'N/A',
    72: 'tv',
    73: 'laptop',
    74: 'mouse',
    75: 'remote',
    76: 'keyboard',
    77: 'cell phone',
    78: 'microwave',
    79: 'oven',
    80: 'toaster',
    81: 'sink',
    82: 'refrigerator',
    83: 'N/A',
    84: 'book',
    85: 'clock',
    86: 'vase',
    87: 'scissors',
    88: 'teddy bear',
    89: 'hair drier',
    90: 'toothbrush'
}
@@ -0,0 +1,137 @@
1
+ Metadata-Version: 2.1
2
+ Name: pixeltable
3
+ Version: 0.2.7
4
+ Summary: Pixeltable: The Multimodal AI Data Plane
5
+ Author: Marcel Kornacker
6
+ Author-email: marcelk@gmail.com
7
+ Requires-Python: >=3.9,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: av (>=10.0.0)
14
+ Requires-Dist: beautifulsoup4 (>=4.0.0,<5.0.0)
15
+ Requires-Dist: cloudpickle (>=2.2.1,<3.0.0)
16
+ Requires-Dist: ftfy (>=6.2.0,<7.0.0)
17
+ Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
18
+ Requires-Dist: jmespath (>=1.0.1,<2.0.0)
19
+ Requires-Dist: mistune (>=3.0.2,<4.0.0)
20
+ Requires-Dist: more-itertools (>=10.2,<11.0)
21
+ Requires-Dist: numpy (>=1.25)
22
+ Requires-Dist: opencv-python-headless (>=4.7.0.68,<5.0.0.0)
23
+ Requires-Dist: pandas (>=2.0,<3.0)
24
+ Requires-Dist: pgserver (==0.1.3)
25
+ Requires-Dist: pgvector (>=0.2.1,<0.3.0)
26
+ Requires-Dist: pillow (>=9.3.0)
27
+ Requires-Dist: psutil (>=5.9.5,<6.0.0)
28
+ Requires-Dist: psycopg2-binary (>=2.9.5,<3.0.0)
29
+ Requires-Dist: pymupdf (>=1.24.1,<2.0.0)
30
+ Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
31
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
32
+ Requires-Dist: setuptools (==69.1.1)
33
+ Requires-Dist: sqlalchemy[mypy] (>=2.0.23,<3.0.0)
34
+ Requires-Dist: tenacity (>=8.2,<9.0)
35
+ Requires-Dist: tqdm (>=4.64)
36
+ Description-Content-Type: text/markdown
37
+
38
+ <div align="center">
39
+ <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/master/docs/release/pixeltable-banner.png" alt="Pixeltable" width="45%" />
40
+
41
+ # Unifying Data, Models, and Orchestration for AI Products
42
+
43
+ [![License](https://img.shields.io/badge/License-Apache%202.0-darkblue.svg)](https://opensource.org/licenses/Apache-2.0)
44
+ ![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pixeltable?logo=python&logoColor=white)
45
+ [![Platform Support](https://img.shields.io/badge/platform-Linux%20%7C%20macOS%20%7C%20Windows-8A2BE2)]()
46
+ [![pytest status](https://github.com/pixeltable/pixeltable/actions/workflows/pytest.yml/badge.svg)](https://github.com/pixeltable/pixeltable/actions)
47
+ [![PyPI Package](https://img.shields.io/pypi/v/pixeltable?color=darkorange)](https://pypi.org/project/pixeltable/)
48
+
49
+ [Installation](https://pixeltable.github.io/pixeltable/getting-started/) | [Documentation](https://pixeltable.readme.io/) | [API Reference](https://pixeltable.github.io/pixeltable/) | [Code Samples](https://pixeltable.readme.io/recipes) | [Examples](https://github.com/pixeltable/pixeltable/tree/master/docs/release/tutorials)
50
+ </div>
51
+
52
+ Pixeltable is a Python library that lets AI engineers and data scientists focus on exploration, modeling, and app development without dealing with the customary data plumbing.
53
+
54
+ ## What problems does Pixeltable solve?
55
+
56
+ Today’s solutions for AI app development require extensive custom coding and infrastructure plumbing. Tracking lineage and versions between and across data transformations, models, and deployment is cumbersome. With Pixeltable you can store, transform, index, and iterate on your data within the same table interface, whether it's text, images, embeddings, or even video. Built-in lineage and versioning ensure transparency and reproducibility, while the development-to-production mirror streamlines deployment.
57
+
58
+ ## 💾 Installation
59
+
60
+ ```python
61
+ %pip install pixeltable
62
+ ```
63
+
64
+ To verify that it's working:
65
+
66
+ ```python
67
+ import pixeltable as pxt
68
+ pxt.init()
69
+ ```
70
+ > [!NOTE]
71
+ > Check out the [Pixeltable Basics](https://pixeltable.readme.io/docs/pixeltable-basics) tutorial for a tour of its most important features.
72
+
73
+ ## 💡 Get Started
74
+ Learn how to create tables, populate them with data, and enhance them with built-in or user-defined transformations and AI operations.
75
+
76
+ | Topic | Notebook | API |
77
+ |:--------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------:|
78
+ | Get Started | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/tutorials/pixeltable-basics.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/pixeltable/) |
79
+ | User-Defined Functions (UDFs) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/udfs-in-pixeltable.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
80
+ | Comparing Object Detection Models | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/object-detection-in-videos.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#frame-extraction-for-video-data) |
81
+ | Experimenting with Chunking (RAG) | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/tutorials/rag-operations.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api/iterators/document-splitter/) |
82
+ | Working with External Files | <a target="_blank" href="https://colab.research.google.com/github/pixeltable/pixeltable/blob/master/docs/release/howto/working-with-external-files.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a> | [![API](https://img.shields.io/badge/api-reference-blue.svg)](https://pixeltable.github.io/pixeltable/api-cheat-sheet/#inserting-data-into-a-table) |
83
+
84
+ ## ❓ FAQ
85
+
86
+ ### What does Pixeltable provide me with?
+
+ Pixeltable provides:
87
+
88
+ - Data storage and versioning
89
+ - Combined Data and Model Lineage
90
+ - Indexing (e.g. embedding vectors) and Data Retrieval
91
+ - Orchestration of multimodal workloads
92
+ - Incremental updates
93
+ - Code is automatically production-ready
94
+
95
+ ### Why should you use Pixeltable?
96
+
97
+ - **It gives you transparency and reproducibility**
98
+ - All generated data is automatically recorded and versioned
99
+ - You will never need to re-run a workload because you lost track of the input data
100
+ - **It saves you money**
101
+ - All data changes are automatically incremental
102
+ - You never need to re-run pipelines from scratch because you’re adding data
103
+ - **It integrates with any existing Python code or libraries**
104
+ - Bring your ever-changing code and workloads
105
+ - You choose the models, tools, and AI practices (e.g., your embedding model for a vector index); Pixeltable orchestrates the data
106
+
107
+ ### What does Pixeltable not provide?
108
+
109
+ - Pixeltable is not a low-code, prescriptive AI solution. We empower you to use the best frameworks and techniques for your specific needs.
110
+ - We do not aim to replace your existing AI toolkit, but rather enhance it by streamlining the underlying data infrastructure and orchestration.
111
+
112
+ > [!TIP]
113
+ > Check out the [Integrations](https://pixeltable.readme.io/docs/working-with-openai) section, and feel free to submit a request for additional ones.
114
+
115
+ ## 📙 Example Use Cases
116
+
117
+ - **Interact with video data at the frame level** without having to think about frame extraction, intermediate file storage, or storage space explosion.
118
+ - **Augment your data incrementally and interactively with built-in functions and UDFs**, such as image transformations, model inference, and visualizations, without having to think about data pipelines, incremental updates, or capturing function output.
119
+ - **Interact with all the data relevant to your AI application** (video, images, documents, audio, structured data, JSON) through a simple dataframe-style API directly in Python. This includes:
120
+ - similarity search on embeddings, supported by high-dimensional vector indexing;
121
+ - path expressions and transformations on JSON data;
122
+ - PIL and OpenCV image operations;
123
+ - assembling frames into videos.
124
+ - **Perform keyword and image similarity search at the video frame level** without having to worry about frame storage.
125
+ - **Access all Pixeltable-resident data directly as a PyTorch dataset** in your training scripts.
126
+ - **Understand the compute and storage costs of your data at the granularity** of individual augmentations and get cost projections before adding new data and new augmentations.
127
+ - **Rely on Pixeltable's automatic versioning and snapshot functionality** to protect against regressions and to ensure reproducibility.
128
+
129
+ ## 🐛 Contributions & Feedback
130
+
131
+ Are you experiencing issues or bugs with Pixeltable? File an [Issue](https://github.com/pixeltable/pixeltable/issues).
132
+ <br/>Do you want to contribute? Feel free to open a [PR](https://github.com/pixeltable/pixeltable/pulls).
133
+
134
+ ## 🏛️ License
135
+
136
+ This library is licensed under the Apache 2.0 License.
137
+