pixeltable 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of pixeltable might be problematic.
Files changed (56)
  1. pixeltable/catalog/column.py +25 -48
  2. pixeltable/catalog/insertable_table.py +7 -4
  3. pixeltable/catalog/table.py +163 -57
  4. pixeltable/catalog/table_version.py +416 -140
  5. pixeltable/catalog/table_version_path.py +2 -2
  6. pixeltable/client.py +0 -4
  7. pixeltable/dataframe.py +65 -21
  8. pixeltable/env.py +16 -1
  9. pixeltable/exec/cache_prefetch_node.py +1 -1
  10. pixeltable/exec/in_memory_data_node.py +11 -7
  11. pixeltable/exprs/comparison.py +3 -3
  12. pixeltable/exprs/data_row.py +5 -1
  13. pixeltable/exprs/literal.py +16 -4
  14. pixeltable/exprs/row_builder.py +8 -40
  15. pixeltable/ext/__init__.py +5 -0
  16. pixeltable/ext/functions/yolox.py +92 -0
  17. pixeltable/func/aggregate_function.py +15 -15
  18. pixeltable/func/expr_template_function.py +9 -1
  19. pixeltable/func/globals.py +24 -14
  20. pixeltable/func/signature.py +18 -12
  21. pixeltable/func/udf.py +7 -2
  22. pixeltable/functions/__init__.py +8 -8
  23. pixeltable/functions/eval.py +7 -8
  24. pixeltable/functions/huggingface.py +47 -19
  25. pixeltable/functions/openai.py +2 -2
  26. pixeltable/functions/util.py +11 -0
  27. pixeltable/index/__init__.py +2 -0
  28. pixeltable/index/base.py +49 -0
  29. pixeltable/index/embedding_index.py +95 -0
  30. pixeltable/metadata/schema.py +45 -22
  31. pixeltable/plan.py +15 -34
  32. pixeltable/store.py +38 -41
  33. pixeltable/tests/conftest.py +5 -11
  34. pixeltable/tests/ext/test_yolox.py +21 -0
  35. pixeltable/tests/functions/test_fireworks.py +1 -0
  36. pixeltable/tests/functions/test_huggingface.py +2 -2
  37. pixeltable/tests/functions/test_openai.py +15 -5
  38. pixeltable/tests/functions/test_together.py +1 -0
  39. pixeltable/tests/test_component_view.py +14 -5
  40. pixeltable/tests/test_dataframe.py +19 -18
  41. pixeltable/tests/test_exprs.py +99 -102
  42. pixeltable/tests/test_function.py +51 -43
  43. pixeltable/tests/test_index.py +138 -0
  44. pixeltable/tests/test_migration.py +2 -1
  45. pixeltable/tests/test_snapshot.py +24 -1
  46. pixeltable/tests/test_table.py +101 -25
  47. pixeltable/tests/test_types.py +30 -0
  48. pixeltable/tests/test_video.py +16 -16
  49. pixeltable/tests/test_view.py +5 -0
  50. pixeltable/tests/utils.py +43 -9
  51. pixeltable/tool/create_test_db_dump.py +16 -0
  52. pixeltable/type_system.py +37 -45
  53. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/METADATA +5 -4
  54. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/RECORD +56 -49
  55. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/LICENSE +0 -0
  56. {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/WHEEL +0 -0
pixeltable/plan.py CHANGED
@@ -76,7 +76,8 @@ class Analyzer:
                     f'order_by()'))
             self.similarity_clause = similarity_clauses[0]
             img_col = self.similarity_clause.img_col_ref.col
-            if not img_col.is_indexed:
+            indexed_col_ids = {info.col.id for info in tbl.tbl_version.idxs_by_name.values()}
+            if img_col.id not in indexed_col_ids:
                 raise excs.Error(f'nearest() not available for unindexed column {img_col.name}')
 
         # all exprs that are evaluated in Python; not executable
@@ -220,18 +221,11 @@ class Planner:
     ) -> exec.ExecNode:
         """Creates a plan for TableVersion.insert()"""
         assert not tbl.is_view()
-        # things we need to materialize:
-        # 1. stored_cols: all cols we need to store, incl computed cols (and indices)
+        # stored_cols: all cols we need to store, incl computed cols (and indices)
         stored_cols = [c for c in tbl.cols if c.is_stored]
         assert len(stored_cols) > 0
-        # 2. values to insert into indices
-        indexed_cols = [c for c in tbl.cols if c.is_indexed]
-        index_info: List[Tuple[catalog.Column, func.Function]] = []
-        if len(indexed_cols) > 0:
-            from pixeltable.functions.nos.image_embedding import openai_clip
-            index_info = [(c, openai_clip) for c in tbl.cols if c.is_indexed]
 
-        row_builder = exprs.RowBuilder([], stored_cols, index_info, [])
+        row_builder = exprs.RowBuilder([], stored_cols, [])
 
         # create InMemoryDataNode for 'rows'
         stored_col_info = row_builder.output_slot_idxs()
@@ -260,7 +254,7 @@ class Planner:
     @classmethod
     def create_update_plan(
             cls, tbl: catalog.TableVersionPath,
-            update_targets: List[Tuple[catalog.Column, exprs.Expr]],
+            update_targets: dict[catalog.Column, exprs.Expr],
             recompute_targets: List[catalog.Column],
             where_clause: Optional[exprs.Predicate], cascade: bool
     ) -> Tuple[exec.ExecNode, List[str], List[catalog.Column]]:
@@ -279,7 +273,7 @@ class Planner:
         # retrieve all stored cols and all target exprs
         assert isinstance(tbl, catalog.TableVersionPath)
         target = tbl.tbl_version  # the one we need to update
-        updated_cols = [col for col, _ in update_targets]
+        updated_cols = list(update_targets.keys())
         if len(recompute_targets) > 0:
             recomputed_cols = recompute_targets.copy()
         else:
@@ -291,12 +285,12 @@ class Planner:
             col for col in target.cols if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
         ]
         select_list = [exprs.ColumnRef(col) for col in copied_cols]
-        select_list.extend([expr for _, expr in update_targets])
+        select_list.extend(update_targets.values())
 
         recomputed_exprs = \
            [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols]
         # recomputed cols reference the new values of the updated cols
-        for col, e in update_targets:
+        for col, e in update_targets.items():
             exprs.Expr.list_substitute(recomputed_exprs, exprs.ColumnRef(col), e)
         select_list.extend(recomputed_exprs)
 
@@ -375,16 +369,10 @@ class Planner:
         # the store
         target = view.tbl_version  # the one we need to populate
         stored_cols = [c for c in target.cols if c.is_stored and (c.is_computed or target.is_iterator_column(c))]
-        # 2. index values
-        indexed_cols = [c for c in target.cols if c.is_indexed]
-        index_info: List[Tuple[catalog.Column, func.Function]] = []
-        if len(indexed_cols) > 0:
-            from pixeltable.functions.nos.image_embedding import openai_clip
-            index_info = [(c, openai_clip) for c in target.cols if c.is_indexed]
-        # 3. for component views: iterator args
+        # 2. for component views: iterator args
         iterator_args = [target.iterator_args] if target.iterator_args is not None else []
 
-        row_builder = exprs.RowBuilder(iterator_args, stored_cols, index_info, [])
+        row_builder = exprs.RowBuilder(iterator_args, stored_cols, [])
 
         # execution plan:
         # 1. materialize exprs computed from the base that are needed for stored view columns
@@ -548,7 +536,7 @@ class Planner:
         analyzer = Analyzer(
             tbl, select_list, where_clause=where_clause, group_by_clause=group_by_clause,
             order_by_clause=order_by_clause)
-        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], [], analyzer.sql_exprs)
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], analyzer.sql_exprs)
 
         analyzer.finalize(row_builder)
         # select_list: we need to materialize everything that's been collected
@@ -627,21 +615,15 @@ class Planner:
     @classmethod
     def create_add_column_plan(
             cls, tbl: catalog.TableVersionPath, col: catalog.Column
-    ) -> Tuple[exec.ExecNode, Optional[int], Optional[int]]:
+    ) -> Tuple[exec.ExecNode, Optional[int]]:
         """Creates a plan for InsertableTable.add_column()
         Returns:
             plan: the plan to execute
-            ctx: the context to use for the plan
            value_expr slot idx for the plan output (for computed cols)
-            embedding slot idx for the plan output (for indexed image cols)
        """
         assert isinstance(tbl, catalog.TableVersionPath)
         index_info: List[Tuple[catalog.Column, func.Function]] = []
-        if col.is_indexed:
-            from pixeltable.functions.nos.image_embedding import openai_clip
-            index_info = [(col, openai_clip)]
-        row_builder = exprs.RowBuilder(
-            output_exprs=[], columns=[col], indices=index_info, input_exprs=[])
+        row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
         analyzer = Analyzer(tbl, row_builder.default_eval_ctx.target_exprs)
         plan = cls._create_query_plan(tbl, row_builder=row_builder, analyzer=analyzer, with_pk=True)
         plan.ctx.batch_size = 16
@@ -651,6 +633,5 @@
         # we want to flush images
         if col.is_computed and col.is_stored and col.col_type.is_image_type():
             plan.set_stored_img_cols(row_builder.output_slot_idxs())
-        value_expr_slot_idx: Optional[int] = row_builder.output_slot_idxs()[0].slot_idx if col.is_computed else None
-        embedding_slot_idx: Optional[int] = row_builder.index_slot_idxs()[0].slot_idx if col.is_indexed else None
-        return plan, value_expr_slot_idx, embedding_slot_idx
+        value_expr_slot_idx = row_builder.output_slot_idxs()[0].slot_idx if col.is_computed else None
+        return plan, value_expr_slot_idx
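
Note on the signature change above: create_update_plan() now takes update_targets as a dict[catalog.Column, exprs.Expr] rather than a list of (Column, Expr) tuples, so each target column can appear only once and the plan code can use .keys()/.values()/.items() directly. A minimal caller-side sketch of the new shape; the column variable and the expression are placeholders, only the signature is taken from this diff:

    # hypothetical caller of the new dict-based signature
    update_targets = {price_col: t.price * 1.1}  # price_col / expr are illustrative
    plan, col_names, recomputed_cols = Planner.create_update_plan(
        tbl_version_path, update_targets, recompute_targets=[],
        where_clause=None, cascade=True)
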
pixeltable/store.py CHANGED
@@ -38,7 +38,7 @@ class StoreBase:
         self.tbl_version = tbl_version
         self.sa_md = sql.MetaData()
         self.sa_tbl: Optional[sql.Table] = None
-        self._create_sa_tbl()
+        self.create_sa_tbl()
 
     def pk_columns(self) -> List[sql.Column]:
         return self._pk_columns
@@ -62,7 +62,7 @@ class StoreBase:
         return [*rowid_cols, self.v_min_col, self.v_max_col]
 
-    def _create_sa_tbl(self) -> None:
+    def create_sa_tbl(self) -> None:
         """Create self.sa_tbl from self.tbl_version."""
         system_cols = self._create_system_columns()
         all_cols = system_cols.copy()
@@ -76,9 +76,6 @@ class StoreBase:
                 all_cols.append(col.sa_errormsg_col)
                 all_cols.append(col.sa_errortype_col)
 
-            if col.is_indexed:
-                all_cols.append(col.sa_idx_col)
-
         # we create an index for:
         # - scalar columns (except for strings, because long strings can't be used for B-tree indices)
         # - non-computed video and image columns (they will contain external paths/urls that users might want to
@@ -145,8 +142,8 @@ class StoreBase:
         """Move tmp media files that we generated to a permanent location"""
         for c in media_cols:
             for table_row in table_rows:
-                file_url = table_row[c.storage_name()]
-                table_row[c.storage_name()] = self._move_tmp_media_file(file_url, c, v_min)
+                file_url = table_row[c.store_name()]
+                table_row[c.store_name()] = self._move_tmp_media_file(file_url, c, v_min)
 
     def _create_table_row(
             self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, media_cols: List[catalog.Column],
@@ -168,16 +165,19 @@ class StoreBase:
 
         return table_row, num_excs
 
-    def count(self) -> None:
+    def count(self, conn: Optional[sql.engine.Connection] = None) -> int:
         """Return the number of rows visible in self.tbl_version"""
         stmt = sql.select(sql.func.count('*'))\
             .select_from(self.sa_tbl)\
             .where(self.v_min_col <= self.tbl_version.version)\
             .where(self.v_max_col > self.tbl_version.version)
-        with env.Env.get().engine.begin() as conn:
+        if conn is None:
+            with env.Env.get().engine.connect() as conn:
+                result = conn.execute(stmt).scalar_one()
+        else:
             result = conn.execute(stmt).scalar_one()
-        assert isinstance(result, int)
-        return result
+        assert isinstance(result, int)
+        return result
 
     def create(self, conn: sql.engine.Connection) -> None:
         self.sa_md.create_all(bind=conn)
@@ -193,38 +193,35 @@ class StoreBase:
             message).
         """
         assert col.is_stored
-        stmt = sql.text(f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.storage_name()} {col.col_type.to_sql()}')
+        col_type_str = col.get_sa_col_type().compile(dialect=conn.dialect)
+        stmt = sql.text(f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.store_name()} {col_type_str} NULL')
         log_stmt(_logger, stmt)
         conn.execute(stmt)
-        added_storage_cols = [col.storage_name()]
+        added_storage_cols = [col.store_name()]
         if col.records_errors:
             # we also need to create the errormsg and errortype storage cols
             stmt = (f'ALTER TABLE {self._storage_name()} '
-                f'ADD COLUMN {col.errormsg_storage_name()} {StringType().to_sql()} DEFAULT NULL')
+                f'ADD COLUMN {col.errormsg_store_name()} {StringType().to_sql()} DEFAULT NULL')
             conn.execute(sql.text(stmt))
             stmt = (f'ALTER TABLE {self._storage_name()} '
-                f'ADD COLUMN {col.errortype_storage_name()} {StringType().to_sql()} DEFAULT NULL')
+                f'ADD COLUMN {col.errortype_store_name()} {StringType().to_sql()} DEFAULT NULL')
             conn.execute(sql.text(stmt))
-            added_storage_cols.extend([col.errormsg_storage_name(), col.errortype_storage_name()])
-        self._create_sa_tbl()
+            added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
+        self.create_sa_tbl()
         _logger.info(f'Added columns {added_storage_cols} to storage table {self._storage_name()}')
 
-    def drop_column(self, col: Optional[catalog.Column] = None, conn: Optional[sql.engine.Connection] = None) -> None:
-        """Re-create self.sa_tbl and drop column, if one is given"""
-        if col is not None:
-            assert conn is not None
-            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.storage_name()}'
+    def drop_column(self, col: catalog.Column, conn: sql.engine.Connection) -> None:
+        """Execute Alter Table Drop Column statement"""
+        stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.store_name()}'
+        conn.execute(sql.text(stmt))
+        if col.records_errors:
+            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errormsg_store_name()}'
+            conn.execute(sql.text(stmt))
+            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errortype_store_name()}'
             conn.execute(sql.text(stmt))
-        if col.records_errors:
-            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errormsg_storage_name()}'
-            conn.execute(sql.text(stmt))
-            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errortype_storage_name()}'
-            conn.execute(sql.text(stmt))
-        self._create_sa_tbl()
 
     def load_column(
-            self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int, embedding_slot_idx: int,
-            conn: sql.engine.Connection
+            self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int, conn: sql.engine.Connection
     ) -> int:
         """Update store column of a computed column with values produced by an execution plan
 
@@ -253,18 +250,11 @@ class StoreBase:
                     col.sa_errormsg_col: error_msg
                 }
             else:
-                val = result_row.get_stored_val(value_expr_slot_idx)
+                val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
                 if col.col_type.is_media_type():
                     val = self._move_tmp_media_file(val, col, result_row.pk[-1])
                 values_dict = {col.sa_col: val}
 
-            if col.is_indexed:
-                # TODO: deal with exceptions
-                assert not result_row.has_exc(embedding_slot_idx)
-                # don't use get_stored_val() here, we need to pass the ndarray
-                embedding = result_row[embedding_slot_idx]
-                values_dict[col.sa_index_col] = embedding
-
             update_stmt = sql.update(self.sa_tbl).values(values_dict)
             for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
                 update_stmt = update_stmt.where(pk_col == pk_val)
@@ -337,6 +327,7 @@ class StoreBase:
             self, current_version: int, base_versions: List[Optional[int]], match_on_vmin: bool,
             where_clause: Optional[sql.ClauseElement], conn: sql.engine.Connection) -> int:
         """Mark rows as deleted that are live and were created prior to current_version.
+        Also: populate the undo columns
         Args:
             base_versions: if non-None, join only to base rows that were created at that version,
                 otherwise join to rows that are live in the base's current version (which is distinct from the
@@ -354,8 +345,14 @@ class StoreBase:
         rowid_join_clause = self._rowid_join_predicate()
         base_versions_clause = sql.true() if len(base_versions) == 0 \
             else self.base._versions_clause(base_versions, match_on_vmin)
+        set_clause = {self.v_max_col: current_version}
+        for index_info in self.tbl_version.idxs_by_name.values():
+            # copy value column to undo column
+            set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
+            # set value column to NULL
+            set_clause[index_info.val_col.sa_col] = None
         stmt = sql.update(self.sa_tbl) \
-            .values({self.v_max_col: current_version}) \
+            .values(set_clause) \
             .where(where_clause) \
             .where(rowid_join_clause) \
            .where(base_versions_clause)
@@ -416,8 +413,8 @@ class StoreComponentView(StoreView):
             self.rowid_cols.append(self.pos_col)
         return self.rowid_cols
 
-    def _create_sa_tbl(self) -> None:
-        super()._create_sa_tbl()
+    def create_sa_tbl(self) -> None:
+        super().create_sa_tbl()
         # we need to fix up the 'pos' column in TableVersion
         self.tbl_version.cols_by_name['pos'].sa_col = self.pos_col
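Note on the StoreBase changes above: count() now returns an int and accepts an optional connection, while add_column()/drop_column() always require an explicit connection, so all of them can participate in a single caller-managed transaction. A rough sketch of the two count() call patterns, assuming a StoreBase instance store_tbl and a Column col (both variable names are placeholders):

    n = store_tbl.count()  # no connection passed: opens its own short-lived connection
    with env.Env.get().engine.begin() as conn:
        store_tbl.drop_column(col, conn)  # DDL now always takes an explicit connection
        n = store_tbl.count(conn)         # reuses the caller's open connection
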
pixeltable/tests/conftest.py CHANGED
@@ -6,11 +6,12 @@ from typing import List
 
 import numpy as np
 import pytest
+import PIL.Image
 
 import pixeltable as pxt
 import pixeltable.catalog as catalog
 from pixeltable import exprs
-from pixeltable import functions as ptf
+import pixeltable.functions as pxtf
 from pixeltable.exprs import RELATIVE_PATH_ROOT as R
 from pixeltable.metadata import SystemInfo, create_system_info
 from pixeltable.metadata.schema import TableSchemaVersion, TableVersion, Table, Function, Dir
@@ -120,8 +121,7 @@ def test_tbl_exprs(test_tbl: catalog.Table) -> List[exprs.Expr]:
         t.c1.apply(json.loads),
         t.c8.errortype,
         t.c8.errormsg,
-        ptf.sum(t.c2, group_by=t.c4, order_by=t.c3),
-        #test_stored_fn(t.c2),
+        pxtf.sum(t.c2, group_by=t.c4, order_by=t.c3),
     ]
 
@pytest.fixture(scope='function')
@@ -153,17 +153,11 @@ def img_tbl_exprs(img_tbl: catalog.Table) -> List[exprs.Expr]:
         img_t.img.localpath,
     ]
 
-# TODO: why does this not work with a session scope? (some user tables don't get created with create_all())
-#@pytest.fixture(scope='session')
-#def indexed_img_tbl(init_env: None) -> catalog.Table:
-#    cl = pxt.Client()
-#    db = cl.create_db('test_indexed')
 @pytest.fixture(scope='function')
-def indexed_img_tbl(test_client: pxt.Client) -> catalog.Table:
-    skip_test_if_not_installed('nos')
+def small_img_tbl(test_client: pxt.Client) -> catalog.Table:
     cl = test_client
     schema = {
-        'img': { 'type': ImageType(nullable=False), 'indexed': True },
+        'img': ImageType(nullable=False),
        'category': StringType(nullable=False),
        'split': StringType(nullable=False),
    }
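The fixture change above also illustrates the schema-spec change in this release: the 0.2.4 column spec could embed an 'indexed' flag in a per-column dict, while 0.2.5 passes the column type directly (indexing now lives in the new pixeltable/index module listed above). Side by side, as taken from this diff:

    # 0.2.4-style spec, removed in this release: index flag embedded in the column definition
    schema = {'img': {'type': ImageType(nullable=False), 'indexed': True}}
    # 0.2.5-style spec: plain type; indexes are managed separately
    schema = {'img': ImageType(nullable=False)}
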
pixeltable/tests/ext/test_yolox.py ADDED
@@ -0,0 +1,21 @@
+import pixeltable as pxt
+from pixeltable.tests.utils import skip_test_if_not_installed, get_image_files, validate_update_status
+
+
+class TestYolox:
+
+    def test_yolox(self, test_client: pxt.Client):
+        skip_test_if_not_installed('yolox')
+        from pixeltable.ext.functions.yolox import yolox
+        cl = test_client
+        t = cl.create_table('yolox_test', {'image': pxt.ImageType()})
+        t['detect_yolox_tiny'] = yolox(t.image, model_id='yolox_tiny')
+        t['detect_yolox_nano'] = yolox(t.image, model_id='yolox_nano', threshold=0.2)
+        t['yolox_nano_bboxes'] = t.detect_yolox_nano.bboxes
+        images = get_image_files()[:10]
+        validate_update_status(t.insert({'image': image} for image in images), expected_rows=10)
+        rows = t.collect()
+        # Verify correctly formed JSON
+        assert all(list(result.keys()) == ['bboxes', 'labels', 'scores'] for result in rows['detect_yolox_tiny'])
+        # Verify that bboxes are actually present in at least some of the rows.
+        assert any(len(bboxes) > 0 for bboxes in rows['yolox_nano_bboxes'])
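The new test above doubles as a usage example for the pixeltable.ext.functions.yolox module added in this release (+92 lines): yolox() runs as a computed column and returns a JSON value with bboxes, labels, and scores keys that can be projected with attribute access. A condensed sketch of the pattern; the table and column names here are chosen for illustration, the calls themselves are taken from the test:

    import pixeltable as pxt
    from pixeltable.ext.functions.yolox import yolox

    cl = pxt.Client()
    t = cl.create_table('images', {'image': pxt.ImageType()})
    # computed columns: detection runs on insert; bboxes projected from the JSON result
    t['detections'] = yolox(t.image, model_id='yolox_nano', threshold=0.2)
    t['bboxes'] = t.detections.bboxes
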
pixeltable/tests/functions/test_fireworks.py CHANGED
@@ -5,6 +5,7 @@ import pixeltable.exceptions as excs
 from pixeltable.tests.utils import skip_test_if_not_installed, validate_update_status
 
 
+@pytest.mark.remote_api
 class TestFireworks:
 
     def test_fireworks(self, test_client: pxt.Client) -> None:
pixeltable/tests/functions/test_huggingface.py CHANGED
@@ -123,10 +123,10 @@ class TestHuggingface:
         for idx, model_id in enumerate(model_ids):
             col_name = f'embed_text{idx}'
             t[col_name] = clip_text(t.text, model_id=model_id)
-            assert t.column_types()[col_name] == ArrayType((None,), dtype=FloatType(), nullable=False)
+            assert t.column_types()[col_name].is_array_type()
             col_name = f'embed_img{idx}'
             t[col_name] = clip_image(t.img, model_id=model_id)
-            assert t.column_types()[col_name] == ArrayType((None,), dtype=FloatType(), nullable=False)
+            assert t.column_types()[col_name].is_array_type()
 
         def verify_row(row: Dict[str, Any]) -> None:
             for idx, _ in enumerate(model_ids):
pixeltable/tests/functions/test_openai.py CHANGED
@@ -6,6 +6,7 @@ from pixeltable.tests.utils import SAMPLE_IMAGE_URL, skip_test_if_not_installed,
 from pixeltable.type_system import StringType, ImageType
 
 
+@pytest.mark.remote_api
 class TestOpenai:
 
     def test_audio(self, test_client: pxt.Client) -> None:
@@ -22,7 +23,7 @@ class TestOpenai:
         ))
         t.add_column(translation=translations(t.speech, model='whisper-1'))
         t.add_column(translation_2=translations(
-            t.speech, model='whisper-1', prompt='Translate the recording from Spanish into English.', temperature=0.7
+            t.speech, model='whisper-1', prompt='Translate the recording from Spanish into English.', temperature=0.05
         ))
         validate_update_status(t.insert([
             {'input': 'I am a banana.'},
@@ -33,8 +34,8 @@ class TestOpenai:
         results = t.collect()
         assert results[0]['transcription']['text'] in ['I am a banana.', "I'm a banana."]
         assert results[0]['transcription_2']['text'] in ['I am a banana.', "I'm a banana."]
-        assert 'easy to translate from Spanish' in results[1]['translation']['text']
-        assert 'easy to translate from Spanish' in results[1]['translation_2']['text']
+        assert 'easy to translate' in results[1]['translation']['text']
+        assert 'easy to translate' in results[1]['translation_2']['text']
 
     def test_chat_completions(self, test_client: pxt.Client) -> None:
         skip_test_if_not_installed('openai')
@@ -132,13 +133,22 @@ class TestOpenai:
         t.add_column(img_2=image_generations(
             t.input, model='dall-e-2', size='512x512', user='pixeltable'
         ))
+        validate_update_status(t.insert(input='A friendly dinosaur playing tennis in a cornfield'), 1)
+        assert t.collect()['img'][0].size == (1024, 1024)
+        assert t.collect()['img_2'][0].size == (512, 512)
+
+    @pytest.mark.skip('Test is expensive and slow')
+    def test_image_generations_dall_e_3(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('openai')
+        TestOpenai.skip_test_if_no_openai_client()
+        cl = test_client
+        t = cl.create_table('test_tbl', {'input': StringType()})
+        from pixeltable.functions.openai import image_generations
         # Test dall-e-3 options
         t.add_column(img_3=image_generations(
             t.input, model='dall-e-3', quality='hd', size='1792x1024', style='natural', user='pixeltable'
         ))
         validate_update_status(t.insert(input='A friendly dinosaur playing tennis in a cornfield'), 1)
-        assert t.collect()['img'][0].size == (1024, 1024)
-        assert t.collect()['img_2'][0].size == (512, 512)
         assert t.collect()['img_3'][0].size == (1792, 1024)
 
     # This ensures that the test will be skipped, rather than returning an error, when no API key is
pixeltable/tests/functions/test_together.py CHANGED
@@ -5,6 +5,7 @@ import pixeltable.exceptions as excs
 from pixeltable.tests.utils import skip_test_if_not_installed, validate_update_status
 
 
+@pytest.mark.remote_api
 class TestTogether:
 
     def test_completions(self, test_client: pxt.Client) -> None:
pixeltable/tests/test_component_view.py CHANGED
@@ -9,7 +9,7 @@ import pixeltable as pxt
 from pixeltable import exceptions as excs
 from pixeltable.iterators import ComponentIterator
 from pixeltable.iterators.video import FrameIterator
-from pixeltable.tests.utils import assert_resultset_eq, get_test_video_files
+from pixeltable.tests.utils import assert_resultset_eq, get_test_video_files, validate_update_status
 from pixeltable.type_system import IntType, VideoType, JsonType
 
 class ConstantImgIterator(ComponentIterator):
@@ -157,10 +157,19 @@ class TestComponentView:
         assert status.num_excs == 0
         import urllib
         video_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(video_filepaths[0]))
-        status = view_t.update({'annotation': {'a': 1}}, where=view_t.video == video_url)
-        c1 = view_t.where(view_t.annotation != None).count()
-        c2 = view_t.where(view_t.video == video_url).count()
-        assert c1 == c2
+        validate_update_status(
+            view_t.update({'annotation': {'a': 1}}, where=view_t.video == video_url),
+            expected_rows=view_t.where(view_t.video == video_url).count())
+        assert view_t.where(view_t.annotation != None).count() == view_t.where(view_t.video == video_url).count()
+
+        # batch update with _rowid works
+        validate_update_status(
+            view_t.batch_update(
+                [{'annotation': {'a': 1}, '_rowid': (1, 0)}, {'annotation': {'a': 1}, '_rowid': (1, 1)}]),
+            expected_rows=2)
+        with pytest.raises(AssertionError):
+            # malformed _rowid
+            view_t.batch_update([{'annotation': {'a': 1}, '_rowid': (1,)}])
 
         with pytest.raises(excs.Error) as excinfo:
             _ = cl.create_view(
pixeltable/tests/test_dataframe.py CHANGED
@@ -16,6 +16,22 @@ from pixeltable.tests.utils import get_video_files, get_audio_files, skip_test_i
 
 
 class TestDataFrame:
+
+    @pxt.udf(return_type=pxt.JsonType(nullable=False), param_types=[pxt.JsonType(nullable=False)])
+    def yolo_to_coco(detections):
+        bboxes, labels = detections['bboxes'], detections['labels']
+        num_annotations = len(detections['bboxes'])
+        assert num_annotations == len(detections['labels'])
+        result = []
+        for i in range(num_annotations):
+            bbox = bboxes[i]
+            ann = {
+                'bbox': [round(bbox[0]), round(bbox[1]), round(bbox[2] - bbox[0]), round(bbox[3] - bbox[1])],
+                'category': labels[i],
+            }
+            result.append(ann)
+        return result
+
     def test_select_where(self, test_tbl: catalog.Table) -> None:
         t = test_tbl
         res1 = t[t.c1, t.c2, t.c3].show(0)
@@ -156,7 +172,7 @@ class TestDataFrame:
         _ = df.__repr__()
         _ = df._repr_html_()
 
-    def test_count(self, test_tbl: catalog.Table, indexed_img_tbl: catalog.Table) -> None:
+    def test_count(self, test_tbl: catalog.Table, small_img_tbl) -> None:
         skip_test_if_not_installed('nos')
         t = test_tbl
         cnt = t.count()
@@ -166,7 +182,7 @@ class TestDataFrame:
         assert cnt == 10
 
         # count() doesn't work with similarity search
-        t = indexed_img_tbl
+        t = small_img_tbl
         probe = t.select(t.img).show(1)
         img = probe[0, 0]
         with pytest.raises(excs.Error):
@@ -397,22 +413,7 @@ class TestDataFrame:
         view_t.add_column(detections=yolox_medium(view_t.frame))
         base_t.insert(video=get_video_files()[0])
 
-        @pxt.udf(return_type=pxt.JsonType(nullable=False), param_types=[pxt.JsonType(nullable=False)])
-        def yolo_to_coco(detections):
-            bboxes, labels = detections['bboxes'], detections['labels']
-            num_annotations = len(detections['bboxes'])
-            assert num_annotations == len(detections['labels'])
-            result = []
-            for i in range(num_annotations):
-                bbox = bboxes[i]
-                ann = {
-                    'bbox': [round(bbox[0]), round(bbox[1]), round(bbox[2] - bbox[0]), round(bbox[3] - bbox[1])],
-                    'category': labels[i],
-                }
-                result.append(ann)
-            return result
-
-        query = view_t.select({'image': view_t.frame, 'annotations': yolo_to_coco(view_t.detections)})
+        query = view_t.select({'image': view_t.frame, 'annotations': self.yolo_to_coco(view_t.detections)})
         path = query.to_coco_dataset()
         # we get a valid COCO dataset
         coco_ds = COCO(path)