pixeltable 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff compares the publicly released contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
- pixeltable/catalog/column.py +25 -48
- pixeltable/catalog/insertable_table.py +7 -4
- pixeltable/catalog/table.py +163 -57
- pixeltable/catalog/table_version.py +416 -140
- pixeltable/catalog/table_version_path.py +2 -2
- pixeltable/client.py +0 -4
- pixeltable/dataframe.py +65 -21
- pixeltable/env.py +16 -1
- pixeltable/exec/cache_prefetch_node.py +1 -1
- pixeltable/exec/in_memory_data_node.py +11 -7
- pixeltable/exprs/comparison.py +3 -3
- pixeltable/exprs/data_row.py +5 -1
- pixeltable/exprs/literal.py +16 -4
- pixeltable/exprs/row_builder.py +8 -40
- pixeltable/ext/__init__.py +5 -0
- pixeltable/ext/functions/yolox.py +92 -0
- pixeltable/func/aggregate_function.py +15 -15
- pixeltable/func/expr_template_function.py +9 -1
- pixeltable/func/globals.py +24 -14
- pixeltable/func/signature.py +18 -12
- pixeltable/func/udf.py +7 -2
- pixeltable/functions/__init__.py +8 -8
- pixeltable/functions/eval.py +7 -8
- pixeltable/functions/huggingface.py +47 -19
- pixeltable/functions/openai.py +2 -2
- pixeltable/functions/util.py +11 -0
- pixeltable/index/__init__.py +2 -0
- pixeltable/index/base.py +49 -0
- pixeltable/index/embedding_index.py +95 -0
- pixeltable/metadata/schema.py +45 -22
- pixeltable/plan.py +15 -34
- pixeltable/store.py +38 -41
- pixeltable/tests/conftest.py +5 -11
- pixeltable/tests/ext/test_yolox.py +21 -0
- pixeltable/tests/functions/test_fireworks.py +1 -0
- pixeltable/tests/functions/test_huggingface.py +2 -2
- pixeltable/tests/functions/test_openai.py +15 -5
- pixeltable/tests/functions/test_together.py +1 -0
- pixeltable/tests/test_component_view.py +14 -5
- pixeltable/tests/test_dataframe.py +19 -18
- pixeltable/tests/test_exprs.py +99 -102
- pixeltable/tests/test_function.py +51 -43
- pixeltable/tests/test_index.py +138 -0
- pixeltable/tests/test_migration.py +2 -1
- pixeltable/tests/test_snapshot.py +24 -1
- pixeltable/tests/test_table.py +101 -25
- pixeltable/tests/test_types.py +30 -0
- pixeltable/tests/test_video.py +16 -16
- pixeltable/tests/test_view.py +5 -0
- pixeltable/tests/utils.py +43 -9
- pixeltable/tool/create_test_db_dump.py +16 -0
- pixeltable/type_system.py +37 -45
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/METADATA +5 -4
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/RECORD +56 -49
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/LICENSE +0 -0
- {pixeltable-0.2.4.dist-info → pixeltable-0.2.5.dist-info}/WHEEL +0 -0
pixeltable/plan.py
CHANGED
@@ -76,7 +76,8 @@ class Analyzer:
                         f'order_by()'))
             self.similarity_clause = similarity_clauses[0]
             img_col = self.similarity_clause.img_col_ref.col
-
+            indexed_col_ids = {info.col.id for info in tbl.tbl_version.idxs_by_name.values()}
+            if img_col.id not in indexed_col_ids:
                 raise excs.Error(f'nearest() not available for unindexed column {img_col.name}')

         # all exprs that are evaluated in Python; not executable
@@ -220,18 +221,11 @@ class Planner:
     ) -> exec.ExecNode:
         """Creates a plan for TableVersion.insert()"""
         assert not tbl.is_view()
-        #
-        # 1. stored_cols: all cols we need to store, incl computed cols (and indices)
+        # stored_cols: all cols we need to store, incl computed cols (and indices)
         stored_cols = [c for c in tbl.cols if c.is_stored]
         assert len(stored_cols) > 0
-        # 2. values to insert into indices
-        indexed_cols = [c for c in tbl.cols if c.is_indexed]
-        index_info: List[Tuple[catalog.Column, func.Function]] = []
-        if len(indexed_cols) > 0:
-            from pixeltable.functions.nos.image_embedding import openai_clip
-            index_info = [(c, openai_clip) for c in tbl.cols if c.is_indexed]

-        row_builder = exprs.RowBuilder([], stored_cols,
+        row_builder = exprs.RowBuilder([], stored_cols, [])

         # create InMemoryDataNode for 'rows'
         stored_col_info = row_builder.output_slot_idxs()
@@ -260,7 +254,7 @@ class Planner:
     @classmethod
     def create_update_plan(
             cls, tbl: catalog.TableVersionPath,
-            update_targets:
+            update_targets: dict[catalog.Column, exprs.Expr],
             recompute_targets: List[catalog.Column],
             where_clause: Optional[exprs.Predicate], cascade: bool
     ) -> Tuple[exec.ExecNode, List[str], List[catalog.Column]]:
@@ -279,7 +273,7 @@ class Planner:
         # retrieve all stored cols and all target exprs
         assert isinstance(tbl, catalog.TableVersionPath)
         target = tbl.tbl_version  # the one we need to update
-        updated_cols =
+        updated_cols = list(update_targets.keys())
         if len(recompute_targets) > 0:
             recomputed_cols = recompute_targets.copy()
         else:
@@ -291,12 +285,12 @@ class Planner:
             col for col in target.cols if col.is_stored and not col in updated_cols and not col in recomputed_base_cols
         ]
         select_list = [exprs.ColumnRef(col) for col in copied_cols]
-        select_list.extend(
+        select_list.extend(update_targets.values())

         recomputed_exprs = \
             [c.value_expr.copy().resolve_computed_cols(resolve_cols=recomputed_base_cols) for c in recomputed_base_cols]
         # recomputed cols reference the new values of the updated cols
-        for col, e in update_targets:
+        for col, e in update_targets.items():
             exprs.Expr.list_substitute(recomputed_exprs, exprs.ColumnRef(col), e)
         select_list.extend(recomputed_exprs)

@@ -375,16 +369,10 @@ class Planner:
         # the store
         target = view.tbl_version  # the one we need to populate
         stored_cols = [c for c in target.cols if c.is_stored and (c.is_computed or target.is_iterator_column(c))]
-        # 2.
-        indexed_cols = [c for c in target.cols if c.is_indexed]
-        index_info: List[Tuple[catalog.Column, func.Function]] = []
-        if len(indexed_cols) > 0:
-            from pixeltable.functions.nos.image_embedding import openai_clip
-            index_info = [(c, openai_clip) for c in target.cols if c.is_indexed]
-        # 3. for component views: iterator args
+        # 2. for component views: iterator args
         iterator_args = [target.iterator_args] if target.iterator_args is not None else []

-        row_builder = exprs.RowBuilder(iterator_args, stored_cols,
+        row_builder = exprs.RowBuilder(iterator_args, stored_cols, [])

         # execution plan:
         # 1. materialize exprs computed from the base that are needed for stored view columns
@@ -548,7 +536,7 @@ class Planner:
         analyzer = Analyzer(
             tbl, select_list, where_clause=where_clause, group_by_clause=group_by_clause,
             order_by_clause=order_by_clause)
-        row_builder = exprs.RowBuilder(analyzer.all_exprs, [],
+        row_builder = exprs.RowBuilder(analyzer.all_exprs, [], analyzer.sql_exprs)

         analyzer.finalize(row_builder)
         # select_list: we need to materialize everything that's been collected
@@ -627,21 +615,15 @@ class Planner:
     @classmethod
     def create_add_column_plan(
             cls, tbl: catalog.TableVersionPath, col: catalog.Column
-    ) -> Tuple[exec.ExecNode, Optional[int]
+    ) -> Tuple[exec.ExecNode, Optional[int]]:
         """Creates a plan for InsertableTable.add_column()
         Returns:
             plan: the plan to execute
-            ctx: the context to use for the plan
             value_expr slot idx for the plan output (for computed cols)
-            embedding slot idx for the plan output (for indexed image cols)
         """
         assert isinstance(tbl, catalog.TableVersionPath)
         index_info: List[Tuple[catalog.Column, func.Function]] = []
-
-        from pixeltable.functions.nos.image_embedding import openai_clip
-        index_info = [(col, openai_clip)]
-        row_builder = exprs.RowBuilder(
-            output_exprs=[], columns=[col], indices=index_info, input_exprs=[])
+        row_builder = exprs.RowBuilder(output_exprs=[], columns=[col], input_exprs=[])
         analyzer = Analyzer(tbl, row_builder.default_eval_ctx.target_exprs)
         plan = cls._create_query_plan(tbl, row_builder=row_builder, analyzer=analyzer, with_pk=True)
         plan.ctx.batch_size = 16
@@ -651,6 +633,5 @@ class Planner:
         # we want to flush images
         if col.is_computed and col.is_stored and col.col_type.is_image_type():
             plan.set_stored_img_cols(row_builder.output_slot_idxs())
-        value_expr_slot_idx
-
-        return plan, value_expr_slot_idx, embedding_slot_idx
+        value_expr_slot_idx = row_builder.output_slot_idxs()[0].slot_idx if col.is_computed else None
+        return plan, value_expr_slot_idx
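The create_update_plan() hunks above move update_targets from a sequence of (column, expression) pairs to a plain dict, iterated with .keys(), .values(), and .items(). At the public API level this corresponds to the dict-of-assignments form of Table.update() that the tests later in this diff exercise. A minimal sketch of the call shape; the table name, schema, and the arithmetic expression are illustrative assumptions, not taken from this release:

    import pixeltable as pxt

    cl = pxt.Client()
    t = cl.create_table('update_demo', {'name': pxt.StringType(), 'score': pxt.IntType()})
    t.insert([{'name': 'a', 'score': 1}, {'name': 'b', 'score': 2}])
    # the planner receives these assignments as update_targets: dict[catalog.Column, exprs.Expr];
    # values may be constants or expressions over existing columns
    t.update({'score': t.score + 10}, where=t.name == 'a')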
pixeltable/store.py
CHANGED
@@ -38,7 +38,7 @@ class StoreBase:
         self.tbl_version = tbl_version
         self.sa_md = sql.MetaData()
         self.sa_tbl: Optional[sql.Table] = None
-        self._create_sa_tbl()
+        self.create_sa_tbl()

     def pk_columns(self) -> List[sql.Column]:
         return self._pk_columns
@@ -62,7 +62,7 @@ class StoreBase:
         return [*rowid_cols, self.v_min_col, self.v_max_col]


-    def _create_sa_tbl(self) -> None:
+    def create_sa_tbl(self) -> None:
         """Create self.sa_tbl from self.tbl_version."""
         system_cols = self._create_system_columns()
         all_cols = system_cols.copy()
@@ -76,9 +76,6 @@ class StoreBase:
                 all_cols.append(col.sa_errormsg_col)
                 all_cols.append(col.sa_errortype_col)

-            if col.is_indexed:
-                all_cols.append(col.sa_idx_col)
-
         # we create an index for:
         # - scalar columns (except for strings, because long strings can't be used for B-tree indices)
         # - non-computed video and image columns (they will contain external paths/urls that users might want to
@@ -145,8 +142,8 @@ class StoreBase:
         """Move tmp media files that we generated to a permanent location"""
         for c in media_cols:
             for table_row in table_rows:
-                file_url = table_row[c.storage_name()]
-                table_row[c.storage_name()] = self._move_tmp_media_file(file_url, c, v_min)
+                file_url = table_row[c.store_name()]
+                table_row[c.store_name()] = self._move_tmp_media_file(file_url, c, v_min)

     def _create_table_row(
             self, input_row: exprs.DataRow, row_builder: exprs.RowBuilder, media_cols: List[catalog.Column],
@@ -168,16 +165,19 @@ class StoreBase:

         return table_row, num_excs

-    def count(self) -> int:
+    def count(self, conn: Optional[sql.engine.Connection] = None) -> int:
         """Return the number of rows visible in self.tbl_version"""
         stmt = sql.select(sql.func.count('*'))\
             .select_from(self.sa_tbl)\
             .where(self.v_min_col <= self.tbl_version.version)\
             .where(self.v_max_col > self.tbl_version.version)
-
+        if conn is None:
+            with env.Env.get().engine.connect() as conn:
+                result = conn.execute(stmt).scalar_one()
+        else:
             result = conn.execute(stmt).scalar_one()
-
-
+        assert isinstance(result, int)
+        return result

     def create(self, conn: sql.engine.Connection) -> None:
         self.sa_md.create_all(bind=conn)
@@ -193,38 +193,35 @@ class StoreBase:
         message).
         """
         assert col.is_stored
-
+        col_type_str = col.get_sa_col_type().compile(dialect=conn.dialect)
+        stmt = sql.text(f'ALTER TABLE {self._storage_name()} ADD COLUMN {col.store_name()} {col_type_str} NULL')
         log_stmt(_logger, stmt)
         conn.execute(stmt)
-        added_storage_cols = [col.storage_name()]
+        added_storage_cols = [col.store_name()]
         if col.records_errors:
             # we also need to create the errormsg and errortype storage cols
             stmt = (f'ALTER TABLE {self._storage_name()} '
-                    f'ADD COLUMN {col.errormsg_storage_name()} {StringType().to_sql()} DEFAULT NULL')
+                    f'ADD COLUMN {col.errormsg_store_name()} {StringType().to_sql()} DEFAULT NULL')
             conn.execute(sql.text(stmt))
             stmt = (f'ALTER TABLE {self._storage_name()} '
-                    f'ADD COLUMN {col.errortype_storage_name()} {StringType().to_sql()} DEFAULT NULL')
+                    f'ADD COLUMN {col.errortype_store_name()} {StringType().to_sql()} DEFAULT NULL')
             conn.execute(sql.text(stmt))
-
-        self._create_sa_tbl()
+            added_storage_cols.extend([col.errormsg_store_name(), col.errortype_store_name()])
+        self.create_sa_tbl()
         _logger.info(f'Added columns {added_storage_cols} to storage table {self._storage_name()}')

-    def drop_column(self, col:
-        """
-
-
-
+    def drop_column(self, col: catalog.Column, conn: sql.engine.Connection) -> None:
+        """Execute Alter Table Drop Column statement"""
+        stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.store_name()}'
+        conn.execute(sql.text(stmt))
+        if col.records_errors:
+            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errormsg_store_name()}'
+            conn.execute(sql.text(stmt))
+            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errortype_store_name()}'
             conn.execute(sql.text(stmt))
-        if col.records_errors:
-            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errormsg_storage_name()}'
-            conn.execute(sql.text(stmt))
-            stmt = f'ALTER TABLE {self._storage_name()} DROP COLUMN {col.errortype_storage_name()}'
-            conn.execute(sql.text(stmt))
-        self._create_sa_tbl()

     def load_column(
-            self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int,
-            conn: sql.engine.Connection
+            self, col: catalog.Column, exec_plan: ExecNode, value_expr_slot_idx: int, conn: sql.engine.Connection
     ) -> int:
         """Update store column of a computed column with values produced by an execution plan

@@ -253,18 +250,11 @@ class StoreBase:
                 col.sa_errormsg_col: error_msg
             }
         else:
-            val = result_row.get_stored_val(value_expr_slot_idx)
+            val = result_row.get_stored_val(value_expr_slot_idx, col.sa_col.type)
             if col.col_type.is_media_type():
                 val = self._move_tmp_media_file(val, col, result_row.pk[-1])
             values_dict = {col.sa_col: val}

-        if col.is_indexed:
-            # TODO: deal with exceptions
-            assert not result_row.has_exc(embedding_slot_idx)
-            # don't use get_stored_val() here, we need to pass the ndarray
-            embedding = result_row[embedding_slot_idx]
-            values_dict[col.sa_index_col] = embedding
-
         update_stmt = sql.update(self.sa_tbl).values(values_dict)
         for pk_col, pk_val in zip(self.pk_columns(), result_row.pk):
             update_stmt = update_stmt.where(pk_col == pk_val)
@@ -337,6 +327,7 @@ class StoreBase:
             self, current_version: int, base_versions: List[Optional[int]], match_on_vmin: bool,
             where_clause: Optional[sql.ClauseElement], conn: sql.engine.Connection) -> int:
         """Mark rows as deleted that are live and were created prior to current_version.
+        Also: populate the undo columns
         Args:
             base_versions: if non-None, join only to base rows that were created at that version,
             otherwise join to rows that are live in the base's current version (which is distinct from the
@@ -354,8 +345,14 @@ class StoreBase:
         rowid_join_clause = self._rowid_join_predicate()
         base_versions_clause = sql.true() if len(base_versions) == 0 \
             else self.base._versions_clause(base_versions, match_on_vmin)
+        set_clause = {self.v_max_col: current_version}
+        for index_info in self.tbl_version.idxs_by_name.values():
+            # copy value column to undo column
+            set_clause[index_info.undo_col.sa_col] = index_info.val_col.sa_col
+            # set value column to NULL
+            set_clause[index_info.val_col.sa_col] = None
         stmt = sql.update(self.sa_tbl) \
-            .values(
+            .values(set_clause) \
             .where(where_clause) \
             .where(rowid_join_clause) \
             .where(base_versions_clause)
@@ -416,8 +413,8 @@ class StoreComponentView(StoreView):
             self.rowid_cols.append(self.pos_col)
         return self.rowid_cols

-    def _create_sa_tbl(self) -> None:
-        super()._create_sa_tbl()
+    def create_sa_tbl(self) -> None:
+        super().create_sa_tbl()
         # we need to fix up the 'pos' column in TableVersion
         self.tbl_version.cols_by_name['pos'].sa_col = self.pos_col
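The undo-column hunk above folds three effects into one UPDATE: bump v_max, copy each index value column into its undo column, and NULL out the value column. A single statement suffices because SQL evaluates every SET expression against the pre-update row, so the copy reads the old value even though the same statement clears it. A self-contained SQLAlchemy sketch of that pattern; the table and column names here are illustrative, not pixeltable's actual store schema:

    import sqlalchemy as sql

    md = sql.MetaData()
    tbl = sql.Table(
        'demo', md,
        sql.Column('v_max', sql.BigInteger),
        sql.Column('idx_val', sql.LargeBinary),
        sql.Column('idx_undo', sql.LargeBinary),
    )
    set_clause = {tbl.c.v_max: 20}
    set_clause[tbl.c.idx_undo] = tbl.c.idx_val  # copy value column to undo column
    set_clause[tbl.c.idx_val] = None            # clear the value column
    # both assignments see the pre-update idx_val, so the copy is not clobbered
    stmt = sql.update(tbl).values(set_clause).where(tbl.c.v_max > 20)
    print(stmt)  # renders an UPDATE with all three SET clauses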
pixeltable/tests/conftest.py
CHANGED
@@ -6,11 +6,12 @@ from typing import List

 import numpy as np
 import pytest
+import PIL.Image

 import pixeltable as pxt
 import pixeltable.catalog as catalog
 from pixeltable import exprs
-
+import pixeltable.functions as pxtf
 from pixeltable.exprs import RELATIVE_PATH_ROOT as R
 from pixeltable.metadata import SystemInfo, create_system_info
 from pixeltable.metadata.schema import TableSchemaVersion, TableVersion, Table, Function, Dir
@@ -120,8 +121,7 @@ def test_tbl_exprs(test_tbl: catalog.Table) -> List[exprs.Expr]:
         t.c1.apply(json.loads),
         t.c8.errortype,
         t.c8.errormsg,
-
-        #test_stored_fn(t.c2),
+        pxtf.sum(t.c2, group_by=t.c4, order_by=t.c3),
     ]

 @pytest.fixture(scope='function')
@@ -153,17 +153,11 @@ def img_tbl_exprs(img_tbl: catalog.Table) -> List[exprs.Expr]:
         img_t.img.localpath,
     ]

-# TODO: why does this not work with a session scope? (some user tables don't get created with create_all())
-#@pytest.fixture(scope='session')
-#def indexed_img_tbl(init_env: None) -> catalog.Table:
-#    cl = pxt.Client()
-#    db = cl.create_db('test_indexed')
 @pytest.fixture(scope='function')
-def
-    skip_test_if_not_installed('nos')
+def small_img_tbl(test_client: pxt.Client) -> catalog.Table:
     cl = test_client
     schema = {
-        'img':
+        'img': ImageType(nullable=False),
         'category': StringType(nullable=False),
         'split': StringType(nullable=False),
     }
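The fixture change swaps a commented-out stored-function expression for a windowed aggregate, exercising sum() with group_by and order_by. In isolation the expression looks like this; t and its c2/c3/c4 columns come from the test_tbl fixture above, and wrapping it in select() is an assumption for illustration:

    import pixeltable.functions as pxtf

    # running sum of c2 per c4 group, accumulated in c3 order
    df = t.select(t.c2, pxtf.sum(t.c2, group_by=t.c4, order_by=t.c3))
    rows = df.collect()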
pixeltable/tests/ext/test_yolox.py
ADDED
@@ -0,0 +1,21 @@
+import pixeltable as pxt
+from pixeltable.tests.utils import skip_test_if_not_installed, get_image_files, validate_update_status
+
+
+class TestYolox:
+
+    def test_yolox(self, test_client: pxt.Client):
+        skip_test_if_not_installed('yolox')
+        from pixeltable.ext.functions.yolox import yolox
+        cl = test_client
+        t = cl.create_table('yolox_test', {'image': pxt.ImageType()})
+        t['detect_yolox_tiny'] = yolox(t.image, model_id='yolox_tiny')
+        t['detect_yolox_nano'] = yolox(t.image, model_id='yolox_nano', threshold=0.2)
+        t['yolox_nano_bboxes'] = t.detect_yolox_nano.bboxes
+        images = get_image_files()[:10]
+        validate_update_status(t.insert({'image': image} for image in images), expected_rows=10)
+        rows = t.collect()
+        # Verify correctly formed JSON
+        assert all(list(result.keys()) == ['bboxes', 'labels', 'scores'] for result in rows['detect_yolox_tiny'])
+        # Verify that bboxes are actually present in at least some of the rows.
+        assert any(len(bboxes) > 0 for bboxes in rows['yolox_nano_bboxes'])
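The new test doubles as a usage example for the pixeltable/ext/functions/yolox.py module added in this release: detections land in a JSON column whose nested fields are reachable per row or through attribute-style paths. A short sketch reusing the names from the test above (nothing beyond what the test shows is implied):

    rows = t.collect()
    first = rows['detect_yolox_tiny'][0]   # one dict per row: bboxes, labels, scores
    print(len(first['bboxes']), first['labels'][:3])

    # or select a nested field directly, as the computed column above does
    bboxes = t.select(t.detect_yolox_nano.bboxes).collect()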
pixeltable/tests/functions/test_huggingface.py
CHANGED
@@ -123,10 +123,10 @@ class TestHuggingface:
         for idx, model_id in enumerate(model_ids):
             col_name = f'embed_text{idx}'
             t[col_name] = clip_text(t.text, model_id=model_id)
-            assert t.column_types()[col_name]
+            assert t.column_types()[col_name].is_array_type()
             col_name = f'embed_img{idx}'
             t[col_name] = clip_image(t.img, model_id=model_id)
-            assert t.column_types()[col_name]
+            assert t.column_types()[col_name].is_array_type()

         def verify_row(row: Dict[str, Any]) -> None:
             for idx, _ in enumerate(model_ids):
pixeltable/tests/functions/test_openai.py
CHANGED
@@ -6,6 +6,7 @@ from pixeltable.tests.utils import SAMPLE_IMAGE_URL, skip_test_if_not_installed,
 from pixeltable.type_system import StringType, ImageType


+@pytest.mark.remote_api
 class TestOpenai:

     def test_audio(self, test_client: pxt.Client) -> None:
@@ -22,7 +23,7 @@ class TestOpenai:
         ))
         t.add_column(translation=translations(t.speech, model='whisper-1'))
         t.add_column(translation_2=translations(
-            t.speech, model='whisper-1', prompt='Translate the recording from Spanish into English.', temperature=0.
+            t.speech, model='whisper-1', prompt='Translate the recording from Spanish into English.', temperature=0.05
         ))
         validate_update_status(t.insert([
             {'input': 'I am a banana.'},
@@ -33,8 +34,8 @@ class TestOpenai:
         results = t.collect()
         assert results[0]['transcription']['text'] in ['I am a banana.', "I'm a banana."]
         assert results[0]['transcription_2']['text'] in ['I am a banana.', "I'm a banana."]
-        assert 'easy to translate
-        assert 'easy to translate
+        assert 'easy to translate' in results[1]['translation']['text']
+        assert 'easy to translate' in results[1]['translation_2']['text']

     def test_chat_completions(self, test_client: pxt.Client) -> None:
         skip_test_if_not_installed('openai')
@@ -132,13 +133,22 @@ class TestOpenai:
         t.add_column(img_2=image_generations(
             t.input, model='dall-e-2', size='512x512', user='pixeltable'
         ))
+        validate_update_status(t.insert(input='A friendly dinosaur playing tennis in a cornfield'), 1)
+        assert t.collect()['img'][0].size == (1024, 1024)
+        assert t.collect()['img_2'][0].size == (512, 512)
+
+    @pytest.mark.skip('Test is expensive and slow')
+    def test_image_generations_dall_e_3(self, test_client: pxt.Client) -> None:
+        skip_test_if_not_installed('openai')
+        TestOpenai.skip_test_if_no_openai_client()
+        cl = test_client
+        t = cl.create_table('test_tbl', {'input': StringType()})
+        from pixeltable.functions.openai import image_generations
         # Test dall-e-3 options
         t.add_column(img_3=image_generations(
             t.input, model='dall-e-3', quality='hd', size='1792x1024', style='natural', user='pixeltable'
         ))
         validate_update_status(t.insert(input='A friendly dinosaur playing tennis in a cornfield'), 1)
-        assert t.collect()['img'][0].size == (1024, 1024)
-        assert t.collect()['img_2'][0].size == (512, 512)
         assert t.collect()['img_3'][0].size == (1792, 1024)

     # This ensures that the test will be skipped, rather than returning an error, when no API key is
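Splitting the dall-e-3 case into its own method lets the expensive call carry a skip marker while the dall-e-2 assertions keep running, and @pytest.mark.remote_api lets offline runs deselect the whole class. The bare pytest pattern, reduced to essentials (class and method names here are illustrative):

    import pytest

    @pytest.mark.remote_api            # deselect with: pytest -m 'not remote_api'
    class TestExample:
        def test_cheap(self) -> None:
            assert 1 + 1 == 2          # always runs

        @pytest.mark.skip('Test is expensive and slow')
        def test_costly(self) -> None:
            ...                        # reported as skipped, never executed

Custom marks like remote_api should be registered (for example in pytest.ini) to avoid unknown-mark warnings.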
pixeltable/tests/test_component_view.py
CHANGED
@@ -9,7 +9,7 @@ import pixeltable as pxt
 from pixeltable import exceptions as excs
 from pixeltable.iterators import ComponentIterator
 from pixeltable.iterators.video import FrameIterator
-from pixeltable.tests.utils import assert_resultset_eq, get_test_video_files
+from pixeltable.tests.utils import assert_resultset_eq, get_test_video_files, validate_update_status
 from pixeltable.type_system import IntType, VideoType, JsonType

 class ConstantImgIterator(ComponentIterator):
@@ -157,10 +157,19 @@ class TestComponentView:
         assert status.num_excs == 0
         import urllib
         video_url = urllib.parse.urljoin('file:', urllib.request.pathname2url(video_filepaths[0]))
-
-
-
-        assert
+        validate_update_status(
+            view_t.update({'annotation': {'a': 1}}, where=view_t.video == video_url),
+            expected_rows=view_t.where(view_t.video == video_url).count())
+        assert view_t.where(view_t.annotation != None).count() == view_t.where(view_t.video == video_url).count()
+
+        # batch update with _rowid works
+        validate_update_status(
+            view_t.batch_update(
+                [{'annotation': {'a': 1}, '_rowid': (1, 0)}, {'annotation': {'a': 1}, '_rowid': (1, 1)}]),
+            expected_rows=2)
+        with pytest.raises(AssertionError):
+            # malformed _rowid
+            view_t.batch_update([{'annotation': {'a': 1}, '_rowid': (1,)}])

         with pytest.raises(excs.Error) as excinfo:
             _ = cl.create_view(
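The batch_update() calls above address rows directly through '_rowid'. For a component view the rowid has two parts, the base row id plus the iterator position (StoreComponentView appends pos_col to rowid_cols in the store.py diff), which is why the 1-tuple form trips an assertion. The call shape in isolation, with the names from the test above:

    # each dict carries the new column values plus the target '_rowid';
    # component-view rowids are (base row id, pos)
    view_t.batch_update([
        {'annotation': {'a': 1}, '_rowid': (1, 0)},
        {'annotation': {'a': 1}, '_rowid': (1, 1)},
    ])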
pixeltable/tests/test_dataframe.py
CHANGED
@@ -16,6 +16,22 @@ from pixeltable.tests.utils import get_video_files, get_audio_files, skip_test_i


 class TestDataFrame:
+
+    @pxt.udf(return_type=pxt.JsonType(nullable=False), param_types=[pxt.JsonType(nullable=False)])
+    def yolo_to_coco(detections):
+        bboxes, labels = detections['bboxes'], detections['labels']
+        num_annotations = len(detections['bboxes'])
+        assert num_annotations == len(detections['labels'])
+        result = []
+        for i in range(num_annotations):
+            bbox = bboxes[i]
+            ann = {
+                'bbox': [round(bbox[0]), round(bbox[1]), round(bbox[2] - bbox[0]), round(bbox[3] - bbox[1])],
+                'category': labels[i],
+            }
+            result.append(ann)
+        return result
+
     def test_select_where(self, test_tbl: catalog.Table) -> None:
         t = test_tbl
         res1 = t[t.c1, t.c2, t.c3].show(0)
@@ -156,7 +172,7 @@ class TestDataFrame:
         _ = df.__repr__()
         _ = df._repr_html_()

-    def test_count(self, test_tbl: catalog.Table,
+    def test_count(self, test_tbl: catalog.Table, small_img_tbl) -> None:
         skip_test_if_not_installed('nos')
         t = test_tbl
         cnt = t.count()
@@ -166,7 +182,7 @@ class TestDataFrame:
         assert cnt == 10

         # count() doesn't work with similarity search
-        t =
+        t = small_img_tbl
         probe = t.select(t.img).show(1)
         img = probe[0, 0]
         with pytest.raises(excs.Error):
@@ -397,22 +413,7 @@ class TestDataFrame:
         view_t.add_column(detections=yolox_medium(view_t.frame))
         base_t.insert(video=get_video_files()[0])

-
-        def yolo_to_coco(detections):
-            bboxes, labels = detections['bboxes'], detections['labels']
-            num_annotations = len(detections['bboxes'])
-            assert num_annotations == len(detections['labels'])
-            result = []
-            for i in range(num_annotations):
-                bbox = bboxes[i]
-                ann = {
-                    'bbox': [round(bbox[0]), round(bbox[1]), round(bbox[2] - bbox[0]), round(bbox[3] - bbox[1])],
-                    'category': labels[i],
-                }
-                result.append(ann)
-            return result
-
-        query = view_t.select({'image': view_t.frame, 'annotations': yolo_to_coco(view_t.detections)})
+        query = view_t.select({'image': view_t.frame, 'annotations': self.yolo_to_coco(view_t.detections)})
         path = query.to_coco_dataset()
         # we get a valid COCO dataset
         coco_ds = COCO(path)
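Hoisting yolo_to_coco into a class-level @pxt.udf is what lets select() treat it as an expression over view_t.detections rather than a plain Python closure. The same decorator style defines other standalone JSON-to-JSON UDFs; a minimal hedged example (the function name and body are illustrative, only the decorator form comes from the diff):

    import pixeltable as pxt

    @pxt.udf(return_type=pxt.JsonType(nullable=False), param_types=[pxt.JsonType(nullable=False)])
    def box_areas(detections):
        # assumes the yolox-style payload shown above: {'bboxes': [[x1, y1, x2, y2], ...], ...}
        return [(b[2] - b[0]) * (b[3] - b[1]) for b in detections['bboxes']]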