pixeltable 0.4.6__py3-none-any.whl → 0.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pixeltable might be problematic.
- pixeltable/__init__.py +4 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +7 -9
- pixeltable/catalog/column.py +49 -0
- pixeltable/catalog/insertable_table.py +0 -7
- pixeltable/catalog/schema_object.py +1 -14
- pixeltable/catalog/table.py +180 -67
- pixeltable/catalog/table_version.py +42 -146
- pixeltable/catalog/table_version_path.py +6 -5
- pixeltable/catalog/view.py +2 -1
- pixeltable/config.py +24 -9
- pixeltable/dataframe.py +5 -6
- pixeltable/env.py +113 -21
- pixeltable/exec/aggregation_node.py +1 -1
- pixeltable/exec/cache_prefetch_node.py +4 -3
- pixeltable/exec/exec_node.py +0 -8
- pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
- pixeltable/exec/expr_eval/globals.py +1 -0
- pixeltable/exec/expr_eval/schedulers.py +52 -19
- pixeltable/exec/in_memory_data_node.py +2 -3
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/data_row.py +15 -2
- pixeltable/exprs/expr.py +9 -9
- pixeltable/exprs/function_call.py +61 -23
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/json_path.py +3 -3
- pixeltable/exprs/row_builder.py +25 -21
- pixeltable/exprs/string_op.py +3 -3
- pixeltable/func/expr_template_function.py +6 -3
- pixeltable/func/query_template_function.py +2 -2
- pixeltable/func/signature.py +30 -3
- pixeltable/func/tools.py +2 -2
- pixeltable/functions/anthropic.py +76 -27
- pixeltable/functions/deepseek.py +5 -1
- pixeltable/functions/gemini.py +11 -2
- pixeltable/functions/globals.py +2 -2
- pixeltable/functions/huggingface.py +6 -12
- pixeltable/functions/llama_cpp.py +9 -1
- pixeltable/functions/openai.py +76 -55
- pixeltable/functions/video.py +59 -6
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +86 -13
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/fiftyone.py +7 -7
- pixeltable/io/globals.py +3 -3
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +2 -1
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +3 -3
- pixeltable/io/table_data_conduit.py +2 -2
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +3 -2
- pixeltable/iterators/document.py +2 -8
- pixeltable/iterators/video.py +49 -9
- pixeltable/plan.py +0 -16
- pixeltable/share/packager.py +51 -42
- pixeltable/share/publish.py +134 -7
- pixeltable/store.py +5 -25
- pixeltable/type_system.py +5 -8
- pixeltable/utils/__init__.py +2 -2
- pixeltable/utils/arrow.py +5 -5
- pixeltable/utils/description_helper.py +3 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/media_store.py +131 -66
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/METADATA +238 -122
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/RECORD +69 -69
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0
@@ -14,7 +14,6 @@ import sqlalchemy as sql
 
 import pixeltable as pxt
 import pixeltable.exceptions as excs
-import pixeltable.type_system as ts
 from pixeltable import exprs, index
 from pixeltable.env import Env
 from pixeltable.iterators import ComponentIterator
@@ -223,18 +222,23 @@ class TableVersion:
         view_md: Optional[schema.ViewMd] = None,
     ) -> TableVersionMd:
         user = Env.get().user
+        timestamp = time.time()
 
-        # assign ids
+        # assign ids, create metadata
         cols_by_name: dict[str, Column] = {}
+        column_md: dict[int, schema.ColumnMd] = {}
+        schema_col_md: dict[int, schema.SchemaColumn] = {}
         for pos, col in enumerate(cols):
             col.id = pos
             col.schema_version_add = 0
             cols_by_name[col.name] = col
             if col.is_computed:
                 col.check_value_expr()
+            col_md, sch_md = col.to_md(pos)
+            assert sch_md is not None
+            column_md[col.id] = col_md
+            schema_col_md[col.id] = sch_md
 
-        timestamp = time.time()
-        column_md = cls._create_column_md(cols)
         tbl_id = uuid.uuid4()
         tbl_id_str = str(tbl_id)
         tbl_md = schema.TableMd(
@@ -256,18 +260,15 @@ class TableVersion:
         )
 
         table_version_md = schema.TableVersionMd(
-            tbl_id=tbl_id_str,
+            tbl_id=tbl_id_str,
+            created_at=timestamp,
+            version=0,
+            schema_version=0,
+            user=user,
+            update_status=None,
+            additional_md={},
         )
 
-        schema_col_md: dict[int, schema.SchemaColumn] = {}
-        for pos, col in enumerate(cols):
-            md = schema.SchemaColumn(
-                pos=pos,
-                name=col.name,
-                media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
-            )
-            schema_col_md[col.id] = md
-
         schema_version_md = schema.TableSchemaVersionMd(
             tbl_id=tbl_id_str,
             schema_version=0,
@@ -290,76 +291,11 @@ class TableVersion:
         comment: str,
         media_validation: MediaValidation,
     ) -> tuple[UUID, Optional[TableVersion]]:
-
-
-        # assign ids
-        cols_by_name: dict[str, Column] = {}
-        for pos, col in enumerate(cols):
-            col.id = pos
-            col.schema_version_add = 0
-            cols_by_name[col.name] = col
-            if col.is_computed:
-                col.check_value_expr()
-
-        timestamp = time.time()
-        # create schema.Table
-        # Column.dependent_cols for existing cols is wrong at this point, but init() will set it correctly
-        column_md = cls._create_column_md(cols)
-        tbl_id = uuid.uuid4()
-        tbl_id_str = str(tbl_id)
-        table_md = schema.TableMd(
-            tbl_id=tbl_id_str,
-            name=name,
-            user=user,
-            is_replica=False,
-            current_version=0,
-            current_schema_version=0,
-            next_col_id=len(cols),
-            next_idx_id=0,
-            next_row_id=0,
-            view_sn=0,
-            column_md=column_md,
-            index_md={},
-            external_stores=[],
-            view_md=None,
-            additional_md={},
-        )
-
-        # create schema.TableVersion of the initial version
-        table_version_md = schema.TableVersionMd(
-            tbl_id=tbl_id_str,
-            created_at=timestamp,
-            version=0,
-            schema_version=0,
-            user=user,
-            update_status=None,
-            additional_md={},
-        )
-
-        # create schema.TableSchemaVersion
-        schema_col_md: dict[int, schema.SchemaColumn] = {}
-        for pos, col in enumerate(cols):
-            md = schema.SchemaColumn(
-                pos=pos,
-                name=col.name,
-                media_validation=col._media_validation.name.lower() if col._media_validation is not None else None,
-            )
-            schema_col_md[col.id] = md
-
-        schema_version_md = schema.TableSchemaVersionMd(
-            tbl_id=tbl_id_str,
-            schema_version=0,
-            preceding_schema_version=None,
-            columns=schema_col_md,
-            num_retained_versions=num_retained_versions,
-            comment=comment,
-            media_validation=media_validation.name.lower(),
-            additional_md={},
-        )
-
+        inital_md = cls.create_initial_md(name, cols, num_retained_versions, comment, media_validation, view_md=None)
         cat = pxt.catalog.Catalog.get()
 
-
+        tbl_id = UUID(hex=inital_md.tbl_md.tbl_id)
+        tbl_version = cls(tbl_id, inital_md.tbl_md, inital_md.version_md, None, inital_md.schema_version_md, [])
         # TODO: break this up, so that Catalog.create_table() registers tbl_version
         cat._tbl_versions[tbl_id, None] = tbl_version
         tbl_version.init()
@@ -373,8 +309,8 @@ class TableVersion:
             tbl_id=tbl_id,
             dir_id=dir_id,
             tbl_md=tbl_version.tbl_md,
-            version_md=
-            schema_version_md=schema_version_md,
+            version_md=inital_md.version_md,
+            schema_version_md=inital_md.schema_version_md,
         )
         return tbl_id, tbl_version
 
@@ -391,7 +327,7 @@ class TableVersion:
         from .table_version_path import TableVersionPath
 
         # clear out any remaining media files from an aborted previous attempt
-        MediaStore.delete(self.id)
+        MediaStore.get().delete(self.id)
         view_path = TableVersionPath.from_dict(op.load_view_op.view_path)
         plan, _ = Planner.create_view_load_plan(view_path)
         _, row_counts = self.store_tbl.insert_rows(plan, v_min=self.version)
@@ -438,7 +374,7 @@ class TableVersion:
         # if self.base.get().is_mutable:
         #     self.base.get().mutable_views.remove(TableVersionHandle.create(self))
 
-        MediaStore.delete(self.id)
+        MediaStore.get().delete(self.id)
         FileCache.get().clear(tbl_id=self.id)
         self.store_tbl.drop()
 
@@ -480,25 +416,7 @@ class TableVersion:
         sorted_column_md = sorted(self.tbl_md.column_md.values(), key=lambda item: item.id)
         for col_md in sorted_column_md:
             schema_col_md = self.schema_version_md.columns.get(col_md.id)
-
-            media_val = (
-                MediaValidation[schema_col_md.media_validation.upper()]
-                if schema_col_md is not None and schema_col_md.media_validation is not None
-                else None
-            )
-            col = Column(
-                col_id=col_md.id,
-                name=col_name,
-                col_type=ts.ColumnType.from_dict(col_md.col_type),
-                is_pk=col_md.is_pk,
-                stored=col_md.stored,
-                media_validation=media_val,
-                schema_version_add=col_md.schema_version_add,
-                schema_version_drop=col_md.schema_version_drop,
-                value_expr_dict=col_md.value_expr,
-                tbl=self,
-            )
-            col.tbl = self
+            col = Column.from_md(col_md, self, schema_col_md)
             self.cols.append(col)
 
         # populate the lookup structures before Expr.from_dict()
@@ -783,31 +701,22 @@ class TableVersion:
         num_excs = 0
         cols_with_excs: list[Column] = []
         for col in cols_to_add:
+            assert col.id is not None, 'Column id must be set before adding the column'
             excs_per_col = 0
             col.schema_version_add = self.schema_version
             # add the column to the lookup structures now, rather than after the store changes executed successfully,
             # because it might be referenced by the next column's value_expr
             self.cols.append(col)
-            if col.name is not None:
-                self.cols_by_name[col.name] = col
             self.cols_by_id[col.id] = col
-
-            # also add to stored md
-            self._tbl_md.column_md[col.id] = schema.ColumnMd(
-                id=col.id,
-                col_type=col.col_type.as_dict(),
-                is_pk=col.is_pk,
-                schema_version_add=col.schema_version_add,
-                schema_version_drop=col.schema_version_drop,
-                value_expr=col.value_expr.as_dict() if col.value_expr is not None else None,
-                stored=col.stored,
-            )
             if col.name is not None:
-                self.
-
-
-
-
+                self.cols_by_name[col.name] = col
+                col_md, sch_md = col.to_md(len(self.cols_by_name))
+                assert sch_md is not None, 'Schema column metadata must be created for user-facing columns'
+                self._tbl_md.column_md[col.id] = col_md
+                self._schema_version_md.columns[col.id] = sch_md
+            else:
+                col_md, _ = col.to_md()
+                self._tbl_md.column_md[col.id] = col_md
 
             if col.is_stored:
                 self.store_tbl.add_column(col)
@@ -918,14 +827,17 @@ class TableVersion:
 
     def rename_column(self, old_name: str, new_name: str) -> None:
         """Rename a column."""
-
-
+        if not self.is_mutable:
+            raise excs.Error(f'Cannot rename column for immutable table {self.name!r}')
+        col = self.path.get_column(old_name)
+        if col is None:
             raise excs.Error(f'Unknown column: {old_name}')
+        if col.tbl.id != self.id:
+            raise excs.Error(f'Cannot rename base table column {col.name!r}')
         if not is_valid_identifier(new_name):
             raise excs.Error(f"Invalid column name: '{new_name}'")
         if new_name in self.cols_by_name:
             raise excs.Error(f'Column {new_name} already exists')
-        col = self.cols_by_name[old_name]
         del self.cols_by_name[old_name]
         col.name = new_name
         self.cols_by_name[new_name] = col
@@ -1115,10 +1027,11 @@ class TableVersion:
                 for el in val:
                     assert isinstance(el, int)
                 continue
-            col = self.path.get_column(col_name
+            col = self.path.get_column(col_name)
             if col is None:
-                # TODO: return more informative error if this is trying to update a base column
                 raise excs.Error(f'Column {col_name} unknown')
+            if col.tbl.id != self.id:
+                raise excs.Error(f'Column {col.name!r} is a base table column and cannot be updated')
             if col.is_computed:
                 raise excs.Error(f'Column {col_name} is computed and cannot be updated')
             if col.is_pk and not allow_pk:
@@ -1326,7 +1239,7 @@ class TableVersion:
             )
 
             # delete newly-added data
-            MediaStore.delete(self.id, tbl_version=self.version)
+            MediaStore.get().delete(self.id, tbl_version=self.version)
             conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
 
             # revert new deletions
@@ -1628,23 +1541,6 @@ class TableVersion:
             return 1 + self.base.get().num_rowid_columns()
         return 1
 
-    @classmethod
-    def _create_column_md(cls, cols: list[Column]) -> dict[int, schema.ColumnMd]:
-        column_md: dict[int, schema.ColumnMd] = {}
-        for col in cols:
-            value_expr_dict = col.value_expr.as_dict() if col.value_expr is not None else None
-            assert col.is_pk is not None
-            column_md[col.id] = schema.ColumnMd(
-                id=col.id,
-                col_type=col.col_type.as_dict(),
-                is_pk=col.is_pk,
-                schema_version_add=col.schema_version_add,
-                schema_version_drop=col.schema_version_drop,
-                value_expr=value_expr_dict,
-                stored=col.stored,
-            )
-        return column_md
-
     @classmethod
     def _create_stores_md(cls, stores: Iterable[pxt.io.ExternalStore]) -> list[dict[str, Any]]:
         return [
pixeltable/catalog/table_version_path.py
CHANGED

@@ -184,13 +184,13 @@ class TableVersionPath:
         cols = self.columns()
         return {col.id: col for col in cols}
 
-    def get_column(self, name: str
+    def get_column(self, name: str) -> Optional[Column]:
         """Return the column with the given name, or None if not found"""
         self.refresh_cached_md()
         col = self._cached_tbl_version.cols_by_name.get(name)
         if col is not None:
             return col
-        elif self.base is not None and
+        elif self.base is not None and self._cached_tbl_version.include_base_columns:
             return self.base.get_column(name)
         else:
             return None
@@ -206,10 +206,11 @@ class TableVersionPath:
         else:
             return None
 
-    def has_column(self, col: Column
+    def has_column(self, col: Column) -> bool:
         """Return True if this table has the given column."""
-        self.refresh_cached_md()
         assert col.tbl is not None
+        self.refresh_cached_md()
+
         if (
             col.tbl.id == self.tbl_version.id
             and col.tbl.effective_version == self.tbl_version.effective_version
@@ -217,7 +218,7 @@ class TableVersionPath:
         ):
             # the column is visible in this table version
             return True
-        elif self.base is not None
+        elif self.base is not None:
             return self.base.has_column(col)
         else:
             return False
pixeltable/catalog/view.py
CHANGED
@@ -25,6 +25,7 @@ from .tbl_ops import CreateStoreTableOp, LoadViewOp, TableOp
 from .update_status import UpdateStatus
 
 if TYPE_CHECKING:
+    from pixeltable.catalog.table import TableMetadata
     from pixeltable.globals import TableDataSource
 
 _logger = logging.getLogger('pixeltable')
@@ -261,7 +262,7 @@ class View(Table):
         """
         return self._snapshot_only and self._id == self._tbl_version_path.tbl_id
 
-    def _get_metadata(self) ->
+    def _get_metadata(self) -> 'TableMetadata':
         md = super()._get_metadata()
         md['is_view'] = True
         md['is_snapshot'] = self._tbl_version_path.is_snapshot()
pixeltable/config.py
CHANGED
@@ -111,10 +111,19 @@ class Config:
             return default
 
     def get_value(self, key: str, expected_type: type[T], section: str = 'pixeltable') -> Optional[T]:
-        value = self.lookup_env(section, key)  # Try to get from environment first
+        value: Any = self.lookup_env(section, key)  # Try to get from environment first
         # Next try the config file
-        if value is None
-
+        if value is None:
+            # Resolve nested section dicts
+            lookup_elems = [*section.split('.'), key]
+            value = self.__config_dict
+            for el in lookup_elems:
+                if isinstance(value, dict):
+                    if el not in value:
+                        return None
+                    value = value[el]
+                else:
+                    return None
 
         if value is None:
             return None  # Not specified
@@ -155,19 +164,25 @@ KNOWN_CONFIG_OPTIONS = {
     },
     'anthropic': {'api_key': 'Anthropic API key'},
     'bedrock': {'api_key': 'AWS Bedrock API key'},
-    'deepseek': {'api_key': 'Deepseek API key'},
-    'fireworks': {'api_key': 'Fireworks API key'},
-    'gemini': {'api_key': 'Gemini API key'},
-    '
+    'deepseek': {'api_key': 'Deepseek API key', 'rate_limit': 'Rate limit for Deepseek API requests'},
+    'fireworks': {'api_key': 'Fireworks API key', 'rate_limit': 'Rate limit for Fireworks API requests'},
+    'gemini': {'api_key': 'Gemini API key', 'rate_limits': 'Per-model rate limits for Gemini API requests'},
+    'imagen': {'rate_limits': 'Per-model rate limits for Imagen API requests'},
+    'veo': {'rate_limits': 'Per-model rate limits for Veo API requests'},
+    'groq': {'api_key': 'Groq API key', 'rate_limit': 'Rate limit for Groq API requests'},
     'label_studio': {'api_key': 'Label Studio API key', 'url': 'Label Studio server URL'},
-    'mistral': {'api_key': 'Mistral API key'},
+    'mistral': {'api_key': 'Mistral API key', 'rate_limit': 'Rate limit for Mistral API requests'},
     'openai': {
         'api_key': 'OpenAI API key',
         'base_url': 'OpenAI API base URL',
         'api_version': 'API version if using Azure OpenAI',
+        'rate_limits': 'Per-model rate limits for OpenAI API requests',
     },
     'replicate': {'api_token': 'Replicate API token'},
-    'together': {
+    'together': {
+        'api_key': 'Together API key',
+        'rate_limits': 'Per-model category rate limits for Together API requests',
+    },
     'pypi': {'api_key': 'PyPI API key (for internal use only)'},
 }
 
pixeltable/dataframe.py
CHANGED
@@ -19,7 +19,6 @@ from typing import (
     Optional,
     Sequence,
     TypeVar,
-    Union,
 )
 
 import pandas as pd
@@ -766,7 +765,7 @@ class DataFrame:
     )
 
     def _create_join_predicate(
-        self, other: catalog.TableVersionPath, on:
+        self, other: catalog.TableVersionPath, on: exprs.Expr | Sequence[exprs.ColumnRef]
     ) -> exprs.Expr:
         """Verifies user-specified 'on' argument and converts it into a join predicate."""
         col_refs: list[exprs.ColumnRef] = []
@@ -796,19 +795,19 @@ class DataFrame:
         assert len(col_refs) > 0 and len(joined_tbls) >= 2
         for col_ref in col_refs:
             # identify the referenced column by name in 'other'
-            rhs_col = other.get_column(col_ref.col.name
+            rhs_col = other.get_column(col_ref.col.name)
             if rhs_col is None:
                 raise excs.Error(f"'on': column {col_ref.col.name!r} not found in joined table")
             rhs_col_ref = exprs.ColumnRef(rhs_col)
 
             lhs_col_ref: Optional[exprs.ColumnRef] = None
-            if any(tbl.has_column(col_ref.col
+            if any(tbl.has_column(col_ref.col) for tbl in self._from_clause.tbls):
                 # col_ref comes from the existing from_clause, we use that directly
                 lhs_col_ref = col_ref
             else:
                 # col_ref comes from other, we need to look for a match in the existing from_clause by name
                 for tbl in self._from_clause.tbls:
-                    col = tbl.get_column(col_ref.col.name
+                    col = tbl.get_column(col_ref.col.name)
                     if col is None:
                         continue
                     if lhs_col_ref is not None:
@@ -829,7 +828,7 @@ class DataFrame:
     def join(
         self,
         other: catalog.Table,
-        on:
+        on: exprs.Expr | Sequence[exprs.ColumnRef] | None = None,
         how: plan.JoinType.LiteralType = 'inner',
     ) -> DataFrame:
         """