pixeltable 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic; consult the package registry's advisory page for more details.

Files changed (50)
  1. pixeltable/__init__.py +1 -1
  2. pixeltable/catalog/catalog.py +4 -6
  3. pixeltable/catalog/insertable_table.py +125 -28
  4. pixeltable/catalog/table.py +51 -15
  5. pixeltable/catalog/table_version.py +12 -8
  6. pixeltable/catalog/table_version_path.py +6 -5
  7. pixeltable/config.py +25 -9
  8. pixeltable/dataframe.py +3 -3
  9. pixeltable/env.py +89 -20
  10. pixeltable/exec/aggregation_node.py +1 -1
  11. pixeltable/exec/cache_prefetch_node.py +4 -3
  12. pixeltable/exec/exec_node.py +0 -8
  13. pixeltable/exec/expr_eval/globals.py +1 -0
  14. pixeltable/exec/expr_eval/schedulers.py +16 -4
  15. pixeltable/exec/in_memory_data_node.py +2 -3
  16. pixeltable/exprs/data_row.py +5 -5
  17. pixeltable/exprs/function_call.py +59 -21
  18. pixeltable/exprs/row_builder.py +11 -5
  19. pixeltable/func/expr_template_function.py +6 -3
  20. pixeltable/functions/__init__.py +2 -0
  21. pixeltable/functions/anthropic.py +1 -2
  22. pixeltable/functions/deepseek.py +5 -1
  23. pixeltable/functions/gemini.py +11 -2
  24. pixeltable/functions/huggingface.py +6 -12
  25. pixeltable/functions/openai.py +2 -1
  26. pixeltable/functions/video.py +5 -5
  27. pixeltable/functions/whisperx.py +177 -0
  28. pixeltable/{ext/functions → functions}/yolox.py +0 -4
  29. pixeltable/globals.py +16 -3
  30. pixeltable/io/fiftyone.py +3 -3
  31. pixeltable/io/label_studio.py +2 -1
  32. pixeltable/iterators/audio.py +3 -2
  33. pixeltable/iterators/document.py +0 -6
  34. pixeltable/metadata/__init__.py +3 -1
  35. pixeltable/mypy/__init__.py +3 -0
  36. pixeltable/mypy/mypy_plugin.py +123 -0
  37. pixeltable/plan.py +0 -16
  38. pixeltable/share/packager.py +6 -6
  39. pixeltable/share/publish.py +134 -7
  40. pixeltable/type_system.py +20 -4
  41. pixeltable/utils/media_store.py +131 -66
  42. pixeltable/utils/pydantic.py +60 -0
  43. {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/METADATA +186 -121
  44. {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/RECORD +47 -46
  45. pixeltable/ext/__init__.py +0 -17
  46. pixeltable/ext/functions/__init__.py +0 -11
  47. pixeltable/ext/functions/whisperx.py +0 -77
  48. {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/WHEEL +0 -0
  49. {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/entry_points.txt +0 -0
  50. {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py CHANGED
@@ -30,7 +30,7 @@ from .globals import (
30
30
  from .type_system import Array, Audio, Bool, Date, Document, Float, Image, Int, Json, Required, String, Timestamp, Video
31
31
 
32
32
  # This import must go last to avoid circular imports.
33
- from . import ext, functions, io, iterators # isort: skip
33
+ from . import functions, io, iterators # isort: skip
34
34
 
35
35
  # This is the safest / most maintainable way to construct __all__: start with the default and "blacklist"
36
36
  # stuff that we don't want in there. (Using a "whitelist" is considerably harder to maintain.)
@@ -189,12 +189,10 @@ class Catalog:
189
189
  @classmethod
190
190
  def clear(cls) -> None:
191
191
  """Remove the instance. Used for testing."""
192
- # invalidate all existing instances to force reloading of metadata
193
- for tbl_version in cls._instance._tbl_versions.values():
194
- # _logger.debug(
195
- # f'Invalidating table version {tbl_version.id}:{tbl_version.effective_version} ({id(tbl_version):x})'
196
- # )
197
- tbl_version.is_validated = False
192
+ if cls._instance is not None:
193
+ # invalidate all existing instances to force reloading of metadata
194
+ for tbl_version in cls._instance._tbl_versions.values():
195
+ tbl_version.is_validated = False
198
196
  cls._instance = None
199
197
 
200
198
  def __init__(self) -> None:
@@ -2,13 +2,17 @@ from __future__ import annotations
2
2
 
3
3
  import enum
4
4
  import logging
5
- from typing import TYPE_CHECKING, Any, Literal, Optional, overload
5
+ from typing import TYPE_CHECKING, Any, Literal, Optional, Sequence, cast, overload
6
6
  from uuid import UUID
7
7
 
8
+ import pydantic
9
+ import pydantic_core
10
+
8
11
  import pixeltable as pxt
9
12
  from pixeltable import exceptions as excs, type_system as ts
10
13
  from pixeltable.env import Env
11
14
  from pixeltable.utils.filecache import FileCache
15
+ from pixeltable.utils.pydantic import is_json_convertible
12
16
 
13
17
  from .globals import MediaValidation
14
18
  from .table import Table
@@ -137,8 +141,24 @@ class InsertableTable(Table):
137
141
  from pixeltable.catalog import Catalog
138
142
  from pixeltable.io.table_data_conduit import UnkTableDataConduit
139
143
 
144
+ if source is not None and isinstance(source, Sequence) and len(source) == 0:
145
+ raise excs.Error('Cannot insert an empty sequence')
146
+ fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
147
+
140
148
  with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
141
149
  table = self
150
+
151
+ # TODO: unify with TableDataConduit
152
+ if source is not None and isinstance(source, Sequence) and isinstance(source[0], pydantic.BaseModel):
153
+ status = self._insert_pydantic(
154
+ cast(Sequence[pydantic.BaseModel], source), # needed for mypy
155
+ print_stats=print_stats,
156
+ fail_on_exception=fail_on_exception,
157
+ )
158
+ Env.get().console_logger.info(status.insert_msg)
159
+ FileCache.get().emit_eviction_warnings()
160
+ return status
161
+
142
162
  if source is None:
143
163
  source = [kwargs]
144
164
  kwargs = None
@@ -154,7 +174,6 @@ class InsertableTable(Table):
154
174
  data_source.add_table_info(table)
155
175
  data_source.prepare_for_insert_into_table()
156
176
 
157
- fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
158
177
  return table.insert_table_data_source(
159
178
  data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
160
179
  )
@@ -184,32 +203,110 @@ class InsertableTable(Table):
184
203
  FileCache.get().emit_eviction_warnings()
185
204
  return status
186
205
 
187
- def _validate_input_rows(self, rows: list[dict[str, Any]]) -> None:
188
- """Verify that the input rows match the table schema"""
189
- valid_col_names = set(self._get_schema().keys())
190
- reqd_col_names = set(self._tbl_version_path.tbl_version.get().get_required_col_names())
191
- computed_col_names = set(self._tbl_version_path.tbl_version.get().get_computed_col_names())
192
- for row in rows:
193
- assert isinstance(row, dict)
194
- col_names = set(row.keys())
195
- if len(reqd_col_names - col_names) > 0:
196
- raise excs.Error(f'Missing required column(s) ({", ".join(reqd_col_names - col_names)}) in row {row}')
197
-
198
- for col_name, val in row.items():
199
- if col_name not in valid_col_names:
200
- raise excs.Error(f'Unknown column name {col_name!r} in row {row}')
201
- if col_name in computed_col_names:
202
- raise excs.Error(f'Value for computed column {col_name!r} in row {row}')
203
-
204
- # validate data
205
- col = self._tbl_version_path.get_column(col_name)
206
- try:
207
- # basic sanity checks here
208
- checked_val = col.col_type.create_literal(val)
209
- row[col_name] = checked_val
210
- except TypeError as e:
211
- msg = str(e)
212
- raise excs.Error(f'Error in column {col.name}: {msg[0].lower() + msg[1:]}\nRow: {row}') from e
206
+ def _insert_pydantic(
207
+ self, rows: Sequence[pydantic.BaseModel], print_stats: bool = False, fail_on_exception: bool = True
208
+ ) -> UpdateStatus:
209
+ model_class = type(rows[0])
210
+ self._validate_pydantic_model(model_class)
211
+ # convert rows one-by-one in order to be able to print meaningful error messages
212
+ pxt_rows: list[dict[str, Any]] = []
213
+ for i, row in enumerate(rows):
214
+ try:
215
+ pxt_rows.append(row.model_dump(mode='json'))
216
+ except pydantic_core.PydanticSerializationError as e:
217
+ raise excs.Error(f'Row {i}: error serializing pydantic model to JSON:\n{e!s}') from e
218
+
219
+ # explicitly check that all required columns are present and non-None in the rows,
220
+ # because we ignore nullability when validating the pydantic model
221
+ reqd_col_names = [col.name for col in self._tbl_version_path.columns() if col.is_required_for_insert]
222
+ for i, pxt_row in enumerate(pxt_rows):
223
+ if type(rows[i]) is not model_class:
224
+ raise excs.Error(
225
+ f'Expected {model_class.__name__!r} instance, got {type(rows[i]).__name__!r} (in row {i})'
226
+ )
227
+ for col_name in reqd_col_names:
228
+ if pxt_row.get(col_name) is None:
229
+ raise excs.Error(f'Missing required column {col_name!r} in row {i}')
230
+
231
+ status = self._tbl_version.get().insert(
232
+ rows=pxt_rows, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
233
+ )
234
+ return status
235
+
236
+ def _validate_pydantic_model(self, model: type[pydantic.BaseModel]) -> None:
237
+ """
238
+ Check if a Pydantic model is compatible with this table for insert operations.
239
+
240
+ A model is compatible if:
241
+ - All required table columns have corresponding model fields with compatible types
242
+ - Model does not define fields for computed columns
243
+ - Model field types are compatible with table column types
244
+ """
245
+ assert isinstance(model, type) and issubclass(model, pydantic.BaseModel)
246
+
247
+ schema = self._get_schema()
248
+ required_cols = set(self._tbl_version.get().get_required_col_names())
249
+ computed_cols = set(self._tbl_version.get().get_computed_col_names())
250
+ model_fields = model.model_fields
251
+ model_field_names = set(model_fields.keys())
252
+
253
+ missing_required = required_cols - model_field_names
254
+ if missing_required:
255
+ raise excs.Error(
256
+ f'Pydantic model {model.__name__!r} is missing required columns: '
257
+ f'{", ".join(f"{col_name!r}" for col_name in missing_required)}'
258
+ )
259
+
260
+ computed_in_model = computed_cols & model_field_names
261
+ if computed_in_model:
262
+ raise excs.Error(
263
+ f'Pydantic model {model.__name__!r} has fields for computed columns: '
264
+ f'{", ".join(f"{col_name!r}" for col_name in computed_in_model)}'
265
+ )
266
+
267
+ # validate type compatibility
268
+ common_fields = model_field_names & set(schema.keys())
269
+ if len(common_fields) == 0:
270
+ raise excs.Error(
271
+ f'Pydantic model {model.__name__!r} has no fields that map to columns in table {self._name!r}'
272
+ )
273
+ for field_name in common_fields:
274
+ pxt_col_type = schema[field_name]
275
+ model_field = model_fields[field_name]
276
+ model_type = model_field.annotation
277
+
278
+ # we ignore nullability: we want to accept optional model fields for required table columns, as long as
279
+ # the model instances provide a non-null value
280
+ # allow_enum=True: model_dump(mode='json') converts enums to their values
281
+ inferred_pxt_type = ts.ColumnType.from_python_type(model_type, infer_pydantic_json=True)
282
+ if inferred_pxt_type is None:
283
+ raise excs.Error(
284
+ f'Pydantic model {model.__name__!r}: cannot infer Pixeltable type for column {field_name!r}'
285
+ )
286
+
287
+ if pxt_col_type.is_media_type():
288
+ # media types require file paths, either as str or Path
289
+ if not inferred_pxt_type.is_string_type():
290
+ raise excs.Error(
291
+ f"Column {field_name!r} requires a 'str' or 'Path' field in {model.__name__!r}, but it is "
292
+ f'{model_type.__name__!r}'
293
+ )
294
+ else:
295
+ if not pxt_col_type.is_supertype_of(inferred_pxt_type, ignore_nullable=True):
296
+ raise excs.Error(
297
+ f'Pydantic model {model.__name__!r} has incompatible type ({model_type.__name__}) '
298
+ f'for column {field_name!r} ({pxt_col_type})'
299
+ )
300
+
301
+ if (
302
+ isinstance(model_type, type)
303
+ and issubclass(model_type, pydantic.BaseModel)
304
+ and not is_json_convertible(model_type)
305
+ ):
306
+ raise excs.Error(
307
+ f'Pydantic model {model.__name__!r} has field {field_name!r} with nested model '
308
+ f'{model_type.__name__!r}, which is not JSON-convertible'
309
+ )
213
310
 
214
311
  def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
215
312
  """Delete rows in this table.
@@ -2,6 +2,7 @@ from __future__ import annotations
2
2
 
3
3
  import abc
4
4
  import builtins
5
+ import datetime
5
6
  import json
6
7
  import logging
7
8
  from keyword import iskeyword as is_python_keyword
@@ -9,7 +10,6 @@ from pathlib import Path
9
10
  from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Literal, Optional, TypedDict, overload
10
11
 
11
12
  from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
12
- import datetime
13
13
  from uuid import UUID
14
14
 
15
15
  import pandas as pd
@@ -183,16 +183,14 @@ class Table(SchemaObject):
183
183
 
184
184
  return op()
185
185
 
186
- def _get_views(self, *, recursive: bool = True, include_snapshots: bool = True) -> list['Table']:
186
+ def _get_views(self, *, recursive: bool = True, mutable_only: bool = False) -> list['Table']:
187
187
  cat = catalog.Catalog.get()
188
188
  view_ids = cat.get_view_ids(self._id)
189
189
  views = [cat.get_table_by_id(id) for id in view_ids]
190
- if not include_snapshots:
191
- views = [t for t in views if not t._tbl_version_path.is_snapshot()]
190
+ if mutable_only:
191
+ views = [t for t in views if t._tbl_version_path.is_mutable()]
192
192
  if recursive:
193
- views.extend(
194
- t for view in views for t in view._get_views(recursive=True, include_snapshots=include_snapshots)
195
- )
193
+ views.extend(t for view in views for t in view._get_views(recursive=True, mutable_only=mutable_only))
196
194
  return views
197
195
 
198
196
  def _df(self) -> 'pxt.dataframe.DataFrame':
@@ -836,21 +834,25 @@ class Table(SchemaObject):
836
834
  if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
837
835
 
838
836
  if isinstance(column, str):
839
- col = self._tbl_version_path.get_column(column, include_bases=False)
837
+ col = self._tbl_version_path.get_column(column)
840
838
  if col is None:
841
839
  if if_not_exists_ == IfNotExistsParam.ERROR:
842
840
  raise excs.Error(f'Column {column!r} unknown')
843
841
  assert if_not_exists_ == IfNotExistsParam.IGNORE
844
842
  return
843
+ if col.tbl.id != self._tbl_version_path.tbl_id:
844
+ raise excs.Error(f'Cannot drop base table column {col.name!r}')
845
845
  col = self._tbl_version.get().cols_by_name[column]
846
846
  else:
847
- exists = self._tbl_version_path.has_column(column.col, include_bases=False)
847
+ exists = self._tbl_version_path.has_column(column.col)
848
848
  if not exists:
849
849
  if if_not_exists_ == IfNotExistsParam.ERROR:
850
850
  raise excs.Error(f'Unknown column: {column.col.qualified_name}')
851
851
  assert if_not_exists_ == IfNotExistsParam.IGNORE
852
852
  return
853
853
  col = column.col
854
+ if col.tbl.id != self._tbl_version_path.tbl_id:
855
+ raise excs.Error(f'Cannot drop base table column {col.name!r}')
854
856
 
855
857
  dependent_user_cols = [c for c in cat.get_column_dependents(col.tbl.id, col.id) if c.name is not None]
856
858
  if len(dependent_user_cols) > 0:
@@ -859,13 +861,32 @@ class Table(SchemaObject):
859
861
  f'{", ".join(c.name for c in dependent_user_cols)}'
860
862
  )
861
863
 
862
- _ = self._get_views(recursive=True, include_snapshots=False)
864
+ views = self._get_views(recursive=True, mutable_only=True)
865
+
866
+ # See if any view predicates depend on this column
867
+ dependent_views = []
868
+ for view in views:
869
+ if view._tbl_version is not None:
870
+ predicate = view._tbl_version.get().predicate
871
+ if predicate is not None:
872
+ for predicate_col in exprs.Expr.get_refd_column_ids(predicate.as_dict()):
873
+ if predicate_col.tbl_id == col.tbl.id and predicate_col.col_id == col.id:
874
+ dependent_views.append((view, predicate))
875
+
876
+ if len(dependent_views) > 0:
877
+ dependent_views_str = '\n'.join(
878
+ f'view: {view._path()}, predicate: {predicate!s}' for view, predicate in dependent_views
879
+ )
880
+ raise excs.Error(
881
+ f'Cannot drop column `{col.name}` because the following views depend on it:\n{dependent_views_str}'
882
+ )
883
+
863
884
  # See if this column has a dependent store. We need to look through all stores in all
864
885
  # (transitive) views of this table.
865
886
  col_handle = col.handle
866
887
  dependent_stores = [
867
888
  (view, store)
868
- for view in (self, *self._get_views(recursive=True, include_snapshots=False))
889
+ for view in (self, *views)
869
890
  for store in view._tbl_version.get().external_stores.values()
870
891
  if col_handle in store.get_local_columns()
871
892
  ]
@@ -878,6 +899,12 @@ class Table(SchemaObject):
878
899
  f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
879
900
  f'{", ".join(dependent_store_names)}'
880
901
  )
902
+ all_columns = self.columns()
903
+ if len(all_columns) == 1 and col.name == all_columns[0]:
904
+ raise excs.Error(
905
+ f'Cannot drop column `{col.name}` because it is the last remaining column in this table.'
906
+ f' Tables must have at least one column.'
907
+ )
881
908
 
882
909
  self._tbl_version.get().drop_column(col)
883
910
 
@@ -1108,11 +1135,11 @@ class Table(SchemaObject):
1108
1135
  """Resolve a column parameter to a Column object"""
1109
1136
  col: Column = None
1110
1137
  if isinstance(column, str):
1111
- col = self._tbl_version_path.get_column(column, include_bases=True)
1138
+ col = self._tbl_version_path.get_column(column)
1112
1139
  if col is None:
1113
1140
  raise excs.Error(f'Column {column!r} unknown')
1114
1141
  elif isinstance(column, ColumnRef):
1115
- exists = self._tbl_version_path.has_column(column.col, include_bases=True)
1142
+ exists = self._tbl_version_path.has_column(column.col)
1116
1143
  if not exists:
1117
1144
  raise excs.Error(f'Unknown column: {column.col.qualified_name}')
1118
1145
  col = column.col
@@ -1329,6 +1356,15 @@ class Table(SchemaObject):
1329
1356
  Insert rows from a CSV file:
1330
1357
 
1331
1358
  >>> tbl.insert(source='path/to/file.csv')
1359
+
1360
+ Insert Pydantic model instances into a table with two `pxt.Int` columns `a` and `b`:
1361
+
1362
+ >>> class MyModel(pydantic.BaseModel):
1363
+ ... a: int
1364
+ ... b: int
1365
+ ...
1366
+ ... models = [MyModel(a=1, b=2), MyModel(a=3, b=4)]
1367
+ ... tbl.insert(models)
1332
1368
  """
1333
1369
  raise NotImplementedError
1334
1370
 
@@ -1483,14 +1519,14 @@ class Table(SchemaObject):
1483
1519
  col_name: str
1484
1520
  col: Column
1485
1521
  if isinstance(column, str):
1486
- col = self._tbl_version_path.get_column(column, include_bases=True)
1522
+ col = self._tbl_version_path.get_column(column)
1487
1523
  if col is None:
1488
1524
  raise excs.Error(f'Unknown column: {column!r}')
1489
1525
  col_name = column
1490
1526
  else:
1491
1527
  assert isinstance(column, ColumnRef)
1492
1528
  col = column.col
1493
- if not self._tbl_version_path.has_column(col, include_bases=True):
1529
+ if not self._tbl_version_path.has_column(col):
1494
1530
  raise excs.Error(f'Unknown column: {col.name!r}')
1495
1531
  col_name = col.name
1496
1532
  if not col.is_computed:
@@ -327,7 +327,7 @@ class TableVersion:
327
327
  from .table_version_path import TableVersionPath
328
328
 
329
329
  # clear out any remaining media files from an aborted previous attempt
330
- MediaStore.delete(self.id)
330
+ MediaStore.get().delete(self.id)
331
331
  view_path = TableVersionPath.from_dict(op.load_view_op.view_path)
332
332
  plan, _ = Planner.create_view_load_plan(view_path)
333
333
  _, row_counts = self.store_tbl.insert_rows(plan, v_min=self.version)
@@ -374,7 +374,7 @@ class TableVersion:
374
374
  # if self.base.get().is_mutable:
375
375
  # self.base.get().mutable_views.remove(TableVersionHandle.create(self))
376
376
 
377
- MediaStore.delete(self.id)
377
+ MediaStore.get().delete(self.id)
378
378
  FileCache.get().clear(tbl_id=self.id)
379
379
  self.store_tbl.drop()
380
380
 
@@ -827,14 +827,17 @@ class TableVersion:
827
827
 
828
828
  def rename_column(self, old_name: str, new_name: str) -> None:
829
829
  """Rename a column."""
830
- assert self.is_mutable
831
- if old_name not in self.cols_by_name:
830
+ if not self.is_mutable:
831
+ raise excs.Error(f'Cannot rename column for immutable table {self.name!r}')
832
+ col = self.path.get_column(old_name)
833
+ if col is None:
832
834
  raise excs.Error(f'Unknown column: {old_name}')
835
+ if col.tbl.id != self.id:
836
+ raise excs.Error(f'Cannot rename base table column {col.name!r}')
833
837
  if not is_valid_identifier(new_name):
834
838
  raise excs.Error(f"Invalid column name: '{new_name}'")
835
839
  if new_name in self.cols_by_name:
836
840
  raise excs.Error(f'Column {new_name} already exists')
837
- col = self.cols_by_name[old_name]
838
841
  del self.cols_by_name[old_name]
839
842
  col.name = new_name
840
843
  self.cols_by_name[new_name] = col
@@ -1024,10 +1027,11 @@ class TableVersion:
1024
1027
  for el in val:
1025
1028
  assert isinstance(el, int)
1026
1029
  continue
1027
- col = self.path.get_column(col_name, include_bases=False)
1030
+ col = self.path.get_column(col_name)
1028
1031
  if col is None:
1029
- # TODO: return more informative error if this is trying to update a base column
1030
1032
  raise excs.Error(f'Column {col_name} unknown')
1033
+ if col.tbl.id != self.id:
1034
+ raise excs.Error(f'Column {col.name!r} is a base table column and cannot be updated')
1031
1035
  if col.is_computed:
1032
1036
  raise excs.Error(f'Column {col_name} is computed and cannot be updated')
1033
1037
  if col.is_pk and not allow_pk:
@@ -1235,7 +1239,7 @@ class TableVersion:
1235
1239
  )
1236
1240
 
1237
1241
  # delete newly-added data
1238
- MediaStore.delete(self.id, tbl_version=self.version)
1242
+ MediaStore.get().delete(self.id, tbl_version=self.version)
1239
1243
  conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))
1240
1244
 
1241
1245
  # revert new deletions
@@ -184,13 +184,13 @@ class TableVersionPath:
184
184
  cols = self.columns()
185
185
  return {col.id: col for col in cols}
186
186
 
187
- def get_column(self, name: str, include_bases: Optional[bool] = None) -> Optional[Column]:
187
+ def get_column(self, name: str) -> Optional[Column]:
188
188
  """Return the column with the given name, or None if not found"""
189
189
  self.refresh_cached_md()
190
190
  col = self._cached_tbl_version.cols_by_name.get(name)
191
191
  if col is not None:
192
192
  return col
193
- elif self.base is not None and (include_bases or self._cached_tbl_version.include_base_columns):
193
+ elif self.base is not None and self._cached_tbl_version.include_base_columns:
194
194
  return self.base.get_column(name)
195
195
  else:
196
196
  return None
@@ -206,10 +206,11 @@ class TableVersionPath:
206
206
  else:
207
207
  return None
208
208
 
209
- def has_column(self, col: Column, include_bases: bool = True) -> bool:
209
+ def has_column(self, col: Column) -> bool:
210
210
  """Return True if this table has the given column."""
211
- self.refresh_cached_md()
212
211
  assert col.tbl is not None
212
+ self.refresh_cached_md()
213
+
213
214
  if (
214
215
  col.tbl.id == self.tbl_version.id
215
216
  and col.tbl.effective_version == self.tbl_version.effective_version
@@ -217,7 +218,7 @@ class TableVersionPath:
217
218
  ):
218
219
  # the column is visible in this table version
219
220
  return True
220
- elif self.base is not None and include_bases:
221
+ elif self.base is not None:
221
222
  return self.base.has_column(col)
222
223
  else:
223
224
  return False
pixeltable/config.py CHANGED
@@ -111,10 +111,19 @@ class Config:
111
111
  return default
112
112
 
113
113
  def get_value(self, key: str, expected_type: type[T], section: str = 'pixeltable') -> Optional[T]:
114
- value = self.lookup_env(section, key) # Try to get from environment first
114
+ value: Any = self.lookup_env(section, key) # Try to get from environment first
115
115
  # Next try the config file
116
- if value is None and section in self.__config_dict and key in self.__config_dict[section]:
117
- value = self.__config_dict[section][key]
116
+ if value is None:
117
+ # Resolve nested section dicts
118
+ lookup_elems = [*section.split('.'), key]
119
+ value = self.__config_dict
120
+ for el in lookup_elems:
121
+ if isinstance(value, dict):
122
+ if el not in value:
123
+ return None
124
+ value = value[el]
125
+ else:
126
+ return None
118
127
 
119
128
  if value is None:
120
129
  return None # Not specified
@@ -155,19 +164,26 @@ KNOWN_CONFIG_OPTIONS = {
155
164
  },
156
165
  'anthropic': {'api_key': 'Anthropic API key'},
157
166
  'bedrock': {'api_key': 'AWS Bedrock API key'},
158
- 'deepseek': {'api_key': 'Deepseek API key'},
159
- 'fireworks': {'api_key': 'Fireworks API key'},
160
- 'gemini': {'api_key': 'Gemini API key'},
161
- 'groq': {'api_key': 'Groq API key'},
167
+ 'deepseek': {'api_key': 'Deepseek API key', 'rate_limit': 'Rate limit for Deepseek API requests'},
168
+ 'fireworks': {'api_key': 'Fireworks API key', 'rate_limit': 'Rate limit for Fireworks API requests'},
169
+ 'gemini': {'api_key': 'Gemini API key', 'rate_limits': 'Per-model rate limits for Gemini API requests'},
170
+ 'hf': {'auth_token': 'Hugging Face access token'},
171
+ 'imagen': {'rate_limits': 'Per-model rate limits for Imagen API requests'},
172
+ 'veo': {'rate_limits': 'Per-model rate limits for Veo API requests'},
173
+ 'groq': {'api_key': 'Groq API key', 'rate_limit': 'Rate limit for Groq API requests'},
162
174
  'label_studio': {'api_key': 'Label Studio API key', 'url': 'Label Studio server URL'},
163
- 'mistral': {'api_key': 'Mistral API key'},
175
+ 'mistral': {'api_key': 'Mistral API key', 'rate_limit': 'Rate limit for Mistral API requests'},
164
176
  'openai': {
165
177
  'api_key': 'OpenAI API key',
166
178
  'base_url': 'OpenAI API base URL',
167
179
  'api_version': 'API version if using Azure OpenAI',
180
+ 'rate_limits': 'Per-model rate limits for OpenAI API requests',
168
181
  },
169
182
  'replicate': {'api_token': 'Replicate API token'},
170
- 'together': {'api_key': 'Together API key'},
183
+ 'together': {
184
+ 'api_key': 'Together API key',
185
+ 'rate_limits': 'Per-model category rate limits for Together API requests',
186
+ },
171
187
  'pypi': {'api_key': 'PyPI API key (for internal use only)'},
172
188
  }
173
189
 
pixeltable/dataframe.py CHANGED
@@ -795,19 +795,19 @@ class DataFrame:
795
795
  assert len(col_refs) > 0 and len(joined_tbls) >= 2
796
796
  for col_ref in col_refs:
797
797
  # identify the referenced column by name in 'other'
798
- rhs_col = other.get_column(col_ref.col.name, include_bases=True)
798
+ rhs_col = other.get_column(col_ref.col.name)
799
799
  if rhs_col is None:
800
800
  raise excs.Error(f"'on': column {col_ref.col.name!r} not found in joined table")
801
801
  rhs_col_ref = exprs.ColumnRef(rhs_col)
802
802
 
803
803
  lhs_col_ref: Optional[exprs.ColumnRef] = None
804
- if any(tbl.has_column(col_ref.col, include_bases=True) for tbl in self._from_clause.tbls):
804
+ if any(tbl.has_column(col_ref.col) for tbl in self._from_clause.tbls):
805
805
  # col_ref comes from the existing from_clause, we use that directly
806
806
  lhs_col_ref = col_ref
807
807
  else:
808
808
  # col_ref comes from other, we need to look for a match in the existing from_clause by name
809
809
  for tbl in self._from_clause.tbls:
810
- col = tbl.get_column(col_ref.col.name, include_bases=True)
810
+ col = tbl.get_column(col_ref.col.name)
811
811
  if col is None:
812
812
  continue
813
813
  if lhs_col_ref is not None: