pixeltable 0.4.7__py3-none-any.whl → 0.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +1 -1
- pixeltable/catalog/catalog.py +4 -6
- pixeltable/catalog/insertable_table.py +125 -28
- pixeltable/catalog/table.py +51 -15
- pixeltable/catalog/table_version.py +12 -8
- pixeltable/catalog/table_version_path.py +6 -5
- pixeltable/config.py +25 -9
- pixeltable/dataframe.py +3 -3
- pixeltable/env.py +89 -20
- pixeltable/exec/aggregation_node.py +1 -1
- pixeltable/exec/cache_prefetch_node.py +4 -3
- pixeltable/exec/exec_node.py +0 -8
- pixeltable/exec/expr_eval/globals.py +1 -0
- pixeltable/exec/expr_eval/schedulers.py +16 -4
- pixeltable/exec/in_memory_data_node.py +2 -3
- pixeltable/exprs/data_row.py +5 -5
- pixeltable/exprs/function_call.py +59 -21
- pixeltable/exprs/row_builder.py +11 -5
- pixeltable/func/expr_template_function.py +6 -3
- pixeltable/functions/__init__.py +2 -0
- pixeltable/functions/anthropic.py +1 -2
- pixeltable/functions/deepseek.py +5 -1
- pixeltable/functions/gemini.py +11 -2
- pixeltable/functions/huggingface.py +6 -12
- pixeltable/functions/openai.py +2 -1
- pixeltable/functions/video.py +5 -5
- pixeltable/functions/whisperx.py +177 -0
- pixeltable/{ext/functions → functions}/yolox.py +0 -4
- pixeltable/globals.py +16 -3
- pixeltable/io/fiftyone.py +3 -3
- pixeltable/io/label_studio.py +2 -1
- pixeltable/iterators/audio.py +3 -2
- pixeltable/iterators/document.py +0 -6
- pixeltable/metadata/__init__.py +3 -1
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +0 -16
- pixeltable/share/packager.py +6 -6
- pixeltable/share/publish.py +134 -7
- pixeltable/type_system.py +20 -4
- pixeltable/utils/media_store.py +131 -66
- pixeltable/utils/pydantic.py +60 -0
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/METADATA +186 -121
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/RECORD +47 -46
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.7.dist-info → pixeltable-0.4.9.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py
CHANGED
@@ -30,7 +30,7 @@ from .globals import (
 from .type_system import Array, Audio, Bool, Date, Document, Float, Image, Int, Json, Required, String, Timestamp, Video

 # This import must go last to avoid circular imports.
-from . import io, iterators  # isort: skip
+from . import functions, io, iterators  # isort: skip

 # This is the safest / most maintainable way to construct __all__: start with the default and "blacklist"
 # stuff that we don't want in there. (Using a "whitelist" is considerably harder to maintain.)
pixeltable/catalog/catalog.py
CHANGED
@@ -189,12 +189,10 @@ class Catalog:
     @classmethod
     def clear(cls) -> None:
         """Remove the instance. Used for testing."""
-        …
-        …
-        …
-        …
-        # )
-        tbl_version.is_validated = False
+        if cls._instance is not None:
+            # invalidate all existing instances to force reloading of metadata
+            for tbl_version in cls._instance._tbl_versions.values():
+                tbl_version.is_validated = False
         cls._instance = None

     def __init__(self) -> None:
pixeltable/catalog/insertable_table.py
CHANGED

@@ -2,13 +2,17 @@ from __future__ import annotations

 import enum
 import logging
-from typing import TYPE_CHECKING, Any, Literal, Optional, overload
+from typing import TYPE_CHECKING, Any, Literal, Optional, Sequence, cast, overload
 from uuid import UUID

+import pydantic
+import pydantic_core
+
 import pixeltable as pxt
 from pixeltable import exceptions as excs, type_system as ts
 from pixeltable.env import Env
 from pixeltable.utils.filecache import FileCache
+from pixeltable.utils.pydantic import is_json_convertible

 from .globals import MediaValidation
 from .table import Table
@@ -137,8 +141,24 @@ class InsertableTable(Table):
         from pixeltable.catalog import Catalog
         from pixeltable.io.table_data_conduit import UnkTableDataConduit

+        if source is not None and isinstance(source, Sequence) and len(source) == 0:
+            raise excs.Error('Cannot insert an empty sequence')
+        fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
+
         with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
             table = self
+
+            # TODO: unify with TableDataConduit
+            if source is not None and isinstance(source, Sequence) and isinstance(source[0], pydantic.BaseModel):
+                status = self._insert_pydantic(
+                    cast(Sequence[pydantic.BaseModel], source),  # needed for mypy
+                    print_stats=print_stats,
+                    fail_on_exception=fail_on_exception,
+                )
+                Env.get().console_logger.info(status.insert_msg)
+                FileCache.get().emit_eviction_warnings()
+                return status
+
             if source is None:
                 source = [kwargs]
                 kwargs = None
@@ -154,7 +174,6 @@ class InsertableTable(Table):
             data_source.add_table_info(table)
             data_source.prepare_for_insert_into_table()

-            fail_on_exception = OnErrorParameter.fail_on_exception(on_error)
             return table.insert_table_data_source(
                 data_source=data_source, fail_on_exception=fail_on_exception, print_stats=print_stats
             )
@@ -184,32 +203,110 @@ class InsertableTable(Table):
         FileCache.get().emit_eviction_warnings()
         return status

-    def …
-    …
+    def _insert_pydantic(
+        self, rows: Sequence[pydantic.BaseModel], print_stats: bool = False, fail_on_exception: bool = True
+    ) -> UpdateStatus:
+        model_class = type(rows[0])
+        self._validate_pydantic_model(model_class)
+        # convert rows one-by-one in order to be able to print meaningful error messages
+        pxt_rows: list[dict[str, Any]] = []
+        for i, row in enumerate(rows):
+            try:
+                pxt_rows.append(row.model_dump(mode='json'))
+            except pydantic_core.PydanticSerializationError as e:
+                raise excs.Error(f'Row {i}: error serializing pydantic model to JSON:\n{e!s}') from e
+
+        # explicitly check that all required columns are present and non-None in the rows,
+        # because we ignore nullability when validating the pydantic model
+        reqd_col_names = [col.name for col in self._tbl_version_path.columns() if col.is_required_for_insert]
+        for i, pxt_row in enumerate(pxt_rows):
+            if type(rows[i]) is not model_class:
+                raise excs.Error(
+                    f'Expected {model_class.__name__!r} instance, got {type(rows[i]).__name__!r} (in row {i})'
+                )
+            for col_name in reqd_col_names:
+                if pxt_row.get(col_name) is None:
+                    raise excs.Error(f'Missing required column {col_name!r} in row {i}')
+
+        status = self._tbl_version.get().insert(
+            rows=pxt_rows, df=None, print_stats=print_stats, fail_on_exception=fail_on_exception
+        )
+        return status
+
+    def _validate_pydantic_model(self, model: type[pydantic.BaseModel]) -> None:
+        """
+        Check if a Pydantic model is compatible with this table for insert operations.
+
+        A model is compatible if:
+        - All required table columns have corresponding model fields with compatible types
+        - Model does not define fields for computed columns
+        - Model field types are compatible with table column types
+        """
+        assert isinstance(model, type) and issubclass(model, pydantic.BaseModel)
+
+        schema = self._get_schema()
+        required_cols = set(self._tbl_version.get().get_required_col_names())
+        computed_cols = set(self._tbl_version.get().get_computed_col_names())
+        model_fields = model.model_fields
+        model_field_names = set(model_fields.keys())
+
+        missing_required = required_cols - model_field_names
+        if missing_required:
+            raise excs.Error(
+                f'Pydantic model {model.__name__!r} is missing required columns: '
+                f'{", ".join(f"{col_name!r}" for col_name in missing_required)}'
+            )
+
+        computed_in_model = computed_cols & model_field_names
+        if computed_in_model:
+            raise excs.Error(
+                f'Pydantic model {model.__name__!r} has fields for computed columns: '
+                f'{", ".join(f"{col_name!r}" for col_name in computed_in_model)}'
+            )
+
+        # validate type compatibility
+        common_fields = model_field_names & set(schema.keys())
+        if len(common_fields) == 0:
+            raise excs.Error(
+                f'Pydantic model {model.__name__!r} has no fields that map to columns in table {self._name!r}'
+            )
+        for field_name in common_fields:
+            pxt_col_type = schema[field_name]
+            model_field = model_fields[field_name]
+            model_type = model_field.annotation
+
+            # we ignore nullability: we want to accept optional model fields for required table columns, as long as
+            # the model instances provide a non-null value
+            # allow_enum=True: model_dump(mode='json') converts enums to their values
+            inferred_pxt_type = ts.ColumnType.from_python_type(model_type, infer_pydantic_json=True)
+            if inferred_pxt_type is None:
+                raise excs.Error(
+                    f'Pydantic model {model.__name__!r}: cannot infer Pixeltable type for column {field_name!r}'
+                )
+
+            if pxt_col_type.is_media_type():
+                # media types require file paths, either as str or Path
+                if not inferred_pxt_type.is_string_type():
+                    raise excs.Error(
+                        f"Column {field_name!r} requires a 'str' or 'Path' field in {model.__name__!r}, but it is "
+                        f'{model_type.__name__!r}'
+                    )
+            else:
+                if not pxt_col_type.is_supertype_of(inferred_pxt_type, ignore_nullable=True):
+                    raise excs.Error(
+                        f'Pydantic model {model.__name__!r} has incompatible type ({model_type.__name__}) '
+                        f'for column {field_name!r} ({pxt_col_type})'
+                    )
+
+            if (
+                isinstance(model_type, type)
+                and issubclass(model_type, pydantic.BaseModel)
+                and not is_json_convertible(model_type)
+            ):
+                raise excs.Error(
+                    f'Pydantic model {model.__name__!r} has field {field_name!r} with nested model '
+                    f'{model_type.__name__!r}, which is not JSON-convertible'
+                )

     def delete(self, where: Optional['exprs.Expr'] = None) -> UpdateStatus:
         """Delete rows in this table.
pixeltable/catalog/table.py
CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations

 import abc
 import builtins
+import datetime
 import json
 import logging
 from keyword import iskeyword as is_python_keyword
@@ -9,7 +10,6 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Literal, Optional, TypedDict, overload

 from typing import _GenericAlias  # type: ignore[attr-defined]  # isort: skip
-import datetime
 from uuid import UUID

 import pandas as pd
@@ -183,16 +183,14 @@ class Table(SchemaObject):

         return op()

-    def _get_views(self, *, recursive: bool = True, …
+    def _get_views(self, *, recursive: bool = True, mutable_only: bool = False) -> list['Table']:
         cat = catalog.Catalog.get()
         view_ids = cat.get_view_ids(self._id)
         views = [cat.get_table_by_id(id) for id in view_ids]
-        if …
-            views = [t for t in views if …
+        if mutable_only:
+            views = [t for t in views if t._tbl_version_path.is_mutable()]
         if recursive:
-            views.extend(
-                t for view in views for t in view._get_views(recursive=True, include_snapshots=include_snapshots)
-            )
+            views.extend(t for view in views for t in view._get_views(recursive=True, mutable_only=mutable_only))
         return views

     def _df(self) -> 'pxt.dataframe.DataFrame':
@@ -836,21 +834,25 @@ class Table(SchemaObject):
         if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')

         if isinstance(column, str):
-            col = self._tbl_version_path.get_column(column, …)
+            col = self._tbl_version_path.get_column(column)
             if col is None:
                 if if_not_exists_ == IfNotExistsParam.ERROR:
                     raise excs.Error(f'Column {column!r} unknown')
                 assert if_not_exists_ == IfNotExistsParam.IGNORE
                 return
+            if col.tbl.id != self._tbl_version_path.tbl_id:
+                raise excs.Error(f'Cannot drop base table column {col.name!r}')
             col = self._tbl_version.get().cols_by_name[column]
         else:
-            exists = self._tbl_version_path.has_column(column.col, …)
+            exists = self._tbl_version_path.has_column(column.col)
             if not exists:
                 if if_not_exists_ == IfNotExistsParam.ERROR:
                     raise excs.Error(f'Unknown column: {column.col.qualified_name}')
                 assert if_not_exists_ == IfNotExistsParam.IGNORE
                 return
             col = column.col
+            if col.tbl.id != self._tbl_version_path.tbl_id:
+                raise excs.Error(f'Cannot drop base table column {col.name!r}')

         dependent_user_cols = [c for c in cat.get_column_dependents(col.tbl.id, col.id) if c.name is not None]
         if len(dependent_user_cols) > 0:
@@ -859,13 +861,32 @@ class Table(SchemaObject):
                 f'{", ".join(c.name for c in dependent_user_cols)}'
             )

-        …
+        views = self._get_views(recursive=True, mutable_only=True)
+
+        # See if any view predicates depend on this column
+        dependent_views = []
+        for view in views:
+            if view._tbl_version is not None:
+                predicate = view._tbl_version.get().predicate
+                if predicate is not None:
+                    for predicate_col in exprs.Expr.get_refd_column_ids(predicate.as_dict()):
+                        if predicate_col.tbl_id == col.tbl.id and predicate_col.col_id == col.id:
+                            dependent_views.append((view, predicate))
+
+        if len(dependent_views) > 0:
+            dependent_views_str = '\n'.join(
+                f'view: {view._path()}, predicate: {predicate!s}' for view, predicate in dependent_views
+            )
+            raise excs.Error(
+                f'Cannot drop column `{col.name}` because the following views depend on it:\n{dependent_views_str}'
+            )
+
         # See if this column has a dependent store. We need to look through all stores in all
         # (transitive) views of this table.
         col_handle = col.handle
         dependent_stores = [
             (view, store)
-            for view in (self, *…
+            for view in (self, *views)
             for store in view._tbl_version.get().external_stores.values()
             if col_handle in store.get_local_columns()
         ]
@@ -878,6 +899,12 @@ class Table(SchemaObject):
                 f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
                 f'{", ".join(dependent_store_names)}'
             )
+        all_columns = self.columns()
+        if len(all_columns) == 1 and col.name == all_columns[0]:
+            raise excs.Error(
+                f'Cannot drop column `{col.name}` because it is the last remaining column in this table.'
+                f' Tables must have at least one column.'
+            )

         self._tbl_version.get().drop_column(col)

@@ -1108,11 +1135,11 @@ class Table(SchemaObject):
         """Resolve a column parameter to a Column object"""
         col: Column = None
         if isinstance(column, str):
-            col = self._tbl_version_path.get_column(column, …)
+            col = self._tbl_version_path.get_column(column)
             if col is None:
                 raise excs.Error(f'Column {column!r} unknown')
         elif isinstance(column, ColumnRef):
-            exists = self._tbl_version_path.has_column(column.col, …)
+            exists = self._tbl_version_path.has_column(column.col)
             if not exists:
                 raise excs.Error(f'Unknown column: {column.col.qualified_name}')
             col = column.col
@@ -1329,6 +1356,15 @@ class Table(SchemaObject):
         Insert rows from a CSV file:

         >>> tbl.insert(source='path/to/file.csv')
+
+        Insert Pydantic model instances into a table with two `pxt.Int` columns `a` and `b`:
+
+        >>> class MyModel(pydantic.BaseModel):
+        ...     a: int
+        ...     b: int
+        ...
+        ... models = [MyModel(a=1, b=2), MyModel(a=3, b=4)]
+        ... tbl.insert(models)
         """
         raise NotImplementedError

@@ -1483,14 +1519,14 @@ class Table(SchemaObject):
         col_name: str
         col: Column
         if isinstance(column, str):
-            col = self._tbl_version_path.get_column(column, …)
+            col = self._tbl_version_path.get_column(column)
             if col is None:
                 raise excs.Error(f'Unknown column: {column!r}')
             col_name = column
         else:
             assert isinstance(column, ColumnRef)
             col = column.col
-            if not self._tbl_version_path.has_column(col, …):
+            if not self._tbl_version_path.has_column(col):
                 raise excs.Error(f'Unknown column: {col.name!r}')
             col_name = col.name
         if not col.is_computed:
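
The guards added to the drop-column path above surface as user-facing errors: a view can no longer drop an inherited base-table column, a base table refuses to drop a column that a view predicate references, and a table must keep at least one column. A sketch of the resulting behavior, assuming illustrative table and view names:

import pixeltable as pxt
from pixeltable import exceptions as excs

t = pxt.create_table('base_tbl', {'a': pxt.Int, 'b': pxt.Int})
v = pxt.create_view('filtered', t.where(t.a > 0))

try:
    v.drop_column('a')  # 'a' lives in the base table, not the view
except excs.Error as e:
    print(e)  # Cannot drop base table column 'a'

try:
    t.drop_column('a')  # the view's predicate (t.a > 0) references 'a'
except excs.Error as e:
    print(e)  # Cannot drop column `a` because the following views depend on it: ...

t.drop_column('b')  # succeeds; dropping 'a' afterwards would hit the last-column check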
pixeltable/catalog/table_version.py
CHANGED

@@ -327,7 +327,7 @@ class TableVersion:
         from .table_version_path import TableVersionPath

         # clear out any remaining media files from an aborted previous attempt
-        MediaStore.delete(self.id)
+        MediaStore.get().delete(self.id)
         view_path = TableVersionPath.from_dict(op.load_view_op.view_path)
         plan, _ = Planner.create_view_load_plan(view_path)
         _, row_counts = self.store_tbl.insert_rows(plan, v_min=self.version)
@@ -374,7 +374,7 @@ class TableVersion:
         # if self.base.get().is_mutable:
         #     self.base.get().mutable_views.remove(TableVersionHandle.create(self))

-        MediaStore.delete(self.id)
+        MediaStore.get().delete(self.id)
         FileCache.get().clear(tbl_id=self.id)
         self.store_tbl.drop()

@@ -827,14 +827,17 @@ class TableVersion:

     def rename_column(self, old_name: str, new_name: str) -> None:
         """Rename a column."""
-        …
-        …
+        if not self.is_mutable:
+            raise excs.Error(f'Cannot rename column for immutable table {self.name!r}')
+        col = self.path.get_column(old_name)
+        if col is None:
             raise excs.Error(f'Unknown column: {old_name}')
+        if col.tbl.id != self.id:
+            raise excs.Error(f'Cannot rename base table column {col.name!r}')
         if not is_valid_identifier(new_name):
             raise excs.Error(f"Invalid column name: '{new_name}'")
         if new_name in self.cols_by_name:
             raise excs.Error(f'Column {new_name} already exists')
-        col = self.cols_by_name[old_name]
         del self.cols_by_name[old_name]
         col.name = new_name
         self.cols_by_name[new_name] = col
@@ -1024,10 +1027,11 @@ class TableVersion:
                 for el in val:
                     assert isinstance(el, int)
                 continue
-            col = self.path.get_column(col_name, …)
+            col = self.path.get_column(col_name)
             if col is None:
-                # TODO: return more informative error if this is trying to update a base column
                 raise excs.Error(f'Column {col_name} unknown')
+            if col.tbl.id != self.id:
+                raise excs.Error(f'Column {col.name!r} is a base table column and cannot be updated')
             if col.is_computed:
                 raise excs.Error(f'Column {col_name} is computed and cannot be updated')
             if col.is_pk and not allow_pk:
@@ -1235,7 +1239,7 @@ class TableVersion:
         )

         # delete newly-added data
-        MediaStore.delete(self.id, tbl_version=self.version)
+        MediaStore.get().delete(self.id, tbl_version=self.version)
         conn.execute(sql.delete(self.store_tbl.sa_tbl).where(self.store_tbl.sa_tbl.c.v_min == self.version))

         # revert new deletions
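
All three `MediaStore.delete(...)` call sites above become `MediaStore.get().delete(...)`, so the media store is now reached through a `get()` accessor like `FileCache.get()` on the neighboring lines. A generic sketch of that lazily-created singleton accessor pattern, not pixeltable's actual implementation:

from typing import Optional
from uuid import UUID


class MediaStoreSketch:
    # illustrative stand-in; the real MediaStore internals differ
    _instance: Optional['MediaStoreSketch'] = None

    @classmethod
    def get(cls) -> 'MediaStoreSketch':
        # create the shared instance lazily on first access
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def delete(self, tbl_id: UUID, tbl_version: Optional[int] = None) -> None:
        # delete media files for tbl_id, optionally restricted to a single version
        ...


# call sites then read like the new diff lines: MediaStoreSketch.get().delete(tbl_id)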
pixeltable/catalog/table_version_path.py
CHANGED

@@ -184,13 +184,13 @@ class TableVersionPath:
         cols = self.columns()
         return {col.id: col for col in cols}

-    def get_column(self, name: str, …
+    def get_column(self, name: str) -> Optional[Column]:
         """Return the column with the given name, or None if not found"""
         self.refresh_cached_md()
         col = self._cached_tbl_version.cols_by_name.get(name)
         if col is not None:
             return col
-        elif self.base is not None and …
+        elif self.base is not None and self._cached_tbl_version.include_base_columns:
             return self.base.get_column(name)
         else:
             return None
@@ -206,10 +206,11 @@ class TableVersionPath:
         else:
             return None

-    def has_column(self, col: Column, …
+    def has_column(self, col: Column) -> bool:
         """Return True if this table has the given column."""
-        self.refresh_cached_md()
         assert col.tbl is not None
+        self.refresh_cached_md()
+
         if (
             col.tbl.id == self.tbl_version.id
             and col.tbl.effective_version == self.tbl_version.effective_version
@@ -217,7 +218,7 @@ class TableVersionPath:
         ):
             # the column is visible in this table version
             return True
-        elif self.base is not None …
+        elif self.base is not None:
             return self.base.has_column(col)
         else:
             return False
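
With the extra parameter dropped from its signature, `get_column` now resolves a name in the current table version and then walks the base chain, gated only by `include_base_columns`. A self-contained sketch of that resolution logic, with simplified column types standing in for pixeltable's:

from typing import Any, Optional


class PathSketch:
    # illustrative stand-in for TableVersionPath's column lookup
    def __init__(
        self, cols_by_name: dict[str, Any], base: Optional['PathSketch'] = None, include_base_columns: bool = True
    ) -> None:
        self.cols_by_name = cols_by_name
        self.base = base
        self.include_base_columns = include_base_columns

    def get_column(self, name: str) -> Optional[Any]:
        # look in this table version first, then recurse into the base path
        col = self.cols_by_name.get(name)
        if col is not None:
            return col
        elif self.base is not None and self.include_base_columns:
            return self.base.get_column(name)
        else:
            return None


base = PathSketch({'a': 'col_a'})
view = PathSketch({'b': 'col_b'}, base=base)
assert view.get_column('a') == 'col_a'  # resolved through the base chain
assert view.get_column('c') is None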
pixeltable/config.py
CHANGED
@@ -111,10 +111,19 @@ class Config:
             return default

     def get_value(self, key: str, expected_type: type[T], section: str = 'pixeltable') -> Optional[T]:
-        value = self.lookup_env(section, key)  # Try to get from environment first
+        value: Any = self.lookup_env(section, key)  # Try to get from environment first
         # Next try the config file
-        if value is None …
-        …
+        if value is None:
+            # Resolve nested section dicts
+            lookup_elems = [*section.split('.'), key]
+            value = self.__config_dict
+            for el in lookup_elems:
+                if isinstance(value, dict):
+                    if el not in value:
+                        return None
+                    value = value[el]
+                else:
+                    return None

         if value is None:
             return None  # Not specified
@@ -155,19 +164,26 @@ KNOWN_CONFIG_OPTIONS = {
     },
     'anthropic': {'api_key': 'Anthropic API key'},
     'bedrock': {'api_key': 'AWS Bedrock API key'},
-    'deepseek': {'api_key': 'Deepseek API key'},
-    'fireworks': {'api_key': 'Fireworks API key'},
-    'gemini': {'api_key': 'Gemini API key'},
-    '…
+    'deepseek': {'api_key': 'Deepseek API key', 'rate_limit': 'Rate limit for Deepseek API requests'},
+    'fireworks': {'api_key': 'Fireworks API key', 'rate_limit': 'Rate limit for Fireworks API requests'},
+    'gemini': {'api_key': 'Gemini API key', 'rate_limits': 'Per-model rate limits for Gemini API requests'},
+    'hf': {'auth_token': 'Hugging Face access token'},
+    'imagen': {'rate_limits': 'Per-model rate limits for Imagen API requests'},
+    'veo': {'rate_limits': 'Per-model rate limits for Veo API requests'},
+    'groq': {'api_key': 'Groq API key', 'rate_limit': 'Rate limit for Groq API requests'},
     'label_studio': {'api_key': 'Label Studio API key', 'url': 'Label Studio server URL'},
-    'mistral': {'api_key': 'Mistral API key'},
+    'mistral': {'api_key': 'Mistral API key', 'rate_limit': 'Rate limit for Mistral API requests'},
     'openai': {
         'api_key': 'OpenAI API key',
         'base_url': 'OpenAI API base URL',
         'api_version': 'API version if using Azure OpenAI',
+        'rate_limits': 'Per-model rate limits for OpenAI API requests',
     },
     'replicate': {'api_token': 'Replicate API token'},
-    'together': {…
+    'together': {
+        'api_key': 'Together API key',
+        'rate_limits': 'Per-model category rate limits for Together API requests',
+    },
     'pypi': {'api_key': 'PyPI API key (for internal use only)'},
 }
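
The new `get_value` body resolves dotted section names (e.g. 'openai.rate_limits') against nested config dicts. A standalone sketch of just that lookup step; the dict contents are illustrative, and the real method consults environment variables before falling back to the config file:

from typing import Any, Optional


def get_config_value(config: dict[str, Any], section: str, key: str) -> Optional[Any]:
    # walk [*section.split('.'), key] through nested dicts, as the new code does
    value: Any = config
    for el in [*section.split('.'), key]:
        if not isinstance(value, dict) or el not in value:
            return None
        value = value[el]
    return value


# hypothetical config mirroring the new per-model 'rate_limits' options
config = {'openai': {'api_key': 'sk-...', 'rate_limits': {'gpt-4o': 500}}}
assert get_config_value(config, 'openai.rate_limits', 'gpt-4o') == 500
assert get_config_value(config, 'openai', 'api_key') == 'sk-...'
assert get_config_value(config, 'openai.rate_limits', 'missing-model') is None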
pixeltable/dataframe.py
CHANGED
@@ -795,19 +795,19 @@ class DataFrame:
         assert len(col_refs) > 0 and len(joined_tbls) >= 2
         for col_ref in col_refs:
             # identify the referenced column by name in 'other'
-            rhs_col = other.get_column(col_ref.col.name, …)
+            rhs_col = other.get_column(col_ref.col.name)
             if rhs_col is None:
                 raise excs.Error(f"'on': column {col_ref.col.name!r} not found in joined table")
             rhs_col_ref = exprs.ColumnRef(rhs_col)

             lhs_col_ref: Optional[exprs.ColumnRef] = None
-            if any(tbl.has_column(col_ref.col, …
+            if any(tbl.has_column(col_ref.col) for tbl in self._from_clause.tbls):
                 # col_ref comes from the existing from_clause, we use that directly
                 lhs_col_ref = col_ref
             else:
                 # col_ref comes from other, we need to look for a match in the existing from_clause by name
                 for tbl in self._from_clause.tbls:
-                    col = tbl.get_column(col_ref.col.name, …)
+                    col = tbl.get_column(col_ref.col.name)
                     if col is None:
                         continue
                     if lhs_col_ref is not None: