pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +119 -100
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +118 -122
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +322 -257
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +68 -77
- pixeltable/env.py +74 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +4 -5
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +25 -25
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +18 -20
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +2 -24
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +52 -36
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/video.py +8 -13
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +30 -28
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +125 -61
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +8 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.18.dist-info/RECORD +0 -211
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py
CHANGED
pixeltable/_version.py
ADDED
@@ -0,0 +1 @@
+__version__: str = '0.4.19'
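
This release replaces the old `pixeltable/__version__.py` module with the one-line `pixeltable/_version.py` shown above. The corresponding one-line change to `pixeltable/__init__.py` is collapsed in this diff; assuming it simply re-exports the new module (the usual single-source-of-truth pattern for a package version), the wiring would look roughly like this sketch:

    # sketch only: the actual pixeltable/__init__.py change is not shown in this diff
    from pixeltable._version import __version__

    # downstream code can then read the version as before:
    import pixeltable as pxt
    print(pxt.__version__)  # '0.4.19'
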
pixeltable/catalog/catalog.py
CHANGED
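
Most of the annotation changes in this file replace `typing.Optional[...]` with PEP 604 unions; the removed lines in the hunks below are often truncated at the point where the annotation changed. A minimal, generic illustration of the before/after style (not code from the package):

    from typing import Optional

    # before: typing.Optional spelling
    def lookup(key: str, default: Optional[int] = None) -> Optional[int]: ...

    # after: PEP 604 union spelling, equivalent for type checkers and at runtime on Python 3.10+
    def lookup(key: str, default: int | None = None) -> int | None: ...
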
@@ -7,7 +7,7 @@ import random
 import time
 from collections import defaultdict
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Any, Callable, Iterator,
+from typing import TYPE_CHECKING, Any, Callable, Iterator, TypeVar
 from uuid import UUID

 import psycopg
@@ -37,15 +37,13 @@ from .view import View
 if TYPE_CHECKING:
     from pixeltable.plan import SampleClause

-    from .. import
+    from .. import exprs


 _logger = logging.getLogger('pixeltable')


-def _unpack_row(
-    row: Optional[sql.engine.Row], entities: list[type[sql.orm.decl_api.DeclarativeBase]]
-) -> Optional[list[Any]]:
+def _unpack_row(row: sql.engine.Row | None, entities: list[type[sql.orm.decl_api.DeclarativeBase]]) -> list[Any] | None:
     """Convert a Row result into a list of entity instances.

     Assumes that the query contains a select() of exactly those entities.
@@ -75,7 +73,7 @@ T = TypeVar('T')


 def retry_loop(
-    *, tbl:
+    *, tbl: TableVersionPath | None = None, for_write: bool, lock_mutable_tree: bool = False
 ) -> Callable[[Callable[..., T]], Callable[..., T]]:
     def decorator(op: Callable[..., T]) -> Callable[..., T]:
         @functools.wraps(op)
@@ -159,13 +157,13 @@ class Catalog:
     - metadata validation is only needed for live TableVersion instances (snapshot instances are immutable)
     """

-    _instance:
+    _instance: Catalog | None = None

     # cached TableVersion instances; key: [id, version]
     # - mutable version of a table: version == None (even though TableVersion.version is set correctly)
     # - snapshot versions: records the version of the snapshot
-    _tbl_versions: dict[tuple[UUID,
-    _tbls: dict[tuple[UUID,
+    _tbl_versions: dict[tuple[UUID, int | None], TableVersion]
+    _tbls: dict[tuple[UUID, int | None], Table]
     _in_write_xact: bool # True if we're in a write transaction
     _x_locked_tbl_ids: set[UUID] # non-empty for write transactions
     _modified_tvs: set[TableVersionHandle] # TableVersion instances modified in the current transaction
@@ -179,7 +177,7 @@ class Catalog:
     _column_dependencies: dict[UUID, dict[QColumnId, set[QColumnId]]]

     # column dependents are recomputed at the beginning of every write transaction and only reflect the locked tree
-    _column_dependents:
+    _column_dependents: dict[QColumnId, set[QColumnId]] | None

     @classmethod
     def get(cls) -> Catalog:
@@ -258,8 +256,8 @@ class Catalog:
     def begin_xact(
         self,
         *,
-        tbl:
-        tbl_id:
+        tbl: TableVersionPath | None = None,
+        tbl_id: UUID | None = None,
         for_write: bool = False,
         lock_mutable_tree: bool = False,
         convert_db_excs: bool = True,
@@ -303,7 +301,7 @@ class Catalog:
         # )
         # _logger.debug(f'begin_xact(): {tv_msg}')
         num_retries = 0
-        pending_ops_tbl_id:
+        pending_ops_tbl_id: UUID | None = None
         has_exc = False # True if we exited the 'with ...begin_xact()' block with an exception
         while True:
             if pending_ops_tbl_id is not None:
@@ -322,7 +320,7 @@ class Catalog:
             with Env.get().begin_xact(for_write=for_write) as conn:
                 if tbl is not None or tbl_id is not None:
                     try:
-                        target:
+                        target: TableVersionHandle | None = None
                         if tbl is not None:
                             if self._acquire_path_locks(
                                 tbl=tbl,
@@ -451,7 +449,7 @@ class Catalog:
                 if isinstance(e.orig, psycopg.errors.UndefinedTable) and tbl is not None:
                     # the table got dropped in the middle of the operation
                     tbl_name = tbl.get().name
-                    _logger.debug(f'Exception: undefined table
+                    _logger.debug(f'Exception: undefined table {tbl_name!r}: Caught {type(e.orig)}: {e!r}')
                     raise excs.Error(f'Table was dropped: {tbl_name}') from None
                 elif (
                     isinstance(
@@ -490,7 +488,7 @@ class Catalog:
         tbl: TableVersionPath,
         for_write: bool = False,
         lock_mutable_tree: bool = False,
-        check_pending_ops:
+        check_pending_ops: bool | None = None,
     ) -> bool:
         """
         Path locking protocol:
@@ -524,13 +522,13 @@ class Catalog:
         self,
         *,
         for_write: bool,
-        tbl_id:
-        dir_id:
-        tbl_name:
+        tbl_id: UUID | None = None,
+        dir_id: UUID | None = None,
+        tbl_name: str | None = None,
         lock_mutable_tree: bool = False,
         raise_if_not_exists: bool = True,
-        check_pending_ops:
-    ) ->
+        check_pending_ops: bool | None = None,
+    ) -> TableVersionHandle | None:
         """
         For writes: force acquisition of an X-lock on a Table record via a blind update.

@@ -594,7 +592,7 @@ class Catalog:
         while True:
             try:
                 tbl_version: int
-                op:
+                op: TableOp | None = None
                 delete_next_op_stmt: sql.Delete
                 reset_has_pending_stmt: sql.Update
                 with self.begin_xact(
@@ -613,7 +611,10 @@ class Catalog:
                     row = conn.execute(q).one_or_none()
                     if row is None:
                         return
-
+                    view_md = row.md.get('view_md')
+                    is_snapshot = False if view_md is None else view_md.get('is_snapshot')
+                    assert is_snapshot is not None
+                    tbl_version = row.md.get('current_version') if is_snapshot else None
                     op = schema.md_from_dict(TableOp, row.op)
                     delete_next_op_stmt = sql.delete(schema.PendingTableOp).where(
                         schema.PendingTableOp.tbl_id == tbl_id, schema.PendingTableOp.op_sn == row.op_sn
@@ -720,7 +721,7 @@ class Catalog:
         return result

     def _acquire_dir_xlock(
-        self, *, parent_id:
+        self, *, parent_id: UUID | None = None, dir_id: UUID | None = None, dir_name: str | None = None
     ) -> None:
         """Force acquisition of an X-lock on a Dir record via a blind update.

@@ -760,9 +761,9 @@ class Catalog:

     @dataclasses.dataclass
     class DirEntry:
-        dir:
+        dir: schema.Dir | None
         dir_entries: dict[str, Catalog.DirEntry]
-        table:
+        table: schema.Table | None

     @retry_loop(for_write=False)
     def get_dir_contents(self, dir_path: Path, recursive: bool = False) -> dict[str, DirEntry]:
@@ -814,14 +815,14 @@ class Catalog:

     def _prepare_dir_op(
         self,
-        add_dir_path:
-        add_name:
-        drop_dir_path:
-        drop_name:
-        drop_expected:
+        add_dir_path: Path | None = None,
+        add_name: str | None = None,
+        drop_dir_path: Path | None = None,
+        drop_name: str | None = None,
+        drop_expected: type[SchemaObject] | None = None,
         raise_if_exists: bool = False,
         raise_if_not_exists: bool = False,
-    ) -> tuple[
+    ) -> tuple[SchemaObject | None, Dir | None, SchemaObject | None]:
         """
         Validates paths and acquires locks needed for a directory operation, ie, add/drop/rename (add + drop) of a
         directory entry.
@@ -848,25 +849,29 @@ class Catalog:
         if drop_dir_path is not None:
             dir_paths.add(drop_dir_path)

-        add_dir:
-        drop_dir:
+        add_dir: schema.Dir | None = None
+        drop_dir: schema.Dir | None = None
         for p in sorted(dir_paths):
             dir = self._get_dir(p, lock_dir=True)
             if dir is None:
-
+                # Dir does not exist; raise an appropriate error.
+                if add_dir_path is not None or add_name is not None:
+                    raise excs.Error(f'Directory {p!r} does not exist. Create it first with:\npxt.create_dir({p!r})')
+                else:
+                    raise excs.Error(f'Directory {p!r} does not exist.')
             if p == add_dir_path:
                 add_dir = dir
             if p == drop_dir_path:
                 drop_dir = dir

-        add_obj:
+        add_obj: SchemaObject | None = None
         if add_dir is not None:
             add_obj = self._get_dir_entry(add_dir.id, add_name, lock_entry=True)
             if add_obj is not None and raise_if_exists:
                 add_path = add_dir_path.append(add_name)
                 raise excs.Error(f'Path {add_path!r} already exists.')

-        drop_obj:
+        drop_obj: SchemaObject | None = None
         if drop_dir is not None:
             drop_path = drop_dir_path.append(drop_name)
             drop_obj = self._get_dir_entry(drop_dir.id, drop_name, lock_entry=True)
@@ -880,8 +885,8 @@ class Catalog:
         return add_obj, add_dir_obj, drop_obj

     def _get_dir_entry(
-        self, dir_id: UUID, name: str, version:
-    ) ->
+        self, dir_id: UUID, name: str, version: int | None = None, lock_entry: bool = False
+    ) -> SchemaObject | None:
         user = Env.get().user
         conn = Env.get().conn

@@ -918,12 +923,12 @@ class Catalog:
     def _get_schema_object(
         self,
         path: Path,
-        expected:
+        expected: type[SchemaObject] | None = None,
         raise_if_exists: bool = False,
         raise_if_not_exists: bool = False,
         lock_parent: bool = False,
         lock_obj: bool = False,
-    ) ->
+    ) -> SchemaObject | None:
         """Return the schema object at the given path, or None if it doesn't exist.

         Raises Error if
@@ -958,7 +963,7 @@ class Catalog:
             raise excs.Error(f'{path!r} needs to be a {expected_name} but is a {obj._display_name()}.')
         return obj

-    def get_table_by_id(self, tbl_id: UUID, version:
+    def get_table_by_id(self, tbl_id: UUID, version: int | None = None) -> Table | None:
         """Must be executed inside a transaction. Might raise PendingTableOpsError."""
         if (tbl_id, version) not in self._tbls:
             if version is None:
@@ -967,17 +972,16 @@ class Catalog:
                 return self._load_tbl_at_version(tbl_id, version)
         return self._tbls.get((tbl_id, version))

-    @retry_loop(for_write=True)
     def create_table(
         self,
         path: Path,
         schema: dict[str, Any],
-        df: 'DataFrame',
         if_exists: IfExistsParam,
-        primary_key:
+        primary_key: list[str] | None,
         num_retained_versions: int,
         comment: str,
         media_validation: MediaValidation,
+        create_default_idxs: bool,
     ) -> tuple[Table, bool]:
         """
         Creates a new InsertableTable at the given path.
@@ -986,37 +990,49 @@ class Catalog:

         Otherwise, creates a new table `t` and returns `t, True` (or raises an exception if the operation fails).
         """
-        existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
-        if existing is not None:
-            assert isinstance(existing, Table)
-            return existing, False

-
-
+        @retry_loop(for_write=True)
+        def create_fn() -> tuple[UUID, bool]:
+            existing = self._handle_path_collision(path, InsertableTable, False, if_exists)
+            if existing is not None:
+                assert isinstance(existing, Table)
+                return existing._id, False

-
-        dir
-
-
-
-
-
-
-
-
-
+            dir = self._get_schema_object(path.parent, expected=Dir, raise_if_not_exists=True)
+            assert dir is not None
+
+            md, ops = InsertableTable._create(
+                path.name,
+                schema,
+                primary_key=primary_key,
+                num_retained_versions=num_retained_versions,
+                comment=comment,
+                media_validation=media_validation,
+                create_default_idxs=create_default_idxs,
+            )
+            tbl_id = UUID(md.tbl_md.tbl_id)
+            self.store_tbl_md(tbl_id, dir._id, md.tbl_md, md.version_md, md.schema_version_md, ops)
+            return tbl_id, True
+
+        tbl_id, is_created = create_fn()
+        # finalize pending ops
+        with self.begin_xact(tbl_id=tbl_id, for_write=True, finalize_pending_ops=True):
+            tbl = self.get_table_by_id(tbl_id)
+            _logger.info(f'Created table {tbl._name!r}, id={tbl._id}')
+            Env.get().console_logger.info(f'Created table {tbl._name!r}.')
+            return tbl, is_created

     def create_view(
         self,
         path: Path,
         base: TableVersionPath,
-        select_list:
-        where:
-        sample_clause:
-        additional_columns:
+        select_list: list[tuple[exprs.Expr, str | None]] | None,
+        where: exprs.Expr | None,
+        sample_clause: 'SampleClause' | None,
+        additional_columns: dict[str, Any] | None,
         is_snapshot: bool,
-
+        create_default_idxs: bool,
+        iterator: tuple[type[ComponentIterator], dict[str, Any]] | None,
         num_retained_versions: int,
         comment: str,
         media_validation: MediaValidation,
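
The `create_table` hunk above moves the metadata write into a locally defined `create_fn` decorated with `retry_loop(for_write=True)`, then finalizes pending ops in a separate transaction. As a generic, self-contained sketch of that decorate-an-inner-function retry pattern (the `retry_loop` below is a simplified stand-in, not pixeltable's implementation):

    import functools
    from typing import Callable, TypeVar

    T = TypeVar('T')

    def retry_loop(*, attempts: int = 3) -> Callable[[Callable[..., T]], Callable[..., T]]:
        # simplified stand-in: re-run the wrapped callable on a transient error
        def decorator(op: Callable[..., T]) -> Callable[..., T]:
            @functools.wraps(op)
            def wrapper(*args, **kwargs) -> T:
                last_exc: Exception | None = None
                for _ in range(attempts):
                    try:
                        return op(*args, **kwargs)
                    except RuntimeError as exc:  # placeholder for a retryable error type
                        last_exc = exc
                assert last_exc is not None
                raise last_exc
            return wrapper
        return decorator

    def create_table(name: str) -> str:
        # only the work inside create_fn is re-run on a transient failure
        @retry_loop(attempts=3)
        def create_fn() -> str:
            return f'{name}-id'  # e.g. write metadata and return the new table id

        tbl_id = create_fn()
        # follow-up work (e.g. finalizing pending ops) runs once, outside the retried block
        return tbl_id
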
@@ -1057,6 +1073,7 @@ class Catalog:
             predicate=where,
             sample_clause=sample_clause,
             is_snapshot=is_snapshot,
+            create_default_idxs=create_default_idxs,
             iterator_cls=iterator_class,
             iterator_args=iterator_args,
             num_retained_versions=num_retained_versions,
@@ -1079,7 +1096,7 @@ class Catalog:
         with self.begin_xact(tbl_id=view_id, for_write=True, finalize_pending_ops=True):
             return self.get_table_by_id(view_id)

-    def _clear_tv_cache(self, tbl_id: UUID, effective_version:
+    def _clear_tv_cache(self, tbl_id: UUID, effective_version: int | None) -> None:
         if (tbl_id, effective_version) in self._tbl_versions:
             tv = self._tbl_versions[tbl_id, effective_version]
             tv.is_validated = False
@@ -1176,9 +1193,9 @@ class Catalog:
         conn = Env.get().conn
         tbl_id = md.tbl_md.tbl_id

-        new_tbl_md:
-        new_version_md:
-        new_schema_version_md:
+        new_tbl_md: schema.TableMd | None = None
+        new_version_md: schema.TableVersionMd | None = None
+        new_schema_version_md: schema.TableSchemaVersionMd | None = None
         is_new_tbl_version: bool = False

         # We need to ensure that the table metadata in the catalog always reflects the latest observed version of
@@ -1352,11 +1369,11 @@ class Catalog:
             msg: str
             if is_replace:
                 msg = (
-                    f'{tbl.
+                    f'{tbl._display_str()} already exists and has dependents. '
                     "Use `if_exists='replace_force'` to replace it."
                 )
             else:
-                msg = f'{tbl.
+                msg = f'{tbl._display_str()} has dependents.'
             raise excs.Error(msg)

         # if this is a mutable view of a mutable base, advance the base's view_sn
@@ -1431,7 +1448,7 @@ class Catalog:

         if parents:
             # start walking down from the root
-            last_parent:
+            last_parent: SchemaObject | None = None
             for ancestor in path.ancestors():
                 ancestor_obj = self._get_schema_object(ancestor, expected=Dir)
                 assert ancestor_obj is not None or last_parent is not None
@@ -1507,10 +1524,10 @@ class Catalog:
     def get_tbl_version(
         self,
         tbl_id: UUID,
-        effective_version:
-        check_pending_ops:
+        effective_version: int | None,
+        check_pending_ops: bool | None = None,
         validate_initialized: bool = False,
-    ) ->
+    ) -> TableVersion | None:
         """
         Returns the TableVersion instance for the given table and version and updates the cache.

@@ -1559,7 +1576,7 @@ class Catalog:
         assert (tbl_version.id, tbl_version.effective_version) in self._tbl_versions
         del self._tbl_versions[tbl_version.id, tbl_version.effective_version]

-    def get_dir(self, dir_id: UUID, for_update: bool = False) ->
+    def get_dir(self, dir_id: UUID, for_update: bool = False) -> Dir | None:
         """Return the Dir with the given id, or None if it doesn't exist"""
         conn = Env.get().conn
         if for_update:
@@ -1571,7 +1588,7 @@ class Catalog:
         dir_record = schema.Dir(**row._mapping)
         return Dir(dir_record.id, dir_record.parent_id, dir_record.md['name'])

-    def _get_dir(self, path: Path, lock_dir: bool = False) ->
+    def _get_dir(self, path: Path, lock_dir: bool = False) -> schema.Dir | None:
         """
         lock_dir: if True, X-locks target (but not the ancestors)
         """
@@ -1597,7 +1614,7 @@ class Catalog:
         row = conn.execute(q).one_or_none()
         return schema.Dir(**row._mapping) if row is not None else None

-    def _load_tbl(self, tbl_id: UUID) ->
+    def _load_tbl(self, tbl_id: UUID) -> Table | None:
         """Loads metadata for the table with the given id and caches it."""
         _logger.info(f'Loading table {tbl_id}')
         from .insertable_table import InsertableTable
@@ -1657,7 +1674,7 @@ class Catalog:
             return tbl

         # this is a view; determine the sequence of TableVersions to load
-        tbl_version_path: list[tuple[UUID,
+        tbl_version_path: list[tuple[UUID, int | None]] = []
         if tbl_md.is_pure_snapshot:
             # this is a pure snapshot, without a physical table backing it; we only need the bases
             pass
@@ -1670,8 +1687,8 @@ class Catalog:
             tbl_version_path.extend((UUID(tbl_id), version) for tbl_id, version in view_md.base_versions)

         # load TableVersions, starting at the root
-        base_path:
-        view_path:
+        base_path: TableVersionPath | None = None
+        view_path: TableVersionPath | None = None
         for id, effective_version in tbl_version_path[::-1]:
             if (id, effective_version) not in self._tbl_versions:
                 _ = self._load_tbl_version(id, effective_version)
@@ -1681,7 +1698,7 @@ class Catalog:
         self._tbls[tbl_id, None] = view
         return view

-    def _load_tbl_at_version(self, tbl_id: UUID, version: int) ->
+    def _load_tbl_at_version(self, tbl_id: UUID, version: int) -> Table | None:
         from .view import View

         # Load the specified TableMd and TableVersionMd records from the db.
@@ -1708,7 +1725,7 @@ class Catalog:
         # Build the list of ancestor versions, starting with the given table and traversing back to the base table.
         # For each proper ancestor, we use the version whose created_at timestamp equals or most nearly precedes the
         # given TableVersion's created_at timestamp.
-        ancestors: list[tuple[UUID,
+        ancestors: list[tuple[UUID, int | None]] = [(tbl_id, version)]
         if tbl_md.view_md is not None:
             for ancestor_id, _ in tbl_md.view_md.base_versions:
                 q = (
@@ -1735,7 +1752,7 @@ class Catalog:
                 _ = self._load_tbl_version(anc_id, anc_version)

         # Now reconstruct the relevant TableVersionPath instance from the ancestor versions.
-        tvp:
+        tvp: TableVersionPath | None = None
         for anc_id, anc_version in ancestors[::-1]:
             tvp = TableVersionPath(TableVersionHandle(anc_id, anc_version), base=tvp)

@@ -1744,10 +1761,10 @@ class Catalog:
         return view

     @retry_loop(for_write=False)
-    def collect_tbl_history(self, tbl_id: UUID, n:
+    def collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[schema.FullTableMd]:
         return self._collect_tbl_history(tbl_id, n)

-    def _collect_tbl_history(self, tbl_id: UUID, n:
+    def _collect_tbl_history(self, tbl_id: UUID, n: int | None) -> list[schema.FullTableMd]:
         """
         Returns the history of up to n versions of the table with the given UUID.

@@ -1783,7 +1800,7 @@ class Catalog:
             for row in src_rows
         ]

-    def load_tbl_md(self, tbl_id: UUID, effective_version:
+    def load_tbl_md(self, tbl_id: UUID, effective_version: int | None) -> schema.FullTableMd:
         """
         Loads metadata from the store for a given table UUID and version.
         """
@@ -1839,11 +1856,11 @@ class Catalog:
     def store_tbl_md(
         self,
         tbl_id: UUID,
-        dir_id:
-        tbl_md:
-        version_md:
-        schema_version_md:
-        pending_ops:
+        dir_id: UUID | None,
+        tbl_md: schema.TableMd | None,
+        version_md: schema.TableVersionMd | None,
+        schema_version_md: schema.TableSchemaVersionMd | None,
+        pending_ops: list[TableOp] | None = None,
     ) -> None:
         """
         Stores metadata to the DB.
@@ -1885,6 +1902,7 @@ class Catalog:
                 .values({schema.Table.md: dataclasses.asdict(tbl_md)})
                 .where(schema.Table.id == tbl_id)
             )
+            assert isinstance(result, sql.CursorResult)
             assert result.rowcount == 1, result.rowcount

             # Construct and insert new table version record if requested.
@@ -1914,6 +1932,7 @@ class Catalog:
                 .values({schema.TableVersion.md: dataclasses.asdict(version_md)})
                 .where(schema.TableVersion.tbl_id == tbl_id, schema.TableVersion.version == version_md.version)
             )
+            assert isinstance(result, sql.CursorResult)
             assert result.rowcount == 1, result.rowcount

             # Construct and insert a new schema version record if requested.
@@ -1995,8 +2014,8 @@ class Catalog:
         return md

     def _load_tbl_version(
-        self, tbl_id: UUID, effective_version:
-    ) ->
+        self, tbl_id: UUID, effective_version: int | None, check_pending_ops: bool = True
+    ) -> TableVersion | None:
         """Creates TableVersion instance from stored metadata and registers it in _tbl_versions."""
         tbl_md, version_md, schema_version_md = self.load_tbl_md(tbl_id, effective_version)
         view_md = tbl_md.view_md
@@ -2052,7 +2071,7 @@ class Catalog:
         assert not pure_snapshot # a pure snapshot doesn't have a physical table backing it, no point in loading it

         base: TableVersionHandle
-        base_path:
+        base_path: TableVersionPath | None = None # needed for live view
         if view_md.is_snapshot:
             base = TableVersionHandle(UUID(view_md.base_versions[0][0]), view_md.base_versions[0][1])
         else:
@@ -2083,7 +2102,7 @@ class Catalog:
         self.create_user(None)
         _logger.info('Initialized catalog.')

-    def create_user(self, user:
+    def create_user(self, user: str | None) -> None:
         """
         Creates a catalog record (root directory) for the specified user, if one does not already exist.
         """
@@ -2102,7 +2121,7 @@ class Catalog:

     def _handle_path_collision(
         self, path: Path, expected_obj_type: type[SchemaObject], expected_snapshot: bool, if_exists: IfExistsParam
-    ) ->
+    ) -> SchemaObject | None:
         obj, _, _ = self._prepare_dir_op(add_dir_path=path.parent, add_name=path.name)

         if if_exists == IfExistsParam.ERROR and obj is not None: