pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import TYPE_CHECKING
|
|
5
6
|
from uuid import UUID
|
|
6
7
|
|
|
7
|
-
from
|
|
8
|
+
from pixeltable import exceptions as excs
|
|
9
|
+
|
|
10
|
+
from .table_version import TableVersion, TableVersionKey
|
|
8
11
|
|
|
9
12
|
if TYPE_CHECKING:
|
|
10
|
-
|
|
13
|
+
from pixeltable.catalog import Column
|
|
11
14
|
|
|
12
15
|
_logger = logging.getLogger('pixeltable')
|
|
13
16
|
|
|
@@ -15,15 +18,15 @@ _logger = logging.getLogger('pixeltable')
|
|
|
15
18
|
class TableVersionHandle:
|
|
16
19
|
"""
|
|
17
20
|
Indirection mechanism for TableVersion instances, which get resolved against the catalog at runtime.
|
|
21
|
+
|
|
22
|
+
See the TableVersion docstring for details on the semantics of `effective_version` and `anchor_tbl_id`.
|
|
18
23
|
"""
|
|
19
24
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
_tbl_version: Optional[TableVersion]
|
|
25
|
+
key: TableVersionKey
|
|
26
|
+
_tbl_version: TableVersion | None
|
|
23
27
|
|
|
24
|
-
def __init__(self,
|
|
25
|
-
self.
|
|
26
|
-
self.effective_version = effective_version
|
|
28
|
+
def __init__(self, key: TableVersionKey, *, tbl_version: TableVersion | None = None):
|
|
29
|
+
self.key = key
|
|
27
30
|
self._tbl_version = tbl_version
|
|
28
31
|
|
|
29
32
|
def __eq__(self, other: object) -> bool:
|
|
@@ -34,20 +37,73 @@ class TableVersionHandle:
|
|
|
34
37
|
def __hash__(self) -> int:
|
|
35
38
|
return hash((self.id, self.effective_version))
|
|
36
39
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
+
def __repr__(self) -> str:
|
|
41
|
+
return (
|
|
42
|
+
f'TableVersionHandle(id={self.id!r}, effective_version={self.effective_version}, '
|
|
43
|
+
f'anchor_tbl_id={self.anchor_tbl_id})'
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
@property
|
|
47
|
+
def id(self) -> UUID:
|
|
48
|
+
return self.key.tbl_id
|
|
49
|
+
|
|
50
|
+
@property
|
|
51
|
+
def effective_version(self) -> int | None:
|
|
52
|
+
return self.key.effective_version
|
|
53
|
+
|
|
54
|
+
@property
|
|
55
|
+
def anchor_tbl_id(self) -> UUID | None:
|
|
56
|
+
return self.key.anchor_tbl_id
|
|
57
|
+
|
|
58
|
+
@property
|
|
59
|
+
def is_snapshot(self) -> bool:
|
|
60
|
+
return self.effective_version is not None
|
|
40
61
|
|
|
41
62
|
def get(self) -> TableVersion:
|
|
42
63
|
from .catalog import Catalog
|
|
43
64
|
|
|
44
|
-
|
|
45
|
-
|
|
65
|
+
cat = Catalog.get()
|
|
66
|
+
if self._tbl_version is None or not self._tbl_version.is_validated:
|
|
67
|
+
if self.effective_version is not None and self._tbl_version is not None:
|
|
68
|
+
# this is a snapshot version; we need to make sure we refer to the instance cached
|
|
69
|
+
# in Catalog, in order to avoid mixing sa_tbl instances in the same transaction
|
|
70
|
+
# (which will lead to duplicates in the From clause generated in SqlNode.create_from_clause())
|
|
71
|
+
assert self.key in cat._tbl_versions
|
|
72
|
+
self._tbl_version = cat._tbl_versions[self.key]
|
|
73
|
+
self._tbl_version.is_validated = True
|
|
74
|
+
else:
|
|
75
|
+
self._tbl_version = Catalog.get().get_tbl_version(self.key)
|
|
76
|
+
assert self._tbl_version.key == self.key
|
|
77
|
+
if self.effective_version is None:
|
|
78
|
+
tvs = list(Catalog.get()._tbl_versions.values())
|
|
79
|
+
assert self._tbl_version in tvs, self._tbl_version
|
|
46
80
|
return self._tbl_version
|
|
47
81
|
|
|
48
82
|
def as_dict(self) -> dict:
|
|
49
|
-
return
|
|
83
|
+
return self.key.as_dict()
|
|
50
84
|
|
|
51
85
|
@classmethod
|
|
52
86
|
def from_dict(cls, d: dict) -> TableVersionHandle:
|
|
53
|
-
return cls(
|
|
87
|
+
return cls(TableVersionKey.from_dict(d))
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass(frozen=True)
|
|
91
|
+
class ColumnHandle:
|
|
92
|
+
tbl_version: TableVersionHandle
|
|
93
|
+
col_id: int
|
|
94
|
+
|
|
95
|
+
def get(self) -> 'Column':
|
|
96
|
+
if self.col_id not in self.tbl_version.get().cols_by_id:
|
|
97
|
+
schema_version_drop = self.tbl_version.get()._tbl_md.column_md[self.col_id].schema_version_drop
|
|
98
|
+
raise excs.Error(
|
|
99
|
+
f'Column was dropped (no record for column ID {self.col_id} in table '
|
|
100
|
+
f'{self.tbl_version.get().versioned_name!r}; it was dropped in table version {schema_version_drop})'
|
|
101
|
+
)
|
|
102
|
+
return self.tbl_version.get().cols_by_id[self.col_id]
|
|
103
|
+
|
|
104
|
+
def as_dict(self) -> dict:
|
|
105
|
+
return {'tbl_version': self.tbl_version.as_dict(), 'col_id': self.col_id}
|
|
106
|
+
|
|
107
|
+
@classmethod
|
|
108
|
+
def from_dict(cls, d: dict) -> ColumnHandle:
|
|
109
|
+
return cls(tbl_version=TableVersionHandle.from_dict(d['tbl_version']), col_id=d['col_id'])
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
|
-
from typing import Optional
|
|
5
4
|
from uuid import UUID
|
|
6
5
|
|
|
6
|
+
from pixeltable.env import Env
|
|
7
7
|
from pixeltable.metadata import schema
|
|
8
8
|
|
|
9
9
|
from .column import Column
|
|
10
|
+
from .globals import MediaValidation
|
|
11
|
+
from .table_version import TableVersion, TableVersionKey
|
|
10
12
|
from .table_version_handle import TableVersionHandle
|
|
11
13
|
|
|
12
14
|
_logger = logging.getLogger('pixeltable')
|
|
@@ -22,23 +24,40 @@ class TableVersionPath:
|
|
|
22
24
|
|
|
23
25
|
TableVersionPath contains all metadata needed to execute queries and updates against a particular version of a
|
|
24
26
|
table/view.
|
|
27
|
+
|
|
28
|
+
TableVersionPath supplies metadata needed for query construction (eg, column names), for which it uses a
|
|
29
|
+
cached TableVersion instance.
|
|
30
|
+
- when running inside a transaction, this instance is guaranteed to be validated
|
|
31
|
+
- when running outside a transaction, we use an unvalidated instance in order to avoid repeated validation
|
|
32
|
+
on every metadata-related method call (the instance won't stay validated, because TableVersionHandle.get()
|
|
33
|
+
runs a local transaction, at the end of which the instance is again invalidated)
|
|
34
|
+
- supplying metadata from an unvalidated instance is okay, because it needs to get revalidated anyway when a
|
|
35
|
+
query actually runs (at which point there is a transaction context) - there is no guarantee that in between
|
|
36
|
+
constructing a Query and executing it, the underlying table schema hasn't changed (eg, a concurrent process
|
|
37
|
+
could have dropped a column referenced in the query).
|
|
25
38
|
"""
|
|
26
39
|
|
|
27
40
|
tbl_version: TableVersionHandle
|
|
28
|
-
base:
|
|
41
|
+
base: TableVersionPath | None
|
|
42
|
+
_cached_tbl_version: TableVersion | None
|
|
29
43
|
|
|
30
|
-
def __init__(self, tbl_version: TableVersionHandle, base:
|
|
44
|
+
def __init__(self, tbl_version: TableVersionHandle, base: TableVersionPath | None = None):
|
|
31
45
|
assert tbl_version is not None
|
|
32
46
|
self.tbl_version = tbl_version
|
|
33
47
|
self.base = base
|
|
48
|
+
self._cached_tbl_version = None
|
|
49
|
+
|
|
50
|
+
if self.base is not None and tbl_version.anchor_tbl_id is not None:
|
|
51
|
+
self.base = self.base.anchor_to(tbl_version.anchor_tbl_id)
|
|
34
52
|
|
|
35
53
|
@classmethod
|
|
36
54
|
def from_md(cls, path: schema.TableVersionPath) -> TableVersionPath:
|
|
37
55
|
assert len(path) > 0
|
|
38
|
-
result:
|
|
56
|
+
result: TableVersionPath | None = None
|
|
39
57
|
for tbl_id_str, effective_version in path[::-1]:
|
|
40
58
|
tbl_id = UUID(tbl_id_str)
|
|
41
|
-
|
|
59
|
+
key = TableVersionKey(tbl_id, effective_version, None)
|
|
60
|
+
result = TableVersionPath(TableVersionHandle(key), base=result)
|
|
42
61
|
return result
|
|
43
62
|
|
|
44
63
|
def as_md(self) -> schema.TableVersionPath:
|
|
@@ -47,17 +66,59 @@ class TableVersionPath:
|
|
|
47
66
|
result.extend(self.base.as_md())
|
|
48
67
|
return result
|
|
49
68
|
|
|
69
|
+
def refresh_cached_md(self) -> None:
|
|
70
|
+
from pixeltable.catalog import Catalog
|
|
71
|
+
|
|
72
|
+
if Env.get().in_xact:
|
|
73
|
+
# when we're running inside a transaction, we need to make sure to supply current metadata;
|
|
74
|
+
# mixing stale metadata with current metadata leads to query construction failures
|
|
75
|
+
# (multiple sqlalchemy Table instances for the same underlying table create corrupted From clauses)
|
|
76
|
+
if self._cached_tbl_version is not None and self._cached_tbl_version.is_validated:
|
|
77
|
+
# nothing to refresh
|
|
78
|
+
return
|
|
79
|
+
elif self._cached_tbl_version is not None:
|
|
80
|
+
return
|
|
81
|
+
|
|
82
|
+
with Catalog.get().begin_xact(tbl_id=self.tbl_version.id, for_write=False):
|
|
83
|
+
self._cached_tbl_version = self.tbl_version.get()
|
|
84
|
+
|
|
85
|
+
def anchor_to(self, anchor_tbl_id: UUID | None) -> TableVersionPath:
|
|
86
|
+
"""
|
|
87
|
+
Return a new TableVersionPath with all of its non-snapshot TableVersions pointing to the given anchor_tbl_id.
|
|
88
|
+
(This will clear the existing anchor_tbl_id in the case anchor_tbl_id=None.)
|
|
89
|
+
"""
|
|
90
|
+
if self.tbl_version.effective_version is not None:
|
|
91
|
+
return self
|
|
92
|
+
|
|
93
|
+
return TableVersionPath(
|
|
94
|
+
TableVersionHandle(TableVersionKey(self.tbl_version.id, None, anchor_tbl_id)),
|
|
95
|
+
base=self.base.anchor_to(anchor_tbl_id) if self.base is not None else None,
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
def clear_cached_md(self) -> None:
|
|
99
|
+
self._cached_tbl_version = None
|
|
100
|
+
if self.base is not None:
|
|
101
|
+
self.base.clear_cached_md()
|
|
102
|
+
|
|
103
|
+
@property
|
|
50
104
|
def tbl_id(self) -> UUID:
|
|
51
105
|
"""Return the id of the table/view that this path represents"""
|
|
52
106
|
return self.tbl_version.id
|
|
53
107
|
|
|
54
108
|
def version(self) -> int:
|
|
55
109
|
"""Return the version of the table/view that this path represents"""
|
|
56
|
-
|
|
110
|
+
self.refresh_cached_md()
|
|
111
|
+
return self._cached_tbl_version.version
|
|
112
|
+
|
|
113
|
+
def schema_version(self) -> int:
|
|
114
|
+
"""Return the version of the table/view that this path represents"""
|
|
115
|
+
self.refresh_cached_md()
|
|
116
|
+
return self._cached_tbl_version.schema_version
|
|
57
117
|
|
|
58
118
|
def tbl_name(self) -> str:
|
|
59
119
|
"""Return the name of the table/view that this path represents"""
|
|
60
|
-
|
|
120
|
+
self.refresh_cached_md()
|
|
121
|
+
return self._cached_tbl_version.name
|
|
61
122
|
|
|
62
123
|
def path_len(self) -> int:
|
|
63
124
|
"""Return the length of the path"""
|
|
@@ -65,18 +126,39 @@ class TableVersionPath:
|
|
|
65
126
|
|
|
66
127
|
def is_snapshot(self) -> bool:
|
|
67
128
|
"""Return True if this is a path of snapshot versions"""
|
|
68
|
-
|
|
69
|
-
return False
|
|
70
|
-
return self.base.is_snapshot() if self.base is not None else True
|
|
129
|
+
return self.tbl_version.is_snapshot
|
|
71
130
|
|
|
72
131
|
def is_view(self) -> bool:
|
|
73
|
-
|
|
132
|
+
self.refresh_cached_md()
|
|
133
|
+
return self._cached_tbl_version.is_view
|
|
74
134
|
|
|
75
135
|
def is_component_view(self) -> bool:
|
|
76
|
-
|
|
136
|
+
self.refresh_cached_md()
|
|
137
|
+
return self._cached_tbl_version.is_component_view
|
|
138
|
+
|
|
139
|
+
def is_replica(self) -> bool:
|
|
140
|
+
self.refresh_cached_md()
|
|
141
|
+
return self._cached_tbl_version.is_replica
|
|
142
|
+
|
|
143
|
+
def is_mutable(self) -> bool:
|
|
144
|
+
self.refresh_cached_md()
|
|
145
|
+
return self._cached_tbl_version.is_mutable
|
|
77
146
|
|
|
78
147
|
def is_insertable(self) -> bool:
|
|
79
|
-
|
|
148
|
+
self.refresh_cached_md()
|
|
149
|
+
return self._cached_tbl_version.is_insertable
|
|
150
|
+
|
|
151
|
+
def comment(self) -> str:
|
|
152
|
+
self.refresh_cached_md()
|
|
153
|
+
return self._cached_tbl_version.comment
|
|
154
|
+
|
|
155
|
+
def num_retained_versions(self) -> int:
|
|
156
|
+
self.refresh_cached_md()
|
|
157
|
+
return self._cached_tbl_version.num_retained_versions
|
|
158
|
+
|
|
159
|
+
def media_validation(self) -> MediaValidation:
|
|
160
|
+
self.refresh_cached_md()
|
|
161
|
+
return self._cached_tbl_version.media_validation
|
|
80
162
|
|
|
81
163
|
def get_tbl_versions(self) -> list[TableVersionHandle]:
|
|
82
164
|
"""Return all tbl versions"""
|
|
@@ -90,7 +172,7 @@ class TableVersionPath:
|
|
|
90
172
|
return []
|
|
91
173
|
return self.base.get_tbl_versions()
|
|
92
174
|
|
|
93
|
-
def find_tbl_version(self, id: UUID) ->
|
|
175
|
+
def find_tbl_version(self, id: UUID) -> TableVersionHandle | None:
|
|
94
176
|
"""Return the matching TableVersion in the chain of TableVersions, starting with this one"""
|
|
95
177
|
if self.tbl_version.id == id:
|
|
96
178
|
return self.tbl_version
|
|
@@ -98,20 +180,14 @@ class TableVersionPath:
|
|
|
98
180
|
return None
|
|
99
181
|
return self.base.find_tbl_version(id)
|
|
100
182
|
|
|
101
|
-
@property
|
|
102
|
-
def ancestor_paths(self) -> list[TableVersionPath]:
|
|
103
|
-
if self.base is None:
|
|
104
|
-
return [self]
|
|
105
|
-
else:
|
|
106
|
-
return [self, *self.base.ancestor_paths]
|
|
107
|
-
|
|
108
183
|
def columns(self) -> list[Column]:
|
|
109
184
|
"""Return all user columns visible in this tbl version path, including columns from bases"""
|
|
110
|
-
|
|
111
|
-
|
|
185
|
+
self.refresh_cached_md()
|
|
186
|
+
result = list(self._cached_tbl_version.cols_by_name.values())
|
|
187
|
+
if self.base is not None and self._cached_tbl_version.include_base_columns:
|
|
112
188
|
base_cols = self.base.columns()
|
|
113
189
|
# we only include base columns that don't conflict with one of our column names
|
|
114
|
-
result.extend(c for c in base_cols if c.name not in self.
|
|
190
|
+
result.extend(c for c in base_cols if c.name not in self._cached_tbl_version.cols_by_name)
|
|
115
191
|
return result
|
|
116
192
|
|
|
117
193
|
def cols_by_name(self) -> dict[str, Column]:
|
|
@@ -124,37 +200,30 @@ class TableVersionPath:
|
|
|
124
200
|
cols = self.columns()
|
|
125
201
|
return {col.id: col for col in cols}
|
|
126
202
|
|
|
127
|
-
def get_column(self, name: str
|
|
203
|
+
def get_column(self, name: str) -> Column | None:
|
|
128
204
|
"""Return the column with the given name, or None if not found"""
|
|
129
|
-
|
|
205
|
+
self.refresh_cached_md()
|
|
206
|
+
col = self._cached_tbl_version.cols_by_name.get(name)
|
|
130
207
|
if col is not None:
|
|
131
208
|
return col
|
|
132
|
-
elif self.base is not None and
|
|
209
|
+
elif self.base is not None and self._cached_tbl_version.include_base_columns:
|
|
133
210
|
return self.base.get_column(name)
|
|
134
211
|
else:
|
|
135
212
|
return None
|
|
136
213
|
|
|
137
|
-
def
|
|
138
|
-
"""Return the column for the given tbl/col id"""
|
|
139
|
-
if self.tbl_version.id == tbl_id:
|
|
140
|
-
assert col_id in self.tbl_version.get().cols_by_id
|
|
141
|
-
return self.tbl_version.get().cols_by_id[col_id]
|
|
142
|
-
elif self.base is not None:
|
|
143
|
-
return self.base.get_column_by_id(tbl_id, col_id)
|
|
144
|
-
else:
|
|
145
|
-
return None
|
|
146
|
-
|
|
147
|
-
def has_column(self, col: Column, include_bases: bool = True) -> bool:
|
|
214
|
+
def has_column(self, col: Column) -> bool:
|
|
148
215
|
"""Return True if this table has the given column."""
|
|
149
|
-
assert col.
|
|
216
|
+
assert col.get_tbl() is not None
|
|
217
|
+
self.refresh_cached_md()
|
|
218
|
+
|
|
150
219
|
if (
|
|
151
|
-
col.
|
|
152
|
-
and col.
|
|
153
|
-
and col.id in self.
|
|
220
|
+
col.get_tbl().id == self.tbl_version.id
|
|
221
|
+
and col.get_tbl().effective_version == self.tbl_version.effective_version
|
|
222
|
+
and col.id in self._cached_tbl_version.cols_by_id
|
|
154
223
|
):
|
|
155
224
|
# the column is visible in this table version
|
|
156
225
|
return True
|
|
157
|
-
elif self.base is not None
|
|
226
|
+
elif self.base is not None:
|
|
158
227
|
return self.base.has_column(col)
|
|
159
228
|
else:
|
|
160
229
|
return False
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# This file contains all dataclasses related to schema.PendingTableOp:
|
|
2
|
+
# - TableOp: the container for each log entry
|
|
3
|
+
# - <>Op: the actual operation, which is performed by TableVersion.exec_op(); each <>Op class contains
|
|
4
|
+
# enough information for exec_op() to perform the operation without having to reference data outside of
|
|
5
|
+
# TableVersion
|
|
6
|
+
|
|
7
|
+
import dataclasses
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclasses.dataclass
|
|
12
|
+
class CreateStoreTableOp:
|
|
13
|
+
pass
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclasses.dataclass
|
|
17
|
+
class CreateIndexOp:
|
|
18
|
+
idx_id: int
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclasses.dataclass
|
|
22
|
+
class LoadViewOp:
|
|
23
|
+
view_path: dict[str, Any] # needed to create the view load plan
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclasses.dataclass
|
|
27
|
+
class DeleteTableMdOp:
|
|
28
|
+
pass
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclasses.dataclass
|
|
32
|
+
class DeleteTableMediaFilesOp:
|
|
33
|
+
pass
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclasses.dataclass
|
|
37
|
+
class DropStoreTableOp:
|
|
38
|
+
pass
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclasses.dataclass
|
|
42
|
+
class TableOp:
|
|
43
|
+
tbl_id: str # uuid.UUID
|
|
44
|
+
op_sn: int # sequence number within the update operation; [0, num_ops)
|
|
45
|
+
num_ops: int # total number of ops forming the update operation
|
|
46
|
+
needs_xact: bool # if True, op must be run as part of a transaction
|
|
47
|
+
|
|
48
|
+
create_store_table_op: CreateStoreTableOp | None = None
|
|
49
|
+
create_index_op: CreateIndexOp | None = None
|
|
50
|
+
load_view_op: LoadViewOp | None = None
|
|
51
|
+
delete_table_md_op: DeleteTableMdOp | None = None
|
|
52
|
+
delete_table_media_files_op: DeleteTableMediaFilesOp | None = None
|
|
53
|
+
drop_store_table_op: DropStoreTableOp | None = None
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from IPython.lib.pretty import RepresentationPrinter
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass(frozen=True)
|
|
11
|
+
class RowCountStats:
|
|
12
|
+
"""
|
|
13
|
+
Statistics about the counts of rows affected by a table operation.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
ins_rows: int = 0 # rows inserted
|
|
17
|
+
del_rows: int = 0 # rows deleted
|
|
18
|
+
upd_rows: int = 0 # rows updated
|
|
19
|
+
num_excs: int = 0 # total number of exceptions
|
|
20
|
+
# TODO: disambiguate what this means: # of slots computed or # of columns computed?
|
|
21
|
+
computed_values: int = 0 # number of computed values (e.g., computed columns) affected by the operation
|
|
22
|
+
|
|
23
|
+
@property
|
|
24
|
+
def num_rows(self) -> int:
|
|
25
|
+
return self.ins_rows + self.del_rows + self.upd_rows
|
|
26
|
+
|
|
27
|
+
def insert_to_update(self) -> 'RowCountStats':
|
|
28
|
+
"""
|
|
29
|
+
Convert insert row count stats to update row count stats.
|
|
30
|
+
This is used when an insert operation is treated as an update.
|
|
31
|
+
"""
|
|
32
|
+
return RowCountStats(
|
|
33
|
+
ins_rows=0,
|
|
34
|
+
del_rows=self.del_rows,
|
|
35
|
+
upd_rows=self.upd_rows + self.ins_rows,
|
|
36
|
+
num_excs=self.num_excs,
|
|
37
|
+
computed_values=self.computed_values,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
def __add__(self, other: 'RowCountStats') -> 'RowCountStats':
|
|
41
|
+
"""
|
|
42
|
+
Add the stats from two RowCountStats objects together.
|
|
43
|
+
"""
|
|
44
|
+
return RowCountStats(
|
|
45
|
+
ins_rows=self.ins_rows + other.ins_rows,
|
|
46
|
+
del_rows=self.del_rows + other.del_rows,
|
|
47
|
+
upd_rows=self.upd_rows + other.upd_rows,
|
|
48
|
+
num_excs=self.num_excs + other.num_excs,
|
|
49
|
+
computed_values=self.computed_values + other.computed_values,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass(frozen=True)
|
|
54
|
+
class UpdateStatus:
|
|
55
|
+
"""
|
|
56
|
+
Information about changes to table data or table schema
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
updated_cols: list[str] = field(default_factory=list)
|
|
60
|
+
"""Columns that were updated."""
|
|
61
|
+
cols_with_excs: list[str] = field(default_factory=list)
|
|
62
|
+
"""Columns that encountered exceptions."""
|
|
63
|
+
|
|
64
|
+
# stats for the rows affected by the operation
|
|
65
|
+
row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
66
|
+
"""Row count statistics for rows affected by this operation."""
|
|
67
|
+
|
|
68
|
+
# stats for changes cascaded to other tables
|
|
69
|
+
cascade_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
70
|
+
"""Row count statistics for changes cascaded to other tables."""
|
|
71
|
+
|
|
72
|
+
# stats for the rows affected by the operation in an external store
|
|
73
|
+
ext_row_count_stats: RowCountStats = field(default_factory=RowCountStats)
|
|
74
|
+
"""Row count statistics for rows affected in an external store."""
|
|
75
|
+
|
|
76
|
+
@property
|
|
77
|
+
def num_rows(self) -> int:
|
|
78
|
+
"""Total number of rows affected (including cascaded changes)."""
|
|
79
|
+
return self.row_count_stats.num_rows + self.cascade_row_count_stats.num_rows
|
|
80
|
+
|
|
81
|
+
@property
|
|
82
|
+
def num_excs(self) -> int:
|
|
83
|
+
"""Total number of exceptions encountered (including cascaded changes)."""
|
|
84
|
+
return self.row_count_stats.num_excs + self.cascade_row_count_stats.num_excs
|
|
85
|
+
|
|
86
|
+
@property
|
|
87
|
+
def num_computed_values(self) -> int:
|
|
88
|
+
"""Total number of computed values affected (including cascaded changes)."""
|
|
89
|
+
return self.row_count_stats.computed_values + self.cascade_row_count_stats.computed_values
|
|
90
|
+
|
|
91
|
+
def insert_to_update(self) -> 'UpdateStatus':
|
|
92
|
+
"""
|
|
93
|
+
Convert the update status from an insert operation to an update operation.
|
|
94
|
+
This is used when an insert operation is treated as an update.
|
|
95
|
+
"""
|
|
96
|
+
return UpdateStatus(
|
|
97
|
+
updated_cols=self.updated_cols,
|
|
98
|
+
cols_with_excs=self.cols_with_excs,
|
|
99
|
+
row_count_stats=self.row_count_stats.insert_to_update(),
|
|
100
|
+
cascade_row_count_stats=self.cascade_row_count_stats.insert_to_update(),
|
|
101
|
+
ext_row_count_stats=self.ext_row_count_stats,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
def to_cascade(self) -> 'UpdateStatus':
    """
    Build a new UpdateStatus in which all of this status's row counts are reported as cascaded.

    The own row count stats are folded into the cascade stats and replaced by a fresh, default
    RowCountStats; the column lists and the external-store stats are carried over unchanged.
    """
    folded_stats = self.cascade_row_count_stats + self.row_count_stats
    return UpdateStatus(
        updated_cols=self.updated_cols,
        cols_with_excs=self.cols_with_excs,
        row_count_stats=RowCountStats(),
        cascade_row_count_stats=folded_stats,
        ext_row_count_stats=self.ext_row_count_stats,
    )
|
|
116
|
+
|
|
117
|
+
def __add__(self, other: 'UpdateStatus') -> UpdateStatus:
    """
    Combine two UpdateStatus objects into a new one.

    Column lists are concatenated (self first) with duplicates dropped; each of the three
    row count stats is added to its counterpart in `other`.
    """
    # dict.fromkeys() removes duplicates while keeping first-seen order
    merged_updated_cols = [*dict.fromkeys(self.updated_cols + other.updated_cols)]
    merged_cols_with_excs = [*dict.fromkeys(self.cols_with_excs + other.cols_with_excs)]
    own_stats = self.row_count_stats + other.row_count_stats
    cascade_stats = self.cascade_row_count_stats + other.cascade_row_count_stats
    ext_stats = self.ext_row_count_stats + other.ext_row_count_stats
    return UpdateStatus(
        updated_cols=merged_updated_cols,
        cols_with_excs=merged_cols_with_excs,
        row_count_stats=own_stats,
        cascade_row_count_stats=cascade_stats,
        ext_row_count_stats=ext_stats,
    )
|
|
128
|
+
|
|
129
|
+
@property
def insert_msg(self) -> str:
    """Summary of an insert operation: rows inserted, error count and, if any errors, the affected columns."""
    num_excs = self.num_excs
    # the column detail is only appended when at least one error was recorded
    col_detail = ''
    if num_excs != 0:
        num_cols = len(self.cols_with_excs)
        col_names = ', '.join(self.cols_with_excs)
        col_detail = f' across {num_cols} column{"" if num_cols == 1 else "s"} ({col_names})'
    num_rows = self.num_rows
    rows_part = f'Inserted {num_rows} row{"" if num_rows == 1 else "s"} '
    excs_part = f'with {num_excs} error{"" if num_excs == 1 else "s"}{col_detail}.'
    return rows_part + excs_part
|
|
144
|
+
|
|
145
|
+
@classmethod
def __cnt_str(cls, cnt: int, item: str) -> str:
    """Format a positive count followed by its noun, appending 's' to the noun unless the count is 1."""
    assert cnt > 0
    plural_suffix = '' if cnt == 1 else 's'
    return f'{cnt} {item}{plural_suffix}'
|
|
149
|
+
|
|
150
|
+
def _repr_pretty_(self, p: 'RepresentationPrinter', cycle: bool) -> None:
    """
    Write a one-line summary of this status to the printer `p`.

    The summary is built from the sum of the own and cascade row count stats; only non-zero
    counts are mentioned. `cycle` is accepted but not used.
    """
    # Combine row count stats and cascade row count stats
    combined = self.row_count_stats + self.cascade_row_count_stats
    # (count, noun, trailing text) in the order they appear in the summary
    entries = (
        (combined.ins_rows, 'row', ' inserted'),
        (combined.del_rows, 'row', ' deleted'),
        (combined.upd_rows, 'row', ' updated'),
        (combined.computed_values, 'value', ' computed'),
        (combined.num_excs, 'exception', ''),
    )
    fragments = [f'{self.__cnt_str(count, noun)}{tail}' for count, noun, tail in entries if count > 0]
    if fragments:
        p.text(', '.join(fragments) + '.')
    else:
        p.text('No rows affected.')
|
|
165
|
+
|
|
166
|
+
@property
def pxt_rows_updated(self) -> int:
    """
    Returns the `upd_rows` count of the combined own and cascade row count stats.
    """
    combined = self.row_count_stats + self.cascade_row_count_stats
    return combined.upd_rows
|
|
172
|
+
|
|
173
|
+
@property
def external_rows_updated(self) -> int:
    """The `upd_rows` count of the external-store row count stats."""
    ext_stats = self.ext_row_count_stats
    return ext_stats.upd_rows
|
|
177
|
+
|
|
178
|
+
@property
def external_rows_created(self) -> int:
    """The `ins_rows` count of the external-store row count stats."""
    ext_stats = self.ext_row_count_stats
    return ext_stats.ins_rows
|
|
182
|
+
|
|
183
|
+
@property
def external_rows_deleted(self) -> int:
    """The `del_rows` count of the external-store row count stats."""
    ext_stats = self.ext_row_count_stats
    return ext_stats.del_rows
|
|
187
|
+
|
|
188
|
+
@property
def ext_num_rows(self) -> int:
    """The `num_rows` total of the external-store row count stats."""
    ext_stats = self.ext_row_count_stats
    return ext_stats.num_rows
|