pixeltable 0.4.0rc3__py3-none-any.whl → 0.4.20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +23 -5
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -3
- pixeltable/catalog/catalog.py +1318 -404
- pixeltable/catalog/column.py +186 -115
- pixeltable/catalog/dir.py +1 -2
- pixeltable/catalog/globals.py +11 -43
- pixeltable/catalog/insertable_table.py +167 -79
- pixeltable/catalog/path.py +61 -23
- pixeltable/catalog/schema_object.py +9 -10
- pixeltable/catalog/table.py +626 -308
- pixeltable/catalog/table_metadata.py +101 -0
- pixeltable/catalog/table_version.py +713 -569
- pixeltable/catalog/table_version_handle.py +37 -6
- pixeltable/catalog/table_version_path.py +42 -29
- pixeltable/catalog/tbl_ops.py +50 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +108 -94
- pixeltable/config.py +128 -22
- pixeltable/dataframe.py +188 -100
- pixeltable/env.py +407 -136
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +3 -0
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +231 -0
- pixeltable/exec/cell_reconstruction_node.py +135 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +7 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +190 -30
- pixeltable/exec/globals.py +32 -0
- pixeltable/exec/in_memory_data_node.py +18 -18
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +206 -101
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +34 -30
- pixeltable/exprs/column_ref.py +92 -96
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +152 -55
- pixeltable/exprs/expr.py +62 -43
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +75 -37
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +10 -27
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +127 -53
- pixeltable/exprs/rowid_ref.py +8 -12
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +10 -10
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +20 -18
- pixeltable/func/signature.py +43 -16
- pixeltable/func/tools.py +23 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +6 -0
- pixeltable/functions/anthropic.py +93 -33
- pixeltable/functions/audio.py +114 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +1 -1
- pixeltable/functions/deepseek.py +20 -9
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +28 -11
- pixeltable/functions/globals.py +13 -13
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1046 -23
- pixeltable/functions/image.py +9 -18
- pixeltable/functions/llama_cpp.py +23 -8
- pixeltable/functions/math.py +3 -4
- pixeltable/functions/mistralai.py +4 -15
- pixeltable/functions/ollama.py +16 -9
- pixeltable/functions/openai.py +104 -82
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +2 -2
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +13 -14
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/video.py +1388 -106
- pixeltable/functions/vision.py +7 -7
- pixeltable/functions/whisper.py +15 -7
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +332 -105
- pixeltable/index/base.py +13 -22
- pixeltable/index/btree.py +23 -22
- pixeltable/index/embedding_index.py +32 -44
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +7 -6
- pixeltable/io/external_store.py +49 -77
- pixeltable/io/fiftyone.py +11 -11
- pixeltable/io/globals.py +29 -28
- pixeltable/io/hf_datasets.py +17 -9
- pixeltable/io/label_studio.py +70 -66
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +12 -11
- pixeltable/io/parquet.py +13 -93
- pixeltable/io/table_data_conduit.py +71 -47
- pixeltable/io/utils.py +3 -3
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +21 -11
- pixeltable/iterators/document.py +116 -55
- pixeltable/iterators/image.py +5 -2
- pixeltable/iterators/video.py +293 -13
- pixeltable/metadata/__init__.py +4 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/util.py +13 -12
- pixeltable/metadata/notes.py +4 -0
- pixeltable/metadata/schema.py +79 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +274 -223
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +259 -129
- pixeltable/share/protocol/__init__.py +34 -0
- pixeltable/share/protocol/common.py +170 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +109 -0
- pixeltable/share/publish.py +213 -57
- pixeltable/store.py +238 -175
- pixeltable/type_system.py +104 -63
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +108 -13
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +31 -5
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +283 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +88 -0
- pixeltable/utils/local_store.py +316 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +528 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +392 -0
- pixeltable-0.4.20.dist-info/METADATA +587 -0
- pixeltable-0.4.20.dist-info/RECORD +218 -0
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info}/WHEEL +1 -1
- pixeltable-0.4.20.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable/utils/sample.py +0 -25
- pixeltable-0.4.0rc3.dist-info/METADATA +0 -435
- pixeltable-0.4.0rc3.dist-info/RECORD +0 -189
- pixeltable-0.4.0rc3.dist-info/entry_points.txt +0 -3
- {pixeltable-0.4.0rc3.dist-info → pixeltable-0.4.20.dist-info/licenses}/LICENSE +0 -0
pixeltable/catalog/table.py
CHANGED
|
@@ -2,13 +2,12 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import abc
|
|
4
4
|
import builtins
|
|
5
|
+
import datetime
|
|
5
6
|
import json
|
|
6
7
|
import logging
|
|
7
8
|
from keyword import iskeyword as is_python_keyword
|
|
8
9
|
from pathlib import Path
|
|
9
|
-
from typing import TYPE_CHECKING, Any, Iterable, Literal,
|
|
10
|
-
|
|
11
|
-
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
10
|
+
from typing import TYPE_CHECKING, Any, Iterable, Literal, overload
|
|
12
11
|
from uuid import UUID
|
|
13
12
|
|
|
14
13
|
import pandas as pd
|
|
@@ -16,7 +15,16 @@ import sqlalchemy as sql
|
|
|
16
15
|
|
|
17
16
|
import pixeltable as pxt
|
|
18
17
|
from pixeltable import catalog, env, exceptions as excs, exprs, index, type_system as ts
|
|
18
|
+
from pixeltable.catalog.table_metadata import (
|
|
19
|
+
ColumnMetadata,
|
|
20
|
+
EmbeddingIndexParams,
|
|
21
|
+
IndexMetadata,
|
|
22
|
+
TableMetadata,
|
|
23
|
+
VersionMetadata,
|
|
24
|
+
)
|
|
19
25
|
from pixeltable.metadata import schema
|
|
26
|
+
from pixeltable.metadata.utils import MetadataUtils
|
|
27
|
+
from pixeltable.utils.object_stores import ObjectOps
|
|
20
28
|
|
|
21
29
|
from ..exprs import ColumnRef
|
|
22
30
|
from ..utils.description_helper import DescriptionHelper
|
|
@@ -27,13 +35,16 @@ from .globals import (
|
|
|
27
35
|
IfExistsParam,
|
|
28
36
|
IfNotExistsParam,
|
|
29
37
|
MediaValidation,
|
|
30
|
-
UpdateStatus,
|
|
31
38
|
is_system_column_name,
|
|
32
39
|
is_valid_identifier,
|
|
33
40
|
)
|
|
34
41
|
from .schema_object import SchemaObject
|
|
35
42
|
from .table_version_handle import TableVersionHandle
|
|
36
43
|
from .table_version_path import TableVersionPath
|
|
44
|
+
from .update_status import UpdateStatus
|
|
45
|
+
|
|
46
|
+
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
47
|
+
|
|
37
48
|
|
|
38
49
|
if TYPE_CHECKING:
|
|
39
50
|
import torch.utils.data
|
|
@@ -41,6 +52,7 @@ if TYPE_CHECKING:
|
|
|
41
52
|
import pixeltable.plan
|
|
42
53
|
from pixeltable.globals import TableDataSource
|
|
43
54
|
|
|
55
|
+
|
|
44
56
|
_logger = logging.getLogger('pixeltable')
|
|
45
57
|
|
|
46
58
|
|
|
@@ -48,21 +60,34 @@ class Table(SchemaObject):
|
|
|
48
60
|
"""
|
|
49
61
|
A handle to a table, view, or snapshot. This class is the primary interface through which table operations
|
|
50
62
|
(queries, insertions, updates, etc.) are performed in Pixeltable.
|
|
63
|
+
|
|
64
|
+
Every user-invoked operation that runs an ExecNode tree (directly or indirectly) needs to call
|
|
65
|
+
FileCache.emit_eviction_warnings() at the end of the operation.
|
|
51
66
|
"""
|
|
52
67
|
|
|
53
|
-
#
|
|
54
|
-
|
|
68
|
+
# the chain of TableVersions needed to run queries and supply metadata (eg, schema)
|
|
69
|
+
_tbl_version_path: TableVersionPath
|
|
55
70
|
|
|
56
|
-
|
|
57
|
-
|
|
71
|
+
# the physical TableVersion backing this Table; None for pure snapshots
|
|
72
|
+
_tbl_version: TableVersionHandle | None
|
|
58
73
|
|
|
59
74
|
def __init__(self, id: UUID, dir_id: UUID, name: str, tbl_version_path: TableVersionPath):
|
|
60
75
|
super().__init__(id, name, dir_id)
|
|
61
|
-
self.
|
|
62
|
-
self.
|
|
76
|
+
self._tbl_version_path = tbl_version_path
|
|
77
|
+
self._tbl_version = None
|
|
63
78
|
|
|
64
79
|
def _move(self, new_name: str, new_dir_id: UUID) -> None:
|
|
65
|
-
self.
|
|
80
|
+
old_name = self._name
|
|
81
|
+
old_dir_id = self._dir_id
|
|
82
|
+
|
|
83
|
+
cat = catalog.Catalog.get()
|
|
84
|
+
|
|
85
|
+
@cat.register_undo_action
|
|
86
|
+
def _() -> None:
|
|
87
|
+
# TODO: We should really be invalidating the Table instance and forcing a reload.
|
|
88
|
+
self._name = old_name
|
|
89
|
+
self._dir_id = old_dir_id
|
|
90
|
+
|
|
66
91
|
super()._move(new_name, new_dir_id)
|
|
67
92
|
conn = env.Env.get().conn
|
|
68
93
|
stmt = sql.text(
|
|
@@ -75,73 +100,85 @@ class Table(SchemaObject):
|
|
|
75
100
|
)
|
|
76
101
|
conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
|
|
77
102
|
|
|
78
|
-
|
|
103
|
+
# this is duplicated from SchemaObject so that our API docs show the docstring for Table
|
|
104
|
+
def get_metadata(self) -> 'TableMetadata':
|
|
79
105
|
"""
|
|
80
106
|
Retrieves metadata associated with this table.
|
|
81
107
|
|
|
82
108
|
Returns:
|
|
83
|
-
A
|
|
84
|
-
|
|
85
|
-
```python
|
|
86
|
-
{
|
|
87
|
-
'base': None, # If this is a view or snapshot, will contain the name of its base table
|
|
88
|
-
'schema': {
|
|
89
|
-
'col1': StringType(),
|
|
90
|
-
'col2': IntType(),
|
|
91
|
-
},
|
|
92
|
-
'is_replica': False,
|
|
93
|
-
'version': 22,
|
|
94
|
-
'schema_version': 1,
|
|
95
|
-
'comment': '',
|
|
96
|
-
'num_retained_versions': 10,
|
|
97
|
-
'is_view': False,
|
|
98
|
-
'is_snapshot': False,
|
|
99
|
-
'media_validation': 'on_write',
|
|
100
|
-
}
|
|
101
|
-
```
|
|
109
|
+
A [TableMetadata][pixeltable.TableMetadata] instance containing this table's metadata.
|
|
102
110
|
"""
|
|
103
|
-
from pixeltable.catalog import
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
111
|
+
from pixeltable.catalog import retry_loop
|
|
112
|
+
|
|
113
|
+
@retry_loop(for_write=False)
|
|
114
|
+
def op() -> 'TableMetadata':
|
|
115
|
+
return self._get_metadata()
|
|
116
|
+
|
|
117
|
+
return op()
|
|
118
|
+
|
|
119
|
+
def _get_metadata(self) -> TableMetadata:
|
|
120
|
+
columns = self._tbl_version_path.columns()
|
|
121
|
+
column_info: dict[str, ColumnMetadata] = {}
|
|
122
|
+
for col in columns:
|
|
123
|
+
column_info[col.name] = ColumnMetadata(
|
|
124
|
+
name=col.name,
|
|
125
|
+
type_=col.col_type._to_str(as_schema=True),
|
|
126
|
+
version_added=col.schema_version_add,
|
|
127
|
+
is_stored=col.is_stored,
|
|
128
|
+
is_primary_key=col.is_pk,
|
|
129
|
+
media_validation=col.media_validation.name.lower() if col.media_validation is not None else None, # type: ignore[typeddict-item]
|
|
130
|
+
computed_with=col.value_expr.display_str(inline=False) if col.value_expr is not None else None,
|
|
131
|
+
defined_in=col.get_tbl().name,
|
|
132
|
+
)
|
|
133
|
+
# Pure snapshots have no indices
|
|
134
|
+
indices = self._tbl_version.get().idxs_by_name.values() if self._tbl_version is not None else {}
|
|
135
|
+
index_info: dict[str, IndexMetadata] = {}
|
|
136
|
+
for info in indices:
|
|
137
|
+
if isinstance(info.idx, index.EmbeddingIndex):
|
|
138
|
+
embeddings: list[str] = []
|
|
139
|
+
if info.idx.string_embed is not None:
|
|
140
|
+
embeddings.append(str(info.idx.string_embed))
|
|
141
|
+
if info.idx.image_embed is not None:
|
|
142
|
+
embeddings.append(str(info.idx.image_embed))
|
|
143
|
+
index_info[info.name] = IndexMetadata(
|
|
144
|
+
name=info.name,
|
|
145
|
+
columns=[info.col.name],
|
|
146
|
+
index_type='embedding',
|
|
147
|
+
parameters=EmbeddingIndexParams(
|
|
148
|
+
metric=info.idx.metric.name.lower(), # type: ignore[typeddict-item]
|
|
149
|
+
embeddings=embeddings,
|
|
150
|
+
),
|
|
151
|
+
)
|
|
152
|
+
return TableMetadata(
|
|
153
|
+
name=self._name,
|
|
154
|
+
path=self._path(),
|
|
155
|
+
columns=column_info,
|
|
156
|
+
indices=index_info,
|
|
157
|
+
is_replica=self._tbl_version_path.is_replica(),
|
|
158
|
+
is_view=False,
|
|
159
|
+
is_snapshot=False,
|
|
160
|
+
version=self._get_version(),
|
|
161
|
+
version_created=datetime.datetime.fromtimestamp(
|
|
162
|
+
self._tbl_version_path.tbl_version.get().created_at, tz=datetime.timezone.utc
|
|
163
|
+
),
|
|
164
|
+
schema_version=self._tbl_version_path.schema_version(),
|
|
165
|
+
comment=self._get_comment(),
|
|
166
|
+
media_validation=self._get_media_validation().name.lower(), # type: ignore[typeddict-item]
|
|
167
|
+
base=None,
|
|
168
|
+
)
|
|
117
169
|
|
|
118
|
-
|
|
119
|
-
def _version(self) -> int:
|
|
170
|
+
def _get_version(self) -> int:
|
|
120
171
|
"""Return the version of this table. Used by tests to ascertain version changes."""
|
|
121
|
-
return self.
|
|
122
|
-
|
|
123
|
-
@property
|
|
124
|
-
def _tbl_version(self) -> TableVersionHandle:
|
|
125
|
-
"""Return TableVersion for just this table."""
|
|
126
|
-
return self._tbl_version_path.tbl_version
|
|
127
|
-
|
|
128
|
-
@property
|
|
129
|
-
def _tbl_version_path(self) -> TableVersionPath:
|
|
130
|
-
self._check_is_dropped()
|
|
131
|
-
return self.__tbl_version_path
|
|
172
|
+
return self._tbl_version_path.version()
|
|
132
173
|
|
|
133
174
|
def __hash__(self) -> int:
|
|
134
|
-
return hash(self.
|
|
135
|
-
|
|
136
|
-
def _check_is_dropped(self) -> None:
|
|
137
|
-
if self._is_dropped:
|
|
138
|
-
raise excs.Error(f'{self._display_name()} {self._name} has been dropped')
|
|
175
|
+
return hash(self._tbl_version_path.tbl_id)
|
|
139
176
|
|
|
140
177
|
def __getattr__(self, name: str) -> 'exprs.ColumnRef':
|
|
141
178
|
"""Return a ColumnRef for the given name."""
|
|
142
179
|
col = self._tbl_version_path.get_column(name)
|
|
143
180
|
if col is None:
|
|
144
|
-
raise AttributeError(f'
|
|
181
|
+
raise AttributeError(f'Unknown column: {name}')
|
|
145
182
|
return ColumnRef(col, reference_tbl=self._tbl_version_path)
|
|
146
183
|
|
|
147
184
|
def __getitem__(self, name: str) -> 'exprs.ColumnRef':
|
|
@@ -159,18 +196,23 @@ class Table(SchemaObject):
|
|
|
159
196
|
Returns:
|
|
160
197
|
A list of view paths.
|
|
161
198
|
"""
|
|
162
|
-
from pixeltable.catalog import
|
|
199
|
+
from pixeltable.catalog import retry_loop
|
|
163
200
|
|
|
164
|
-
|
|
165
|
-
|
|
201
|
+
# we need retry_loop() here, because we end up loading Tables for the views
|
|
202
|
+
@retry_loop(tbl=self._tbl_version_path, for_write=False)
|
|
203
|
+
def op() -> list[str]:
|
|
166
204
|
return [t._path() for t in self._get_views(recursive=recursive)]
|
|
167
205
|
|
|
168
|
-
|
|
206
|
+
return op()
|
|
207
|
+
|
|
208
|
+
def _get_views(self, *, recursive: bool = True, mutable_only: bool = False) -> list['Table']:
|
|
169
209
|
cat = catalog.Catalog.get()
|
|
170
210
|
view_ids = cat.get_view_ids(self._id)
|
|
171
211
|
views = [cat.get_table_by_id(id) for id in view_ids]
|
|
212
|
+
if mutable_only:
|
|
213
|
+
views = [t for t in views if t._tbl_version_path.is_mutable()]
|
|
172
214
|
if recursive:
|
|
173
|
-
views.extend(
|
|
215
|
+
views.extend(t for view in views for t in view._get_views(recursive=True, mutable_only=mutable_only))
|
|
174
216
|
return views
|
|
175
217
|
|
|
176
218
|
def _df(self) -> 'pxt.dataframe.DataFrame':
|
|
@@ -187,7 +229,7 @@ class Table(SchemaObject):
|
|
|
187
229
|
"""
|
|
188
230
|
from pixeltable.catalog import Catalog
|
|
189
231
|
|
|
190
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
232
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
191
233
|
return self._df().select(*items, **named_items)
|
|
192
234
|
|
|
193
235
|
def where(self, pred: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
@@ -197,20 +239,16 @@ class Table(SchemaObject):
|
|
|
197
239
|
"""
|
|
198
240
|
from pixeltable.catalog import Catalog
|
|
199
241
|
|
|
200
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
242
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
201
243
|
return self._df().where(pred)
|
|
202
244
|
|
|
203
245
|
def join(
|
|
204
|
-
self,
|
|
205
|
-
other: 'Table',
|
|
206
|
-
*,
|
|
207
|
-
on: Optional['exprs.Expr'] = None,
|
|
208
|
-
how: 'pixeltable.plan.JoinType.LiteralType' = 'inner',
|
|
246
|
+
self, other: 'Table', *, on: 'exprs.Expr' | None = None, how: 'pixeltable.plan.JoinType.LiteralType' = 'inner'
|
|
209
247
|
) -> 'pxt.DataFrame':
|
|
210
248
|
"""Join this table with another table."""
|
|
211
249
|
from pixeltable.catalog import Catalog
|
|
212
250
|
|
|
213
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
251
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
214
252
|
return self._df().join(other, on=on, how=how)
|
|
215
253
|
|
|
216
254
|
def order_by(self, *items: 'exprs.Expr', asc: bool = True) -> 'pxt.DataFrame':
|
|
@@ -220,7 +258,7 @@ class Table(SchemaObject):
|
|
|
220
258
|
"""
|
|
221
259
|
from pixeltable.catalog import Catalog
|
|
222
260
|
|
|
223
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
261
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
224
262
|
return self._df().order_by(*items, asc=asc)
|
|
225
263
|
|
|
226
264
|
def group_by(self, *items: 'exprs.Expr') -> 'pxt.DataFrame':
|
|
@@ -230,7 +268,7 @@ class Table(SchemaObject):
|
|
|
230
268
|
"""
|
|
231
269
|
from pixeltable.catalog import Catalog
|
|
232
270
|
|
|
233
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
271
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
234
272
|
return self._df().group_by(*items)
|
|
235
273
|
|
|
236
274
|
def distinct(self) -> 'pxt.DataFrame':
|
|
@@ -242,10 +280,10 @@ class Table(SchemaObject):
|
|
|
242
280
|
|
|
243
281
|
def sample(
|
|
244
282
|
self,
|
|
245
|
-
n:
|
|
246
|
-
n_per_stratum:
|
|
247
|
-
fraction:
|
|
248
|
-
seed:
|
|
283
|
+
n: int | None = None,
|
|
284
|
+
n_per_stratum: int | None = None,
|
|
285
|
+
fraction: float | None = None,
|
|
286
|
+
seed: int | None = None,
|
|
249
287
|
stratify_by: Any = None,
|
|
250
288
|
) -> pxt.DataFrame:
|
|
251
289
|
"""Choose a shuffled sample of rows
|
|
@@ -276,53 +314,44 @@ class Table(SchemaObject):
|
|
|
276
314
|
"""Return the number of rows in this table."""
|
|
277
315
|
return self._df().count()
|
|
278
316
|
|
|
279
|
-
@property
|
|
280
317
|
def columns(self) -> list[str]:
|
|
281
318
|
"""Return the names of the columns in this table."""
|
|
282
319
|
cols = self._tbl_version_path.columns()
|
|
283
320
|
return [c.name for c in cols]
|
|
284
321
|
|
|
285
|
-
|
|
286
|
-
def _schema(self) -> dict[str, ts.ColumnType]:
|
|
322
|
+
def _get_schema(self) -> dict[str, ts.ColumnType]:
|
|
287
323
|
"""Return the schema (column names and column types) of this table."""
|
|
288
324
|
return {c.name: c.col_type for c in self._tbl_version_path.columns()}
|
|
289
325
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
with env.Env.get().begin_xact():
|
|
293
|
-
return self._base_table
|
|
326
|
+
def get_base_table(self) -> 'Table' | None:
|
|
327
|
+
return self._get_base_table()
|
|
294
328
|
|
|
295
|
-
@property
|
|
296
329
|
@abc.abstractmethod
|
|
297
|
-
def
|
|
298
|
-
"""The base's Table instance"""
|
|
330
|
+
def _get_base_table(self) -> 'Table' | None:
|
|
331
|
+
"""The base's Table instance. Requires a transaction context"""
|
|
299
332
|
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
base = self._base_table
|
|
333
|
+
def _get_base_tables(self) -> list['Table']:
|
|
334
|
+
"""The ancestor list of bases of this table, starting with its immediate base. Requires a transaction context"""
|
|
335
|
+
bases: list[Table] = []
|
|
336
|
+
base = self._get_base_table()
|
|
305
337
|
while base is not None:
|
|
306
338
|
bases.append(base)
|
|
307
|
-
base = base.
|
|
339
|
+
base = base._get_base_table()
|
|
308
340
|
return bases
|
|
309
341
|
|
|
310
342
|
@property
|
|
311
343
|
@abc.abstractmethod
|
|
312
|
-
def _effective_base_versions(self) -> list[
|
|
344
|
+
def _effective_base_versions(self) -> list[int | None]:
|
|
313
345
|
"""The effective versions of the ancestor bases, starting with its immediate base."""
|
|
314
346
|
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
return self._tbl_version.get().comment
|
|
347
|
+
def _get_comment(self) -> str:
|
|
348
|
+
return self._tbl_version_path.comment()
|
|
318
349
|
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
return self._tbl_version.get().num_retained_versions
|
|
350
|
+
def _get_num_retained_versions(self) -> int:
|
|
351
|
+
return self._tbl_version_path.num_retained_versions()
|
|
322
352
|
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
return self._tbl_version.get().media_validation
|
|
353
|
+
def _get_media_validation(self) -> MediaValidation:
|
|
354
|
+
return self._tbl_version_path.media_validation()
|
|
326
355
|
|
|
327
356
|
def __repr__(self) -> str:
|
|
328
357
|
return self._descriptors().to_string()
|
|
@@ -336,7 +365,7 @@ class Table(SchemaObject):
|
|
|
336
365
|
"""
|
|
337
366
|
from pixeltable.catalog import Catalog
|
|
338
367
|
|
|
339
|
-
with Catalog.get().begin_xact(for_write=False):
|
|
368
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=False):
|
|
340
369
|
helper = DescriptionHelper()
|
|
341
370
|
helper.append(self._table_descriptor())
|
|
342
371
|
helper.append(self._col_descriptor())
|
|
@@ -346,11 +375,11 @@ class Table(SchemaObject):
|
|
|
346
375
|
stores = self._external_store_descriptor()
|
|
347
376
|
if not stores.empty:
|
|
348
377
|
helper.append(stores)
|
|
349
|
-
if self.
|
|
350
|
-
helper.append(f'COMMENT: {self.
|
|
378
|
+
if self._get_comment():
|
|
379
|
+
helper.append(f'COMMENT: {self._get_comment()}')
|
|
351
380
|
return helper
|
|
352
381
|
|
|
353
|
-
def _col_descriptor(self, columns:
|
|
382
|
+
def _col_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
|
|
354
383
|
return pd.DataFrame(
|
|
355
384
|
{
|
|
356
385
|
'Column Name': col.name,
|
|
@@ -361,9 +390,11 @@ class Table(SchemaObject):
|
|
|
361
390
|
if columns is None or col.name in columns
|
|
362
391
|
)
|
|
363
392
|
|
|
364
|
-
def _index_descriptor(self, columns:
|
|
393
|
+
def _index_descriptor(self, columns: list[str] | None = None) -> pd.DataFrame:
|
|
365
394
|
from pixeltable import index
|
|
366
395
|
|
|
396
|
+
if self._tbl_version is None:
|
|
397
|
+
return pd.DataFrame([])
|
|
367
398
|
pd_rows = []
|
|
368
399
|
for name, info in self._tbl_version.get().idxs_by_name.items():
|
|
369
400
|
if isinstance(info.idx, index.EmbeddingIndex) and (columns is None or info.col.name in columns):
|
|
@@ -383,7 +414,7 @@ class Table(SchemaObject):
|
|
|
383
414
|
|
|
384
415
|
def _external_store_descriptor(self) -> pd.DataFrame:
|
|
385
416
|
pd_rows = []
|
|
386
|
-
for name, store in self.
|
|
417
|
+
for name, store in self._tbl_version_path.tbl_version.get().external_stores.items():
|
|
387
418
|
row = {'External Store': name, 'Type': type(store).__name__}
|
|
388
419
|
pd_rows.append(row)
|
|
389
420
|
return pd.DataFrame(pd_rows)
|
|
@@ -392,7 +423,6 @@ class Table(SchemaObject):
|
|
|
392
423
|
"""
|
|
393
424
|
Print the table schema.
|
|
394
425
|
"""
|
|
395
|
-
self._check_is_dropped()
|
|
396
426
|
if getattr(builtins, '__IPYTHON__', False):
|
|
397
427
|
from IPython.display import Markdown, display
|
|
398
428
|
|
|
@@ -400,11 +430,6 @@ class Table(SchemaObject):
|
|
|
400
430
|
else:
|
|
401
431
|
print(repr(self))
|
|
402
432
|
|
|
403
|
-
def _drop(self) -> None:
|
|
404
|
-
self._check_is_dropped()
|
|
405
|
-
self._tbl_version.get().drop()
|
|
406
|
-
self._is_dropped = True
|
|
407
|
-
|
|
408
433
|
# TODO Factor this out into a separate module.
|
|
409
434
|
# The return type is unresolvable, but torch can't be imported since it's an optional dependency.
|
|
410
435
|
def to_pytorch_dataset(self, image_format: str = 'pt') -> 'torch.utils.data.IterableDataset':
|
|
@@ -422,9 +447,11 @@ class Table(SchemaObject):
|
|
|
422
447
|
def _column_has_dependents(self, col: Column) -> bool:
|
|
423
448
|
"""Returns True if the column has dependents, False otherwise."""
|
|
424
449
|
assert col is not None
|
|
425
|
-
assert col.name in self.
|
|
426
|
-
|
|
450
|
+
assert col.name in self._get_schema()
|
|
451
|
+
cat = catalog.Catalog.get()
|
|
452
|
+
if any(c.name is not None for c in cat.get_column_dependents(col.get_tbl().id, col.id)):
|
|
427
453
|
return True
|
|
454
|
+
assert self._tbl_version is not None
|
|
428
455
|
return any(
|
|
429
456
|
col in store.get_local_columns()
|
|
430
457
|
for view in (self, *self._get_views(recursive=True))
|
|
@@ -436,13 +463,13 @@ class Table(SchemaObject):
|
|
|
436
463
|
|
|
437
464
|
If `if_exists='ignore'`, returns a list of existing columns, if any, in `new_col_names`.
|
|
438
465
|
"""
|
|
439
|
-
assert not
|
|
440
|
-
existing_col_names = set(self.
|
|
466
|
+
assert self._tbl_version is not None
|
|
467
|
+
existing_col_names = set(self._get_schema().keys())
|
|
441
468
|
cols_to_ignore = []
|
|
442
469
|
for new_col_name in new_col_names:
|
|
443
470
|
if new_col_name in existing_col_names:
|
|
444
471
|
if if_exists == IfExistsParam.ERROR:
|
|
445
|
-
raise excs.Error(f'Duplicate column name: {new_col_name
|
|
472
|
+
raise excs.Error(f'Duplicate column name: {new_col_name}')
|
|
446
473
|
elif if_exists == IfExistsParam.IGNORE:
|
|
447
474
|
cols_to_ignore.append(new_col_name)
|
|
448
475
|
elif if_exists in (IfExistsParam.REPLACE, IfExistsParam.REPLACE_FORCE):
|
|
@@ -465,15 +492,14 @@ class Table(SchemaObject):
|
|
|
465
492
|
|
|
466
493
|
def add_columns(
|
|
467
494
|
self,
|
|
468
|
-
schema: dict[str,
|
|
495
|
+
schema: dict[str, ts.ColumnType | builtins.type | _GenericAlias],
|
|
469
496
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
470
497
|
) -> UpdateStatus:
|
|
471
498
|
"""
|
|
472
499
|
Adds multiple columns to the table. The columns must be concrete (non-computed) columns; to add computed
|
|
473
500
|
columns, use [`add_computed_column()`][pixeltable.catalog.Table.add_computed_column] instead.
|
|
474
501
|
|
|
475
|
-
The format of the `schema` argument is
|
|
476
|
-
[`create_table()`][pixeltable.globals.create_table].
|
|
502
|
+
The format of the `schema` argument is a dict mapping column names to their types.
|
|
477
503
|
|
|
478
504
|
Args:
|
|
479
505
|
schema: A dictionary mapping column names to types.
|
|
@@ -507,10 +533,9 @@ class Table(SchemaObject):
|
|
|
507
533
|
"""
|
|
508
534
|
from pixeltable.catalog import Catalog
|
|
509
535
|
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
raise excs.Error('Cannot add column to a snapshot.')
|
|
536
|
+
# lock_mutable_tree=True: we might end up having to drop existing columns, which requires locking the tree
|
|
537
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
538
|
+
self.__check_mutable('add columns to')
|
|
514
539
|
col_schema = {
|
|
515
540
|
col_name: {'type': ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)}
|
|
516
541
|
for col_name, spec in schema.items()
|
|
@@ -525,20 +550,22 @@ class Table(SchemaObject):
|
|
|
525
550
|
for cname in cols_to_ignore:
|
|
526
551
|
assert cname in col_schema
|
|
527
552
|
del col_schema[cname]
|
|
553
|
+
result = UpdateStatus()
|
|
528
554
|
if len(col_schema) == 0:
|
|
529
|
-
return
|
|
555
|
+
return result
|
|
530
556
|
new_cols = self._create_columns(col_schema)
|
|
531
557
|
for new_col in new_cols:
|
|
532
558
|
self._verify_column(new_col)
|
|
533
|
-
|
|
559
|
+
assert self._tbl_version is not None
|
|
560
|
+
result += self._tbl_version.get().add_columns(new_cols, print_stats=False, on_error='abort')
|
|
534
561
|
FileCache.get().emit_eviction_warnings()
|
|
535
|
-
return
|
|
562
|
+
return result
|
|
536
563
|
|
|
537
564
|
def add_column(
|
|
538
565
|
self,
|
|
539
566
|
*,
|
|
540
567
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
541
|
-
**kwargs:
|
|
568
|
+
**kwargs: ts.ColumnType | builtins.type | _GenericAlias | exprs.Expr,
|
|
542
569
|
) -> UpdateStatus:
|
|
543
570
|
"""
|
|
544
571
|
Adds an ordinary (non-computed) column to the table.
|
|
@@ -568,30 +595,24 @@ class Table(SchemaObject):
|
|
|
568
595
|
|
|
569
596
|
>>> tbl.add_columns({'new_col': pxt.Int})
|
|
570
597
|
"""
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
)
|
|
584
|
-
col_type = next(iter(kwargs.values()))
|
|
585
|
-
if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
|
|
586
|
-
raise excs.Error(
|
|
587
|
-
'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
|
|
588
|
-
)
|
|
589
|
-
return self.add_columns(kwargs, if_exists=if_exists)
|
|
598
|
+
# verify kwargs and construct column schema dict
|
|
599
|
+
if len(kwargs) != 1:
|
|
600
|
+
raise excs.Error(
|
|
601
|
+
f'add_column() requires exactly one keyword argument of the form `col_name=col_type`; '
|
|
602
|
+
f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
|
|
603
|
+
)
|
|
604
|
+
col_type = next(iter(kwargs.values()))
|
|
605
|
+
if not isinstance(col_type, (ts.ColumnType, type, _GenericAlias)):
|
|
606
|
+
raise excs.Error(
|
|
607
|
+
'The argument to add_column() must be a type; did you intend to use add_computed_column() instead?'
|
|
608
|
+
)
|
|
609
|
+
return self.add_columns(kwargs, if_exists=if_exists)
|
|
590
610
|
|
|
591
611
|
def add_computed_column(
|
|
592
612
|
self,
|
|
593
613
|
*,
|
|
594
|
-
stored:
|
|
614
|
+
stored: bool | None = None,
|
|
615
|
+
destination: str | Path | None = None,
|
|
595
616
|
print_stats: bool = False,
|
|
596
617
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
597
618
|
if_exists: Literal['error', 'ignore', 'replace'] = 'error',
|
|
@@ -603,6 +624,7 @@ class Table(SchemaObject):
|
|
|
603
624
|
Args:
|
|
604
625
|
kwargs: Exactly one keyword argument of the form `col_name=expression`.
|
|
605
626
|
stored: Whether the column is materialized and stored or computed on demand.
|
|
627
|
+
destination: An object store reference for persisting computed files.
|
|
606
628
|
print_stats: If `True`, print execution metrics during evaluation.
|
|
607
629
|
on_error: Determines the behavior if an error occurs while evaluating the column expression for at least one
|
|
608
630
|
row.
|
|
@@ -610,7 +632,7 @@ class Table(SchemaObject):
|
|
|
610
632
|
- `'abort'`: an exception will be raised and the column will not be added.
|
|
611
633
|
- `'ignore'`: execution will continue and the column will be added. Any rows
|
|
612
634
|
with errors will have a `None` value for the column, with information about the error stored in the
|
|
613
|
-
corresponding `tbl.col_name.
|
|
635
|
+
corresponding `tbl.col_name.errormsg` and `tbl.col_name.errortype` fields.
|
|
614
636
|
if_exists: Determines the behavior if the column already exists. Must be one of the following:
|
|
615
637
|
|
|
616
638
|
- `'error'`: an exception will be raised.
|
|
@@ -637,31 +659,32 @@ class Table(SchemaObject):
|
|
|
637
659
|
"""
|
|
638
660
|
from pixeltable.catalog import Catalog
|
|
639
661
|
|
|
640
|
-
with Catalog.get().begin_xact(
|
|
641
|
-
self.
|
|
642
|
-
if self.get_metadata()['is_snapshot']:
|
|
643
|
-
raise excs.Error('Cannot add column to a snapshot.')
|
|
662
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
663
|
+
self.__check_mutable('add columns to')
|
|
644
664
|
if len(kwargs) != 1:
|
|
645
665
|
raise excs.Error(
|
|
646
666
|
f'add_computed_column() requires exactly one keyword argument of the form '
|
|
647
|
-
'
|
|
648
|
-
f'got {len(kwargs)} arguments instead ({", ".join(
|
|
667
|
+
'`col_name=col_type` or `col_name=expression`; '
|
|
668
|
+
f'got {len(kwargs)} arguments instead ({", ".join(kwargs.keys())})'
|
|
649
669
|
)
|
|
650
670
|
col_name, spec = next(iter(kwargs.items()))
|
|
651
671
|
if not is_valid_identifier(col_name):
|
|
652
|
-
raise excs.Error(f'Invalid column name: {col_name
|
|
672
|
+
raise excs.Error(f'Invalid column name: {col_name}')
|
|
653
673
|
|
|
654
674
|
col_schema: dict[str, Any] = {'value': spec}
|
|
655
675
|
if stored is not None:
|
|
656
676
|
col_schema['stored'] = stored
|
|
657
677
|
|
|
678
|
+
if destination is not None:
|
|
679
|
+
col_schema['destination'] = destination
|
|
680
|
+
|
|
658
681
|
# Raise an error if the column expression refers to a column error property
|
|
659
682
|
if isinstance(spec, exprs.Expr):
|
|
660
683
|
for e in spec.subexprs(expr_class=exprs.ColumnPropertyRef, traverse_matches=False):
|
|
661
|
-
if e.
|
|
684
|
+
if e.is_cellmd_prop():
|
|
662
685
|
raise excs.Error(
|
|
663
|
-
'Use of a reference to
|
|
664
|
-
f'
|
|
686
|
+
f'Use of a reference to the {e.prop.name.lower()!r} property of another column '
|
|
687
|
+
f'is not allowed in a computed column.'
|
|
665
688
|
)
|
|
666
689
|
|
|
667
690
|
# handle existing columns based on if_exists parameter
|
|
@@ -669,16 +692,18 @@ class Table(SchemaObject):
|
|
|
669
692
|
[col_name], IfExistsParam.validated(if_exists, 'if_exists')
|
|
670
693
|
)
|
|
671
694
|
# if the column to add already exists and user asked to ignore
|
|
672
|
-
#
|
|
695
|
+
# existing column, there's nothing to do.
|
|
696
|
+
result = UpdateStatus()
|
|
673
697
|
if len(cols_to_ignore) != 0:
|
|
674
698
|
assert cols_to_ignore[0] == col_name
|
|
675
|
-
return
|
|
699
|
+
return result
|
|
676
700
|
|
|
677
701
|
new_col = self._create_columns({col_name: col_schema})[0]
|
|
678
702
|
self._verify_column(new_col)
|
|
679
|
-
|
|
703
|
+
assert self._tbl_version is not None
|
|
704
|
+
result += self._tbl_version.get().add_columns([new_col], print_stats=print_stats, on_error=on_error)
|
|
680
705
|
FileCache.get().emit_eviction_warnings()
|
|
681
|
-
return
|
|
706
|
+
return result
|
|
682
707
|
|
|
683
708
|
@classmethod
|
|
684
709
|
def _validate_column_spec(cls, name: str, spec: dict[str, Any]) -> None:
|
|
@@ -688,40 +713,45 @@ class Table(SchemaObject):
|
|
|
688
713
|
(on account of containing Python Callables or Exprs).
|
|
689
714
|
"""
|
|
690
715
|
assert isinstance(spec, dict)
|
|
691
|
-
valid_keys = {'type', 'value', 'stored', 'media_validation'}
|
|
716
|
+
valid_keys = {'type', 'value', 'stored', 'media_validation', 'destination'}
|
|
692
717
|
for k in spec:
|
|
693
718
|
if k not in valid_keys:
|
|
694
|
-
raise excs.Error(f'Column {name}: invalid key {k!r}')
|
|
719
|
+
raise excs.Error(f'Column {name!r}: invalid key {k!r}')
|
|
695
720
|
|
|
696
721
|
if 'type' not in spec and 'value' not in spec:
|
|
697
|
-
raise excs.Error(f"Column {name}: 'type' or 'value' must be specified")
|
|
722
|
+
raise excs.Error(f"Column {name!r}: 'type' or 'value' must be specified")
|
|
698
723
|
|
|
699
724
|
if 'type' in spec and not isinstance(spec['type'], (ts.ColumnType, type, _GenericAlias)):
|
|
700
|
-
raise excs.Error(f
|
|
725
|
+
raise excs.Error(f"Column {name!r}: 'type' must be a type or ColumnType; got {spec['type']}")
|
|
701
726
|
|
|
702
727
|
if 'value' in spec:
|
|
703
728
|
value_expr = exprs.Expr.from_object(spec['value'])
|
|
704
729
|
if value_expr is None:
|
|
705
|
-
raise excs.Error(f
|
|
730
|
+
raise excs.Error(f"Column {name!r}: 'value' must be a Pixeltable expression.")
|
|
706
731
|
if 'type' in spec:
|
|
707
|
-
raise excs.Error(f"Column {name}: 'type' is redundant if 'value' is specified")
|
|
732
|
+
raise excs.Error(f"Column {name!r}: 'type' is redundant if 'value' is specified")
|
|
708
733
|
|
|
709
734
|
if 'media_validation' in spec:
|
|
710
|
-
_ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name}: media_validation')
|
|
735
|
+
_ = catalog.MediaValidation.validated(spec['media_validation'], f'Column {name!r}: media_validation')
|
|
711
736
|
|
|
712
737
|
if 'stored' in spec and not isinstance(spec['stored'], bool):
|
|
713
|
-
raise excs.Error(f
|
|
738
|
+
raise excs.Error(f"Column {name!r}: 'stored' must be a bool; got {spec['stored']}")
|
|
739
|
+
|
|
740
|
+
d = spec.get('destination')
|
|
741
|
+
if d is not None and not isinstance(d, (str, Path)):
|
|
742
|
+
raise excs.Error(f'Column {name!r}: `destination` must be a string or path; got {d}')
|
|
714
743
|
|
|
715
744
|
@classmethod
|
|
716
745
|
def _create_columns(cls, schema: dict[str, Any]) -> list[Column]:
|
|
717
746
|
"""Construct list of Columns, given schema"""
|
|
718
747
|
columns: list[Column] = []
|
|
719
748
|
for name, spec in schema.items():
|
|
720
|
-
col_type:
|
|
721
|
-
value_expr:
|
|
749
|
+
col_type: ts.ColumnType | None = None
|
|
750
|
+
value_expr: exprs.Expr | None = None
|
|
722
751
|
primary_key: bool = False
|
|
723
|
-
media_validation:
|
|
752
|
+
media_validation: catalog.MediaValidation | None = None
|
|
724
753
|
stored = True
|
|
754
|
+
destination: str | None = None
|
|
725
755
|
|
|
726
756
|
if isinstance(spec, (ts.ColumnType, type, _GenericAlias)):
|
|
727
757
|
col_type = ts.ColumnType.normalize_type(spec, nullable_default=True, allow_builtin_types=False)
|
|
@@ -746,6 +776,7 @@ class Table(SchemaObject):
|
|
|
746
776
|
media_validation = (
|
|
747
777
|
catalog.MediaValidation[media_validation_str.upper()] if media_validation_str is not None else None
|
|
748
778
|
)
|
|
779
|
+
destination = spec.get('destination')
|
|
749
780
|
else:
|
|
750
781
|
raise excs.Error(f'Invalid value for column {name!r}')
|
|
751
782
|
|
|
@@ -756,41 +787,46 @@ class Table(SchemaObject):
|
|
|
756
787
|
stored=stored,
|
|
757
788
|
is_pk=primary_key,
|
|
758
789
|
media_validation=media_validation,
|
|
790
|
+
destination=destination,
|
|
759
791
|
)
|
|
792
|
+
# Validate the column's resolved_destination. This will ensure that if the column uses a default (global)
|
|
793
|
+
# media destination, it gets validated at this time.
|
|
794
|
+
ObjectOps.validate_destination(column.destination, column.name)
|
|
760
795
|
columns.append(column)
|
|
796
|
+
|
|
761
797
|
return columns
|
|
762
798
|
|
|
763
799
|
@classmethod
|
|
764
800
|
def validate_column_name(cls, name: str) -> None:
|
|
765
|
-
"""Check that a name is usable as a
|
|
801
|
+
"""Check that a name is usable as a pixeltable column name"""
|
|
766
802
|
if is_system_column_name(name) or is_python_keyword(name):
|
|
767
803
|
raise excs.Error(f'{name!r} is a reserved name in Pixeltable; please choose a different column name.')
|
|
768
804
|
if not is_valid_identifier(name):
|
|
769
|
-
raise excs.Error(f'Invalid column name: {name
|
|
805
|
+
raise excs.Error(f'Invalid column name: {name}')
|
|
770
806
|
|
|
771
807
|
@classmethod
|
|
772
808
|
def _verify_column(cls, col: Column) -> None:
|
|
773
809
|
"""Check integrity of user-supplied Column and supply defaults"""
|
|
774
810
|
cls.validate_column_name(col.name)
|
|
775
811
|
if col.stored is False and not col.is_computed:
|
|
776
|
-
raise excs.Error(f'Column {col.name!r}: stored={col.stored} only applies to computed columns')
|
|
812
|
+
raise excs.Error(f'Column {col.name!r}: `stored={col.stored}` only applies to computed columns')
|
|
777
813
|
if col.stored is False and col.has_window_fn_call():
|
|
778
814
|
raise excs.Error(
|
|
779
815
|
(
|
|
780
|
-
f'Column {col.name!r}: stored={col.stored} is not valid for image columns computed with a '
|
|
816
|
+
f'Column {col.name!r}: `stored={col.stored}` is not valid for image columns computed with a '
|
|
781
817
|
f'streaming function'
|
|
782
818
|
)
|
|
783
819
|
)
|
|
820
|
+
if col._explicit_destination is not None and not (col.stored and col.is_computed):
|
|
821
|
+
raise excs.Error(f'Column {col.name!r}: `destination` property only applies to stored computed columns')
|
|
784
822
|
|
|
785
823
|
@classmethod
|
|
786
824
|
def _verify_schema(cls, schema: list[Column]) -> None:
|
|
787
825
|
"""Check integrity of user-supplied schema and set defaults"""
|
|
788
|
-
column_names: set[str] = set()
|
|
789
826
|
for col in schema:
|
|
790
827
|
cls._verify_column(col)
|
|
791
|
-
column_names.add(col.name)
|
|
792
828
|
|
|
793
|
-
def drop_column(self, column:
|
|
829
|
+
def drop_column(self, column: str | ColumnRef, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
|
|
794
830
|
"""Drop a column from the table.
|
|
795
831
|
|
|
796
832
|
Args:
|
|
@@ -822,54 +858,86 @@ class Table(SchemaObject):
|
|
|
822
858
|
"""
|
|
823
859
|
from pixeltable.catalog import Catalog
|
|
824
860
|
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
861
|
+
cat = Catalog.get()
|
|
862
|
+
|
|
863
|
+
# lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
|
|
864
|
+
with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
865
|
+
self.__check_mutable('drop columns from')
|
|
829
866
|
col: Column = None
|
|
830
867
|
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
831
868
|
|
|
832
869
|
if isinstance(column, str):
|
|
833
|
-
col = self._tbl_version_path.get_column(column
|
|
870
|
+
col = self._tbl_version_path.get_column(column)
|
|
834
871
|
if col is None:
|
|
835
872
|
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
836
|
-
raise excs.Error(f'
|
|
873
|
+
raise excs.Error(f'Unknown column: {column}')
|
|
837
874
|
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
838
875
|
return
|
|
876
|
+
if col.get_tbl().id != self._tbl_version_path.tbl_id:
|
|
877
|
+
raise excs.Error(f'Cannot drop base table column {col.name!r}')
|
|
839
878
|
col = self._tbl_version.get().cols_by_name[column]
|
|
840
879
|
else:
|
|
841
|
-
exists = self._tbl_version_path.has_column(column.col
|
|
880
|
+
exists = self._tbl_version_path.has_column(column.col)
|
|
842
881
|
if not exists:
|
|
843
882
|
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
844
883
|
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
845
884
|
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
846
885
|
return
|
|
847
886
|
col = column.col
|
|
887
|
+
if col.get_tbl().id != self._tbl_version_path.tbl_id:
|
|
888
|
+
raise excs.Error(f'Cannot drop base table column {col.name!r}')
|
|
848
889
|
|
|
849
|
-
dependent_user_cols = [c for c in col.
|
|
890
|
+
dependent_user_cols = [c for c in cat.get_column_dependents(col.get_tbl().id, col.id) if c.name is not None]
|
|
850
891
|
if len(dependent_user_cols) > 0:
|
|
851
892
|
raise excs.Error(
|
|
852
|
-
f'Cannot drop column
|
|
893
|
+
f'Cannot drop column {col.name!r} because the following columns depend on it:\n'
|
|
853
894
|
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
854
895
|
)
|
|
855
896
|
|
|
897
|
+
views = self._get_views(recursive=True, mutable_only=True)
|
|
898
|
+
|
|
899
|
+
# See if any view predicates depend on this column
|
|
900
|
+
dependent_views: list[tuple[Table, exprs.Expr]] = []
|
|
901
|
+
for view in views:
|
|
902
|
+
if view._tbl_version is not None:
|
|
903
|
+
predicate = view._tbl_version.get().predicate
|
|
904
|
+
if predicate is not None:
|
|
905
|
+
for predicate_col in exprs.Expr.get_refd_column_ids(predicate.as_dict()):
|
|
906
|
+
if predicate_col.tbl_id == col.get_tbl().id and predicate_col.col_id == col.id:
|
|
907
|
+
dependent_views.append((view, predicate))
|
|
908
|
+
|
|
909
|
+
if len(dependent_views) > 0:
|
|
910
|
+
dependent_views_str = '\n'.join(
|
|
911
|
+
f'view: {view._path()}, predicate: {predicate}' for view, predicate in dependent_views
|
|
912
|
+
)
|
|
913
|
+
raise excs.Error(
|
|
914
|
+
f'Cannot drop column {col.name!r} because the following views depend on it:\n{dependent_views_str}'
|
|
915
|
+
)
|
|
916
|
+
|
|
856
917
|
# See if this column has a dependent store. We need to look through all stores in all
|
|
857
918
|
# (transitive) views of this table.
|
|
919
|
+
col_handle = col.handle
|
|
858
920
|
dependent_stores = [
|
|
859
921
|
(view, store)
|
|
860
|
-
for view in (self, *
|
|
922
|
+
for view in (self, *views)
|
|
861
923
|
for store in view._tbl_version.get().external_stores.values()
|
|
862
|
-
if
|
|
924
|
+
if col_handle in store.get_local_columns()
|
|
863
925
|
]
|
|
864
926
|
if len(dependent_stores) > 0:
|
|
865
927
|
dependent_store_names = [
|
|
866
|
-
store.name if view._id == self._id else f'{store.name} (in view
|
|
928
|
+
store.name if view._id == self._id else f'{store.name} (in view {view._name!r})'
|
|
867
929
|
for view, store in dependent_stores
|
|
868
930
|
]
|
|
869
931
|
raise excs.Error(
|
|
870
|
-
f'Cannot drop column
|
|
932
|
+
f'Cannot drop column {col.name!r} because the following external stores depend on it:\n'
|
|
871
933
|
f'{", ".join(dependent_store_names)}'
|
|
872
934
|
)
|
|
935
|
+
all_columns = self.columns()
|
|
936
|
+
if len(all_columns) == 1 and col.name == all_columns[0]:
|
|
937
|
+
raise excs.Error(
|
|
938
|
+
f'Cannot drop column {col.name!r} because it is the last remaining column in this table.'
|
|
939
|
+
f' Tables must have at least one column.'
|
|
940
|
+
)
|
|
873
941
|
|
|
874
942
|
self._tbl_version.get().drop_column(col)
|
|
875
943
|
|
|
@@ -891,7 +959,7 @@ class Table(SchemaObject):
|
|
|
891
959
|
"""
|
|
892
960
|
from pixeltable.catalog import Catalog
|
|
893
961
|
|
|
894
|
-
with Catalog.get().begin_xact(
|
|
962
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
895
963
|
self._tbl_version.get().rename_column(old_name, new_name)
|
|
896
964
|
|
|
897
965
|
def _list_index_info_for_test(self) -> list[dict[str, Any]]:
|
|
@@ -902,7 +970,6 @@ class Table(SchemaObject):
|
|
|
902
970
|
A list of index information, each containing the index's
|
|
903
971
|
id, name, and the name of the column it indexes.
|
|
904
972
|
"""
|
|
905
|
-
assert not self._is_dropped
|
|
906
973
|
index_info = []
|
|
907
974
|
for idx_name, idx in self._tbl_version.get().idxs_by_name.items():
|
|
908
975
|
index_info.append({'_id': idx.id, '_name': idx_name, '_column': idx.col.name})
|
|
@@ -910,13 +977,13 @@ class Table(SchemaObject):
|
|
|
910
977
|
|
|
911
978
|
def add_embedding_index(
|
|
912
979
|
self,
|
|
913
|
-
column:
|
|
980
|
+
column: str | ColumnRef,
|
|
914
981
|
*,
|
|
915
|
-
idx_name:
|
|
916
|
-
embedding:
|
|
917
|
-
string_embed:
|
|
918
|
-
image_embed:
|
|
919
|
-
metric:
|
|
982
|
+
idx_name: str | None = None,
|
|
983
|
+
embedding: pxt.Function | None = None,
|
|
984
|
+
string_embed: pxt.Function | None = None,
|
|
985
|
+
image_embed: pxt.Function | None = None,
|
|
986
|
+
metric: Literal['cosine', 'ip', 'l2'] = 'cosine',
|
|
920
987
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
921
988
|
) -> None:
|
|
922
989
|
"""
|
|
@@ -924,25 +991,28 @@ class Table(SchemaObject):
|
|
|
924
991
|
rows are inserted into the table.
|
|
925
992
|
|
|
926
993
|
To add an embedding index, one must specify, at minimum, the column to be indexed and an embedding UDF.
|
|
927
|
-
Only `String` and `Image` columns are currently supported.
|
|
928
|
-
[CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
|
|
994
|
+
Only `String` and `Image` columns are currently supported.
|
|
929
995
|
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
996
|
+
Examples:
|
|
997
|
+
Here's an example that uses a
|
|
998
|
+
[CLIP embedding][pixeltable.functions.huggingface.clip] to index an image column:
|
|
999
|
+
|
|
1000
|
+
>>> from pixeltable.functions.huggingface import clip
|
|
1001
|
+
>>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
|
|
1002
|
+
>>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
|
|
933
1003
|
|
|
934
|
-
|
|
1004
|
+
Once the index is created, similarity lookups can be performed using the `similarity` pseudo-function:
|
|
935
1005
|
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
1006
|
+
>>> reference_img = PIL.Image.open('my_image.jpg')
|
|
1007
|
+
>>> sim = tbl.img.similarity(reference_img)
|
|
1008
|
+
>>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
|
|
939
1009
|
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
1010
|
+
If the embedding UDF is a multimodal embedding (supporting more than one data type), then lookups may be
|
|
1011
|
+
performed using any of its supported types. In our example, CLIP supports both text and images, so we can
|
|
1012
|
+
also search for images using a text description:
|
|
943
1013
|
|
|
944
|
-
|
|
945
|
-
|
|
1014
|
+
>>> sim = tbl.img.similarity('a picture of a train')
|
|
1015
|
+
>>> tbl.select(tbl.img, sim).order_by(sim, asc=False).limit(5)
|
|
946
1016
|
|
|
947
1017
|
Args:
|
|
948
1018
|
column: The name of, or reference to, the column to be indexed; must be a `String` or `Image` column.
|
|
@@ -973,9 +1043,9 @@ class Table(SchemaObject):
|
|
|
973
1043
|
Add an index to the `img` column of the table `my_table`:
|
|
974
1044
|
|
|
975
1045
|
>>> from pixeltable.functions.huggingface import clip
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
1046
|
+
>>> tbl = pxt.get_table('my_table')
|
|
1047
|
+
>>> embedding_fn = clip.using(model_id='openai/clip-vit-base-patch32')
|
|
1048
|
+
>>> tbl.add_embedding_index(tbl.img, embedding=embedding_fn)
|
|
979
1049
|
|
|
980
1050
|
Alternatively, the `img` column may be specified by name:
|
|
981
1051
|
|
|
@@ -1001,9 +1071,8 @@ class Table(SchemaObject):
|
|
|
1001
1071
|
"""
|
|
1002
1072
|
from pixeltable.catalog import Catalog
|
|
1003
1073
|
|
|
1004
|
-
with Catalog.get().begin_xact(
|
|
1005
|
-
|
|
1006
|
-
raise excs.Error('Cannot add an index to a snapshot')
|
|
1074
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1075
|
+
self.__check_mutable('add an index to')
|
|
1007
1076
|
col = self._resolve_column_parameter(column)
|
|
1008
1077
|
|
|
1009
1078
|
if idx_name is not None and idx_name in self._tbl_version.get().idxs_by_name:
|
|
@@ -1014,7 +1083,7 @@ class Table(SchemaObject):
|
|
|
1014
1083
|
raise excs.Error(f'Duplicate index name: {idx_name}')
|
|
1015
1084
|
if not isinstance(self._tbl_version.get().idxs_by_name[idx_name].idx, index.EmbeddingIndex):
|
|
1016
1085
|
raise excs.Error(
|
|
1017
|
-
f'Index
|
|
1086
|
+
f'Index {idx_name!r} is not an embedding index. Cannot {if_exists_.name.lower()} it.'
|
|
1018
1087
|
)
|
|
1019
1088
|
if if_exists_ == IfExistsParam.IGNORE:
|
|
1020
1089
|
return
|
|
@@ -1027,10 +1096,9 @@ class Table(SchemaObject):
|
|
|
1027
1096
|
if idx_name is not None:
|
|
1028
1097
|
Table.validate_column_name(idx_name)
|
|
1029
1098
|
|
|
1030
|
-
#
|
|
1031
|
-
idx = EmbeddingIndex(
|
|
1032
|
-
|
|
1033
|
-
)
|
|
1099
|
+
# validate EmbeddingIndex args
|
|
1100
|
+
idx = EmbeddingIndex(metric=metric, embed=embedding, string_embed=string_embed, image_embed=image_embed)
|
|
1101
|
+
_ = idx.create_value_expr(col)
|
|
1034
1102
|
_ = self._tbl_version.get().add_index(col, idx_name=idx_name, idx=idx)
|
|
1035
1103
|
# TODO: how to deal with exceptions here? drop the index and raise?
|
|
1036
1104
|
FileCache.get().emit_eviction_warnings()
|
|
@@ -1038,8 +1106,8 @@ class Table(SchemaObject):
|
|
|
1038
1106
|
def drop_embedding_index(
|
|
1039
1107
|
self,
|
|
1040
1108
|
*,
|
|
1041
|
-
column:
|
|
1042
|
-
idx_name:
|
|
1109
|
+
column: str | ColumnRef | None = None,
|
|
1110
|
+
idx_name: str | None = None,
|
|
1043
1111
|
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
1044
1112
|
) -> None:
|
|
1045
1113
|
"""
|
|
@@ -1090,7 +1158,7 @@ class Table(SchemaObject):
|
|
|
1090
1158
|
if (column is None) == (idx_name is None):
|
|
1091
1159
|
raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
|
|
1092
1160
|
|
|
1093
|
-
with Catalog.get().begin_xact(
|
|
1161
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1094
1162
|
col: Column = None
|
|
1095
1163
|
if idx_name is None:
|
|
1096
1164
|
col = self._resolve_column_parameter(column)
|
|
@@ -1098,15 +1166,15 @@ class Table(SchemaObject):
|
|
|
1098
1166
|
|
|
1099
1167
|
self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
|
|
1100
1168
|
|
|
1101
|
-
def _resolve_column_parameter(self, column:
|
|
1169
|
+
def _resolve_column_parameter(self, column: str | ColumnRef) -> Column:
|
|
1102
1170
|
"""Resolve a column parameter to a Column object"""
|
|
1103
1171
|
col: Column = None
|
|
1104
1172
|
if isinstance(column, str):
|
|
1105
|
-
col = self._tbl_version_path.get_column(column
|
|
1173
|
+
col = self._tbl_version_path.get_column(column)
|
|
1106
1174
|
if col is None:
|
|
1107
|
-
raise excs.Error(f'
|
|
1175
|
+
raise excs.Error(f'Unknown column: {column}')
|
|
1108
1176
|
elif isinstance(column, ColumnRef):
|
|
1109
|
-
exists = self._tbl_version_path.has_column(column.col
|
|
1177
|
+
exists = self._tbl_version_path.has_column(column.col)
|
|
1110
1178
|
if not exists:
|
|
1111
1179
|
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
1112
1180
|
col = column.col
|
|
@@ -1117,8 +1185,8 @@ class Table(SchemaObject):
|
|
|
1117
1185
|
def drop_index(
|
|
1118
1186
|
self,
|
|
1119
1187
|
*,
|
|
1120
|
-
column:
|
|
1121
|
-
idx_name:
|
|
1188
|
+
column: str | ColumnRef | None = None,
|
|
1189
|
+
idx_name: str | None = None,
|
|
1122
1190
|
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
1123
1191
|
) -> None:
|
|
1124
1192
|
"""
|
|
@@ -1169,7 +1237,7 @@ class Table(SchemaObject):
|
|
|
1169
1237
|
if (column is None) == (idx_name is None):
|
|
1170
1238
|
raise excs.Error("Exactly one of 'column' or 'idx_name' must be provided")
|
|
1171
1239
|
|
|
1172
|
-
with Catalog.get().begin_xact(
|
|
1240
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
1173
1241
|
col: Column = None
|
|
1174
1242
|
if idx_name is None:
|
|
1175
1243
|
col = self._resolve_column_parameter(column)
|
|
@@ -1180,13 +1248,14 @@ class Table(SchemaObject):
|
|
|
1180
1248
|
def _drop_index(
|
|
1181
1249
|
self,
|
|
1182
1250
|
*,
|
|
1183
|
-
col:
|
|
1184
|
-
idx_name:
|
|
1185
|
-
_idx_class:
|
|
1251
|
+
col: Column | None = None,
|
|
1252
|
+
idx_name: str | None = None,
|
|
1253
|
+
_idx_class: type[index.IndexBase] | None = None,
|
|
1186
1254
|
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
1187
1255
|
) -> None:
|
|
1188
|
-
|
|
1189
|
-
|
|
1256
|
+
from pixeltable.catalog import Catalog
|
|
1257
|
+
|
|
1258
|
+
self.__check_mutable('drop an index from')
|
|
1190
1259
|
assert (col is None) != (idx_name is None)
|
|
1191
1260
|
|
|
1192
1261
|
if idx_name is not None:
|
|
@@ -1198,9 +1267,10 @@ class Table(SchemaObject):
|
|
|
1198
1267
|
return
|
|
1199
1268
|
idx_info = self._tbl_version.get().idxs_by_name[idx_name]
|
|
1200
1269
|
else:
|
|
1201
|
-
if col.
|
|
1270
|
+
if col.get_tbl().id != self._tbl_version.id:
|
|
1202
1271
|
raise excs.Error(
|
|
1203
|
-
f'Column {col.name!r}:
|
|
1272
|
+
f'Column {col.name!r}: '
|
|
1273
|
+
f'cannot drop index from column that belongs to base table {col.get_tbl().name!r}'
|
|
1204
1274
|
)
|
|
1205
1275
|
idx_info_list = [info for info in self._tbl_version.get().idxs_by_name.values() if info.col.id == col.id]
|
|
1206
1276
|
if _idx_class is not None:
|
|
@@ -1212,14 +1282,17 @@ class Table(SchemaObject):
|
|
|
1212
1282
|
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
1213
1283
|
return
|
|
1214
1284
|
if len(idx_info_list) > 1:
|
|
1215
|
-
raise excs.Error(f
|
|
1285
|
+
raise excs.Error(f'Column {col.name!r} has multiple indices; specify `idx_name` explicitly to drop one')
|
|
1216
1286
|
idx_info = idx_info_list[0]
|
|
1217
1287
|
|
|
1218
1288
|
# Find out if anything depends on this index
|
|
1219
|
-
|
|
1289
|
+
val_col = idx_info.val_col
|
|
1290
|
+
dependent_user_cols = [
|
|
1291
|
+
c for c in Catalog.get().get_column_dependents(val_col.get_tbl().id, val_col.id) if c.name is not None
|
|
1292
|
+
]
|
|
1220
1293
|
if len(dependent_user_cols) > 0:
|
|
1221
1294
|
raise excs.Error(
|
|
1222
|
-
f'Cannot drop index because the following columns depend on it:\n'
|
|
1295
|
+
f'Cannot drop index {idx_info.name!r} because the following columns depend on it:\n'
|
|
1223
1296
|
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
1224
1297
|
)
|
|
1225
1298
|
self._tbl_version.get().drop_index(idx_info.id)
|
|
@@ -1230,8 +1303,8 @@ class Table(SchemaObject):
|
|
|
1230
1303
|
source: TableDataSource,
|
|
1231
1304
|
/,
|
|
1232
1305
|
*,
|
|
1233
|
-
source_format:
|
|
1234
|
-
schema_overrides:
|
|
1306
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
1307
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
1235
1308
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1236
1309
|
print_stats: bool = False,
|
|
1237
1310
|
**kwargs: Any,
|
|
@@ -1245,11 +1318,11 @@ class Table(SchemaObject):
|
|
|
1245
1318
|
@abc.abstractmethod
|
|
1246
1319
|
def insert(
|
|
1247
1320
|
self,
|
|
1248
|
-
source:
|
|
1321
|
+
source: TableDataSource | None = None,
|
|
1249
1322
|
/,
|
|
1250
1323
|
*,
|
|
1251
|
-
source_format:
|
|
1252
|
-
schema_overrides:
|
|
1324
|
+
source_format: Literal['csv', 'excel', 'parquet', 'json'] | None = None,
|
|
1325
|
+
schema_overrides: dict[str, ts.ColumnType] | None = None,
|
|
1253
1326
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1254
1327
|
print_stats: bool = False,
|
|
1255
1328
|
**kwargs: Any,
|
|
@@ -1266,7 +1339,8 @@ class Table(SchemaObject):
|
|
|
1266
1339
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1267
1340
|
print_stats: bool = False,
|
|
1268
1341
|
**kwargs: Any,
|
|
1269
|
-
)
|
|
1342
|
+
)
|
|
1343
|
+
```
|
|
1270
1344
|
|
|
1271
1345
|
To insert just a single row, you can use the more concise syntax:
|
|
1272
1346
|
|
|
@@ -1276,7 +1350,8 @@ class Table(SchemaObject):
|
|
|
1276
1350
|
on_error: Literal['abort', 'ignore'] = 'abort',
|
|
1277
1351
|
print_stats: bool = False,
|
|
1278
1352
|
**kwargs: Any
|
|
1279
|
-
)
|
|
1353
|
+
)
|
|
1354
|
+
```
|
|
1280
1355
|
|
|
1281
1356
|
Args:
|
|
1282
1357
|
source: A data source from which data can be imported.
|
|
@@ -1319,11 +1394,20 @@ class Table(SchemaObject):
|
|
|
1319
1394
|
Insert rows from a CSV file:
|
|
1320
1395
|
|
|
1321
1396
|
>>> tbl.insert(source='path/to/file.csv')
|
|
1397
|
+
|
|
1398
|
+
Insert Pydantic model instances into a table with two `pxt.Int` columns `a` and `b`:
|
|
1399
|
+
|
|
1400
|
+
>>> class MyModel(pydantic.BaseModel):
|
|
1401
|
+
... a: int
|
|
1402
|
+
... b: int
|
|
1403
|
+
...
|
|
1404
|
+
... models = [MyModel(a=1, b=2), MyModel(a=3, b=4)]
|
|
1405
|
+
... tbl.insert(models)
|
|
1322
1406
|
"""
|
|
1323
1407
|
raise NotImplementedError
|
|
1324
1408
|
|
|
1325
1409
|
def update(
|
|
1326
|
-
self, value_spec: dict[str, Any], where:
|
|
1410
|
+
self, value_spec: dict[str, Any], where: 'exprs.Expr' | None = None, cascade: bool = True
|
|
1327
1411
|
) -> UpdateStatus:
|
|
1328
1412
|
"""Update rows in this table.
|
|
1329
1413
|
|
|
@@ -1332,6 +1416,9 @@ class Table(SchemaObject):
|
|
|
1332
1416
|
where: a predicate to filter rows to update.
|
|
1333
1417
|
cascade: if True, also update all computed columns that transitively depend on the updated columns.
|
|
1334
1418
|
|
|
1419
|
+
Returns:
|
|
1420
|
+
An [`UpdateStatus`][pixeltable.UpdateStatus] object containing information about the update.
|
|
1421
|
+
|
|
1335
1422
|
Examples:
|
|
1336
1423
|
Set column `int_col` to 1 for all rows:
|
|
1337
1424
|
|
|
@@ -1351,10 +1438,11 @@ class Table(SchemaObject):
|
|
|
1351
1438
|
"""
|
|
1352
1439
|
from pixeltable.catalog import Catalog
|
|
1353
1440
|
|
|
1354
|
-
with Catalog.get().begin_xact(
|
|
1355
|
-
|
|
1441
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1442
|
+
self.__check_mutable('update')
|
|
1443
|
+
result = self._tbl_version.get().update(value_spec, where, cascade)
|
|
1356
1444
|
FileCache.get().emit_eviction_warnings()
|
|
1357
|
-
return
|
|
1445
|
+
return result
|
|
1358
1446
|
|
|
1359
1447
|
def batch_update(
|
|
1360
1448
|
self,
|
|
@@ -1384,14 +1472,13 @@ class Table(SchemaObject):
|
|
|
1384
1472
|
the row with new `id` 3 (assuming this key does not exist):
|
|
1385
1473
|
|
|
1386
1474
|
>>> tbl.update(
|
|
1387
|
-
|
|
1388
|
-
|
|
1475
|
+
... [{'id': 1, 'name': 'Alice', 'age': 30}, {'id': 3, 'name': 'Bob', 'age': 40}],
|
|
1476
|
+
... if_not_exists='insert')
|
|
1389
1477
|
"""
|
|
1390
1478
|
from pixeltable.catalog import Catalog
|
|
1391
1479
|
|
|
1392
|
-
with Catalog.get().begin_xact(
|
|
1393
|
-
|
|
1394
|
-
raise excs.Error('Cannot update a snapshot')
|
|
1480
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1481
|
+
self.__check_mutable('update')
|
|
1395
1482
|
rows = list(rows)
|
|
1396
1483
|
|
|
1397
1484
|
row_updates: list[dict[Column, exprs.Expr]] = []
|
|
@@ -1415,10 +1502,12 @@ class Table(SchemaObject):
|
|
|
1415
1502
|
col_names = {col.name for col in col_vals}
|
|
1416
1503
|
if any(pk_col_name not in col_names for pk_col_name in pk_col_names):
|
|
1417
1504
|
missing_cols = pk_col_names - {col.name for col in col_vals}
|
|
1418
|
-
raise excs.Error(
|
|
1505
|
+
raise excs.Error(
|
|
1506
|
+
f'Primary key column(s) {", ".join(repr(c) for c in missing_cols)} missing in {row_spec}'
|
|
1507
|
+
)
|
|
1419
1508
|
row_updates.append(col_vals)
|
|
1420
1509
|
|
|
1421
|
-
|
|
1510
|
+
result = self._tbl_version.get().batch_update(
|
|
1422
1511
|
row_updates,
|
|
1423
1512
|
rowids,
|
|
1424
1513
|
error_if_not_exists=if_not_exists == 'error',
|
|
@@ -1426,9 +1515,85 @@ class Table(SchemaObject):
|
|
|
1426
1515
|
cascade=cascade,
|
|
1427
1516
|
)
|
|
1428
1517
|
FileCache.get().emit_eviction_warnings()
|
|
1429
|
-
return
|
|
1518
|
+
return result
|
|
1430
1519
|
|
|
1431
|
-
def
|
|
1520
|
+
def recompute_columns(
|
|
1521
|
+
self,
|
|
1522
|
+
*columns: str | ColumnRef,
|
|
1523
|
+
where: 'exprs.Expr' | None = None,
|
|
1524
|
+
errors_only: bool = False,
|
|
1525
|
+
cascade: bool = True,
|
|
1526
|
+
) -> UpdateStatus:
|
|
1527
|
+
"""Recompute the values in one or more computed columns of this table.
|
|
1528
|
+
|
|
1529
|
+
Args:
|
|
1530
|
+
columns: The names or references of the computed columns to recompute.
|
|
1531
|
+
where: A predicate to filter rows to recompute.
|
|
1532
|
+
errors_only: If True, only run the recomputation for rows that have errors in the column (ie, the column's
|
|
1533
|
+
`errortype` property indicates that an error occurred). Only allowed for recomputing a single column.
|
|
1534
|
+
cascade: if True, also update all computed columns that transitively depend on the recomputed columns.
|
|
1535
|
+
|
|
1536
|
+
Examples:
|
|
1537
|
+
Recompute computed columns `c1` and `c2` for all rows in this table, and everything that transitively
|
|
1538
|
+
depends on them:
|
|
1539
|
+
|
|
1540
|
+
>>> tbl.recompute_columns('c1', 'c2')
|
|
1541
|
+
|
|
1542
|
+
Recompute computed column `c1` for all rows in this table, but don't recompute other columns that depend on
|
|
1543
|
+
it:
|
|
1544
|
+
|
|
1545
|
+
>>> tbl.recompute_columns(tbl.c1, tbl.c2, cascade=False)
|
|
1546
|
+
|
|
1547
|
+
Recompute column `c1` and its dependents, but only for rows with `c2` == 0:
|
|
1548
|
+
|
|
1549
|
+
>>> tbl.recompute_columns('c1', where=tbl.c2 == 0)
|
|
1550
|
+
|
|
1551
|
+
Recompute column `c1` and its dependents, but only for rows that have errors in it:
|
|
1552
|
+
|
|
1553
|
+
>>> tbl.recompute_columns('c1', errors_only=True)
|
|
1554
|
+
"""
|
|
1555
|
+
from pixeltable.catalog import Catalog
|
|
1556
|
+
|
|
1557
|
+
cat = Catalog.get()
|
|
1558
|
+
# lock_mutable_tree=True: we need to be able to see whether any transitive view has column dependents
|
|
1559
|
+
with cat.begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1560
|
+
self.__check_mutable('recompute columns of')
|
|
1561
|
+
if len(columns) == 0:
|
|
1562
|
+
raise excs.Error('At least one column must be specified to recompute')
|
|
1563
|
+
if errors_only and len(columns) > 1:
|
|
1564
|
+
raise excs.Error('Cannot use errors_only=True with multiple columns')
|
|
1565
|
+
|
|
1566
|
+
col_names: list[str] = []
|
|
1567
|
+
for column in columns:
|
|
1568
|
+
col_name: str
|
|
1569
|
+
col: Column
|
|
1570
|
+
if isinstance(column, str):
|
|
1571
|
+
col = self._tbl_version_path.get_column(column)
|
|
1572
|
+
if col is None:
|
|
1573
|
+
raise excs.Error(f'Unknown column: {column}')
|
|
1574
|
+
col_name = column
|
|
1575
|
+
else:
|
|
1576
|
+
assert isinstance(column, ColumnRef)
|
|
1577
|
+
col = column.col
|
|
1578
|
+
if not self._tbl_version_path.has_column(col):
|
|
1579
|
+
raise excs.Error(f'Unknown column: {col.name}')
|
|
1580
|
+
col_name = col.name
|
|
1581
|
+
if not col.is_computed:
|
|
1582
|
+
raise excs.Error(f'Column {col_name!r} is not a computed column')
|
|
1583
|
+
if col.get_tbl().id != self._tbl_version_path.tbl_id:
|
|
1584
|
+
raise excs.Error(f'Cannot recompute column of a base: {col_name}')
|
|
1585
|
+
col_names.append(col_name)
|
|
1586
|
+
|
|
1587
|
+
if where is not None and not where.is_bound_by([self._tbl_version_path]):
|
|
1588
|
+
raise excs.Error(f'`where` predicate ({where}) is not bound by {self._display_str()}')
|
|
1589
|
+
|
|
1590
|
+
result = self._tbl_version.get().recompute_columns(
|
|
1591
|
+
col_names, where=where, errors_only=errors_only, cascade=cascade
|
|
1592
|
+
)
|
|
1593
|
+
FileCache.get().emit_eviction_warnings()
|
|
1594
|
+
return result
|
|
1595
|
+
|
|
1596
|
+
def delete(self, where: 'exprs.Expr' | None = None) -> UpdateStatus:
|
|
1432
1597
|
"""Delete rows in this table.
|
|
1433
1598
|
|
|
1434
1599
|
Args:
|
|
@@ -1453,14 +1618,63 @@ class Table(SchemaObject):
|
|
|
1453
1618
|
"""
|
|
1454
1619
|
from pixeltable.catalog import Catalog
|
|
1455
1620
|
|
|
1456
|
-
with Catalog.get().begin_xact(
|
|
1457
|
-
|
|
1458
|
-
raise excs.Error('Cannot revert a snapshot')
|
|
1621
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=True):
|
|
1622
|
+
self.__check_mutable('revert')
|
|
1459
1623
|
self._tbl_version.get().revert()
|
|
1460
1624
|
# remove cached md in order to force a reload on the next operation
|
|
1461
|
-
self.
|
|
1625
|
+
self._tbl_version_path.clear_cached_md()
|
|
1626
|
+
|
|
1627
|
+
def push(self, *, version: int | None = None) -> None:
|
|
1628
|
+
from pixeltable.share import push_replica
|
|
1629
|
+
from pixeltable.share.protocol import PxtUri
|
|
1630
|
+
|
|
1631
|
+
tbl_version = self._tbl_version.get()
|
|
1632
|
+
pxt_uri = tbl_version.pxt_uri
|
|
1633
|
+
|
|
1634
|
+
if tbl_version.is_replica:
|
|
1635
|
+
raise excs.Error(f'push(): Cannot push replica table {self._name!r}. (Did you mean `pull()`?)')
|
|
1636
|
+
if pxt_uri is None:
|
|
1637
|
+
raise excs.Error(
|
|
1638
|
+
f'push(): Table {self._name!r} has not yet been published to Pixeltable Cloud. '
|
|
1639
|
+
'To publish it, use `pxt.publish()` instead.'
|
|
1640
|
+
)
|
|
1641
|
+
|
|
1642
|
+
# Parse the pxt URI to extract org/db and create a UUID-based URI for pushing
|
|
1643
|
+
parsed_uri = PxtUri(uri=pxt_uri)
|
|
1644
|
+
uuid_uri_obj = PxtUri.from_components(org=parsed_uri.org, id=self._id, db=parsed_uri.db)
|
|
1645
|
+
uuid_uri = str(uuid_uri_obj)
|
|
1646
|
+
|
|
1647
|
+
if version is None:
|
|
1648
|
+
# Push this version
|
|
1649
|
+
push_replica(uuid_uri, self)
|
|
1650
|
+
else:
|
|
1651
|
+
versioned_path = catalog.Path.parse(self._path())._replace(version=version)
|
|
1652
|
+
versioned_tbl = catalog.Catalog.get().get_table(versioned_path, IfNotExistsParam.IGNORE)
|
|
1653
|
+
if versioned_tbl is None:
|
|
1654
|
+
raise excs.Error(f'Table {self._name!r} has no known version {version}')
|
|
1655
|
+
assert versioned_tbl._id == self._id
|
|
1656
|
+
push_replica(uuid_uri, versioned_tbl)
|
|
1657
|
+
|
|
1658
|
+
def pull(self, *, version: int | None = None) -> None:
|
|
1659
|
+
from pixeltable.share import pull_replica
|
|
1660
|
+
from pixeltable.share.protocol import PxtUri
|
|
1661
|
+
|
|
1662
|
+
tbl_version = self._tbl_version_path.tbl_version.get()
|
|
1663
|
+
pxt_uri = tbl_version.pxt_uri
|
|
1664
|
+
|
|
1665
|
+
if not tbl_version.is_replica:
|
|
1666
|
+
raise excs.Error(
|
|
1667
|
+
f'pull(): Table {self._name!r} is not a replica of a Pixeltable Cloud table (nothing to `pull()`).'
|
|
1668
|
+
)
|
|
1669
|
+
assert pxt_uri is not None
|
|
1670
|
+
|
|
1671
|
+
# Parse the pxt URI to extract org/db and create a UUID-based URI for pulling
|
|
1672
|
+
parsed_uri = PxtUri(uri=pxt_uri)
|
|
1673
|
+
uuid_uri_obj = PxtUri.from_components(org=parsed_uri.org, id=self._id, db=parsed_uri.db, version=version)
|
|
1674
|
+
uuid_uri = str(uuid_uri_obj)
|
|
1675
|
+
|
|
1676
|
+
pull_replica(self._path(), uuid_uri)
|
|
1462
1677
|
|
|
1463
|
-
@property
|
|
1464
1678
|
def external_stores(self) -> list[str]:
|
|
1465
1679
|
return list(self._tbl_version.get().external_stores.keys())
|
|
1466
1680
|
|
|
@@ -1470,23 +1684,18 @@ class Table(SchemaObject):
|
|
|
1470
1684
|
"""
|
|
1471
1685
|
from pixeltable.catalog import Catalog
|
|
1472
1686
|
|
|
1473
|
-
with Catalog.get().begin_xact(
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
_logger.info(f'Linking external store `{store.name}` to table `{self._name}`')
|
|
1687
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
1688
|
+
self.__check_mutable('link an external store to')
|
|
1689
|
+
if store.name in self.external_stores():
|
|
1690
|
+
raise excs.Error(f'Table {self._name!r} already has an external store with that name: {store.name}')
|
|
1691
|
+
_logger.info(f'Linking external store {store.name!r} to table {self._name!r}.')
|
|
1479
1692
|
|
|
1480
1693
|
store.link(self._tbl_version.get()) # might call tbl_version.add_columns()
|
|
1481
1694
|
self._tbl_version.get().link_external_store(store)
|
|
1482
|
-
env.Env.get().console_logger.info(f'Linked external store
|
|
1695
|
+
env.Env.get().console_logger.info(f'Linked external store {store.name!r} to table {self._name!r}.')
|
|
1483
1696
|
|
|
1484
1697
|
def unlink_external_stores(
|
|
1485
|
-
self,
|
|
1486
|
-
stores: Optional[str | list[str]] = None,
|
|
1487
|
-
*,
|
|
1488
|
-
delete_external_data: bool = False,
|
|
1489
|
-
ignore_errors: bool = False,
|
|
1698
|
+
self, stores: str | list[str] | None = None, *, delete_external_data: bool = False, ignore_errors: bool = False
|
|
1490
1699
|
) -> None:
|
|
1491
1700
|
"""
|
|
1492
1701
|
Unlinks this table's external stores.
|
|
@@ -1501,9 +1710,10 @@ class Table(SchemaObject):
|
|
|
1501
1710
|
"""
|
|
1502
1711
|
from pixeltable.catalog import Catalog
|
|
1503
1712
|
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1713
|
+
if not self._tbl_version_path.is_mutable():
|
|
1714
|
+
return
|
|
1715
|
+
with Catalog.get().begin_xact(tbl=self._tbl_version_path, for_write=True, lock_mutable_tree=False):
|
|
1716
|
+
all_stores = self.external_stores()
|
|
1507
1717
|
|
|
1508
1718
|
if stores is None:
|
|
1509
1719
|
stores = all_stores
|
|
@@ -1514,7 +1724,7 @@ class Table(SchemaObject):
|
|
|
1514
1724
|
if not ignore_errors:
|
|
1515
1725
|
for store_name in stores:
|
|
1516
1726
|
if store_name not in all_stores:
|
|
1517
|
-
raise excs.Error(f'Table
|
|
1727
|
+
raise excs.Error(f'Table {self._name!r} has no external store with that name: {store_name}')
|
|
1518
1728
|
|
|
1519
1729
|
for store_name in stores:
|
|
1520
1730
|
store = self._tbl_version.get().external_stores[store_name]
|
|
@@ -1524,11 +1734,11 @@ class Table(SchemaObject):
|
|
|
1524
1734
|
self._tbl_version.get().unlink_external_store(store)
|
|
1525
1735
|
if delete_external_data and isinstance(store, pxt.io.external_store.Project):
|
|
1526
1736
|
store.delete()
|
|
1527
|
-
env.Env.get().console_logger.info(f'Unlinked external store from table
|
|
1737
|
+
env.Env.get().console_logger.info(f'Unlinked external store from table {self._name!r}: {store_str}')
|
|
1528
1738
|
|
|
1529
1739
|
def sync(
|
|
1530
|
-
self, stores:
|
|
1531
|
-
) ->
|
|
1740
|
+
self, stores: str | list[str] | None = None, *, export_data: bool = True, import_data: bool = True
|
|
1741
|
+
) -> UpdateStatus:
|
|
1532
1742
|
"""
|
|
1533
1743
|
Synchronizes this table with its linked external stores.
|
|
1534
1744
|
|
|
@@ -1540,9 +1750,13 @@ class Table(SchemaObject):
|
|
|
1540
1750
|
"""
|
|
1541
1751
|
from pixeltable.catalog import Catalog
|
|
1542
1752
|
|
|
1543
|
-
|
|
1544
|
-
|
|
1545
|
-
|
|
1753
|
+
if not self._tbl_version_path.is_mutable():
|
|
1754
|
+
return UpdateStatus()
|
|
1755
|
+
# we lock the entire tree starting at the root base table in order to ensure that all synced columns can
|
|
1756
|
+
# have their updates propagated down the tree
|
|
1757
|
+
base_tv = self._tbl_version_path.get_tbl_versions()[-1]
|
|
1758
|
+
with Catalog.get().begin_xact(tbl=TableVersionPath(base_tv), for_write=True, lock_mutable_tree=True):
|
|
1759
|
+
all_stores = self.external_stores()
|
|
1546
1760
|
|
|
1547
1761
|
if stores is None:
|
|
1548
1762
|
stores = all_stores
|
|
@@ -1551,18 +1765,122 @@ class Table(SchemaObject):
|
|
|
1551
1765
|
|
|
1552
1766
|
for store in stores:
|
|
1553
1767
|
if store not in all_stores:
|
|
1554
|
-
raise excs.Error(f'Table
|
|
1768
|
+
raise excs.Error(f'Table {self._name!r} has no external store with that name: {store}')
|
|
1555
1769
|
|
|
1556
|
-
sync_status =
|
|
1770
|
+
sync_status = UpdateStatus()
|
|
1557
1771
|
for store in stores:
|
|
1558
1772
|
store_obj = self._tbl_version.get().external_stores[store]
|
|
1559
1773
|
store_sync_status = store_obj.sync(self, export_data=export_data, import_data=import_data)
|
|
1560
|
-
sync_status
|
|
1774
|
+
sync_status += store_sync_status
|
|
1561
1775
|
|
|
1562
1776
|
return sync_status
|
|
1563
1777
|
|
|
1564
1778
|
def __dir__(self) -> list[str]:
|
|
1565
|
-
return list(super().__dir__()) + list(self.
|
|
1779
|
+
return list(super().__dir__()) + list(self._get_schema().keys())
|
|
1566
1780
|
|
|
1567
1781
|
def _ipython_key_completions_(self) -> list[str]:
|
|
1568
|
-
return list(self.
|
|
1782
|
+
return list(self._get_schema().keys())
|
|
1783
|
+
|
|
1784
|
+
def get_versions(self, n: int | None = None) -> list[VersionMetadata]:
|
|
1785
|
+
"""
|
|
1786
|
+
Returns information about versions of this table, most recent first.
|
|
1787
|
+
|
|
1788
|
+
`get_versions()` is intended for programmatic access to version metadata; for human-readable
|
|
1789
|
+
output, use [`history()`][pixeltable.Table.history] instead.
|
|
1790
|
+
|
|
1791
|
+
Args:
|
|
1792
|
+
n: if specified, will return at most `n` versions
|
|
1793
|
+
|
|
1794
|
+
Returns:
|
|
1795
|
+
A list of [VersionMetadata][pixeltable.VersionMetadata] dictionaries, one per version retrieved, most
|
|
1796
|
+
recent first.
|
|
1797
|
+
|
|
1798
|
+
Examples:
|
|
1799
|
+
Retrieve metadata about all versions of the table `tbl`:
|
|
1800
|
+
|
|
1801
|
+
>>> tbl.get_versions()
|
|
1802
|
+
|
|
1803
|
+
Retrieve metadata about the most recent 5 versions of the table `tbl`:
|
|
1804
|
+
|
|
1805
|
+
>>> tbl.get_versions(n=5)
|
|
1806
|
+
"""
|
|
1807
|
+
from pixeltable.catalog import Catalog
|
|
1808
|
+
|
|
1809
|
+
if n is None:
|
|
1810
|
+
n = 1_000_000_000
|
|
1811
|
+
if not isinstance(n, int) or n < 1:
|
|
1812
|
+
raise excs.Error(f'Invalid value for `n`: {n}')
|
|
1813
|
+
|
|
1814
|
+
# Retrieve the table history components from the catalog
|
|
1815
|
+
tbl_id = self._id
|
|
1816
|
+
# Collect an extra version, if available, to allow for computation of the first version's schema change
|
|
1817
|
+
vers_list = Catalog.get().collect_tbl_history(tbl_id, n + 1)
|
|
1818
|
+
|
|
1819
|
+
# Construct the metadata change description dictionary
|
|
1820
|
+
md_list = [(vers_md.version_md.version, vers_md.schema_version_md.columns) for vers_md in vers_list]
|
|
1821
|
+
md_dict = MetadataUtils._create_md_change_dict(md_list)
|
|
1822
|
+
|
|
1823
|
+
# Construct report lines
|
|
1824
|
+
if len(vers_list) > n:
|
|
1825
|
+
assert len(vers_list) == n + 1
|
|
1826
|
+
over_count = 1
|
|
1827
|
+
else:
|
|
1828
|
+
over_count = 0
|
|
1829
|
+
|
|
1830
|
+
metadata_dicts: list[VersionMetadata] = []
|
|
1831
|
+
for vers_md in vers_list[0 : len(vers_list) - over_count]:
|
|
1832
|
+
version = vers_md.version_md.version
|
|
1833
|
+
schema_change = md_dict.get(version, None)
|
|
1834
|
+
update_status = vers_md.version_md.update_status
|
|
1835
|
+
if update_status is None:
|
|
1836
|
+
update_status = UpdateStatus()
|
|
1837
|
+
change_type: Literal['schema', 'data'] = 'schema' if schema_change is not None else 'data'
|
|
1838
|
+
rcs = update_status.row_count_stats + update_status.cascade_row_count_stats
|
|
1839
|
+
metadata_dicts.append(
|
|
1840
|
+
VersionMetadata(
|
|
1841
|
+
version=version,
|
|
1842
|
+
created_at=datetime.datetime.fromtimestamp(vers_md.version_md.created_at, tz=datetime.timezone.utc),
|
|
1843
|
+
user=vers_md.version_md.user,
|
|
1844
|
+
change_type=change_type,
|
|
1845
|
+
inserts=rcs.ins_rows,
|
|
1846
|
+
updates=rcs.upd_rows,
|
|
1847
|
+
deletes=rcs.del_rows,
|
|
1848
|
+
errors=rcs.num_excs,
|
|
1849
|
+
computed=rcs.computed_values,
|
|
1850
|
+
schema_change=schema_change,
|
|
1851
|
+
)
|
|
1852
|
+
)
|
|
1853
|
+
|
|
1854
|
+
return metadata_dicts
|
|
1855
|
+
|
|
1856
|
+
def history(self, n: int | None = None) -> pd.DataFrame:
|
|
1857
|
+
"""
|
|
1858
|
+
Returns a human-readable report about versions of this table.
|
|
1859
|
+
|
|
1860
|
+
`history()` is intended for human-readable output of version metadata; for programmatic access,
|
|
1861
|
+
use [`get_versions()`][pixeltable.Table.get_versions] instead.
|
|
1862
|
+
|
|
1863
|
+
Args:
|
|
1864
|
+
n: if specified, will return at most `n` versions
|
|
1865
|
+
|
|
1866
|
+
Returns:
|
|
1867
|
+
A report with information about each version, one per row, most recent first.
|
|
1868
|
+
|
|
1869
|
+
Examples:
|
|
1870
|
+
Report all versions of the table:
|
|
1871
|
+
|
|
1872
|
+
>>> tbl.history()
|
|
1873
|
+
|
|
1874
|
+
Report only the most recent 5 changes to the table:
|
|
1875
|
+
|
|
1876
|
+
>>> tbl.history(n=5)
|
|
1877
|
+
"""
|
|
1878
|
+
versions = self.get_versions(n)
|
|
1879
|
+
assert len(versions) > 0
|
|
1880
|
+
return pd.DataFrame([list(v.values()) for v in versions], columns=list(versions[0].keys()))
|
|
1881
|
+
|
|
1882
|
+
def __check_mutable(self, op_descr: str) -> None:
|
|
1883
|
+
if self._tbl_version_path.is_replica():
|
|
1884
|
+
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a replica.')
|
|
1885
|
+
if self._tbl_version_path.is_snapshot():
|
|
1886
|
+
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a snapshot.')
|