pixeltable: pixeltable-0.4.6-py3-none-any.whl → pixeltable-0.4.8-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +4 -2
- pixeltable/catalog/__init__.py +1 -1
- pixeltable/catalog/catalog.py +7 -9
- pixeltable/catalog/column.py +49 -0
- pixeltable/catalog/insertable_table.py +0 -7
- pixeltable/catalog/schema_object.py +1 -14
- pixeltable/catalog/table.py +180 -67
- pixeltable/catalog/table_version.py +42 -146
- pixeltable/catalog/table_version_path.py +6 -5
- pixeltable/catalog/view.py +2 -1
- pixeltable/config.py +24 -9
- pixeltable/dataframe.py +5 -6
- pixeltable/env.py +113 -21
- pixeltable/exec/aggregation_node.py +1 -1
- pixeltable/exec/cache_prefetch_node.py +4 -3
- pixeltable/exec/exec_node.py +0 -8
- pixeltable/exec/expr_eval/expr_eval_node.py +2 -2
- pixeltable/exec/expr_eval/globals.py +1 -0
- pixeltable/exec/expr_eval/schedulers.py +52 -19
- pixeltable/exec/in_memory_data_node.py +2 -3
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/data_row.py +15 -2
- pixeltable/exprs/expr.py +9 -9
- pixeltable/exprs/function_call.py +61 -23
- pixeltable/exprs/globals.py +1 -2
- pixeltable/exprs/json_path.py +3 -3
- pixeltable/exprs/row_builder.py +25 -21
- pixeltable/exprs/string_op.py +3 -3
- pixeltable/func/expr_template_function.py +6 -3
- pixeltable/func/query_template_function.py +2 -2
- pixeltable/func/signature.py +30 -3
- pixeltable/func/tools.py +2 -2
- pixeltable/functions/anthropic.py +76 -27
- pixeltable/functions/deepseek.py +5 -1
- pixeltable/functions/gemini.py +11 -2
- pixeltable/functions/globals.py +2 -2
- pixeltable/functions/huggingface.py +6 -12
- pixeltable/functions/llama_cpp.py +9 -1
- pixeltable/functions/openai.py +76 -55
- pixeltable/functions/video.py +59 -6
- pixeltable/functions/vision.py +2 -2
- pixeltable/globals.py +86 -13
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/fiftyone.py +7 -7
- pixeltable/io/globals.py +3 -3
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +2 -1
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +3 -3
- pixeltable/io/table_data_conduit.py +2 -2
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +3 -2
- pixeltable/iterators/document.py +2 -8
- pixeltable/iterators/video.py +49 -9
- pixeltable/plan.py +0 -16
- pixeltable/share/packager.py +51 -42
- pixeltable/share/publish.py +134 -7
- pixeltable/store.py +5 -25
- pixeltable/type_system.py +5 -8
- pixeltable/utils/__init__.py +2 -2
- pixeltable/utils/arrow.py +5 -5
- pixeltable/utils/description_helper.py +3 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/media_store.py +131 -66
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/METADATA +238 -122
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/RECORD +69 -69
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.6.dist-info → pixeltable-0.4.8.dist-info}/licenses/LICENSE +0 -0
pixeltable/__init__.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
# ruff: noqa: F401
|
|
2
2
|
|
|
3
3
|
from .__version__ import __version__, __version_tuple__
|
|
4
|
-
from .catalog import Column, InsertableTable, Table, UpdateStatus, View
|
|
4
|
+
from .catalog import Column, ColumnMetadata, IndexMetadata, InsertableTable, Table, TableMetadata, UpdateStatus, View
|
|
5
5
|
from .dataframe import DataFrame
|
|
6
6
|
from .exceptions import Error, ExprEvalError, PixeltableWarning
|
|
7
|
-
from .func import Aggregator, Function, expr_udf, mcp_udfs, query, retrieval_udf, uda, udf
|
|
7
|
+
from .func import Aggregator, Function, Tool, ToolChoice, Tools, expr_udf, mcp_udfs, query, retrieval_udf, uda, udf
|
|
8
8
|
from .globals import (
|
|
9
|
+
DirContents,
|
|
9
10
|
array,
|
|
10
11
|
configure_logging,
|
|
11
12
|
create_dir,
|
|
@@ -15,6 +16,7 @@ from .globals import (
|
|
|
15
16
|
create_view,
|
|
16
17
|
drop_dir,
|
|
17
18
|
drop_table,
|
|
19
|
+
get_dir_contents,
|
|
18
20
|
get_table,
|
|
19
21
|
init,
|
|
20
22
|
list_dirs,
|
pixeltable/catalog/__init__.py
CHANGED
|
@@ -8,7 +8,7 @@ from .insertable_table import InsertableTable
|
|
|
8
8
|
from .named_function import NamedFunction
|
|
9
9
|
from .path import Path
|
|
10
10
|
from .schema_object import SchemaObject
|
|
11
|
-
from .table import Table
|
|
11
|
+
from .table import ColumnMetadata, IndexMetadata, Table, TableMetadata
|
|
12
12
|
from .table_version import TableVersion
|
|
13
13
|
from .table_version_handle import ColumnHandle, TableVersionHandle
|
|
14
14
|
from .table_version_path import TableVersionPath
|
pixeltable/catalog/catalog.py
CHANGED
|
@@ -103,7 +103,7 @@ def retry_loop(
|
|
|
103
103
|
except PendingTableOpsError as e:
|
|
104
104
|
Env.get().console_logger.debug(f'retry_loop(): finalizing pending ops for {e.tbl_id}')
|
|
105
105
|
Catalog.get()._finalize_pending_ops(e.tbl_id)
|
|
106
|
-
except sql.exc.DBAPIError as e:
|
|
106
|
+
except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
|
|
107
107
|
# TODO: what other exceptions should we be looking for?
|
|
108
108
|
if isinstance(e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)):
|
|
109
109
|
if num_retries < _MAX_RETRIES or _MAX_RETRIES == -1:
|
|
@@ -189,12 +189,10 @@ class Catalog:
|
|
|
189
189
|
@classmethod
|
|
190
190
|
def clear(cls) -> None:
|
|
191
191
|
"""Remove the instance. Used for testing."""
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
# )
|
|
197
|
-
tbl_version.is_validated = False
|
|
192
|
+
if cls._instance is not None:
|
|
193
|
+
# invalidate all existing instances to force reloading of metadata
|
|
194
|
+
for tbl_version in cls._instance._tbl_versions.values():
|
|
195
|
+
tbl_version.is_validated = False
|
|
198
196
|
cls._instance = None
|
|
199
197
|
|
|
200
198
|
def __init__(self) -> None:
|
|
@@ -356,7 +354,7 @@ class Catalog:
|
|
|
356
354
|
# raise to abort the transaction
|
|
357
355
|
raise
|
|
358
356
|
|
|
359
|
-
except sql.exc.DBAPIError as e:
|
|
357
|
+
except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
|
|
360
358
|
has_exc = True
|
|
361
359
|
if isinstance(
|
|
362
360
|
e.orig, (psycopg.errors.SerializationFailure, psycopg.errors.LockNotAvailable)
|
|
@@ -380,7 +378,7 @@ class Catalog:
|
|
|
380
378
|
# we got this exception after getting the initial table locks and therefore need to abort
|
|
381
379
|
raise
|
|
382
380
|
|
|
383
|
-
except sql.exc.DBAPIError as e:
|
|
381
|
+
except (sql.exc.DBAPIError, sql.exc.OperationalError) as e:
|
|
384
382
|
has_exc = True
|
|
385
383
|
# we got some db error during the actual operation (not just while trying to get locks on the metadata
|
|
386
384
|
# records): we convert these into Errors, if asked to do so, and abort
|
pixeltable/catalog/column.py
CHANGED
|
@@ -10,6 +10,7 @@ import sqlalchemy as sql
|
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
11
11
|
import pixeltable.type_system as ts
|
|
12
12
|
from pixeltable import exprs
|
|
13
|
+
from pixeltable.metadata import schema
|
|
13
14
|
|
|
14
15
|
from .globals import MediaValidation, is_valid_identifier
|
|
15
16
|
|
|
@@ -126,6 +127,54 @@ class Column:
|
|
|
126
127
|
# computed cols also have storage columns for the exception string and type
|
|
127
128
|
self.sa_cellmd_col = None
|
|
128
129
|
|
|
130
|
+
def to_md(self, pos: Optional[int] = None) -> tuple[schema.ColumnMd, Optional[schema.SchemaColumn]]:
|
|
131
|
+
"""Returns the Column and optional SchemaColumn metadata for this Column."""
|
|
132
|
+
assert self.is_pk is not None
|
|
133
|
+
col_md = schema.ColumnMd(
|
|
134
|
+
id=self.id,
|
|
135
|
+
col_type=self.col_type.as_dict(),
|
|
136
|
+
is_pk=self.is_pk,
|
|
137
|
+
schema_version_add=self.schema_version_add,
|
|
138
|
+
schema_version_drop=self.schema_version_drop,
|
|
139
|
+
value_expr=self.value_expr.as_dict() if self.value_expr is not None else None,
|
|
140
|
+
stored=self.stored,
|
|
141
|
+
)
|
|
142
|
+
if pos is None:
|
|
143
|
+
return col_md, None
|
|
144
|
+
assert self.name is not None, 'Column name must be set for user-facing columns'
|
|
145
|
+
sch_md = schema.SchemaColumn(
|
|
146
|
+
name=self.name,
|
|
147
|
+
pos=pos,
|
|
148
|
+
media_validation=self._media_validation.name.lower() if self._media_validation is not None else None,
|
|
149
|
+
)
|
|
150
|
+
return col_md, sch_md
|
|
151
|
+
|
|
152
|
+
@classmethod
|
|
153
|
+
def from_md(
|
|
154
|
+
cls, col_md: schema.ColumnMd, tbl: TableVersion, schema_col_md: Optional[schema.SchemaColumn]
|
|
155
|
+
) -> Column:
|
|
156
|
+
"""Create a Column from a ColumnMd."""
|
|
157
|
+
assert col_md.id is not None
|
|
158
|
+
col_name = schema_col_md.name if schema_col_md is not None else None
|
|
159
|
+
media_val = (
|
|
160
|
+
MediaValidation[schema_col_md.media_validation.upper()]
|
|
161
|
+
if schema_col_md is not None and schema_col_md.media_validation is not None
|
|
162
|
+
else None
|
|
163
|
+
)
|
|
164
|
+
col = cls(
|
|
165
|
+
col_id=col_md.id,
|
|
166
|
+
name=col_name,
|
|
167
|
+
col_type=ts.ColumnType.from_dict(col_md.col_type),
|
|
168
|
+
is_pk=col_md.is_pk,
|
|
169
|
+
stored=col_md.stored,
|
|
170
|
+
media_validation=media_val,
|
|
171
|
+
schema_version_add=col_md.schema_version_add,
|
|
172
|
+
schema_version_drop=col_md.schema_version_drop,
|
|
173
|
+
value_expr_dict=col_md.value_expr,
|
|
174
|
+
tbl=tbl,
|
|
175
|
+
)
|
|
176
|
+
return col
|
|
177
|
+
|
|
129
178
|
def init_value_expr(self) -> None:
|
|
130
179
|
from pixeltable import exprs
|
|
131
180
|
|
|
pixeltable/catalog/insertable_table.py
CHANGED
|
@@ -105,13 +105,6 @@ class InsertableTable(Table):
|
|
|
105
105
|
Env.get().console_logger.info(f'Created table {name!r}.')
|
|
106
106
|
return tbl
|
|
107
107
|
|
|
108
|
-
def _get_metadata(self) -> dict[str, Any]:
|
|
109
|
-
md = super()._get_metadata()
|
|
110
|
-
md['base'] = None
|
|
111
|
-
md['is_view'] = False
|
|
112
|
-
md['is_snapshot'] = False
|
|
113
|
-
return md
|
|
114
|
-
|
|
115
108
|
@overload
|
|
116
109
|
def insert(
|
|
117
110
|
self,
|
|
pixeltable/catalog/schema_object.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
|
-
from typing import TYPE_CHECKING,
|
|
2
|
+
from typing import TYPE_CHECKING, Optional
|
|
3
3
|
from uuid import UUID
|
|
4
4
|
|
|
5
5
|
if TYPE_CHECKING:
|
|
@@ -41,19 +41,6 @@ class SchemaObject:
|
|
|
41
41
|
path = Catalog.get().get_dir_path(self._dir_id)
|
|
42
42
|
return str(path.append(self._name))
|
|
43
43
|
|
|
44
|
-
def get_metadata(self) -> dict[str, Any]:
|
|
45
|
-
"""Returns metadata associated with this schema object."""
|
|
46
|
-
from pixeltable.catalog import retry_loop
|
|
47
|
-
|
|
48
|
-
@retry_loop(for_write=False)
|
|
49
|
-
def op() -> dict[str, Any]:
|
|
50
|
-
return self._get_metadata()
|
|
51
|
-
|
|
52
|
-
return op()
|
|
53
|
-
|
|
54
|
-
def _get_metadata(self) -> dict[str, Any]:
|
|
55
|
-
return {'name': self._name, 'path': self._path()}
|
|
56
|
-
|
|
57
44
|
@abstractmethod
|
|
58
45
|
def _display_name(self) -> str:
|
|
59
46
|
"""
|
pixeltable/catalog/table.py
CHANGED
|
@@ -6,7 +6,7 @@ import json
|
|
|
6
6
|
import logging
|
|
7
7
|
from keyword import iskeyword as is_python_keyword
|
|
8
8
|
from pathlib import Path
|
|
9
|
-
from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Literal, Optional,
|
|
9
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Iterable, Literal, Optional, TypedDict, overload
|
|
10
10
|
|
|
11
11
|
from typing import _GenericAlias # type: ignore[attr-defined] # isort: skip
|
|
12
12
|
import datetime
|
|
@@ -80,49 +80,70 @@ class Table(SchemaObject):
|
|
|
80
80
|
conn.execute(stmt, {'new_dir_id': new_dir_id, 'new_name': json.dumps(new_name), 'id': self._id})
|
|
81
81
|
|
|
82
82
|
# this is duplicated from SchemaObject so that our API docs show the docstring for Table
|
|
83
|
-
def get_metadata(self) ->
|
|
83
|
+
def get_metadata(self) -> 'TableMetadata':
|
|
84
84
|
"""
|
|
85
85
|
Retrieves metadata associated with this table.
|
|
86
86
|
|
|
87
87
|
Returns:
|
|
88
|
-
A
|
|
89
|
-
|
|
90
|
-
```python
|
|
91
|
-
{
|
|
92
|
-
'name': 'my_table',
|
|
93
|
-
'path': 'my_dir.my_subdir.my_table',
|
|
94
|
-
'base': None, # If this is a view or snapshot, will contain the name of its base table
|
|
95
|
-
'schema': {
|
|
96
|
-
'col1': StringType(),
|
|
97
|
-
'col2': IntType(),
|
|
98
|
-
},
|
|
99
|
-
'is_replica': False,
|
|
100
|
-
'version': 22,
|
|
101
|
-
'version_created': datetime.datetime(...),
|
|
102
|
-
'schema_version': 1,
|
|
103
|
-
'comment': '',
|
|
104
|
-
'num_retained_versions': 10,
|
|
105
|
-
'is_view': False,
|
|
106
|
-
'is_snapshot': False,
|
|
107
|
-
'media_validation': 'on_write',
|
|
108
|
-
}
|
|
109
|
-
```
|
|
88
|
+
A [TableMetadata][pixeltable.TableMetadata] instance containing this table's metadata.
|
|
110
89
|
"""
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
90
|
+
from pixeltable.catalog import retry_loop
|
|
91
|
+
|
|
92
|
+
@retry_loop(for_write=False)
|
|
93
|
+
def op() -> 'TableMetadata':
|
|
94
|
+
return self._get_metadata()
|
|
95
|
+
|
|
96
|
+
return op()
|
|
97
|
+
|
|
98
|
+
def _get_metadata(self) -> 'TableMetadata':
|
|
99
|
+
columns = self._tbl_version_path.columns()
|
|
100
|
+
column_info: dict[str, ColumnMetadata] = {}
|
|
101
|
+
for col in columns:
|
|
102
|
+
column_info[col.name] = ColumnMetadata(
|
|
103
|
+
name=col.name,
|
|
104
|
+
type_=col.col_type._to_str(as_schema=True),
|
|
105
|
+
version_added=col.schema_version_add,
|
|
106
|
+
is_stored=col.is_stored,
|
|
107
|
+
is_primary_key=col.is_pk,
|
|
108
|
+
media_validation=col.media_validation.name.lower() if col.media_validation is not None else None, # type: ignore[typeddict-item]
|
|
109
|
+
computed_with=col.value_expr.display_str(inline=False) if col.value_expr is not None else None,
|
|
110
|
+
)
|
|
111
|
+
# Pure snapshots have no indices
|
|
112
|
+
indices = self._tbl_version.get().idxs_by_name.values() if self._tbl_version is not None else {}
|
|
113
|
+
index_info: dict[str, IndexMetadata] = {}
|
|
114
|
+
for info in indices:
|
|
115
|
+
if isinstance(info.idx, index.EmbeddingIndex):
|
|
116
|
+
embeddings: list[str] = []
|
|
117
|
+
if info.idx.string_embed is not None:
|
|
118
|
+
embeddings.append(str(info.idx.string_embed))
|
|
119
|
+
if info.idx.image_embed is not None:
|
|
120
|
+
embeddings.append(str(info.idx.image_embed))
|
|
121
|
+
index_info[info.name] = IndexMetadata(
|
|
122
|
+
name=info.name,
|
|
123
|
+
columns=[info.col.name],
|
|
124
|
+
index_type='embedding',
|
|
125
|
+
parameters=EmbeddingIndexParams(
|
|
126
|
+
metric=info.idx.metric.name.lower(), # type: ignore[typeddict-item]
|
|
127
|
+
embeddings=embeddings,
|
|
128
|
+
),
|
|
129
|
+
)
|
|
130
|
+
return TableMetadata(
|
|
131
|
+
name=self._name,
|
|
132
|
+
path=self._path(),
|
|
133
|
+
columns=column_info,
|
|
134
|
+
indices=index_info,
|
|
135
|
+
is_replica=self._tbl_version_path.is_replica(),
|
|
136
|
+
is_view=False,
|
|
137
|
+
is_snapshot=False,
|
|
138
|
+
version=self._get_version(),
|
|
139
|
+
version_created=datetime.datetime.fromtimestamp(
|
|
140
|
+
self._tbl_version_path.tbl_version.get().created_at, tz=datetime.timezone.utc
|
|
141
|
+
),
|
|
142
|
+
schema_version=self._tbl_version_path.schema_version(),
|
|
143
|
+
comment=self._get_comment(),
|
|
144
|
+
media_validation=self._get_media_validation().name.lower(), # type: ignore[typeddict-item]
|
|
145
|
+
base=None,
|
|
120
146
|
)
|
|
121
|
-
md['schema_version'] = self._tbl_version_path.schema_version()
|
|
122
|
-
md['comment'] = self._get_comment()
|
|
123
|
-
md['num_retained_versions'] = self._get_num_retained_versions()
|
|
124
|
-
md['media_validation'] = self._get_media_validation().name.lower()
|
|
125
|
-
return md
|
|
126
147
|
|
|
127
148
|
def _get_version(self) -> int:
|
|
128
149
|
"""Return the version of this table. Used by tests to ascertain version changes."""
|
|
@@ -162,16 +183,14 @@ class Table(SchemaObject):
|
|
|
162
183
|
|
|
163
184
|
return op()
|
|
164
185
|
|
|
165
|
-
def _get_views(self, *, recursive: bool = True,
|
|
186
|
+
def _get_views(self, *, recursive: bool = True, mutable_only: bool = False) -> list['Table']:
|
|
166
187
|
cat = catalog.Catalog.get()
|
|
167
188
|
view_ids = cat.get_view_ids(self._id)
|
|
168
189
|
views = [cat.get_table_by_id(id) for id in view_ids]
|
|
169
|
-
if
|
|
170
|
-
views = [t for t in views if
|
|
190
|
+
if mutable_only:
|
|
191
|
+
views = [t for t in views if t._tbl_version_path.is_mutable()]
|
|
171
192
|
if recursive:
|
|
172
|
-
views.extend(
|
|
173
|
-
t for view in views for t in view._get_views(recursive=True, include_snapshots=include_snapshots)
|
|
174
|
-
)
|
|
193
|
+
views.extend(t for view in views for t in view._get_views(recursive=True, mutable_only=mutable_only))
|
|
175
194
|
return views
|
|
176
195
|
|
|
177
196
|
def _df(self) -> 'pxt.dataframe.DataFrame':
|
|
@@ -455,7 +474,7 @@ class Table(SchemaObject):
|
|
|
455
474
|
|
|
456
475
|
def add_columns(
|
|
457
476
|
self,
|
|
458
|
-
schema: dict[str,
|
|
477
|
+
schema: dict[str, ts.ColumnType | builtins.type | _GenericAlias],
|
|
459
478
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
460
479
|
) -> UpdateStatus:
|
|
461
480
|
"""
|
|
@@ -529,7 +548,7 @@ class Table(SchemaObject):
|
|
|
529
548
|
self,
|
|
530
549
|
*,
|
|
531
550
|
if_exists: Literal['error', 'ignore', 'replace', 'replace_force'] = 'error',
|
|
532
|
-
**kwargs:
|
|
551
|
+
**kwargs: ts.ColumnType | builtins.type | _GenericAlias | exprs.Expr,
|
|
533
552
|
) -> UpdateStatus:
|
|
534
553
|
"""
|
|
535
554
|
Adds an ordinary (non-computed) column to the table.
|
|
@@ -774,7 +793,7 @@ class Table(SchemaObject):
|
|
|
774
793
|
cls._verify_column(col)
|
|
775
794
|
column_names.add(col.name)
|
|
776
795
|
|
|
777
|
-
def drop_column(self, column:
|
|
796
|
+
def drop_column(self, column: str | ColumnRef, if_not_exists: Literal['error', 'ignore'] = 'error') -> None:
|
|
778
797
|
"""Drop a column from the table.
|
|
779
798
|
|
|
780
799
|
Args:
|
|
@@ -815,21 +834,25 @@ class Table(SchemaObject):
|
|
|
815
834
|
if_not_exists_ = IfNotExistsParam.validated(if_not_exists, 'if_not_exists')
|
|
816
835
|
|
|
817
836
|
if isinstance(column, str):
|
|
818
|
-
col = self._tbl_version_path.get_column(column
|
|
837
|
+
col = self._tbl_version_path.get_column(column)
|
|
819
838
|
if col is None:
|
|
820
839
|
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
821
840
|
raise excs.Error(f'Column {column!r} unknown')
|
|
822
841
|
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
823
842
|
return
|
|
843
|
+
if col.tbl.id != self._tbl_version_path.tbl_id:
|
|
844
|
+
raise excs.Error(f'Cannot drop base table column {col.name!r}')
|
|
824
845
|
col = self._tbl_version.get().cols_by_name[column]
|
|
825
846
|
else:
|
|
826
|
-
exists = self._tbl_version_path.has_column(column.col
|
|
847
|
+
exists = self._tbl_version_path.has_column(column.col)
|
|
827
848
|
if not exists:
|
|
828
849
|
if if_not_exists_ == IfNotExistsParam.ERROR:
|
|
829
850
|
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
830
851
|
assert if_not_exists_ == IfNotExistsParam.IGNORE
|
|
831
852
|
return
|
|
832
853
|
col = column.col
|
|
854
|
+
if col.tbl.id != self._tbl_version_path.tbl_id:
|
|
855
|
+
raise excs.Error(f'Cannot drop base table column {col.name!r}')
|
|
833
856
|
|
|
834
857
|
dependent_user_cols = [c for c in cat.get_column_dependents(col.tbl.id, col.id) if c.name is not None]
|
|
835
858
|
if len(dependent_user_cols) > 0:
|
|
@@ -838,13 +861,32 @@ class Table(SchemaObject):
|
|
|
838
861
|
f'{", ".join(c.name for c in dependent_user_cols)}'
|
|
839
862
|
)
|
|
840
863
|
|
|
841
|
-
|
|
864
|
+
views = self._get_views(recursive=True, mutable_only=True)
|
|
865
|
+
|
|
866
|
+
# See if any view predicates depend on this column
|
|
867
|
+
dependent_views = []
|
|
868
|
+
for view in views:
|
|
869
|
+
if view._tbl_version is not None:
|
|
870
|
+
predicate = view._tbl_version.get().predicate
|
|
871
|
+
if predicate is not None:
|
|
872
|
+
for predicate_col in exprs.Expr.get_refd_column_ids(predicate.as_dict()):
|
|
873
|
+
if predicate_col.tbl_id == col.tbl.id and predicate_col.col_id == col.id:
|
|
874
|
+
dependent_views.append((view, predicate))
|
|
875
|
+
|
|
876
|
+
if len(dependent_views) > 0:
|
|
877
|
+
dependent_views_str = '\n'.join(
|
|
878
|
+
f'view: {view._path()}, predicate: {predicate!s}' for view, predicate in dependent_views
|
|
879
|
+
)
|
|
880
|
+
raise excs.Error(
|
|
881
|
+
f'Cannot drop column `{col.name}` because the following views depend on it:\n{dependent_views_str}'
|
|
882
|
+
)
|
|
883
|
+
|
|
842
884
|
# See if this column has a dependent store. We need to look through all stores in all
|
|
843
885
|
# (transitive) views of this table.
|
|
844
886
|
col_handle = col.handle
|
|
845
887
|
dependent_stores = [
|
|
846
888
|
(view, store)
|
|
847
|
-
for view in (self, *
|
|
889
|
+
for view in (self, *views)
|
|
848
890
|
for store in view._tbl_version.get().external_stores.values()
|
|
849
891
|
if col_handle in store.get_local_columns()
|
|
850
892
|
]
|
|
@@ -857,6 +899,12 @@ class Table(SchemaObject):
|
|
|
857
899
|
f'Cannot drop column `{col.name}` because the following external stores depend on it:\n'
|
|
858
900
|
f'{", ".join(dependent_store_names)}'
|
|
859
901
|
)
|
|
902
|
+
all_columns = self.columns()
|
|
903
|
+
if len(all_columns) == 1 and col.name == all_columns[0]:
|
|
904
|
+
raise excs.Error(
|
|
905
|
+
f'Cannot drop column `{col.name}` because it is the last remaining column in this table.'
|
|
906
|
+
f' Tables must have at least one column.'
|
|
907
|
+
)
|
|
860
908
|
|
|
861
909
|
self._tbl_version.get().drop_column(col)
|
|
862
910
|
|
|
@@ -896,7 +944,7 @@ class Table(SchemaObject):
|
|
|
896
944
|
|
|
897
945
|
def add_embedding_index(
|
|
898
946
|
self,
|
|
899
|
-
column:
|
|
947
|
+
column: str | ColumnRef,
|
|
900
948
|
*,
|
|
901
949
|
idx_name: Optional[str] = None,
|
|
902
950
|
embedding: Optional[pxt.Function] = None,
|
|
@@ -1023,7 +1071,7 @@ class Table(SchemaObject):
|
|
|
1023
1071
|
def drop_embedding_index(
|
|
1024
1072
|
self,
|
|
1025
1073
|
*,
|
|
1026
|
-
column:
|
|
1074
|
+
column: str | ColumnRef | None = None,
|
|
1027
1075
|
idx_name: Optional[str] = None,
|
|
1028
1076
|
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
1029
1077
|
) -> None:
|
|
@@ -1083,15 +1131,15 @@ class Table(SchemaObject):
|
|
|
1083
1131
|
|
|
1084
1132
|
self._drop_index(col=col, idx_name=idx_name, _idx_class=index.EmbeddingIndex, if_not_exists=if_not_exists)
|
|
1085
1133
|
|
|
1086
|
-
def _resolve_column_parameter(self, column:
|
|
1134
|
+
def _resolve_column_parameter(self, column: str | ColumnRef) -> Column:
|
|
1087
1135
|
"""Resolve a column parameter to a Column object"""
|
|
1088
1136
|
col: Column = None
|
|
1089
1137
|
if isinstance(column, str):
|
|
1090
|
-
col = self._tbl_version_path.get_column(column
|
|
1138
|
+
col = self._tbl_version_path.get_column(column)
|
|
1091
1139
|
if col is None:
|
|
1092
1140
|
raise excs.Error(f'Column {column!r} unknown')
|
|
1093
1141
|
elif isinstance(column, ColumnRef):
|
|
1094
|
-
exists = self._tbl_version_path.has_column(column.col
|
|
1142
|
+
exists = self._tbl_version_path.has_column(column.col)
|
|
1095
1143
|
if not exists:
|
|
1096
1144
|
raise excs.Error(f'Unknown column: {column.col.qualified_name}')
|
|
1097
1145
|
col = column.col
|
|
@@ -1102,7 +1150,7 @@ class Table(SchemaObject):
|
|
|
1102
1150
|
def drop_index(
|
|
1103
1151
|
self,
|
|
1104
1152
|
*,
|
|
1105
|
-
column:
|
|
1153
|
+
column: str | ColumnRef | None = None,
|
|
1106
1154
|
idx_name: Optional[str] = None,
|
|
1107
1155
|
if_not_exists: Literal['error', 'ignore'] = 'error',
|
|
1108
1156
|
) -> None:
|
|
@@ -1421,7 +1469,7 @@ class Table(SchemaObject):
|
|
|
1421
1469
|
return result
|
|
1422
1470
|
|
|
1423
1471
|
def recompute_columns(
|
|
1424
|
-
self, *columns:
|
|
1472
|
+
self, *columns: str | ColumnRef, errors_only: bool = False, cascade: bool = True
|
|
1425
1473
|
) -> UpdateStatus:
|
|
1426
1474
|
"""Recompute the values in one or more computed columns of this table.
|
|
1427
1475
|
|
|
@@ -1462,14 +1510,14 @@ class Table(SchemaObject):
|
|
|
1462
1510
|
col_name: str
|
|
1463
1511
|
col: Column
|
|
1464
1512
|
if isinstance(column, str):
|
|
1465
|
-
col = self._tbl_version_path.get_column(column
|
|
1513
|
+
col = self._tbl_version_path.get_column(column)
|
|
1466
1514
|
if col is None:
|
|
1467
1515
|
raise excs.Error(f'Unknown column: {column!r}')
|
|
1468
1516
|
col_name = column
|
|
1469
1517
|
else:
|
|
1470
1518
|
assert isinstance(column, ColumnRef)
|
|
1471
1519
|
col = column.col
|
|
1472
|
-
if not self._tbl_version_path.has_column(col
|
|
1520
|
+
if not self._tbl_version_path.has_column(col):
|
|
1473
1521
|
raise excs.Error(f'Unknown column: {col.name!r}')
|
|
1474
1522
|
col_name = col.name
|
|
1475
1523
|
if not col.is_computed:
|
|
@@ -1533,11 +1581,7 @@ class Table(SchemaObject):
|
|
|
1533
1581
|
env.Env.get().console_logger.info(f'Linked external store `{store.name}` to table `{self._name}`.')
|
|
1534
1582
|
|
|
1535
1583
|
def unlink_external_stores(
|
|
1536
|
-
self,
|
|
1537
|
-
stores: Optional[str | list[str]] = None,
|
|
1538
|
-
*,
|
|
1539
|
-
delete_external_data: bool = False,
|
|
1540
|
-
ignore_errors: bool = False,
|
|
1584
|
+
self, stores: str | list[str] | None = None, *, delete_external_data: bool = False, ignore_errors: bool = False
|
|
1541
1585
|
) -> None:
|
|
1542
1586
|
"""
|
|
1543
1587
|
Unlinks this table's external stores.
|
|
@@ -1579,7 +1623,7 @@ class Table(SchemaObject):
|
|
|
1579
1623
|
env.Env.get().console_logger.info(f'Unlinked external store from table `{self._name}`: {store_str}')
|
|
1580
1624
|
|
|
1581
1625
|
def sync(
|
|
1582
|
-
self, stores:
|
|
1626
|
+
self, stores: str | list[str] | None = None, *, export_data: bool = True, import_data: bool = True
|
|
1583
1627
|
) -> UpdateStatus:
|
|
1584
1628
|
"""
|
|
1585
1629
|
Synchronizes this table with its linked external stores.
|
|
@@ -1657,7 +1701,7 @@ class Table(SchemaObject):
|
|
|
1657
1701
|
from pixeltable.catalog import Catalog
|
|
1658
1702
|
|
|
1659
1703
|
if n is None:
|
|
1660
|
-
n =
|
|
1704
|
+
n = 1_000_000_000
|
|
1661
1705
|
if not isinstance(n, int) or n < 1:
|
|
1662
1706
|
raise excs.Error(f'Invalid value for n: {n}')
|
|
1663
1707
|
|
|
@@ -1709,3 +1753,72 @@ class Table(SchemaObject):
|
|
|
1709
1753
|
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a snapshot.')
|
|
1710
1754
|
if self._tbl_version_path.is_replica():
|
|
1711
1755
|
raise excs.Error(f'{self._display_str()}: Cannot {op_descr} a {self._display_name()}.')
|
|
1756
|
+
|
|
1757
|
+
|
|
1758
|
+
class ColumnMetadata(TypedDict):
|
|
1759
|
+
"""Metadata for a column of a Pixeltable table."""
|
|
1760
|
+
|
|
1761
|
+
name: str
|
|
1762
|
+
"""The name of the column."""
|
|
1763
|
+
type_: str
|
|
1764
|
+
"""The type specifier of the column."""
|
|
1765
|
+
version_added: int
|
|
1766
|
+
"""The table version when this column was added."""
|
|
1767
|
+
is_stored: bool
|
|
1768
|
+
"""`True` if this is a stored column; `False` if it is dynamically computed."""
|
|
1769
|
+
is_primary_key: bool
|
|
1770
|
+
"""`True` if this column is part of the table's primary key."""
|
|
1771
|
+
media_validation: Optional[Literal['on_read', 'on_write']]
|
|
1772
|
+
"""The media validation policy for this column."""
|
|
1773
|
+
computed_with: Optional[str]
|
|
1774
|
+
"""Expression used to compute this column; `None` if this is not a computed column."""
|
|
1775
|
+
|
|
1776
|
+
|
|
1777
|
+
class IndexMetadata(TypedDict):
|
|
1778
|
+
"""Metadata for a column of a Pixeltable table."""
|
|
1779
|
+
|
|
1780
|
+
name: str
|
|
1781
|
+
"""The name of the index."""
|
|
1782
|
+
columns: list[str]
|
|
1783
|
+
"""The table columns that are indexed."""
|
|
1784
|
+
index_type: Literal['embedding']
|
|
1785
|
+
"""The type of index (currently only `'embedding'` is supported, but others will be added in the future)."""
|
|
1786
|
+
parameters: EmbeddingIndexParams
|
|
1787
|
+
|
|
1788
|
+
|
|
1789
|
+
class EmbeddingIndexParams(TypedDict):
|
|
1790
|
+
metric: Literal['cosine', 'ip', 'l2']
|
|
1791
|
+
"""Index metric."""
|
|
1792
|
+
embeddings: list[str]
|
|
1793
|
+
"""List of embeddings defined for this index."""
|
|
1794
|
+
|
|
1795
|
+
|
|
1796
|
+
class TableMetadata(TypedDict):
|
|
1797
|
+
"""Metadata for a Pixeltable table."""
|
|
1798
|
+
|
|
1799
|
+
name: str
|
|
1800
|
+
"""The name of the table (ex: `'my_table'`)."""
|
|
1801
|
+
path: str
|
|
1802
|
+
"""The full path of the table (ex: `'my_dir.my_subdir.my_table'`)."""
|
|
1803
|
+
columns: dict[str, ColumnMetadata]
|
|
1804
|
+
"""Column metadata for all of the visible columns of the table."""
|
|
1805
|
+
indices: dict[str, IndexMetadata]
|
|
1806
|
+
"""Index metadata for all of the indices of the table."""
|
|
1807
|
+
is_replica: bool
|
|
1808
|
+
"""`True` if this table is a replica of another (shared) table."""
|
|
1809
|
+
is_view: bool
|
|
1810
|
+
"""`True` if this table is a view."""
|
|
1811
|
+
is_snapshot: bool
|
|
1812
|
+
"""`True` if this table is a snapshot."""
|
|
1813
|
+
version: int
|
|
1814
|
+
"""The current version of the table."""
|
|
1815
|
+
version_created: datetime.datetime
|
|
1816
|
+
"""The timestamp when this table version was created."""
|
|
1817
|
+
schema_version: int
|
|
1818
|
+
"""The current schema version of the table."""
|
|
1819
|
+
comment: Optional[str]
|
|
1820
|
+
"""User-provided table comment, if one exists."""
|
|
1821
|
+
media_validation: Literal['on_read', 'on_write']
|
|
1822
|
+
"""The media validation policy for this table."""
|
|
1823
|
+
base: Optional[str]
|
|
1824
|
+
"""If this table is a view or snapshot, the full path of its base table; otherwise `None`."""
|