pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
|
|
6
|
+
from pixeltable import type_system as ts
|
|
7
|
+
from pixeltable.metadata import register_converter
|
|
8
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@register_converter(version=43)
|
|
12
|
+
def _(engine: sql.engine.Engine) -> None:
|
|
13
|
+
"""Converts ArrayTypes by replacing legacy dtype (which was a pxt Type ID) to numpy dtype."""
|
|
14
|
+
convert_table_md(engine, substitution_fn=_substitution_fn)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _substitution_fn(key: str | None, value: Any) -> tuple[str | None, Any] | None:
|
|
18
|
+
if not isinstance(value, dict):
|
|
19
|
+
return None
|
|
20
|
+
if value.get('_classname', None) != 'ArrayType':
|
|
21
|
+
return None
|
|
22
|
+
if 'numpy_dtype' in value:
|
|
23
|
+
return None
|
|
24
|
+
assert 'dtype' in value
|
|
25
|
+
|
|
26
|
+
legacy_dtype_val = value['dtype']
|
|
27
|
+
new_dtype: np.dtype | None
|
|
28
|
+
if legacy_dtype_val is None:
|
|
29
|
+
new_dtype = None
|
|
30
|
+
else:
|
|
31
|
+
legacy_dtype = ts.ColumnType.Type(legacy_dtype_val)
|
|
32
|
+
new_dtype = ts.ArrayType.pxt_dtype_to_numpy_dtype.get(legacy_dtype, None)
|
|
33
|
+
if new_dtype is None:
|
|
34
|
+
raise ValueError(f'Unrecognized dtype: {legacy_dtype_val} ({legacy_dtype}) in {key}, {value}')
|
|
35
|
+
|
|
36
|
+
del value['dtype']
|
|
37
|
+
if new_dtype is None:
|
|
38
|
+
value['numpy_dtype'] = None
|
|
39
|
+
elif new_dtype == np.str_:
|
|
40
|
+
# str() of numpy's string dtype would be something like '<U0', so store the readable name 'str' instead
|
|
41
|
+
value['numpy_dtype'] = 'str'
|
|
42
|
+
else:
|
|
43
|
+
value['numpy_dtype'] = str(new_dtype)
|
|
44
|
+
return key, value
|
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
import copy
|
|
2
2
|
import logging
|
|
3
|
-
from typing import Any, Callable,
|
|
3
|
+
from typing import Any, Callable, TypeVar
|
|
4
4
|
from uuid import UUID
|
|
5
5
|
|
|
6
6
|
import sqlalchemy as sql
|
|
7
7
|
|
|
8
|
-
from pixeltable.metadata.schema import Function, Table, TableSchemaVersion
|
|
8
|
+
from pixeltable.metadata.schema import Function, Table, TableSchemaVersion
|
|
9
9
|
|
|
10
10
|
__logger = logging.getLogger('pixeltable')
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
def convert_table_md(
|
|
14
14
|
engine: sql.engine.Engine,
|
|
15
|
-
table_md_updater:
|
|
16
|
-
column_md_updater:
|
|
17
|
-
external_store_md_updater:
|
|
18
|
-
substitution_fn:
|
|
15
|
+
table_md_updater: Callable[[dict, UUID], None] | None = None,
|
|
16
|
+
column_md_updater: Callable[[dict], None] | None = None,
|
|
17
|
+
external_store_md_updater: Callable[[dict], None] | None = None,
|
|
18
|
+
substitution_fn: Callable[[str | None, Any], tuple[str | None, Any] | None] | None = None,
|
|
19
|
+
table_modifier: Callable[[sql.Connection, UUID, dict, dict], None] | None = None,
|
|
19
20
|
) -> None:
|
|
20
21
|
"""
|
|
21
22
|
Converts schema.TableMd dicts based on the specified conversion functions.
|
|
@@ -33,9 +34,10 @@ def convert_table_md(
|
|
|
33
34
|
the original entry will be replaced, and the traversal will continue with `v'`.
|
|
34
35
|
"""
|
|
35
36
|
with engine.begin() as conn:
|
|
36
|
-
|
|
37
|
+
# avoid a SELECT * here, which breaks when we add new columns to Table
|
|
38
|
+
for row in conn.execute(sql.select(Table.id, Table.md)):
|
|
37
39
|
tbl_id = row[0]
|
|
38
|
-
table_md = row[
|
|
40
|
+
table_md = row[1]
|
|
39
41
|
assert isinstance(table_md, dict)
|
|
40
42
|
updated_table_md = copy.deepcopy(table_md)
|
|
41
43
|
if table_md_updater is not None:
|
|
@@ -49,6 +51,8 @@ def convert_table_md(
|
|
|
49
51
|
if updated_table_md != table_md:
|
|
50
52
|
__logger.info(f'Updating schema for table: {tbl_id}')
|
|
51
53
|
conn.execute(sql.update(Table).where(Table.id == tbl_id).values(md=updated_table_md))
|
|
54
|
+
if table_modifier is not None:
|
|
55
|
+
table_modifier(conn, tbl_id, table_md, updated_table_md)
|
|
52
56
|
|
|
53
57
|
for row in conn.execute(sql.select(Function)):
|
|
54
58
|
fn_id = row[0]
|
|
@@ -76,9 +80,7 @@ def __update_external_store_md(table_md: dict, external_store_md_updater: Callab
|
|
|
76
80
|
external_store_md_updater(store_md)
|
|
77
81
|
|
|
78
82
|
|
|
79
|
-
def __substitute_md_rec(
|
|
80
|
-
md: Any, substitution_fn: Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]
|
|
81
|
-
) -> Any:
|
|
83
|
+
def __substitute_md_rec(md: Any, substitution_fn: Callable[[str | None, Any], tuple[str | None, Any] | None]) -> Any:
|
|
82
84
|
if isinstance(md, dict):
|
|
83
85
|
updated_dict: dict[str, Any] = {}
|
|
84
86
|
for k, v in md.items():
|
|
@@ -106,8 +108,8 @@ def __substitute_md_rec(
|
|
|
106
108
|
|
|
107
109
|
def convert_table_schema_version_md(
|
|
108
110
|
engine: sql.engine.Engine,
|
|
109
|
-
table_schema_version_md_updater:
|
|
110
|
-
schema_column_updater:
|
|
111
|
+
table_schema_version_md_updater: Callable[[dict], None] | None = None,
|
|
112
|
+
schema_column_updater: Callable[[dict], None] | None = None,
|
|
111
113
|
) -> None:
|
|
112
114
|
"""
|
|
113
115
|
Converts schema.TableSchemaVersionMd dicts based on the specified conversion functions.
|
|
@@ -145,26 +147,13 @@ def __update_schema_column(table_schema_version_md: dict, schema_column_updater:
|
|
|
145
147
|
schema_column_updater(schema_col)
|
|
146
148
|
|
|
147
149
|
|
|
148
|
-
|
|
149
|
-
with sql.orm.Session(engine, future=True) as session:
|
|
150
|
-
for record in session.query(Table).all():
|
|
151
|
-
table_record_updater(record)
|
|
152
|
-
session.commit()
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
def convert_table_version_record(
|
|
156
|
-
engine: sql.engine.Engine, table_version_record_updater: Optional[Callable[[TableVersion], None]]
|
|
157
|
-
) -> None:
|
|
158
|
-
with sql.orm.Session(engine, future=True) as session:
|
|
159
|
-
for record in session.query(TableVersion).all():
|
|
160
|
-
table_version_record_updater(record)
|
|
161
|
-
session.commit()
|
|
150
|
+
T = TypeVar('T')
|
|
162
151
|
|
|
163
152
|
|
|
164
|
-
def
|
|
165
|
-
engine: sql.engine.Engine,
|
|
153
|
+
def convert_sql_table_record(
|
|
154
|
+
schema: type[T], engine: sql.engine.Engine, record_updater: Callable[[T], None] | None
|
|
166
155
|
) -> None:
|
|
167
156
|
with sql.orm.Session(engine, future=True) as session:
|
|
168
|
-
for record in session.query(
|
|
169
|
-
|
|
157
|
+
for record in session.query(schema).all():
|
|
158
|
+
record_updater(record)
|
|
170
159
|
session.commit()
|
pixeltable/metadata/notes.py
CHANGED
|
@@ -2,6 +2,15 @@
|
|
|
2
2
|
# rather than as a comment, so that the existence of a description can be enforced by
|
|
3
3
|
# the unit tests when new versions are added.
|
|
4
4
|
VERSION_NOTES = {
|
|
5
|
+
44: 'ArrayType dtype migration from pxt types to numpy dtypes',
|
|
6
|
+
43: 'Changing tables.dir_id to nullable',
|
|
7
|
+
42: 'Add additional_md columns to metadata tables',
|
|
8
|
+
41: 'Cellmd columns for array and json columns',
|
|
9
|
+
40: 'Convert error property columns to cellmd columns',
|
|
10
|
+
39: 'ColumnHandles in external stores',
|
|
11
|
+
38: 'Added TableMd.view_sn',
|
|
12
|
+
37: 'Add support for the sample() method on DataFrames',
|
|
13
|
+
36: 'Added Table.lock_dummy',
|
|
5
14
|
35: 'Track reference_tbl in ColumnRef',
|
|
6
15
|
34: 'Set default value for is_pk field in column metadata to False',
|
|
7
16
|
33: 'Add is_replica field to table metadata',
|
pixeltable/metadata/schema.py
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
import dataclasses
|
|
2
|
+
import types
|
|
2
3
|
import typing
|
|
3
4
|
import uuid
|
|
4
|
-
from
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from typing import Any, TypeVar, Union, get_type_hints
|
|
5
7
|
|
|
6
8
|
import sqlalchemy as sql
|
|
7
9
|
from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary, orm
|
|
8
10
|
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
|
9
11
|
from sqlalchemy.orm.decl_api import DeclarativeMeta
|
|
10
12
|
|
|
13
|
+
from ..catalog.update_status import UpdateStatus
|
|
14
|
+
|
|
11
15
|
# Base has to be marked explicitly as a type, in order to be used elsewhere as a type hint. But in addition to being
|
|
12
16
|
# a type, it's also a `DeclarativeMeta`. The following pattern enables us to expose both `Base` and `Base.metadata`
|
|
13
17
|
# outside of the module in a typesafe way.
|
|
@@ -18,17 +22,17 @@ base_metadata = Base.metadata
|
|
|
18
22
|
T = TypeVar('T')
|
|
19
23
|
|
|
20
24
|
|
|
21
|
-
def md_from_dict(
|
|
25
|
+
def md_from_dict(type_: type[T], data: Any) -> T:
|
|
22
26
|
"""Re-instantiate a dataclass instance that contains nested dataclasses from a dict."""
|
|
23
|
-
if dataclasses.is_dataclass(
|
|
24
|
-
fieldtypes = get_type_hints(
|
|
25
|
-
return
|
|
27
|
+
if dataclasses.is_dataclass(type_):
|
|
28
|
+
fieldtypes = get_type_hints(type_)
|
|
29
|
+
return type_(**{f: md_from_dict(fieldtypes[f], data[f]) for f in data})
|
|
26
30
|
|
|
27
|
-
origin = typing.get_origin(
|
|
31
|
+
origin = typing.get_origin(type_)
|
|
28
32
|
if origin is not None:
|
|
29
|
-
type_args = typing.get_args(
|
|
30
|
-
if origin is Union and type(None) in type_args:
|
|
31
|
-
#
|
|
33
|
+
type_args = typing.get_args(type_)
|
|
34
|
+
if (origin is Union or origin is types.UnionType) and type(None) in type_args:
|
|
35
|
+
# handling Optional[T] / Union[T, None] as well as T | None
|
|
32
36
|
non_none_args = [arg for arg in type_args if arg is not type(None)]
|
|
33
37
|
assert len(non_none_args) == 1
|
|
34
38
|
return md_from_dict(non_none_args[0], data) if data is not None else None
|
|
@@ -42,10 +46,18 @@ def md_from_dict(data_class_type: type[T], data: Any) -> T:
|
|
|
42
46
|
return tuple(md_from_dict(arg_type, elem) for arg_type, elem in zip(type_args, data)) # type: ignore[return-value]
|
|
43
47
|
else:
|
|
44
48
|
raise AssertionError(origin)
|
|
49
|
+
elif isinstance(type_, type) and issubclass(type_, Enum):
|
|
50
|
+
return type_(data)
|
|
45
51
|
else:
|
|
46
52
|
return data
|
|
47
53
|
|
|
48
54
|
|
|
55
|
+
def _md_dict_factory(data: list[tuple[str, Any]]) -> dict:
|
|
56
|
+
"""Use this to serialize <>Md instances with dataclasses.asdict()"""
|
|
57
|
+
# serialize enums to their values
|
|
58
|
+
return {k: v.value if isinstance(v, Enum) else v for k, v in data}
|
|
59
|
+
|
|
60
|
+
|
|
49
61
|
# structure of the stored metadata:
|
|
50
62
|
# - each schema entity that grows somehow proportionally to the data (# of output_rows, total insert operations,
|
|
51
63
|
# number of schema changes) gets its own table
|
|
@@ -65,6 +77,7 @@ class SystemInfo(Base):
|
|
|
65
77
|
"""A single-row table that contains system-wide metadata."""
|
|
66
78
|
|
|
67
79
|
__tablename__ = 'systeminfo'
|
|
80
|
+
|
|
68
81
|
dummy = sql.Column(Integer, primary_key=True, default=0, nullable=False)
|
|
69
82
|
md = sql.Column(JSONB, nullable=False) # SystemInfoMd
|
|
70
83
|
|
|
@@ -72,8 +85,8 @@ class SystemInfo(Base):
|
|
|
72
85
|
@dataclasses.dataclass
|
|
73
86
|
class DirMd:
|
|
74
87
|
name: str
|
|
75
|
-
user:
|
|
76
|
-
additional_md: dict[str, Any]
|
|
88
|
+
user: str | None
|
|
89
|
+
additional_md: dict[str, Any] # deprecated
|
|
77
90
|
|
|
78
91
|
|
|
79
92
|
class Dir(Base):
|
|
@@ -84,7 +97,9 @@ class Dir(Base):
|
|
|
84
97
|
)
|
|
85
98
|
parent_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
|
|
86
99
|
md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # DirMd
|
|
87
|
-
|
|
100
|
+
additional_md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False, default=dict)
|
|
101
|
+
|
|
102
|
+
# used to force acquisition of an X-lock via an Update stmt
|
|
88
103
|
lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
|
|
89
104
|
|
|
90
105
|
|
|
@@ -100,17 +115,20 @@ class ColumnMd:
|
|
|
100
115
|
|
|
101
116
|
id: int
|
|
102
117
|
schema_version_add: int
|
|
103
|
-
schema_version_drop:
|
|
118
|
+
schema_version_drop: int | None
|
|
104
119
|
col_type: dict
|
|
105
120
|
|
|
106
121
|
# if True, is part of the primary key
|
|
107
122
|
is_pk: bool
|
|
108
123
|
|
|
109
124
|
# if set, this is a computed column
|
|
110
|
-
value_expr:
|
|
125
|
+
value_expr: dict | None
|
|
111
126
|
|
|
112
127
|
# if True, the column is present in the stored table
|
|
113
|
-
stored:
|
|
128
|
+
stored: bool | None
|
|
129
|
+
|
|
130
|
+
# If present, the URI for the destination for column values
|
|
131
|
+
destination: str | None = None
|
|
114
132
|
|
|
115
133
|
|
|
116
134
|
@dataclasses.dataclass
|
|
@@ -126,13 +144,13 @@ class IndexMd:
|
|
|
126
144
|
index_val_col_id: int # column holding the values to be indexed
|
|
127
145
|
index_val_undo_col_id: int # column holding index values for deleted rows
|
|
128
146
|
schema_version_add: int
|
|
129
|
-
schema_version_drop:
|
|
147
|
+
schema_version_drop: int | None
|
|
130
148
|
class_fqn: str
|
|
131
149
|
init_args: dict[str, Any]
|
|
132
150
|
|
|
133
151
|
|
|
134
152
|
# a stored table version path is a list of (table id as str, effective table version)
|
|
135
|
-
TableVersionPath = list[tuple[str,
|
|
153
|
+
TableVersionPath = list[tuple[str, int | None]]
|
|
136
154
|
|
|
137
155
|
|
|
138
156
|
@dataclasses.dataclass
|
|
@@ -144,13 +162,36 @@ class ViewMd:
|
|
|
144
162
|
base_versions: TableVersionPath
|
|
145
163
|
|
|
146
164
|
# filter predicate applied to the base table; view-only
|
|
147
|
-
predicate:
|
|
165
|
+
predicate: dict[str, Any] | None
|
|
166
|
+
|
|
167
|
+
# sampling predicate applied to the base table; view-only
|
|
168
|
+
sample_clause: dict[str, Any] | None
|
|
148
169
|
|
|
149
170
|
# ComponentIterator subclass; only for component views
|
|
150
|
-
iterator_class_fqn:
|
|
171
|
+
iterator_class_fqn: str | None
|
|
151
172
|
|
|
152
173
|
# args to pass to the iterator class constructor; only for component views
|
|
153
|
-
iterator_args:
|
|
174
|
+
iterator_args: dict[str, Any] | None
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
class TableState(Enum):
|
|
178
|
+
"""The operational state of the table"""
|
|
179
|
+
|
|
180
|
+
LIVE = 0
|
|
181
|
+
ROLLFORWARD = 1 # finalizing pending table ops
|
|
182
|
+
ROLLBACK = 2 # rolling back pending table ops
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class TableStatement(Enum):
|
|
186
|
+
"""The top-level DDL/DML operation (corresponding to a statement in SQL; not: a TableOp) currently being executed"""
|
|
187
|
+
|
|
188
|
+
CREATE_TABLE = 0
|
|
189
|
+
CREATE_VIEW = 1
|
|
190
|
+
DROP_TABLE = 2
|
|
191
|
+
ADD_COLUMNS = 3
|
|
192
|
+
DROP_COLUMNS = 4
|
|
193
|
+
ADD_INDEX = 5
|
|
194
|
+
DROP_INDEX = 6
|
|
154
195
|
|
|
155
196
|
|
|
156
197
|
@dataclasses.dataclass
|
|
@@ -159,7 +200,7 @@ class TableMd:
|
|
|
159
200
|
name: str
|
|
160
201
|
is_replica: bool
|
|
161
202
|
|
|
162
|
-
user:
|
|
203
|
+
user: str | None
|
|
163
204
|
|
|
164
205
|
# monotonically increasing w/in Table for both data and schema changes, starting at 0
|
|
165
206
|
current_version: int
|
|
@@ -173,14 +214,52 @@ class TableMd:
|
|
|
173
214
|
# - every row is assigned a unique and immutable rowid on insertion
|
|
174
215
|
next_row_id: int
|
|
175
216
|
|
|
217
|
+
# sequence number to track changes in the set of mutable views of this table (ie, this table = the view base)
|
|
218
|
+
# - incremented for each add/drop of a mutable view
|
|
219
|
+
# - only maintained for mutable tables
|
|
220
|
+
# TODO: replace with mutable_views: list[UUID] to help with debugging
|
|
221
|
+
view_sn: int
|
|
222
|
+
|
|
176
223
|
# Metadata format for external stores:
|
|
177
224
|
# {'class': 'pixeltable.io.label_studio.LabelStudioProject', 'md': {'project_id': 3}}
|
|
178
225
|
external_stores: list[dict[str, Any]]
|
|
179
226
|
|
|
180
227
|
column_md: dict[int, ColumnMd] # col_id -> ColumnMd
|
|
181
228
|
index_md: dict[int, IndexMd] # index_id -> IndexMd
|
|
182
|
-
view_md:
|
|
183
|
-
|
|
229
|
+
view_md: ViewMd | None
|
|
230
|
+
# TODO: Remove additional_md from this and other Md dataclasses (and switch to using the separate additional_md
|
|
231
|
+
# column in all cases)
|
|
232
|
+
additional_md: dict[str, Any] # deprecated
|
|
233
|
+
|
|
234
|
+
# deprecated
|
|
235
|
+
has_pending_ops: bool = False
|
|
236
|
+
|
|
237
|
+
tbl_state: TableState = TableState.LIVE
|
|
238
|
+
pending_stmt: TableStatement | None = None
|
|
239
|
+
|
|
240
|
+
@property
|
|
241
|
+
def is_snapshot(self) -> bool:
|
|
242
|
+
return self.view_md is not None and self.view_md.is_snapshot
|
|
243
|
+
|
|
244
|
+
@property
|
|
245
|
+
def is_mutable(self) -> bool:
|
|
246
|
+
return not self.is_snapshot and not self.is_replica
|
|
247
|
+
|
|
248
|
+
@property
|
|
249
|
+
def is_pure_snapshot(self) -> bool:
|
|
250
|
+
return (
|
|
251
|
+
self.view_md is not None
|
|
252
|
+
and self.view_md.is_snapshot
|
|
253
|
+
and self.view_md.sample_clause is None
|
|
254
|
+
and self.view_md.predicate is None
|
|
255
|
+
and len(self.column_md) == 0
|
|
256
|
+
)
|
|
257
|
+
|
|
258
|
+
@property
|
|
259
|
+
def ancestors(self) -> TableVersionPath:
|
|
260
|
+
if self.view_md is None:
|
|
261
|
+
return []
|
|
262
|
+
return self.view_md.base_versions
|
|
184
263
|
|
|
185
264
|
|
|
186
265
|
class Table(Base):
|
|
@@ -190,6 +269,8 @@ class Table(Base):
|
|
|
190
269
|
Views are in essence a subclass of tables, because they also store materialized columns. The differences are:
|
|
191
270
|
- views have a base, which is either a (live) table or a snapshot
|
|
192
271
|
- views can have a filter predicate
|
|
272
|
+
|
|
273
|
+
dir_id: NULL for dropped tables
|
|
193
274
|
"""
|
|
194
275
|
|
|
195
276
|
__tablename__ = 'tables'
|
|
@@ -197,26 +278,37 @@ class Table(Base):
|
|
|
197
278
|
MAX_VERSION = 9223372036854775807 # 2^63 - 1
|
|
198
279
|
|
|
199
280
|
id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), primary_key=True, nullable=False)
|
|
200
|
-
dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=
|
|
281
|
+
dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
|
|
201
282
|
md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # TableMd
|
|
283
|
+
additional_md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False, default=dict)
|
|
284
|
+
|
|
285
|
+
# used to force acquisition of an X-lock via an Update stmt
|
|
286
|
+
lock_dummy: orm.Mapped[int] = orm.mapped_column(BigInteger, nullable=True)
|
|
202
287
|
|
|
203
288
|
|
|
204
289
|
@dataclasses.dataclass
|
|
205
|
-
class
|
|
290
|
+
class VersionMd:
|
|
206
291
|
tbl_id: str # uuid.UUID
|
|
207
292
|
created_at: float # time.time()
|
|
208
293
|
version: int
|
|
209
294
|
schema_version: int
|
|
210
|
-
|
|
295
|
+
user: str | None = None # User that created this version
|
|
296
|
+
update_status: UpdateStatus | None = None # UpdateStatus of the change that created this version
|
|
297
|
+
# A version fragment cannot be queried or instantiated via get_table(). A fragment represents a version of a
|
|
298
|
+
# replica table that has incomplete data, and exists only to provide base table support for a dependent view.
|
|
299
|
+
is_fragment: bool = False
|
|
300
|
+
additional_md: dict[str, Any] = dataclasses.field(default_factory=dict) # deprecated
|
|
211
301
|
|
|
212
302
|
|
|
213
303
|
class TableVersion(Base):
|
|
214
304
|
__tablename__ = 'tableversions'
|
|
305
|
+
|
|
215
306
|
tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(
|
|
216
307
|
UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False
|
|
217
308
|
)
|
|
218
309
|
version: orm.Mapped[int] = orm.mapped_column(BigInteger, primary_key=True, nullable=False)
|
|
219
310
|
md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)
|
|
311
|
+
additional_md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False, default=dict)
|
|
220
312
|
|
|
221
313
|
|
|
222
314
|
@dataclasses.dataclass
|
|
@@ -230,18 +322,18 @@ class SchemaColumn:
|
|
|
230
322
|
|
|
231
323
|
# media validation strategy of this particular media column; if not set, TableMd.media_validation applies
|
|
232
324
|
# stores column.MediaValidation.name.lower()
|
|
233
|
-
media_validation:
|
|
325
|
+
media_validation: str | None
|
|
234
326
|
|
|
235
327
|
|
|
236
328
|
@dataclasses.dataclass
|
|
237
|
-
class
|
|
329
|
+
class SchemaVersionMd:
|
|
238
330
|
"""
|
|
239
331
|
Records all versioned table metadata.
|
|
240
332
|
"""
|
|
241
333
|
|
|
242
334
|
tbl_id: str # uuid.UUID
|
|
243
335
|
schema_version: int
|
|
244
|
-
preceding_schema_version:
|
|
336
|
+
preceding_schema_version: int | None
|
|
245
337
|
columns: dict[int, SchemaColumn] # col_id -> SchemaColumn
|
|
246
338
|
num_retained_versions: int
|
|
247
339
|
comment: str
|
|
@@ -249,7 +341,7 @@ class TableSchemaVersionMd:
|
|
|
249
341
|
# default validation strategy for any media column of this table
|
|
250
342
|
# stores column.MediaValidation.name.lower()
|
|
251
343
|
media_validation: str
|
|
252
|
-
additional_md: dict[str, Any]
|
|
344
|
+
additional_md: dict[str, Any] # deprecated
|
|
253
345
|
|
|
254
346
|
|
|
255
347
|
# versioning: each table schema change results in a new record
|
|
@@ -261,6 +353,23 @@ class TableSchemaVersion(Base):
|
|
|
261
353
|
)
|
|
262
354
|
schema_version: orm.Mapped[int] = orm.mapped_column(BigInteger, primary_key=True, nullable=False)
|
|
263
355
|
md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # TableSchemaVersionMd
|
|
356
|
+
additional_md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False, default=dict)
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
class PendingTableOp(Base):
|
|
360
|
+
"""
|
|
361
|
+
Table operation that needs to be completed before the table can be used.
|
|
362
|
+
|
|
363
|
+
Operations need to be completed in order of increasing seq_num.
|
|
364
|
+
"""
|
|
365
|
+
|
|
366
|
+
__tablename__ = 'pendingtableops'
|
|
367
|
+
|
|
368
|
+
tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(
|
|
369
|
+
UUID(as_uuid=True), ForeignKey('tables.id'), primary_key=True, nullable=False
|
|
370
|
+
)
|
|
371
|
+
op_sn: orm.Mapped[int] = orm.mapped_column(Integer, primary_key=True, nullable=False) # catalog.TableOp.op_sn
|
|
372
|
+
op: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # catalog.TableOp
|
|
264
373
|
|
|
265
374
|
|
|
266
375
|
@dataclasses.dataclass
|
|
@@ -288,26 +397,4 @@ class Function(Base):
|
|
|
288
397
|
)
|
|
289
398
|
dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
|
|
290
399
|
md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False) # FunctionMd
|
|
291
|
-
binary_obj: orm.Mapped[
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
class FullTableMd(NamedTuple):
    """Bundle of the table, table-version and schema-version metadata records of a table."""

    tbl_md: TableMd
    version_md: TableVersionMd
    schema_version_md: TableSchemaVersionMd

    def as_dict(self) -> dict[str, Any]:
        """Return a plain-dict representation of this bundle.

        The result holds the table id under 'table_id' plus one `dataclasses.asdict()` conversion per
        metadata record.
        """
        tbl_md, version_md, schema_version_md = self
        result: dict[str, Any] = {'table_id': tbl_md.tbl_id}
        result['table_md'] = dataclasses.asdict(tbl_md)
        result['table_version_md'] = dataclasses.asdict(version_md)
        result['table_schema_version_md'] = dataclasses.asdict(schema_version_md)
        return result

    @classmethod
    def from_dict(cls, data_dict: dict[str, Any]) -> 'FullTableMd':
        """Rebuild a `FullTableMd` from a dictionary of the shape produced by `as_dict()`.

        Only the 'table_md', 'table_version_md' and 'table_schema_version_md' entries are read.
        """
        tbl_md = md_from_dict(TableMd, data_dict['table_md'])
        version_md = md_from_dict(TableVersionMd, data_dict['table_version_md'])
        schema_version_md = md_from_dict(TableSchemaVersionMd, data_dict['table_schema_version_md'])
        return FullTableMd(tbl_md=tbl_md, version_md=version_md, schema_version_md=schema_version_md)
|
|
400
|
+
binary_obj: orm.Mapped[bytes | None] = orm.mapped_column(LargeBinary, nullable=True)
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pixeltable.metadata import schema
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MetadataUtils:
    """Helpers for summarizing column-level schema changes between table versions."""

    @classmethod
    def _diff_md(
        cls, old_md: dict[int, schema.SchemaColumn] | None, new_md: dict[int, schema.SchemaColumn] | None
    ) -> str:
        """Return a string reporting the differences between two column dictionaries

        Columns are matched by their dictionary key (the column id); a column whose id is present in both
        dictionaries is only reported if its name differs.

        Results are formatted as follows:
        - If `old_md` is `None`, returns 'Initial Version'.
        - If `old_md` and `new_md` are the same, returns an empty string.
        - If there are additions, changes, or deletions, returns a string summarizing the changes.

        Args:
            old_md: the columns (col_id -> column) of the earlier version, or `None` if there is none.
            new_md: the columns (col_id -> column) of the later version; must not be `None`.
        """
        assert new_md is not None
        if old_md is None:
            return 'Initial Version'
        if old_md == new_md:
            return ''

        added = [col.name for col_id, col in new_md.items() if col_id not in old_md]
        renamed = [
            f'{old_md[col_id].name!r} to {col.name!r}'
            for col_id, col in new_md.items()
            if col_id in old_md and old_md[col_id].name != col.name
        ]
        deleted = [col.name for col_id, col in old_md.items() if col_id not in new_md]

        # Sections without entries are omitted; if nothing was added/renamed/deleted the result is ''.
        sections = []
        if added:
            sections.append('Added: ' + ', '.join(added))
        if renamed:
            sections.append('Renamed: ' + ', '.join(renamed))
        if deleted:
            sections.append('Deleted: ' + ', '.join(deleted))
        return ', '.join(sections)

    @classmethod
    def _create_md_change_dict(cls, md_list: list[tuple[int, dict[int, schema.SchemaColumn]]] | None) -> dict[int, str]:
        """Return a dictionary of schema changes by version

        Only versions whose columns differ from the preceding entry appear in the result. If the lowest
        version in `md_list` is 0, it is reported as 'Initial Version'; otherwise the lowest version only
        serves as the baseline for the following one. If a version number occurs more than once, only its
        first entry (in the original list order) is considered.

        Args:
            md_list: a list of tuples, each containing a version number and a metadata dictionary.
                The list is sorted in place by version number.

        Returns:
            A dictionary mapping version number to a description of the changes made in that version.
        """
        r: dict[int, str] = {}
        if not md_list:
            return r

        # Sort the list in place by version number only. Sorting the bare tuples would fall back to
        # comparing the metadata dicts whenever two entries share a version number, and dicts are not
        # orderable (TypeError). The sort is stable, so duplicates keep their relative order.
        md_list.sort(key=lambda entry: entry[0])

        first_retrieved_version = md_list[0][0]
        if first_retrieved_version == 0:
            prev_md = None
            prev_ver = -1
            start = 0
        else:
            prev_md = md_list[0][1]
            prev_ver = first_retrieved_version
            start = 1

        for ver, curr_md in md_list[start:]:
            if ver == prev_ver:
                # duplicate entry for a version that was already processed
                continue
            assert ver > prev_ver
            tf = cls._diff_md(prev_md, curr_md)
            if tf != '':
                r[ver] = tf
            prev_md = curr_md
            # keep the version in step with prev_md so that the duplicate check above stays meaningful
            prev_ver = ver
        return r
|