pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/metadata/__init__.py
CHANGED
|
@@ -8,21 +8,24 @@ from typing import Callable
|
|
|
8
8
|
import sqlalchemy as sql
|
|
9
9
|
from sqlalchemy import orm
|
|
10
10
|
|
|
11
|
+
import pixeltable as pxt
|
|
12
|
+
import pixeltable.exceptions as excs
|
|
11
13
|
from pixeltable.utils.console_output import ConsoleLogger
|
|
12
14
|
|
|
13
15
|
from .schema import SystemInfo, SystemInfoMd
|
|
14
16
|
|
|
15
17
|
_console_logger = ConsoleLogger(logging.getLogger('pixeltable'))
|
|
16
|
-
|
|
18
|
+
_logger = logging.getLogger('pixeltable')
|
|
17
19
|
|
|
18
20
|
# current version of the metadata; this is incremented whenever the metadata schema changes
|
|
19
|
-
VERSION =
|
|
21
|
+
VERSION = 44
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
def create_system_info(engine: sql.engine.Engine) -> None:
|
|
23
25
|
"""Create the system metadata record"""
|
|
24
26
|
system_md = SystemInfoMd(schema_version=VERSION)
|
|
25
27
|
record = SystemInfo(md=dataclasses.asdict(system_md))
|
|
28
|
+
_logger.debug(f'Creating pixeltable system info record {record}')
|
|
26
29
|
with orm.Session(engine, future=True) as session:
|
|
27
30
|
# Write system metadata only once for idempotency
|
|
28
31
|
if session.query(SystemInfo).count() == 0:
|
|
@@ -52,9 +55,17 @@ for _, modname, _ in pkgutil.iter_modules([os.path.dirname(__file__) + '/convert
|
|
|
52
55
|
def upgrade_md(engine: sql.engine.Engine) -> None:
|
|
53
56
|
"""Upgrade the metadata schema to the current version"""
|
|
54
57
|
with orm.Session(engine) as session:
|
|
55
|
-
|
|
58
|
+
# Get exclusive lock on SystemInfo row
|
|
59
|
+
system_info = session.query(SystemInfo).with_for_update().one().md
|
|
56
60
|
md_version = system_info['schema_version']
|
|
57
61
|
assert isinstance(md_version, int)
|
|
62
|
+
_logger.info(f'Current database version: {md_version}, installed version: {VERSION}')
|
|
63
|
+
if md_version > VERSION:
|
|
64
|
+
raise excs.Error(
|
|
65
|
+
'This Pixeltable database was created with a newer Pixeltable version '
|
|
66
|
+
f'than the one currently installed ({pxt.__version__}).\n'
|
|
67
|
+
'Please update to the latest Pixeltable version by running: pip install --upgrade pixeltable'
|
|
68
|
+
)
|
|
58
69
|
if md_version == VERSION:
|
|
59
70
|
return
|
|
60
71
|
while md_version < VERSION:
|
|
@@ -12,9 +12,9 @@ _logger = logging.getLogger('pixeltable')
|
|
|
12
12
|
@register_converter(version=13)
|
|
13
13
|
def _(engine: sql.engine.Engine) -> None:
|
|
14
14
|
with engine.begin() as conn:
|
|
15
|
-
for row in conn.execute(sql.select(Table)):
|
|
15
|
+
for row in conn.execute(sql.select(Table.id, Table.md)):
|
|
16
16
|
id = row[0]
|
|
17
|
-
md = row[
|
|
17
|
+
md = row[1]
|
|
18
18
|
updated_md = __update_md(md)
|
|
19
19
|
if updated_md != md:
|
|
20
20
|
_logger.info(f'Updating schema for table: {id}')
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
# Migrate a few changed function names
|
|
16
16
|
if k == 'path' and v == 'pixeltable.functions.string.str_format':
|
|
17
17
|
return 'path', 'pixeltable.functions.string.format'
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import datetime
|
|
2
|
-
from typing import Any
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
import sqlalchemy as sql
|
|
5
5
|
|
|
@@ -28,7 +28,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
28
28
|
conn.execute(sql.text(f'ALTER TABLE {store_name} ALTER COLUMN col_{col_id} TYPE TIMESTAMPTZ'))
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def __update_timestamp_literals(k: Any, v: Any) ->
|
|
31
|
+
def __update_timestamp_literals(k: Any, v: Any) -> tuple[Any, Any] | None:
|
|
32
32
|
if isinstance(v, dict) and 'val_t' in v:
|
|
33
33
|
# It's a literal with an explicit 'val_t' field. In version 19 this can only mean a
|
|
34
34
|
# timestamp literal, which (in version 19) is stored in the DB as a naive datetime.
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
if isinstance(v, dict) and '_classname' in v:
|
|
16
16
|
# The way InlineArray is represented changed in v20. Previously, literal values were stored
|
|
17
17
|
# directly in the Inline expr; now we store them in Literal sub-exprs. This converter
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -24,7 +24,7 @@ def __update_schema_column(schema_column: dict) -> None:
|
|
|
24
24
|
schema_column['media_validation'] = None
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def __substitute_md(k:
|
|
27
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
28
28
|
if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
|
|
29
29
|
if 'perform_validation' not in v:
|
|
30
30
|
v['perform_validation'] = False
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'DataFrame':
|
|
16
16
|
v['from_clause'] = {'tbls': [v['tbl']], 'join_clauses': []}
|
|
17
17
|
return k, v
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
from pixeltable import func
|
|
16
16
|
from pixeltable.func.globals import resolve_symbol
|
|
17
17
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
if k == 'path' and (
|
|
16
16
|
v in ('pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image')
|
|
17
17
|
):
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
import pixeltable.type_system as ts
|
|
16
16
|
from pixeltable.exprs.literal import Literal
|
|
17
17
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -12,7 +12,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
12
12
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
def __substitute_md(k:
|
|
15
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
16
16
|
# Defaults are now stored as literals in signatures
|
|
17
17
|
if k == 'parameters':
|
|
18
18
|
for param in v:
|
|
@@ -55,8 +55,8 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], A
|
|
|
55
55
|
# We need to expand ("unroll") any var-args or var-kwargs.
|
|
56
56
|
|
|
57
57
|
new_args_len = len(new_args)
|
|
58
|
-
rolled_args:
|
|
59
|
-
rolled_kwargs:
|
|
58
|
+
rolled_args: dict | None = None
|
|
59
|
+
rolled_kwargs: dict | None = None
|
|
60
60
|
|
|
61
61
|
if 'signature' in v['fn']:
|
|
62
62
|
# If it's a pickled function, there's no signature, so we're out of luck; varargs in a pickled function
|
|
@@ -1,36 +1,49 @@
|
|
|
1
1
|
import copy
|
|
2
|
+
import uuid
|
|
3
|
+
from typing import Any
|
|
2
4
|
|
|
3
5
|
import sqlalchemy as sql
|
|
6
|
+
from sqlalchemy import orm
|
|
7
|
+
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
|
4
8
|
|
|
5
9
|
from pixeltable.metadata import register_converter
|
|
6
|
-
from pixeltable.metadata.converters.util import
|
|
7
|
-
convert_table_record,
|
|
8
|
-
convert_table_schema_version_record,
|
|
9
|
-
convert_table_version_record,
|
|
10
|
-
)
|
|
11
|
-
from pixeltable.metadata.schema import Table, TableSchemaVersion, TableVersion
|
|
10
|
+
from pixeltable.metadata.converters.util import convert_sql_table_record, convert_table_md
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
@register_converter(version=30)
|
|
15
14
|
def _(engine: sql.engine.Engine) -> None:
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
engine, table_schema_version_record_updater=__update_table_schema_version_record
|
|
20
|
-
)
|
|
15
|
+
convert_table_md(engine, table_md_updater=__update_table_md)
|
|
16
|
+
convert_sql_table_record(TableVersionAtV30, engine, record_updater=__update_table_version_record)
|
|
17
|
+
convert_sql_table_record(TableSchemaVersionAtV30, engine, record_updater=__update_table_schema_version_record)
|
|
21
18
|
|
|
22
19
|
|
|
23
|
-
def
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
20
|
+
def __update_table_md(md: dict, tbl_id: uuid.UUID) -> None:
    """Store the table id, in string form, under the ``'tbl_id'`` key of ``md``.

    Args:
        md: table metadata dict; modified in place.
        tbl_id: id of the table the metadata belongs to.
    """
    md.update(tbl_id=str(tbl_id))
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# We can't use the ORM declarations from pixeltable.metadata.schema, because those might have changed since the
|
|
25
|
+
# version being converted. So we include static declarations here of the tables as they existed at version 30.
|
|
26
|
+
|
|
27
|
+
Base: type = orm.declarative_base()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TableVersionAtV30(Base):
    """Static ORM declaration of the ``tableversions`` table as it existed at metadata version 30.

    Declared locally so the converter does not depend on the current declarations in
    pixeltable.metadata.schema, which may have changed since version 30.
    """

    __tablename__ = 'tableversions'

    # (tbl_id, version) form the composite primary key
    tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), primary_key=True, nullable=False)
    version: orm.Mapped[int] = orm.mapped_column(sql.BigInteger, primary_key=True, nullable=False)
    # JSONB metadata payload of this table version
    md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class TableSchemaVersionAtV30(Base):
    """Static ORM declaration of the ``tableschemaversions`` table as it existed at metadata version 30.

    Declared locally so the converter does not depend on the current declarations in
    pixeltable.metadata.schema, which may have changed since version 30.
    """

    __tablename__ = 'tableschemaversions'

    # (tbl_id, schema_version) form the composite primary key
    tbl_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), primary_key=True, nullable=False)
    schema_version: orm.Mapped[int] = orm.mapped_column(sql.BigInteger, primary_key=True, nullable=False)
    md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # TableSchemaVersionMd
|
|
31
44
|
|
|
32
45
|
|
|
33
|
-
def __update_table_version_record(record:
|
|
46
|
+
def __update_table_version_record(record: TableVersionAtV30) -> None:
|
|
34
47
|
"""
|
|
35
48
|
Update TableVersion with table_id.
|
|
36
49
|
"""
|
|
@@ -40,7 +53,7 @@ def __update_table_version_record(record: TableVersion) -> None:
|
|
|
40
53
|
record.md = md
|
|
41
54
|
|
|
42
55
|
|
|
43
|
-
def __update_table_schema_version_record(record:
|
|
56
|
+
def __update_table_schema_version_record(record: TableSchemaVersionAtV30) -> None:
|
|
44
57
|
"""
|
|
45
58
|
Update TableSchemaVersion with table_id.
|
|
46
59
|
"""
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
|
|
16
16
|
# Add reference_tbl to ColumnRef; for historical metadata it is always equal to tbl
|
|
17
17
|
assert 'reference_tbl' not in v
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import sqlalchemy as sql
|
|
2
|
+
|
|
3
|
+
from pixeltable.metadata import register_converter
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@register_converter(version=35)
def _(engine: sql.engine.Engine) -> None:
    """Converter registered for metadata version 35: add an int8 ``lock_dummy`` column to ``tables``."""
    add_lock_column = sql.text('ALTER TABLE tables ADD COLUMN lock_dummy int8')
    with engine.begin() as conn:
        conn.execute(add_lock_column)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
from uuid import UUID
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sql
|
|
6
|
+
|
|
7
|
+
from pixeltable.metadata import register_converter
|
|
8
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
9
|
+
|
|
10
|
+
_logger = logging.getLogger('pixeltable')
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@register_converter(version=36)
def _(engine: sql.engine.Engine) -> None:
    """Converter registered for metadata version 36.

    Runs ``convert_table_md`` with this module's table-metadata updater and substitution function.
    """
    convert_table_md(
        engine,
        substitution_fn=__substitute_md,
        table_md_updater=__update_table_md,
    )
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def __update_table_md(table_md: dict, table_id: UUID) -> None:
    """Add a ``sample_clause`` entry (set to None) to the view metadata when it is absent.

    Tables whose ``view_md`` is None are left untouched, as are views that already
    carry a ``sample_clause`` key.

    Args:
        table_md (dict): copy of the original table metadata; updated in place.
        table_id (UUID): the table id, used only in the log message.
    """
    view_md = table_md['view_md']
    if view_md is None or 'sample_clause' in view_md:
        return
    view_md['sample_clause'] = None
    _logger.info(f'Updating view metadata for table: {table_id}')
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
34
|
+
if isinstance(v, dict) and (v.get('_classname') == 'DataFrame'):
|
|
35
|
+
if 'sample_clause' not in v:
|
|
36
|
+
v['sample_clause'] = None
|
|
37
|
+
return k, v
|
|
38
|
+
return None
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from uuid import UUID
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
from pixeltable.metadata import register_converter
|
|
6
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@register_converter(version=37)
def _(engine: sql.engine.Engine) -> None:
    """Converter registered for metadata version 37: runs ``convert_table_md`` with this module's updater."""
    updater = __update_table_md
    convert_table_md(engine, table_md_updater=updater)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def __update_table_md(table_md: dict, _: UUID) -> None:
    """Set ``table_md['view_sn']`` to 0 in place; the table id argument is ignored."""
    table_md.update(view_sn=0)
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
import sqlalchemy as sql
|
|
4
|
+
|
|
5
|
+
from pixeltable.metadata import register_converter
|
|
6
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@register_converter(version=38)
def _(engine: sql.engine.Engine) -> None:
    """Converter registered for metadata version 38: runs ``convert_table_md`` with this module's substitution."""
    substitution = __substitute_md
    convert_table_md(engine, substitution_fn=substitution)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
|
+
if k == 'col_mapping':
|
|
16
|
+
assert isinstance(v, list)
|
|
17
|
+
return k, [__col_mapping_entry(e) for e in v]
|
|
18
|
+
if k == 'stored_proxies':
|
|
19
|
+
assert isinstance(v, list)
|
|
20
|
+
return k, [__stored_proxies_entry(e) for e in v]
|
|
21
|
+
return None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def __col_mapping_entry(e: list) -> list:
    """Convert one ``col_mapping`` entry: the first element becomes a column handle, the second is kept.

    The entry must be a list whose first element is a dict and whose second element is a str.
    """
    assert isinstance(e, list)
    col_ref = e[0]
    assert isinstance(col_ref, dict)
    name = e[1]
    assert isinstance(name, str)
    return [__col_handle(col_ref), name]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def __stored_proxies_entry(e: list) -> list:
    """Convert one ``stored_proxies`` entry: both elements become column handles.

    The entry must be a list whose first two elements are dicts.
    """
    assert isinstance(e, list)
    first = e[0]
    assert isinstance(first, dict)
    second = e[1]
    assert isinstance(second, dict)
    return [__col_handle(first), __col_handle(second)]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def __col_handle(e: dict) -> dict:
    """Build a column-handle dict from a dict holding ``'tbl_id'`` and ``'col_id'``.

    The table id is wrapped in a ``tbl_version`` dict whose ``effective_version`` is None.
    """
    tbl_version = {'id': e['tbl_id'], 'effective_version': None}
    col_id = e['col_id']
    return {'tbl_version': tbl_version, 'col_id': col_id}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from uuid import UUID
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
|
|
6
|
+
from pixeltable.metadata import register_converter
|
|
7
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
8
|
+
|
|
9
|
+
_logger = logging.getLogger('pixeltable')
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@register_converter(version=39)
def _(engine: sql.engine.Engine) -> None:
    """Converter registered for metadata version 39: runs ``convert_table_md`` with this module's table modifier."""
    modifier = __table_modifier
    convert_table_md(engine, table_modifier=modifier)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def __table_modifier(conn: sql.Connection, tbl_id: UUID, orig_table_md: dict, updated_table_md: dict) -> None:
    """Migrate the error columns of one store table to ``*_cellmd`` columns.

    Args:
        conn: connection on which all statements are executed.
        tbl_id: id of the table; its hex form is part of the store table name.
        orig_table_md: table metadata before conversion; only ``'view_md'`` is read, to choose
            between the ``view`` and ``tbl`` store-name prefixes.
        updated_table_md: not used by this modifier.
    """
    store_prefix = 'view' if orig_table_md['view_md'] is not None else 'tbl'
    store_name = f'{store_prefix}_{tbl_id.hex}'

    # Get the list of column names that need to be migrated
    col_names = find_error_columns(conn=conn, store_name=store_name)
    if not col_names:
        _logger.info(f'No error columns found in table {store_name}. Skipping migration.')
        return

    # Check if the table exists, outside of the metadata we were given
    # There seem to be cases where the metadata is present in the catalog,
    # but the table itself is not in the database.
    # The table name is passed as a bound parameter rather than formatted into the SQL text.
    check_table_sql = sql.text(
        'SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = :store_name)'
    )
    table_exists = conn.execute(check_table_sql, {'store_name': store_name}).scalar()
    if not table_exists:
        _logger.warning(f'Table {store_name} does not exist. Skipping migration.')
        return

    migrate_error_to_cellmd_columns(conn, store_name, col_names)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def find_error_columns(conn: sql.Connection, store_name: str) -> list[str]:
    """
    Return the roots of any errormsg or errortype columns in the given table

    Args:
        conn: SQLAlchemy connection
        store_name: Name of the table to check

    Returns:
        Sorted list of column name roots, i.e. ``root`` for every ``root_errormsg`` or
        ``root_errortype`` column found (each root appears once); empty if there are none.
    """
    # The table name is passed as a bound parameter rather than formatted into the SQL text.
    check_columns_sql = sql.text(
        'SELECT column_name FROM information_schema.columns WHERE table_name = :store_name'
    )
    error_suffixes = ('_errormsg', '_errortype')
    column_roots = {
        row[0].removesuffix('_errormsg').removesuffix('_errortype')
        for row in conn.execute(check_columns_sql, {'store_name': store_name})
        if row[0].endswith(error_suffixes)
    }
    # Sort so that the statements and log messages built from this list are deterministic.
    return sorted(column_roots)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def migrate_error_to_cellmd_columns(
    conn: sql.Connection, store_name: str, col_names: list[str], backup_table: str | None = None
) -> None:
    """
    Replace each ``<col>_errormsg``/``<col>_errortype`` column pair with one ``<col>_cellmd`` JSONB column.

    For every name in ``col_names`` this adds ``<col>_cellmd``, sets it to a JSON object with the keys
    ``errormsg`` and ``errortype`` (or NULL when either source value is NULL), and then drops the two
    source columns. SQLAlchemy errors are logged and re-raised.

    Args:
        conn: SQLAlchemy connection on which all statements are executed
        store_name: Name of the table to modify
        col_names: List of column name prefixes; nothing is done if it is empty
        backup_table: Optional name for backup table; if given, ``store_name`` is copied into it first

    Usage:
        migrate_error_to_cellmd_columns(conn, 'my_table', ['columnname'], 'my_table_backup')
    """
    if not col_names:
        # With no columns the ALTER TABLE statements below would carry no action, which is invalid SQL.
        _logger.info(f'No columns to migrate in table {store_name}. Skipping migration.')
        return

    # NOTE: table and column names are formatted into the statements because identifiers cannot be
    # passed as bound parameters; callers must supply trusted names.
    try:
        # Optional: Create backup
        if backup_table:
            backup_sql = sql.text(f'CREATE TABLE {backup_table} AS SELECT * FROM {store_name}')
            conn.execute(backup_sql)
            _logger.info(f'Backup created: {backup_table}')

        # Step 1: Add new columns
        add_column_str = ', '.join(f'ADD COLUMN {col}_cellmd JSONB DEFAULT NULL' for col in col_names)
        add_column_sql = sql.text(f'ALTER TABLE {store_name} {add_column_str}')
        conn.execute(add_column_sql)
        _logger.info(f'Added columns: {", ".join(f"{col}_cellmd" for col in col_names)}')

        # Step 2: Populate new columns
        set_column_str = ', '.join(
            [
                f'{col}_cellmd = CASE WHEN {col}_errormsg IS NULL OR {col}_errortype IS NULL '
                f"THEN NULL ELSE jsonb_build_object('errormsg', {col}_errormsg, 'errortype', {col}_errortype) END"
                for col in col_names
            ]
        )
        populate_sql = sql.text(f'UPDATE {store_name} SET {set_column_str}')
        result = conn.execute(populate_sql)
        _logger.info(f'Updated {result.rowcount} rows')

        # Step 3: Drop old columns
        drop_columns_str = ', '.join(
            [f'DROP COLUMN IF EXISTS {col}_errormsg, DROP COLUMN IF EXISTS {col}_errortype' for col in col_names]
        )
        drop_columns_sql = sql.text(f'ALTER TABLE {store_name} {drop_columns_str}')
        conn.execute(drop_columns_sql)
        _logger.info(f'Dropped columns: {", ".join(f"{col}_errormsg, {col}_errortype" for col in col_names)}')
        _logger.info(f'Migration completed successfully for table: {store_name}')

    except sql.exc.SQLAlchemyError as e:
        _logger.error(f'Migration for table {store_name} failed: {e}')
        raise
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from uuid import UUID
|
|
3
|
+
|
|
4
|
+
import sqlalchemy as sql
|
|
5
|
+
|
|
6
|
+
from pixeltable.metadata import register_converter
|
|
7
|
+
from pixeltable.metadata.converters.util import convert_table_md
|
|
8
|
+
|
|
9
|
+
# Logger named 'pixeltable'; the converter functions in this module log their progress and failures through it.
_logger = logging.getLogger('pixeltable')
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@register_converter(version=40)
def _(engine: sql.engine.Engine) -> None:
    """Metadata converter registered for version 40.

    Delegates to ``convert_table_md``, passing ``__table_modifier`` as the
    ``table_modifier`` callback.
    """
    per_table_hook = __table_modifier
    convert_table_md(engine, table_modifier=per_table_hook)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def __table_modifier(conn: sql.Connection, tbl_id: UUID, orig_table_md: dict, updated_table_md: dict) -> None:
    """Add the missing ``col_<id>_cellmd`` columns to the store table of one table.

    The store table is named ``view_<tbl_id.hex>`` when ``orig_table_md['view_md']`` is not None,
    and ``tbl_<tbl_id.hex>`` otherwise. Target columns are those reported by
    ``find_target_columns``; any whose ``_cellmd`` column is already listed in
    ``information_schema.columns`` for the store table are skipped.

    Args:
        conn: connection used to inspect and alter the store table.
        tbl_id: id of the table; its hex form is part of the store table name.
        orig_table_md: table metadata dict (reads 'view_md', 'name' and, via the helper, 'column_md').
        updated_table_md: not used by this modifier.
    """
    is_view = orig_table_md['view_md'] is not None
    store_name = ('view' if is_view else 'tbl') + f'_{tbl_id.hex}'
    tbl_name = orig_table_md['name']

    # Determine which columns need a _cellmd companion at all
    _logger.info(f'Checking table {tbl_name} ({store_name})')
    candidate_ids = find_target_columns(orig_table_md)
    if not candidate_ids:
        _logger.info(f'No Array or Json columns found in table {tbl_name}. Skipping migration.')
        return

    # Collect the names of the columns currently present in the store table
    lookup_stmt = sql.text(f"""
        SELECT column_name
        FROM information_schema.columns
        WHERE table_name = '{store_name}'
    """)
    present_columns = {row[0] for row in conn.execute(lookup_stmt)}

    # Keep only the ids whose _cellmd column has not been created yet
    missing_ids: list[int] = []
    for col_id in candidate_ids:
        cellmd_col = f'col_{col_id}_cellmd'
        if cellmd_col in present_columns:
            _logger.info(f'Column {cellmd_col} already exists in table {tbl_name}. Skipping.')
            continue
        missing_ids.append(col_id)

    if not missing_ids:
        _logger.info(f'All _cellmd columns already exist in table {tbl_name}. Skipping migration.')
        return

    add_cellmd_columns(conn, store_name, missing_ids)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def find_target_columns(table_md: dict) -> list[int]:
    """Collect the ids of stored columns whose type is ArrayType or JsonType.

    Args:
        table_md: table metadata dict; its 'column_md' entry maps column ids to per-column
            metadata dicts carrying a 'col_type' dict and an optional 'stored' flag.

    Returns:
        The matching ids (keys of 'column_md'), in the dict's iteration order.
    """
    wanted_types = ('ArrayType', 'JsonType')
    target_ids: list[int] = []
    for col_id, col_md in table_md['column_md'].items():
        classname = col_md['col_type'].get('_classname')
        # A column without a 'stored' entry is treated as not stored
        if classname not in wanted_types or not col_md.get('stored', False):
            continue
        target_ids.append(col_id)
        _logger.info(f'Found {classname} column: {col_id}')
    return target_ids
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def add_cellmd_columns(conn: sql.Connection, store_name: str, col_ids: list[int]) -> None:
    """Add a JSONB ``col_<id>_cellmd`` column (default NULL) to ``store_name`` for each id in ``col_ids``.

    All columns are added with a single ALTER TABLE statement. An empty ``col_ids`` is a no-op.

    Args:
        conn: connection on which the statement is executed.
        store_name: name of the store table to alter.
        col_ids: ids of the columns that need a companion ``_cellmd`` column.

    Raises:
        sql.exc.SQLAlchemyError: re-raised after being logged if the statement fails.
    """
    if not col_ids:
        # With no ADD COLUMN clauses the statement would render as 'ALTER TABLE <name> ',
        # which is not valid SQL; there is nothing to add, so return without touching the table.
        return

    cellmd_names = [f'col_{col_id}_cellmd' for col_id in col_ids]
    try:
        add_column_str = ', '.join(f'ADD COLUMN {name} JSONB DEFAULT NULL' for name in cellmd_names)
        add_column_sql = sql.text(f'ALTER TABLE {store_name} {add_column_str}')
        conn.execute(add_column_sql)
        _logger.info(f'Added columns to {store_name}: {", ".join(cellmd_names)}')
    except sql.exc.SQLAlchemyError as e:
        # Log with the table name for context, then let the caller see the original error
        _logger.error(f'Migration for table {store_name} failed: {e}')
        raise
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import sqlalchemy as sql
|
|
2
|
+
|
|
3
|
+
from pixeltable.metadata import register_converter
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@register_converter(version=41)
def _(engine: sql.engine.Engine) -> None:
    """Metadata converter registered for version 41.

    Adds a JSONB ``additional_md`` column, defaulting to an empty JSON object, to the ``dirs``,
    ``tables``, ``tableversions`` and ``tableschemaversions`` tables, in that order, inside a
    single ``engine.begin()`` block.
    """
    target_tables = ('dirs', 'tables', 'tableversions', 'tableschemaversions')
    with engine.begin() as conn:
        for table_name in target_tables:
            # '{{}}' renders as a literal '{}' in the f-string
            stmt = f"ALTER TABLE {table_name} ADD COLUMN additional_md JSONB DEFAULT '{{}}'::JSONB"
            conn.execute(sql.text(stmt))
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import sqlalchemy as sql
|
|
2
|
+
|
|
3
|
+
from pixeltable.metadata import register_converter
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@register_converter(version=42)
def _(engine: sql.engine.Engine) -> None:
    """Metadata converter registered for version 42.

    Drops the NOT NULL constraint from the ``dir_id`` column of the ``tables`` table.
    """
    with engine.begin() as conn:
        drop_not_null = sql.text('ALTER TABLE tables ALTER COLUMN dir_id DROP NOT NULL')
        conn.execute(drop_not_null)
|