pixeltable-0.4.17-py3-none-any.whl → pixeltable-0.4.19-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in that registry.
Potentially problematic release: this version of pixeltable has been flagged as possibly problematic.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +144 -118
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +139 -124
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +315 -246
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +69 -78
- pixeltable/env.py +78 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +28 -27
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +1033 -6
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +36 -31
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +75 -40
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/document.py +88 -57
- pixeltable/iterators/video.py +66 -37
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +32 -34
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +126 -41
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +74 -38
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/METADATA +4 -4
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.17.dist-info/RECORD +0 -211
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.17.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
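Most of the annotation churn in the hunks below is a mechanical migration from typing.Optional to PEP 604 unions (X | None). A minimal before/after sketch of that pattern (the function below is invented for illustration and is not part of pixeltable):

    from typing import Optional

    # 0.4.17 style
    def find_user(name: Optional[str] = None) -> Optional[dict]:
        return {'name': name} if name is not None else None

    # 0.4.19 style: same behavior, PEP 604 spelling
    def find_user_new(name: str | None = None) -> dict | None:
        return {'name': name} if name is not None else None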
@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any

 import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
     convert_table_md(engine, substitution_fn=__substitute_md)


-def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
     if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'DataFrame':
         v['from_clause'] = {'tbls': [v['tbl']], 'join_clauses': []}
         return k, v

@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any

 import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
     convert_table_md(engine, substitution_fn=__substitute_md)


-def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
     from pixeltable import func
     from pixeltable.func.globals import resolve_symbol


@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any

 import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
     convert_table_md(engine, substitution_fn=__substitute_md)


-def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
     if k == 'path' and (
         v in ('pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image')
     ):

@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any

 import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
     convert_table_md(engine, substitution_fn=__substitute_md)


-def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
     import pixeltable.type_system as ts
     from pixeltable.exprs.literal import Literal


@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any

 import sqlalchemy as sql

@@ -12,7 +12,7 @@ def _(engine: sql.engine.Engine) -> None:
     convert_table_md(engine, substitution_fn=__substitute_md)


-def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
     # Defaults are now stored as literals in signatures
     if k == 'parameters':
         for param in v:
@@ -55,8 +55,8 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], A
         # We need to expand ("unroll") any var-args or var-kwargs.

         new_args_len = len(new_args)
-        rolled_args: Optional[dict] = None
-        rolled_kwargs: Optional[dict] = None
+        rolled_args: dict | None = None
+        rolled_kwargs: dict | None = None

         if 'signature' in v['fn']:
             # If it's a pickled function, there's no signature, so we're out of luck; varargs in a pickled function

@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any

 import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
     convert_table_md(engine, substitution_fn=__substitute_md)


-def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
     if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
         # Add reference_tbl to ColumnRef; for historical metadata it is always equal to tbl
         assert 'reference_tbl' not in v

@@ -1,5 +1,5 @@
 import logging
-from typing import Any, Optional
+from typing import Any
 from uuid import UUID

 import sqlalchemy as sql
@@ -30,7 +30,7 @@ def __update_table_md(table_md: dict, table_id: UUID) -> None:
     _logger.info(f'Updating view metadata for table: {table_id}')


-def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
     if isinstance(v, dict) and (v.get('_classname') == 'DataFrame'):
         if 'sample_clause' not in v:
             v['sample_clause'] = None

@@ -1,4 +1,4 @@
-from typing import Any, Optional
+from typing import Any

 import sqlalchemy as sql

@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
     convert_table_md(engine, substitution_fn=__substitute_md)


-def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
+def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
     if k == 'col_mapping':
         assert isinstance(v, list)
         return k, [__col_mapping_entry(e) for e in v]

@@ -1,5 +1,4 @@
 import logging
-from typing import Optional
 from uuid import UUID

 import sqlalchemy as sql
@@ -69,7 +68,7 @@ def find_error_columns(conn: sql.Connection, store_name: str) -> list[str]:


 def migrate_error_to_cellmd_columns(
-    conn: sql.Connection, store_name: str, col_names: list[str], backup_table: Optional[str] = None
+    conn: sql.Connection, store_name: str, col_names: list[str], backup_table: str | None = None
 ) -> None:
     """
     Safe version with error handling and optional backup.

@@ -1,6 +1,6 @@
 import copy
 import logging
-from typing import Any, Callable, Optional
+from typing import Any, Callable
 from uuid import UUID

 import sqlalchemy as sql
@@ -12,11 +12,11 @@ __logger = logging.getLogger('pixeltable')

 def convert_table_md(
     engine: sql.engine.Engine,
-    table_md_updater: Optional[Callable[[dict, UUID], None]] = None,
-    column_md_updater: Optional[Callable[[dict], None]] = None,
-    external_store_md_updater: Optional[Callable[[dict], None]] = None,
-    substitution_fn: Optional[Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]] = None,
-    table_modifier: Optional[Callable[[sql.Connection, UUID, dict, dict], None]] = None,
+    table_md_updater: Callable[[dict, UUID], None] | None = None,
+    column_md_updater: Callable[[dict], None] | None = None,
+    external_store_md_updater: Callable[[dict], None] | None = None,
+    substitution_fn: Callable[[str | None, Any], tuple[str | None, Any] | None] | None = None,
+    table_modifier: Callable[[sql.Connection, UUID, dict, dict], None] | None = None,
 ) -> None:
     """
     Converts schema.TableMd dicts based on the specified conversion functions.
@@ -80,9 +80,7 @@ def __update_external_store_md(table_md: dict, external_store_md_updater: Callab
         external_store_md_updater(store_md)


-def __substitute_md_rec(
-    md: Any, substitution_fn: Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]
-) -> Any:
+def __substitute_md_rec(md: Any, substitution_fn: Callable[[str | None, Any], tuple[str | None, Any] | None]) -> Any:
     if isinstance(md, dict):
         updated_dict: dict[str, Any] = {}
         for k, v in md.items():
@@ -110,8 +108,8 @@ def __substitute_md_rec(

 def convert_table_schema_version_md(
     engine: sql.engine.Engine,
-    table_schema_version_md_updater: Optional[Callable[[dict], None]] = None,
-    schema_column_updater: Optional[Callable[[dict], None]] = None,
+    table_schema_version_md_updater: Callable[[dict], None] | None = None,
+    schema_column_updater: Callable[[dict], None] | None = None,
 ) -> None:
     """
     Converts schema.TableSchemaVersionMd dicts based on the specified conversion functions.
@@ -150,7 +148,7 @@ def __update_schema_column(table_schema_version_md: dict, schema_column_updater:


 def convert_table_version_record(
-    engine: sql.engine.Engine, table_version_record_updater: Optional[Callable[[TableVersion], None]]
+    engine: sql.engine.Engine, table_version_record_updater: Callable[[TableVersion], None] | None
 ) -> None:
     with sql.orm.Session(engine, future=True) as session:
         for record in session.query(TableVersion).all():
@@ -159,7 +157,7 @@ def convert_table_version_record(


 def convert_table_schema_version_record(
-    engine: sql.engine.Engine, table_schema_version_record_updater: Optional[Callable[[TableSchemaVersion], None]]
+    engine: sql.engine.Engine, table_schema_version_record_updater: Callable[[TableSchemaVersion], None] | None
 ) -> None:
     with sql.orm.Session(engine, future=True) as session:
         for record in session.query(TableSchemaVersion).all():
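For orientation, a minimal, hypothetical substitution_fn matching the new convert_table_md signature shown above; the key names are invented for this example, and returning None is assumed to mean "leave the entry unchanged", following the pattern of the converters in this release:

    from typing import Any

    def substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
        # Hypothetical substitution: rename one metadata key, leave everything else untouched.
        if k == 'old_key':          # 'old_key'/'new_key' are invented for illustration
            return 'new_key', v
        return None                 # assumed to mean "no substitution for this entry"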
pixeltable/metadata/schema.py CHANGED
@@ -1,7 +1,8 @@
 import dataclasses
+import types
 import typing
 import uuid
-from typing import Any, NamedTuple, Optional, TypeVar, Union, get_type_hints
+from typing import Any, NamedTuple, TypeVar, Union, get_type_hints

 import sqlalchemy as sql
 from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary, orm
@@ -29,8 +30,8 @@ def md_from_dict(data_class_type: type[T], data: Any) -> T:
     origin = typing.get_origin(data_class_type)
     if origin is not None:
         type_args = typing.get_args(data_class_type)
-        if origin is Union and type(None) in type_args:
-            #
+        if (origin is Union or origin is types.UnionType) and type(None) in type_args:
+            # handling T | None, T | None
             non_none_args = [arg for arg in type_args if arg is not type(None)]
             assert len(non_none_args) == 1
             return md_from_dict(non_none_args[0], data) if data is not None else None
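The added types.UnionType branch matters because annotations written with PEP 604 syntax report a different origin than typing.Optional. A standalone illustration (not taken from the package):

    import types
    import typing
    from typing import Optional, Union

    assert typing.get_origin(Optional[int]) is Union          # Optional[T] is Union[T, None]
    assert typing.get_origin(int | None) is types.UnionType   # PEP 604 unions (Python 3.10+)
    assert type(None) in typing.get_args(int | None)          # NoneType appears in get_args either way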
@@ -74,7 +75,7 @@ class SystemInfo(Base):
 @dataclasses.dataclass
 class DirMd:
     name: str
-    user: Optional[str]
+    user: str | None
     additional_md: dict[str, Any]


@@ -103,20 +104,20 @@ class ColumnMd:

     id: int
     schema_version_add: int
-    schema_version_drop: Optional[int]
+    schema_version_drop: int | None
     col_type: dict

     # if True, is part of the primary key
     is_pk: bool

     # if set, this is a computed column
-    value_expr: Optional[dict]
+    value_expr: dict | None

     # if True, the column is present in the stored table
-    stored: Optional[bool]
+    stored: bool | None

     # If present, the URI for the destination for column values
-    destination: Optional[str] = None
+    destination: str | None = None


 @dataclasses.dataclass
@@ -132,13 +133,13 @@ class IndexMd:
     index_val_col_id: int  # column holding the values to be indexed
     index_val_undo_col_id: int  # column holding index values for deleted rows
     schema_version_add: int
-    schema_version_drop: Optional[int]
+    schema_version_drop: int | None
     class_fqn: str
     init_args: dict[str, Any]


 # a stored table version path is a list of (table id as str, effective table version)
-TableVersionPath = list[tuple[str, Optional[int]]]
+TableVersionPath = list[tuple[str, int | None]]


 @dataclasses.dataclass
@@ -150,16 +151,16 @@ class ViewMd:
     base_versions: TableVersionPath

     # filter predicate applied to the base table; view-only
-    predicate: Optional[dict[str, Any]]
+    predicate: dict[str, Any] | None

     # sampling predicate applied to the base table; view-only
-    sample_clause: Optional[dict[str, Any]]
+    sample_clause: dict[str, Any] | None

     # ComponentIterator subclass; only for component views
-    iterator_class_fqn: Optional[str]
+    iterator_class_fqn: str | None

     # args to pass to the iterator class constructor; only for component views
-    iterator_args: Optional[dict[str, Any]]
+    iterator_args: dict[str, Any] | None


 @dataclasses.dataclass
@@ -168,7 +169,7 @@ class TableMd:
     name: str
     is_replica: bool

-    user: Optional[str]
+    user: str | None

     # monotonically increasing w/in Table for both data and schema changes, starting at 0
     current_version: int
@@ -194,7 +195,7 @@ class TableMd:

     column_md: dict[int, ColumnMd]  # col_id -> ColumnMd
     index_md: dict[int, IndexMd]  # index_id -> IndexMd
-    view_md: Optional[ViewMd]
+    view_md: ViewMd | None
     additional_md: dict[str, Any]

     has_pending_ops: bool = False
@@ -245,8 +246,8 @@ class TableVersionMd:
     created_at: float  # time.time()
     version: int
     schema_version: int
-    user: Optional[str] = None
-    update_status: Optional[UpdateStatus] = None
+    user: str | None = None  # User that created this version
+    update_status: UpdateStatus | None = None  # UpdateStatus of the change that created this version
     # A version fragment cannot be queried or instantiated via get_table(). A fragment represents a version of a
     # replica table that has incomplete data, and exists only to provide base table support for a dependent view.
     is_fragment: bool = False
@@ -273,7 +274,7 @@ class SchemaColumn:

     # media validation strategy of this particular media column; if not set, TableMd.media_validation applies
     # stores column.MediaValiation.name.lower()
-    media_validation: Optional[str]
+    media_validation: str | None


 @dataclasses.dataclass
@@ -284,7 +285,7 @@ class TableSchemaVersionMd:

     tbl_id: str  # uuid.UUID
     schema_version: int
-    preceding_schema_version: Optional[int]
+    preceding_schema_version: int | None
     columns: dict[int, SchemaColumn]  # col_id -> SchemaColumn
     num_retained_versions: int
     comment: str
@@ -347,7 +348,7 @@ class Function(Base):
     )
     dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
     md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # FunctionMd
-    binary_obj: orm.Mapped[Optional[bytes]] = orm.mapped_column(LargeBinary, nullable=True)
+    binary_obj: orm.Mapped[bytes | None] = orm.mapped_column(LargeBinary, nullable=True)


 class FullTableMd(NamedTuple):
pixeltable/metadata/utils.py CHANGED
@@ -1,14 +1,12 @@
 from __future__ import annotations

-from typing import Optional
-
 from pixeltable.metadata import schema


 class MetadataUtils:
     @classmethod
     def _diff_md(
-        cls, old_md: Optional[dict[int, schema.SchemaColumn]], new_md: Optional[dict[int, schema.SchemaColumn]]
+        cls, old_md: dict[int, schema.SchemaColumn] | None, new_md: dict[int, schema.SchemaColumn] | None
     ) -> str:
         """Return a string reporting the differences in a specific entry in two dictionaries

@@ -43,9 +41,7 @@ class MetadataUtils:
         return r

     @classmethod
-    def _create_md_change_dict(
-        cls, md_list: Optional[list[tuple[int, dict[int, schema.SchemaColumn]]]]
-    ) -> dict[int, str]:
+    def _create_md_change_dict(cls, md_list: list[tuple[int, dict[int, schema.SchemaColumn]]] | None) -> dict[int, str]:
         """Return a dictionary of schema changes by version
         Args:
             md_list: a list of tuples, each containing a version number and a metadata dictionary.
pixeltable/mypy/mypy_plugin.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Callable, ClassVar, Optional
+from typing import Callable, ClassVar

 from mypy import nodes
 from mypy.plugin import AnalyzeTypeContext, ClassDefContext, FunctionContext, MethodSigContext, Plugin
@@ -26,21 +26,21 @@ class PxtPlugin(Plugin):
     }
     __FULLNAME_MAP: ClassVar[dict] = {f'{k.__module__}.{k.__name__}': v for k, v in __TYPE_MAP.items()}

-    def get_function_hook(self, fullname: str) -> Optional[Callable[[FunctionContext], Type]]:
+    def get_function_hook(self, fullname: str) -> Callable[[FunctionContext], Type] | None:
         return adjust_uda_type

-    def get_type_analyze_hook(self, fullname: str) -> Optional[Callable[[AnalyzeTypeContext], Type]]:
+    def get_type_analyze_hook(self, fullname: str) -> Callable[[AnalyzeTypeContext], Type] | None:
         if fullname in self.__FULLNAME_MAP:
             subst_name = self.__FULLNAME_MAP[fullname]
             return lambda ctx: adjust_pxt_type(ctx, subst_name)
         return None

-    def get_method_signature_hook(self, fullname: str) -> Optional[Callable[[MethodSigContext], FunctionLike]]:
+    def get_method_signature_hook(self, fullname: str) -> Callable[[MethodSigContext], FunctionLike] | None:
         if fullname in (self.__ADD_COLUMN_FULLNAME, self.__ADD_COMPUTED_COLUMN_FULLNAME):
             return adjust_kwargs
         return None

-    def get_class_decorator_hook_2(self, fullname: str) -> Optional[Callable[[ClassDefContext], bool]]:
+    def get_class_decorator_hook_2(self, fullname: str) -> Callable[[ClassDefContext], bool] | None:
         if fullname == self.__UDA_FULLNAME:
             return adjust_uda_methods
         return None
pixeltable/plan.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 import dataclasses
 import enum
 from textwrap import dedent
-from typing import Any, Iterable, Literal, Optional, Sequence, cast
+from typing import Any, Iterable, Literal, Sequence, cast
 from uuid import UUID

 import pgvector.sqlalchemy  # type: ignore[import-untyped]
@@ -66,7 +66,7 @@ class JoinClause:
     """Corresponds to a single 'JOIN ... ON (...)' clause in a SELECT statement; excludes the joined table."""

     join_type: JoinType
-    join_predicate: Optional[exprs.Expr]  # None for join_type == CROSS
+    join_predicate: exprs.Expr | None  # None for join_type == CROSS


 @dataclasses.dataclass
@@ -86,25 +86,20 @@ class FromClause:
 class SampleClause:
     """Defines a sampling clause for a table."""

-    version: Optional[int]
-    n: Optional[int]
-    n_per_stratum: Optional[int]
-    fraction: Optional[float]
-    seed: Optional[int]
-    stratify_exprs: Optional[list[exprs.Expr]]
-
-    # This seed value is used if one is not supplied
-    DEFAULT_SEED = 0
+    version: int | None
+    n: int | None
+    n_per_stratum: int | None
+    fraction: float | None
+    seed: int | None
+    stratify_exprs: list[exprs.Expr] | None

     # The version of the hashing algorithm used for ordering and fractional sampling.
     CURRENT_VERSION = 1

     def __post_init__(self) -> None:
-
+        # If no version was provided, provide the default version
         if self.version is None:
             self.version = self.CURRENT_VERSION
-        if self.seed is None:
-            self.seed = self.DEFAULT_SEED

     @property
     def is_stratified(self) -> bool:
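As the __post_init__ hunk above shows, a missing version is still defaulted to CURRENT_VERSION, while a missing seed is now left as None rather than being replaced by the removed DEFAULT_SEED. A hypothetical construction, assuming the dataclass fields shown in the hunk (the values are illustrative only):

    clause = SampleClause(version=None, n=1000, n_per_stratum=None, fraction=None, seed=None, stratify_exprs=None)
    assert clause.version == SampleClause.CURRENT_VERSION  # filled in by __post_init__
    assert clause.seed is None                             # no DEFAULT_SEED fallback as of 0.4.19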
@@ -167,19 +162,19 @@ class Analyzer:
     from_clause: FromClause
     all_exprs: list[exprs.Expr]  # union of all exprs, aside from sql_where_clause
     select_list: list[exprs.Expr]
-    group_by_clause: Optional[list[exprs.Expr]]  # None for non-aggregate queries; [] for agg query w/o grouping
+    group_by_clause: list[exprs.Expr] | None  # None for non-aggregate queries; [] for agg query w/o grouping
     grouping_exprs: list[exprs.Expr]  # [] for non-aggregate queries or agg query w/o grouping
     order_by_clause: OrderByClause
     stratify_exprs: list[exprs.Expr]  # [] if no stratiifcation is required
-    sample_clause: Optional[SampleClause]  # None if no sampling clause is present
+    sample_clause: SampleClause | None  # None if no sampling clause is present

     sql_elements: exprs.SqlElementCache

     # Where clause of the Select stmt of the SQL scan
-    sql_where_clause: Optional[exprs.Expr]
+    sql_where_clause: exprs.Expr | None

     # filter predicate applied to output rows of the SQL scan
-    filter: Optional[exprs.Expr]
+    filter: exprs.Expr | None

     agg_fn_calls: list[exprs.FunctionCall]  # grouping aggregation (ie, not window functions)
     window_fn_calls: list[exprs.FunctionCall]
@@ -189,10 +184,10 @@ class Analyzer:
         self,
         from_clause: FromClause,
         select_list: Sequence[exprs.Expr],
-        where_clause: Optional[exprs.Expr] = None,
-        group_by_clause: Optional[list[exprs.Expr]] = None,
-        order_by_clause: Optional[list[tuple[exprs.Expr, bool]]] = None,
-        sample_clause: Optional[SampleClause] = None,
+        where_clause: exprs.Expr | None = None,
+        group_by_clause: list[exprs.Expr] | None = None,
+        order_by_clause: list[tuple[exprs.Expr, bool]] | None = None,
+        sample_clause: SampleClause | None = None,
     ):
         if order_by_clause is None:
             order_by_clause = []
@@ -335,7 +330,7 @@ class Analyzer:
         row_builder.set_slot_idxs(self.agg_fn_calls)
         row_builder.set_slot_idxs(self.agg_order_by)

-    def get_window_fn_ob_clause(self) -> Optional[OrderByClause]:
+    def get_window_fn_ob_clause(self) -> OrderByClause | None:
         clause: list[OrderByClause] = []
         for fn_call in self.window_fn_calls:
             # window functions require ordering by the group_by/order_by clauses
@@ -353,7 +348,7 @@ class Analyzer:
 class Planner:
     # TODO: create an exec.CountNode and change this to create_count_plan()
     @classmethod
-    def create_count_stmt(cls, tbl: catalog.TableVersionPath, where_clause: Optional[exprs.Expr] = None) -> sql.Select:
+    def create_count_stmt(cls, tbl: catalog.TableVersionPath, where_clause: exprs.Expr | None = None) -> sql.Select:
         stmt = sql.select(sql.func.count().label('all_count'))
         refd_tbl_ids: set[UUID] = set()
         if where_clause is not None:
@@ -411,7 +406,7 @@ class Planner:
         return plan

     @classmethod
-    def rowid_columns(cls, target: TableVersionHandle, num_rowid_cols: Optional[int] = None) -> list[exprs.Expr]:
+    def rowid_columns(cls, target: TableVersionHandle, num_rowid_cols: int | None = None) -> list[exprs.Expr]:
         """Return list of RowidRef for the given number of associated rowids"""
         if num_rowid_cols is None:
             num_rowid_cols = target.get().num_rowid_columns()
@@ -452,7 +447,7 @@ class Planner:
         tbl: catalog.TableVersionPath,
         update_targets: dict[catalog.Column, exprs.Expr],
         recompute_targets: list[catalog.Column],
-        where_clause: Optional[exprs.Expr],
+        where_clause: exprs.Expr | None,
         cascade: bool,
     ) -> tuple[exec.ExecNode, list[str], list[catalog.Column]]:
         """Creates a plan to materialize updated rows.
@@ -482,7 +477,8 @@ class Planner:
         else:
             recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
         # regardless of cascade, we need to update all indices on any updated/recomputed column
-
+        modified_base_cols = [c for c in set(updated_cols) | recomputed_cols if c.get_tbl().id == target.id]
+        idx_val_cols = target.get_idx_val_columns(modified_base_cols)
         recomputed_cols.update(idx_val_cols)
         # we only need to recompute stored columns (unstored ones are substituted away)
         recomputed_cols = {c for c in recomputed_cols if c.is_stored}
@@ -492,7 +488,7 @@ class Planner:
         # our query plan
         # - evaluates the update targets and recomputed columns
         # - copies all other stored columns
-        recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == tbl.tbl_version.id}
+        recomputed_base_cols = {col for col in recomputed_cols if col.get_tbl().id == tbl.tbl_version.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
@@ -527,7 +523,7 @@ class Planner:
         plan = cls._add_save_node(plan)

         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
-        return plan, [f'{c.tbl.name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
+        return plan, [f'{c.get_tbl().name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols

     @classmethod
     def __check_valid_columns(
@@ -652,11 +648,12 @@ class Planner:
         updated_cols = batch[0].keys() - target.primary_key_columns()
         recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
         # regardless of cascade, we need to update all indices on any updated column
-
+        modified_base_cols = [c for c in set(updated_cols) | recomputed_cols if c.get_tbl().id == target.id]
+        idx_val_cols = target.get_idx_val_columns(modified_base_cols)
         recomputed_cols.update(idx_val_cols)
         # we only need to recompute stored columns (unstored ones are substituted away)
         recomputed_cols = {c for c in recomputed_cols if c.is_stored}
-        recomputed_base_cols = {col for col in recomputed_cols if col.tbl.id == target.id}
+        recomputed_base_cols = {col for col in recomputed_cols if col.get_tbl().id == target.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
@@ -849,7 +846,7 @@ class Planner:
             raise excs.Error(f'Join predicate {join_clause.join_predicate} not expressible in SQL')

     @classmethod
-    def _create_combined_ordering(cls, analyzer: Analyzer, verify_agg: bool) -> Optional[OrderByClause]:
+    def _create_combined_ordering(cls, analyzer: Analyzer, verify_agg: bool) -> OrderByClause | None:
         """Verify that the various ordering requirements don't conflict and return a combined ordering"""
         ob_clauses: list[OrderByClause] = [analyzer.order_by_clause.copy()]

@@ -985,7 +982,7 @@ class Planner:
         analyzer: Analyzer,
         eval_ctx: exprs.RowBuilder.EvalCtx,
         columns: list[catalog.Column] | None = None,
-        limit: Optional[exprs.Expr] = None,
+        limit: exprs.Expr | None = None,
         with_pk: bool = False,
         exact_version_only: list[catalog.TableVersionHandle] | None = None,
     ) -> exec.ExecNode:
@@ -1006,6 +1003,7 @@ class Planner:
             analyzer.window_fn_calls
         )
         ctx = exec.ExecContext(row_builder)
+
         combined_ordering = cls._create_combined_ordering(analyzer, verify_agg=is_python_agg)
         cls._verify_join_clauses(analyzer)

@@ -1061,7 +1059,7 @@ class Planner:
             tbl,
             row_builder,
             select_list=tbl_scan_exprs,
-            columns=[c for c in columns if c.tbl.id == tbl.tbl_id],
+            columns=[c for c in columns if c.get_tbl().id == tbl.tbl_id],
             set_pk=with_pk,
             cell_md_col_refs=cls._cell_md_col_refs(tbl_scan_exprs),
             exact_version_only=exact_version_only,