pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of pixeltable might be problematic.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +119 -100
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +118 -122
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +322 -257
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +68 -77
- pixeltable/env.py +74 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +4 -5
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +25 -25
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +18 -20
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +2 -24
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +52 -36
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/video.py +8 -13
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +30 -28
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +125 -61
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +8 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.18.dist-info/RECORD +0 -211
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/metadata/converters/util.py  CHANGED

@@ -1,6 +1,6 @@
 import copy
 import logging
-from typing import Any, Callable
+from typing import Any, Callable
 from uuid import UUID
 
 import sqlalchemy as sql
@@ -12,11 +12,11 @@ __logger = logging.getLogger('pixeltable')
 
 def convert_table_md(
     engine: sql.engine.Engine,
-    table_md_updater:
-    column_md_updater:
-    external_store_md_updater:
-    substitution_fn:
-    table_modifier:
+    table_md_updater: Callable[[dict, UUID], None] | None = None,
+    column_md_updater: Callable[[dict], None] | None = None,
+    external_store_md_updater: Callable[[dict], None] | None = None,
+    substitution_fn: Callable[[str | None, Any], tuple[str | None, Any] | None] | None = None,
+    table_modifier: Callable[[sql.Connection, UUID, dict, dict], None] | None = None,
 ) -> None:
     """
     Converts schema.TableMd dicts based on the specified conversion functions.
@@ -80,9 +80,7 @@ def __update_external_store_md(table_md: dict, external_store_md_updater: Callab
         external_store_md_updater(store_md)
 
 
-def __substitute_md_rec(
-    md: Any, substitution_fn: Callable[[Optional[str], Any], Optional[tuple[Optional[str], Any]]]
-) -> Any:
+def __substitute_md_rec(md: Any, substitution_fn: Callable[[str | None, Any], tuple[str | None, Any] | None]) -> Any:
     if isinstance(md, dict):
         updated_dict: dict[str, Any] = {}
         for k, v in md.items():
@@ -110,8 +108,8 @@ def __substitute_md_rec(
 
 def convert_table_schema_version_md(
     engine: sql.engine.Engine,
-    table_schema_version_md_updater:
-    schema_column_updater:
+    table_schema_version_md_updater: Callable[[dict], None] | None = None,
+    schema_column_updater: Callable[[dict], None] | None = None,
 ) -> None:
     """
     Converts schema.TableSchemaVersionMd dicts based on the specified conversion functions.
@@ -150,7 +148,7 @@ def __update_schema_column(table_schema_version_md: dict, schema_column_updater:
 
 
 def convert_table_version_record(
-    engine: sql.engine.Engine, table_version_record_updater:
+    engine: sql.engine.Engine, table_version_record_updater: Callable[[TableVersion], None] | None
 ) -> None:
     with sql.orm.Session(engine, future=True) as session:
         for record in session.query(TableVersion).all():
@@ -159,7 +157,7 @@ def convert_table_version_record(
 
 
 def convert_table_schema_version_record(
-    engine: sql.engine.Engine, table_schema_version_record_updater:
+    engine: sql.engine.Engine, table_schema_version_record_updater: Callable[[TableSchemaVersion], None] | None
 ) -> None:
     with sql.orm.Session(engine, future=True) as session:
         for record in session.query(TableSchemaVersion).all():
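For orientation, a metadata converter built on the updated keyword callbacks might look like the sketch below. Only the import path and the column_md_updater parameter come from the diff above; the updater body (back-filling the new optional ColumnMd field) and the function names are purely illustrative.

    import sqlalchemy as sql

    from pixeltable.metadata.converters.util import convert_table_md

    def _add_destination_field(column_md: dict) -> None:
        # illustrative only: ensure older column metadata carries the new optional field
        column_md.setdefault('destination', None)

    def convert(engine: sql.engine.Engine) -> None:
        # per the signature above, the updater receives a ColumnMd dict and mutates it in place
        convert_table_md(engine, column_md_updater=_add_destination_field)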
pixeltable/metadata/schema.py  CHANGED

@@ -1,7 +1,8 @@
 import dataclasses
+import types
 import typing
 import uuid
-from typing import Any, NamedTuple,
+from typing import Any, NamedTuple, TypeVar, Union, get_type_hints
 
 import sqlalchemy as sql
 from sqlalchemy import BigInteger, ForeignKey, Integer, LargeBinary, orm
@@ -29,8 +30,8 @@ def md_from_dict(data_class_type: type[T], data: Any) -> T:
     origin = typing.get_origin(data_class_type)
     if origin is not None:
         type_args = typing.get_args(data_class_type)
-        if origin is Union and type(None) in type_args:
-            #
+        if (origin is Union or origin is types.UnionType) and type(None) in type_args:
+            # handling T | None, T | None
             non_none_args = [arg for arg in type_args if arg is not type(None)]
             assert len(non_none_args) == 1
             return md_from_dict(non_none_args[0], data) if data is not None else None
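The added types.UnionType check is what lets md_from_dict handle the PEP 604 annotations introduced throughout this release: at runtime, X | None and Optional[X] report different origins. A quick illustration (Python 3.10+):

    import types
    import typing
    from typing import Optional, Union

    assert typing.get_origin(Optional[int]) is Union          # PEP 484 spelling
    assert typing.get_origin(int | None) is types.UnionType   # PEP 604 spelling
    assert type(None) in typing.get_args(int | None)          # NoneType appears in the args either way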
@@ -74,7 +75,7 @@ class SystemInfo(Base):
 @dataclasses.dataclass
 class DirMd:
     name: str
-    user:
+    user: str | None
     additional_md: dict[str, Any]
 
 
@@ -103,20 +104,20 @@ class ColumnMd:
 
     id: int
     schema_version_add: int
-    schema_version_drop:
+    schema_version_drop: int | None
     col_type: dict
 
     # if True, is part of the primary key
     is_pk: bool
 
     # if set, this is a computed column
-    value_expr:
+    value_expr: dict | None
 
     # if True, the column is present in the stored table
-    stored:
+    stored: bool | None
 
     # If present, the URI for the destination for column values
-    destination:
+    destination: str | None = None
 
 
 @dataclasses.dataclass
@@ -132,13 +133,13 @@ class IndexMd:
     index_val_col_id: int  # column holding the values to be indexed
     index_val_undo_col_id: int  # column holding index values for deleted rows
     schema_version_add: int
-    schema_version_drop:
+    schema_version_drop: int | None
     class_fqn: str
     init_args: dict[str, Any]
 
 
 # a stored table version path is a list of (table id as str, effective table version)
-TableVersionPath = list[tuple[str,
+TableVersionPath = list[tuple[str, int | None]]
 
 
 @dataclasses.dataclass
@@ -150,16 +151,16 @@ class ViewMd:
     base_versions: TableVersionPath
 
     # filter predicate applied to the base table; view-only
-    predicate:
+    predicate: dict[str, Any] | None
 
     # sampling predicate applied to the base table; view-only
-    sample_clause:
+    sample_clause: dict[str, Any] | None
 
     # ComponentIterator subclass; only for component views
-    iterator_class_fqn:
+    iterator_class_fqn: str | None
 
     # args to pass to the iterator class constructor; only for component views
-    iterator_args:
+    iterator_args: dict[str, Any] | None
 
 
 @dataclasses.dataclass
@@ -168,7 +169,7 @@ class TableMd:
     name: str
     is_replica: bool
 
-    user:
+    user: str | None
 
     # monotonically increasing w/in Table for both data and schema changes, starting at 0
     current_version: int
@@ -194,7 +195,7 @@ class TableMd:
 
     column_md: dict[int, ColumnMd]  # col_id -> ColumnMd
     index_md: dict[int, IndexMd]  # index_id -> IndexMd
-    view_md:
+    view_md: ViewMd | None
     additional_md: dict[str, Any]
 
     has_pending_ops: bool = False
@@ -245,8 +246,8 @@ class TableVersionMd:
     created_at: float  # time.time()
     version: int
     schema_version: int
-    user:
-    update_status:
+    user: str | None = None  # User that created this version
+    update_status: UpdateStatus | None = None  # UpdateStatus of the change that created this version
     # A version fragment cannot be queried or instantiated via get_table(). A fragment represents a version of a
     # replica table that has incomplete data, and exists only to provide base table support for a dependent view.
     is_fragment: bool = False
@@ -273,7 +274,7 @@ class SchemaColumn:
 
     # media validation strategy of this particular media column; if not set, TableMd.media_validation applies
     # stores column.MediaValiation.name.lower()
-    media_validation:
+    media_validation: str | None
 
 
 @dataclasses.dataclass
@@ -284,7 +285,7 @@ class TableSchemaVersionMd:
 
     tbl_id: str  # uuid.UUID
     schema_version: int
-    preceding_schema_version:
+    preceding_schema_version: int | None
     columns: dict[int, SchemaColumn]  # col_id -> SchemaColumn
     num_retained_versions: int
     comment: str
@@ -347,7 +348,7 @@ class Function(Base):
     )
     dir_id: orm.Mapped[uuid.UUID] = orm.mapped_column(UUID(as_uuid=True), ForeignKey('dirs.id'), nullable=True)
    md: orm.Mapped[dict[str, Any]] = orm.mapped_column(JSONB, nullable=False)  # FunctionMd
-    binary_obj: orm.Mapped[
+    binary_obj: orm.Mapped[bytes | None] = orm.mapped_column(LargeBinary, nullable=True)
 
 
 class FullTableMd(NamedTuple):
pixeltable/metadata/utils.py  CHANGED

@@ -1,14 +1,12 @@
 from __future__ import annotations
 
-from typing import Optional
-
 from pixeltable.metadata import schema
 
 
 class MetadataUtils:
     @classmethod
     def _diff_md(
-        cls, old_md:
+        cls, old_md: dict[int, schema.SchemaColumn] | None, new_md: dict[int, schema.SchemaColumn] | None
     ) -> str:
         """Return a string reporting the differences in a specific entry in two dictionaries
 
@@ -43,9 +41,7 @@ class MetadataUtils:
         return r
 
     @classmethod
-    def _create_md_change_dict(
-        cls, md_list: Optional[list[tuple[int, dict[int, schema.SchemaColumn]]]]
-    ) -> dict[int, str]:
+    def _create_md_change_dict(cls, md_list: list[tuple[int, dict[int, schema.SchemaColumn]]] | None) -> dict[int, str]:
         """Return a dictionary of schema changes by version
         Args:
             md_list: a list of tuples, each containing a version number and a metadata dictionary.
pixeltable/mypy/mypy_plugin.py  CHANGED

@@ -1,4 +1,4 @@
-from typing import Callable, ClassVar
+from typing import Callable, ClassVar
 
 from mypy import nodes
 from mypy.plugin import AnalyzeTypeContext, ClassDefContext, FunctionContext, MethodSigContext, Plugin
@@ -26,21 +26,21 @@ class PxtPlugin(Plugin):
     }
     __FULLNAME_MAP: ClassVar[dict] = {f'{k.__module__}.{k.__name__}': v for k, v in __TYPE_MAP.items()}
 
-    def get_function_hook(self, fullname: str) ->
+    def get_function_hook(self, fullname: str) -> Callable[[FunctionContext], Type] | None:
         return adjust_uda_type
 
-    def get_type_analyze_hook(self, fullname: str) ->
+    def get_type_analyze_hook(self, fullname: str) -> Callable[[AnalyzeTypeContext], Type] | None:
         if fullname in self.__FULLNAME_MAP:
             subst_name = self.__FULLNAME_MAP[fullname]
             return lambda ctx: adjust_pxt_type(ctx, subst_name)
         return None
 
-    def get_method_signature_hook(self, fullname: str) ->
+    def get_method_signature_hook(self, fullname: str) -> Callable[[MethodSigContext], FunctionLike] | None:
         if fullname in (self.__ADD_COLUMN_FULLNAME, self.__ADD_COMPUTED_COLUMN_FULLNAME):
             return adjust_kwargs
         return None
 
-    def get_class_decorator_hook_2(self, fullname: str) ->
+    def get_class_decorator_hook_2(self, fullname: str) -> Callable[[ClassDefContext], bool] | None:
         if fullname == self.__UDA_FULLNAME:
             return adjust_uda_methods
         return None
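For readers unfamiliar with the mypy plugin API, the hooks above follow the standard Plugin interface. A minimal, self-contained plugin with the same hook shape might look like the sketch below; the class name and the rewritten alias are hypothetical and do not reflect Pixeltable's actual plugin logic.

    from typing import Callable

    from mypy.plugin import AnalyzeTypeContext, Plugin
    from mypy.types import Type

    class SketchPlugin(Plugin):
        def get_type_analyze_hook(self, fullname: str) -> Callable[[AnalyzeTypeContext], Type] | None:
            # rewrite a hypothetical type alias to builtins.object; real plugins substitute their own types
            if fullname == 'mypkg.Json':
                return lambda ctx: ctx.api.named_type('builtins.object', [])
            return None

    def plugin(version: str) -> type[Plugin]:
        # module-level entry point that mypy resolves via the `plugins` config setting
        return SketchPlugin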
pixeltable/plan.py  CHANGED

@@ -3,7 +3,7 @@ from __future__ import annotations
 import dataclasses
 import enum
 from textwrap import dedent
-from typing import Any, Iterable, Literal,
+from typing import Any, Iterable, Literal, Sequence, cast
 from uuid import UUID
 
 import pgvector.sqlalchemy  # type: ignore[import-untyped]
@@ -66,7 +66,7 @@ class JoinClause:
     """Corresponds to a single 'JOIN ... ON (...)' clause in a SELECT statement; excludes the joined table."""
 
     join_type: JoinType
-    join_predicate:
+    join_predicate: exprs.Expr | None  # None for join_type == CROSS
 
 
 @dataclasses.dataclass
@@ -86,12 +86,12 @@ class FromClause:
 class SampleClause:
     """Defines a sampling clause for a table."""
 
-    version:
-    n:
-    n_per_stratum:
-    fraction:
-    seed:
-    stratify_exprs:
+    version: int | None
+    n: int | None
+    n_per_stratum: int | None
+    fraction: float | None
+    seed: int | None
+    stratify_exprs: list[exprs.Expr] | None
 
     # The version of the hashing algorithm used for ordering and fractional sampling.
     CURRENT_VERSION = 1
@@ -162,19 +162,19 @@ class Analyzer:
     from_clause: FromClause
     all_exprs: list[exprs.Expr]  # union of all exprs, aside from sql_where_clause
     select_list: list[exprs.Expr]
-    group_by_clause:
+    group_by_clause: list[exprs.Expr] | None  # None for non-aggregate queries; [] for agg query w/o grouping
     grouping_exprs: list[exprs.Expr]  # [] for non-aggregate queries or agg query w/o grouping
     order_by_clause: OrderByClause
     stratify_exprs: list[exprs.Expr]  # [] if no stratiifcation is required
-    sample_clause:
+    sample_clause: SampleClause | None  # None if no sampling clause is present
 
     sql_elements: exprs.SqlElementCache
 
     # Where clause of the Select stmt of the SQL scan
-    sql_where_clause:
+    sql_where_clause: exprs.Expr | None
 
     # filter predicate applied to output rows of the SQL scan
-    filter:
+    filter: exprs.Expr | None
 
     agg_fn_calls: list[exprs.FunctionCall]  # grouping aggregation (ie, not window functions)
     window_fn_calls: list[exprs.FunctionCall]
@@ -184,10 +184,10 @@ class Analyzer:
         self,
         from_clause: FromClause,
         select_list: Sequence[exprs.Expr],
-        where_clause:
-        group_by_clause:
-        order_by_clause:
-        sample_clause:
+        where_clause: exprs.Expr | None = None,
+        group_by_clause: list[exprs.Expr] | None = None,
+        order_by_clause: list[tuple[exprs.Expr, bool]] | None = None,
+        sample_clause: SampleClause | None = None,
     ):
         if order_by_clause is None:
             order_by_clause = []
@@ -330,7 +330,7 @@ class Analyzer:
         row_builder.set_slot_idxs(self.agg_fn_calls)
         row_builder.set_slot_idxs(self.agg_order_by)
 
-    def get_window_fn_ob_clause(self) ->
+    def get_window_fn_ob_clause(self) -> OrderByClause | None:
         clause: list[OrderByClause] = []
         for fn_call in self.window_fn_calls:
             # window functions require ordering by the group_by/order_by clauses
@@ -348,7 +348,7 @@ class Analyzer:
 class Planner:
     # TODO: create an exec.CountNode and change this to create_count_plan()
     @classmethod
-    def create_count_stmt(cls, tbl: catalog.TableVersionPath, where_clause:
+    def create_count_stmt(cls, tbl: catalog.TableVersionPath, where_clause: exprs.Expr | None = None) -> sql.Select:
         stmt = sql.select(sql.func.count().label('all_count'))
         refd_tbl_ids: set[UUID] = set()
         if where_clause is not None:
@@ -406,7 +406,7 @@ class Planner:
         return plan
 
     @classmethod
-    def rowid_columns(cls, target: TableVersionHandle, num_rowid_cols:
+    def rowid_columns(cls, target: TableVersionHandle, num_rowid_cols: int | None = None) -> list[exprs.Expr]:
         """Return list of RowidRef for the given number of associated rowids"""
         if num_rowid_cols is None:
             num_rowid_cols = target.get().num_rowid_columns()
@@ -447,7 +447,7 @@ class Planner:
         tbl: catalog.TableVersionPath,
         update_targets: dict[catalog.Column, exprs.Expr],
         recompute_targets: list[catalog.Column],
-        where_clause:
+        where_clause: exprs.Expr | None,
         cascade: bool,
     ) -> tuple[exec.ExecNode, list[str], list[catalog.Column]]:
         """Creates a plan to materialize updated rows.
@@ -477,7 +477,8 @@ class Planner:
         else:
             recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
         # regardless of cascade, we need to update all indices on any updated/recomputed column
-
+        modified_base_cols = [c for c in set(updated_cols) | recomputed_cols if c.get_tbl().id == target.id]
+        idx_val_cols = target.get_idx_val_columns(modified_base_cols)
         recomputed_cols.update(idx_val_cols)
         # we only need to recompute stored columns (unstored ones are substituted away)
         recomputed_cols = {c for c in recomputed_cols if c.is_stored}
@@ -487,7 +488,7 @@ class Planner:
         # our query plan
         # - evaluates the update targets and recomputed columns
         # - copies all other stored columns
-        recomputed_base_cols = {col for col in recomputed_cols if col.
+        recomputed_base_cols = {col for col in recomputed_cols if col.get_tbl().id == tbl.tbl_version.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
@@ -522,7 +523,7 @@ class Planner:
         plan = cls._add_save_node(plan)
 
         recomputed_user_cols = [c for c in recomputed_cols if c.name is not None]
-        return plan, [f'{c.
+        return plan, [f'{c.get_tbl().name}.{c.name}' for c in updated_cols + recomputed_user_cols], recomputed_user_cols
 
     @classmethod
     def __check_valid_columns(
@@ -647,11 +648,12 @@ class Planner:
         updated_cols = batch[0].keys() - target.primary_key_columns()
         recomputed_cols = target.get_dependent_columns(updated_cols) if cascade else set()
         # regardless of cascade, we need to update all indices on any updated column
-
+        modified_base_cols = [c for c in set(updated_cols) | recomputed_cols if c.get_tbl().id == target.id]
+        idx_val_cols = target.get_idx_val_columns(modified_base_cols)
         recomputed_cols.update(idx_val_cols)
         # we only need to recompute stored columns (unstored ones are substituted away)
         recomputed_cols = {c for c in recomputed_cols if c.is_stored}
-        recomputed_base_cols = {col for col in recomputed_cols if col.
+        recomputed_base_cols = {col for col in recomputed_cols if col.get_tbl().id == target.id}
         copied_cols = [
             col
             for col in target.cols_by_id.values()
@@ -844,7 +846,7 @@ class Planner:
             raise excs.Error(f'Join predicate {join_clause.join_predicate} not expressible in SQL')
 
     @classmethod
-    def _create_combined_ordering(cls, analyzer: Analyzer, verify_agg: bool) ->
+    def _create_combined_ordering(cls, analyzer: Analyzer, verify_agg: bool) -> OrderByClause | None:
         """Verify that the various ordering requirements don't conflict and return a combined ordering"""
         ob_clauses: list[OrderByClause] = [analyzer.order_by_clause.copy()]
 
@@ -980,7 +982,7 @@ class Planner:
         analyzer: Analyzer,
         eval_ctx: exprs.RowBuilder.EvalCtx,
         columns: list[catalog.Column] | None = None,
-        limit:
+        limit: exprs.Expr | None = None,
         with_pk: bool = False,
         exact_version_only: list[catalog.TableVersionHandle] | None = None,
     ) -> exec.ExecNode:
@@ -1057,7 +1059,7 @@ class Planner:
             tbl,
             row_builder,
             select_list=tbl_scan_exprs,
-            columns=[c for c in columns if c.
+            columns=[c for c in columns if c.get_tbl().id == tbl.tbl_id],
             set_pk=with_pk,
             cell_md_col_refs=cls._cell_md_col_refs(tbl_scan_exprs),
             exact_version_only=exact_version_only,
pixeltable/share/packager.py  CHANGED

@@ -7,7 +7,7 @@ import urllib.parse
 import urllib.request
 import uuid
 from pathlib import Path
-from typing import Any, Iterator
+from typing import Any, Iterator
 from uuid import UUID
 
 import more_itertools
@@ -57,7 +57,7 @@ class TablePackager:
     preview_header: dict[str, str]
     preview: list[list[Any]]
 
-    def __init__(self, table: catalog.Table, additional_md:
+    def __init__(self, table: catalog.Table, additional_md: dict[str, Any] | None = None) -> None:
         self.table = table
         self.tmp_dir = TempStore.create_path()
         self.media_files = {}
@@ -342,11 +342,11 @@ class TablePackager:
         scaled_img.save(buffer, 'webp')
         return base64.b64encode(buffer.getvalue()).decode()
 
-    def __encode_video(self, video_path: str) ->
+    def __encode_video(self, video_path: str) -> str | None:
         thumb = Formatter.extract_first_video_frame(video_path)
         return self.__encode_image(thumb) if thumb is not None else None
 
-    def __encode_document(self, doc_path: str) ->
+    def __encode_document(self, doc_path: str) -> str | None:
         thumb = Formatter.make_document_thumbnail(doc_path)
         return self.__encode_image(thumb) if thumb is not None else None
 
@@ -364,11 +364,11 @@ class TableRestorer:
     """
 
     tbl_path: str
-    md:
+    md: dict[str, Any] | None
     tmp_dir: Path
     media_files: dict[str, str]  # Mapping from pxtmedia:// URLs to local file:// URLs
 
-    def __init__(self, tbl_path: str, md:
+    def __init__(self, tbl_path: str, md: dict[str, Any] | None = None) -> None:
         self.tbl_path = tbl_path
         self.md = md
         self.tmp_dir = TempStore.create_path()
@@ -710,7 +710,7 @@ class TableRestorer:
         self,
         val: Any,
         sql_type: sql.types.TypeEngine[Any],
-        col:
+        col: catalog.Column | None,
         is_media_col: bool,
         is_cellmd_col: bool,
     ) -> Any:
pixeltable/share/publish.py  CHANGED

@@ -3,7 +3,7 @@ import sys
 import urllib.parse
 import urllib.request
 from pathlib import Path
-from typing import Literal
+from typing import Literal
 
 import requests
 from requests.adapters import HTTPAdapter
@@ -159,7 +159,7 @@ def _download_bundle_from_s3(parsed_location: urllib.parse.ParseResult, bundle_f
 
 
 def _create_retry_session(
-    max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist:
+    max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: list | None = None
 ) -> requests.Session:
     """Create a requests session with retry configuration"""
     if status_forcelist is None:
@@ -222,7 +222,7 @@ def _upload_to_presigned_url(file_path: Path, url: str, max_retries: int = 3) ->
 
 
 def _download_from_presigned_url(
-    url: str, output_path: Path, headers:
+    url: str, output_path: Path, headers: dict[str, str] | None = None, max_retries: int = 3
 ) -> None:
     """Download file with progress bar and retries"""
     session = _create_retry_session(max_retries=max_retries)
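The hunk only shows the signature of _create_retry_session, so as a rough sketch, a retry-enabled requests session with those parameters typically looks like the following; the status codes and the urllib3 Retry wiring below are assumptions, not taken from the diff.

    import requests
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    def make_retry_session(
        max_retries: int = 3, backoff_factor: float = 1.0, status_forcelist: list | None = None
    ) -> requests.Session:
        if status_forcelist is None:
            status_forcelist = [429, 500, 502, 503, 504]  # assumed defaults
        retry = Retry(total=max_retries, backoff_factor=backoff_factor, status_forcelist=status_forcelist)
        session = requests.Session()
        adapter = HTTPAdapter(max_retries=retry)
        # mount the retrying adapter for both schemes
        session.mount('https://', adapter)
        session.mount('http://', adapter)
        return session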