pixeltable 0.4.18__py3-none-any.whl → 0.4.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pixeltable might be problematic. Click here for more details.
- pixeltable/__init__.py +1 -1
- pixeltable/_version.py +1 -0
- pixeltable/catalog/catalog.py +119 -100
- pixeltable/catalog/column.py +104 -115
- pixeltable/catalog/globals.py +1 -2
- pixeltable/catalog/insertable_table.py +44 -49
- pixeltable/catalog/path.py +3 -4
- pixeltable/catalog/schema_object.py +4 -4
- pixeltable/catalog/table.py +118 -122
- pixeltable/catalog/table_metadata.py +6 -6
- pixeltable/catalog/table_version.py +322 -257
- pixeltable/catalog/table_version_handle.py +4 -4
- pixeltable/catalog/table_version_path.py +9 -10
- pixeltable/catalog/tbl_ops.py +9 -3
- pixeltable/catalog/view.py +34 -28
- pixeltable/config.py +14 -10
- pixeltable/dataframe.py +68 -77
- pixeltable/env.py +74 -64
- pixeltable/exec/aggregation_node.py +6 -6
- pixeltable/exec/cache_prefetch_node.py +10 -10
- pixeltable/exec/data_row_batch.py +3 -3
- pixeltable/exec/exec_context.py +4 -5
- pixeltable/exec/exec_node.py +5 -5
- pixeltable/exec/expr_eval/evaluators.py +6 -6
- pixeltable/exec/expr_eval/expr_eval_node.py +8 -7
- pixeltable/exec/expr_eval/globals.py +6 -6
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +11 -11
- pixeltable/exec/in_memory_data_node.py +2 -2
- pixeltable/exec/object_store_save_node.py +14 -17
- pixeltable/exec/sql_node.py +25 -25
- pixeltable/exprs/arithmetic_expr.py +4 -4
- pixeltable/exprs/array_slice.py +2 -2
- pixeltable/exprs/column_property_ref.py +3 -3
- pixeltable/exprs/column_ref.py +61 -74
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +3 -3
- pixeltable/exprs/data_row.py +12 -12
- pixeltable/exprs/expr.py +41 -31
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +3 -3
- pixeltable/exprs/function_call.py +14 -14
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +8 -8
- pixeltable/exprs/is_null.py +1 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +6 -6
- pixeltable/exprs/literal.py +5 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +14 -14
- pixeltable/exprs/rowid_ref.py +8 -8
- pixeltable/exprs/similarity_expr.py +50 -25
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +2 -2
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +3 -3
- pixeltable/func/function.py +15 -17
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +2 -2
- pixeltable/func/query_template_function.py +16 -16
- pixeltable/func/signature.py +14 -14
- pixeltable/func/tools.py +11 -11
- pixeltable/func/udf.py +16 -18
- pixeltable/functions/__init__.py +1 -0
- pixeltable/functions/anthropic.py +7 -7
- pixeltable/functions/audio.py +76 -0
- pixeltable/functions/bedrock.py +6 -6
- pixeltable/functions/deepseek.py +4 -4
- pixeltable/functions/fireworks.py +2 -2
- pixeltable/functions/gemini.py +6 -6
- pixeltable/functions/globals.py +12 -12
- pixeltable/functions/groq.py +4 -4
- pixeltable/functions/huggingface.py +18 -20
- pixeltable/functions/image.py +7 -10
- pixeltable/functions/llama_cpp.py +7 -7
- pixeltable/functions/math.py +2 -3
- pixeltable/functions/mistralai.py +3 -3
- pixeltable/functions/ollama.py +9 -9
- pixeltable/functions/openai.py +21 -21
- pixeltable/functions/openrouter.py +7 -7
- pixeltable/functions/string.py +21 -28
- pixeltable/functions/timestamp.py +7 -8
- pixeltable/functions/together.py +4 -6
- pixeltable/functions/twelvelabs.py +92 -0
- pixeltable/functions/video.py +2 -24
- pixeltable/functions/vision.py +6 -6
- pixeltable/functions/whisper.py +7 -7
- pixeltable/functions/whisperx.py +16 -16
- pixeltable/globals.py +52 -36
- pixeltable/index/base.py +12 -8
- pixeltable/index/btree.py +19 -22
- pixeltable/index/embedding_index.py +30 -39
- pixeltable/io/datarows.py +3 -3
- pixeltable/io/external_store.py +13 -16
- pixeltable/io/fiftyone.py +5 -5
- pixeltable/io/globals.py +5 -5
- pixeltable/io/hf_datasets.py +4 -4
- pixeltable/io/label_studio.py +12 -12
- pixeltable/io/pandas.py +6 -6
- pixeltable/io/parquet.py +2 -2
- pixeltable/io/table_data_conduit.py +12 -12
- pixeltable/io/utils.py +2 -2
- pixeltable/iterators/audio.py +2 -2
- pixeltable/iterators/video.py +8 -13
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_36.py +2 -2
- pixeltable/metadata/converters/convert_38.py +2 -2
- pixeltable/metadata/converters/convert_39.py +1 -2
- pixeltable/metadata/converters/util.py +11 -13
- pixeltable/metadata/schema.py +22 -21
- pixeltable/metadata/utils.py +2 -6
- pixeltable/mypy/mypy_plugin.py +5 -5
- pixeltable/plan.py +30 -28
- pixeltable/share/packager.py +7 -7
- pixeltable/share/publish.py +3 -3
- pixeltable/store.py +125 -61
- pixeltable/type_system.py +43 -46
- pixeltable/utils/__init__.py +1 -2
- pixeltable/utils/arrow.py +4 -4
- pixeltable/utils/av.py +8 -0
- pixeltable/utils/azure_store.py +305 -0
- pixeltable/utils/code.py +1 -2
- pixeltable/utils/dbms.py +15 -19
- pixeltable/utils/description_helper.py +2 -3
- pixeltable/utils/documents.py +5 -6
- pixeltable/utils/exception_handler.py +2 -2
- pixeltable/utils/filecache.py +5 -5
- pixeltable/utils/formatter.py +4 -6
- pixeltable/utils/gcs_store.py +9 -9
- pixeltable/utils/local_store.py +17 -17
- pixeltable/utils/object_stores.py +59 -43
- pixeltable/utils/s3_store.py +35 -30
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/METADATA +1 -1
- pixeltable-0.4.19.dist-info/RECORD +213 -0
- pixeltable/__version__.py +0 -3
- pixeltable-0.4.18.dist-info/RECORD +0 -211
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/WHEEL +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/entry_points.txt +0 -0
- {pixeltable-0.4.18.dist-info → pixeltable-0.4.19.dist-info}/licenses/LICENSE +0 -0
pixeltable/io/fiftyone.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import Any, Iterator, Optional
|
|
2
|
+
from typing import Any, Iterator
|
|
3
3
|
|
|
4
4
|
import fiftyone as fo # type: ignore[import-untyped]
|
|
5
5
|
import fiftyone.utils.data as foud # type: ignore[import-untyped]
|
|
@@ -20,7 +20,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
20
20
|
__image_format: str # format to use for any exported images that are not already stored on disk
|
|
21
21
|
__labels: dict[str, tuple[exprs.Expr, type[fo.Label]]] # label_name -> (expr, label_cls)
|
|
22
22
|
__image_idx: int # index of the image expr in the select list
|
|
23
|
-
__localpath_idx: Optional[int] # index of the image localpath in the select list, if present
|
|
23
|
+
__localpath_idx: int | None # index of the image localpath in the select list, if present
|
|
24
24
|
__row_iter: Iterator[list] # iterator over the table rows, to be convered to FiftyOne samples
|
|
25
25
|
|
|
26
26
|
def __init__(
|
|
@@ -30,10 +30,10 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
30
30
|
image_format: str,
|
|
31
31
|
classifications: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
32
32
|
detections: exprs.Expr | list[exprs.Expr] | dict[str, exprs.Expr] | None = None,
|
|
33
|
-
dataset_dir: Optional[os.PathLike] = None,
|
|
33
|
+
dataset_dir: os.PathLike | None = None,
|
|
34
34
|
shuffle: bool = False,
|
|
35
35
|
seed: int | float | str | bytes | bytearray | None = None,
|
|
36
|
-
max_samples: Optional[int] = None,
|
|
36
|
+
max_samples: int | None = None,
|
|
37
37
|
):
|
|
38
38
|
super().__init__(dataset_dir=dataset_dir, shuffle=shuffle, seed=seed, max_samples=max_samples)
|
|
39
39
|
|
|
@@ -90,7 +90,7 @@ class PxtImageDatasetImporter(foud.LabeledImageDatasetImporter):
|
|
|
90
90
|
df = tbl.select(*selection)
|
|
91
91
|
self.__row_iter = df._output_row_iterator()
|
|
92
92
|
|
|
93
|
-
def __next__(self) -> tuple[str, Optional[fo.ImageMetadata], Optional[dict[str, fo.Label]]]:
|
|
93
|
+
def __next__(self) -> tuple[str, fo.ImageMetadata | None, dict[str, fo.Label] | None]:
|
|
94
94
|
row = next(self.__row_iter)
|
|
95
95
|
img = row[self.__image_idx]
|
|
96
96
|
assert isinstance(img, PIL.Image.Image)
|
pixeltable/io/globals.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import TYPE_CHECKING, Any, Literal, Optional
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Literal
|
|
4
4
|
|
|
5
5
|
import pixeltable as pxt
|
|
6
6
|
import pixeltable.exceptions as excs
|
|
@@ -15,12 +15,12 @@ if TYPE_CHECKING:
|
|
|
15
15
|
def create_label_studio_project(
|
|
16
16
|
t: Table,
|
|
17
17
|
label_config: str,
|
|
18
|
-
name: Optional[str] = None,
|
|
19
|
-
title: Optional[str] = None,
|
|
18
|
+
name: str | None = None,
|
|
19
|
+
title: str | None = None,
|
|
20
20
|
media_import_method: Literal['post', 'file', 'url'] = 'post',
|
|
21
|
-
col_mapping: Optional[dict[str, str]] = None,
|
|
21
|
+
col_mapping: dict[str, str] | None = None,
|
|
22
22
|
sync_immediately: bool = True,
|
|
23
|
-
s3_configuration: Optional[dict[str, Any]] = None,
|
|
23
|
+
s3_configuration: dict[str, Any] | None = None,
|
|
24
24
|
**kwargs: Any,
|
|
25
25
|
) -> UpdateStatus:
|
|
26
26
|
"""
|
pixeltable/io/hf_datasets.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import typing
|
|
4
|
-
from typing import Any, Optional
|
|
4
|
+
from typing import Any
|
|
5
5
|
|
|
6
6
|
import pixeltable as pxt
|
|
7
7
|
import pixeltable.type_system as ts
|
|
@@ -36,7 +36,7 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
|
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
def _to_pixeltable_type(feature_type: Any, nullable: bool) -> Optional[ts.ColumnType]:
|
|
39
|
+
def _to_pixeltable_type(feature_type: Any, nullable: bool) -> ts.ColumnType | None:
|
|
40
40
|
"""Convert a huggingface feature type to a pixeltable ColumnType if one is defined."""
|
|
41
41
|
import datasets
|
|
42
42
|
|
|
@@ -76,7 +76,7 @@ def _get_hf_schema(dataset: datasets.Dataset | datasets.DatasetDict) -> datasets
|
|
|
76
76
|
|
|
77
77
|
def huggingface_schema_to_pxt_schema(
|
|
78
78
|
hf_schema: datasets.Features, schema_overrides: dict[str, Any], primary_key: list[str]
|
|
79
|
-
) -> dict[str, Optional[ts.ColumnType]]:
|
|
79
|
+
) -> dict[str, ts.ColumnType | None]:
|
|
80
80
|
"""Generate a pixeltable schema from a huggingface dataset schema.
|
|
81
81
|
Columns without a known mapping are mapped to None
|
|
82
82
|
"""
|
|
@@ -93,7 +93,7 @@ def import_huggingface_dataset(
|
|
|
93
93
|
table_path: str,
|
|
94
94
|
dataset: datasets.Dataset | datasets.DatasetDict,
|
|
95
95
|
*,
|
|
96
|
-
schema_overrides: Optional[dict[str, Any]] = None,
|
|
96
|
+
schema_overrides: dict[str, Any] | None = None,
|
|
97
97
|
primary_key: str | list[str] | None = None,
|
|
98
98
|
**kwargs: Any,
|
|
99
99
|
) -> pxt.Table:
|
pixeltable/io/label_studio.py
CHANGED
|
@@ -4,7 +4,7 @@ import logging
|
|
|
4
4
|
import os
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Any, Iterator, Literal, Optional
|
|
7
|
+
from typing import Any, Iterator, Literal
|
|
8
8
|
from xml.etree import ElementTree as ET
|
|
9
9
|
|
|
10
10
|
import label_studio_sdk
|
|
@@ -53,7 +53,7 @@ class LabelStudioProject(Project):
|
|
|
53
53
|
|
|
54
54
|
project_id: int # Label Studio project ID
|
|
55
55
|
media_import_method: Literal['post', 'file', 'url']
|
|
56
|
-
_project: Optional[ls_project.Project]
|
|
56
|
+
_project: ls_project.Project | None
|
|
57
57
|
|
|
58
58
|
def __init__(
|
|
59
59
|
self,
|
|
@@ -61,7 +61,7 @@ class LabelStudioProject(Project):
|
|
|
61
61
|
project_id: int,
|
|
62
62
|
media_import_method: Literal['post', 'file', 'url'],
|
|
63
63
|
col_mapping: dict[ColumnHandle, str],
|
|
64
|
-
stored_proxies: Optional[dict[ColumnHandle, ColumnHandle]] = None,
|
|
64
|
+
stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
|
|
65
65
|
):
|
|
66
66
|
self.project_id = project_id
|
|
67
67
|
self.media_import_method = media_import_method
|
|
@@ -278,8 +278,8 @@ class LabelStudioProject(Project):
|
|
|
278
278
|
# columns. `rl_col_idxs` holds the indices for the columns that map to RectangleLabels
|
|
279
279
|
# preannotations; `data_col_idxs` holds the indices for the columns that map to data fields.
|
|
280
280
|
# We have to wait until we begin iterating to populate them, so they're initially `None`.
|
|
281
|
-
rl_col_idxs: Optional[list[int]] = None
|
|
282
|
-
data_col_idxs: Optional[list[int]] = None
|
|
281
|
+
rl_col_idxs: list[int] | None = None
|
|
282
|
+
data_col_idxs: list[int] | None = None
|
|
283
283
|
|
|
284
284
|
row_ids_in_pxt: set[tuple] = set()
|
|
285
285
|
tasks_created = 0
|
|
@@ -349,7 +349,7 @@ class LabelStudioProject(Project):
|
|
|
349
349
|
return sync_status
|
|
350
350
|
|
|
351
351
|
@classmethod
|
|
352
|
-
def __validate_fileurl(cls, col: Column, url: str) -> Optional[str]:
|
|
352
|
+
def __validate_fileurl(cls, col: Column, url: str) -> str | None:
|
|
353
353
|
# Check that the URL is one that will be visible to Label Studio. If it isn't, log an info message
|
|
354
354
|
# to help users debug the issue.
|
|
355
355
|
if not (url.startswith('http://') or url.startswith('https://')):
|
|
@@ -497,7 +497,7 @@ class LabelStudioProject(Project):
|
|
|
497
497
|
|
|
498
498
|
@classmethod
|
|
499
499
|
def __coco_to_predictions(
|
|
500
|
-
cls, coco_annotations: dict[str, Any], from_name: str, rl_info: '_RectangleLabel', task_id: Optional[int] = None
|
|
500
|
+
cls, coco_annotations: dict[str, Any], from_name: str, rl_info: '_RectangleLabel', task_id: int | None = None
|
|
501
501
|
) -> dict[str, Any]:
|
|
502
502
|
width = coco_annotations['image']['width']
|
|
503
503
|
height = coco_annotations['image']['height']
|
|
@@ -549,11 +549,11 @@ class LabelStudioProject(Project):
|
|
|
549
549
|
cls,
|
|
550
550
|
t: Table,
|
|
551
551
|
label_config: str,
|
|
552
|
-
name: Optional[str],
|
|
553
|
-
title: Optional[str],
|
|
552
|
+
name: str | None,
|
|
553
|
+
title: str | None,
|
|
554
554
|
media_import_method: Literal['post', 'file', 'url'],
|
|
555
|
-
col_mapping: Optional[dict[str, str]],
|
|
556
|
-
s3_configuration: Optional[dict[str, Any]],
|
|
555
|
+
col_mapping: dict[str, str] | None,
|
|
556
|
+
s3_configuration: dict[str, Any] | None,
|
|
557
557
|
**kwargs: Any,
|
|
558
558
|
) -> 'LabelStudioProject':
|
|
559
559
|
"""
|
|
@@ -652,7 +652,7 @@ class LabelStudioProject(Project):
|
|
|
652
652
|
|
|
653
653
|
@dataclass(frozen=True)
|
|
654
654
|
class _DataKey:
|
|
655
|
-
name: Optional[str] # The 'name' attribute of the data key; may differ from the field name
|
|
655
|
+
name: str | None # The 'name' attribute of the data key; may differ from the field name
|
|
656
656
|
column_type: ts.ColumnType
|
|
657
657
|
|
|
658
658
|
|
pixeltable/io/pandas.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
-
from typing import Any, Optional
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
import pandas as pd
|
|
@@ -16,7 +16,7 @@ def import_pandas(
|
|
|
16
16
|
tbl_name: str,
|
|
17
17
|
df: pd.DataFrame,
|
|
18
18
|
*,
|
|
19
|
-
schema_overrides: Optional[dict[str, Any]] = None,
|
|
19
|
+
schema_overrides: dict[str, Any] | None = None,
|
|
20
20
|
primary_key: str | list[str] | None = None,
|
|
21
21
|
num_retained_versions: int = 10,
|
|
22
22
|
comment: str = '',
|
|
@@ -56,7 +56,7 @@ def import_pandas(
|
|
|
56
56
|
def import_csv(
|
|
57
57
|
tbl_name: str,
|
|
58
58
|
filepath_or_buffer: str | os.PathLike,
|
|
59
|
-
schema_overrides: Optional[dict[str, Any]] = None,
|
|
59
|
+
schema_overrides: dict[str, Any] | None = None,
|
|
60
60
|
primary_key: str | list[str] | None = None,
|
|
61
61
|
num_retained_versions: int = 10,
|
|
62
62
|
comment: str = '',
|
|
@@ -86,7 +86,7 @@ def import_excel(
|
|
|
86
86
|
tbl_name: str,
|
|
87
87
|
io: str | os.PathLike,
|
|
88
88
|
*,
|
|
89
|
-
schema_overrides: Optional[dict[str, Any]] = None,
|
|
89
|
+
schema_overrides: dict[str, Any] | None = None,
|
|
90
90
|
primary_key: str | list[str] | None = None,
|
|
91
91
|
num_retained_versions: int = 10,
|
|
92
92
|
comment: str = '',
|
|
@@ -141,7 +141,7 @@ def df_infer_schema(
|
|
|
141
141
|
return pd_schema
|
|
142
142
|
|
|
143
143
|
|
|
144
|
-
def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) -> Optional[ts.ColumnType]:
|
|
144
|
+
def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) -> ts.ColumnType | None:
|
|
145
145
|
"""
|
|
146
146
|
Determines a pixeltable ColumnType from a pandas dtype
|
|
147
147
|
|
|
@@ -192,7 +192,7 @@ def __pd_coltype_to_pxt_type(pd_dtype: DtypeObj, data_col: pd.Series, nullable:
|
|
|
192
192
|
|
|
193
193
|
|
|
194
194
|
def _df_row_to_pxt_row(
|
|
195
|
-
row: tuple[Any, ...], schema: dict[str, ts.ColumnType], col_mapping: Optional[dict[str, str]]
|
|
195
|
+
row: tuple[Any, ...], schema: dict[str, ts.ColumnType], col_mapping: dict[str, str] | None
|
|
196
196
|
) -> dict[str, Any]:
|
|
197
197
|
"""Convert a row to insertable format"""
|
|
198
198
|
pxt_row: dict[str, Any] = {}
|
pixeltable/io/parquet.py
CHANGED
|
@@ -4,7 +4,7 @@ import json
|
|
|
4
4
|
import logging
|
|
5
5
|
import typing
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Any, Optional
|
|
7
|
+
from typing import Any
|
|
8
8
|
|
|
9
9
|
import pixeltable as pxt
|
|
10
10
|
import pixeltable.exceptions as excs
|
|
@@ -71,7 +71,7 @@ def import_parquet(
|
|
|
71
71
|
table: str,
|
|
72
72
|
*,
|
|
73
73
|
parquet_path: str,
|
|
74
|
-
schema_overrides: Optional[dict[str, Any]] = None,
|
|
74
|
+
schema_overrides: dict[str, Any] | None = None,
|
|
75
75
|
primary_key: str | list[str] | None = None,
|
|
76
76
|
**kwargs: Any,
|
|
77
77
|
) -> pxt.Table:
|
pixeltable/io/table_data_conduit.py
CHANGED
|
@@ -8,7 +8,7 @@ import urllib.parse
|
|
|
8
8
|
import urllib.request
|
|
9
9
|
from dataclasses import dataclass, field, fields
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, Optional, cast
|
|
11
|
+
from typing import TYPE_CHECKING, Any, Iterable, Iterator, Literal, cast
|
|
12
12
|
|
|
13
13
|
import numpy as np
|
|
14
14
|
import pandas as pd
|
|
@@ -50,15 +50,15 @@ class TableDataConduitFormat(str, enum.Enum):
|
|
|
50
50
|
@dataclass
|
|
51
51
|
class TableDataConduit:
|
|
52
52
|
source: 'TableDataSource'
|
|
53
|
-
source_format: Optional[str] = None
|
|
54
|
-
source_column_map: Optional[dict[str, str]] = None
|
|
53
|
+
source_format: str | None = None
|
|
54
|
+
source_column_map: dict[str, str] | None = None
|
|
55
55
|
if_row_exists: Literal['update', 'ignore', 'error'] = 'error'
|
|
56
|
-
pxt_schema: Optional[dict[str, ts.ColumnType]] = None
|
|
57
|
-
src_schema_overrides: Optional[dict[str, ts.ColumnType]] = None
|
|
58
|
-
src_schema: Optional[dict[str, ts.ColumnType]] = None
|
|
59
|
-
pxt_pk: Optional[list[str]] = None
|
|
60
|
-
src_pk: Optional[list[str]] = None
|
|
61
|
-
valid_rows: Optional[RowData] = None
|
|
56
|
+
pxt_schema: dict[str, ts.ColumnType] | None = None
|
|
57
|
+
src_schema_overrides: dict[str, ts.ColumnType] | None = None
|
|
58
|
+
src_schema: dict[str, ts.ColumnType] | None = None
|
|
59
|
+
pxt_pk: list[str] | None = None
|
|
60
|
+
src_pk: list[str] | None = None
|
|
61
|
+
valid_rows: RowData | None = None
|
|
62
62
|
extra_fields: dict[str, Any] = field(default_factory=dict)
|
|
63
63
|
|
|
64
64
|
reqd_col_names: set[str] = field(default_factory=set)
|
|
@@ -151,7 +151,7 @@ class DFTableDataConduit(TableDataConduit):
|
|
|
151
151
|
|
|
152
152
|
|
|
153
153
|
class RowDataTableDataConduit(TableDataConduit):
|
|
154
|
-
raw_rows: Optional[RowData] = None
|
|
154
|
+
raw_rows: RowData | None = None
|
|
155
155
|
disable_mapping: bool = True
|
|
156
156
|
batch_count: int = 0
|
|
157
157
|
|
|
@@ -332,7 +332,7 @@ class HFTableDataConduit(TableDataConduit):
|
|
|
332
332
|
- use set_format('arrow') and convert ChunkedArrays to PIL.Image.Image instead of going through numpy, which is slow
|
|
333
333
|
"""
|
|
334
334
|
|
|
335
|
-
column_name_for_split: Optional[str] = None
|
|
335
|
+
column_name_for_split: str | None = None
|
|
336
336
|
categorical_features: dict[str, dict[int, str]]
|
|
337
337
|
dataset_dict: dict[str, datasets.Dataset] = None
|
|
338
338
|
hf_schema_source: dict[str, Any] = None
|
|
@@ -478,7 +478,7 @@ class HFTableDataConduit(TableDataConduit):
|
|
|
478
478
|
|
|
479
479
|
|
|
480
480
|
class ParquetTableDataConduit(TableDataConduit):
|
|
481
|
-
pq_ds: Optional[ParquetDataset] = None
|
|
481
|
+
pq_ds: ParquetDataset | None = None
|
|
482
482
|
|
|
483
483
|
@classmethod
|
|
484
484
|
def from_tds(cls, tds: TableDataConduit) -> 'ParquetTableDataConduit':
|
pixeltable/io/utils.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from keyword import iskeyword as is_python_keyword
|
|
2
|
-
from typing import Any, Optional
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
import pixeltable as pxt
|
|
5
5
|
import pixeltable.exceptions as excs
|
|
@@ -40,7 +40,7 @@ def normalize_schema_names(
|
|
|
40
40
|
primary_key: list[str],
|
|
41
41
|
schema_overrides: dict[str, Any],
|
|
42
42
|
require_valid_pxt_column_names: bool = False,
|
|
43
|
-
) -> tuple[dict[str, Any], list[str], Optional[dict[str, str]]]:
|
|
43
|
+
) -> tuple[dict[str, Any], list[str], dict[str, str] | None]:
|
|
44
44
|
"""
|
|
45
45
|
Convert all names in the input schema from source names to valid Pixeltable identifiers
|
|
46
46
|
- Ensure that all names are unique.
|
pixeltable/iterators/audio.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from fractions import Fraction
|
|
3
3
|
from pathlib import Path
|
|
4
|
-
from typing import Any, ClassVar, Optional
|
|
4
|
+
from typing import Any, ClassVar
|
|
5
5
|
|
|
6
6
|
import av
|
|
7
7
|
|
|
@@ -37,7 +37,7 @@ class AudioSplitter(ComponentIterator):
|
|
|
37
37
|
|
|
38
38
|
# List of chunks to extract
|
|
39
39
|
# Each chunk is defined by start and end presentation timestamps in audio file (int)
|
|
40
|
-
chunks_to_extract_in_pts: Optional[list[tuple[int, int]]]
|
|
40
|
+
chunks_to_extract_in_pts: list[tuple[int, int]] | None
|
|
41
41
|
# next chunk to extract
|
|
42
42
|
next_pos: int
|
|
43
43
|
|
pixeltable/iterators/video.py
CHANGED
|
@@ -4,7 +4,7 @@ import math
|
|
|
4
4
|
import subprocess
|
|
5
5
|
from fractions import Fraction
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
from typing import Any, Iterator, Literal, Optional
|
|
7
|
+
from typing import Any, Iterator, Literal
|
|
8
8
|
|
|
9
9
|
import av
|
|
10
10
|
import pandas as pd
|
|
@@ -42,9 +42,9 @@ class FrameIterator(ComponentIterator):
|
|
|
42
42
|
[Frame](https://pyav.org/docs/develop/api/frame.html)):
|
|
43
43
|
|
|
44
44
|
* `index` (`int`)
|
|
45
|
-
* `pts` (`Optional[int]`)
|
|
46
|
-
* `dts` (`Optional[int]`)
|
|
47
|
-
* `time` (`Optional[float]`)
|
|
45
|
+
* `pts` (`int | None`)
|
|
46
|
+
* `dts` (`int | None`)
|
|
47
|
+
* `time` (`float | None`)
|
|
48
48
|
* `is_corrupt` (`bool`)
|
|
49
49
|
* `key_frame` (`bool`)
|
|
50
50
|
* `pict_type` (`int`)
|
|
@@ -55,8 +55,8 @@ class FrameIterator(ComponentIterator):
|
|
|
55
55
|
|
|
56
56
|
# Input parameters
|
|
57
57
|
video_path: Path
|
|
58
|
-
fps: Optional[float]
|
|
59
|
-
num_frames: Optional[int]
|
|
58
|
+
fps: float | None
|
|
59
|
+
num_frames: int | None
|
|
60
60
|
all_frame_attrs: bool
|
|
61
61
|
|
|
62
62
|
# Video info
|
|
@@ -67,19 +67,14 @@ class FrameIterator(ComponentIterator):
|
|
|
67
67
|
video_start_time: int
|
|
68
68
|
|
|
69
69
|
# List of frame indices to be extracted, or None to extract all frames
|
|
70
|
-
frames_to_extract: Optional[list[int]]
|
|
70
|
+
frames_to_extract: list[int] | None
|
|
71
71
|
|
|
72
72
|
# Next frame to extract, as an iterator `pos` index. If `frames_to_extract` is None, this is the same as the
|
|
73
73
|
# frame index in the video. Otherwise, the corresponding video index is `frames_to_extract[next_pos]`.
|
|
74
74
|
next_pos: int
|
|
75
75
|
|
|
76
76
|
def __init__(
|
|
77
|
-
self,
|
|
78
|
-
video: str,
|
|
79
|
-
*,
|
|
80
|
-
fps: Optional[float] = None,
|
|
81
|
-
num_frames: Optional[int] = None,
|
|
82
|
-
all_frame_attrs: bool = False,
|
|
77
|
+
self, video: str, *, fps: float | None = None, num_frames: int | None = None, all_frame_attrs: bool = False
|
|
83
78
|
):
|
|
84
79
|
if fps is not None and num_frames is not None:
|
|
85
80
|
raise excs.Error('At most one of `fps` or `num_frames` may be specified')
|
pixeltable/metadata/converters/convert_18.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
# Migrate a few changed function names
|
|
16
16
|
if k == 'path' and v == 'pixeltable.functions.string.str_format':
|
|
17
17
|
return 'path', 'pixeltable.functions.string.format'
|
pixeltable/metadata/converters/convert_19.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import datetime
|
|
2
|
-
from typing import Any, Optional
|
|
2
|
+
from typing import Any
|
|
3
3
|
|
|
4
4
|
import sqlalchemy as sql
|
|
5
5
|
|
|
@@ -28,7 +28,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
28
28
|
conn.execute(sql.text(f'ALTER TABLE {store_name} ALTER COLUMN col_{col_id} TYPE TIMESTAMPTZ'))
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
def __update_timestamp_literals(k: Any, v: Any) -> Optional[tuple[Any, Any]]:
|
|
31
|
+
def __update_timestamp_literals(k: Any, v: Any) -> tuple[Any, Any] | None:
|
|
32
32
|
if isinstance(v, dict) and 'val_t' in v:
|
|
33
33
|
# It's a literal with an explicit 'val_t' field. In version 19 this can only mean a
|
|
34
34
|
# timestamp literal, which (in version 19) is stored in the DB as a naive datetime.
|
pixeltable/metadata/converters/convert_20.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
if isinstance(v, dict) and '_classname' in v:
|
|
16
16
|
# The way InlineArray is represented changed in v20. Previously, literal values were stored
|
|
17
17
|
# directly in the Inline expr; now we store them in Literal sub-exprs. This converter
|
pixeltable/metadata/converters/convert_21.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -24,7 +24,7 @@ def __update_schema_column(schema_column: dict) -> None:
|
|
|
24
24
|
schema_column['media_validation'] = None
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
27
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
28
28
|
if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
|
|
29
29
|
if 'perform_validation' not in v:
|
|
30
30
|
v['perform_validation'] = False
|
pixeltable/metadata/converters/convert_22.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'DataFrame':
|
|
16
16
|
v['from_clause'] = {'tbls': [v['tbl']], 'join_clauses': []}
|
|
17
17
|
return k, v
|
pixeltable/metadata/converters/convert_24.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
from pixeltable import func
|
|
16
16
|
from pixeltable.func.globals import resolve_symbol
|
|
17
17
|
|
pixeltable/metadata/converters/convert_25.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
if k == 'path' and (
|
|
16
16
|
v in ('pixeltable.functions.huggingface.clip_text', 'pixeltable.functions.huggingface.clip_image')
|
|
17
17
|
):
|
pixeltable/metadata/converters/convert_26.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
import pixeltable.type_system as ts
|
|
16
16
|
from pixeltable.exprs.literal import Literal
|
|
17
17
|
|
pixeltable/metadata/converters/convert_29.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -12,7 +12,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
12
12
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
15
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
16
16
|
# Defaults are now stored as literals in signatures
|
|
17
17
|
if k == 'parameters':
|
|
18
18
|
for param in v:
|
|
@@ -55,8 +55,8 @@ def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], A
|
|
|
55
55
|
# We need to expand ("unroll") any var-args or var-kwargs.
|
|
56
56
|
|
|
57
57
|
new_args_len = len(new_args)
|
|
58
|
-
rolled_args: Optional[dict] = None
|
|
59
|
-
rolled_kwargs: Optional[dict] = None
|
|
58
|
+
rolled_args: dict | None = None
|
|
59
|
+
rolled_kwargs: dict | None = None
|
|
60
60
|
|
|
61
61
|
if 'signature' in v['fn']:
|
|
62
62
|
# If it's a pickled function, there's no signature, so we're out of luck; varargs in a pickled function
|
pixeltable/metadata/converters/convert_34.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
if isinstance(v, dict) and '_classname' in v and v['_classname'] == 'ColumnRef':
|
|
16
16
|
# Add reference_tbl to ColumnRef; for historical metadata it is always equal to tbl
|
|
17
17
|
assert 'reference_tbl' not in v
|
pixeltable/metadata/converters/convert_36.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Any, Optional
|
|
2
|
+
from typing import Any
|
|
3
3
|
from uuid import UUID
|
|
4
4
|
|
|
5
5
|
import sqlalchemy as sql
|
|
@@ -30,7 +30,7 @@ def __update_table_md(table_md: dict, table_id: UUID) -> None:
|
|
|
30
30
|
_logger.info(f'Updating view metadata for table: {table_id}')
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
33
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
34
34
|
if isinstance(v, dict) and (v.get('_classname') == 'DataFrame'):
|
|
35
35
|
if 'sample_clause' not in v:
|
|
36
36
|
v['sample_clause'] = None
|
pixeltable/metadata/converters/convert_38.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Optional
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
import sqlalchemy as sql
|
|
4
4
|
|
|
@@ -11,7 +11,7 @@ def _(engine: sql.engine.Engine) -> None:
|
|
|
11
11
|
convert_table_md(engine, substitution_fn=__substitute_md)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
def __substitute_md(k: Optional[str], v: Any) -> Optional[tuple[Optional[str], Any]]:
|
|
14
|
+
def __substitute_md(k: str | None, v: Any) -> tuple[str | None, Any] | None:
|
|
15
15
|
if k == 'col_mapping':
|
|
16
16
|
assert isinstance(v, list)
|
|
17
17
|
return k, [__col_mapping_entry(e) for e in v]
|
pixeltable/metadata/converters/convert_39.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Optional
|
|
3
2
|
from uuid import UUID
|
|
4
3
|
|
|
5
4
|
import sqlalchemy as sql
|
|
@@ -69,7 +68,7 @@ def find_error_columns(conn: sql.Connection, store_name: str) -> list[str]:
|
|
|
69
68
|
|
|
70
69
|
|
|
71
70
|
def migrate_error_to_cellmd_columns(
|
|
72
|
-
conn: sql.Connection, store_name: str, col_names: list[str], backup_table: Optional[str] = None
|
|
71
|
+
conn: sql.Connection, store_name: str, col_names: list[str], backup_table: str | None = None
|
|
73
72
|
) -> None:
|
|
74
73
|
"""
|
|
75
74
|
Safe version with error handling and optional backup.
|