pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/io/hf_datasets.py CHANGED

@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import typing
-from typing import Any
+from typing import Any
 
 import pixeltable as pxt
 import pixeltable.type_system as ts
@@ -31,12 +31,13 @@ _hf_to_pxt: dict[str, ts.ColumnType] = {
     'timestamp[s]': ts.TimestampType(nullable=True),
     'timestamp[ms]': ts.TimestampType(nullable=True),  # HF dataset iterator converts timestamps to datetime.datetime
     'timestamp[us]': ts.TimestampType(nullable=True),
+    'timestamp[ns]': ts.TimestampType(nullable=True),
     'date32': ts.DateType(nullable=True),
     'date64': ts.DateType(nullable=True),
 }
 
 
-def _to_pixeltable_type(feature_type: Any, nullable: bool) ->
+def _to_pixeltable_type(feature_type: Any, nullable: bool) -> ts.ColumnType | None:
     """Convert a huggingface feature type to a pixeltable ColumnType if one is defined."""
     import datasets
 
@@ -47,28 +48,51 @@ def _to_pixeltable_type(feature_type: Any, nullable: bool) -> Optional[ts.Column
         # example: Value(dtype='int64', id=None)
         pt = _hf_to_pxt.get(feature_type.dtype, None)
         return pt.copy(nullable=nullable) if pt is not None else None
-    elif isinstance(feature_type, datasets.Sequence):
+    elif isinstance(feature_type, (datasets.Sequence, datasets.LargeList)):
         # example: cohere wiki. Sequence(feature=Value(dtype='float32', id=None), length=-1, id=None)
         dtype = _to_pixeltable_type(feature_type.feature, nullable)
-
-
+        if dtype is None:
+            return None
+        if dtype.is_int_type() or dtype.is_float_type() or dtype.is_bool_type() or dtype.is_string_type():
+            length = feature_type.length if feature_type.length != -1 else None
+            return ts.ArrayType(shape=(length,), dtype=dtype, nullable=nullable)
+        else:
+            # Sequence of dicts must be cast as Json
+            return ts.JsonType(nullable=nullable)
     elif isinstance(feature_type, datasets.Image):
        return ts.ImageType(nullable=nullable)
+    elif isinstance(feature_type, datasets.Audio):
+        return ts.AudioType(nullable=nullable)
+    elif isinstance(feature_type, datasets.Video):
+        return ts.VideoType(nullable=nullable)
+    elif isinstance(feature_type, (datasets.Array2D, datasets.Array3D, datasets.Array4D, datasets.Array5D)):
+        # Multi-dimensional arrays with fixed shape and dtype
+        inner_dtype = _hf_to_pxt.get(feature_type.dtype, None)
+        if inner_dtype is None:
+            return None
+        return ts.ArrayType(shape=feature_type.shape, dtype=inner_dtype, nullable=nullable)
+    elif isinstance(feature_type, (datasets.Translation, datasets.TranslationVariableLanguages)):
+        # Translation types are dict-like structures
+        return ts.JsonType(nullable=nullable)
+    elif isinstance(feature_type, (list, dict)):
+        return ts.JsonType(nullable=nullable)
     else:
         return None
 
 
-def _get_hf_schema(dataset:
+def _get_hf_schema(dataset: datasets.Dataset | datasets.DatasetDict) -> datasets.Features:
     """Get the schema of a huggingface dataset as a dictionary."""
     import datasets
 
-    first_dataset =
+    first_dataset = (
+        dataset if isinstance(dataset, (datasets.Dataset, datasets.IterableDataset)) else next(iter(dataset.values()))
+    )
     return first_dataset.features
 
 
 def huggingface_schema_to_pxt_schema(
     hf_schema: datasets.Features, schema_overrides: dict[str, Any], primary_key: list[str]
-) -> dict[str,
+) -> dict[str, ts.ColumnType | None]:
     """Generate a pixeltable schema from a huggingface dataset schema.
     Columns without a known mapping are mapped to None
     """
@@ -83,20 +107,41 @@ def huggingface_schema_to_pxt_schema(
 
 def import_huggingface_dataset(
     table_path: str,
-    dataset:
+    dataset: datasets.Dataset | datasets.DatasetDict | datasets.IterableDataset | datasets.IterableDatasetDict,
     *,
-    schema_overrides:
-    primary_key:
+    schema_overrides: dict[str, Any] | None = None,
+    primary_key: str | list[str] | None = None,
     **kwargs: Any,
 ) -> pxt.Table:
-    """
-
+    """
+    Create a new base table from a Huggingface dataset, or dataset dict with multiple splits.
+    Requires `datasets` library to be installed.
+
+    HuggingFace feature types are mapped to Pixeltable column types as follows:
+
+    - `Value(bool)`: `Bool`<br/>
+      `Value(int*/uint*)`: `Int`<br/>
+      `Value(float*)`: `Float`<br/>
+      `Value(string/large_string)`: `String`<br/>
+      `Value(timestamp*)`: `Timestamp`<br/>
+      `Value(date*)`: `Date`
+    - `ClassLabel`: `String` (converted to label names)
+    - `Sequence`/`LargeList` of numeric types: `Array`
+    - `Sequence`/`LargeList` of string: `Json`
+    - `Sequence`/`LargeList` of dicts: `Json`
+    - `Array2D`-`Array5D`: `Array` (preserves shape)
+    - `Image`: `Image`
+    - `Audio`: `Audio`
+    - `Video`: `Video`
+    - `Translation`/`TranslationVariableLanguages`: `Json`
 
     Args:
         table_path: Path to the table.
-        dataset: Huggingface
-
-
+        dataset: An instance of any of the Huggingface dataset classes:
+            [`datasets.Dataset`](https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.Dataset),
+            [`datasets.DatasetDict`](https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.DatasetDict),
+            [`datasets.IterableDataset`](https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.IterableDataset),
+            [`datasets.IterableDatasetDict`](https://huggingface.co/docs/datasets/en/package_reference/main_classes#datasets.IterableDatasetDict)
         schema_overrides: If specified, then for each (name, type) pair in `schema_overrides`, the column with
             name `name` will be given type `type`, instead of being inferred from the `Dataset` or `DatasetDict`.
            The keys in `schema_overrides` should be the column names of the `Dataset` or `DatasetDict` (whether or not
pixeltable/io/label_studio.py CHANGED

@@ -4,19 +4,22 @@ import logging
 import os
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, Iterator, Literal
+from typing import Any, Iterator, Literal
 from xml.etree import ElementTree as ET
 
-import label_studio_sdk
+import label_studio_sdk
 import PIL.Image
 from requests.exceptions import HTTPError
 
 import pixeltable.type_system as ts
 from pixeltable import Column, Table, env, exceptions as excs
+from pixeltable.catalog import ColumnHandle
+from pixeltable.catalog.update_status import RowCountStats, UpdateStatus
 from pixeltable.config import Config
 from pixeltable.exprs import ColumnRef, DataRow, Expr
-from pixeltable.io.external_store import Project
+from pixeltable.io.external_store import Project
 from pixeltable.utils import coco
+from pixeltable.utils.local_store import TempStore
 
 # label_studio_sdk>=1 and label_studio_sdk<1 are not compatible, so we need to try
 # the import two different ways to insure intercompatibility
@@ -25,7 +28,7 @@ try:
     import label_studio_sdk.project as ls_project  # type: ignore
 except ImportError:
     # label_studio_sdk>=1 compatibility
-    import label_studio_sdk._legacy.project as ls_project
+    import label_studio_sdk._legacy.project as ls_project
 
 _logger = logging.getLogger('pixeltable')
 
@@ -43,23 +46,26 @@ class LabelStudioProject(Project):
     """
     An [`ExternalStore`][pixeltable.io.ExternalStore] that represents a Label Studio project, providing functionality
     for synchronizing between a Pixeltable table and a Label Studio project.
+
+    The constructor will NOT create a new Label Studio project; it is also used when loading
+    metadata for existing projects.
     """
 
+    project_id: int  # Label Studio project ID
+    media_import_method: Literal['post', 'file', 'url']
+    _project: ls_project.Project | None
+
     def __init__(
         self,
         name: str,
         project_id: int,
         media_import_method: Literal['post', 'file', 'url'],
-        col_mapping: dict[
-        stored_proxies:
+        col_mapping: dict[ColumnHandle, str],
+        stored_proxies: dict[ColumnHandle, ColumnHandle] | None = None,
     ):
-        """
-        The constructor will NOT create a new Label Studio project; it is also used when loading
-        metadata for existing projects.
-        """
         self.project_id = project_id
         self.media_import_method = media_import_method
-        self._project
+        self._project = None
         super().__init__(name, col_mapping, stored_proxies)
 
     @property
@@ -105,20 +111,20 @@ class LabelStudioProject(Project):
         """
         return {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}
 
-    def sync(self, t: Table, export_data: bool, import_data: bool) ->
+    def sync(self, t: Table, export_data: bool, import_data: bool) -> UpdateStatus:
         _logger.info(
             f'Syncing Label Studio project "{self.project_title}" with table `{t._name}`'
             f' (export: {export_data}, import: {import_data}).'
         )
         # Collect all existing tasks into a dict with entries `rowid: task`
         tasks = {tuple(task['meta']['rowid']): task for task in self.__fetch_all_tasks()}
-        sync_status =
+        sync_status = UpdateStatus()
         if export_data:
             export_sync_status = self.__update_tasks(t, tasks)
-            sync_status
+            sync_status += export_sync_status
         if import_data:
             import_sync_status = self.__update_table_from_tasks(t, tasks)
-            sync_status
+            sync_status += import_sync_status
         return sync_status
 
     def __fetch_all_tasks(self) -> Iterator[dict[str, Any]]:
@@ -142,7 +148,7 @@ class LabelStudioProject(Project):
                 f'Label Studio project {self.project_title!r}.'
             )
 
-    def __update_tasks(self, t: Table, existing_tasks: dict[tuple, dict]) ->
+    def __update_tasks(self, t: Table, existing_tasks: dict[tuple, dict]) -> UpdateStatus:
         """
         Updates all tasks in this Label Studio project based on the Pixeltable data:
         - Creates new tasks for rows that don't map to any existing task;
@@ -155,7 +161,7 @@ class LabelStudioProject(Project):
         t_data_cols = [t_col for t_col, ext_col_name in self.col_mapping.items() if ext_col_name in config.data_keys]
 
         if len(t_data_cols) == 0:
-            return
+            return UpdateStatus()
 
         # Columns in `t` that map to `rectanglelabels` preannotations
         t_rl_cols = [
@@ -183,15 +189,15 @@ class LabelStudioProject(Project):
         self,
         t: Table,
         existing_tasks: dict[tuple, dict],
-        media_col:
-        t_rl_cols: list[
+        media_col: ColumnHandle,
+        t_rl_cols: list[ColumnHandle],
         rl_info: list['_RectangleLabel'],
-    ) ->
-        is_stored = media_col.is_stored
+    ) -> UpdateStatus:
+        is_stored = media_col.get().is_stored
         # If it's a stored column, we can use `localpath`
-        localpath_col_opt = [t[media_col.name].localpath] if is_stored else []
+        localpath_col_opt = [t[media_col.get().name].localpath] if is_stored else []
         # Select the media column, rectanglelabels columns, and localpath (if appropriate)
-        rows = t.select(t[media_col.name], *[t[col.name] for col in t_rl_cols], *localpath_col_opt)
+        rows = t.select(t[media_col.get().name], *[t[col.get().name] for col in t_rl_cols], *localpath_col_opt)
         tasks_created = 0
         row_ids_in_pxt: set[tuple] = set()
 
@@ -209,7 +215,7 @@ class LabelStudioProject(Project):
             else:
                 # No localpath; create a temp file and upload it
                 assert isinstance(row[media_col_idx], PIL.Image.Image)
-                file =
+                file = TempStore.create_path(extension='.png')
                 row[media_col_idx].save(file, format='png')
                 task_id = self.project.import_tasks(file)[0]
                 os.remove(file)
@@ -232,48 +238,48 @@ class LabelStudioProject(Project):
 
         env.Env.get().console_logger.info(f'Created {tasks_created} new task(s) in {self}.')
 
-        sync_status =
+        sync_status = UpdateStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created))
 
         deletion_sync_status = self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
-
-        return sync_status
+        sync_status += deletion_sync_status
+        return sync_status
 
     def __update_tasks_by_files(
         self,
         t: Table,
         existing_tasks: dict[tuple, dict],
-        t_data_cols: list[
-        t_rl_cols: list[
+        t_data_cols: list[ColumnHandle],
+        t_rl_cols: list[ColumnHandle],
         rl_info: list['_RectangleLabel'],
-    ) ->
+    ) -> UpdateStatus:
         ext_data_cols = [self.col_mapping[col] for col in t_data_cols]
         expr_refs: dict[str, Expr] = {}  # kwargs for the select statement
         for col in t_data_cols:
-            col_name = col.name
+            col_name = col.get().name
             if self.media_import_method == 'url':
                 expr_refs[col_name] = t[col_name].fileurl
             else:
                 assert self.media_import_method == 'file'
-                if not col.col_type.is_media_type():
+                if not col.get().col_type.is_media_type():
                     # Not a media column; query the data directly
-                    expr_refs[col_name] =
+                    expr_refs[col_name] = t[col_name]
                 elif col in self.stored_proxies:
                     # Media column that has a stored proxy; use it. We have to give it a name,
                     # since it's an anonymous column
-                    stored_proxy_col = self.stored_proxies[col]
+                    stored_proxy_col = self.stored_proxies[col].get()
                     expr_refs[f'{col_name}_proxy'] = ColumnRef(stored_proxy_col).localpath
                 else:
                     # Media column without a stored proxy; this means it's a stored computed column,
                     # and we can just use the localpath
                     expr_refs[col_name] = t[col_name].localpath
 
-
+        query = t.select(*[t[col.get().name] for col in t_rl_cols], **expr_refs)
         # The following buffers will hold `DataRow` indices that correspond to each of the selected
         # columns. `rl_col_idxs` holds the indices for the columns that map to RectangleLabels
         # preannotations; `data_col_idxs` holds the indices for the columns that map to data fields.
         # We have to wait until we begin iterating to populate them, so they're initially `None`.
-        rl_col_idxs:
-        data_col_idxs:
+        rl_col_idxs: list[int] | None = None
+        data_col_idxs: list[int] | None = None
 
         row_ids_in_pxt: set[tuple] = set()
         tasks_created = 0
@@ -286,11 +292,11 @@ class LabelStudioProject(Project):
             data_vals = [row[idx] for idx in data_col_idxs]
             coco_annotations = [row[idx] for idx in rl_col_idxs]
             for i in range(len(t_data_cols)):
-                if t_data_cols[i].col_type.is_media_type():
+                if t_data_cols[i].get().col_type.is_media_type():
                     # Special handling for media columns
                     assert isinstance(data_vals[i], str)
                     if self.media_import_method == 'url':
-                        data_vals[i] = self.__validate_fileurl(t_data_cols[i], data_vals[i])
+                        data_vals[i] = self.__validate_fileurl(t_data_cols[i].get(), data_vals[i])
                     else:
                         assert self.media_import_method == 'file'
                         data_vals[i] = self.__localpath_to_lspath(data_vals[i])
@@ -304,10 +310,10 @@ class LabelStudioProject(Project):
                 'predictions': predictions,
             }
 
-        for row in
+        for row in query._exec():
             if rl_col_idxs is None:
-                rl_col_idxs = [expr.slot_idx for expr in
-                data_col_idxs = [expr.slot_idx for expr in
+                rl_col_idxs = [expr.slot_idx for expr in query._select_list_exprs[: len(t_rl_cols)]]
+                data_col_idxs = [expr.slot_idx for expr in query._select_list_exprs[len(t_rl_cols) :]]
             row_ids_in_pxt.add(row.rowid)
             task_info = create_task_info(row)
             # TODO(aaron-siegel): Implement more efficient update logic (currently involves a full table scan)
@@ -336,14 +342,14 @@ class LabelStudioProject(Project):
             f'Created {tasks_created} new task(s) and updated {tasks_updated} existing task(s) in {self}.'
         )
 
-        sync_status =
+        sync_status = UpdateStatus(ext_row_count_stats=RowCountStats(ins_rows=tasks_created, upd_rows=tasks_updated))
 
         deletion_sync_status = self.__delete_stale_tasks(existing_tasks, row_ids_in_pxt, tasks_created)
-
-        return sync_status
+        sync_status += deletion_sync_status
+        return sync_status
 
     @classmethod
-    def __validate_fileurl(cls, col: Column, url: str) ->
+    def __validate_fileurl(cls, col: Column, url: str) -> str | None:
         # Check that the URL is one that will be visible to Label Studio. If it isn't, log an info message
         # to help users debug the issue.
         if not (url.startswith('http://') or url.startswith('https://')):
@@ -361,7 +367,7 @@ class LabelStudioProject(Project):
 
     def __delete_stale_tasks(
         self, existing_tasks: dict[tuple, dict], row_ids_in_pxt: set[tuple], tasks_created: int
-    ) ->
+    ) -> UpdateStatus:
         deleted_rowids = set(existing_tasks.keys()) - row_ids_in_pxt
         # Sanity check the math
         assert len(deleted_rowids) == len(existing_tasks) + tasks_created - len(row_ids_in_pxt)
@@ -377,11 +383,11 @@ class LabelStudioProject(Project):
         for rowid in deleted_rowids:
             del existing_tasks[rowid]
 
-        return
+        return UpdateStatus(ext_row_count_stats=RowCountStats(del_rows=len(deleted_rowids)))
 
-    def __update_table_from_tasks(self, t: Table, tasks: dict[tuple, dict]) ->
+    def __update_table_from_tasks(self, t: Table, tasks: dict[tuple, dict]) -> UpdateStatus:
         if ANNOTATIONS_COLUMN not in self.col_mapping.values():
-            return
+            return UpdateStatus()
 
         annotations = {
             # Replace [] by None to indicate no annotations. We do want to sync rows with no annotations,
@@ -391,7 +397,7 @@ class LabelStudioProject(Project):
             for task in tasks.values()
         }
 
-        local_annotations_col = next(k for k, v in self.col_mapping.items() if v == ANNOTATIONS_COLUMN)
+        local_annotations_col = next(k for k, v in self.col_mapping.items() if v == ANNOTATIONS_COLUMN).get()
 
         # Prune the annotations down to just the ones that have actually changed.
         rows = t.select(t[local_annotations_col.name])
@@ -412,23 +418,21 @@ class LabelStudioProject(Project):
             # TODO(aaron-siegel): Simplify this once propagation is properly implemented in batch_update
             ancestor = t
             while local_annotations_col not in ancestor._tbl_version.get().cols:
-                assert ancestor.
-                ancestor = ancestor.
+                assert ancestor._get_base_table is not None
+                ancestor = ancestor._get_base_table()
             update_status = ancestor.batch_update(updates)
             env.Env.get().console_logger.info(f'Updated annotation(s) from {len(updates)} task(s) in {self}.')
-            return
+            return update_status
         else:
-            return
+            return UpdateStatus()
 
     def as_dict(self) -> dict[str, Any]:
         return {
             'name': self.name,
             'project_id': self.project_id,
             'media_import_method': self.media_import_method,
-            'col_mapping': [[
-            'stored_proxies': [
-                [self._column_as_dict(k), self._column_as_dict(v)] for k, v in self.stored_proxies.items()
-            ],
+            'col_mapping': [[k.as_dict(), v] for k, v in self.col_mapping.items()],
+            'stored_proxies': [[k.as_dict(), v.as_dict()] for k, v in self.stored_proxies.items()],
         }
 
     @classmethod
@@ -437,8 +441,8 @@ class LabelStudioProject(Project):
             md['name'],
             md['project_id'],
             md['media_import_method'],
-            {
-            {
+            {ColumnHandle.from_dict(entry[0]): entry[1] for entry in md['col_mapping']},
+            {ColumnHandle.from_dict(entry[0]): ColumnHandle.from_dict(entry[1]) for entry in md['stored_proxies']},
         )
 
     def __repr__(self) -> str:
@@ -493,7 +497,7 @@ class LabelStudioProject(Project):
 
     @classmethod
     def __coco_to_predictions(
-        cls, coco_annotations: dict[str, Any], from_name: str, rl_info: '_RectangleLabel', task_id:
+        cls, coco_annotations: dict[str, Any], from_name: str, rl_info: '_RectangleLabel', task_id: int | None = None
     ) -> dict[str, Any]:
         width = coco_annotations['image']['width']
         height = coco_annotations['image']['height']
@@ -545,11 +549,11 @@ class LabelStudioProject(Project):
         cls,
         t: Table,
         label_config: str,
-        name:
-        title:
+        name: str | None,
+        title: str | None,
         media_import_method: Literal['post', 'file', 'url'],
-        col_mapping:
-        s3_configuration:
+        col_mapping: dict[str, str] | None,
+        s3_configuration: dict[str, Any] | None,
         **kwargs: Any,
     ) -> 'LabelStudioProject':
         """
@@ -560,7 +564,7 @@ class LabelStudioProject(Project):
 
         if name is None:
             # Create a default name that's unique to the table
-            all_stores = t.external_stores
+            all_stores = t.external_stores()
             n = 0
             while f'ls_project_{n}' in all_stores:
                 n += 1
@@ -576,8 +580,8 @@ class LabelStudioProject(Project):
             local_annotations_column = ANNOTATIONS_COLUMN
         else:
             local_annotations_column = next(k for k, v in col_mapping.items() if v == ANNOTATIONS_COLUMN)
-        if local_annotations_column not in t.
-            t.add_columns({local_annotations_column: ts.
+        if local_annotations_column not in t._get_schema():
+            t.add_columns({local_annotations_column: ts.Json})
 
         resolved_col_mapping = cls.validate_columns(
             t, config.export_columns, {ANNOTATIONS_COLUMN: ts.JsonType(nullable=True)}, col_mapping
@@ -648,7 +652,7 @@
 
 @dataclass(frozen=True)
 class _DataKey:
-    name:
+    name: str | None  # The 'name' attribute of the data key; may differ from the field name
     column_type: ts.ColumnType
 
 
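The Label Studio changes above replace ad-hoc sync-status handling with `UpdateStatus` objects accumulated via `+=`. A minimal sketch of that pattern, based only on the constructor and operator usage visible in the diff (the specific counts are illustrative):

```python
from pixeltable.catalog.update_status import RowCountStats, UpdateStatus

# Start from an empty status, as sync() does, then fold in the per-phase results.
status = UpdateStatus()
status += UpdateStatus(ext_row_count_stats=RowCountStats(ins_rows=3))  # e.g. tasks created on export
status += UpdateStatus(ext_row_count_stats=RowCountStats(del_rows=1))  # e.g. stale tasks deleted
# `status` now aggregates the external-store row counts from both phases and is
# what sync() returns to its caller.
```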
pixeltable/io/lancedb.py ADDED
pixeltable/io/pandas.py CHANGED

@@ -1,5 +1,6 @@
 import os
-
+import uuid
+from typing import Any
 
 import numpy as np
 import pandas as pd
@@ -16,8 +17,8 @@ def import_pandas(
     tbl_name: str,
     df: pd.DataFrame,
     *,
-    schema_overrides:
-    primary_key:
+    schema_overrides: dict[str, Any] | None = None,
+    primary_key: str | list[str] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
 ) -> pxt.Table:
@@ -55,9 +56,9 @@ def import_pandas(
 
 def import_csv(
     tbl_name: str,
-    filepath_or_buffer:
-    schema_overrides:
-    primary_key:
+    filepath_or_buffer: str | os.PathLike,
+    schema_overrides: dict[str, Any] | None = None,
+    primary_key: str | list[str] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     **kwargs: Any,
@@ -84,10 +85,10 @@ def import_csv(
 
 def import_excel(
     tbl_name: str,
-    io:
+    io: str | os.PathLike,
     *,
-    schema_overrides:
-    primary_key:
+    schema_overrides: dict[str, Any] | None = None,
+    primary_key: str | list[str] | None = None,
     num_retained_versions: int = 10,
     comment: str = '',
     **kwargs: Any,
@@ -132,6 +133,7 @@ def df_infer_schema(
     pd_schema: dict[str, ts.ColumnType] = {}
     for pd_name, pd_dtype in zip(df.columns, df.dtypes):
         if pd_name in schema_overrides:
+            assert isinstance(schema_overrides[pd_name], ts.ColumnType)
             pxt_type = schema_overrides[pd_name]
         else:
             pxt_type = __pd_coltype_to_pxt_type(pd_dtype, df[pd_name], pd_name not in primary_key)
@@ -140,7 +142,7 @@ def df_infer_schema(
     return pd_schema
 
 
-def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) ->
+def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) -> ts.ColumnType | None:
     """
     Determines a pixeltable ColumnType from a pandas dtype
 
@@ -159,7 +161,7 @@ def __pd_dtype_to_pxt_type(pd_dtype: DtypeObj, nullable: bool) -> Optional[ts.Co
         return None
     # Most other pandas dtypes are directly NumPy compatible
     assert isinstance(pd_dtype, np.dtype)
-    return ts.
+    return ts.ColumnType.from_np_dtype(pd_dtype, nullable)
 
 
 def __pd_coltype_to_pxt_type(pd_dtype: DtypeObj, data_col: pd.Series, nullable: bool) -> ts.ColumnType:
@@ -191,7 +193,7 @@ def __pd_coltype_to_pxt_type(pd_dtype: DtypeObj, data_col: pd.Series, nullable:
 
 
 def _df_row_to_pxt_row(
-    row: tuple[Any, ...], schema: dict[str, ts.ColumnType], col_mapping:
+    row: tuple[Any, ...], schema: dict[str, ts.ColumnType], col_mapping: dict[str, str] | None
 ) -> dict[str, Any]:
     """Convert a row to insertable format"""
     pxt_row: dict[str, Any] = {}
@@ -229,6 +231,13 @@ def _df_row_to_pxt_row(
                 nval = pd.Timestamp(tval).tz_localize(tz=Env.get().default_time_zone)
             else:
                 nval = tval.astimezone(Env.get().default_time_zone)
+        elif pxt_type.is_uuid_type():
+            if pd.isnull(val):
+                nval = None
+            elif isinstance(val, uuid.UUID):
+                nval = val
+            else:
+                nval = uuid.UUID(val)
         else:
             nval = val
         pxt_row[pxt_name] = nval
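The pandas importer now also recognizes UUID columns. A standalone sketch of the coercion logic added to `_df_row_to_pxt_row` (the helper name below is hypothetical; the module applies this inline per value):

```python
import uuid

import pandas as pd


def coerce_uuid(val):
    # Hypothetical helper mirroring the new branch: NaN/None -> None,
    # uuid.UUID passes through, strings are parsed into uuid.UUID.
    if pd.isnull(val):
        return None
    if isinstance(val, uuid.UUID):
        return val
    return uuid.UUID(val)


assert coerce_uuid(None) is None
assert coerce_uuid('12345678-1234-5678-1234-567812345678') == uuid.UUID('12345678-1234-5678-1234-567812345678')
```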