pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +42 -8
- pixeltable/{dataframe.py → _query.py} +470 -206
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +5 -4
- pixeltable/catalog/catalog.py +1785 -432
- pixeltable/catalog/column.py +190 -113
- pixeltable/catalog/dir.py +2 -4
- pixeltable/catalog/globals.py +19 -46
- pixeltable/catalog/insertable_table.py +191 -98
- pixeltable/catalog/path.py +63 -23
- pixeltable/catalog/schema_object.py +11 -15
- pixeltable/catalog/table.py +843 -436
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +978 -657
- pixeltable/catalog/table_version_handle.py +72 -16
- pixeltable/catalog/table_version_path.py +112 -43
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +134 -90
- pixeltable/config.py +134 -22
- pixeltable/env.py +471 -157
- pixeltable/exceptions.py +6 -0
- pixeltable/exec/__init__.py +4 -1
- pixeltable/exec/aggregation_node.py +7 -8
- pixeltable/exec/cache_prefetch_node.py +83 -110
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +4 -3
- pixeltable/exec/data_row_batch.py +8 -65
- pixeltable/exec/exec_context.py +16 -4
- pixeltable/exec/exec_node.py +13 -36
- pixeltable/exec/expr_eval/evaluators.py +11 -7
- pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
- pixeltable/exec/expr_eval/globals.py +8 -5
- pixeltable/exec/expr_eval/row_buffer.py +1 -2
- pixeltable/exec/expr_eval/schedulers.py +106 -56
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +19 -19
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +16 -9
- pixeltable/exec/sql_node.py +351 -84
- pixeltable/exprs/__init__.py +1 -1
- pixeltable/exprs/arithmetic_expr.py +27 -22
- pixeltable/exprs/array_slice.py +3 -3
- pixeltable/exprs/column_property_ref.py +36 -23
- pixeltable/exprs/column_ref.py +213 -89
- pixeltable/exprs/comparison.py +5 -5
- pixeltable/exprs/compound_predicate.py +5 -4
- pixeltable/exprs/data_row.py +164 -54
- pixeltable/exprs/expr.py +70 -44
- pixeltable/exprs/expr_dict.py +3 -3
- pixeltable/exprs/expr_set.py +17 -10
- pixeltable/exprs/function_call.py +100 -40
- pixeltable/exprs/globals.py +2 -2
- pixeltable/exprs/in_predicate.py +4 -4
- pixeltable/exprs/inline_expr.py +18 -32
- pixeltable/exprs/is_null.py +7 -3
- pixeltable/exprs/json_mapper.py +8 -8
- pixeltable/exprs/json_path.py +56 -22
- pixeltable/exprs/literal.py +27 -5
- pixeltable/exprs/method_ref.py +2 -2
- pixeltable/exprs/object_ref.py +2 -2
- pixeltable/exprs/row_builder.py +167 -67
- pixeltable/exprs/rowid_ref.py +25 -10
- pixeltable/exprs/similarity_expr.py +58 -40
- pixeltable/exprs/sql_element_cache.py +4 -4
- pixeltable/exprs/string_op.py +5 -5
- pixeltable/exprs/type_cast.py +3 -5
- pixeltable/func/__init__.py +1 -0
- pixeltable/func/aggregate_function.py +8 -8
- pixeltable/func/callable_function.py +9 -9
- pixeltable/func/expr_template_function.py +17 -11
- pixeltable/func/function.py +18 -20
- pixeltable/func/function_registry.py +6 -7
- pixeltable/func/globals.py +2 -3
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +29 -27
- pixeltable/func/signature.py +46 -19
- pixeltable/func/tools.py +31 -13
- pixeltable/func/udf.py +18 -20
- pixeltable/functions/__init__.py +16 -0
- pixeltable/functions/anthropic.py +123 -77
- pixeltable/functions/audio.py +147 -10
- pixeltable/functions/bedrock.py +13 -6
- pixeltable/functions/date.py +7 -4
- pixeltable/functions/deepseek.py +35 -43
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +11 -20
- pixeltable/functions/gemini.py +195 -39
- pixeltable/functions/globals.py +142 -14
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1056 -24
- pixeltable/functions/image.py +115 -57
- pixeltable/functions/json.py +1 -1
- pixeltable/functions/llama_cpp.py +28 -13
- pixeltable/functions/math.py +67 -5
- pixeltable/functions/mistralai.py +18 -55
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +20 -13
- pixeltable/functions/openai.py +240 -226
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +4 -4
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +239 -69
- pixeltable/functions/timestamp.py +16 -16
- pixeltable/functions/together.py +24 -84
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +6 -1
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1515 -107
- pixeltable/functions/vision.py +8 -8
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +16 -8
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/{ext/functions → functions}/yolox.py +2 -4
- pixeltable/globals.py +362 -115
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +28 -22
- pixeltable/index/embedding_index.py +100 -118
- pixeltable/io/__init__.py +4 -2
- pixeltable/io/datarows.py +8 -7
- pixeltable/io/external_store.py +56 -105
- pixeltable/io/fiftyone.py +13 -13
- pixeltable/io/globals.py +31 -30
- pixeltable/io/hf_datasets.py +61 -16
- pixeltable/io/label_studio.py +74 -70
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +21 -12
- pixeltable/io/parquet.py +25 -105
- pixeltable/io/table_data_conduit.py +250 -123
- pixeltable/io/utils.py +4 -4
- pixeltable/iterators/__init__.py +2 -1
- pixeltable/iterators/audio.py +26 -25
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +112 -78
- pixeltable/iterators/image.py +12 -15
- pixeltable/iterators/string.py +11 -4
- pixeltable/iterators/video.py +523 -120
- pixeltable/metadata/__init__.py +14 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_18.py +2 -2
- pixeltable/metadata/converters/convert_19.py +2 -2
- pixeltable/metadata/converters/convert_20.py +2 -2
- pixeltable/metadata/converters/convert_21.py +2 -2
- pixeltable/metadata/converters/convert_22.py +2 -2
- pixeltable/metadata/converters/convert_24.py +2 -2
- pixeltable/metadata/converters/convert_25.py +2 -2
- pixeltable/metadata/converters/convert_26.py +2 -2
- pixeltable/metadata/converters/convert_29.py +4 -4
- pixeltable/metadata/converters/convert_30.py +34 -21
- pixeltable/metadata/converters/convert_34.py +2 -2
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +20 -31
- pixeltable/metadata/notes.py +9 -0
- pixeltable/metadata/schema.py +140 -53
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +382 -115
- pixeltable/share/__init__.py +1 -1
- pixeltable/share/packager.py +547 -83
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +257 -59
- pixeltable/store.py +311 -194
- pixeltable/type_system.py +373 -211
- pixeltable/utils/__init__.py +2 -3
- pixeltable/utils/arrow.py +131 -17
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +6 -6
- pixeltable/utils/code.py +3 -3
- pixeltable/utils/console_output.py +4 -1
- pixeltable/utils/coroutine.py +6 -23
- pixeltable/utils/dbms.py +32 -6
- pixeltable/utils/description_helper.py +4 -5
- pixeltable/utils/documents.py +7 -18
- pixeltable/utils/exception_handler.py +7 -30
- pixeltable/utils/filecache.py +6 -6
- pixeltable/utils/formatter.py +86 -48
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +2 -3
- pixeltable/utils/iceberg.py +1 -2
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +5 -6
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +26 -0
- pixeltable/utils/system.py +30 -0
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -40
- pixeltable/ext/__init__.py +0 -17
- pixeltable/ext/functions/__init__.py +0 -11
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/utils/media_store.py +0 -77
- pixeltable/utils/s3.py +0 -17
- pixeltable-0.3.14.dist-info/METADATA +0 -434
- pixeltable-0.3.14.dist-info/RECORD +0 -186
- pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
- {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/exprs/data_row.py
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import dataclasses
|
|
3
4
|
import datetime
|
|
4
5
|
import io
|
|
5
6
|
import urllib.parse
|
|
6
7
|
import urllib.request
|
|
7
8
|
from pathlib import Path
|
|
8
|
-
from typing import Any
|
|
9
|
+
from typing import Any
|
|
9
10
|
|
|
10
11
|
import numpy as np
|
|
11
12
|
import pgvector.sqlalchemy # type: ignore[import-untyped]
|
|
@@ -13,14 +14,81 @@ import PIL
|
|
|
13
14
|
import PIL.Image
|
|
14
15
|
import sqlalchemy as sql
|
|
15
16
|
|
|
16
|
-
|
|
17
|
+
import pixeltable.utils.image as image_utils
|
|
18
|
+
from pixeltable import catalog, env
|
|
19
|
+
from pixeltable.utils.local_store import TempStore
|
|
20
|
+
from pixeltable.utils.misc import non_none_dict_factory
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclasses.dataclass
|
|
24
|
+
class ArrayMd:
|
|
25
|
+
"""
|
|
26
|
+
Metadata for array cells that are stored externally.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
start: int
|
|
30
|
+
end: int
|
|
31
|
+
|
|
32
|
+
# we store bool arrays as packed bits (uint8 arrays), and need to record the shape to reconstruct the array
|
|
33
|
+
is_bool: bool = False
|
|
34
|
+
shape: tuple[int, ...] | None = None
|
|
35
|
+
|
|
36
|
+
def as_dict(self) -> dict:
|
|
37
|
+
# dict_factory: suppress Nones
|
|
38
|
+
x = dataclasses.asdict(self, dict_factory=non_none_dict_factory)
|
|
39
|
+
return x
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclasses.dataclass
|
|
43
|
+
class BinaryMd:
|
|
44
|
+
"""
|
|
45
|
+
Metadata for binary cells that are stored externally.
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
start: int
|
|
49
|
+
end: int
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclasses.dataclass
|
|
53
|
+
class CellMd:
|
|
54
|
+
"""
|
|
55
|
+
Content of the cellmd column.
|
|
56
|
+
|
|
57
|
+
All fields are optional, to minimize storage.
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
errortype: str | None = None
|
|
61
|
+
errormsg: str | None = None
|
|
62
|
+
|
|
63
|
+
# a list of file urls that are used to store images and arrays; only set for json and array columns
|
|
64
|
+
# for json columns: a list of all urls referenced in the column value
|
|
65
|
+
# for array columns: a single url
|
|
66
|
+
file_urls: list[str] | None = None
|
|
67
|
+
|
|
68
|
+
array_md: ArrayMd | None = None
|
|
69
|
+
binary_md: BinaryMd | None = None
|
|
70
|
+
|
|
71
|
+
@classmethod
|
|
72
|
+
def from_dict(cls, d: dict) -> CellMd:
|
|
73
|
+
d = d.copy()
|
|
74
|
+
if 'array_md' in d:
|
|
75
|
+
d['array_md'] = ArrayMd(**d['array_md'])
|
|
76
|
+
if 'binary_md' in d:
|
|
77
|
+
d['binary_md'] = BinaryMd(**d['binary_md'])
|
|
78
|
+
return cls(**d)
|
|
79
|
+
|
|
80
|
+
def as_dict(self) -> dict:
|
|
81
|
+
x = dataclasses.asdict(self, dict_factory=non_none_dict_factory)
|
|
82
|
+
return x
|
|
17
83
|
|
|
18
84
|
|
|
19
85
|
class DataRow:
|
|
20
86
|
"""
|
|
21
87
|
Encapsulates all data and execution state needed by RowBuilder and DataRowBatch:
|
|
22
88
|
- state for in-memory computation
|
|
23
|
-
- state for
|
|
89
|
+
- state needed for expression evaluation
|
|
90
|
+
- containers for output column values
|
|
91
|
+
|
|
24
92
|
This is not meant to be a black-box abstraction.
|
|
25
93
|
|
|
26
94
|
In-memory representations by column type:
|
|
@@ -30,6 +98,8 @@ class DataRow:
|
|
|
30
98
|
- BoolType: bool
|
|
31
99
|
- TimestampType: datetime.datetime
|
|
32
100
|
- DateType: datetime.date
|
|
101
|
+
- UUIDType: uuid.UUID
|
|
102
|
+
- BinaryType: bytes
|
|
33
103
|
- JsonType: json-serializable object
|
|
34
104
|
- ArrayType: numpy.ndarray
|
|
35
105
|
- ImageType: PIL.Image.Image
|
|
@@ -38,37 +108,49 @@ class DataRow:
|
|
|
38
108
|
- DocumentType: local path if available, otherwise url
|
|
39
109
|
"""
|
|
40
110
|
|
|
111
|
+
# expr evaluation state; indexed by slot idx
|
|
41
112
|
vals: np.ndarray # of object
|
|
42
113
|
has_val: np.ndarray # of bool
|
|
43
114
|
excs: np.ndarray # of object
|
|
44
|
-
|
|
45
|
-
# expr evaluation state; indexed by slot idx
|
|
46
115
|
missing_slots: np.ndarray # of bool; number of missing dependencies
|
|
47
116
|
missing_dependents: np.ndarray # of int16; number of missing dependents
|
|
48
117
|
is_scheduled: np.ndarray # of bool; True if this slot is scheduled for evaluation
|
|
49
118
|
|
|
50
|
-
#
|
|
51
|
-
|
|
52
|
-
media_slot_idxs: list[int]
|
|
53
|
-
array_slot_idxs: list[int]
|
|
54
|
-
|
|
55
|
-
# the primary key of a store row is a sequence of ints (the number is different for table vs view)
|
|
56
|
-
pk: Optional[tuple[int, ...]]
|
|
119
|
+
# CellMd needed for query execution; needs to be indexed by slot idx, not column id, to work for joins
|
|
120
|
+
slot_md: dict[int, CellMd]
|
|
57
121
|
|
|
58
122
|
# file_urls:
|
|
59
123
|
# - stored url of file for media in vals[i]
|
|
60
124
|
# - None if vals[i] is not media type
|
|
61
125
|
# - not None if file_paths[i] is not None
|
|
126
|
+
# TODO: this is a sparse vector; should it be a dict[int, str]?
|
|
62
127
|
file_urls: np.ndarray # of str
|
|
63
128
|
|
|
64
129
|
# file_paths:
|
|
65
130
|
# - local path of media file in vals[i]; points to the file cache if file_urls[i] is remote
|
|
66
131
|
# - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
|
|
132
|
+
# TODO: this is a sparse vector; should it be a dict[int, str]?
|
|
67
133
|
file_paths: np.ndarray # of str
|
|
68
134
|
|
|
135
|
+
# If `may_have_exc` is False, then we guarantee that no slot has an exception set. This is used to optimize
|
|
136
|
+
# exception handling under normal operation.
|
|
137
|
+
_may_have_exc: bool
|
|
138
|
+
|
|
139
|
+
# the primary key of a store row is a sequence of ints (the number is different for table vs view)
|
|
140
|
+
pk: tuple[int, ...] | None
|
|
69
141
|
# for nested rows (ie, those produced by JsonMapperDispatcher)
|
|
70
|
-
parent_row:
|
|
71
|
-
parent_slot_idx:
|
|
142
|
+
parent_row: DataRow | None
|
|
143
|
+
parent_slot_idx: int | None
|
|
144
|
+
|
|
145
|
+
# state for table output (insert()/update()); key: column id
|
|
146
|
+
cell_vals: dict[int, Any] # materialized values of output columns, in the format required for the column
|
|
147
|
+
cell_md: dict[int, CellMd]
|
|
148
|
+
|
|
149
|
+
# control structures that are shared across all DataRows in a batch
|
|
150
|
+
img_slot_idxs: list[int]
|
|
151
|
+
media_slot_idxs: list[int]
|
|
152
|
+
array_slot_idxs: list[int]
|
|
153
|
+
json_slot_idxs: list[int]
|
|
72
154
|
|
|
73
155
|
def __init__(
|
|
74
156
|
self,
|
|
@@ -76,36 +158,42 @@ class DataRow:
|
|
|
76
158
|
img_slot_idxs: list[int],
|
|
77
159
|
media_slot_idxs: list[int],
|
|
78
160
|
array_slot_idxs: list[int],
|
|
79
|
-
|
|
80
|
-
|
|
161
|
+
json_slot_idxs: list[int],
|
|
162
|
+
parent_row: DataRow | None = None,
|
|
163
|
+
parent_slot_idx: int | None = None,
|
|
81
164
|
):
|
|
82
|
-
self.img_slot_idxs = img_slot_idxs
|
|
83
|
-
self.media_slot_idxs = media_slot_idxs
|
|
84
|
-
self.array_slot_idxs = array_slot_idxs
|
|
85
165
|
self.init(size)
|
|
86
166
|
self.parent_row = parent_row
|
|
87
167
|
self.parent_slot_idx = parent_slot_idx
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
self.
|
|
91
|
-
self.
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
self.
|
|
95
|
-
self.
|
|
168
|
+
self.img_slot_idxs = img_slot_idxs
|
|
169
|
+
self.media_slot_idxs = media_slot_idxs
|
|
170
|
+
self.array_slot_idxs = array_slot_idxs
|
|
171
|
+
self.json_slot_idxs = json_slot_idxs
|
|
172
|
+
|
|
173
|
+
def init(self, size: int) -> None:
|
|
174
|
+
self.vals = np.full(size, None, dtype=object)
|
|
175
|
+
self.has_val = np.zeros(size, dtype=bool)
|
|
176
|
+
self.excs = np.full(size, None, dtype=object)
|
|
177
|
+
self.missing_slots = np.zeros(size, dtype=bool)
|
|
178
|
+
self.missing_dependents = np.zeros(size, dtype=np.int16)
|
|
179
|
+
self.is_scheduled = np.zeros(size, dtype=bool)
|
|
180
|
+
self.slot_md = {}
|
|
181
|
+
self.file_urls = np.full(size, None, dtype=object)
|
|
182
|
+
self.file_paths = np.full(size, None, dtype=object)
|
|
183
|
+
self._may_have_exc = False
|
|
184
|
+
self.cell_vals = {}
|
|
185
|
+
self.cell_md = {}
|
|
96
186
|
self.pk = None
|
|
97
|
-
self.file_urls = np.full(num_slots, None, dtype=object)
|
|
98
|
-
self.file_paths = np.full(num_slots, None, dtype=object)
|
|
99
187
|
self.parent_row = None
|
|
100
188
|
self.parent_slot_idx = None
|
|
101
189
|
|
|
102
|
-
def clear(self,
|
|
103
|
-
if
|
|
104
|
-
self.has_val[
|
|
105
|
-
self.vals[
|
|
106
|
-
self.excs[
|
|
107
|
-
self.file_urls[
|
|
108
|
-
self.file_paths[
|
|
190
|
+
def clear(self, slot_idxs: np.ndarray | None = None) -> None:
|
|
191
|
+
if slot_idxs is not None:
|
|
192
|
+
self.has_val[slot_idxs] = False
|
|
193
|
+
self.vals[slot_idxs] = None
|
|
194
|
+
self.excs[slot_idxs] = None
|
|
195
|
+
self.file_urls[slot_idxs] = None
|
|
196
|
+
self.file_paths[slot_idxs] = None
|
|
109
197
|
else:
|
|
110
198
|
self.init(len(self.vals))
|
|
111
199
|
|
|
@@ -132,20 +220,23 @@ class DataRow:
|
|
|
132
220
|
def set_pk(self, pk: tuple[int, ...]) -> None:
|
|
133
221
|
self.pk = pk
|
|
134
222
|
|
|
135
|
-
def has_exc(self, slot_idx:
|
|
223
|
+
def has_exc(self, slot_idx: int | None = None) -> bool:
|
|
136
224
|
"""
|
|
137
225
|
Returns True if an exception has been set for the given slot index, or for any slot index if slot_idx is None
|
|
138
226
|
"""
|
|
227
|
+
if not self._may_have_exc:
|
|
228
|
+
return False
|
|
229
|
+
|
|
139
230
|
if slot_idx is not None:
|
|
140
231
|
return self.excs[slot_idx] is not None
|
|
141
232
|
return (self.excs != None).any()
|
|
142
233
|
|
|
143
|
-
def get_exc(self, slot_idx: int) ->
|
|
234
|
+
def get_exc(self, slot_idx: int) -> Exception | None:
|
|
144
235
|
exc = self.excs[slot_idx]
|
|
145
236
|
assert exc is None or isinstance(exc, Exception)
|
|
146
237
|
return exc
|
|
147
238
|
|
|
148
|
-
def get_first_exc(self) ->
|
|
239
|
+
def get_first_exc(self) -> Exception | None:
|
|
149
240
|
mask = self.excs != None
|
|
150
241
|
if not mask.any():
|
|
151
242
|
return None
|
|
@@ -154,6 +245,7 @@ class DataRow:
|
|
|
154
245
|
def set_exc(self, slot_idx: int, exc: Exception) -> None:
|
|
155
246
|
assert self.excs[slot_idx] is None
|
|
156
247
|
self.excs[slot_idx] = exc
|
|
248
|
+
self._may_have_exc = True
|
|
157
249
|
|
|
158
250
|
# an exception means the value is None
|
|
159
251
|
self.has_val[slot_idx] = True
|
|
@@ -179,7 +271,7 @@ class DataRow:
|
|
|
179
271
|
|
|
180
272
|
return self.vals[index]
|
|
181
273
|
|
|
182
|
-
def get_stored_val(self, index: int, sa_col_type:
|
|
274
|
+
def get_stored_val(self, index: int, sa_col_type: sql.types.TypeEngine | None = None) -> Any:
|
|
183
275
|
"""Return the value that gets stored in the db"""
|
|
184
276
|
assert self.excs[index] is None
|
|
185
277
|
if not self.has_val[index]:
|
|
@@ -214,6 +306,7 @@ class DataRow:
|
|
|
214
306
|
"""Assign in-memory cell value
|
|
215
307
|
This allows overwriting
|
|
216
308
|
"""
|
|
309
|
+
assert isinstance(idx, int)
|
|
217
310
|
assert self.excs[idx] is None
|
|
218
311
|
|
|
219
312
|
if (idx in self.img_slot_idxs or idx in self.media_slot_idxs) and isinstance(val, str):
|
|
@@ -246,29 +339,46 @@ class DataRow:
|
|
|
246
339
|
self.vals[idx] = val
|
|
247
340
|
self.has_val[idx] = True
|
|
248
341
|
|
|
249
|
-
def
|
|
250
|
-
"""
|
|
342
|
+
def prepare_col_val_for_save(self, index: int, col: catalog.Column | None = None) -> bool:
|
|
343
|
+
"""
|
|
344
|
+
Prepare to save a column's value into the appropriate store. Discard unneeded values.
|
|
345
|
+
|
|
346
|
+
Return:
|
|
347
|
+
True if the media object in the column needs to be saved.
|
|
348
|
+
"""
|
|
251
349
|
if self.vals[index] is None:
|
|
252
|
-
return
|
|
350
|
+
return False
|
|
351
|
+
|
|
352
|
+
if self.file_urls[index] is not None:
|
|
353
|
+
return False
|
|
354
|
+
|
|
253
355
|
assert self.excs[index] is None
|
|
254
356
|
if self.file_paths[index] is None:
|
|
255
|
-
if
|
|
256
|
-
#
|
|
257
|
-
|
|
258
|
-
self.file_urls[index] = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
|
|
259
|
-
image = self.vals[index]
|
|
260
|
-
assert isinstance(image, PIL.Image.Image)
|
|
261
|
-
# Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
|
|
262
|
-
# In that case, use WebP instead.
|
|
263
|
-
format = 'webp' if image.has_transparency_data else 'jpeg'
|
|
264
|
-
image.save(filepath, format=format)
|
|
357
|
+
if col is not None:
|
|
358
|
+
# This is a media object that needs to be saved
|
|
359
|
+
return True
|
|
265
360
|
else:
|
|
266
|
-
# we
|
|
361
|
+
# This is a media object that we don't care about, so we discard it
|
|
267
362
|
self.has_val[index] = False
|
|
268
363
|
else:
|
|
269
364
|
# we already have a file for this image, nothing left to do
|
|
270
365
|
pass
|
|
366
|
+
|
|
367
|
+
self.vals[index] = None
|
|
368
|
+
return False
|
|
369
|
+
|
|
370
|
+
def save_media_to_temp(self, index: int, col: catalog.Column) -> str:
|
|
371
|
+
"""Save the media object in the column to the TempStore.
|
|
372
|
+
Objects cannot be saved directly to general destinations."""
|
|
373
|
+
assert col.col_type.is_media_type()
|
|
374
|
+
val = self.vals[index]
|
|
375
|
+
format = None
|
|
376
|
+
if isinstance(val, PIL.Image.Image):
|
|
377
|
+
format = image_utils.default_format(val)
|
|
378
|
+
filepath, url = TempStore.save_media_object(val, col, format=format)
|
|
379
|
+
self.file_paths[index] = str(filepath) if filepath is not None else None
|
|
271
380
|
self.vals[index] = None
|
|
381
|
+
return url
|
|
272
382
|
|
|
273
383
|
@property
|
|
274
384
|
def rowid(self) -> tuple[int, ...]:
|