pixeltable 0.2.26__py3-none-any.whl → 0.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pixeltable/__init__.py +83 -19
- pixeltable/_query.py +1444 -0
- pixeltable/_version.py +1 -0
- pixeltable/catalog/__init__.py +7 -4
- pixeltable/catalog/catalog.py +2394 -119
- pixeltable/catalog/column.py +225 -104
- pixeltable/catalog/dir.py +38 -9
- pixeltable/catalog/globals.py +53 -34
- pixeltable/catalog/insertable_table.py +265 -115
- pixeltable/catalog/path.py +80 -17
- pixeltable/catalog/schema_object.py +28 -43
- pixeltable/catalog/table.py +1270 -677
- pixeltable/catalog/table_metadata.py +103 -0
- pixeltable/catalog/table_version.py +1270 -751
- pixeltable/catalog/table_version_handle.py +109 -0
- pixeltable/catalog/table_version_path.py +137 -42
- pixeltable/catalog/tbl_ops.py +53 -0
- pixeltable/catalog/update_status.py +191 -0
- pixeltable/catalog/view.py +251 -134
- pixeltable/config.py +215 -0
- pixeltable/env.py +736 -285
- pixeltable/exceptions.py +26 -2
- pixeltable/exec/__init__.py +7 -2
- pixeltable/exec/aggregation_node.py +39 -21
- pixeltable/exec/cache_prefetch_node.py +87 -109
- pixeltable/exec/cell_materialization_node.py +268 -0
- pixeltable/exec/cell_reconstruction_node.py +168 -0
- pixeltable/exec/component_iteration_node.py +25 -28
- pixeltable/exec/data_row_batch.py +11 -46
- pixeltable/exec/exec_context.py +26 -11
- pixeltable/exec/exec_node.py +35 -27
- pixeltable/exec/expr_eval/__init__.py +3 -0
- pixeltable/exec/expr_eval/evaluators.py +365 -0
- pixeltable/exec/expr_eval/expr_eval_node.py +413 -0
- pixeltable/exec/expr_eval/globals.py +200 -0
- pixeltable/exec/expr_eval/row_buffer.py +74 -0
- pixeltable/exec/expr_eval/schedulers.py +413 -0
- pixeltable/exec/globals.py +35 -0
- pixeltable/exec/in_memory_data_node.py +35 -27
- pixeltable/exec/object_store_save_node.py +293 -0
- pixeltable/exec/row_update_node.py +44 -29
- pixeltable/exec/sql_node.py +414 -115
- pixeltable/exprs/__init__.py +8 -5
- pixeltable/exprs/arithmetic_expr.py +79 -45
- pixeltable/exprs/array_slice.py +5 -5
- pixeltable/exprs/column_property_ref.py +40 -26
- pixeltable/exprs/column_ref.py +254 -61
- pixeltable/exprs/comparison.py +14 -9
- pixeltable/exprs/compound_predicate.py +9 -10
- pixeltable/exprs/data_row.py +213 -72
- pixeltable/exprs/expr.py +270 -104
- pixeltable/exprs/expr_dict.py +6 -5
- pixeltable/exprs/expr_set.py +20 -11
- pixeltable/exprs/function_call.py +383 -284
- pixeltable/exprs/globals.py +18 -5
- pixeltable/exprs/in_predicate.py +7 -7
- pixeltable/exprs/inline_expr.py +37 -37
- pixeltable/exprs/is_null.py +8 -4
- pixeltable/exprs/json_mapper.py +120 -54
- pixeltable/exprs/json_path.py +90 -60
- pixeltable/exprs/literal.py +61 -16
- pixeltable/exprs/method_ref.py +7 -6
- pixeltable/exprs/object_ref.py +19 -8
- pixeltable/exprs/row_builder.py +238 -75
- pixeltable/exprs/rowid_ref.py +53 -15
- pixeltable/exprs/similarity_expr.py +65 -50
- pixeltable/exprs/sql_element_cache.py +5 -5
- pixeltable/exprs/string_op.py +107 -0
- pixeltable/exprs/type_cast.py +25 -13
- pixeltable/exprs/variable.py +2 -2
- pixeltable/func/__init__.py +9 -5
- pixeltable/func/aggregate_function.py +197 -92
- pixeltable/func/callable_function.py +119 -35
- pixeltable/func/expr_template_function.py +101 -48
- pixeltable/func/function.py +375 -62
- pixeltable/func/function_registry.py +20 -19
- pixeltable/func/globals.py +6 -5
- pixeltable/func/mcp.py +74 -0
- pixeltable/func/query_template_function.py +151 -35
- pixeltable/func/signature.py +178 -49
- pixeltable/func/tools.py +164 -0
- pixeltable/func/udf.py +176 -53
- pixeltable/functions/__init__.py +44 -4
- pixeltable/functions/anthropic.py +226 -47
- pixeltable/functions/audio.py +148 -11
- pixeltable/functions/bedrock.py +137 -0
- pixeltable/functions/date.py +188 -0
- pixeltable/functions/deepseek.py +113 -0
- pixeltable/functions/document.py +81 -0
- pixeltable/functions/fal.py +76 -0
- pixeltable/functions/fireworks.py +72 -20
- pixeltable/functions/gemini.py +249 -0
- pixeltable/functions/globals.py +208 -53
- pixeltable/functions/groq.py +108 -0
- pixeltable/functions/huggingface.py +1088 -95
- pixeltable/functions/image.py +155 -84
- pixeltable/functions/json.py +8 -11
- pixeltable/functions/llama_cpp.py +31 -19
- pixeltable/functions/math.py +169 -0
- pixeltable/functions/mistralai.py +50 -75
- pixeltable/functions/net.py +70 -0
- pixeltable/functions/ollama.py +29 -36
- pixeltable/functions/openai.py +548 -160
- pixeltable/functions/openrouter.py +143 -0
- pixeltable/functions/replicate.py +15 -14
- pixeltable/functions/reve.py +250 -0
- pixeltable/functions/string.py +310 -85
- pixeltable/functions/timestamp.py +37 -19
- pixeltable/functions/together.py +77 -120
- pixeltable/functions/twelvelabs.py +188 -0
- pixeltable/functions/util.py +7 -2
- pixeltable/functions/uuid.py +30 -0
- pixeltable/functions/video.py +1528 -117
- pixeltable/functions/vision.py +26 -26
- pixeltable/functions/voyageai.py +289 -0
- pixeltable/functions/whisper.py +19 -10
- pixeltable/functions/whisperx.py +179 -0
- pixeltable/functions/yolox.py +112 -0
- pixeltable/globals.py +716 -236
- pixeltable/index/__init__.py +3 -1
- pixeltable/index/base.py +17 -21
- pixeltable/index/btree.py +32 -22
- pixeltable/index/embedding_index.py +155 -92
- pixeltable/io/__init__.py +12 -7
- pixeltable/io/datarows.py +140 -0
- pixeltable/io/external_store.py +83 -125
- pixeltable/io/fiftyone.py +24 -33
- pixeltable/io/globals.py +47 -182
- pixeltable/io/hf_datasets.py +96 -127
- pixeltable/io/label_studio.py +171 -156
- pixeltable/io/lancedb.py +3 -0
- pixeltable/io/pandas.py +136 -115
- pixeltable/io/parquet.py +40 -153
- pixeltable/io/table_data_conduit.py +702 -0
- pixeltable/io/utils.py +100 -0
- pixeltable/iterators/__init__.py +8 -4
- pixeltable/iterators/audio.py +207 -0
- pixeltable/iterators/base.py +9 -3
- pixeltable/iterators/document.py +144 -87
- pixeltable/iterators/image.py +17 -38
- pixeltable/iterators/string.py +15 -12
- pixeltable/iterators/video.py +523 -127
- pixeltable/metadata/__init__.py +33 -8
- pixeltable/metadata/converters/convert_10.py +2 -3
- pixeltable/metadata/converters/convert_13.py +2 -2
- pixeltable/metadata/converters/convert_15.py +15 -11
- pixeltable/metadata/converters/convert_16.py +4 -5
- pixeltable/metadata/converters/convert_17.py +4 -5
- pixeltable/metadata/converters/convert_18.py +4 -6
- pixeltable/metadata/converters/convert_19.py +6 -9
- pixeltable/metadata/converters/convert_20.py +3 -6
- pixeltable/metadata/converters/convert_21.py +6 -8
- pixeltable/metadata/converters/convert_22.py +3 -2
- pixeltable/metadata/converters/convert_23.py +33 -0
- pixeltable/metadata/converters/convert_24.py +55 -0
- pixeltable/metadata/converters/convert_25.py +19 -0
- pixeltable/metadata/converters/convert_26.py +23 -0
- pixeltable/metadata/converters/convert_27.py +29 -0
- pixeltable/metadata/converters/convert_28.py +13 -0
- pixeltable/metadata/converters/convert_29.py +110 -0
- pixeltable/metadata/converters/convert_30.py +63 -0
- pixeltable/metadata/converters/convert_31.py +11 -0
- pixeltable/metadata/converters/convert_32.py +15 -0
- pixeltable/metadata/converters/convert_33.py +17 -0
- pixeltable/metadata/converters/convert_34.py +21 -0
- pixeltable/metadata/converters/convert_35.py +9 -0
- pixeltable/metadata/converters/convert_36.py +38 -0
- pixeltable/metadata/converters/convert_37.py +15 -0
- pixeltable/metadata/converters/convert_38.py +39 -0
- pixeltable/metadata/converters/convert_39.py +124 -0
- pixeltable/metadata/converters/convert_40.py +73 -0
- pixeltable/metadata/converters/convert_41.py +12 -0
- pixeltable/metadata/converters/convert_42.py +9 -0
- pixeltable/metadata/converters/convert_43.py +44 -0
- pixeltable/metadata/converters/util.py +44 -18
- pixeltable/metadata/notes.py +21 -0
- pixeltable/metadata/schema.py +185 -42
- pixeltable/metadata/utils.py +74 -0
- pixeltable/mypy/__init__.py +3 -0
- pixeltable/mypy/mypy_plugin.py +123 -0
- pixeltable/plan.py +616 -225
- pixeltable/share/__init__.py +3 -0
- pixeltable/share/packager.py +797 -0
- pixeltable/share/protocol/__init__.py +33 -0
- pixeltable/share/protocol/common.py +165 -0
- pixeltable/share/protocol/operation_types.py +33 -0
- pixeltable/share/protocol/replica.py +119 -0
- pixeltable/share/publish.py +349 -0
- pixeltable/store.py +398 -232
- pixeltable/type_system.py +730 -267
- pixeltable/utils/__init__.py +40 -0
- pixeltable/utils/arrow.py +201 -29
- pixeltable/utils/av.py +298 -0
- pixeltable/utils/azure_store.py +346 -0
- pixeltable/utils/coco.py +26 -27
- pixeltable/utils/code.py +4 -4
- pixeltable/utils/console_output.py +46 -0
- pixeltable/utils/coroutine.py +24 -0
- pixeltable/utils/dbms.py +92 -0
- pixeltable/utils/description_helper.py +11 -12
- pixeltable/utils/documents.py +60 -61
- pixeltable/utils/exception_handler.py +36 -0
- pixeltable/utils/filecache.py +38 -22
- pixeltable/utils/formatter.py +88 -51
- pixeltable/utils/gcs_store.py +295 -0
- pixeltable/utils/http.py +133 -0
- pixeltable/utils/http_server.py +14 -13
- pixeltable/utils/iceberg.py +13 -0
- pixeltable/utils/image.py +17 -0
- pixeltable/utils/lancedb.py +90 -0
- pixeltable/utils/local_store.py +322 -0
- pixeltable/utils/misc.py +5 -0
- pixeltable/utils/object_stores.py +573 -0
- pixeltable/utils/pydantic.py +60 -0
- pixeltable/utils/pytorch.py +20 -20
- pixeltable/utils/s3_store.py +527 -0
- pixeltable/utils/sql.py +32 -5
- pixeltable/utils/system.py +30 -0
- pixeltable/utils/transactional_directory.py +4 -3
- pixeltable-0.5.7.dist-info/METADATA +579 -0
- pixeltable-0.5.7.dist-info/RECORD +227 -0
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
- pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
- pixeltable/__version__.py +0 -3
- pixeltable/catalog/named_function.py +0 -36
- pixeltable/catalog/path_dict.py +0 -141
- pixeltable/dataframe.py +0 -894
- pixeltable/exec/expr_eval_node.py +0 -232
- pixeltable/ext/__init__.py +0 -14
- pixeltable/ext/functions/__init__.py +0 -8
- pixeltable/ext/functions/whisperx.py +0 -77
- pixeltable/ext/functions/yolox.py +0 -157
- pixeltable/tool/create_test_db_dump.py +0 -311
- pixeltable/tool/create_test_video.py +0 -81
- pixeltable/tool/doc_plugins/griffe.py +0 -50
- pixeltable/tool/doc_plugins/mkdocstrings.py +0 -6
- pixeltable/tool/doc_plugins/templates/material/udf.html.jinja +0 -135
- pixeltable/tool/embed_udf.py +0 -9
- pixeltable/tool/mypy_plugin.py +0 -55
- pixeltable/utils/media_store.py +0 -76
- pixeltable/utils/s3.py +0 -16
- pixeltable-0.2.26.dist-info/METADATA +0 -400
- pixeltable-0.2.26.dist-info/RECORD +0 -156
- pixeltable-0.2.26.dist-info/entry_points.txt +0 -3
- {pixeltable-0.2.26.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
pixeltable/exprs/data_row.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import dataclasses
|
|
3
4
|
import datetime
|
|
4
5
|
import io
|
|
5
6
|
import urllib.parse
|
|
6
7
|
import urllib.request
|
|
7
|
-
from
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
8
10
|
|
|
9
11
|
import numpy as np
|
|
10
12
|
import pgvector.sqlalchemy # type: ignore[import-untyped]
|
|
@@ -12,14 +14,81 @@ import PIL
|
|
|
12
14
|
import PIL.Image
|
|
13
15
|
import sqlalchemy as sql
|
|
14
16
|
|
|
15
|
-
|
|
17
|
+
import pixeltable.utils.image as image_utils
|
|
18
|
+
from pixeltable import catalog, env
|
|
19
|
+
from pixeltable.utils.local_store import TempStore
|
|
20
|
+
from pixeltable.utils.misc import non_none_dict_factory
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclasses.dataclass
|
|
24
|
+
class ArrayMd:
|
|
25
|
+
"""
|
|
26
|
+
Metadata for array cells that are stored externally.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
start: int
|
|
30
|
+
end: int
|
|
31
|
+
|
|
32
|
+
# we store bool arrays as packed bits (uint8 arrays), and need to record the shape to reconstruct the array
|
|
33
|
+
is_bool: bool = False
|
|
34
|
+
shape: tuple[int, ...] | None = None
|
|
35
|
+
|
|
36
|
+
def as_dict(self) -> dict:
|
|
37
|
+
# dict_factory: suppress Nones
|
|
38
|
+
x = dataclasses.asdict(self, dict_factory=non_none_dict_factory)
|
|
39
|
+
return x
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclasses.dataclass
|
|
43
|
+
class BinaryMd:
|
|
44
|
+
"""
|
|
45
|
+
Metadata for binary cells that are stored externally.
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
start: int
|
|
49
|
+
end: int
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclasses.dataclass
|
|
53
|
+
class CellMd:
|
|
54
|
+
"""
|
|
55
|
+
Content of the cellmd column.
|
|
56
|
+
|
|
57
|
+
All fields are optional, to minimize storage.
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
errortype: str | None = None
|
|
61
|
+
errormsg: str | None = None
|
|
62
|
+
|
|
63
|
+
# a list of file urls that are used to store images and arrays; only set for json and array columns
|
|
64
|
+
# for json columns: a list of all urls referenced in the column value
|
|
65
|
+
# for array columns: a single url
|
|
66
|
+
file_urls: list[str] | None = None
|
|
67
|
+
|
|
68
|
+
array_md: ArrayMd | None = None
|
|
69
|
+
binary_md: BinaryMd | None = None
|
|
70
|
+
|
|
71
|
+
@classmethod
|
|
72
|
+
def from_dict(cls, d: dict) -> CellMd:
|
|
73
|
+
d = d.copy()
|
|
74
|
+
if 'array_md' in d:
|
|
75
|
+
d['array_md'] = ArrayMd(**d['array_md'])
|
|
76
|
+
if 'binary_md' in d:
|
|
77
|
+
d['binary_md'] = BinaryMd(**d['binary_md'])
|
|
78
|
+
return cls(**d)
|
|
79
|
+
|
|
80
|
+
def as_dict(self) -> dict:
|
|
81
|
+
x = dataclasses.asdict(self, dict_factory=non_none_dict_factory)
|
|
82
|
+
return x
|
|
16
83
|
|
|
17
84
|
|
|
18
85
|
class DataRow:
|
|
19
86
|
"""
|
|
20
87
|
Encapsulates all data and execution state needed by RowBuilder and DataRowBatch:
|
|
21
88
|
- state for in-memory computation
|
|
22
|
-
- state for
|
|
89
|
+
- state needed for expression evaluation
|
|
90
|
+
- containers for output column values
|
|
91
|
+
|
|
23
92
|
This is not meant to be a black-box abstraction.
|
|
24
93
|
|
|
25
94
|
In-memory representations by column type:
|
|
@@ -28,54 +97,113 @@ class DataRow:
|
|
|
28
97
|
- FloatType: float
|
|
29
98
|
- BoolType: bool
|
|
30
99
|
- TimestampType: datetime.datetime
|
|
100
|
+
- DateType: datetime.date
|
|
101
|
+
- UUIDType: uuid.UUID
|
|
102
|
+
- BinaryType: bytes
|
|
31
103
|
- JsonType: json-serializable object
|
|
32
104
|
- ArrayType: numpy.ndarray
|
|
33
105
|
- ImageType: PIL.Image.Image
|
|
34
106
|
- VideoType: local path if available, otherwise url
|
|
107
|
+
- AudioType: local path if available, otherwise url
|
|
108
|
+
- DocumentType: local path if available, otherwise url
|
|
35
109
|
"""
|
|
36
110
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
#
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
array_slot_idxs: list[int]
|
|
111
|
+
# expr evaluation state; indexed by slot idx
|
|
112
|
+
vals: np.ndarray # of object
|
|
113
|
+
has_val: np.ndarray # of bool
|
|
114
|
+
excs: np.ndarray # of object
|
|
115
|
+
missing_slots: np.ndarray # of bool; number of missing dependencies
|
|
116
|
+
missing_dependents: np.ndarray # of int16; number of missing dependents
|
|
117
|
+
is_scheduled: np.ndarray # of bool; True if this slot is scheduled for evaluation
|
|
45
118
|
|
|
46
|
-
#
|
|
47
|
-
|
|
119
|
+
# CellMd needed for query execution; needs to be indexed by slot idx, not column id, to work for joins
|
|
120
|
+
slot_md: dict[int, CellMd]
|
|
48
121
|
|
|
49
122
|
# file_urls:
|
|
50
123
|
# - stored url of file for media in vals[i]
|
|
51
124
|
# - None if vals[i] is not media type
|
|
52
125
|
# - not None if file_paths[i] is not None
|
|
53
|
-
|
|
126
|
+
# TODO: this is a sparse vector; should it be a dict[int, str]?
|
|
127
|
+
file_urls: np.ndarray # of str
|
|
54
128
|
|
|
55
129
|
# file_paths:
|
|
56
130
|
# - local path of media file in vals[i]; points to the file cache if file_urls[i] is remote
|
|
57
131
|
# - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
|
|
58
|
-
|
|
132
|
+
# TODO: this is a sparse vector; should it be a dict[int, str]?
|
|
133
|
+
file_paths: np.ndarray # of str
|
|
134
|
+
|
|
135
|
+
# If `may_have_exc` is False, then we guarantee that no slot has an exception set. This is used to optimize
|
|
136
|
+
# exception handling under normal operation.
|
|
137
|
+
_may_have_exc: bool
|
|
138
|
+
|
|
139
|
+
# the primary key of a store row is a sequence of ints (the number is different for table vs view)
|
|
140
|
+
pk: tuple[int, ...] | None
|
|
141
|
+
# for nested rows (ie, those produced by JsonMapperDispatcher)
|
|
142
|
+
parent_row: DataRow | None
|
|
143
|
+
parent_slot_idx: int | None
|
|
59
144
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
145
|
+
# state for table output (insert()/update()); key: column id
|
|
146
|
+
cell_vals: dict[int, Any] # materialized values of output columns, in the format required for the column
|
|
147
|
+
cell_md: dict[int, CellMd]
|
|
148
|
+
|
|
149
|
+
# control structures that are shared across all DataRows in a batch
|
|
150
|
+
img_slot_idxs: list[int]
|
|
151
|
+
media_slot_idxs: list[int]
|
|
152
|
+
array_slot_idxs: list[int]
|
|
153
|
+
json_slot_idxs: list[int]
|
|
154
|
+
|
|
155
|
+
def __init__(
|
|
156
|
+
self,
|
|
157
|
+
size: int,
|
|
158
|
+
img_slot_idxs: list[int],
|
|
159
|
+
media_slot_idxs: list[int],
|
|
160
|
+
array_slot_idxs: list[int],
|
|
161
|
+
json_slot_idxs: list[int],
|
|
162
|
+
parent_row: DataRow | None = None,
|
|
163
|
+
parent_slot_idx: int | None = None,
|
|
164
|
+
):
|
|
165
|
+
self.init(size)
|
|
166
|
+
self.parent_row = parent_row
|
|
167
|
+
self.parent_slot_idx = parent_slot_idx
|
|
64
168
|
self.img_slot_idxs = img_slot_idxs
|
|
65
169
|
self.media_slot_idxs = media_slot_idxs
|
|
66
170
|
self.array_slot_idxs = array_slot_idxs
|
|
171
|
+
self.json_slot_idxs = json_slot_idxs
|
|
172
|
+
|
|
173
|
+
def init(self, size: int) -> None:
|
|
174
|
+
self.vals = np.full(size, None, dtype=object)
|
|
175
|
+
self.has_val = np.zeros(size, dtype=bool)
|
|
176
|
+
self.excs = np.full(size, None, dtype=object)
|
|
177
|
+
self.missing_slots = np.zeros(size, dtype=bool)
|
|
178
|
+
self.missing_dependents = np.zeros(size, dtype=np.int16)
|
|
179
|
+
self.is_scheduled = np.zeros(size, dtype=bool)
|
|
180
|
+
self.slot_md = {}
|
|
181
|
+
self.file_urls = np.full(size, None, dtype=object)
|
|
182
|
+
self.file_paths = np.full(size, None, dtype=object)
|
|
183
|
+
self._may_have_exc = False
|
|
184
|
+
self.cell_vals = {}
|
|
185
|
+
self.cell_md = {}
|
|
67
186
|
self.pk = None
|
|
68
|
-
self.
|
|
69
|
-
self.
|
|
70
|
-
|
|
71
|
-
def clear(self) -> None:
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
187
|
+
self.parent_row = None
|
|
188
|
+
self.parent_slot_idx = None
|
|
189
|
+
|
|
190
|
+
def clear(self, slot_idxs: np.ndarray | None = None) -> None:
|
|
191
|
+
if slot_idxs is not None:
|
|
192
|
+
self.has_val[slot_idxs] = False
|
|
193
|
+
self.vals[slot_idxs] = None
|
|
194
|
+
self.excs[slot_idxs] = None
|
|
195
|
+
self.file_urls[slot_idxs] = None
|
|
196
|
+
self.file_paths[slot_idxs] = None
|
|
197
|
+
else:
|
|
198
|
+
self.init(len(self.vals))
|
|
199
|
+
|
|
200
|
+
def set_file_path(self, idx: int, path: str) -> None:
|
|
201
|
+
"""Augment an existing url with a local file path"""
|
|
202
|
+
assert self.has_val[idx]
|
|
203
|
+
assert idx in self.img_slot_idxs or idx in self.media_slot_idxs
|
|
204
|
+
self.file_paths[idx] = path
|
|
205
|
+
if idx in self.media_slot_idxs:
|
|
206
|
+
self.vals[idx] = path
|
|
79
207
|
|
|
80
208
|
def copy(self, target: DataRow) -> None:
|
|
81
209
|
"""Create a copy of the contents of this DataRow in target
|
|
@@ -92,26 +220,32 @@ class DataRow:
|
|
|
92
220
|
def set_pk(self, pk: tuple[int, ...]) -> None:
|
|
93
221
|
self.pk = pk
|
|
94
222
|
|
|
95
|
-
def has_exc(self, slot_idx:
|
|
223
|
+
def has_exc(self, slot_idx: int | None = None) -> bool:
|
|
96
224
|
"""
|
|
97
225
|
Returns True if an exception has been set for the given slot index, or for any slot index if slot_idx is None
|
|
98
226
|
"""
|
|
227
|
+
if not self._may_have_exc:
|
|
228
|
+
return False
|
|
229
|
+
|
|
99
230
|
if slot_idx is not None:
|
|
100
231
|
return self.excs[slot_idx] is not None
|
|
101
|
-
return
|
|
232
|
+
return (self.excs != None).any()
|
|
102
233
|
|
|
103
|
-
def get_exc(self, slot_idx: int) ->
|
|
104
|
-
|
|
234
|
+
def get_exc(self, slot_idx: int) -> Exception | None:
|
|
235
|
+
exc = self.excs[slot_idx]
|
|
236
|
+
assert exc is None or isinstance(exc, Exception)
|
|
237
|
+
return exc
|
|
105
238
|
|
|
106
|
-
def get_first_exc(self) ->
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
return
|
|
239
|
+
def get_first_exc(self) -> Exception | None:
|
|
240
|
+
mask = self.excs != None
|
|
241
|
+
if not mask.any():
|
|
242
|
+
return None
|
|
243
|
+
return self.excs[mask][0]
|
|
111
244
|
|
|
112
245
|
def set_exc(self, slot_idx: int, exc: Exception) -> None:
|
|
113
246
|
assert self.excs[slot_idx] is None
|
|
114
247
|
self.excs[slot_idx] = exc
|
|
248
|
+
self._may_have_exc = True
|
|
115
249
|
|
|
116
250
|
# an exception means the value is None
|
|
117
251
|
self.has_val[slot_idx] = True
|
|
@@ -119,16 +253,13 @@ class DataRow:
|
|
|
119
253
|
self.file_paths[slot_idx] = None
|
|
120
254
|
self.file_urls[slot_idx] = None
|
|
121
255
|
|
|
122
|
-
def
|
|
123
|
-
return len(self.vals)
|
|
124
|
-
|
|
125
|
-
def __getitem__(self, index: object) -> Any:
|
|
256
|
+
def __getitem__(self, index: int) -> Any:
|
|
126
257
|
"""Returns in-memory value, ie, what is needed for expr evaluation"""
|
|
127
258
|
assert isinstance(index, int)
|
|
128
259
|
if not self.has_val[index]:
|
|
129
|
-
#
|
|
130
|
-
|
|
131
|
-
|
|
260
|
+
# This is a sufficiently cheap and sensitive validation that it makes sense to keep the assertion around
|
|
261
|
+
# even if python is running with -O.
|
|
262
|
+
raise AssertionError(index)
|
|
132
263
|
|
|
133
264
|
if self.file_urls[index] is not None and index in self.img_slot_idxs:
|
|
134
265
|
# if we need to load this from a file, it should have been materialized locally
|
|
@@ -140,7 +271,7 @@ class DataRow:
|
|
|
140
271
|
|
|
141
272
|
return self.vals[index]
|
|
142
273
|
|
|
143
|
-
def get_stored_val(self, index: int, sa_col_type:
|
|
274
|
+
def get_stored_val(self, index: int, sa_col_type: sql.types.TypeEngine | None = None) -> Any:
|
|
144
275
|
"""Return the value that gets stored in the db"""
|
|
145
276
|
assert self.excs[index] is None
|
|
146
277
|
if not self.has_val[index]:
|
|
@@ -171,7 +302,7 @@ class DataRow:
|
|
|
171
302
|
|
|
172
303
|
return self.vals[index]
|
|
173
304
|
|
|
174
|
-
def __setitem__(self, idx:
|
|
305
|
+
def __setitem__(self, idx: int, val: Any) -> None:
|
|
175
306
|
"""Assign in-memory cell value
|
|
176
307
|
This allows overwriting
|
|
177
308
|
"""
|
|
@@ -188,9 +319,10 @@ class DataRow:
|
|
|
188
319
|
# local file path
|
|
189
320
|
assert self.file_urls[idx] is None and self.file_paths[idx] is None
|
|
190
321
|
if len(parsed.scheme) <= 1:
|
|
191
|
-
|
|
192
|
-
self.
|
|
193
|
-
|
|
322
|
+
path = str(Path(val).absolute()) # Ensure we're using an absolute pathname.
|
|
323
|
+
self.file_urls[idx] = urllib.parse.urljoin('file:', urllib.request.pathname2url(path))
|
|
324
|
+
self.file_paths[idx] = path
|
|
325
|
+
else: # file:// URL
|
|
194
326
|
self.file_urls[idx] = val
|
|
195
327
|
# Wrap the path in a url2pathname() call to ensure proper handling on Windows.
|
|
196
328
|
self.file_paths[idx] = urllib.parse.unquote(urllib.request.url2pathname(parsed.path))
|
|
@@ -207,37 +339,46 @@ class DataRow:
|
|
|
207
339
|
self.vals[idx] = val
|
|
208
340
|
self.has_val[idx] = True
|
|
209
341
|
|
|
210
|
-
def
|
|
211
|
-
"""
|
|
212
|
-
|
|
213
|
-
assert idx in self.img_slot_idxs or idx in self.media_slot_idxs
|
|
214
|
-
self.file_paths[idx] = path
|
|
215
|
-
if idx in self.media_slot_idxs:
|
|
216
|
-
self.vals[idx] = path
|
|
342
|
+
def prepare_col_val_for_save(self, index: int, col: catalog.Column | None = None) -> bool:
|
|
343
|
+
"""
|
|
344
|
+
Prepare to save a column's value into the appropriate store. Discard unneeded values.
|
|
217
345
|
|
|
218
|
-
|
|
219
|
-
|
|
346
|
+
Return:
|
|
347
|
+
True if the media object in the column needs to be saved.
|
|
348
|
+
"""
|
|
220
349
|
if self.vals[index] is None:
|
|
221
|
-
return
|
|
350
|
+
return False
|
|
351
|
+
|
|
352
|
+
if self.file_urls[index] is not None:
|
|
353
|
+
return False
|
|
354
|
+
|
|
222
355
|
assert self.excs[index] is None
|
|
223
356
|
if self.file_paths[index] is None:
|
|
224
|
-
if
|
|
225
|
-
#
|
|
226
|
-
|
|
227
|
-
self.file_urls[index] = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
|
|
228
|
-
image = self.vals[index]
|
|
229
|
-
assert isinstance(image, PIL.Image.Image)
|
|
230
|
-
# Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
|
|
231
|
-
# In that case, use WebP instead.
|
|
232
|
-
format = 'webp' if image.has_transparency_data else 'jpeg'
|
|
233
|
-
image.save(filepath, format=format)
|
|
357
|
+
if col is not None:
|
|
358
|
+
# This is a media object that needs to be saved
|
|
359
|
+
return True
|
|
234
360
|
else:
|
|
235
|
-
# we
|
|
361
|
+
# This is a media object that we don't care about, so we discard it
|
|
236
362
|
self.has_val[index] = False
|
|
237
363
|
else:
|
|
238
364
|
# we already have a file for this image, nothing left to do
|
|
239
365
|
pass
|
|
366
|
+
|
|
367
|
+
self.vals[index] = None
|
|
368
|
+
return False
|
|
369
|
+
|
|
370
|
+
def save_media_to_temp(self, index: int, col: catalog.Column) -> str:
|
|
371
|
+
"""Save the media object in the column to the TempStore.
|
|
372
|
+
Objects cannot be saved directly to general destinations."""
|
|
373
|
+
assert col.col_type.is_media_type()
|
|
374
|
+
val = self.vals[index]
|
|
375
|
+
format = None
|
|
376
|
+
if isinstance(val, PIL.Image.Image):
|
|
377
|
+
format = image_utils.default_format(val)
|
|
378
|
+
filepath, url = TempStore.save_media_object(val, col, format=format)
|
|
379
|
+
self.file_paths[index] = str(filepath) if filepath is not None else None
|
|
240
380
|
self.vals[index] = None
|
|
381
|
+
return url
|
|
241
382
|
|
|
242
383
|
@property
|
|
243
384
|
def rowid(self) -> tuple[int, ...]:
|