pixeltable 0.3.14__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. pixeltable/__init__.py +42 -8
  2. pixeltable/{dataframe.py → _query.py} +470 -206
  3. pixeltable/_version.py +1 -0
  4. pixeltable/catalog/__init__.py +5 -4
  5. pixeltable/catalog/catalog.py +1785 -432
  6. pixeltable/catalog/column.py +190 -113
  7. pixeltable/catalog/dir.py +2 -4
  8. pixeltable/catalog/globals.py +19 -46
  9. pixeltable/catalog/insertable_table.py +191 -98
  10. pixeltable/catalog/path.py +63 -23
  11. pixeltable/catalog/schema_object.py +11 -15
  12. pixeltable/catalog/table.py +843 -436
  13. pixeltable/catalog/table_metadata.py +103 -0
  14. pixeltable/catalog/table_version.py +978 -657
  15. pixeltable/catalog/table_version_handle.py +72 -16
  16. pixeltable/catalog/table_version_path.py +112 -43
  17. pixeltable/catalog/tbl_ops.py +53 -0
  18. pixeltable/catalog/update_status.py +191 -0
  19. pixeltable/catalog/view.py +134 -90
  20. pixeltable/config.py +134 -22
  21. pixeltable/env.py +471 -157
  22. pixeltable/exceptions.py +6 -0
  23. pixeltable/exec/__init__.py +4 -1
  24. pixeltable/exec/aggregation_node.py +7 -8
  25. pixeltable/exec/cache_prefetch_node.py +83 -110
  26. pixeltable/exec/cell_materialization_node.py +268 -0
  27. pixeltable/exec/cell_reconstruction_node.py +168 -0
  28. pixeltable/exec/component_iteration_node.py +4 -3
  29. pixeltable/exec/data_row_batch.py +8 -65
  30. pixeltable/exec/exec_context.py +16 -4
  31. pixeltable/exec/exec_node.py +13 -36
  32. pixeltable/exec/expr_eval/evaluators.py +11 -7
  33. pixeltable/exec/expr_eval/expr_eval_node.py +27 -12
  34. pixeltable/exec/expr_eval/globals.py +8 -5
  35. pixeltable/exec/expr_eval/row_buffer.py +1 -2
  36. pixeltable/exec/expr_eval/schedulers.py +106 -56
  37. pixeltable/exec/globals.py +35 -0
  38. pixeltable/exec/in_memory_data_node.py +19 -19
  39. pixeltable/exec/object_store_save_node.py +293 -0
  40. pixeltable/exec/row_update_node.py +16 -9
  41. pixeltable/exec/sql_node.py +351 -84
  42. pixeltable/exprs/__init__.py +1 -1
  43. pixeltable/exprs/arithmetic_expr.py +27 -22
  44. pixeltable/exprs/array_slice.py +3 -3
  45. pixeltable/exprs/column_property_ref.py +36 -23
  46. pixeltable/exprs/column_ref.py +213 -89
  47. pixeltable/exprs/comparison.py +5 -5
  48. pixeltable/exprs/compound_predicate.py +5 -4
  49. pixeltable/exprs/data_row.py +164 -54
  50. pixeltable/exprs/expr.py +70 -44
  51. pixeltable/exprs/expr_dict.py +3 -3
  52. pixeltable/exprs/expr_set.py +17 -10
  53. pixeltable/exprs/function_call.py +100 -40
  54. pixeltable/exprs/globals.py +2 -2
  55. pixeltable/exprs/in_predicate.py +4 -4
  56. pixeltable/exprs/inline_expr.py +18 -32
  57. pixeltable/exprs/is_null.py +7 -3
  58. pixeltable/exprs/json_mapper.py +8 -8
  59. pixeltable/exprs/json_path.py +56 -22
  60. pixeltable/exprs/literal.py +27 -5
  61. pixeltable/exprs/method_ref.py +2 -2
  62. pixeltable/exprs/object_ref.py +2 -2
  63. pixeltable/exprs/row_builder.py +167 -67
  64. pixeltable/exprs/rowid_ref.py +25 -10
  65. pixeltable/exprs/similarity_expr.py +58 -40
  66. pixeltable/exprs/sql_element_cache.py +4 -4
  67. pixeltable/exprs/string_op.py +5 -5
  68. pixeltable/exprs/type_cast.py +3 -5
  69. pixeltable/func/__init__.py +1 -0
  70. pixeltable/func/aggregate_function.py +8 -8
  71. pixeltable/func/callable_function.py +9 -9
  72. pixeltable/func/expr_template_function.py +17 -11
  73. pixeltable/func/function.py +18 -20
  74. pixeltable/func/function_registry.py +6 -7
  75. pixeltable/func/globals.py +2 -3
  76. pixeltable/func/mcp.py +74 -0
  77. pixeltable/func/query_template_function.py +29 -27
  78. pixeltable/func/signature.py +46 -19
  79. pixeltable/func/tools.py +31 -13
  80. pixeltable/func/udf.py +18 -20
  81. pixeltable/functions/__init__.py +16 -0
  82. pixeltable/functions/anthropic.py +123 -77
  83. pixeltable/functions/audio.py +147 -10
  84. pixeltable/functions/bedrock.py +13 -6
  85. pixeltable/functions/date.py +7 -4
  86. pixeltable/functions/deepseek.py +35 -43
  87. pixeltable/functions/document.py +81 -0
  88. pixeltable/functions/fal.py +76 -0
  89. pixeltable/functions/fireworks.py +11 -20
  90. pixeltable/functions/gemini.py +195 -39
  91. pixeltable/functions/globals.py +142 -14
  92. pixeltable/functions/groq.py +108 -0
  93. pixeltable/functions/huggingface.py +1056 -24
  94. pixeltable/functions/image.py +115 -57
  95. pixeltable/functions/json.py +1 -1
  96. pixeltable/functions/llama_cpp.py +28 -13
  97. pixeltable/functions/math.py +67 -5
  98. pixeltable/functions/mistralai.py +18 -55
  99. pixeltable/functions/net.py +70 -0
  100. pixeltable/functions/ollama.py +20 -13
  101. pixeltable/functions/openai.py +240 -226
  102. pixeltable/functions/openrouter.py +143 -0
  103. pixeltable/functions/replicate.py +4 -4
  104. pixeltable/functions/reve.py +250 -0
  105. pixeltable/functions/string.py +239 -69
  106. pixeltable/functions/timestamp.py +16 -16
  107. pixeltable/functions/together.py +24 -84
  108. pixeltable/functions/twelvelabs.py +188 -0
  109. pixeltable/functions/util.py +6 -1
  110. pixeltable/functions/uuid.py +30 -0
  111. pixeltable/functions/video.py +1515 -107
  112. pixeltable/functions/vision.py +8 -8
  113. pixeltable/functions/voyageai.py +289 -0
  114. pixeltable/functions/whisper.py +16 -8
  115. pixeltable/functions/whisperx.py +179 -0
  116. pixeltable/{ext/functions → functions}/yolox.py +2 -4
  117. pixeltable/globals.py +362 -115
  118. pixeltable/index/base.py +17 -21
  119. pixeltable/index/btree.py +28 -22
  120. pixeltable/index/embedding_index.py +100 -118
  121. pixeltable/io/__init__.py +4 -2
  122. pixeltable/io/datarows.py +8 -7
  123. pixeltable/io/external_store.py +56 -105
  124. pixeltable/io/fiftyone.py +13 -13
  125. pixeltable/io/globals.py +31 -30
  126. pixeltable/io/hf_datasets.py +61 -16
  127. pixeltable/io/label_studio.py +74 -70
  128. pixeltable/io/lancedb.py +3 -0
  129. pixeltable/io/pandas.py +21 -12
  130. pixeltable/io/parquet.py +25 -105
  131. pixeltable/io/table_data_conduit.py +250 -123
  132. pixeltable/io/utils.py +4 -4
  133. pixeltable/iterators/__init__.py +2 -1
  134. pixeltable/iterators/audio.py +26 -25
  135. pixeltable/iterators/base.py +9 -3
  136. pixeltable/iterators/document.py +112 -78
  137. pixeltable/iterators/image.py +12 -15
  138. pixeltable/iterators/string.py +11 -4
  139. pixeltable/iterators/video.py +523 -120
  140. pixeltable/metadata/__init__.py +14 -3
  141. pixeltable/metadata/converters/convert_13.py +2 -2
  142. pixeltable/metadata/converters/convert_18.py +2 -2
  143. pixeltable/metadata/converters/convert_19.py +2 -2
  144. pixeltable/metadata/converters/convert_20.py +2 -2
  145. pixeltable/metadata/converters/convert_21.py +2 -2
  146. pixeltable/metadata/converters/convert_22.py +2 -2
  147. pixeltable/metadata/converters/convert_24.py +2 -2
  148. pixeltable/metadata/converters/convert_25.py +2 -2
  149. pixeltable/metadata/converters/convert_26.py +2 -2
  150. pixeltable/metadata/converters/convert_29.py +4 -4
  151. pixeltable/metadata/converters/convert_30.py +34 -21
  152. pixeltable/metadata/converters/convert_34.py +2 -2
  153. pixeltable/metadata/converters/convert_35.py +9 -0
  154. pixeltable/metadata/converters/convert_36.py +38 -0
  155. pixeltable/metadata/converters/convert_37.py +15 -0
  156. pixeltable/metadata/converters/convert_38.py +39 -0
  157. pixeltable/metadata/converters/convert_39.py +124 -0
  158. pixeltable/metadata/converters/convert_40.py +73 -0
  159. pixeltable/metadata/converters/convert_41.py +12 -0
  160. pixeltable/metadata/converters/convert_42.py +9 -0
  161. pixeltable/metadata/converters/convert_43.py +44 -0
  162. pixeltable/metadata/converters/util.py +20 -31
  163. pixeltable/metadata/notes.py +9 -0
  164. pixeltable/metadata/schema.py +140 -53
  165. pixeltable/metadata/utils.py +74 -0
  166. pixeltable/mypy/__init__.py +3 -0
  167. pixeltable/mypy/mypy_plugin.py +123 -0
  168. pixeltable/plan.py +382 -115
  169. pixeltable/share/__init__.py +1 -1
  170. pixeltable/share/packager.py +547 -83
  171. pixeltable/share/protocol/__init__.py +33 -0
  172. pixeltable/share/protocol/common.py +165 -0
  173. pixeltable/share/protocol/operation_types.py +33 -0
  174. pixeltable/share/protocol/replica.py +119 -0
  175. pixeltable/share/publish.py +257 -59
  176. pixeltable/store.py +311 -194
  177. pixeltable/type_system.py +373 -211
  178. pixeltable/utils/__init__.py +2 -3
  179. pixeltable/utils/arrow.py +131 -17
  180. pixeltable/utils/av.py +298 -0
  181. pixeltable/utils/azure_store.py +346 -0
  182. pixeltable/utils/coco.py +6 -6
  183. pixeltable/utils/code.py +3 -3
  184. pixeltable/utils/console_output.py +4 -1
  185. pixeltable/utils/coroutine.py +6 -23
  186. pixeltable/utils/dbms.py +32 -6
  187. pixeltable/utils/description_helper.py +4 -5
  188. pixeltable/utils/documents.py +7 -18
  189. pixeltable/utils/exception_handler.py +7 -30
  190. pixeltable/utils/filecache.py +6 -6
  191. pixeltable/utils/formatter.py +86 -48
  192. pixeltable/utils/gcs_store.py +295 -0
  193. pixeltable/utils/http.py +133 -0
  194. pixeltable/utils/http_server.py +2 -3
  195. pixeltable/utils/iceberg.py +1 -2
  196. pixeltable/utils/image.py +17 -0
  197. pixeltable/utils/lancedb.py +90 -0
  198. pixeltable/utils/local_store.py +322 -0
  199. pixeltable/utils/misc.py +5 -0
  200. pixeltable/utils/object_stores.py +573 -0
  201. pixeltable/utils/pydantic.py +60 -0
  202. pixeltable/utils/pytorch.py +5 -6
  203. pixeltable/utils/s3_store.py +527 -0
  204. pixeltable/utils/sql.py +26 -0
  205. pixeltable/utils/system.py +30 -0
  206. pixeltable-0.5.7.dist-info/METADATA +579 -0
  207. pixeltable-0.5.7.dist-info/RECORD +227 -0
  208. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info}/WHEEL +1 -1
  209. pixeltable-0.5.7.dist-info/entry_points.txt +2 -0
  210. pixeltable/__version__.py +0 -3
  211. pixeltable/catalog/named_function.py +0 -40
  212. pixeltable/ext/__init__.py +0 -17
  213. pixeltable/ext/functions/__init__.py +0 -11
  214. pixeltable/ext/functions/whisperx.py +0 -77
  215. pixeltable/utils/media_store.py +0 -77
  216. pixeltable/utils/s3.py +0 -17
  217. pixeltable-0.3.14.dist-info/METADATA +0 -434
  218. pixeltable-0.3.14.dist-info/RECORD +0 -186
  219. pixeltable-0.3.14.dist-info/entry_points.txt +0 -3
  220. {pixeltable-0.3.14.dist-info → pixeltable-0.5.7.dist-info/licenses}/LICENSE +0 -0
@@ -1,11 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import dataclasses
3
4
  import datetime
4
5
  import io
5
6
  import urllib.parse
6
7
  import urllib.request
7
8
  from pathlib import Path
8
- from typing import Any, Optional
9
+ from typing import Any
9
10
 
10
11
  import numpy as np
11
12
  import pgvector.sqlalchemy # type: ignore[import-untyped]
@@ -13,14 +14,81 @@ import PIL
13
14
  import PIL.Image
14
15
  import sqlalchemy as sql
15
16
 
16
- from pixeltable import env
17
+ import pixeltable.utils.image as image_utils
18
+ from pixeltable import catalog, env
19
+ from pixeltable.utils.local_store import TempStore
20
+ from pixeltable.utils.misc import non_none_dict_factory
21
+
22
+
23
+ @dataclasses.dataclass
24
+ class ArrayMd:
25
+ """
26
+ Metadata for array cells that are stored externally.
27
+ """
28
+
29
+ start: int
30
+ end: int
31
+
32
+ # we store bool arrays as packed bits (uint8 arrays), and need to record the shape to reconstruct the array
33
+ is_bool: bool = False
34
+ shape: tuple[int, ...] | None = None
35
+
36
+ def as_dict(self) -> dict:
37
+ # dict_factory: suppress Nones
38
+ x = dataclasses.asdict(self, dict_factory=non_none_dict_factory)
39
+ return x
40
+
41
+
42
+ @dataclasses.dataclass
43
+ class BinaryMd:
44
+ """
45
+ Metadata for binary cells that are stored externally.
46
+ """
47
+
48
+ start: int
49
+ end: int
50
+
51
+
52
+ @dataclasses.dataclass
53
+ class CellMd:
54
+ """
55
+ Content of the cellmd column.
56
+
57
+ All fields are optional, to minimize storage.
58
+ """
59
+
60
+ errortype: str | None = None
61
+ errormsg: str | None = None
62
+
63
+ # a list of file urls that are used to store images and arrays; only set for json and array columns
64
+ # for json columns: a list of all urls referenced in the column value
65
+ # for array columns: a single url
66
+ file_urls: list[str] | None = None
67
+
68
+ array_md: ArrayMd | None = None
69
+ binary_md: BinaryMd | None = None
70
+
71
+ @classmethod
72
+ def from_dict(cls, d: dict) -> CellMd:
73
+ d = d.copy()
74
+ if 'array_md' in d:
75
+ d['array_md'] = ArrayMd(**d['array_md'])
76
+ if 'binary_md' in d:
77
+ d['binary_md'] = BinaryMd(**d['binary_md'])
78
+ return cls(**d)
79
+
80
+ def as_dict(self) -> dict:
81
+ x = dataclasses.asdict(self, dict_factory=non_none_dict_factory)
82
+ return x
17
83
 
18
84
 
19
85
  class DataRow:
20
86
  """
21
87
  Encapsulates all data and execution state needed by RowBuilder and DataRowBatch:
22
88
  - state for in-memory computation
23
- - state for storing the data
89
+ - state needed for expression evaluation
90
+ - containers for output column values
91
+
24
92
  This is not meant to be a black-box abstraction.
25
93
 
26
94
  In-memory representations by column type:
@@ -30,6 +98,8 @@ class DataRow:
30
98
  - BoolType: bool
31
99
  - TimestampType: datetime.datetime
32
100
  - DateType: datetime.date
101
+ - UUIDType: uuid.UUID
102
+ - BinaryType: bytes
33
103
  - JsonType: json-serializable object
34
104
  - ArrayType: numpy.ndarray
35
105
  - ImageType: PIL.Image.Image
@@ -38,37 +108,49 @@ class DataRow:
38
108
  - DocumentType: local path if available, otherwise url
39
109
  """
40
110
 
111
+ # expr evaluation state; indexed by slot idx
41
112
  vals: np.ndarray # of object
42
113
  has_val: np.ndarray # of bool
43
114
  excs: np.ndarray # of object
44
-
45
- # expr evaluation state; indexed by slot idx
46
115
  missing_slots: np.ndarray # of bool; number of missing dependencies
47
116
  missing_dependents: np.ndarray # of int16; number of missing dependents
48
117
  is_scheduled: np.ndarray # of bool; True if this slot is scheduled for evaluation
49
118
 
50
- # control structures that are shared across all DataRows in a batch
51
- img_slot_idxs: list[int]
52
- media_slot_idxs: list[int]
53
- array_slot_idxs: list[int]
54
-
55
- # the primary key of a store row is a sequence of ints (the number is different for table vs view)
56
- pk: Optional[tuple[int, ...]]
119
+ # CellMd needed for query execution; needs to be indexed by slot idx, not column id, to work for joins
120
+ slot_md: dict[int, CellMd]
57
121
 
58
122
  # file_urls:
59
123
  # - stored url of file for media in vals[i]
60
124
  # - None if vals[i] is not media type
61
125
  # - not None if file_paths[i] is not None
126
+ # TODO: this is a sparse vector; should it be a dict[int, str]?
62
127
  file_urls: np.ndarray # of str
63
128
 
64
129
  # file_paths:
65
130
  # - local path of media file in vals[i]; points to the file cache if file_urls[i] is remote
66
131
  # - None if vals[i] is not a media type or if there is no local file yet for file_urls[i]
132
+ # TODO: this is a sparse vector; should it be a dict[int, str]?
67
133
  file_paths: np.ndarray # of str
68
134
 
135
+ # If `may_have_exc` is False, then we guarantee that no slot has an exception set. This is used to optimize
136
+ # exception handling under normal operation.
137
+ _may_have_exc: bool
138
+
139
+ # the primary key of a store row is a sequence of ints (the number is different for table vs view)
140
+ pk: tuple[int, ...] | None
69
141
  # for nested rows (ie, those produced by JsonMapperDispatcher)
70
- parent_row: Optional[DataRow]
71
- parent_slot_idx: Optional[int]
142
+ parent_row: DataRow | None
143
+ parent_slot_idx: int | None
144
+
145
+ # state for table output (insert()/update()); key: column id
146
+ cell_vals: dict[int, Any] # materialized values of output columns, in the format required for the column
147
+ cell_md: dict[int, CellMd]
148
+
149
+ # control structures that are shared across all DataRows in a batch
150
+ img_slot_idxs: list[int]
151
+ media_slot_idxs: list[int]
152
+ array_slot_idxs: list[int]
153
+ json_slot_idxs: list[int]
72
154
 
73
155
  def __init__(
74
156
  self,
@@ -76,36 +158,42 @@ class DataRow:
76
158
  img_slot_idxs: list[int],
77
159
  media_slot_idxs: list[int],
78
160
  array_slot_idxs: list[int],
79
- parent_row: Optional[DataRow] = None,
80
- parent_slot_idx: Optional[int] = None,
161
+ json_slot_idxs: list[int],
162
+ parent_row: DataRow | None = None,
163
+ parent_slot_idx: int | None = None,
81
164
  ):
82
- self.img_slot_idxs = img_slot_idxs
83
- self.media_slot_idxs = media_slot_idxs
84
- self.array_slot_idxs = array_slot_idxs
85
165
  self.init(size)
86
166
  self.parent_row = parent_row
87
167
  self.parent_slot_idx = parent_slot_idx
88
-
89
- def init(self, num_slots: int) -> None:
90
- self.vals = np.full(num_slots, None, dtype=object)
91
- self.has_val = np.zeros(num_slots, dtype=bool)
92
- self.excs = np.full(num_slots, None, dtype=object)
93
- self.missing_slots = np.zeros(num_slots, dtype=bool)
94
- self.missing_dependents = np.zeros(num_slots, dtype=np.int16)
95
- self.is_scheduled = np.zeros(num_slots, dtype=bool)
168
+ self.img_slot_idxs = img_slot_idxs
169
+ self.media_slot_idxs = media_slot_idxs
170
+ self.array_slot_idxs = array_slot_idxs
171
+ self.json_slot_idxs = json_slot_idxs
172
+
173
+ def init(self, size: int) -> None:
174
+ self.vals = np.full(size, None, dtype=object)
175
+ self.has_val = np.zeros(size, dtype=bool)
176
+ self.excs = np.full(size, None, dtype=object)
177
+ self.missing_slots = np.zeros(size, dtype=bool)
178
+ self.missing_dependents = np.zeros(size, dtype=np.int16)
179
+ self.is_scheduled = np.zeros(size, dtype=bool)
180
+ self.slot_md = {}
181
+ self.file_urls = np.full(size, None, dtype=object)
182
+ self.file_paths = np.full(size, None, dtype=object)
183
+ self._may_have_exc = False
184
+ self.cell_vals = {}
185
+ self.cell_md = {}
96
186
  self.pk = None
97
- self.file_urls = np.full(num_slots, None, dtype=object)
98
- self.file_paths = np.full(num_slots, None, dtype=object)
99
187
  self.parent_row = None
100
188
  self.parent_slot_idx = None
101
189
 
102
- def clear(self, idxs: Optional[np.ndarray] = None) -> None:
103
- if idxs is not None:
104
- self.has_val[idxs] = False
105
- self.vals[idxs] = None
106
- self.excs[idxs] = None
107
- self.file_urls[idxs] = None
108
- self.file_paths[idxs] = None
190
+ def clear(self, slot_idxs: np.ndarray | None = None) -> None:
191
+ if slot_idxs is not None:
192
+ self.has_val[slot_idxs] = False
193
+ self.vals[slot_idxs] = None
194
+ self.excs[slot_idxs] = None
195
+ self.file_urls[slot_idxs] = None
196
+ self.file_paths[slot_idxs] = None
109
197
  else:
110
198
  self.init(len(self.vals))
111
199
 
@@ -132,20 +220,23 @@ class DataRow:
132
220
  def set_pk(self, pk: tuple[int, ...]) -> None:
133
221
  self.pk = pk
134
222
 
135
- def has_exc(self, slot_idx: Optional[int] = None) -> bool:
223
+ def has_exc(self, slot_idx: int | None = None) -> bool:
136
224
  """
137
225
  Returns True if an exception has been set for the given slot index, or for any slot index if slot_idx is None
138
226
  """
227
+ if not self._may_have_exc:
228
+ return False
229
+
139
230
  if slot_idx is not None:
140
231
  return self.excs[slot_idx] is not None
141
232
  return (self.excs != None).any()
142
233
 
143
- def get_exc(self, slot_idx: int) -> Optional[Exception]:
234
+ def get_exc(self, slot_idx: int) -> Exception | None:
144
235
  exc = self.excs[slot_idx]
145
236
  assert exc is None or isinstance(exc, Exception)
146
237
  return exc
147
238
 
148
- def get_first_exc(self) -> Optional[Exception]:
239
+ def get_first_exc(self) -> Exception | None:
149
240
  mask = self.excs != None
150
241
  if not mask.any():
151
242
  return None
@@ -154,6 +245,7 @@ class DataRow:
154
245
  def set_exc(self, slot_idx: int, exc: Exception) -> None:
155
246
  assert self.excs[slot_idx] is None
156
247
  self.excs[slot_idx] = exc
248
+ self._may_have_exc = True
157
249
 
158
250
  # an exception means the value is None
159
251
  self.has_val[slot_idx] = True
@@ -179,7 +271,7 @@ class DataRow:
179
271
 
180
272
  return self.vals[index]
181
273
 
182
- def get_stored_val(self, index: int, sa_col_type: Optional[sql.types.TypeEngine] = None) -> Any:
274
+ def get_stored_val(self, index: int, sa_col_type: sql.types.TypeEngine | None = None) -> Any:
183
275
  """Return the value that gets stored in the db"""
184
276
  assert self.excs[index] is None
185
277
  if not self.has_val[index]:
@@ -214,6 +306,7 @@ class DataRow:
214
306
  """Assign in-memory cell value
215
307
  This allows overwriting
216
308
  """
309
+ assert isinstance(idx, int)
217
310
  assert self.excs[idx] is None
218
311
 
219
312
  if (idx in self.img_slot_idxs or idx in self.media_slot_idxs) and isinstance(val, str):
@@ -246,29 +339,46 @@ class DataRow:
246
339
  self.vals[idx] = val
247
340
  self.has_val[idx] = True
248
341
 
249
- def flush_img(self, index: int, filepath: Optional[str] = None) -> None:
250
- """Discard the in-memory value and save it to a local file, if filepath is not None"""
342
+ def prepare_col_val_for_save(self, index: int, col: catalog.Column | None = None) -> bool:
343
+ """
344
+ Prepare to save a column's value into the appropriate store. Discard unneeded values.
345
+
346
+ Return:
347
+ True if the media object in the column needs to be saved.
348
+ """
251
349
  if self.vals[index] is None:
252
- return
350
+ return False
351
+
352
+ if self.file_urls[index] is not None:
353
+ return False
354
+
253
355
  assert self.excs[index] is None
254
356
  if self.file_paths[index] is None:
255
- if filepath is not None:
256
- # we want to save this to a file
257
- self.file_paths[index] = filepath
258
- self.file_urls[index] = urllib.parse.urljoin('file:', urllib.request.pathname2url(filepath))
259
- image = self.vals[index]
260
- assert isinstance(image, PIL.Image.Image)
261
- # Default to JPEG unless the image has a transparency layer (which isn't supported by JPEG).
262
- # In that case, use WebP instead.
263
- format = 'webp' if image.has_transparency_data else 'jpeg'
264
- image.save(filepath, format=format)
357
+ if col is not None:
358
+ # This is a media object that needs to be saved
359
+ return True
265
360
  else:
266
- # we discard the content of this cell
361
+ # This is a media object that we don't care about, so we discard it
267
362
  self.has_val[index] = False
268
363
  else:
269
364
  # we already have a file for this image, nothing left to do
270
365
  pass
366
+
367
+ self.vals[index] = None
368
+ return False
369
+
370
+ def save_media_to_temp(self, index: int, col: catalog.Column) -> str:
371
+ """Save the media object in the column to the TempStore.
372
+ Objects cannot be saved directly to general destinations."""
373
+ assert col.col_type.is_media_type()
374
+ val = self.vals[index]
375
+ format = None
376
+ if isinstance(val, PIL.Image.Image):
377
+ format = image_utils.default_format(val)
378
+ filepath, url = TempStore.save_media_object(val, col, format=format)
379
+ self.file_paths[index] = str(filepath) if filepath is not None else None
271
380
  self.vals[index] = None
381
+ return url
272
382
 
273
383
  @property
274
384
  def rowid(self) -> tuple[int, ...]: