ome-arrow 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ome_arrow/__init__.py +8 -1
- ome_arrow/_version.py +2 -2
- ome_arrow/core.py +72 -7
- ome_arrow/export.py +302 -8
- ome_arrow/ingest.py +542 -124
- ome_arrow/meta.py +41 -0
- ome_arrow/transform.py +34 -0
- ome_arrow/view.py +20 -22
- {ome_arrow-0.0.4.dist-info → ome_arrow-0.0.6.dist-info}/METADATA +7 -2
- ome_arrow-0.0.6.dist-info/RECORD +14 -0
- {ome_arrow-0.0.4.dist-info → ome_arrow-0.0.6.dist-info}/WHEEL +1 -1
- ome_arrow-0.0.4.dist-info/RECORD +0 -14
- {ome_arrow-0.0.4.dist-info → ome_arrow-0.0.6.dist-info}/licenses/LICENSE +0 -0
- {ome_arrow-0.0.4.dist-info → ome_arrow-0.0.6.dist-info}/top_level.txt +0 -0
ome_arrow/ingest.py
CHANGED
|
@@ -3,6 +3,7 @@ Converting to and from OME-Arrow formats.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import itertools
|
|
6
|
+
import json
|
|
6
7
|
import re
|
|
7
8
|
import warnings
|
|
8
9
|
from datetime import datetime, timezone
|
|
@@ -20,11 +21,351 @@ from bioio_ome_zarr import Reader as OMEZarrReader
|
|
|
20
21
|
from ome_arrow.meta import OME_ARROW_STRUCT, OME_ARROW_TAG_TYPE, OME_ARROW_TAG_VERSION
|
|
21
22
|
|
|
22
23
|
|
|
24
|
+
def _ome_arrow_from_table(
    table: pa.Table,
    *,
    column_name: Optional[str],
    row_index: int,
    strict_schema: bool,
) -> pa.StructScalar:
    """Extract a single OME-Arrow record from an Arrow table.

    Args:
        table: Source Arrow table.
        column_name: Column to read. When None, or when the table has no
            column of that name, the first struct column that passes the
            schema check is used instead and a UserWarning is emitted.
        row_index: Row index to extract.
        strict_schema: If True, the column type must equal OME_ARROW_STRUCT
            exactly. If False, the column only needs to contain every
            OME-Arrow field other than "image_type", "chunk_grid" and
            "chunks"; extra fields are dropped and absent ones become None.

    Returns:
        A typed OME-Arrow StructScalar.

    Raises:
        ValueError: If the table is empty, the row index is out of range, the
            requested column exists but is not a suitable struct, no suitable
            column can be found, or the selected row is null.
    """
    if table.num_rows == 0:
        raise ValueError("Table contains 0 rows; expected at least 1.")
    if not (0 <= row_index < table.num_rows):
        raise ValueError(f"row_index {row_index} out of range [0, {table.num_rows}).")

    # 1) Locate the OME-Arrow column.
    # Fields the relaxed check treats as optional; computed once, not per column.
    optional_fields = {"image_type", "chunk_grid", "chunks"}
    required_fields = {f.name for f in OME_ARROW_STRUCT} - optional_fields

    def _struct_matches_ome_fields(t: pa.StructType) -> bool:
        return required_fields.issubset({f.name for f in t})

    requested_name = column_name
    candidate_col = None
    autodetected_name = None

    if column_name is not None and column_name in table.column_names:
        # An explicitly named, existing column must be usable; never silently
        # fall back to another column in this case.
        arr = table[column_name]
        if not pa.types.is_struct(arr.type):
            raise ValueError(f"Column '{column_name}' is not a Struct; got {arr.type}.")
        if strict_schema and arr.type != OME_ARROW_STRUCT:
            raise ValueError(
                f"Column '{column_name}' schema != OME_ARROW_STRUCT.\n"
                f"Got: {arr.type}\n"
                f"Expect:{OME_ARROW_STRUCT}"
            )
        if not strict_schema and not _struct_matches_ome_fields(arr.type):
            raise ValueError(
                f"Column '{column_name}' does not have the expected OME-Arrow fields."
            )
        candidate_col = arr
    else:
        # Auto-detect the first struct column that satisfies the schema check.
        for name in table.column_names:
            arr = table[name]
            if not pa.types.is_struct(arr.type):
                continue
            if strict_schema:
                usable = arr.type == OME_ARROW_STRUCT
            else:
                usable = _struct_matches_ome_fields(arr.type)
            if usable:
                candidate_col = arr
                autodetected_name = name
                column_name = name
                break
        if candidate_col is None:
            if requested_name is None:
                hint = "no struct column with OME-Arrow fields was found."
            else:
                hint = f"column '{requested_name}' not found and auto-detection failed."
            raise ValueError(f"Could not locate an OME-Arrow struct column: {hint}")

    # Tell the caller when a column other than the requested one was used.
    if autodetected_name is not None and autodetected_name != requested_name:
        warnings.warn(
            f"Requested column '{requested_name}' was not usable or not found. "
            f"Auto-detected OME-Arrow column '{autodetected_name}'.",
            UserWarning,
            stacklevel=2,
        )

    # 2) Extract the row as a Python dict.
    record_dict = candidate_col.slice(row_index, 1).to_pylist()[0]
    if record_dict is None:
        # A null struct row has no fields to project; fail with a clear
        # message instead of a TypeError further down.
        raise ValueError(
            f"Row {row_index} of column '{column_name}' is null; "
            "expected an OME-Arrow struct."
        )

    # Project onto the canonical field set: unexpected fields are dropped and
    # fields missing from the stored struct (e.g. image_type) become None.
    record_dict = {f.name: record_dict.get(f.name) for f in OME_ARROW_STRUCT}

    # NOTE: file-level metadata tags (b"ome.arrow.type", b"ome.arrow.version")
    # are intentionally not enforced; only the column schema decides acceptance.

    # 3) Reconstruct a typed StructScalar using the canonical schema.
    return pa.scalar(record_dict, type=OME_ARROW_STRUCT)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _normalize_unit(unit: str | None) -> str | None:
|
|
130
|
+
if not unit:
|
|
131
|
+
return None
|
|
132
|
+
u = unit.strip().lower()
|
|
133
|
+
if u in {"micrometer", "micrometre", "micron", "microns", "um", "µm"}:
|
|
134
|
+
return "µm"
|
|
135
|
+
if u in {"nanometer", "nanometre", "nm"}:
|
|
136
|
+
return "nm"
|
|
137
|
+
return unit
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _read_physical_pixel_sizes(
|
|
141
|
+
img: BioImage,
|
|
142
|
+
) -> tuple[float, float, float, str | None, bool]:
|
|
143
|
+
pps = getattr(img, "physical_pixel_sizes", None)
|
|
144
|
+
if pps is None:
|
|
145
|
+
return 1.0, 1.0, 1.0, None, False
|
|
146
|
+
|
|
147
|
+
vx = getattr(pps, "X", None) or getattr(pps, "x", None)
|
|
148
|
+
vy = getattr(pps, "Y", None) or getattr(pps, "y", None)
|
|
149
|
+
vz = getattr(pps, "Z", None) or getattr(pps, "z", None)
|
|
150
|
+
|
|
151
|
+
if vx is None and vy is None and vz is None:
|
|
152
|
+
return 1.0, 1.0, 1.0, None, False
|
|
153
|
+
|
|
154
|
+
try:
|
|
155
|
+
psize_x = float(vx or 1.0)
|
|
156
|
+
psize_y = float(vy or 1.0)
|
|
157
|
+
psize_z = float(vz or 1.0)
|
|
158
|
+
except Exception:
|
|
159
|
+
return 1.0, 1.0, 1.0, None, False
|
|
160
|
+
|
|
161
|
+
unit = getattr(pps, "unit", None) or getattr(pps, "units", None)
|
|
162
|
+
unit = _normalize_unit(str(unit)) if unit is not None else None
|
|
163
|
+
|
|
164
|
+
return psize_x, psize_y, psize_z, unit, True
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _load_zarr_attrs(zarr_path: Path) -> dict:
|
|
168
|
+
zarr_json = zarr_path / "zarr.json"
|
|
169
|
+
if zarr_json.exists():
|
|
170
|
+
try:
|
|
171
|
+
data = json.loads(zarr_json.read_text())
|
|
172
|
+
return data.get("attributes") or data.get("attrs") or {}
|
|
173
|
+
except Exception:
|
|
174
|
+
return {}
|
|
175
|
+
zattrs = zarr_path / ".zattrs"
|
|
176
|
+
if zattrs.exists():
|
|
177
|
+
try:
|
|
178
|
+
return json.loads(zattrs.read_text())
|
|
179
|
+
except Exception:
|
|
180
|
+
return {}
|
|
181
|
+
return {}
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _extract_multiscales(attrs: dict) -> list[dict]:
|
|
185
|
+
if not isinstance(attrs, dict):
|
|
186
|
+
return []
|
|
187
|
+
ome = attrs.get("ome")
|
|
188
|
+
if isinstance(ome, dict) and isinstance(ome.get("multiscales"), list):
|
|
189
|
+
return ome["multiscales"]
|
|
190
|
+
if isinstance(attrs.get("multiscales"), list):
|
|
191
|
+
return attrs["multiscales"]
|
|
192
|
+
return []
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _read_ngff_scale(zarr_path: Path) -> tuple[float, float, float, str | None] | None:
    """Read spatial scale factors from multiscales metadata on disk.

    Metadata is looked up at ``zarr_path`` first and then at the nearest path
    component (``zarr_path`` itself or an ancestor) whose name ends in
    ``.zarr``. From the first multiscales entry, the "scale" coordinate
    transformation of the dataset with path "0" (or of the first dataset when
    none has that path) is used.

    Args:
        zarr_path: Path to a Zarr group, possibly nested inside a store.

    Returns:
        ``(scale_x, scale_y, scale_z, unit)``, with 1.0 for spatial axes that
        have no usable scale. ``unit`` is set only when every spatial axis
        that declares a unit declares the same one. None is returned when no
        usable scale metadata is found.
    """
    # Path.suffix only ever holds the final extension, so "*.ome.zarr" stores
    # are matched by ".zarr" as well.
    zarr_root = next(
        (p for p in (zarr_path, *zarr_path.parents) if p.suffix.lower() == ".zarr"),
        zarr_path,
    )

    multiscales: list = []
    # dict.fromkeys de-duplicates so the same directory is not read twice.
    for candidate in dict.fromkeys((zarr_path, zarr_root)):
        multiscales = _extract_multiscales(_load_zarr_attrs(candidate))
        if multiscales:
            break
    if not multiscales:
        return None

    def _as_list(value) -> list:
        # Metadata comes from user files; anything unexpected counts as absent.
        return value if isinstance(value, list) else []

    ms = multiscales[0]
    if not isinstance(ms, dict):
        return None
    axes = _as_list(ms.get("axes"))
    datasets = [d for d in _as_list(ms.get("datasets")) if isinstance(d, dict)]
    if not axes or not datasets:
        return None

    ds = next((d for d in datasets if str(d.get("path")) == "0"), datasets[0])
    transforms = [
        ct for ct in _as_list(ds.get("coordinateTransformations")) if isinstance(ct, dict)
    ]
    scale_ct = next((ct for ct in transforms if ct.get("type") == "scale"), None)
    if scale_ct is None:
        return None

    scale = _as_list(scale_ct.get("scale"))
    if len(scale) != len(axes):
        return None

    axis_scale: dict[str, float] = {}
    axis_unit: dict[str, str] = {}
    for ax, raw_value in zip(axes, scale):
        if isinstance(ax, dict):
            name = str(ax.get("name", "")).lower()
            raw_unit = ax.get("unit")
        else:
            # Tolerate axes given as bare names rather than objects.
            name = str(ax).lower()
            raw_unit = None
        if name not in {"x", "y", "z"}:
            continue
        try:
            value = float(raw_value)
        except (TypeError, ValueError, OverflowError):
            continue
        # The chained comparison is False for NaN, infinities, zero and
        # negatives, none of which is a meaningful pixel size.
        if not 0.0 < value < float("inf"):
            continue
        axis_scale[name] = value
        unit = _normalize_unit(raw_unit) if isinstance(raw_unit, str) else None
        if unit:
            axis_unit[name] = unit

    if not axis_scale:
        return None

    psize_x = axis_scale.get("x", 1.0)
    psize_y = axis_scale.get("y", 1.0)
    psize_z = axis_scale.get("z", 1.0)

    # Report a unit only when the spatial axes agree on it.
    units = [axis_unit[a] for a in ("x", "y", "z") if a in axis_unit]
    unit = units[0] if units and len(set(units)) == 1 else None

    return psize_x, psize_y, psize_z, unit
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _normalize_chunk_shape(
|
|
253
|
+
chunk_shape: Optional[Tuple[int, int, int]],
|
|
254
|
+
size_z: int,
|
|
255
|
+
size_y: int,
|
|
256
|
+
size_x: int,
|
|
257
|
+
) -> Tuple[int, int, int]:
|
|
258
|
+
"""Normalize a chunk shape against image bounds.
|
|
259
|
+
|
|
260
|
+
Args:
|
|
261
|
+
chunk_shape: Desired chunk shape as (Z, Y, X), or None.
|
|
262
|
+
size_z: Total Z size of the image.
|
|
263
|
+
size_y: Total Y size of the image.
|
|
264
|
+
size_x: Total X size of the image.
|
|
265
|
+
|
|
266
|
+
Returns:
|
|
267
|
+
Tuple[int, int, int]: Normalized (Z, Y, X) chunk shape.
|
|
268
|
+
"""
|
|
269
|
+
if chunk_shape is None:
|
|
270
|
+
chunk_shape = (1, 512, 512)
|
|
271
|
+
if not isinstance(chunk_shape, (list, tuple)) or len(chunk_shape) != 3:
|
|
272
|
+
raise ValueError("chunk_shape must be a sequence of three integers (z,y,x)")
|
|
273
|
+
try:
|
|
274
|
+
cz_raw, cy_raw, cx_raw = (int(v) for v in chunk_shape)
|
|
275
|
+
except Exception as exc:
|
|
276
|
+
raise ValueError(
|
|
277
|
+
"chunk_shape must be a sequence of three integers (z,y,x)"
|
|
278
|
+
) from exc
|
|
279
|
+
if cz_raw <= 0 or cy_raw <= 0 or cx_raw <= 0:
|
|
280
|
+
raise ValueError("chunk_shape values must be positive integers")
|
|
281
|
+
cz = max(1, min(cz_raw, int(size_z)))
|
|
282
|
+
cy = max(1, min(cy_raw, int(size_y)))
|
|
283
|
+
cx = max(1, min(cx_raw, int(size_x)))
|
|
284
|
+
return cz, cy, cx
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def _build_chunks_from_planes(
    *,
    planes: List[Dict[str, Any]],
    size_t: int,
    size_c: int,
    size_z: int,
    size_y: int,
    size_x: int,
    chunk_shape: Optional[Tuple[int, int, int]],
    chunk_order: str = "ZYX",
) -> List[Dict[str, Any]]:
    """Cut a list of flattened planes into (Z, Y, X) chunks.

    Args:
        planes: Plane dicts with keys "z", "t", "c" and "pixels"; each
            "pixels" value is reshaped to (size_y, size_x).
        size_t: Image extent along T.
        size_c: Image extent along C.
        size_z: Image extent along Z.
        size_y: Image extent along Y.
        size_x: Image extent along X.
        chunk_shape: Requested (Z, Y, X) chunk shape, normalized against the
            image extent before use.
        chunk_order: Flattening order of chunk pixels; only "ZYX" is accepted.

    Returns:
        List[Dict[str, Any]]: One dict per chunk, ordered by t, c, z, y, x,
        holding the chunk origin, its shape, and its pixels as a flat list.
        Positions for which no plane was supplied stay zero-filled.

    Raises:
        ValueError: If ``chunk_order`` is anything other than "ZYX".
    """
    if str(chunk_order).upper() != "ZYX":
        raise ValueError("Only chunk_order='ZYX' is supported for now.")

    depth, height, width = _normalize_chunk_shape(chunk_shape, size_z, size_y, size_x)

    # Index every plane by (t, c, z) as a 2-D (Y, X) array.
    images: Dict[Tuple[int, int, int], np.ndarray] = {}
    for entry in planes:
        z_idx = int(entry["z"])
        t_idx = int(entry["t"])
        c_idx = int(entry["c"])
        flat = entry["pixels"]
        images[(t_idx, c_idx, z_idx)] = np.asarray(flat).reshape(size_y, size_x)

    # All chunks share the dtype of the first plane (uint16 when there is none).
    if images:
        pixel_dtype = next(iter(images.values())).dtype
    else:
        pixel_dtype = np.uint16

    # product() walks the chunk origins in t, c, z, y, x order.
    origins = itertools.product(
        range(size_t),
        range(size_c),
        range(0, size_z, depth),
        range(0, size_y, height),
        range(0, size_x, width),
    )

    result: List[Dict[str, Any]] = []
    for t_idx, c_idx, z_start, y_start, x_start in origins:
        # Edge chunks are truncated to the image extent.
        nz = min(depth, size_z - z_start)
        ny = min(height, size_y - y_start)
        nx = min(width, size_x - x_start)
        rows = slice(y_start, y_start + ny)
        cols = slice(x_start, x_start + nx)

        block = np.zeros((nz, ny, nx), dtype=pixel_dtype)
        for offset in range(nz):
            image = images.get((t_idx, c_idx, z_start + offset))
            if image is not None:
                block[offset] = image[rows, cols]

        result.append(
            {
                "t": t_idx,
                "c": c_idx,
                "z": z_start,
                "y": y_start,
                "x": x_start,
                "shape_z": nz,
                "shape_y": ny,
                "shape_x": nx,
                "pixels": block.reshape(-1).tolist(),
            }
        )
    return result
|
|
361
|
+
|
|
362
|
+
|
|
23
363
|
def to_ome_arrow(
|
|
24
364
|
type_: str = OME_ARROW_TAG_TYPE,
|
|
25
365
|
version: str = OME_ARROW_TAG_VERSION,
|
|
26
366
|
image_id: str = "unnamed",
|
|
27
367
|
name: str = "unknown",
|
|
368
|
+
image_type: str | None = "image",
|
|
28
369
|
acquisition_datetime: Optional[datetime] = None,
|
|
29
370
|
dimension_order: str = "XYZCT",
|
|
30
371
|
dtype: str = "uint16",
|
|
@@ -39,6 +380,10 @@ def to_ome_arrow(
|
|
|
39
380
|
physical_size_unit: str = "µm",
|
|
40
381
|
channels: Optional[List[Dict[str, Any]]] = None,
|
|
41
382
|
planes: Optional[List[Dict[str, Any]]] = None,
|
|
383
|
+
chunks: Optional[List[Dict[str, Any]]] = None,
|
|
384
|
+
chunk_shape: Optional[Tuple[int, int, int]] = (1, 512, 512), # (Z, Y, X)
|
|
385
|
+
chunk_order: str = "ZYX",
|
|
386
|
+
build_chunks: bool = True,
|
|
42
387
|
masks: Any = None,
|
|
43
388
|
) -> pa.StructScalar:
|
|
44
389
|
"""
|
|
@@ -53,6 +398,9 @@ def to_ome_arrow(
|
|
|
53
398
|
version: Specification version string.
|
|
54
399
|
image_id: Unique image identifier.
|
|
55
400
|
name: Human-friendly name.
|
|
401
|
+
image_type: Open-ended image kind (e.g., "image", "label"). Note that
|
|
402
|
+
from_* helpers pass image_type=None by default to preserve
|
|
403
|
+
"unspecified" vs explicitly set ("image").
|
|
56
404
|
acquisition_datetime: Datetime of acquisition (defaults to now).
|
|
57
405
|
dimension_order: Dimension order ("XYZCT" or "XYCT").
|
|
58
406
|
dtype: Pixel data type string (e.g., "uint16").
|
|
@@ -61,6 +409,12 @@ def to_ome_arrow(
|
|
|
61
409
|
physical_size_unit: Unit string, default "µm".
|
|
62
410
|
channels: List of channel dicts. Autogenerates one if None.
|
|
63
411
|
planes: List of plane dicts. Empty if None.
|
|
412
|
+
chunks: Optional list of chunk dicts. If None and build_chunks is True,
|
|
413
|
+
chunks are derived from planes using chunk_shape.
|
|
414
|
+
chunk_shape: Chunk shape as (Z, Y, X). Defaults to (1, 512, 512).
|
|
415
|
+
chunk_order: Flattening order for chunk pixels (default "ZYX").
|
|
416
|
+
build_chunks: If True, build chunked pixels from planes when chunks
|
|
417
|
+
is None.
|
|
64
418
|
masks: Optional placeholder for future annotations.
|
|
65
419
|
|
|
66
420
|
Returns:
|
|
@@ -76,6 +430,7 @@ def to_ome_arrow(
|
|
|
76
430
|
version = str(version)
|
|
77
431
|
image_id = str(image_id)
|
|
78
432
|
name = str(name)
|
|
433
|
+
image_type = None if image_type is None else str(image_type)
|
|
79
434
|
dimension_order = str(dimension_order)
|
|
80
435
|
dtype = str(dtype)
|
|
81
436
|
physical_size_unit = str(physical_size_unit)
|
|
@@ -105,11 +460,62 @@ def to_ome_arrow(
|
|
|
105
460
|
if planes is None:
|
|
106
461
|
planes = [{"z": 0, "t": 0, "c": 0, "pixels": [0] * (size_x * size_y)}]
|
|
107
462
|
|
|
463
|
+
if chunks is None and build_chunks:
|
|
464
|
+
chunks = _build_chunks_from_planes(
|
|
465
|
+
planes=planes,
|
|
466
|
+
size_t=size_t,
|
|
467
|
+
size_c=size_c,
|
|
468
|
+
size_z=size_z,
|
|
469
|
+
size_y=size_y,
|
|
470
|
+
size_x=size_x,
|
|
471
|
+
chunk_shape=chunk_shape,
|
|
472
|
+
chunk_order=chunk_order,
|
|
473
|
+
)
|
|
474
|
+
|
|
475
|
+
chunk_grid = None
|
|
476
|
+
if chunks is not None:
|
|
477
|
+
chunk_order = str(chunk_order).upper()
|
|
478
|
+
if chunk_order != "ZYX":
|
|
479
|
+
raise ValueError("Only chunk_order='ZYX' is supported for now.")
|
|
480
|
+
if len(chunks) == 0:
|
|
481
|
+
raise ValueError("chunks must not be an empty list")
|
|
482
|
+
first = chunks[0]
|
|
483
|
+
try:
|
|
484
|
+
derived_shape = (
|
|
485
|
+
int(first["shape_z"]),
|
|
486
|
+
int(first["shape_y"]),
|
|
487
|
+
int(first["shape_x"]),
|
|
488
|
+
)
|
|
489
|
+
except Exception as exc:
|
|
490
|
+
raise ValueError(
|
|
491
|
+
"chunks entries must include shape_z/shape_y/shape_x"
|
|
492
|
+
) from exc
|
|
493
|
+
if derived_shape[0] <= 0 or derived_shape[1] <= 0 or derived_shape[2] <= 0:
|
|
494
|
+
raise ValueError("chunk shapes must be positive integers")
|
|
495
|
+
if chunk_shape is not None:
|
|
496
|
+
norm_shape = _normalize_chunk_shape(chunk_shape, size_z, size_y, size_x)
|
|
497
|
+
if norm_shape != derived_shape:
|
|
498
|
+
raise ValueError(
|
|
499
|
+
"chunk_shape does not match provided chunks "
|
|
500
|
+
f"(chunk_shape={norm_shape}, chunks_shape={derived_shape})"
|
|
501
|
+
)
|
|
502
|
+
cz, cy, cx = _normalize_chunk_shape(derived_shape, size_z, size_y, size_x)
|
|
503
|
+
chunk_grid = {
|
|
504
|
+
"order": "TCZYX",
|
|
505
|
+
"chunk_t": 1,
|
|
506
|
+
"chunk_c": 1,
|
|
507
|
+
"chunk_z": cz,
|
|
508
|
+
"chunk_y": cy,
|
|
509
|
+
"chunk_x": cx,
|
|
510
|
+
"chunk_order": str(chunk_order),
|
|
511
|
+
}
|
|
512
|
+
|
|
108
513
|
record = {
|
|
109
514
|
"type": type_,
|
|
110
515
|
"version": version,
|
|
111
516
|
"id": image_id,
|
|
112
517
|
"name": name,
|
|
518
|
+
"image_type": image_type,
|
|
113
519
|
"acquisition_datetime": acquisition_datetime or datetime.now(timezone.utc),
|
|
114
520
|
"pixels_meta": {
|
|
115
521
|
"dimension_order": dimension_order,
|
|
@@ -127,6 +533,8 @@ def to_ome_arrow(
|
|
|
127
533
|
"physical_size_z_unit": physical_size_unit,
|
|
128
534
|
"channels": channels,
|
|
129
535
|
},
|
|
536
|
+
"chunk_grid": chunk_grid,
|
|
537
|
+
"chunks": chunks,
|
|
130
538
|
"planes": planes,
|
|
131
539
|
"masks": masks,
|
|
132
540
|
}
|
|
@@ -140,9 +548,13 @@ def from_numpy(
|
|
|
140
548
|
dim_order: str = "TCZYX",
|
|
141
549
|
image_id: Optional[str] = None,
|
|
142
550
|
name: Optional[str] = None,
|
|
551
|
+
image_type: Optional[str] = None,
|
|
143
552
|
channel_names: Optional[Sequence[str]] = None,
|
|
144
553
|
acquisition_datetime: Optional[datetime] = None,
|
|
145
554
|
clamp_to_uint16: bool = True,
|
|
555
|
+
chunk_shape: Optional[Tuple[int, int, int]] = (1, 512, 512),
|
|
556
|
+
chunk_order: str = "ZYX",
|
|
557
|
+
build_chunks: bool = True,
|
|
146
558
|
# meta
|
|
147
559
|
physical_size_x: float = 1.0,
|
|
148
560
|
physical_size_y: float = 1.0,
|
|
@@ -150,42 +562,39 @@ def from_numpy(
|
|
|
150
562
|
physical_size_unit: str = "µm",
|
|
151
563
|
dtype_meta: Optional[str] = None, # if None, inferred from output dtype
|
|
152
564
|
) -> pa.StructScalar:
|
|
153
|
-
"""
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
- If Z is not in `dim_order`, `size_z` will be 1 and the meta
|
|
187
|
-
dimension_order becomes "XYCT"; otherwise "XYZCT".
|
|
188
|
-
- If T/C are absent in `dim_order`, they default to size 1.
|
|
565
|
+
"""Build an OME-Arrow StructScalar from a NumPy array.
|
|
566
|
+
|
|
567
|
+
Args:
|
|
568
|
+
arr: Image data with axes described by `dim_order`.
|
|
569
|
+
dim_order: Axis labels for `arr`. Must include "Y" and "X".
|
|
570
|
+
Supported examples: "YX", "ZYX", "CYX", "CZYX", "TYX", "TCYX", "TCZYX".
|
|
571
|
+
image_id: Optional stable image identifier.
|
|
572
|
+
name: Optional human label.
|
|
573
|
+
image_type: Open-ended image kind (e.g., "image", "label").
|
|
574
|
+
channel_names: Names for channels; defaults to C0..C{n-1}.
|
|
575
|
+
acquisition_datetime: Defaults to now (UTC) if None.
|
|
576
|
+
clamp_to_uint16: If True, clamp/cast planes to uint16 before serialization.
|
|
577
|
+
chunk_shape: Chunk shape as (Z, Y, X). Defaults to (1, 512, 512).
|
|
578
|
+
chunk_order: Flattening order for chunk pixels (default "ZYX").
|
|
579
|
+
build_chunks: If True, build chunked pixels from planes.
|
|
580
|
+
physical_size_x: Spatial pixel size (µm) for X.
|
|
581
|
+
physical_size_y: Spatial pixel size (µm) for Y.
|
|
582
|
+
physical_size_z: Spatial pixel size (µm) for Z when present.
|
|
583
|
+
physical_size_unit: Unit string for spatial axes (default "µm").
|
|
584
|
+
dtype_meta: Pixel dtype string to place in metadata; if None, inferred
|
|
585
|
+
from the (possibly cast) array's dtype.
|
|
586
|
+
|
|
587
|
+
Returns:
|
|
588
|
+
pa.StructScalar: Typed OME-Arrow record (schema = OME_ARROW_STRUCT).
|
|
589
|
+
|
|
590
|
+
Raises:
|
|
591
|
+
TypeError: If `arr` is not a NumPy ndarray.
|
|
592
|
+
ValueError: If `dim_order` is invalid or dimensions are non-positive.
|
|
593
|
+
|
|
594
|
+
Notes:
|
|
595
|
+
- If Z is not in `dim_order`, `size_z` will be 1 and the meta
|
|
596
|
+
dimension_order becomes "XYCT"; otherwise "XYZCT".
|
|
597
|
+
- If T/C are absent in `dim_order`, they default to size 1.
|
|
189
598
|
"""
|
|
190
599
|
|
|
191
600
|
if not isinstance(arr, np.ndarray):
|
|
@@ -273,6 +682,7 @@ def from_numpy(
|
|
|
273
682
|
return to_ome_arrow(
|
|
274
683
|
image_id=str(image_id or "unnamed"),
|
|
275
684
|
name=str(name or "unknown"),
|
|
685
|
+
image_type=image_type,
|
|
276
686
|
acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
|
|
277
687
|
dimension_order=meta_dim_order,
|
|
278
688
|
dtype=dtype_str,
|
|
@@ -287,6 +697,9 @@ def from_numpy(
|
|
|
287
697
|
physical_size_unit=str(physical_size_unit),
|
|
288
698
|
channels=channels,
|
|
289
699
|
planes=planes,
|
|
700
|
+
chunk_shape=chunk_shape,
|
|
701
|
+
chunk_order=chunk_order,
|
|
702
|
+
build_chunks=build_chunks,
|
|
290
703
|
masks=None,
|
|
291
704
|
)
|
|
292
705
|
|
|
@@ -295,6 +708,7 @@ def from_tiff(
|
|
|
295
708
|
tiff_path: str | Path,
|
|
296
709
|
image_id: Optional[str] = None,
|
|
297
710
|
name: Optional[str] = None,
|
|
711
|
+
image_type: Optional[str] = None,
|
|
298
712
|
channel_names: Optional[Sequence[str]] = None,
|
|
299
713
|
acquisition_datetime: Optional[datetime] = None,
|
|
300
714
|
clamp_to_uint16: bool = True,
|
|
@@ -309,6 +723,7 @@ def from_tiff(
|
|
|
309
723
|
tiff_path: Path to a TIFF readable by bioio.
|
|
310
724
|
image_id: Optional stable image identifier (defaults to stem).
|
|
311
725
|
name: Optional human label (defaults to file name).
|
|
726
|
+
image_type: Optional image kind (e.g., "image", "label").
|
|
312
727
|
channel_names: Optional channel names; defaults to C0..C{n-1}.
|
|
313
728
|
acquisition_datetime: Optional acquisition time (UTC now if None).
|
|
314
729
|
clamp_to_uint16: If True, clamp/cast planes to uint16.
|
|
@@ -338,13 +753,8 @@ def from_tiff(
|
|
|
338
753
|
if size_x <= 0 or size_y <= 0:
|
|
339
754
|
raise ValueError("Image must have positive Y and X dims.")
|
|
340
755
|
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
psize_x = float(getattr(pps, "X", None) or 1.0)
|
|
344
|
-
psize_y = float(getattr(pps, "Y", None) or 1.0)
|
|
345
|
-
psize_z = float(getattr(pps, "Z", None) or 1.0)
|
|
346
|
-
except Exception:
|
|
347
|
-
psize_x = psize_y = psize_z = 1.0
|
|
756
|
+
psize_x, psize_y, psize_z, unit, _pps_valid = _read_physical_pixel_sizes(img)
|
|
757
|
+
psize_unit = unit or "µm"
|
|
348
758
|
|
|
349
759
|
# --- NEW: coerce top-level strings --------------------------------
|
|
350
760
|
img_id = str(image_id or p.stem)
|
|
@@ -383,6 +793,7 @@ def from_tiff(
|
|
|
383
793
|
return to_ome_arrow(
|
|
384
794
|
image_id=img_id,
|
|
385
795
|
name=display_name,
|
|
796
|
+
image_type=image_type,
|
|
386
797
|
acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
|
|
387
798
|
dimension_order=dim_order,
|
|
388
799
|
dtype="uint16",
|
|
@@ -394,7 +805,7 @@ def from_tiff(
|
|
|
394
805
|
physical_size_x=psize_x,
|
|
395
806
|
physical_size_y=psize_y,
|
|
396
807
|
physical_size_z=psize_z,
|
|
397
|
-
physical_size_unit=
|
|
808
|
+
physical_size_unit=psize_unit,
|
|
398
809
|
channels=channels,
|
|
399
810
|
planes=planes,
|
|
400
811
|
masks=None,
|
|
@@ -409,7 +820,23 @@ def from_stack_pattern_path(
|
|
|
409
820
|
channel_names: Optional[List[str]] = None,
|
|
410
821
|
image_id: Optional[str] = None,
|
|
411
822
|
name: Optional[str] = None,
|
|
823
|
+
image_type: Optional[str] = None,
|
|
412
824
|
) -> pa.StructScalar:
|
|
825
|
+
"""Build an OME-Arrow record from a filename pattern describing a stack.
|
|
826
|
+
|
|
827
|
+
Args:
|
|
828
|
+
pattern_path: Path or pattern string describing the stack layout.
|
|
829
|
+
default_dim_for_unspecified: Dimension to use when tokens lack a dim.
|
|
830
|
+
map_series_to: Dimension to map series tokens to (e.g., "T"), or None.
|
|
831
|
+
clamp_to_uint16: Whether to clamp pixel values to uint16.
|
|
832
|
+
channel_names: Optional list of channel names to apply.
|
|
833
|
+
image_id: Optional image identifier override.
|
|
834
|
+
name: Optional display name override.
|
|
835
|
+
image_type: Optional image kind (e.g., "image", "label").
|
|
836
|
+
|
|
837
|
+
Returns:
|
|
838
|
+
A validated OME-Arrow StructScalar describing the stack.
|
|
839
|
+
"""
|
|
413
840
|
path = Path(pattern_path)
|
|
414
841
|
folder = path.parent
|
|
415
842
|
line = path.name.strip()
|
|
@@ -675,6 +1102,7 @@ def from_stack_pattern_path(
|
|
|
675
1102
|
return to_ome_arrow(
|
|
676
1103
|
image_id=str(img_id),
|
|
677
1104
|
name=str(display_name),
|
|
1105
|
+
image_type=image_type,
|
|
678
1106
|
acquisition_datetime=None,
|
|
679
1107
|
dimension_order=dim_order,
|
|
680
1108
|
dtype="uint16",
|
|
@@ -697,6 +1125,7 @@ def from_ome_zarr(
|
|
|
697
1125
|
zarr_path: str | Path,
|
|
698
1126
|
image_id: Optional[str] = None,
|
|
699
1127
|
name: Optional[str] = None,
|
|
1128
|
+
image_type: Optional[str] = None,
|
|
700
1129
|
channel_names: Optional[Sequence[str]] = None,
|
|
701
1130
|
acquisition_datetime: Optional[datetime] = None,
|
|
702
1131
|
clamp_to_uint16: bool = True,
|
|
@@ -715,6 +1144,8 @@ def from_ome_zarr(
|
|
|
715
1144
|
Optional stable image identifier (defaults to directory stem).
|
|
716
1145
|
name:
|
|
717
1146
|
Optional display name (defaults to directory name).
|
|
1147
|
+
image_type:
|
|
1148
|
+
Optional image kind (e.g., "image", "label").
|
|
718
1149
|
channel_names:
|
|
719
1150
|
Optional list of channel names. Defaults to C0, C1, ...
|
|
720
1151
|
acquisition_datetime:
|
|
@@ -741,13 +1172,15 @@ def from_ome_zarr(
|
|
|
741
1172
|
if size_x <= 0 or size_y <= 0:
|
|
742
1173
|
raise ValueError("Image must have positive Y and X dimensions.")
|
|
743
1174
|
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
1175
|
+
psize_x, psize_y, psize_z, unit, pps_valid = _read_physical_pixel_sizes(img)
|
|
1176
|
+
psize_unit = unit or "µm"
|
|
1177
|
+
|
|
1178
|
+
if not pps_valid:
|
|
1179
|
+
ngff_scale = _read_ngff_scale(p)
|
|
1180
|
+
if ngff_scale is not None:
|
|
1181
|
+
psize_x, psize_y, psize_z, unit = ngff_scale
|
|
1182
|
+
if unit:
|
|
1183
|
+
psize_unit = unit
|
|
751
1184
|
|
|
752
1185
|
img_id = str(image_id or p.stem)
|
|
753
1186
|
display_name = str(name or p.name)
|
|
@@ -794,6 +1227,7 @@ def from_ome_zarr(
|
|
|
794
1227
|
return to_ome_arrow(
|
|
795
1228
|
image_id=img_id,
|
|
796
1229
|
name=display_name,
|
|
1230
|
+
image_type=image_type,
|
|
797
1231
|
acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
|
|
798
1232
|
dimension_order=dim_order,
|
|
799
1233
|
dtype="uint16",
|
|
@@ -805,7 +1239,7 @@ def from_ome_zarr(
|
|
|
805
1239
|
physical_size_x=psize_x,
|
|
806
1240
|
physical_size_y=psize_y,
|
|
807
1241
|
physical_size_z=psize_z,
|
|
808
|
-
physical_size_unit=
|
|
1242
|
+
physical_size_unit=psize_unit,
|
|
809
1243
|
channels=channels,
|
|
810
1244
|
planes=planes,
|
|
811
1245
|
masks=None,
|
|
@@ -819,88 +1253,72 @@ def from_ome_parquet(
|
|
|
819
1253
|
row_index: int = 0,
|
|
820
1254
|
strict_schema: bool = False,
|
|
821
1255
|
) -> pa.StructScalar:
|
|
822
|
-
"""
|
|
823
|
-
|
|
1256
|
+
"""Read an OME-Arrow record from a Parquet file.
|
|
1257
|
+
|
|
1258
|
+
Args:
|
|
1259
|
+
parquet_path: Path to the Parquet file.
|
|
1260
|
+
column_name: Column to read; auto-detected when None or invalid.
|
|
1261
|
+
row_index: Row index to extract.
|
|
1262
|
+
strict_schema: Require the exact OME-Arrow schema if True.
|
|
1263
|
+
|
|
1264
|
+
Returns:
|
|
1265
|
+
A typed OME-Arrow StructScalar.
|
|
1266
|
+
|
|
1267
|
+
Raises:
|
|
1268
|
+
FileNotFoundError: If the Parquet path does not exist.
|
|
1269
|
+
ValueError: If the row index is out of range or no suitable column exists.
|
|
824
1270
|
"""
|
|
825
1271
|
p = Path(parquet_path)
|
|
826
1272
|
if not p.exists():
|
|
827
1273
|
raise FileNotFoundError(f"No such file: {p}")
|
|
828
1274
|
|
|
829
1275
|
table = pq.read_table(p)
|
|
1276
|
+
return _ome_arrow_from_table(
|
|
1277
|
+
table,
|
|
1278
|
+
column_name=column_name,
|
|
1279
|
+
row_index=row_index,
|
|
1280
|
+
strict_schema=strict_schema,
|
|
1281
|
+
)
|
|
830
1282
|
|
|
831
|
-
if table.num_rows == 0:
|
|
832
|
-
raise ValueError("Parquet file contains 0 rows; expected at least 1.")
|
|
833
|
-
if not (0 <= row_index < table.num_rows):
|
|
834
|
-
raise ValueError(f"row_index {row_index} out of range [0, {table.num_rows}).")
|
|
835
|
-
|
|
836
|
-
# 1) Locate the OME-Arrow column
|
|
837
|
-
def _struct_matches_ome_fields(t: pa.StructType) -> bool:
|
|
838
|
-
ome_fields = {f.name for f in OME_ARROW_STRUCT}
|
|
839
|
-
col_fields = {f.name for f in t}
|
|
840
|
-
return ome_fields == col_fields
|
|
841
|
-
|
|
842
|
-
requested_name = column_name
|
|
843
|
-
candidate_col = None
|
|
844
|
-
autodetected_name = None
|
|
845
1283
|
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
f"Expect:{OME_ARROW_STRUCT}"
|
|
855
|
-
)
|
|
856
|
-
if not strict_schema and not _struct_matches_ome_fields(arr.type):
|
|
857
|
-
raise ValueError(
|
|
858
|
-
f"Column '{column_name}' does not have the expected OME-Arrow fields."
|
|
859
|
-
)
|
|
860
|
-
candidate_col = arr
|
|
861
|
-
else:
|
|
862
|
-
# Auto-detect a struct column that matches OME-Arrow fields
|
|
863
|
-
for name in table.column_names:
|
|
864
|
-
arr = table[name]
|
|
865
|
-
if pa.types.is_struct(arr.type):
|
|
866
|
-
if strict_schema and arr.type == OME_ARROW_STRUCT:
|
|
867
|
-
candidate_col = arr
|
|
868
|
-
autodetected_name = name
|
|
869
|
-
column_name = name
|
|
870
|
-
break
|
|
871
|
-
if not strict_schema and _struct_matches_ome_fields(arr.type):
|
|
872
|
-
candidate_col = arr
|
|
873
|
-
autodetected_name = name
|
|
874
|
-
column_name = name
|
|
875
|
-
break
|
|
876
|
-
if candidate_col is None:
|
|
877
|
-
if column_name is None:
|
|
878
|
-
hint = "no struct column with OME-Arrow fields was found."
|
|
879
|
-
else:
|
|
880
|
-
hint = f"column '{column_name}' not found and auto-detection failed."
|
|
881
|
-
raise ValueError(f"Could not locate an OME-Arrow struct column: {hint}")
|
|
1284
|
+
def from_ome_vortex(
    vortex_path: str | Path,
    *,
    column_name: Optional[str] = "ome_arrow",
    row_index: int = 0,
    strict_schema: bool = False,
) -> pa.StructScalar:
    """Load one OME-Arrow record from a Vortex file.

    The file is read into an Arrow table in full and the record is then
    selected from that table.

    Args:
        vortex_path: Location of the Vortex file.
        column_name: Column holding the record; auto-detected when None or
            when no such column exists.
        row_index: Index of the row to return.
        strict_schema: When True, the column must match the OME-Arrow schema
            exactly.

    Returns:
        A typed OME-Arrow StructScalar.

    Raises:
        FileNotFoundError: If ``vortex_path`` does not exist.
        ImportError: If the optional `vortex-data` dependency is missing.
        ValueError: If the row index is out of range or no suitable column
            exists.
    """
    source = Path(vortex_path)
    if not source.exists():
        raise FileNotFoundError(f"No such file: {source}")

    # Imported here so that the dependency is only needed when Vortex files
    # are actually read.
    try:
        import vortex
    except ImportError as exc:
        message = "Vortex support requires the optional 'vortex-data' dependency."
        raise ImportError(message) from exc

    arrow_reader = vortex.open(str(source)).to_arrow()
    loaded = arrow_reader.read_all()
    return _ome_arrow_from_table(
        loaded,
        column_name=column_name,
        row_index=row_index,
        strict_schema=strict_schema,
    )
|