ome-arrow 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ome_arrow/ingest.py CHANGED
@@ -3,6 +3,7 @@ Converting to and from OME-Arrow formats.
3
3
  """
4
4
 
5
5
  import itertools
6
+ import json
6
7
  import re
7
8
  import warnings
8
9
  from datetime import datetime, timezone
@@ -20,11 +21,351 @@ from bioio_ome_zarr import Reader as OMEZarrReader
20
21
  from ome_arrow.meta import OME_ARROW_STRUCT, OME_ARROW_TAG_TYPE, OME_ARROW_TAG_VERSION
21
22
 
22
23
 
24
def _ome_arrow_from_table(
    table: pa.Table,
    *,
    column_name: Optional[str],
    row_index: int,
    strict_schema: bool,
) -> pa.StructScalar:
    """Extract a single OME-Arrow record from an Arrow table.

    Args:
        table: Source Arrow table.
        column_name: Column to read. When None, or when the name is not a
            column of ``table``, the first struct column matching the
            OME-Arrow layout is auto-detected instead.
        row_index: Row index to extract.
        strict_schema: If True, the column type must equal OME_ARROW_STRUCT
            exactly. If False, the column only needs to carry the required
            OME-Arrow field names; "image_type", "chunk_grid" and "chunks"
            are treated as optional.

    Returns:
        A typed OME-Arrow StructScalar built with the canonical schema.

    Raises:
        ValueError: If the table is empty, the row index is out of range, the
            named column is not a matching struct, no suitable column exists,
            or the selected row is null.

    Warns:
        UserWarning: Whenever the column was auto-detected instead of being
            read from the requested name.
    """
    if table.num_rows == 0:
        raise ValueError("Table contains 0 rows; expected at least 1.")
    if not (0 <= row_index < table.num_rows):
        raise ValueError(f"row_index {row_index} out of range [0, {table.num_rows}).")

    # 1) Locate the OME-Arrow column.
    # Field names a column must carry in relaxed mode; computed once instead
    # of on every candidate column.
    required_fields = {f.name for f in OME_ARROW_STRUCT} - {
        "image_type",
        "chunk_grid",
        "chunks",
    }

    def _struct_matches_ome_fields(t: pa.StructType) -> bool:
        return required_fields.issubset({f.name for f in t})

    candidate_col = None
    resolved_name: Optional[str] = None
    autodetected = False

    if column_name is not None and column_name in table.column_names:
        arr = table[column_name]
        if not pa.types.is_struct(arr.type):
            raise ValueError(f"Column '{column_name}' is not a Struct; got {arr.type}.")
        if strict_schema and arr.type != OME_ARROW_STRUCT:
            raise ValueError(
                f"Column '{column_name}' schema != OME_ARROW_STRUCT.\n"
                f"Got: {arr.type}\n"
                f"Expect:{OME_ARROW_STRUCT}"
            )
        if not strict_schema and not _struct_matches_ome_fields(arr.type):
            raise ValueError(
                f"Column '{column_name}' does not have the expected OME-Arrow fields."
            )
        candidate_col = arr
        resolved_name = column_name
    else:
        # Auto-detect the first struct column that matches OME-Arrow.
        for name in table.column_names:
            arr = table[name]
            if not pa.types.is_struct(arr.type):
                continue
            if strict_schema:
                matches = arr.type == OME_ARROW_STRUCT
            else:
                matches = _struct_matches_ome_fields(arr.type)
            if matches:
                candidate_col = arr
                resolved_name = name
                autodetected = True
                break
        if candidate_col is None:
            if column_name is None:
                hint = "no struct column with OME-Arrow fields was found."
            else:
                hint = f"column '{column_name}' not found and auto-detection failed."
            raise ValueError(f"Could not locate an OME-Arrow struct column: {hint}")

    # Tell the caller which column was picked. The wording distinguishes an
    # explicit auto-detect request (column_name=None) from a requested name
    # that was missing, so the message never reads "Requested column 'None'".
    if autodetected:
        if column_name is None:
            message = (
                "No column name was requested. "
                f"Auto-detected OME-Arrow column '{resolved_name}'."
            )
        else:
            message = (
                f"Requested column '{column_name}' was not usable or not found. "
                f"Auto-detected OME-Arrow column '{resolved_name}'."
            )
        # stacklevel=2 attributes the warning to the caller of this helper.
        warnings.warn(message, UserWarning, stacklevel=2)

    # 2) Extract the row as a Python dict.
    record_dict: Optional[Dict[str, Any]] = candidate_col.slice(
        row_index, 1
    ).to_pylist()[0]
    if record_dict is None:
        # A null struct converts to None; fail with a clear message rather
        # than an AttributeError from the projection below.
        raise ValueError(
            f"Row {row_index} of column '{resolved_name}' is null; "
            "expected an OME-Arrow struct."
        )

    # Project onto the canonical field list: unexpected fields are dropped and
    # absent ones (for example "image_type" or "chunks") become None.
    record_dict = {f.name: record_dict.get(f.name) for f in OME_ARROW_STRUCT}

    # 3) Reconstruct a typed StructScalar using the canonical schema.
    # NOTE: schema-level metadata (b"ome.arrow.type" / b"ome.arrow.version")
    # is not validated here.
    return pa.scalar(record_dict, type=OME_ARROW_STRUCT)
127
+
128
+
129
def _normalize_unit(unit: str | None) -> str | None:
    """Map common spellings of a length unit onto a canonical symbol.

    Args:
        unit: Unit text such as "micrometer", "um" or "nm". Falsy values mean
            "no unit". Non-string values are converted with ``str`` first.

    Returns:
        "µm" for any recognised micrometre spelling, "nm" for any recognised
        nanometre spelling, None for a missing or blank unit, and the input
        text unchanged for every other unit.
    """
    if not unit:
        return None
    text = unit if isinstance(unit, str) else str(unit)
    key = text.strip().lower()
    if not key:
        # Whitespace-only text carries no unit information.
        return None
    micrometre_aliases = {
        "micrometer",
        "micrometre",
        "micrometers",
        "micrometres",
        "micron",
        "microns",
        "um",
        # The micro symbol has two code points that render alike; accept both.
        "\u00b5m",  # MICRO SIGN
        "\u03bcm",  # GREEK SMALL LETTER MU
    }
    nanometre_aliases = {
        "nanometer",
        "nanometre",
        "nanometers",
        "nanometres",
        "nm",
    }
    if key in micrometre_aliases:
        return "µm"
    if key in nanometre_aliases:
        return "nm"
    return text
138
+
139
+
140
def _read_physical_pixel_sizes(
    img: BioImage,
) -> tuple[float, float, float, str | None, bool]:
    """Read physical pixel sizes and their unit from an image object.

    Args:
        img: Object that may expose a ``physical_pixel_sizes`` attribute with
            ``X``/``Y``/``Z`` (or lower-case) members and an optional
            ``unit``/``units`` member.

    Returns:
        ``(size_x, size_y, size_z, unit, valid)``. ``valid`` is False, with
        sizes of 1.0 and no unit, when the attribute is missing, all three
        sizes are None, or a size cannot be converted to float. A missing or
        zero size falls back to 1.0.
    """
    fallback = (1.0, 1.0, 1.0, None, False)

    sizes_obj = getattr(img, "physical_pixel_sizes", None)
    if sizes_obj is None:
        return fallback

    def _first_truthy(*attr_names):
        # Same result as chaining the lookups with ``or``: the first truthy
        # attribute wins, otherwise the last looked-up value is returned.
        value = None
        for attr in attr_names:
            value = getattr(sizes_obj, attr, None)
            if value:
                break
        return value

    raw_sizes = [_first_truthy(axis, axis.lower()) for axis in ("X", "Y", "Z")]
    if all(size is None for size in raw_sizes):
        return fallback

    try:
        psize_x, psize_y, psize_z = (float(size or 1.0) for size in raw_sizes)
    except Exception:
        return fallback

    raw_unit = _first_truthy("unit", "units")
    unit = None if raw_unit is None else _normalize_unit(str(raw_unit))

    return psize_x, psize_y, psize_z, unit, True
165
+
166
+
167
def _load_zarr_attrs(zarr_path: Path) -> dict:
    """Load the user attributes stored alongside a Zarr group or array.

    ``zarr.json`` is consulted first, using its "attributes" entry and then
    its "attrs" entry. ``.zattrs`` is read only when ``zarr.json`` does not
    exist.

    Args:
        zarr_path: Directory expected to contain ``zarr.json`` or ``.zattrs``.

    Returns:
        The attributes mapping, or an empty dict when no metadata file exists,
        the file cannot be read or parsed, or the parsed content is not a
        JSON object.
    """

    def _read_json(path: Path):
        # JSON is read as UTF-8 explicitly so the result does not depend on
        # the platform's locale encoding.
        try:
            return json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError, RecursionError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            return None

    zarr_json = zarr_path / "zarr.json"
    if zarr_json.exists():
        data = _read_json(zarr_json)
        if not isinstance(data, dict):
            return {}
        attrs = data.get("attributes") or data.get("attrs") or {}
        return attrs if isinstance(attrs, dict) else {}

    zattrs = zarr_path / ".zattrs"
    if zattrs.exists():
        data = _read_json(zattrs)
        return data if isinstance(data, dict) else {}

    return {}
182
+
183
+
184
def _extract_multiscales(attrs: dict) -> list[dict]:
    """Return the "multiscales" list found in Zarr attributes.

    The list nested under the "ome" key is preferred; the top-level
    "multiscales" key is the fallback.

    Args:
        attrs: Attributes mapping; anything that is not a dict yields [].

    Returns:
        The first "multiscales" value that is a list, or [] when none is.
    """
    if not isinstance(attrs, dict):
        return []

    nested = attrs.get("ome")
    holders = (nested, attrs) if isinstance(nested, dict) else (attrs,)
    for holder in holders:
        found = holder.get("multiscales")
        if isinstance(found, list):
            return found
    return []
193
+
194
+
195
def _read_ngff_scale(zarr_path: Path) -> tuple[float, float, float, str | None] | None:
    """Read X/Y/Z scale factors and their unit from multiscales metadata.

    Attributes are loaded from ``zarr_path`` and then from the nearest path
    component ending in ".zarr". The first multiscales entry is used, and
    within it the dataset whose path is "0" (or the first dataset) supplies
    the "scale" coordinate transformation.

    This is a best-effort reader: metadata that does not have the expected
    layout yields None rather than an exception.

    Args:
        zarr_path: Path to an OME-Zarr group, or to a location inside one.

    Returns:
        ``(size_x, size_y, size_z, unit)``, or None when no usable scale is
        found. Axes absent from the metadata default to 1.0. ``unit`` is None
        unless every spatial axis that declares a unit declares the same one.
    """

    def _as_list(value) -> list:
        return list(value) if isinstance(value, (list, tuple)) else []

    # Path.suffix yields only the final extension, so "image.ome.zarr" is
    # matched through ".zarr" as well.
    zarr_root = next(
        (
            node
            for node in (zarr_path, *zarr_path.parents)
            if node.suffix.lower() == ".zarr"
        ),
        zarr_path,
    )

    multiscales: list = []
    for candidate in (zarr_path, zarr_root):
        multiscales = _extract_multiscales(_load_zarr_attrs(candidate))
        if multiscales:
            break
    if not multiscales:
        return None

    ms = multiscales[0]
    if not isinstance(ms, dict):
        return None
    axes = _as_list(ms.get("axes"))
    datasets = [d for d in _as_list(ms.get("datasets")) if isinstance(d, dict)]
    if not axes or not datasets:
        return None

    # Prefer the dataset stored at path "0"; otherwise use the first one.
    ds = next((d for d in datasets if str(d.get("path")) == "0"), datasets[0])
    cts = _as_list(ds.get("coordinateTransformations"))
    scale_ct = next(
        (ct for ct in cts if isinstance(ct, dict) and ct.get("type") == "scale"),
        None,
    )
    if not scale_ct:
        return None

    scale = _as_list(scale_ct.get("scale"))
    if len(scale) != len(axes):
        return None

    axis_scale: dict[str, float] = {}
    axis_unit: dict[str, str] = {}
    for factor, ax in zip(scale, axes):
        if isinstance(ax, dict):
            raw_name, raw_unit = ax.get("name", ""), ax.get("unit")
        else:
            # Tolerate bare axis names (presumably older NGFF layouts - TODO
            # confirm) instead of raising AttributeError.
            raw_name, raw_unit = ax, None
        name = str(raw_name).lower()
        if name not in {"x", "y", "z"}:
            continue
        try:
            axis_scale[name] = float(factor)
        except (TypeError, ValueError):
            continue
        # Only text is passed on, so a malformed unit cannot raise here.
        unit = _normalize_unit(raw_unit) if isinstance(raw_unit, str) else None
        if unit:
            axis_unit[name] = unit

    if not axis_scale:
        return None

    psize_x = axis_scale.get("x", 1.0)
    psize_y = axis_scale.get("y", 1.0)
    psize_z = axis_scale.get("z", 1.0)

    # Report a unit only when the spatial axes agree on it.
    units = [axis_unit[a] for a in ("x", "y", "z") if a in axis_unit]
    unit = units[0] if len(set(units)) == 1 else None

    return psize_x, psize_y, psize_z, unit
250
+
251
+
252
def _normalize_chunk_shape(
    chunk_shape: Optional[Tuple[int, int, int]],
    size_z: int,
    size_y: int,
    size_x: int,
) -> Tuple[int, int, int]:
    """Clamp a requested chunk shape to the image extent.

    Args:
        chunk_shape: Requested (Z, Y, X) chunk shape; None selects
            (1, 512, 512).
        size_z: Total Z size of the image.
        size_y: Total Y size of the image.
        size_x: Total X size of the image.

    Returns:
        Tuple[int, int, int]: (Z, Y, X) chunk shape where every entry is at
        least 1 and no larger than the matching image size (when that size
        is positive).

    Raises:
        ValueError: If ``chunk_shape`` is not a list/tuple of three values
            convertible to int, or if any value is not positive.
    """
    shape_error = "chunk_shape must be a sequence of three integers (z,y,x)"

    requested = (1, 512, 512) if chunk_shape is None else chunk_shape
    if not (isinstance(requested, (list, tuple)) and len(requested) == 3):
        raise ValueError(shape_error)

    try:
        wanted = [int(value) for value in requested]
    except Exception as exc:
        raise ValueError(shape_error) from exc

    if min(wanted) <= 0:
        raise ValueError("chunk_shape values must be positive integers")

    limits = (int(size_z), int(size_y), int(size_x))
    cz, cy, cx = (max(1, min(want, limit)) for want, limit in zip(wanted, limits))
    return cz, cy, cx
285
+
286
+
287
def _build_chunks_from_planes(
    *,
    planes: List[Dict[str, Any]],
    size_t: int,
    size_c: int,
    size_z: int,
    size_y: int,
    size_x: int,
    chunk_shape: Optional[Tuple[int, int, int]],
    chunk_order: str = "ZYX",
) -> List[Dict[str, Any]]:
    """Tile a list of flattened planes into (Z, Y, X) chunks.

    Args:
        planes: Plane dicts with keys "z", "t", "c" and "pixels"; each
            "pixels" value is reshaped to (size_y, size_x).
        size_t: Total T size of the image.
        size_c: Total C size of the image.
        size_z: Total Z size of the image.
        size_y: Total Y size of the image.
        size_x: Total X size of the image.
        chunk_shape: Desired chunk shape as (Z, Y, X); normalized against the
            image size before tiling.
        chunk_order: Flattening order for chunk pixels; only "ZYX" is
            accepted (case-insensitive).

    Returns:
        List[Dict[str, Any]]: One dict per chunk, iterated in T, C, Z, Y, X
        order, holding the chunk origin ("t", "c", "z", "y", "x"), its
        extent ("shape_z", "shape_y", "shape_x") and "pixels" as a flat list.
        Positions without a matching plane stay zero-filled.

    Raises:
        ValueError: If an unsupported chunk_order is requested.
    """
    if str(chunk_order).upper() != "ZYX":
        raise ValueError("Only chunk_order='ZYX' is supported for now.")

    cz, cy, cx = _normalize_chunk_shape(chunk_shape, size_z, size_y, size_x)

    # Index every plane by (t, c, z) as a 2-D array for tile slicing.
    plane_lookup: Dict[Tuple[int, int, int], np.ndarray] = {}
    for entry in planes:
        z_idx, t_idx, c_idx = int(entry["z"]), int(entry["t"]), int(entry["c"])
        plane_lookup[(t_idx, c_idx, z_idx)] = np.asarray(entry["pixels"]).reshape(
            size_y, size_x
        )

    # Chunks take the dtype of the first plane; uint16 when there are none.
    first_plane = next(iter(plane_lookup.values()), None)
    fill_dtype = np.uint16 if first_plane is None else first_plane.dtype

    tile_origins = itertools.product(
        range(size_t),
        range(size_c),
        range(0, size_z, cz),
        range(0, size_y, cy),
        range(0, size_x, cx),
    )

    chunks: List[Dict[str, Any]] = []
    for t, c, z0, y0, x0 in tile_origins:
        # Edge tiles are truncated to whatever remains of the image.
        depth = min(cz, size_z - z0)
        height = min(cy, size_y - y0)
        width = min(cx, size_x - x0)

        tile = np.zeros((depth, height, width), dtype=fill_dtype)
        for offset in range(depth):
            source = plane_lookup.get((t, c, z0 + offset))
            if source is not None:
                tile[offset] = source[y0 : y0 + height, x0 : x0 + width]

        chunks.append(
            {
                "t": t,
                "c": c,
                "z": z0,
                "y": y0,
                "x": x0,
                "shape_z": depth,
                "shape_y": height,
                "shape_x": width,
                "pixels": tile.reshape(-1).tolist(),
            }
        )
    return chunks
361
+
362
+
23
363
  def to_ome_arrow(
24
364
  type_: str = OME_ARROW_TAG_TYPE,
25
365
  version: str = OME_ARROW_TAG_VERSION,
26
366
  image_id: str = "unnamed",
27
367
  name: str = "unknown",
368
+ image_type: str | None = "image",
28
369
  acquisition_datetime: Optional[datetime] = None,
29
370
  dimension_order: str = "XYZCT",
30
371
  dtype: str = "uint16",
@@ -39,6 +380,10 @@ def to_ome_arrow(
39
380
  physical_size_unit: str = "µm",
40
381
  channels: Optional[List[Dict[str, Any]]] = None,
41
382
  planes: Optional[List[Dict[str, Any]]] = None,
383
+ chunks: Optional[List[Dict[str, Any]]] = None,
384
+ chunk_shape: Optional[Tuple[int, int, int]] = (1, 512, 512), # (Z, Y, X)
385
+ chunk_order: str = "ZYX",
386
+ build_chunks: bool = True,
42
387
  masks: Any = None,
43
388
  ) -> pa.StructScalar:
44
389
  """
@@ -53,6 +398,9 @@ def to_ome_arrow(
53
398
  version: Specification version string.
54
399
  image_id: Unique image identifier.
55
400
  name: Human-friendly name.
401
+ image_type: Open-ended image kind (e.g., "image", "label"). Note that
402
+ from_* helpers pass image_type=None by default to preserve
403
+ "unspecified" vs explicitly set ("image").
56
404
  acquisition_datetime: Datetime of acquisition (defaults to now).
57
405
  dimension_order: Dimension order ("XYZCT" or "XYCT").
58
406
  dtype: Pixel data type string (e.g., "uint16").
@@ -61,6 +409,12 @@ def to_ome_arrow(
61
409
  physical_size_unit: Unit string, default "µm".
62
410
  channels: List of channel dicts. Autogenerates one if None.
63
411
  planes: List of plane dicts. Empty if None.
412
+ chunks: Optional list of chunk dicts. If None and build_chunks is True,
413
+ chunks are derived from planes using chunk_shape.
414
+ chunk_shape: Chunk shape as (Z, Y, X). Defaults to (1, 512, 512).
415
+ chunk_order: Flattening order for chunk pixels (default "ZYX").
416
+ build_chunks: If True, build chunked pixels from planes when chunks
417
+ is None.
64
418
  masks: Optional placeholder for future annotations.
65
419
 
66
420
  Returns:
@@ -76,6 +430,7 @@ def to_ome_arrow(
76
430
  version = str(version)
77
431
  image_id = str(image_id)
78
432
  name = str(name)
433
+ image_type = None if image_type is None else str(image_type)
79
434
  dimension_order = str(dimension_order)
80
435
  dtype = str(dtype)
81
436
  physical_size_unit = str(physical_size_unit)
@@ -105,11 +460,62 @@ def to_ome_arrow(
105
460
  if planes is None:
106
461
  planes = [{"z": 0, "t": 0, "c": 0, "pixels": [0] * (size_x * size_y)}]
107
462
 
463
+ if chunks is None and build_chunks:
464
+ chunks = _build_chunks_from_planes(
465
+ planes=planes,
466
+ size_t=size_t,
467
+ size_c=size_c,
468
+ size_z=size_z,
469
+ size_y=size_y,
470
+ size_x=size_x,
471
+ chunk_shape=chunk_shape,
472
+ chunk_order=chunk_order,
473
+ )
474
+
475
+ chunk_grid = None
476
+ if chunks is not None:
477
+ chunk_order = str(chunk_order).upper()
478
+ if chunk_order != "ZYX":
479
+ raise ValueError("Only chunk_order='ZYX' is supported for now.")
480
+ if len(chunks) == 0:
481
+ raise ValueError("chunks must not be an empty list")
482
+ first = chunks[0]
483
+ try:
484
+ derived_shape = (
485
+ int(first["shape_z"]),
486
+ int(first["shape_y"]),
487
+ int(first["shape_x"]),
488
+ )
489
+ except Exception as exc:
490
+ raise ValueError(
491
+ "chunks entries must include shape_z/shape_y/shape_x"
492
+ ) from exc
493
+ if derived_shape[0] <= 0 or derived_shape[1] <= 0 or derived_shape[2] <= 0:
494
+ raise ValueError("chunk shapes must be positive integers")
495
+ if chunk_shape is not None:
496
+ norm_shape = _normalize_chunk_shape(chunk_shape, size_z, size_y, size_x)
497
+ if norm_shape != derived_shape:
498
+ raise ValueError(
499
+ "chunk_shape does not match provided chunks "
500
+ f"(chunk_shape={norm_shape}, chunks_shape={derived_shape})"
501
+ )
502
+ cz, cy, cx = _normalize_chunk_shape(derived_shape, size_z, size_y, size_x)
503
+ chunk_grid = {
504
+ "order": "TCZYX",
505
+ "chunk_t": 1,
506
+ "chunk_c": 1,
507
+ "chunk_z": cz,
508
+ "chunk_y": cy,
509
+ "chunk_x": cx,
510
+ "chunk_order": str(chunk_order),
511
+ }
512
+
108
513
  record = {
109
514
  "type": type_,
110
515
  "version": version,
111
516
  "id": image_id,
112
517
  "name": name,
518
+ "image_type": image_type,
113
519
  "acquisition_datetime": acquisition_datetime or datetime.now(timezone.utc),
114
520
  "pixels_meta": {
115
521
  "dimension_order": dimension_order,
@@ -127,6 +533,8 @@ def to_ome_arrow(
127
533
  "physical_size_z_unit": physical_size_unit,
128
534
  "channels": channels,
129
535
  },
536
+ "chunk_grid": chunk_grid,
537
+ "chunks": chunks,
130
538
  "planes": planes,
131
539
  "masks": masks,
132
540
  }
@@ -140,9 +548,13 @@ def from_numpy(
140
548
  dim_order: str = "TCZYX",
141
549
  image_id: Optional[str] = None,
142
550
  name: Optional[str] = None,
551
+ image_type: Optional[str] = None,
143
552
  channel_names: Optional[Sequence[str]] = None,
144
553
  acquisition_datetime: Optional[datetime] = None,
145
554
  clamp_to_uint16: bool = True,
555
+ chunk_shape: Optional[Tuple[int, int, int]] = (1, 512, 512),
556
+ chunk_order: str = "ZYX",
557
+ build_chunks: bool = True,
146
558
  # meta
147
559
  physical_size_x: float = 1.0,
148
560
  physical_size_y: float = 1.0,
@@ -150,42 +562,39 @@ def from_numpy(
150
562
  physical_size_unit: str = "µm",
151
563
  dtype_meta: Optional[str] = None, # if None, inferred from output dtype
152
564
  ) -> pa.StructScalar:
153
- """
154
- Build an OME-Arrow StructScalar from a NumPy array.
155
-
156
- Parameters
157
- ----------
158
- arr : np.ndarray
159
- Image data with axes described by `dim_order`.
160
- dim_order : str, default "TCZYX"
161
- Axis labels for `arr`. Must include "Y" and "X".
162
- Supported examples: "YX", "ZYX", "CYX", "CZYX", "TYX", "TCYX", "TCZYX".
163
- image_id, name : Optional[str]
164
- Identifiers to embed in the record.
165
- channel_names : Optional[Sequence[str]]
166
- Names for channels; defaults to C0..C{n-1}.
167
- acquisition_datetime : Optional[datetime]
168
- Defaults to now (UTC) if None.
169
- clamp_to_uint16 : bool, default True
170
- If True, clamp/cast planes to uint16 before serialization.
171
- physical_size_x/y/z : float
172
- Spatial pixel sizes (µm), Z used if present.
173
- physical_size_unit : str
174
- Unit string for spatial axes (default "µm").
175
- dtype_meta : Optional[str]
176
- Pixel dtype string to place in metadata; if None, inferred from the
177
- (possibly cast) array's dtype.
178
-
179
- Returns
180
- -------
181
- pa.StructScalar
182
- Typed OME-Arrow record (schema = OME_ARROW_STRUCT).
183
-
184
- Notes
185
- -----
186
- - If Z is not in `dim_order`, `size_z` will be 1 and the meta
187
- dimension_order becomes "XYCT"; otherwise "XYZCT".
188
- - If T/C are absent in `dim_order`, they default to size 1.
565
+ """Build an OME-Arrow StructScalar from a NumPy array.
566
+
567
+ Args:
568
+ arr: Image data with axes described by `dim_order`.
569
+ dim_order: Axis labels for `arr`. Must include "Y" and "X".
570
+ Supported examples: "YX", "ZYX", "CYX", "CZYX", "TYX", "TCYX", "TCZYX".
571
+ image_id: Optional stable image identifier.
572
+ name: Optional human label.
573
+ image_type: Open-ended image kind (e.g., "image", "label").
574
+ channel_names: Names for channels; defaults to C0..C{n-1}.
575
+ acquisition_datetime: Defaults to now (UTC) if None.
576
+ clamp_to_uint16: If True, clamp/cast planes to uint16 before serialization.
577
+ chunk_shape: Chunk shape as (Z, Y, X). Defaults to (1, 512, 512).
578
+ chunk_order: Flattening order for chunk pixels (default "ZYX").
579
+ build_chunks: If True, build chunked pixels from planes.
580
+ physical_size_x: Spatial pixel size (µm) for X.
581
+ physical_size_y: Spatial pixel size (µm) for Y.
582
+ physical_size_z: Spatial pixel size (µm) for Z when present.
583
+ physical_size_unit: Unit string for spatial axes (default "µm").
584
+ dtype_meta: Pixel dtype string to place in metadata; if None, inferred
585
+ from the (possibly cast) array's dtype.
586
+
587
+ Returns:
588
+ pa.StructScalar: Typed OME-Arrow record (schema = OME_ARROW_STRUCT).
589
+
590
+ Raises:
591
+ TypeError: If `arr` is not a NumPy ndarray.
592
+ ValueError: If `dim_order` is invalid or dimensions are non-positive.
593
+
594
+ Notes:
595
+ - If Z is not in `dim_order`, `size_z` will be 1 and the meta
596
+ dimension_order becomes "XYCT"; otherwise "XYZCT".
597
+ - If T/C are absent in `dim_order`, they default to size 1.
189
598
  """
190
599
 
191
600
  if not isinstance(arr, np.ndarray):
@@ -273,6 +682,7 @@ def from_numpy(
273
682
  return to_ome_arrow(
274
683
  image_id=str(image_id or "unnamed"),
275
684
  name=str(name or "unknown"),
685
+ image_type=image_type,
276
686
  acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
277
687
  dimension_order=meta_dim_order,
278
688
  dtype=dtype_str,
@@ -287,6 +697,9 @@ def from_numpy(
287
697
  physical_size_unit=str(physical_size_unit),
288
698
  channels=channels,
289
699
  planes=planes,
700
+ chunk_shape=chunk_shape,
701
+ chunk_order=chunk_order,
702
+ build_chunks=build_chunks,
290
703
  masks=None,
291
704
  )
292
705
 
@@ -295,6 +708,7 @@ def from_tiff(
295
708
  tiff_path: str | Path,
296
709
  image_id: Optional[str] = None,
297
710
  name: Optional[str] = None,
711
+ image_type: Optional[str] = None,
298
712
  channel_names: Optional[Sequence[str]] = None,
299
713
  acquisition_datetime: Optional[datetime] = None,
300
714
  clamp_to_uint16: bool = True,
@@ -309,6 +723,7 @@ def from_tiff(
309
723
  tiff_path: Path to a TIFF readable by bioio.
310
724
  image_id: Optional stable image identifier (defaults to stem).
311
725
  name: Optional human label (defaults to file name).
726
+ image_type: Optional image kind (e.g., "image", "label").
312
727
  channel_names: Optional channel names; defaults to C0..C{n-1}.
313
728
  acquisition_datetime: Optional acquisition time (UTC now if None).
314
729
  clamp_to_uint16: If True, clamp/cast planes to uint16.
@@ -338,13 +753,8 @@ def from_tiff(
338
753
  if size_x <= 0 or size_y <= 0:
339
754
  raise ValueError("Image must have positive Y and X dims.")
340
755
 
341
- pps = getattr(img, "physical_pixel_sizes", None)
342
- try:
343
- psize_x = float(getattr(pps, "X", None) or 1.0)
344
- psize_y = float(getattr(pps, "Y", None) or 1.0)
345
- psize_z = float(getattr(pps, "Z", None) or 1.0)
346
- except Exception:
347
- psize_x = psize_y = psize_z = 1.0
756
+ psize_x, psize_y, psize_z, unit, _pps_valid = _read_physical_pixel_sizes(img)
757
+ psize_unit = unit or "µm"
348
758
 
349
759
  # --- NEW: coerce top-level strings --------------------------------
350
760
  img_id = str(image_id or p.stem)
@@ -383,6 +793,7 @@ def from_tiff(
383
793
  return to_ome_arrow(
384
794
  image_id=img_id,
385
795
  name=display_name,
796
+ image_type=image_type,
386
797
  acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
387
798
  dimension_order=dim_order,
388
799
  dtype="uint16",
@@ -394,7 +805,7 @@ def from_tiff(
394
805
  physical_size_x=psize_x,
395
806
  physical_size_y=psize_y,
396
807
  physical_size_z=psize_z,
397
- physical_size_unit="µm",
808
+ physical_size_unit=psize_unit,
398
809
  channels=channels,
399
810
  planes=planes,
400
811
  masks=None,
@@ -409,7 +820,23 @@ def from_stack_pattern_path(
409
820
  channel_names: Optional[List[str]] = None,
410
821
  image_id: Optional[str] = None,
411
822
  name: Optional[str] = None,
823
+ image_type: Optional[str] = None,
412
824
  ) -> pa.StructScalar:
825
+ """Build an OME-Arrow record from a filename pattern describing a stack.
826
+
827
+ Args:
828
+ pattern_path: Path or pattern string describing the stack layout.
829
+ default_dim_for_unspecified: Dimension to use when tokens lack a dim.
830
+ map_series_to: Dimension to map series tokens to (e.g., "T"), or None.
831
+ clamp_to_uint16: Whether to clamp pixel values to uint16.
832
+ channel_names: Optional list of channel names to apply.
833
+ image_id: Optional image identifier override.
834
+ name: Optional display name override.
835
+ image_type: Optional image kind (e.g., "image", "label").
836
+
837
+ Returns:
838
+ A validated OME-Arrow StructScalar describing the stack.
839
+ """
413
840
  path = Path(pattern_path)
414
841
  folder = path.parent
415
842
  line = path.name.strip()
@@ -675,6 +1102,7 @@ def from_stack_pattern_path(
675
1102
  return to_ome_arrow(
676
1103
  image_id=str(img_id),
677
1104
  name=str(display_name),
1105
+ image_type=image_type,
678
1106
  acquisition_datetime=None,
679
1107
  dimension_order=dim_order,
680
1108
  dtype="uint16",
@@ -697,6 +1125,7 @@ def from_ome_zarr(
697
1125
  zarr_path: str | Path,
698
1126
  image_id: Optional[str] = None,
699
1127
  name: Optional[str] = None,
1128
+ image_type: Optional[str] = None,
700
1129
  channel_names: Optional[Sequence[str]] = None,
701
1130
  acquisition_datetime: Optional[datetime] = None,
702
1131
  clamp_to_uint16: bool = True,
@@ -715,6 +1144,8 @@ def from_ome_zarr(
715
1144
  Optional stable image identifier (defaults to directory stem).
716
1145
  name:
717
1146
  Optional display name (defaults to directory name).
1147
+ image_type:
1148
+ Optional image kind (e.g., "image", "label").
718
1149
  channel_names:
719
1150
  Optional list of channel names. Defaults to C0, C1, ...
720
1151
  acquisition_datetime:
@@ -741,13 +1172,15 @@ def from_ome_zarr(
741
1172
  if size_x <= 0 or size_y <= 0:
742
1173
  raise ValueError("Image must have positive Y and X dimensions.")
743
1174
 
744
- pps = getattr(img, "physical_pixel_sizes", None)
745
- try:
746
- psize_x = float(getattr(pps, "X", None) or 1.0)
747
- psize_y = float(getattr(pps, "Y", None) or 1.0)
748
- psize_z = float(getattr(pps, "Z", None) or 1.0)
749
- except Exception:
750
- psize_x = psize_y = psize_z = 1.0
1175
+ psize_x, psize_y, psize_z, unit, pps_valid = _read_physical_pixel_sizes(img)
1176
+ psize_unit = unit or "µm"
1177
+
1178
+ if not pps_valid:
1179
+ ngff_scale = _read_ngff_scale(p)
1180
+ if ngff_scale is not None:
1181
+ psize_x, psize_y, psize_z, unit = ngff_scale
1182
+ if unit:
1183
+ psize_unit = unit
751
1184
 
752
1185
  img_id = str(image_id or p.stem)
753
1186
  display_name = str(name or p.name)
@@ -794,6 +1227,7 @@ def from_ome_zarr(
794
1227
  return to_ome_arrow(
795
1228
  image_id=img_id,
796
1229
  name=display_name,
1230
+ image_type=image_type,
797
1231
  acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
798
1232
  dimension_order=dim_order,
799
1233
  dtype="uint16",
@@ -805,7 +1239,7 @@ def from_ome_zarr(
805
1239
  physical_size_x=psize_x,
806
1240
  physical_size_y=psize_y,
807
1241
  physical_size_z=psize_z,
808
- physical_size_unit="µm",
1242
+ physical_size_unit=psize_unit,
809
1243
  channels=channels,
810
1244
  planes=planes,
811
1245
  masks=None,
@@ -819,88 +1253,72 @@ def from_ome_parquet(
819
1253
  row_index: int = 0,
820
1254
  strict_schema: bool = False,
821
1255
  ) -> pa.StructScalar:
822
- """
823
- Read an OME-Arrow record from a Parquet file and return a typed StructScalar.
1256
+ """Read an OME-Arrow record from a Parquet file.
1257
+
1258
+ Args:
1259
+ parquet_path: Path to the Parquet file.
1260
+ column_name: Column to read; auto-detected when None or invalid.
1261
+ row_index: Row index to extract.
1262
+ strict_schema: Require the exact OME-Arrow schema if True.
1263
+
1264
+ Returns:
1265
+ A typed OME-Arrow StructScalar.
1266
+
1267
+ Raises:
1268
+ FileNotFoundError: If the Parquet path does not exist.
1269
+ ValueError: If the row index is out of range or no suitable column exists.
824
1270
  """
825
1271
  p = Path(parquet_path)
826
1272
  if not p.exists():
827
1273
  raise FileNotFoundError(f"No such file: {p}")
828
1274
 
829
1275
  table = pq.read_table(p)
1276
+ return _ome_arrow_from_table(
1277
+ table,
1278
+ column_name=column_name,
1279
+ row_index=row_index,
1280
+ strict_schema=strict_schema,
1281
+ )
830
1282
 
831
- if table.num_rows == 0:
832
- raise ValueError("Parquet file contains 0 rows; expected at least 1.")
833
- if not (0 <= row_index < table.num_rows):
834
- raise ValueError(f"row_index {row_index} out of range [0, {table.num_rows}).")
835
-
836
- # 1) Locate the OME-Arrow column
837
- def _struct_matches_ome_fields(t: pa.StructType) -> bool:
838
- ome_fields = {f.name for f in OME_ARROW_STRUCT}
839
- col_fields = {f.name for f in t}
840
- return ome_fields == col_fields
841
-
842
- requested_name = column_name
843
- candidate_col = None
844
- autodetected_name = None
845
1283
 
846
- if column_name is not None and column_name in table.column_names:
847
- arr = table[column_name]
848
- if not pa.types.is_struct(arr.type):
849
- raise ValueError(f"Column '{column_name}' is not a Struct; got {arr.type}.")
850
- if strict_schema and arr.type != OME_ARROW_STRUCT:
851
- raise ValueError(
852
- f"Column '{column_name}' schema != OME_ARROW_STRUCT.\n"
853
- f"Got: {arr.type}\n"
854
- f"Expect:{OME_ARROW_STRUCT}"
855
- )
856
- if not strict_schema and not _struct_matches_ome_fields(arr.type):
857
- raise ValueError(
858
- f"Column '{column_name}' does not have the expected OME-Arrow fields."
859
- )
860
- candidate_col = arr
861
- else:
862
- # Auto-detect a struct column that matches OME-Arrow fields
863
- for name in table.column_names:
864
- arr = table[name]
865
- if pa.types.is_struct(arr.type):
866
- if strict_schema and arr.type == OME_ARROW_STRUCT:
867
- candidate_col = arr
868
- autodetected_name = name
869
- column_name = name
870
- break
871
- if not strict_schema and _struct_matches_ome_fields(arr.type):
872
- candidate_col = arr
873
- autodetected_name = name
874
- column_name = name
875
- break
876
- if candidate_col is None:
877
- if column_name is None:
878
- hint = "no struct column with OME-Arrow fields was found."
879
- else:
880
- hint = f"column '{column_name}' not found and auto-detection failed."
881
- raise ValueError(f"Could not locate an OME-Arrow struct column: {hint}")
1284
+ def from_ome_vortex(
1285
+ vortex_path: str | Path,
1286
+ *,
1287
+ column_name: Optional[str] = "ome_arrow",
1288
+ row_index: int = 0,
1289
+ strict_schema: bool = False,
1290
+ ) -> pa.StructScalar:
1291
+ """Read an OME-Arrow record from a Vortex file.
882
1292
 
883
- # Emit warning if auto-detection was used
884
- if autodetected_name is not None and autodetected_name != requested_name:
885
- warnings.warn(
886
- f"Requested column '{requested_name}' was not usable or not found. "
887
- f"Auto-detected OME-Arrow column '{autodetected_name}'.",
888
- UserWarning,
889
- stacklevel=2,
890
- )
1293
+ Args:
1294
+ vortex_path: Path to the Vortex file.
1295
+ column_name: Column to read; auto-detected when None or invalid.
1296
+ row_index: Row index to extract.
1297
+ strict_schema: Require the exact OME-Arrow schema if True.
891
1298
 
892
- # 2) Extract the row as a Python dict
893
- record_dict: Dict[str, Any] = candidate_col.slice(row_index, 1).to_pylist()[0]
1299
+ Returns:
1300
+ A typed OME-Arrow StructScalar.
894
1301
 
895
- # 3) Reconstruct a typed StructScalar using the canonical schema
896
- scalar = pa.scalar(record_dict, type=OME_ARROW_STRUCT)
1302
+ Raises:
1303
+ FileNotFoundError: If the Vortex path does not exist.
1304
+ ImportError: If the optional `vortex-data` dependency is missing.
1305
+ ValueError: If the row index is out of range or no suitable column exists.
1306
+ """
1307
+ p = Path(vortex_path)
1308
+ if not p.exists():
1309
+ raise FileNotFoundError(f"No such file: {p}")
897
1310
 
898
- # Optional: soft validation via file-level metadata (if present)
899
1311
  try:
900
- meta = table.schema.metadata or {}
901
- meta.get(b"ome.arrow.type", b"").decode() == str(OME_ARROW_TAG_TYPE)
902
- meta.get(b"ome.arrow.version", b"").decode() == str(OME_ARROW_TAG_VERSION)
903
- except Exception:
904
- pass
905
-
906
- return scalar
1312
+ import vortex
1313
+ except ImportError as exc:
1314
+ raise ImportError(
1315
+ "Vortex support requires the optional 'vortex-data' dependency."
1316
+ ) from exc
1317
+
1318
+ table = vortex.open(str(p)).to_arrow().read_all()
1319
+ return _ome_arrow_from_table(
1320
+ table,
1321
+ column_name=column_name,
1322
+ row_index=row_index,
1323
+ strict_schema=strict_schema,
1324
+ )