PyPI - ome-arrow - Versions diffs - 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl - Mend

ome-arrow 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

ome_arrow/__init__.py +8 -1
ome_arrow/_version.py +2 -2
ome_arrow/core.py +72 -7
ome_arrow/export.py +302 -8
ome_arrow/ingest.py +542 -124
ome_arrow/meta.py +41 -0
ome_arrow/transform.py +34 -0
ome_arrow/view.py +20 -22
{ome_arrow-0.0.4.dist-info → ome_arrow-0.0.6.dist-info}/METADATA +7 -2
ome_arrow-0.0.6.dist-info/RECORD +14 -0
{ome_arrow-0.0.4.dist-info → ome_arrow-0.0.6.dist-info}/WHEEL +1 -1
ome_arrow-0.0.4.dist-info/RECORD +0 -14
{ome_arrow-0.0.4.dist-info → ome_arrow-0.0.6.dist-info}/licenses/LICENSE +0 -0
{ome_arrow-0.0.4.dist-info → ome_arrow-0.0.6.dist-info}/top_level.txt +0 -0

ome_arrow/ingest.py CHANGED Viewed

@@ -3,6 +3,7 @@ Converting to and from OME-Arrow formats.
 """
 import itertools
+import json
 import re
 import warnings
 from datetime import datetime, timezone
@@ -20,11 +21,351 @@ from bioio_ome_zarr import Reader as OMEZarrReader
 from ome_arrow.meta import OME_ARROW_STRUCT, OME_ARROW_TAG_TYPE, OME_ARROW_TAG_VERSION
+def _ome_arrow_from_table(
+    table: pa.Table,
+    *,
+    column_name: Optional[str],
+    row_index: int,
+    strict_schema: bool,
+) -> pa.StructScalar:
+    """Extract a single OME-Arrow record from an Arrow table.
+    Args:
+        table: Source Arrow table.
+        column_name: Column to read; auto-detected when None or invalid.
+        row_index: Row index to extract.
+        strict_schema: Require the exact OME-Arrow schema if True.
+    Returns:
+        A typed OME-Arrow StructScalar.
+    Raises:
+        ValueError: If the row index is out of range or no suitable column exists.
+    """
+    if table.num_rows == 0:
+        raise ValueError("Table contains 0 rows; expected at least 1.")
+    if not (0 <= row_index < table.num_rows):
+        raise ValueError(f"row_index {row_index} out of range [0, {table.num_rows}).")
+    # 1) Locate the OME-Arrow column
+    def _struct_matches_ome_fields(t: pa.StructType) -> bool:
+        ome_fields = {f.name for f in OME_ARROW_STRUCT}
+        required_fields = ome_fields - {"image_type", "chunk_grid", "chunks"}
+        col_fields = {f.name for f in t}
+        return required_fields.issubset(col_fields)
+    requested_name = column_name
+    candidate_col = None
+    autodetected_name = None
+    if column_name is not None and column_name in table.column_names:
+        arr = table[column_name]
+        if not pa.types.is_struct(arr.type):
+            raise ValueError(f"Column '{column_name}' is not a Struct; got {arr.type}.")
+        if strict_schema and arr.type != OME_ARROW_STRUCT:
+            raise ValueError(
+                f"Column '{column_name}' schema != OME_ARROW_STRUCT.\n"
+                f"Got:   {arr.type}\n"
+                f"Expect:{OME_ARROW_STRUCT}"
+            )
+        if not strict_schema and not _struct_matches_ome_fields(arr.type):
+            raise ValueError(
+                f"Column '{column_name}' does not have the expected OME-Arrow fields."
+            )
+        candidate_col = arr
+    else:
+        # Auto-detect a struct column that matches OME-Arrow fields
+        for name in table.column_names:
+            arr = table[name]
+            if pa.types.is_struct(arr.type):
+                if strict_schema and arr.type == OME_ARROW_STRUCT:
+                    candidate_col = arr
+                    autodetected_name = name
+                    column_name = name
+                    break
+                if not strict_schema and _struct_matches_ome_fields(arr.type):
+                    candidate_col = arr
+                    autodetected_name = name
+                    column_name = name
+                    break
+        if candidate_col is None:
+            if column_name is None:
+                hint = "no struct column with OME-Arrow fields was found."
+            else:
+                hint = f"column '{column_name}' not found and auto-detection failed."
+            raise ValueError(f"Could not locate an OME-Arrow struct column: {hint}")
+    # Emit warning if auto-detection was used
+    if autodetected_name is not None and autodetected_name != requested_name:
+        warnings.warn(
+            f"Requested column '{requested_name}' was not usable or not found. "
+            f"Auto-detected OME-Arrow column '{autodetected_name}'.",
+            UserWarning,
+            stacklevel=2,
+        )
+    # 2) Extract the row as a Python dict
+    record_dict: Dict[str, Any] = candidate_col.slice(row_index, 1).to_pylist()[0]
+    # Back-compat: older files won't include image_type; default to None.
+    if "image_type" not in record_dict:
+        record_dict["image_type"] = None
+    # Drop unexpected fields before casting to the canonical schema.
+    record_dict = {f.name: record_dict.get(f.name) for f in OME_ARROW_STRUCT}
+    # 3) Reconstruct a typed StructScalar using the canonical schema
+    scalar = pa.scalar(record_dict, type=OME_ARROW_STRUCT)
+    # Optional: soft validation via file-level metadata (if present)
+    try:
+        meta = table.schema.metadata or {}
+        meta.get(b"ome.arrow.type", b"").decode() == str(OME_ARROW_TAG_TYPE)
+        meta.get(b"ome.arrow.version", b"").decode() == str(OME_ARROW_TAG_VERSION)
+    except Exception:
+        pass
+    return scalar
+def _normalize_unit(unit: str | None) -> str | None:
+    if not unit:
+        return None
+    u = unit.strip().lower()
+    if u in {"micrometer", "micrometre", "micron", "microns", "um", "µm"}:
+        return "µm"
+    if u in {"nanometer", "nanometre", "nm"}:
+        return "nm"
+    return unit
+def _read_physical_pixel_sizes(
+    img: BioImage,
+) -> tuple[float, float, float, str | None, bool]:
+    pps = getattr(img, "physical_pixel_sizes", None)
+    if pps is None:
+        return 1.0, 1.0, 1.0, None, False
+    vx = getattr(pps, "X", None) or getattr(pps, "x", None)
+    vy = getattr(pps, "Y", None) or getattr(pps, "y", None)
+    vz = getattr(pps, "Z", None) or getattr(pps, "z", None)
+    if vx is None and vy is None and vz is None:
+        return 1.0, 1.0, 1.0, None, False
+    try:
+        psize_x = float(vx or 1.0)
+        psize_y = float(vy or 1.0)
+        psize_z = float(vz or 1.0)
+    except Exception:
+        return 1.0, 1.0, 1.0, None, False
+    unit = getattr(pps, "unit", None) or getattr(pps, "units", None)
+    unit = _normalize_unit(str(unit)) if unit is not None else None
+    return psize_x, psize_y, psize_z, unit, True
+def _load_zarr_attrs(zarr_path: Path) -> dict:
+    zarr_json = zarr_path / "zarr.json"
+    if zarr_json.exists():
+        try:
+            data = json.loads(zarr_json.read_text())
+            return data.get("attributes") or data.get("attrs") or {}
+        except Exception:
+            return {}
+    zattrs = zarr_path / ".zattrs"
+    if zattrs.exists():
+        try:
+            return json.loads(zattrs.read_text())
+        except Exception:
+            return {}
+    return {}
+def _extract_multiscales(attrs: dict) -> list[dict]:
+    if not isinstance(attrs, dict):
+        return []
+    ome = attrs.get("ome")
+    if isinstance(ome, dict) and isinstance(ome.get("multiscales"), list):
+        return ome["multiscales"]
+    if isinstance(attrs.get("multiscales"), list):
+        return attrs["multiscales"]
+    return []
+def _read_ngff_scale(zarr_path: Path) -> tuple[float, float, float, str | None] | None:
+    zarr_root = zarr_path
+    for parent in [zarr_path, *list(zarr_path.parents)]:
+        if parent.suffix.lower() in {".zarr", ".ome.zarr"}:
+            zarr_root = parent
+            break
+    for candidate in (zarr_path, zarr_root):
+        attrs = _load_zarr_attrs(candidate)
+        multiscales = _extract_multiscales(attrs)
+        if multiscales:
+            break
+    else:
+        return None
+    ms = multiscales[0]
+    axes = ms.get("axes") or []
+    datasets = ms.get("datasets") or []
+    if not axes or not datasets:
+        return None
+    ds = next((d for d in datasets if str(d.get("path")) == "0"), datasets[0])
+    cts = ds.get("coordinateTransformations") or []
+    scale_ct = next((ct for ct in cts if ct.get("type") == "scale"), None)
+    if not scale_ct:
+        return None
+    scale = scale_ct.get("scale") or []
+    if len(scale) != len(axes):
+        return None
+    axis_scale: dict[str, float] = {}
+    axis_unit: dict[str, str] = {}
+    for i, ax in enumerate(axes):
+        name = str(ax.get("name", "")).lower()
+        if name in {"x", "y", "z"}:
+            try:
+                axis_scale[name] = float(scale[i])
+            except Exception:
+                continue
+            unit = _normalize_unit(ax.get("unit"))
+            if unit:
+                axis_unit[name] = unit
+    if not axis_scale:
+        return None
+    psize_x = axis_scale.get("x", 1.0)
+    psize_y = axis_scale.get("y", 1.0)
+    psize_z = axis_scale.get("z", 1.0)
+    units = [axis_unit.get(a) for a in ("x", "y", "z") if axis_unit.get(a)]
+    unit = units[0] if units and len(set(units)) == 1 else None
+    return psize_x, psize_y, psize_z, unit
+def _normalize_chunk_shape(
+    chunk_shape: Optional[Tuple[int, int, int]],
+    size_z: int,
+    size_y: int,
+    size_x: int,
+) -> Tuple[int, int, int]:
+    """Normalize a chunk shape against image bounds.
+    Args:
+        chunk_shape: Desired chunk shape as (Z, Y, X), or None.
+        size_z: Total Z size of the image.
+        size_y: Total Y size of the image.
+        size_x: Total X size of the image.
+    Returns:
+        Tuple[int, int, int]: Normalized (Z, Y, X) chunk shape.
+    """
+    if chunk_shape is None:
+        chunk_shape = (1, 512, 512)
+    if not isinstance(chunk_shape, (list, tuple)) or len(chunk_shape) != 3:
+        raise ValueError("chunk_shape must be a sequence of three integers (z,y,x)")
+    try:
+        cz_raw, cy_raw, cx_raw = (int(v) for v in chunk_shape)
+    except Exception as exc:
+        raise ValueError(
+            "chunk_shape must be a sequence of three integers (z,y,x)"
+        ) from exc
+    if cz_raw <= 0 or cy_raw <= 0 or cx_raw <= 0:
+        raise ValueError("chunk_shape values must be positive integers")
+    cz = max(1, min(cz_raw, int(size_z)))
+    cy = max(1, min(cy_raw, int(size_y)))
+    cx = max(1, min(cx_raw, int(size_x)))
+    return cz, cy, cx
+def _build_chunks_from_planes(
+    *,
+    planes: List[Dict[str, Any]],
+    size_t: int,
+    size_c: int,
+    size_z: int,
+    size_y: int,
+    size_x: int,
+    chunk_shape: Optional[Tuple[int, int, int]],
+    chunk_order: str = "ZYX",
+) -> List[Dict[str, Any]]:
+    """Build chunked pixels from a list of flattened planes.
+    Args:
+        planes: List of plane dicts with keys z, t, c, and pixels.
+        size_t: Total T size of the image.
+        size_c: Total C size of the image.
+        size_z: Total Z size of the image.
+        size_y: Total Y size of the image.
+        size_x: Total X size of the image.
+        chunk_shape: Desired chunk shape as (Z, Y, X).
+        chunk_order: Flattening order for chunk pixels (default "ZYX").
+    Returns:
+        List[Dict[str, Any]]: Chunk list with pixels stored as flat lists.
+    Raises:
+        ValueError: If an unsupported chunk_order is requested.
+    """
+    if str(chunk_order).upper() != "ZYX":
+        raise ValueError("Only chunk_order='ZYX' is supported for now.")
+    cz, cy, cx = _normalize_chunk_shape(chunk_shape, size_z, size_y, size_x)
+    plane_map: Dict[Tuple[int, int, int], np.ndarray] = {}
+    for p in planes:
+        z = int(p["z"])
+        t = int(p["t"])
+        c = int(p["c"])
+        pix = p["pixels"]
+        arr2d = np.asarray(pix).reshape(size_y, size_x)
+        plane_map[(t, c, z)] = arr2d
+    dtype = next(iter(plane_map.values())).dtype if plane_map else np.uint16
+    chunks: List[Dict[str, Any]] = []
+    for t in range(size_t):
+        for c in range(size_c):
+            for z0 in range(0, size_z, cz):
+                sz = min(cz, size_z - z0)
+                for y0 in range(0, size_y, cy):
+                    sy = min(cy, size_y - y0)
+                    for x0 in range(0, size_x, cx):
+                        sx = min(cx, size_x - x0)
+                        slab = np.zeros((sz, sy, sx), dtype=dtype)
+                        for zi in range(sz):
+                            plane = plane_map.get((t, c, z0 + zi))
+                            if plane is None:
+                                continue
+                            slab[zi] = plane[y0 : y0 + sy, x0 : x0 + sx]
+                        chunks.append(
+                            {
+                                "t": t,
+                                "c": c,
+                                "z": z0,
+                                "y": y0,
+                                "x": x0,
+                                "shape_z": sz,
+                                "shape_y": sy,
+                                "shape_x": sx,
+                                "pixels": slab.reshape(-1).tolist(),
+                            }
+                        )
+    return chunks
 def to_ome_arrow(
     type_: str = OME_ARROW_TAG_TYPE,
     version: str = OME_ARROW_TAG_VERSION,
     image_id: str = "unnamed",
     name: str = "unknown",
+    image_type: str | None = "image",
     acquisition_datetime: Optional[datetime] = None,
     dimension_order: str = "XYZCT",
     dtype: str = "uint16",
@@ -39,6 +380,10 @@ def to_ome_arrow(
     physical_size_unit: str = "µm",
     channels: Optional[List[Dict[str, Any]]] = None,
     planes: Optional[List[Dict[str, Any]]] = None,
+    chunks: Optional[List[Dict[str, Any]]] = None,
+    chunk_shape: Optional[Tuple[int, int, int]] = (1, 512, 512),  # (Z, Y, X)
+    chunk_order: str = "ZYX",
+    build_chunks: bool = True,
     masks: Any = None,
 ) -> pa.StructScalar:
     """
@@ -53,6 +398,9 @@ def to_ome_arrow(
         version: Specification version string.
         image_id: Unique image identifier.
         name: Human-friendly name.
+        image_type: Open-ended image kind (e.g., "image", "label"). Note that
+            from_* helpers pass image_type=None by default to preserve
+            "unspecified" vs explicitly set ("image").
         acquisition_datetime: Datetime of acquisition (defaults to now).
         dimension_order: Dimension order ("XYZCT" or "XYCT").
         dtype: Pixel data type string (e.g., "uint16").
@@ -61,6 +409,12 @@ def to_ome_arrow(
         physical_size_unit: Unit string, default "µm".
         channels: List of channel dicts. Autogenerates one if None.
         planes: List of plane dicts. Empty if None.
+        chunks: Optional list of chunk dicts. If None and build_chunks is True,
+            chunks are derived from planes using chunk_shape.
+        chunk_shape: Chunk shape as (Z, Y, X). Defaults to (1, 512, 512).
+        chunk_order: Flattening order for chunk pixels (default "ZYX").
+        build_chunks: If True, build chunked pixels from planes when chunks
+            is None.
         masks: Optional placeholder for future annotations.
     Returns:
@@ -76,6 +430,7 @@ def to_ome_arrow(
     version = str(version)
     image_id = str(image_id)
     name = str(name)
+    image_type = None if image_type is None else str(image_type)
     dimension_order = str(dimension_order)
     dtype = str(dtype)
     physical_size_unit = str(physical_size_unit)
@@ -105,11 +460,62 @@ def to_ome_arrow(
     if planes is None:
         planes = [{"z": 0, "t": 0, "c": 0, "pixels": [0] * (size_x * size_y)}]
+    if chunks is None and build_chunks:
+        chunks = _build_chunks_from_planes(
+            planes=planes,
+            size_t=size_t,
+            size_c=size_c,
+            size_z=size_z,
+            size_y=size_y,
+            size_x=size_x,
+            chunk_shape=chunk_shape,
+            chunk_order=chunk_order,
+        )
+    chunk_grid = None
+    if chunks is not None:
+        chunk_order = str(chunk_order).upper()
+        if chunk_order != "ZYX":
+            raise ValueError("Only chunk_order='ZYX' is supported for now.")
+        if len(chunks) == 0:
+            raise ValueError("chunks must not be an empty list")
+        first = chunks[0]
+        try:
+            derived_shape = (
+                int(first["shape_z"]),
+                int(first["shape_y"]),
+                int(first["shape_x"]),
+            )
+        except Exception as exc:
+            raise ValueError(
+                "chunks entries must include shape_z/shape_y/shape_x"
+            ) from exc
+        if derived_shape[0] <= 0 or derived_shape[1] <= 0 or derived_shape[2] <= 0:
+            raise ValueError("chunk shapes must be positive integers")
+        if chunk_shape is not None:
+            norm_shape = _normalize_chunk_shape(chunk_shape, size_z, size_y, size_x)
+            if norm_shape != derived_shape:
+                raise ValueError(
+                    "chunk_shape does not match provided chunks "
+                    f"(chunk_shape={norm_shape}, chunks_shape={derived_shape})"
+                )
+        cz, cy, cx = _normalize_chunk_shape(derived_shape, size_z, size_y, size_x)
+        chunk_grid = {
+            "order": "TCZYX",
+            "chunk_t": 1,
+            "chunk_c": 1,
+            "chunk_z": cz,
+            "chunk_y": cy,
+            "chunk_x": cx,
+            "chunk_order": str(chunk_order),
+        }
     record = {
         "type": type_,
         "version": version,
         "id": image_id,
         "name": name,
+        "image_type": image_type,
         "acquisition_datetime": acquisition_datetime or datetime.now(timezone.utc),
         "pixels_meta": {
             "dimension_order": dimension_order,
@@ -127,6 +533,8 @@ def to_ome_arrow(
             "physical_size_z_unit": physical_size_unit,
             "channels": channels,
         },
+        "chunk_grid": chunk_grid,
+        "chunks": chunks,
         "planes": planes,
         "masks": masks,
     }
@@ -140,9 +548,13 @@ def from_numpy(
     dim_order: str = "TCZYX",
     image_id: Optional[str] = None,
     name: Optional[str] = None,
+    image_type: Optional[str] = None,
     channel_names: Optional[Sequence[str]] = None,
     acquisition_datetime: Optional[datetime] = None,
     clamp_to_uint16: bool = True,
+    chunk_shape: Optional[Tuple[int, int, int]] = (1, 512, 512),
+    chunk_order: str = "ZYX",
+    build_chunks: bool = True,
     # meta
     physical_size_x: float = 1.0,
     physical_size_y: float = 1.0,
@@ -150,42 +562,39 @@ def from_numpy(
     physical_size_unit: str = "µm",
     dtype_meta: Optional[str] = None,  # if None, inferred from output dtype
 ) -> pa.StructScalar:
-    """
-    Build an OME-Arrow StructScalar from a NumPy array.
-    Parameters
-    ----------
-    arr : np.ndarray
-        Image data with axes described by `dim_order`.
-    dim_order : str, default "TCZYX"
-        Axis labels for `arr`. Must include "Y" and "X".
-        Supported examples: "YX", "ZYX", "CYX", "CZYX", "TYX", "TCYX", "TCZYX".
-    image_id, name : Optional[str]
-        Identifiers to embed in the record.
-    channel_names : Optional[Sequence[str]]
-        Names for channels; defaults to C0..C{n-1}.
-    acquisition_datetime : Optional[datetime]
-        Defaults to now (UTC) if None.
-    clamp_to_uint16 : bool, default True
-        If True, clamp/cast planes to uint16 before serialization.
-    physical_size_x/y/z : float
-        Spatial pixel sizes (µm), Z used if present.
-    physical_size_unit : str
-        Unit string for spatial axes (default "µm").
-    dtype_meta : Optional[str]
-        Pixel dtype string to place in metadata; if None, inferred from the
-        (possibly cast) array's dtype.
-    Returns
-    -------
-    pa.StructScalar
-        Typed OME-Arrow record (schema = OME_ARROW_STRUCT).
-    Notes
-    -----
-    - If Z is not in `dim_order`, `size_z` will be 1 and the meta
-      dimension_order becomes "XYCT"; otherwise "XYZCT".
-    - If T/C are absent in `dim_order`, they default to size 1.
+    """Build an OME-Arrow StructScalar from a NumPy array.
+    Args:
+        arr: Image data with axes described by `dim_order`.
+        dim_order: Axis labels for `arr`. Must include "Y" and "X".
+            Supported examples: "YX", "ZYX", "CYX", "CZYX", "TYX", "TCYX", "TCZYX".
+        image_id: Optional stable image identifier.
+        name: Optional human label.
+        image_type: Open-ended image kind (e.g., "image", "label").
+        channel_names: Names for channels; defaults to C0..C{n-1}.
+        acquisition_datetime: Defaults to now (UTC) if None.
+        clamp_to_uint16: If True, clamp/cast planes to uint16 before serialization.
+        chunk_shape: Chunk shape as (Z, Y, X). Defaults to (1, 512, 512).
+        chunk_order: Flattening order for chunk pixels (default "ZYX").
+        build_chunks: If True, build chunked pixels from planes.
+        physical_size_x: Spatial pixel size (µm) for X.
+        physical_size_y: Spatial pixel size (µm) for Y.
+        physical_size_z: Spatial pixel size (µm) for Z when present.
+        physical_size_unit: Unit string for spatial axes (default "µm").
+        dtype_meta: Pixel dtype string to place in metadata; if None, inferred
+            from the (possibly cast) array's dtype.
+    Returns:
+        pa.StructScalar: Typed OME-Arrow record (schema = OME_ARROW_STRUCT).
+    Raises:
+        TypeError: If `arr` is not a NumPy ndarray.
+        ValueError: If `dim_order` is invalid or dimensions are non-positive.
+    Notes:
+        - If Z is not in `dim_order`, `size_z` will be 1 and the meta
+          dimension_order becomes "XYCT"; otherwise "XYZCT".
+        - If T/C are absent in `dim_order`, they default to size 1.
     """
     if not isinstance(arr, np.ndarray):
@@ -273,6 +682,7 @@ def from_numpy(
     return to_ome_arrow(
         image_id=str(image_id or "unnamed"),
         name=str(name or "unknown"),
+        image_type=image_type,
         acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
         dimension_order=meta_dim_order,
         dtype=dtype_str,
@@ -287,6 +697,9 @@ def from_numpy(
         physical_size_unit=str(physical_size_unit),
         channels=channels,
         planes=planes,
+        chunk_shape=chunk_shape,
+        chunk_order=chunk_order,
+        build_chunks=build_chunks,
         masks=None,
     )
@@ -295,6 +708,7 @@ def from_tiff(
     tiff_path: str | Path,
     image_id: Optional[str] = None,
     name: Optional[str] = None,
+    image_type: Optional[str] = None,
     channel_names: Optional[Sequence[str]] = None,
     acquisition_datetime: Optional[datetime] = None,
     clamp_to_uint16: bool = True,
@@ -309,6 +723,7 @@ def from_tiff(
         tiff_path: Path to a TIFF readable by bioio.
         image_id: Optional stable image identifier (defaults to stem).
         name: Optional human label (defaults to file name).
+        image_type: Optional image kind (e.g., "image", "label").
         channel_names: Optional channel names; defaults to C0..C{n-1}.
         acquisition_datetime: Optional acquisition time (UTC now if None).
         clamp_to_uint16: If True, clamp/cast planes to uint16.
@@ -338,13 +753,8 @@ def from_tiff(
     if size_x <= 0 or size_y <= 0:
         raise ValueError("Image must have positive Y and X dims.")
-    pps = getattr(img, "physical_pixel_sizes", None)
-    try:
-        psize_x = float(getattr(pps, "X", None) or 1.0)
-        psize_y = float(getattr(pps, "Y", None) or 1.0)
-        psize_z = float(getattr(pps, "Z", None) or 1.0)
-    except Exception:
-        psize_x = psize_y = psize_z = 1.0
+    psize_x, psize_y, psize_z, unit, _pps_valid = _read_physical_pixel_sizes(img)
+    psize_unit = unit or "µm"
     # --- NEW: coerce top-level strings --------------------------------
     img_id = str(image_id or p.stem)
@@ -383,6 +793,7 @@ def from_tiff(
     return to_ome_arrow(
         image_id=img_id,
         name=display_name,
+        image_type=image_type,
         acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
         dimension_order=dim_order,
         dtype="uint16",
@@ -394,7 +805,7 @@ def from_tiff(
         physical_size_x=psize_x,
         physical_size_y=psize_y,
         physical_size_z=psize_z,
-        physical_size_unit="µm",
+        physical_size_unit=psize_unit,
         channels=channels,
         planes=planes,
         masks=None,
@@ -409,7 +820,23 @@ def from_stack_pattern_path(
     channel_names: Optional[List[str]] = None,
     image_id: Optional[str] = None,
     name: Optional[str] = None,
+    image_type: Optional[str] = None,
 ) -> pa.StructScalar:
+    """Build an OME-Arrow record from a filename pattern describing a stack.
+    Args:
+        pattern_path: Path or pattern string describing the stack layout.
+        default_dim_for_unspecified: Dimension to use when tokens lack a dim.
+        map_series_to: Dimension to map series tokens to (e.g., "T"), or None.
+        clamp_to_uint16: Whether to clamp pixel values to uint16.
+        channel_names: Optional list of channel names to apply.
+        image_id: Optional image identifier override.
+        name: Optional display name override.
+        image_type: Optional image kind (e.g., "image", "label").
+    Returns:
+        A validated OME-Arrow StructScalar describing the stack.
+    """
     path = Path(pattern_path)
     folder = path.parent
     line = path.name.strip()
@@ -675,6 +1102,7 @@ def from_stack_pattern_path(
     return to_ome_arrow(
         image_id=str(img_id),
         name=str(display_name),
+        image_type=image_type,
         acquisition_datetime=None,
         dimension_order=dim_order,
         dtype="uint16",
@@ -697,6 +1125,7 @@ def from_ome_zarr(
     zarr_path: str | Path,
     image_id: Optional[str] = None,
     name: Optional[str] = None,
+    image_type: Optional[str] = None,
     channel_names: Optional[Sequence[str]] = None,
     acquisition_datetime: Optional[datetime] = None,
     clamp_to_uint16: bool = True,
@@ -715,6 +1144,8 @@ def from_ome_zarr(
             Optional stable image identifier (defaults to directory stem).
         name:
             Optional display name (defaults to directory name).
+        image_type:
+            Optional image kind (e.g., "image", "label").
         channel_names:
             Optional list of channel names. Defaults to C0, C1, ...
         acquisition_datetime:
@@ -741,13 +1172,15 @@ def from_ome_zarr(
     if size_x <= 0 or size_y <= 0:
         raise ValueError("Image must have positive Y and X dimensions.")
-    pps = getattr(img, "physical_pixel_sizes", None)
-    try:
-        psize_x = float(getattr(pps, "X", None) or 1.0)
-        psize_y = float(getattr(pps, "Y", None) or 1.0)
-        psize_z = float(getattr(pps, "Z", None) or 1.0)
-    except Exception:
-        psize_x = psize_y = psize_z = 1.0
+    psize_x, psize_y, psize_z, unit, pps_valid = _read_physical_pixel_sizes(img)
+    psize_unit = unit or "µm"
+    if not pps_valid:
+        ngff_scale = _read_ngff_scale(p)
+        if ngff_scale is not None:
+            psize_x, psize_y, psize_z, unit = ngff_scale
+            if unit:
+                psize_unit = unit
     img_id = str(image_id or p.stem)
     display_name = str(name or p.name)
@@ -794,6 +1227,7 @@ def from_ome_zarr(
     return to_ome_arrow(
         image_id=img_id,
         name=display_name,
+        image_type=image_type,
         acquisition_datetime=acquisition_datetime or datetime.now(timezone.utc),
         dimension_order=dim_order,
         dtype="uint16",
@@ -805,7 +1239,7 @@ def from_ome_zarr(
         physical_size_x=psize_x,
         physical_size_y=psize_y,
         physical_size_z=psize_z,
-        physical_size_unit="µm",
+        physical_size_unit=psize_unit,
         channels=channels,
         planes=planes,
         masks=None,
@@ -819,88 +1253,72 @@ def from_ome_parquet(
     row_index: int = 0,
     strict_schema: bool = False,
 ) -> pa.StructScalar:
-    """
-    Read an OME-Arrow record from a Parquet file and return a typed StructScalar.
+    """Read an OME-Arrow record from a Parquet file.
+    Args:
+        parquet_path: Path to the Parquet file.
+        column_name: Column to read; auto-detected when None or invalid.
+        row_index: Row index to extract.
+        strict_schema: Require the exact OME-Arrow schema if True.
+    Returns:
+        A typed OME-Arrow StructScalar.
+    Raises:
+        FileNotFoundError: If the Parquet path does not exist.
+        ValueError: If the row index is out of range or no suitable column exists.
     """
     p = Path(parquet_path)
     if not p.exists():
         raise FileNotFoundError(f"No such file: {p}")
     table = pq.read_table(p)
+    return _ome_arrow_from_table(
+        table,
+        column_name=column_name,
+        row_index=row_index,
+        strict_schema=strict_schema,
+    )
-    if table.num_rows == 0:
-        raise ValueError("Parquet file contains 0 rows; expected at least 1.")
-    if not (0 <= row_index < table.num_rows):
-        raise ValueError(f"row_index {row_index} out of range [0, {table.num_rows}).")
-    # 1) Locate the OME-Arrow column
-    def _struct_matches_ome_fields(t: pa.StructType) -> bool:
-        ome_fields = {f.name for f in OME_ARROW_STRUCT}
-        col_fields = {f.name for f in t}
-        return ome_fields == col_fields
-    requested_name = column_name
-    candidate_col = None
-    autodetected_name = None
-    if column_name is not None and column_name in table.column_names:
-        arr = table[column_name]
-        if not pa.types.is_struct(arr.type):
-            raise ValueError(f"Column '{column_name}' is not a Struct; got {arr.type}.")
-        if strict_schema and arr.type != OME_ARROW_STRUCT:
-            raise ValueError(
-                f"Column '{column_name}' schema != OME_ARROW_STRUCT.\n"
-                f"Got:   {arr.type}\n"
-                f"Expect:{OME_ARROW_STRUCT}"
-            )
-        if not strict_schema and not _struct_matches_ome_fields(arr.type):
-            raise ValueError(
-                f"Column '{column_name}' does not have the expected OME-Arrow fields."
-            )
-        candidate_col = arr
-    else:
-        # Auto-detect a struct column that matches OME-Arrow fields
-        for name in table.column_names:
-            arr = table[name]
-            if pa.types.is_struct(arr.type):
-                if strict_schema and arr.type == OME_ARROW_STRUCT:
-                    candidate_col = arr
-                    autodetected_name = name
-                    column_name = name
-                    break
-                if not strict_schema and _struct_matches_ome_fields(arr.type):
-                    candidate_col = arr
-                    autodetected_name = name
-                    column_name = name
-                    break
-        if candidate_col is None:
-            if column_name is None:
-                hint = "no struct column with OME-Arrow fields was found."
-            else:
-                hint = f"column '{column_name}' not found and auto-detection failed."
-            raise ValueError(f"Could not locate an OME-Arrow struct column: {hint}")
+def from_ome_vortex(
+    vortex_path: str | Path,
+    *,
+    column_name: Optional[str] = "ome_arrow",
+    row_index: int = 0,
+    strict_schema: bool = False,
+) -> pa.StructScalar:
+    """Read an OME-Arrow record from a Vortex file.
-    # Emit warning if auto-detection was used
-    if autodetected_name is not None and autodetected_name != requested_name:
-        warnings.warn(
-            f"Requested column '{requested_name}' was not usable or not found. "
-            f"Auto-detected OME-Arrow column '{autodetected_name}'.",
-            UserWarning,
-            stacklevel=2,
-        )
+    Args:
+        vortex_path: Path to the Vortex file.
+        column_name: Column to read; auto-detected when None or invalid.
+        row_index: Row index to extract.
+        strict_schema: Require the exact OME-Arrow schema if True.
-    # 2) Extract the row as a Python dict
-    record_dict: Dict[str, Any] = candidate_col.slice(row_index, 1).to_pylist()[0]
+    Returns:
+        A typed OME-Arrow StructScalar.
-    # 3) Reconstruct a typed StructScalar using the canonical schema
-    scalar = pa.scalar(record_dict, type=OME_ARROW_STRUCT)
+    Raises:
+        FileNotFoundError: If the Vortex path does not exist.
+        ImportError: If the optional `vortex-data` dependency is missing.
+        ValueError: If the row index is out of range or no suitable column exists.
+    """
+    p = Path(vortex_path)
+    if not p.exists():
+        raise FileNotFoundError(f"No such file: {p}")
-    # Optional: soft validation via file-level metadata (if present)
     try:
-        meta = table.schema.metadata or {}
-        meta.get(b"ome.arrow.type", b"").decode() == str(OME_ARROW_TAG_TYPE)
-        meta.get(b"ome.arrow.version", b"").decode() == str(OME_ARROW_TAG_VERSION)
-    except Exception:
-        pass
-    return scalar
+        import vortex
+    except ImportError as exc:
+        raise ImportError(
+            "Vortex support requires the optional 'vortex-data' dependency."
+        ) from exc
+    table = vortex.open(str(p)).to_arrow().read_all()
+    return _ome_arrow_from_table(
+        table,
+        column_name=column_name,
+        row_index=row_index,
+        strict_schema=strict_schema,
+    )

ome-arrow 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

ome-arrow 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl