radiobject 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- radiobject/__init__.py +24 -0
- radiobject/_types.py +19 -0
- radiobject/ctx.py +359 -0
- radiobject/dataframe.py +186 -0
- radiobject/imaging_metadata.py +387 -0
- radiobject/indexing.py +45 -0
- radiobject/ingest.py +132 -0
- radiobject/ml/__init__.py +26 -0
- radiobject/ml/cache.py +53 -0
- radiobject/ml/compat/__init__.py +33 -0
- radiobject/ml/compat/torchio.py +99 -0
- radiobject/ml/config.py +42 -0
- radiobject/ml/datasets/__init__.py +12 -0
- radiobject/ml/datasets/collection_dataset.py +198 -0
- radiobject/ml/datasets/multimodal.py +129 -0
- radiobject/ml/datasets/patch_dataset.py +158 -0
- radiobject/ml/datasets/segmentation_dataset.py +219 -0
- radiobject/ml/datasets/volume_dataset.py +233 -0
- radiobject/ml/distributed.py +82 -0
- radiobject/ml/factory.py +249 -0
- radiobject/ml/utils/__init__.py +13 -0
- radiobject/ml/utils/labels.py +106 -0
- radiobject/ml/utils/validation.py +85 -0
- radiobject/ml/utils/worker_init.py +10 -0
- radiobject/orientation.py +270 -0
- radiobject/parallel.py +65 -0
- radiobject/py.typed +0 -0
- radiobject/query.py +788 -0
- radiobject/radi_object.py +1665 -0
- radiobject/streaming.py +389 -0
- radiobject/utils.py +17 -0
- radiobject/volume.py +438 -0
- radiobject/volume_collection.py +1182 -0
- radiobject-0.1.0.dist-info/METADATA +139 -0
- radiobject-0.1.0.dist-info/RECORD +37 -0
- radiobject-0.1.0.dist-info/WHEEL +4 -0
- radiobject-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""TorchIO integration for RadiObject."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Sequence
|
|
6
|
+
|
|
7
|
+
import torch
|
|
8
|
+
from torch.utils.data import Dataset
|
|
9
|
+
|
|
10
|
+
from radiobject._types import LabelSource
|
|
11
|
+
from radiobject.ml.utils.labels import load_labels
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from radiobject.volume_collection import VolumeCollection
|
|
15
|
+
|
|
16
|
+
# TorchIO is an optional dependency: import it if present and record the
# outcome, so callers can raise a helpful message (via _require_torchio)
# instead of failing at module import time.
try:
    import torchio as tio

    HAS_TORCHIO = True
except ImportError:
    HAS_TORCHIO = False
    tio = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _require_torchio() -> None:
    """Raise ImportError if TorchIO not installed."""
    if HAS_TORCHIO:
        return
    raise ImportError("TorchIO required. Install with: pip install radiobject[torchio]")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class VolumeCollectionSubjectsDataset(Dataset):
    """TorchIO-compatible dataset yielding Subject objects from VolumeCollection(s)."""

    def __init__(
        self,
        collections: VolumeCollection | Sequence[VolumeCollection],
        labels: LabelSource = None,
        transform: Any | None = None,
    ):
        """Initialize TorchIO-compatible dataset.

        Args:
            collections: Single VolumeCollection or sequence of collections.
                Each collection becomes a separate image in the Subject.
            labels: Label source. Can be:
                - str: Column name in collection's obs DataFrame
                - pd.DataFrame: With obs_id as column/index and label values
                - dict[str, Any]: Mapping from obs_id to label
                - Callable[[str], Any]: Function taking obs_id, returning label
                - None: No labels
            transform: TorchIO transform (e.g., tio.Compose) applied to each Subject.
        """
        _require_torchio()

        # Accept a bare collection by wrapping it in a one-element list.
        colls = collections if isinstance(collections, Sequence) else [collections]
        if not colls:
            raise ValueError("At least one collection required")

        self._collections = list(colls)
        self._collection_names = [
            coll.name or f"collection_{pos}" for pos, coll in enumerate(colls)
        ]
        self._transform = transform

        primary = self._collections[0]
        self._n_subjects = len(primary)

        # Labels are resolved against the first collection, keyed by position.
        self._labels: dict[int, Any] | None = None
        if labels is not None:
            obs_df = primary.obs.read() if isinstance(labels, str) else None
            self._labels = load_labels(primary, labels, obs_df)

    def __len__(self) -> int:
        return self._n_subjects

    def __getitem__(self, idx: int) -> "tio.Subject":
        """Return TorchIO Subject with images for all collections."""
        # One ScalarImage per collection, keyed by collection name; a channel
        # dimension is prepended and data is cast to float32.
        contents: dict[str, Any] = {
            name: tio.ScalarImage(
                tensor=torch.from_numpy(coll.iloc[idx].to_numpy()).unsqueeze(0).float()
            )
            for name, coll in zip(self._collection_names, self._collections)
        }

        if self._labels is not None and idx in self._labels:
            contents["label"] = self._labels[idx]

        subject = tio.Subject(contents)
        return self._transform(subject) if self._transform else subject

    @property
    def collection_names(self) -> list[str]:
        """Names of collections in each Subject."""
        return self._collection_names
|
radiobject/ml/config.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Configuration models for ML training pipeline."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Self
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, field_validator, model_validator
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class LoadingMode(str, Enum):
    """Volume loading strategy.

    A ``str``-valued enum so configuration files can supply plain strings.
    """

    # Serve each volume whole.
    FULL_VOLUME = "full_volume"
    # Serve random 3D sub-volumes.
    PATCH = "patch"
    # Serve individual 2D slices.
    SLICE_2D = "slice_2d"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DatasetConfig(BaseModel):
    """Configuration for RadiObjectDataset.

    Frozen (immutable) pydantic model describing how samples are produced:
    whole volumes, random 3D patches, or 2D slices.
    """

    # Sampling strategy; defaults to serving whole volumes.
    loading_mode: LoadingMode = LoadingMode.FULL_VOLUME
    # (X, Y, Z) patch dimensions; required when loading_mode is PATCH.
    patch_size: tuple[int, int, int] | None = None
    # Patches drawn per volume (PATCH mode); validated to be >= 1 below.
    patches_per_volume: int = 1
    # Optional modality names to load; None means dataset default.
    modalities: list[str] | None = None
    # Optional obs column to use as the training label.
    label_column: str | None = None
    # Optional filter expression applied when reading metadata.
    value_filter: str | None = None

    @model_validator(mode="after")
    def validate_patch_config(self) -> Self:
        """Validate patch configuration consistency."""
        # PATCH mode is meaningless without explicit patch dimensions.
        if self.loading_mode == LoadingMode.PATCH and self.patch_size is None:
            raise ValueError("patch_size required when loading_mode is PATCH")
        return self

    @field_validator("patches_per_volume")
    @classmethod
    def validate_patches_per_volume(cls, v: int) -> int:
        """Ensure patches_per_volume is positive."""
        if v < 1:
            raise ValueError("patches_per_volume must be >= 1")
        return v

    # Instances are immutable after construction (pydantic frozen model).
    model_config = {"frozen": True}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""PyTorch Dataset implementations for RadiObject."""
|
|
2
|
+
|
|
3
|
+
from radiobject.ml.datasets.collection_dataset import VolumeCollectionDataset
|
|
4
|
+
from radiobject.ml.datasets.patch_dataset import GridPatchDataset, PatchVolumeDataset
|
|
5
|
+
from radiobject.ml.datasets.segmentation_dataset import SegmentationDataset
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"VolumeCollectionDataset",
|
|
9
|
+
"GridPatchDataset",
|
|
10
|
+
"PatchVolumeDataset",
|
|
11
|
+
"SegmentationDataset",
|
|
12
|
+
]
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""VolumeCollectionDataset - primary PyTorch Dataset for VolumeCollection(s)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Sequence
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import torch
|
|
10
|
+
from torch.utils.data import Dataset
|
|
11
|
+
|
|
12
|
+
from radiobject._types import LabelSource
|
|
13
|
+
from radiobject.ml.config import DatasetConfig, LoadingMode
|
|
14
|
+
from radiobject.ml.utils.labels import load_labels
|
|
15
|
+
from radiobject.ml.utils.validation import validate_collection_alignment, validate_uniform_shapes
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from radiobject.volume_collection import VolumeCollection
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class VolumeCollectionDataset(Dataset):
    """PyTorch Dataset for VolumeCollection(s) - primary ML interface."""

    def __init__(
        self,
        collections: VolumeCollection | Sequence[VolumeCollection],
        config: DatasetConfig | None = None,
        labels: LabelSource = None,
        transform: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
    ):
        """Initialize dataset from VolumeCollection(s).

        Args:
            collections: Single VolumeCollection or sequence of collections.
                Multiple collections are stacked along channel dimension.
            config: Dataset configuration (loading mode, patch size, etc.).
                If None, uses full volume mode.
            labels: Label source. Can be:
                - str: Column name in collection's obs DataFrame
                - pd.DataFrame: With obs_id as column/index and label values
                - dict[str, Any]: Mapping from obs_id to label
                - Callable[[str], Any]: Function taking obs_id, returning label
                - None: No labels
            transform: Transform function applied to each sample dict.
                MONAI dict transforms (e.g., RandFlipd) work directly.

        Raises:
            ValueError: If no collections are provided (validators may raise
                further errors for misaligned or non-uniform collections).
        """
        self._config = config or DatasetConfig()
        self._transform = transform

        # Normalize to list
        if not isinstance(collections, Sequence):
            collections = [collections]
        if not collections:
            raise ValueError("At least one collection required")

        self._collections = list(collections)
        self._collection_names = [c.name or f"collection_{i}" for i, c in enumerate(collections)]

        # Name -> collection mapping consumed by the validators below.
        collections_dict: dict[str, VolumeCollection] = dict(
            zip(self._collection_names, self._collections)
        )

        # Validate alignment if multi-modal
        if len(collections_dict) > 1:
            validate_collection_alignment(collections_dict)

        # Validate uniform shapes (required for batched loading)
        self._volume_shape = validate_uniform_shapes(collections_dict)

        first_coll = self._collections[0]
        self._n_volumes = len(first_coll)

        # Load labels from first collection's obs
        self._labels: dict[int, Any] | None = None
        if labels is not None:
            obs_df = first_coll.obs.read() if isinstance(labels, str) else None
            self._labels = load_labels(first_coll, labels, obs_df)

        # Dataset length depends on loading mode: one sample per volume,
        # per patch, or per axial slice.
        if self._config.loading_mode == LoadingMode.PATCH:
            self._length = self._n_volumes * self._config.patches_per_volume
        elif self._config.loading_mode == LoadingMode.SLICE_2D:
            self._length = self._n_volumes * self._volume_shape[2]
        else:
            self._length = self._n_volumes

    def __len__(self) -> int:
        return self._length

    def __getitem__(self, idx: int) -> dict[str, Any]:
        """Dispatch to the loader matching the configured loading mode."""
        if self._config.loading_mode == LoadingMode.PATCH:
            return self._get_patch_item(idx)
        elif self._config.loading_mode == LoadingMode.SLICE_2D:
            return self._get_slice_item(idx)
        else:
            return self._get_full_volume_item(idx)

    def _finalize(self, result: dict[str, Any], volume_idx: int) -> dict[str, Any]:
        """Attach the label (if any) and apply the transform to a sample dict.

        Shared tail of all three loading paths, so label handling and
        transform application cannot drift apart between modes.
        """
        self._add_label(result, volume_idx)
        if self._transform is not None:
            result = self._transform(result)
        return result

    def _get_full_volume_item(self, idx: int) -> dict[str, Any]:
        """Load full volume for all collections."""
        volumes = [coll.iloc[idx].to_numpy() for coll in self._collections]

        # Channel dimension is the collection order: (C, X, Y, Z).
        stacked = np.stack(volumes, axis=0)
        result: dict[str, Any] = {
            "image": torch.from_numpy(stacked),
            "idx": idx,
        }
        return self._finalize(result, idx)

    def _get_patch_item(self, idx: int) -> dict[str, Any]:
        """Load a random patch from the volume.

        The RNG is seeded with ``idx``, so a given dataset index always yields
        the same patch location (deterministic across epochs and workers).
        """
        volume_idx = idx // self._config.patches_per_volume
        patch_idx = idx % self._config.patches_per_volume

        rng = np.random.default_rng(seed=idx)
        patch_size = self._config.patch_size
        assert patch_size is not None  # guaranteed by DatasetConfig validation

        # Highest valid start coordinate per axis (0 when the patch spans it).
        max_start = tuple(max(0, self._volume_shape[i] - patch_size[i]) for i in range(3))
        start = tuple(
            rng.integers(0, max_start[i] + 1) if max_start[i] > 0 else 0 for i in range(3)
        )

        volumes = []
        for coll in self._collections:
            vol = coll.iloc[volume_idx]
            patch = vol.slice(
                slice(start[0], start[0] + patch_size[0]),
                slice(start[1], start[1] + patch_size[1]),
                slice(start[2], start[2] + patch_size[2]),
            )
            volumes.append(patch)

        stacked = np.stack(volumes, axis=0)
        result: dict[str, Any] = {
            "image": torch.from_numpy(stacked),
            "idx": volume_idx,
            "patch_idx": patch_idx,
            "patch_start": start,
        }
        return self._finalize(result, volume_idx)

    def _get_slice_item(self, idx: int) -> dict[str, Any]:
        """Load a 2D slice from the volume."""
        volume_idx = idx // self._volume_shape[2]
        slice_idx = idx % self._volume_shape[2]

        slices = [coll.iloc[volume_idx].axial(slice_idx) for coll in self._collections]

        stacked = np.stack(slices, axis=0)
        result: dict[str, Any] = {
            "image": torch.from_numpy(stacked),
            "idx": volume_idx,
            "slice_idx": slice_idx,
        }
        return self._finalize(result, volume_idx)

    def _add_label(self, result: dict[str, Any], volume_idx: int) -> None:
        """Add label column to sample dict from label source."""
        if self._labels is not None and volume_idx in self._labels:
            label = self._labels[volume_idx]
            # Numeric labels become tensors so default collation batches them.
            if isinstance(label, (int, float, np.integer, np.floating)):
                result["label"] = torch.tensor(label)
            else:
                result["label"] = label

    @property
    def collection_names(self) -> list[str]:
        """Names of collections being loaded (channel order)."""
        return self._collection_names

    @property
    def volume_shape(self) -> tuple[int, int, int]:
        """Shape of each volume (X, Y, Z)."""
        return self._volume_shape

    @property
    def n_channels(self) -> int:
        """Number of channels (collections) in output tensors."""
        return len(self._collections)
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Multi-modal dataset for loading aligned volumes from multiple VolumeCollections."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Callable
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import torch
|
|
9
|
+
from torch.utils.data import Dataset
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from radiobject.radi_object import RadiObject
|
|
13
|
+
from radiobject.volume_collection import VolumeCollection
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class MultiModalDataset(Dataset):
    """Dataset for loading aligned volumes from multiple VolumeCollections."""

    def __init__(
        self,
        radi_object: RadiObject,
        modalities: list[str],
        label_column: str | None = None,
        value_filter: str | None = None,
        transform: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
    ):
        if not modalities:
            raise ValueError("At least one modality required")

        self._modalities = modalities
        self._transform = transform
        self._radi_object = radi_object

        # One VolumeCollection per requested modality, keyed by name.
        self._collections: dict[str, VolumeCollection] = {}
        for mod in modalities:
            self._collections[mod] = radi_object.collection(mod)

        # The first modality serves as the reference for counts and shape.
        reference = self._collections[modalities[0]]
        self._n_volumes = len(reference)
        self._volume_shape = reference.shape

        self._validate_alignment()

        self._labels: dict[int, Any] | None = None
        if label_column:
            self._load_labels(radi_object, label_column, value_filter)

    def _validate_alignment(self) -> None:
        """Validate that all modalities have matching subjects."""
        reference = self._collections[self._modalities[0]]
        expected_subjects = set(reference.obs_subject_ids)

        for mod in self._modalities[1:]:
            coll = self._collections[mod]
            if len(coll) != self._n_volumes:
                raise ValueError(
                    f"Modality '{mod}' has {len(coll)} volumes, expected {self._n_volumes}"
                )

            subjects = set(coll.obs_subject_ids)
            if subjects == expected_subjects:
                continue

            # Report only a few offending IDs to keep the message readable.
            missing = expected_subjects - subjects
            extra = subjects - expected_subjects
            raise ValueError(
                f"Subject mismatch for modality '{mod}': "
                f"missing={list(missing)[:3]}, extra={list(extra)[:3]}"
            )

    def _load_labels(
        self,
        radi_object: RadiObject,
        label_column: str,
        value_filter: str | None,
    ) -> None:
        """Load labels from obs_meta."""
        obs_meta = radi_object.obs_meta.read(value_filter=value_filter)
        if label_column not in obs_meta.columns:
            raise ValueError(f"Label column '{label_column}' not found")

        reference = self._collections[self._modalities[0]]
        subject_ids = reference.obs_subject_ids

        # Map each volume position to the first matching metadata row.
        labels: dict[int, Any] = {}
        for position in range(self._n_volumes):
            rows = obs_meta[obs_meta["obs_subject_id"] == subject_ids[position]]
            if len(rows) > 0:
                labels[position] = rows[label_column].iloc[0]
        self._labels = labels

    def __len__(self) -> int:
        return self._n_volumes

    def __getitem__(self, idx: int) -> dict[str, torch.Tensor]:
        # Channel order follows the modality list given at construction.
        stacked = np.stack(
            [self._collections[mod].iloc[idx].to_numpy() for mod in self._modalities],
            axis=0,
        )

        reference = self._collections[self._modalities[0]]
        result: dict[str, Any] = {
            "image": torch.from_numpy(stacked),
            "idx": idx,
            "obs_id": reference.obs_ids[idx],
        }

        if self._labels is not None and idx in self._labels:
            label = self._labels[idx]
            is_numeric = isinstance(label, (int, float, np.integer, np.floating))
            result["label"] = torch.tensor(label) if is_numeric else label

        if self._transform is not None:
            result = self._transform(result)

        return result

    @property
    def modalities(self) -> list[str]:
        """List of modalities."""
        return self._modalities

    @property
    def volume_shape(self) -> tuple[int, int, int]:
        """Volume dimensions."""
        return self._volume_shape
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
"""Specialized patch extraction dataset."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Callable
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import torch
|
|
10
|
+
from torch.utils.data import Dataset
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from radiobject.volume_collection import VolumeCollection
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PatchVolumeDataset(Dataset):
    """Dataset for extracting patches from a single VolumeCollection.

    Each dataset index maps to a (volume, patch) pair; the patch location is
    drawn pseudo-randomly but deterministically (the RNG is seeded by the
    index), so the same index always returns the same patch.
    """

    def __init__(
        self,
        collection: VolumeCollection,
        patch_size: tuple[int, int, int],
        patches_per_volume: int = 1,
        transform: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
    ):
        """Initialize the patch dataset.

        Args:
            collection: Source collection; must have a uniform volume shape.
            patch_size: (X, Y, Z) size of each extracted patch.
            patches_per_volume: Number of patches per volume (>= 1).
            transform: Optional transform applied to each sample dict.

        Raises:
            ValueError: If patches_per_volume < 1, the collection has no
                uniform shape, or a patch dimension exceeds the volume.
        """
        # Mirror DatasetConfig's constraint for direct construction: a
        # non-positive count would silently yield an empty/negative length.
        if patches_per_volume < 1:
            raise ValueError("patches_per_volume must be >= 1")

        self._collection = collection
        self._patch_size = patch_size
        self._patches_per_volume = patches_per_volume
        self._transform = transform

        self._obs_ids = collection.obs_ids
        self._n_volumes = len(self._obs_ids)
        self._volume_shape = collection.shape
        self._length = self._n_volumes * patches_per_volume

        if self._volume_shape is None:
            raise ValueError("Collection must have uniform shape for patch extraction")

        for i, dim in enumerate(patch_size):
            if dim > self._volume_shape[i]:
                raise ValueError(
                    f"Patch dimension {i} ({dim}) exceeds volume dimension ({self._volume_shape[i]})"
                )

    def __len__(self) -> int:
        return self._length

    def __getitem__(self, idx: int) -> dict[str, torch.Tensor]:
        """Return one random (but index-deterministic) patch sample."""
        volume_idx = idx // self._patches_per_volume
        patch_idx = idx % self._patches_per_volume

        # Seeding with idx makes patch locations reproducible per index.
        rng = np.random.default_rng(seed=idx)

        # Highest valid start coordinate per axis (0 when the patch spans it).
        max_start = tuple(max(0, self._volume_shape[i] - self._patch_size[i]) for i in range(3))
        start = tuple(
            rng.integers(0, max_start[i] + 1) if max_start[i] > 0 else 0 for i in range(3)
        )

        vol = self._collection.iloc[volume_idx]
        data = vol.slice(
            slice(start[0], start[0] + self._patch_size[0]),
            slice(start[1], start[1] + self._patch_size[1]),
            slice(start[2], start[2] + self._patch_size[2]),
        )

        result: dict[str, Any] = {
            "image": torch.from_numpy(data).unsqueeze(0),  # add channel dim
            "idx": volume_idx,
            "patch_idx": patch_idx,
            "patch_start": start,
            "obs_id": self._obs_ids[volume_idx],
        }

        if self._transform is not None:
            result = self._transform(result)

        return result

    @property
    def volume_shape(self) -> tuple[int, int, int]:
        """Shape of each volume."""
        return self._volume_shape

    @property
    def patch_size(self) -> tuple[int, int, int]:
        """Patch dimensions."""
        return self._patch_size
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class GridPatchDataset(Dataset):
    """Dataset for extracting patches on a regular grid (for inference)."""

    def __init__(
        self,
        collection: VolumeCollection,
        patch_size: tuple[int, int, int],
        stride: tuple[int, int, int] | None = None,
        transform: Callable[[dict[str, Any]], dict[str, Any]] | None = None,
    ):
        self._collection = collection
        self._patch_size = patch_size
        # Default stride equals patch size: a non-overlapping tiling.
        self._stride = stride or patch_size
        self._transform = transform

        self._obs_ids = collection.obs_ids
        self._n_volumes = len(self._obs_ids)
        self._volume_shape = collection.shape

        if self._volume_shape is None:
            raise ValueError("Collection must have uniform shape for grid patch extraction")

        self._grid_positions = self._compute_grid_positions()
        self._patches_per_volume = len(self._grid_positions)
        self._length = self._n_volumes * self._patches_per_volume

    def _compute_grid_positions(self) -> list[tuple[int, int, int]]:
        """Compute grid patch positions for inference.

        NOTE(review): trailing regions that don't align with the stride get
        no extra edge patch — confirm full coverage is not required here.
        """
        spans = [
            range(0, self._volume_shape[axis] - self._patch_size[axis] + 1, self._stride[axis])
            for axis in range(3)
        ]
        positions = [(x, y, z) for x in spans[0] for y in spans[1] for z in spans[2]]
        # Guarantee at least one position (the origin) for degenerate shapes.
        return positions or [(0, 0, 0)]

    def __len__(self) -> int:
        return self._length

    def __getitem__(self, idx: int) -> dict[str, torch.Tensor]:
        volume_idx, patch_idx = divmod(idx, self._patches_per_volume)
        x0, y0, z0 = self._grid_positions[patch_idx]

        patch = self._collection.iloc[volume_idx].slice(
            slice(x0, x0 + self._patch_size[0]),
            slice(y0, y0 + self._patch_size[1]),
            slice(z0, z0 + self._patch_size[2]),
        )

        sample: dict[str, Any] = {
            "image": torch.from_numpy(patch).unsqueeze(0),
            "idx": volume_idx,
            "patch_idx": patch_idx,
            "patch_start": (x0, y0, z0),
            "obs_id": self._obs_ids[volume_idx],
        }

        if self._transform is not None:
            sample = self._transform(sample)

        return sample

    @property
    def grid_positions(self) -> list[tuple[int, int, int]]:
        """All patch start positions in the grid."""
        return self._grid_positions
|