fastMONAI-0.5.3-py3-none-any.whl → fastMONAI-0.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fastMONAI/dataset_info.py CHANGED
@@ -1,10 +1,11 @@
  # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/08_dataset_info.ipynb.

  # %% auto 0
- __all__ = ['MedDataset', 'get_class_weights']
+ __all__ = ['MedDataset', 'suggest_patch_size', 'get_class_weights']

  # %% ../nbs/08_dataset_info.ipynb 2
  from .vision_core import *
+ from .vision_plot import find_max_slice

  from sklearn.utils.class_weight import compute_class_weight
  from concurrent.futures import ThreadPoolExecutor
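
With 0.6.0, `suggest_patch_size` joins the module's public exports, so all three names below should be importable (a quick sanity check, not part of the released diff):

    from fastMONAI.dataset_info import MedDataset, suggest_patch_size, get_class_weights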
@@ -12,29 +13,37 @@ import pandas as pd
  import numpy as np
  import torch
  import glob
+ import matplotlib.pyplot as plt
+
+ # %% ../nbs/08_dataset_info.ipynb 3
+ import warnings

- # %% ../nbs/08_dataset_info.ipynb 4
  class MedDataset:
      """A class to extract and present information about the dataset."""

-     def __init__(self, path=None, postfix: str = '', img_list: list = None,
-                  reorder: bool = False, dtype: (MedImage, MedMask) = MedImage,
-                  max_workers: int = 1):
+     def __init__(self, dataframe=None, image_col:str=None, mask_col:str="mask_path",
+                  path=None, img_list=None, postfix:str='', apply_reorder:bool=True,
+                  dtype:(MedImage, MedMask)=MedImage, max_workers:int=1):
          """Constructs MedDataset object.

          Args:
-             path (str, optional): Path to the image folder.
-             postfix (str, optional): Specify the file type if there are different files in the folder.
-             img_list (List[str], optional): Alternatively, pass in a list with image paths.
-             reorder (bool, optional): Whether to reorder the data to be closest to canonical (RAS+) orientation.
-             dtype (Union[MedImage, MedMask], optional): Load data as datatype. Default is MedImage.
-             max_workers (int, optional): The number of worker threads. Default is 1.
+             dataframe: DataFrame containing image paths.
+             image_col: Column name for image paths (used for visualization).
+             mask_col: Column name for mask/label paths when using dataframe mode.
+             path: Directory path containing images.
+             img_list: List of image file paths to analyze.
+             postfix: File postfix filter when using path mode.
+             apply_reorder: Whether to reorder images to RAS+ orientation.
+             dtype: MedImage for images or MedMask for segmentation masks.
+             max_workers: Number of parallel workers for processing.
          """
-
+         self.input_df = dataframe
+         self.image_col = image_col
+         self.mask_col = mask_col
          self.path = path
-         self.postfix = postfix
          self.img_list = img_list
-         self.reorder = reorder
+         self.postfix = postfix
+         self.apply_reorder = apply_reorder
          self.dtype = dtype
          self.max_workers = max_workers
          self.df = self._create_data_frame()
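
The rewritten constructor keeps the 0.5.3 folder and file-list modes and adds a DataFrame mode. A minimal sketch of all three, assuming `MedMask` is importable from `vision_core` (as the module's own star import suggests) and with illustrative paths and column names:

    import pandas as pd
    from fastMONAI.dataset_info import MedDataset
    from fastMONAI.vision_core import MedMask

    # Mode 1: scan a folder, filtering by postfix (as in 0.5.3, but note
    # the reorder -> apply_reorder rename and its new default of True)
    ds_path = MedDataset(path='data/masks', postfix='.nii.gz', dtype=MedMask)

    # Mode 2: pass an explicit list of files
    ds_list = MedDataset(img_list=['data/masks/case_0.nii.gz',
                                   'data/masks/case_1.nii.gz'], dtype=MedMask)

    # Mode 3 (new in 0.6.0): pass a DataFrame; mask_col drives the analysis,
    # while image_col is only consulted by visualize_cases()
    df = pd.DataFrame({'image_path': ['data/images/case_0.nii.gz'],
                       'mask_path': ['data/masks/case_0.nii.gz']})
    ds_df = MedDataset(dataframe=df, image_col='image_path',
                       mask_col='mask_path', dtype=MedMask)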
@@ -42,70 +51,343 @@ class MedDataset:
      def _create_data_frame(self):
          """Private method that returns a dataframe with information about the dataset."""

-         if self.path:
-             self.img_list = glob.glob(f'{self.path}/*{self.postfix}*')
-             if not self.img_list: print('Could not find images. Check the image path')
+         # Handle img_list (simple list of paths)
+         if self.img_list is not None:
+             file_list = self.img_list
+
+         # Handle path-based initialization
+         elif self.path:
+             file_list = glob.glob(f'{self.path}/*{self.postfix}*')
+             if not file_list:
+                 print('Could not find images. Check the image path')
+                 return pd.DataFrame()

+         # Handle dataframe-based initialization
+         elif self.input_df is not None and self.mask_col in self.input_df.columns:
+             file_list = self.input_df[self.mask_col].tolist()
+
+         else:
+             print('Error: Must provide path, img_list, or dataframe with mask_col')
+             return pd.DataFrame()
+
+         # Process images to extract metadata
          with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
-             data_info_dict = list(executor.map(self._get_data_info, self.img_list))
+             data_info_dict = list(executor.map(self._get_data_info, file_list))

          df = pd.DataFrame(data_info_dict)
-
-         if df.orientation.nunique() > 1:
-             print('The volumes in this dataset have different orientations. '
-                   'Recommended to pass in the argument reorder=True when creating a MedDataset object for this dataset')
+
+         if len(df) > 0 and df.orientation.nunique() > 1 and not self.apply_reorder:
+             raise ValueError(
+                 'Mixed orientations detected in dataset. '
+                 'Please recreate MedDataset with apply_reorder=True to get correct resample values: '
+                 'MedDataset(..., apply_reorder=True)'
+             )

          return df

      def summary(self):
          """Summary DataFrame of the dataset with example path for similar data."""
-
+
          columns = ['dim_0', 'dim_1', 'dim_2', 'voxel_0', 'voxel_1', 'voxel_2', 'orientation']
-
+
          return self.df.groupby(columns, as_index=False).agg(
              example_path=('path', 'min'), total=('path', 'size')
          ).sort_values('total', ascending=False)

-     def suggestion(self):
-         """Voxel value that appears most often in dim_0, dim_1 and dim_2, and whether the data should be reoriented."""
-
-         resample = [float(self.df.voxel_0.mode()[0]), float(self.df.voxel_1.mode()[0]), float(self.df.voxel_2.mode()[0])]
-         return resample, self.reorder
+     def get_suggestion(self, include_patch_size: bool = False):
+         """Returns suggested preprocessing parameters as a dictionary.
+
+         The returned target_spacing is derived from the mode (most common value)
+         of voxel spacings in the dataset.
+
+         Note:
+             apply_reorder is NOT included in the return value because it is not
+             data-derived. Access dataset.apply_reorder directly if needed.
+
+         Args:
+             include_patch_size: If True, includes suggested patch_size for
+                 patch-based training. Requires vision_patch module.
+
+         Returns:
+             dict: {'target_spacing': [voxel_0, voxel_1, voxel_2]}
+                 If include_patch_size=True, also includes 'patch_size': [dim_0, dim_1, dim_2]
+         """
+         target_spacing = [float(self.df.voxel_0.mode()[0]), float(self.df.voxel_1.mode()[0]), float(self.df.voxel_2.mode()[0])]
+         result = {'target_spacing': target_spacing}
+
+         if include_patch_size:
+             result['patch_size'] = suggest_patch_size(self)
+
+         return result

      def _get_data_info(self, fn: str):
          """Private method to collect information about an image file."""
-         _, o, _ = med_img_reader(fn, reorder=self.reorder, only_tensor=False, dtype=self.dtype)
+         try:
+             _, o, _ = med_img_reader(fn, apply_reorder=self.apply_reorder, only_tensor=False, dtype=self.dtype)

-         info_dict = {'path': fn, 'dim_0': o.shape[1], 'dim_1': o.shape[2], 'dim_2': o.shape[3],
-                      'voxel_0': round(o.spacing[0], 4), 'voxel_1': round(o.spacing[1], 4), 'voxel_2': round(o.spacing[2], 4),
-                      'orientation': f'{"".join(o.orientation)}+'}
+             info_dict = {'path': fn, 'dim_0': o.shape[1], 'dim_1': o.shape[2], 'dim_2': o.shape[3],
+                          'voxel_0': round(o.spacing[0], 4), 'voxel_1': round(o.spacing[1], 4), 'voxel_2': round(o.spacing[2], 4),
+                          'orientation': f'{"".join(o.orientation)}+'}

-         if self.dtype is MedMask:
-             mask_labels_dict = o.count_labels()
-             mask_labels_dict = {f'voxel_count_{int(key)}': val for key, val in mask_labels_dict.items()}
-             info_dict.update(mask_labels_dict)
+             if self.dtype is MedMask:
+                 # Calculate voxel volume in mm³
+                 voxel_volume = o.spacing[0] * o.spacing[1] * o.spacing[2]

-         return info_dict
+                 # Get voxel counts for each label
+                 mask_labels_dict = o.count_labels()

-     def get_largest_img_size(self, resample: list = None) -> list:
-         """Get the largest image size in the dataset."""
-
-         dims = None
+                 # Calculate volumes for each label > 0 (skip background)
+                 for key, voxel_count in mask_labels_dict.items():
+                     label_int = int(key)
+                     if label_int > 0 and voxel_count > 0:  # Skip background (label 0)
+                         volume_mm3 = voxel_count * voxel_volume
+                         info_dict[f'label_{label_int}_volume_mm3'] = round(volume_mm3, 4)

-         if resample is not None:
+             return info_dict
+
+         except Exception as e:
+             print(f"Warning: Failed to process {fn}: {e}")
+             return {'path': fn, 'error': str(e)}
+
+     def calculate_target_size(self, target_spacing: list = None) -> list:
+         """Calculate the target image size for the dataset.
+
+         .. deprecated::
+             Use `get_size_statistics(target_spacing)['max']` instead for consistency
+             with other size statistics methods.
+
+         Args:
+             target_spacing: If provided, calculates size after resampling to this spacing.
+                 If None, returns original dimensions.
+
+         Returns:
+             list: [dim_0, dim_1, dim_2] largest dimensions in dataset.
+         """
+         warnings.warn(
+             "calculate_target_size() is deprecated. "
+             "Use get_size_statistics(target_spacing)['max'] instead.",
+             DeprecationWarning,
+             stacklevel=2
+         )
+         if target_spacing is not None:
              org_voxels = self.df[["voxel_0", "voxel_1", 'voxel_2']].values
              org_dims = self.df[["dim_0", "dim_1", 'dim_2']].values

-             ratio = org_voxels/resample
+             ratio = org_voxels/target_spacing
              new_dims = (org_dims * ratio).T
-             dims = [float(new_dims[0].max().round()), float(new_dims[1].max().round()), float(new_dims[2].max().round())]
-
+             # Use floor() to match TorchIO's Resample dimension calculation
+             dims = [float(np.floor(new_dims[0].max())), float(np.floor(new_dims[1].max())), float(np.floor(new_dims[2].max()))]
          else:
              dims = [float(self.df.dim_0.max()), float(self.df.dim_1.max()), float(self.df.dim_2.max())]

          return dims

- # %% ../nbs/08_dataset_info.ipynb 5
+     def get_size_statistics(self, target_spacing: list = None) -> dict:
+         """Calculate comprehensive size statistics for the dataset.
+
+         Args:
+             target_spacing: If provided, calculates statistics after
+                 simulating resampling to this spacing.
+
+         Returns:
+             dict with keys: 'median', 'min', 'max', 'std', 'percentile_10', 'percentile_90'
+                 Each value is a list [dim_0, dim_1, dim_2].
+         """
+         if len(self.df) == 0:
+             raise ValueError("Dataset is empty - cannot calculate statistics")
+
+         if target_spacing is not None:
+             # Simulate resampled dimensions
+             org_voxels = self.df[["voxel_0", "voxel_1", "voxel_2"]].values
+             org_dims = self.df[["dim_0", "dim_1", "dim_2"]].values
+             ratio = org_voxels / np.array(target_spacing)
+             dims = np.floor(org_dims * ratio)
+         else:
+             dims = self.df[["dim_0", "dim_1", "dim_2"]].values
+
+         return {
+             'median': [float(np.median(dims[:, i])) for i in range(3)],
+             'min': [float(np.min(dims[:, i])) for i in range(3)],
+             'max': [float(np.max(dims[:, i])) for i in range(3)],
+             'std': [float(np.std(dims[:, i])) for i in range(3)],
+             'percentile_10': [float(np.percentile(dims[:, i], 10)) for i in range(3)],
+             'percentile_90': [float(np.percentile(dims[:, i], 90)) for i in range(3)],
+         }
+
+     def get_volume_summary(self):
+         """Returns DataFrame with volume statistics for each label.
+
+         Returns:
+             DataFrame with columns: label, count, mean_mm3, median_mm3, min_mm3, max_mm3
+                 Returns None if no volume columns found (dtype was not MedMask).
+         """
+         volume_cols = [col for col in self.df.columns if col.endswith('_volume_mm3')]
+
+         if not volume_cols:
+             return None
+
+         summary_data = []
+         for col in volume_cols:
+             non_zero = self.df[self.df[col] > 0][col]
+             if len(non_zero) > 0:
+                 summary_data.append({
+                     'label': col.replace('_volume_mm3', ''),
+                     'count': len(non_zero),
+                     'mean_mm3': non_zero.mean(),
+                     'median_mm3': non_zero.median(),
+                     'min_mm3': non_zero.min(),
+                     'max_mm3': non_zero.max()
+                 })
+
+         return pd.DataFrame(summary_data) if summary_data else None
+
+     def _visualize_single_case(self, img_path, mask_path, case_id, anatomical_plane=2, cmap='hot', figsize=(12, 5)):
+         """Helper method to visualize a single case."""
+         try:
+             # Create MedImage and MedMask with current preprocessing settings
+             suggestion = self.get_suggestion()
+             MedBase.item_preprocessing(target_spacing=suggestion['target_spacing'], apply_reorder=self.apply_reorder)
+
+             img = MedImage.create(img_path)
+             mask = MedMask.create(mask_path)
+
+             # Find optimal slice using explicit function
+             mask_data = mask.numpy()[0]  # Remove channel dimension
+             optimal_slice = find_max_slice(mask_data, anatomical_plane)
+
+             # Create subplot
+             fig, axes = plt.subplots(1, 2, figsize=figsize)
+
+             # Show image
+             img.show(ctx=axes[0], anatomical_plane=anatomical_plane, slice_index=optimal_slice)
+             axes[0].set_title(f"{case_id} - Image (slice {optimal_slice})")
+
+             # Show overlay
+             img.show(ctx=axes[1], anatomical_plane=anatomical_plane, slice_index=optimal_slice)
+             mask.show(ctx=axes[1], anatomical_plane=anatomical_plane, slice_index=optimal_slice,
+                       alpha=0.3, cmap=cmap)
+             axes[1].set_title(f"{case_id} - Overlay (slice {optimal_slice})")
+
+             # Adjust spacing to bring plots closer
+             plt.subplots_adjust(wspace=0.1)
+             plt.tight_layout()
+             plt.show()
+
+         except Exception as e:
+             print(f"Failed to visualize case {case_id}: {e}")
+
+     def visualize_cases(self, n_cases=4, anatomical_plane=2, cmap='hot', figsize=(12, 5)):
+         """Visualize cases from the dataset.
+
+         Args:
+             n_cases: Number of cases to show.
+             anatomical_plane: 0=sagittal, 1=coronal, 2=axial
+             cmap: Colormap for mask overlay
+             figsize: Figure size for each case
+         """
+         if self.input_df is None:
+             print("Error: No dataframe provided. Cannot visualize cases.")
+             return
+
+         if self.image_col is None:
+             print("Error: No image_col specified. Cannot visualize cases.")
+             return
+
+         # Check if required columns exist
+         if self.image_col not in self.input_df.columns:
+             print(f"Error: Column '{self.image_col}' not found in dataframe.")
+             return
+
+         if self.mask_col not in self.input_df.columns:
+             print(f"Error: Column '{self.mask_col}' not found in dataframe.")
+             return
+
+         for idx in range(min(n_cases, len(self.input_df))):
+             row = self.input_df.iloc[idx]
+             case_id = row.get('case_id', f'Case_{idx}')  # Fallback if no case_id
+             img_path = row[self.image_col]
+             mask_path = row[self.mask_col]
+
+             self._visualize_single_case(img_path, mask_path, case_id, anatomical_plane, cmap, figsize)
+             print("-" * 60)
+
+ # %% ../nbs/08_dataset_info.ipynb 4
+ def suggest_patch_size(
+     dataset: MedDataset,
+     target_spacing: list = None,
+     min_patch_size: list = None,
+     max_patch_size: list = None,
+     divisor: int = 16
+ ) -> list:
+     """Suggest optimal patch size based on median image dimensions.
+
+     Algorithm:
+         1. Use median shape for robustness to outliers
+         2. Round down to nearest multiple of divisor (16 for 4+ UNet pooling layers)
+         3. Clamp to [min_patch_size, max_patch_size]
+
+     Args:
+         dataset: MedDataset instance with analyzed images.
+         target_spacing: Target voxel spacing [x, y, z]. If None, uses
+             dataset.get_suggestion()['target_spacing'].
+         min_patch_size: Minimum per dimension. Default [32, 32, 32].
+         max_patch_size: Maximum per dimension. Default [256, 256, 256].
+         divisor: Ensure divisibility (default 16 for UNet compatibility).
+
+     Returns:
+         list: [patch_dim_0, patch_dim_1, patch_dim_2]
+
+     Example:
+         >>> from fastMONAI.dataset_info import MedDataset
+         >>> dataset = MedDataset(dataframe=df, mask_col='mask_path', dtype=MedMask)
+         >>>
+         >>> # Use recommended spacing
+         >>> patch_size = suggest_patch_size(dataset)
+         >>>
+         >>> # Use custom spacing
+         >>> patch_size = suggest_patch_size(dataset, target_spacing=[1.0, 1.0, 2.0])
+     """
+     # Defaults
+     min_patch_size = min_patch_size or [32, 32, 32]
+     max_patch_size = max_patch_size or [256, 256, 256]
+
+     # Use explicit spacing or get from dataset suggestion
+     if target_spacing is None:
+         suggestion = dataset.get_suggestion()
+         target_spacing = suggestion['target_spacing']
+
+     # Get size statistics (resampled to target_spacing)
+     stats = dataset.get_size_statistics(target_spacing)
+     median_shape = stats['median']
+
+     # Handle single-image edge case
+     if len(dataset.df) == 1:
+         warnings.warn("Single image dataset - using image dimensions directly")
+
+     # Step 1: Round down to nearest divisor
+     def round_to_divisor(val, div):
+         """Round down to nearest multiple of divisor."""
+         return max(div, int(val // div) * div)
+
+     patch_size = [round_to_divisor(dim, divisor) for dim in median_shape]
+
+     # Step 2: Clamp to bounds
+     patch_size = [
+         max(min_p, min(max_p, p))
+         for p, min_p, max_p in zip(patch_size, min_patch_size, max_patch_size)
+     ]
+
+     # Edge case: image smaller than suggested patch
+     for i, (p, median_dim) in enumerate(zip(patch_size, median_shape)):
+         if median_dim < p:
+             warnings.warn(
+                 f"Median dimension {i} ({median_dim:.0f}) smaller than suggested "
+                 f"patch_size ({p}). Images will require padding."
+             )
+
+     return patch_size
+
+ # %% ../nbs/08_dataset_info.ipynb 6
  def get_class_weights(labels: (np.array, list), class_weight: str = 'balanced') -> torch.Tensor:
      """Calculates and returns the class weights.

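Taken together, these methods replace the 0.5.3 `suggestion()` and `get_largest_img_size()` APIs. A migration sketch (paths illustrative; note the behavioral change from round() to floor() when simulating resampled dimensions):

    ds = MedDataset(path='data/masks', dtype=MedMask)  # apply_reorder defaults to True

    # 0.5.3: resample, reorder = ds.suggestion()
    suggestion = ds.get_suggestion(include_patch_size=True)
    target_spacing = suggestion['target_spacing']  # mode of the voxel spacings
    patch_size = suggestion['patch_size']          # divisible by 16, clamped to bounds

    # 0.5.3: dims = ds.get_largest_img_size(resample=target_spacing)
    stats = ds.get_size_statistics(target_spacing)
    largest_size = stats['max']    # floor()ed, so possibly one voxel smaller
    median_size = stats['median']  # than the round()ed 0.5.3 values

    # Per-label volume statistics (label_*_volume_mm3) exist when dtype=MedMask
    print(ds.get_volume_summary())
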
@@ -94,7 +94,7 @@ def download_ixi_data(path: (str, Path) = '../data') -> Path:
          if len(list(img_path.iterdir())) >= 581:  # 581 imgs in the IXI dataset
              is_extracted = True
              print(f"Images already downloaded and extracted to {img_path}")
-     except:
+     except (FileNotFoundError, StopIteration, OSError):
          is_extracted = False

      if not is_extracted:
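
The bare `except:` in 0.5.3 also caught `KeyboardInterrupt` and `SystemExit`, so an interrupted run could silently fall through to `is_extracted = False`. The narrowed pattern in isolation (the path is illustrative):

    from pathlib import Path

    img_path = Path('../data/IXI/imgs')  # illustrative location
    try:
        # iterdir() raises an OSError subclass (e.g. FileNotFoundError)
        # if the directory does not exist yet
        is_extracted = len(list(img_path.iterdir())) >= 581
    except (FileNotFoundError, StopIteration, OSError):
        is_extracted = False  # fall through to the download branch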