dropdrop 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dropdrop/__init__.py +16 -0
- dropdrop/cache.py +133 -0
- dropdrop/cli.py +252 -0
- dropdrop/config.py +67 -0
- dropdrop/pipeline.py +400 -0
- dropdrop/stats.py +299 -0
- dropdrop/ui.py +441 -0
- dropdrop-1.1.0.dist-info/METADATA +179 -0
- dropdrop-1.1.0.dist-info/RECORD +12 -0
- dropdrop-1.1.0.dist-info/WHEEL +4 -0
- dropdrop-1.1.0.dist-info/entry_points.txt +2 -0
- dropdrop-1.1.0.dist-info/licenses/LICENSE +21 -0
dropdrop/pipeline.py
ADDED
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
"""Main droplet and inclusion detection pipeline."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import sys
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
import cv2
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from tqdm import tqdm
|
|
12
|
+
|
|
13
|
+
from .cache import CacheManager
|
|
14
|
+
from .config import load_config
|
|
15
|
+
|
|
16
|
+
# Required: Cellpose — a hard dependency of this pipeline.
try:
    from cellpose.models import CellposeModel
except ImportError:
    # Fail fast at import time with an actionable message rather than
    # crashing mid-run on the first frame.
    print("You need to have cellpose for this pipeline to work!")
    sys.exit(1)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DropletInclusionPipeline:
    """Main pipeline for droplet and inclusion detection.

    Workflow per frame: group z-stack images -> minimum-intensity
    projection (+ CLAHE) -> Cellpose droplet segmentation -> per-droplet
    black-hat inclusion detection -> tabular results (and optional
    visualization data for the UI).
    """

    def __init__(self, config=None, store_visualizations=False, use_cache=True):
        """Initialize pipeline with configuration.

        Args:
            config: Configuration dict. If None, loads from config.json.
            store_visualizations: Whether to store visualization data for UI.
            use_cache: Whether to use caching for expensive computations.
        """
        self.config = config if config else load_config()
        self.results_data = []
        self.store_visualizations = store_visualizations
        self.visualization_data = {} if store_visualizations else None
        self.use_cache = use_cache
        self.cache = CacheManager(self.config) if use_cache else None
        # Cellpose model handle; created lazily on first use and reused,
        # because constructing CellposeModel reloads network weights.
        self._model = None

    def parse_filename(self, filename):
        """Extract z-stack index and frame index from filename.

        Files without a z-index are treated as single images (z_index=0).
        Returns (z_index, frame_index); frame_index is None when the
        ``a01f<N>d4`` pattern (case-insensitive) is absent, and such files
        are skipped by load_and_group_images().
        """
        z_match = re.search(r"_z(\d+)_", filename)
        z_index = int(z_match.group(1)) if z_match else 0

        f_match = re.search(r"a01f(\d+)d4", filename, re.IGNORECASE)
        frame_index = int(f_match.group(1)) if f_match else None

        return z_index, frame_index

    def load_and_group_images(self, input_dir):
        """Load image paths from input_dir and group them by frame index.

        Returns a dict mapping frame_index -> list of (z_index, Path),
        sorted by z_index within each frame.
        """
        input_path = Path(input_dir)

        # Find all image files (both lower- and upper-case extensions,
        # for case-sensitive filesystems).
        extensions = [".tif", ".tiff", ".png", ".jpg", ".jpeg"]
        image_files = []
        for ext in extensions:
            image_files.extend(input_path.glob(f"*{ext}"))
            image_files.extend(input_path.glob(f"*{ext.upper()}"))

        # Group by frame; files with no recognizable frame index are dropped.
        frame_groups = defaultdict(list)
        for filepath in image_files:
            z_idx, frame_idx = self.parse_filename(filepath.name)
            if frame_idx is not None:
                frame_groups[frame_idx].append((z_idx, filepath))

        # Sort z-stacks within each frame so the projection is deterministic.
        for frame_idx in frame_groups:
            frame_groups[frame_idx].sort(key=lambda x: x[0])

        return frame_groups

    def create_min_projection(self, z_stack_files):
        """Create a minimum-intensity projection with CLAHE preprocessing.

        Args:
            z_stack_files: list of (z_index, Path) tuples for one frame.

        Returns:
            uint8 projection image, or None if no image could be read.
        """
        images = []
        for z_idx, filepath in z_stack_files:
            img = cv2.imread(str(filepath), cv2.IMREAD_ANYDEPTH | cv2.IMREAD_GRAYSCALE)
            if img is not None:
                # Convert to 8-bit first.
                if img.dtype == np.uint16:
                    # The x64 gain suggests ~10-bit sensor data stored in a
                    # 16-bit container (1023 * 64 ~= 65535) — TODO confirm
                    # against the acquisition settings.
                    img = img.astype(np.float32) * 64
                    img = np.clip(img, 0, 65535)
                    img = (img / 256).astype(np.uint8)
                else:
                    img = np.clip(img, 0, 255).astype(np.uint8)

                images.append(img)

        if not images:
            return None

        # Min projection: droplet boundaries and dark inclusions are darkest
        # across the stack, so min keeps them while suppressing bright noise.
        stack = np.stack(images, axis=0)
        min_proj = np.min(stack, axis=0).astype(np.uint8)

        # Apply CLAHE to normalize local contrast before segmentation.
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        min_proj = clahe.apply(min_proj)

        return min_proj

    def detect_droplets_cellpose(self, image):
        """Detect droplets using Cellpose.

        The model is instantiated once and cached on the instance: creating
        a CellposeModel reloads network weights, which previously happened
        on every frame.
        """
        if self._model is None:
            self._model = CellposeModel(gpu=True)

        masks, flows, styles = self._model.eval(
            image,
            normalize=True,
            flow_threshold=self.config["cellpose_flow_threshold"],
            cellprob_threshold=self.config["cellpose_cellprob_threshold"],
        )

        return self.masks_to_coordinates(masks)

    def masks_to_coordinates(self, masks):
        """Convert a Cellpose label image to a list of coordinate strings."""
        coordinate_list = []

        # Get unique mask IDs, excluding label 0 (background).
        unique_ids = np.unique(masks)[1:]

        for mask_id in unique_ids:
            binary_mask = (masks == mask_id).astype(np.uint8)
            coords = self.mask_to_coordinates(binary_mask)
            if coords is not None:
                coordinate_list.append(coords)

        return coordinate_list

    def mask_to_coordinates(self, binary_mask):
        """Convert a single binary mask to a comma-separated "x,y,x,y,..."
        contour string, or None if the mask has no contour."""
        contours, _ = cv2.findContours(
            binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        if not contours:
            return None

        # Keep only the largest contour in case the mask is fragmented.
        contour = max(contours, key=cv2.contourArea)
        coords = []
        for point in contour:
            coords.extend([str(point[0][0]), str(point[0][1])])

        return ",".join(coords)

    def coordinates_to_mask(self, coord_string, image_shape):
        """Convert a coordinate string back to a filled binary mask (0/255)."""
        coords = [float(x) for x in coord_string.split(",")]
        points = np.array(coords).reshape(-1, 2).astype(np.int32)

        mask = np.zeros(image_shape, dtype=np.uint8)
        cv2.fillPoly(mask, [points], 255)

        return mask

    def erode_mask(self, mask, erosion_pixels):
        """Erode mask by the given number of pixels (no-op for <= 0).

        Used to shrink droplet masks so the bright rim of the droplet
        boundary is excluded from inclusion detection.
        """
        if erosion_pixels <= 0:
            return mask

        kernel_size = 2 * erosion_pixels + 1
        kernel = cv2.getStructuringElement(
            cv2.MORPH_ELLIPSE, (kernel_size, kernel_size)
        )

        return cv2.erode(mask, kernel, iterations=1)

    def detect_inclusions_in_droplet(self, image, droplet_mask, store_masked=False):
        """Detect inclusions within a single droplet using black-hat morphology.

        Returns (filtered_mask, count), plus the raw black-hat image when
        store_masked is True (used for UI visualization).
        """
        masked_image = cv2.bitwise_and(image, image, mask=droplet_mask)

        # Black-hat needs an odd kernel size; round up if configured even.
        kernel_size = self.config["kernel_size"]
        if kernel_size % 2 == 0:
            kernel_size += 1

        kernel = cv2.getStructuringElement(
            cv2.MORPH_ELLIPSE, (kernel_size, kernel_size)
        )

        # Black-hat highlights dark spots smaller than the kernel.
        blackhat = cv2.morphologyEx(masked_image, cv2.MORPH_BLACKHAT, kernel)

        _, inclusions = cv2.threshold(
            blackhat, self.config["tophat_threshold"], 255, cv2.THRESH_BINARY
        )

        # Re-mask: thresholding can light up pixels at the droplet border.
        inclusions = cv2.bitwise_and(inclusions, inclusions, mask=droplet_mask)
        filtered_inclusions, count = self.filter_inclusions_by_size(inclusions)

        if store_masked:
            return filtered_inclusions, count, blackhat
        return filtered_inclusions, count

    def filter_inclusions_by_size(self, inclusion_mask):
        """Filter detected inclusions by size constraints and edge proximity.

        Components whose bounding box touches the *image* border (within
        ``edge_buffer`` px) or whose area falls outside the configured
        [min_inclusion_area, max_inclusion_area] range are discarded.
        Returns (filtered_mask, count).
        """
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(
            inclusion_mask, connectivity=8
        )

        h, w = inclusion_mask.shape
        edge_buffer = self.config.get("edge_buffer", 5)

        filtered_mask = np.zeros_like(inclusion_mask)
        inclusion_count = 0

        # Label 0 is background; iterate real components only.
        for label in range(1, num_labels):
            area = stats[label, cv2.CC_STAT_AREA]
            x = stats[label, cv2.CC_STAT_LEFT]
            y = stats[label, cv2.CC_STAT_TOP]
            w_comp = stats[label, cv2.CC_STAT_WIDTH]
            h_comp = stats[label, cv2.CC_STAT_HEIGHT]

            # Edge check: reject components near the image border, where
            # partial droplets produce spurious detections.
            if (
                x < edge_buffer
                or y < edge_buffer
                or x + w_comp > w - edge_buffer
                or y + h_comp > h - edge_buffer
            ):
                continue

            # Size check.
            if (
                self.config["min_inclusion_area"]
                <= area
                <= self.config["max_inclusion_area"]
            ):
                filtered_mask[labels == label] = 255
                inclusion_count += 1

        return filtered_mask, inclusion_count

    def process_frame(self, frame_idx, min_projection, droplet_coords=None):
        """Process a single frame for droplets and inclusions.

        Appends one row per valid droplet to self.results_data; when
        store_visualizations is on, also records per-frame mask data.
        droplet_coords may be supplied (e.g. from cache) to skip detection.
        """
        if self.store_visualizations:
            frame_viz = {
                "min_projection": min_projection,
                "droplet_masks": [],
                "eroded_masks": [],
                "inclusion_masks": [],
                "masked_images": [],
            }

        if droplet_coords is None:
            droplet_coords = self.detect_droplets_cellpose(min_projection)

        if not droplet_coords:
            print(f"  Frame {frame_idx}: No droplets detected")
            if self.store_visualizations:
                self.visualization_data[frame_idx] = frame_viz
            return

        valid_droplet_idx = 0
        for coords in droplet_coords:
            droplet_mask = self.coordinates_to_mask(coords, min_projection.shape)

            contours, _ = cv2.findContours(
                droplet_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
            )

            if not contours:
                continue

            # Use the largest contour (consistent with mask_to_coordinates)
            # rather than whichever one findContours happens to list first.
            contour = max(contours, key=cv2.contourArea)
            M = cv2.moments(contour)
            if M["m00"] == 0:
                continue

            # Centroid and equivalent-circle diameter of the droplet.
            cx = int(M["m10"] / M["m00"])
            cy = int(M["m01"] / M["m00"])
            area = cv2.contourArea(contour)
            diameter = np.sqrt(4 * area / np.pi)

            if not (
                self.config["min_droplet_diameter"]
                <= diameter
                <= self.config["max_droplet_diameter"]
            ):
                continue

            # Shrink the mask so the droplet rim is excluded from detection.
            eroded_mask = self.erode_mask(droplet_mask, self.config["erosion_pixels"])

            if np.sum(eroded_mask) == 0:
                continue

            if self.store_visualizations:
                inclusion_mask, inclusion_count, blackhat = (
                    self.detect_inclusions_in_droplet(
                        min_projection, eroded_mask, store_masked=True
                    )
                )
                frame_viz["masked_images"].append(blackhat)
            else:
                inclusion_mask, inclusion_count = self.detect_inclusions_in_droplet(
                    min_projection, eroded_mask
                )

            if self.store_visualizations:
                frame_viz["droplet_masks"].append({
                    "mask": droplet_mask,
                    "center": (cx, cy),
                    "radius": diameter / 2,
                    "inclusions": inclusion_count,
                })
                frame_viz["eroded_masks"].append(eroded_mask)
                frame_viz["inclusion_masks"].append(inclusion_mask)

            self.results_data.append({
                "frame": frame_idx,
                "droplet_id": valid_droplet_idx,
                "center_x": cx,
                "center_y": cy,
                "diameter_px": diameter,
                "diameter_um": diameter * self.config["px_to_um"],
                "area_px": area,
                "area_um2": area * (self.config["px_to_um"] ** 2),
                "inclusions": inclusion_count,
            })

            valid_droplet_idx += 1

        if self.store_visualizations:
            self.visualization_data[frame_idx] = frame_viz

        frame_data = [d for d in self.results_data if d["frame"] == frame_idx]
        total_inclusions = sum(d["inclusions"] for d in frame_data)
        print(
            f"  Frame {frame_idx}: {len(frame_data)} valid droplets, "
            f"{total_inclusions} total inclusions"
        )

    def run(self, input_dir, output_dir, frame_limit=None):
        """Run the complete pipeline.

        Args:
            input_dir: Directory containing the raw images.
            output_dir: Directory for data.csv output (created if missing).
            frame_limit: Optional cap on the number of frames processed.

        Returns:
            The accumulated results list, or None if no images were found.
        """
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        print("\nLoading and grouping images...")
        frame_groups = self.load_and_group_images(input_dir)

        if not frame_groups:
            print("ERROR: No valid images found!")
            return None

        frame_indices = sorted(frame_groups.keys())
        if frame_limit and frame_limit > 0:
            frame_indices = frame_indices[:frame_limit]
            print(f"Processing limited to first {frame_limit} frames")

        print(
            f"Found {len(frame_groups)} frames total, processing {len(frame_indices)} frames\n"
        )

        cache_hits = 0
        for frame_idx in tqdm(frame_indices, desc="Processing frames"):
            z_stack_files = frame_groups[frame_idx]
            # The first z-slice filename identifies the frame in the cache.
            cache_key_file = z_stack_files[0][1].name if z_stack_files else None

            if self.cache and cache_key_file and self.cache.is_valid(cache_key_file):
                cached_data = self.cache.load_frame(cache_key_file)
                min_proj = cached_data["min_projection"]
                droplet_coords = cached_data["droplet_coords"]
                cache_hits += 1
                self.process_frame(frame_idx, min_proj, droplet_coords)
            else:
                min_proj = self.create_min_projection(z_stack_files)

                if min_proj is None:
                    continue

                droplet_coords = self.detect_droplets_cellpose(min_proj)

                if self.cache and cache_key_file:
                    self.cache.save_frame(cache_key_file, min_proj, droplet_coords)

                self.process_frame(frame_idx, min_proj, droplet_coords)

        if cache_hits > 0:
            print(f"\nCache: {cache_hits}/{len(frame_indices)} frames loaded from cache")

        if self.results_data:
            df = pd.DataFrame(self.results_data)
            csv_path = output_path / "data.csv"
            df.to_csv(csv_path, index=False)
            print(f"\nResults saved to: {csv_path}")
            self.print_summary(df)
        else:
            print("\nNo droplets detected in any frame!")

        return self.results_data

    def print_summary(self, df):
        """Print one-line summary of droplet and inclusion totals."""
        print(
            f"\nDetected {len(df)} droplets with {df['inclusions'].sum()} inclusions "
            f"({df['inclusions'].mean():.2f} per droplet)"
        )
dropdrop/stats.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
"""Statistical analysis for droplet detection results."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import matplotlib.pyplot as plt
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import seaborn as sns
|
|
10
|
+
from scipy import stats
|
|
11
|
+
|
|
12
|
+
# Set style for better-looking plots. NOTE: these are module-level side
# effects that alter global matplotlib state for the whole process.
sns.set_style("whitegrid")
plt.rcParams["figure.dpi"] = 100   # on-screen rendering resolution
plt.rcParams["savefig.dpi"] = 300  # default resolution for saved figures
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DropletStatistics:
    """Statistical analysis for droplet detection."""

    def __init__(self, results_csv, settings=None):
        """Initialize with results data and optional settings.

        Args:
            results_csv: Path to CSV file with detection results.
            settings: Dict with 'count', 'dilution', 'poisson' keys.
                'count' is the bead stock concentration (beads/uL, per the
                summary output), 'dilution' the dilution factor, 'poisson'
                whether to run the Poisson comparison.
        """
        self.df = pd.read_csv(results_csv)
        self.settings = settings or {}

        self.bead_count = self.settings.get("count", 6.5e5)
        self.dilution = self.settings.get("dilution", 1000)
        self.use_poisson = self.settings.get("poisson", True)

    def calculate_poisson(self, median_diameter_um):
        """Calculate the theoretical Poisson distribution of beads/droplet.

        Returns (x_range, pmf values, lambda). Lambda is the expected bead
        count per droplet of the given median diameter at the working
        concentration (stock / (dilution * 2) — the extra factor of 2
        presumably accounts for the 1:1 aqueous/oil mix; TODO confirm).
        """
        radius_um = median_diameter_um / 2
        # Sphere volume in uL: 1 uL = 1 mm^3 = 1e9 um^3, hence the 1e-9
        # factor. (This was previously misnamed "volume_ml".)
        volume_ul = (4 / 3) * np.pi * (radius_um**3) * 1e-9

        # beads/uL at working dilution times droplet volume in uL.
        lambda_val = (self.bead_count / (self.dilution * 2)) * volume_ul

        # Extend the x axis a few counts past the observed maximum so the
        # theoretical tail is visible.
        max_inc = int(self.df["inclusions"].max()) + 3
        x_range = np.arange(0, max_inc + 1)
        theoretical = stats.poisson.pmf(x_range, lambda_val)

        return x_range, theoretical, lambda_val

    def plot_size_distribution(self, output_path):
        """Plot droplet diameter distribution; returns (mean, median) in um."""
        fig, ax = plt.subplots(figsize=(8, 5))

        diameters = self.df["diameter_um"].values
        ax.hist(diameters, bins=25, color="steelblue", edgecolor="black", alpha=0.7)

        mean_d = np.mean(diameters)
        median_d = np.median(diameters)

        ax.axvline(mean_d, color="red", linestyle="--", label=f"Mean: {mean_d:.1f}")
        ax.axvline(
            median_d, color="green", linestyle="--", label=f"Median: {median_d:.1f}"
        )

        ax.set_xlabel("Diameter (µm)")
        ax.set_ylabel("Count")
        ax.set_title("Droplet Size Distribution")
        ax.legend()
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig(output_path / "size_distribution.png", dpi=200)
        plt.close()

        return mean_d, median_d

    def plot_poisson_comparison(self, output_path):
        """Plot detected vs theoretical Poisson with chi-squared test.

        Returns (lambda, chi2, p_value); chi2/p_value are None when the
        test could not be run (too few well-populated bins).
        """
        fig, ax = plt.subplots(figsize=(10, 6))

        # Lambda is derived from the median droplet size.
        median_d = self.df["diameter_um"].median()
        x_range, theoretical, lambda_val = self.calculate_poisson(median_d)

        actual = self.df["inclusions"].value_counts().sort_index()
        n_droplets = len(self.df)

        chi2, p_value = self.perform_chi_squared(actual, theoretical, n_droplets)

        # Convert both distributions to percentages for a side-by-side bar plot.
        detected_pct = []
        theoretical_pct = theoretical * 100

        for i in x_range:
            if i in actual.index:
                detected_pct.append(actual[i] / n_droplets * 100)
            else:
                detected_pct.append(0)

        x = np.arange(len(x_range))
        width = 0.35

        ax.bar(
            x - width / 2,
            detected_pct,
            width,
            label="Detected",
            color="royalblue",
            alpha=0.8,
        )
        ax.bar(
            x + width / 2,
            theoretical_pct[: len(x)],
            width,
            label=f"Poisson (λ={lambda_val:.3f})",
            color="coral",
            alpha=0.8,
        )

        if p_value is not None:
            result_text = f"χ² = {chi2:.2f}, p = {p_value:.4f}"
            if p_value > 0.05:
                result_text += "\n✓ Follows Poisson"
            else:
                result_text += "\n✗ Deviates from Poisson"
            ax.text(
                0.98,
                0.85,
                result_text,
                transform=ax.transAxes,
                ha="right",
                va="top",
                fontsize=10,
                bbox=dict(boxstyle="round", facecolor="wheat", alpha=0.8),
            )

        ax.set_xlabel("Inclusions per Droplet")
        ax.set_ylabel("Percentage (%)")
        ax.set_title("Inclusion Distribution: Detected vs Theoretical")
        ax.set_xticks(x)
        ax.set_xticklabels(x_range)
        ax.legend()
        ax.grid(True, alpha=0.3, axis="y")

        plt.tight_layout()
        plt.savefig(output_path / "poisson_comparison.png", dpi=200)
        plt.close()

        return lambda_val, chi2, p_value

    def perform_chi_squared(self, observed_counts, theoretical_probs, n_total):
        """Perform a chi-squared goodness-of-fit test.

        Args:
            observed_counts: Series of counts indexed by inclusion number.
            theoretical_probs: Poisson pmf values indexed 0..max.
            n_total: Total number of droplets.

        Returns:
            (chi2, p_value), or (None, None) if fewer than two bins have
            an expected count >= 5 (the usual validity rule of thumb).
        """
        observed = []
        expected = []

        for i in observed_counts.index:
            if i < len(theoretical_probs):
                obs = observed_counts[i]
                exp = theoretical_probs[i] * n_total
                observed.append(obs)
                expected.append(exp)

        observed = np.array(observed)
        expected = np.array(expected)

        # Drop sparse bins; the chi-squared approximation is unreliable there.
        mask = expected >= 5
        if mask.sum() < 2:
            return None, None

        observed_filtered = observed[mask]
        expected_filtered = expected[mask]

        # scipy.stats.chisquare requires observed and expected totals to
        # match; rescale expected after bin filtering.
        expected_filtered = expected_filtered * (
            observed_filtered.sum() / expected_filtered.sum()
        )

        chi2, p_value = stats.chisquare(observed_filtered, expected_filtered)
        return chi2, p_value

    def run_analysis(self, output_dir):
        """Run analysis, write plots + summary.txt, and print results.

        Raises:
            ValueError: if the results CSV contained no droplets (previously
                this surfaced as a ZeroDivisionError / NaN plots).
        """
        if self.df.empty:
            raise ValueError("No detection results to analyze (empty CSV)")

        output_path = Path(output_dir)
        output_path.mkdir(exist_ok=True)

        mean_d, median_d = self.plot_size_distribution(output_path)

        lambda_val, chi2, p_value = None, None, None
        if self.use_poisson:
            lambda_val, chi2, p_value = self.plot_poisson_comparison(output_path)

        total_droplets = len(self.df)
        total_inclusions = int(self.df["inclusions"].sum())
        with_inclusions = int((self.df["inclusions"] > 0).sum())
        std_d = self.df["diameter_um"].std()

        self._write_summary(
            output_path,
            mean_d=mean_d,
            median_d=median_d,
            std_d=std_d,
            total_droplets=total_droplets,
            total_inclusions=total_inclusions,
            with_inclusions=with_inclusions,
            lambda_val=lambda_val,
            chi2=chi2,
            p_value=p_value,
        )

        print("\nSTATISTICAL SUMMARY")
        print("-" * 40)
        print(f"Droplets: {total_droplets}")
        print(f"Mean diameter: {mean_d:.1f} µm")
        print(
            f"Inclusions: {total_inclusions} total, {total_inclusions / total_droplets:.2f} per droplet"
        )
        print(
            f"With inclusions: {with_inclusions} ({with_inclusions / total_droplets * 100:.1f}%)"
        )

        if self.use_poisson and lambda_val is not None:
            print(f"Theoretical λ: {lambda_val:.3f}")

            if p_value is not None:
                print("\nChi-squared test:")
                print(f"  χ² = {chi2:.2f}, p = {p_value:.4f}")
                if p_value > 0.05:
                    print("  → Distribution follows Poisson (p > 0.05)")
                else:
                    print("  → Distribution deviates from Poisson (p < 0.05)")

        print(f"\nOutput saved to: {output_path}")

    def _write_summary(self, output_path, **stats):
        """Write summary.txt with all settings and statistics.

        ``stats`` keys: mean_d, median_d, std_d, total_droplets,
        total_inclusions, with_inclusions, lambda_val, chi2, p_value.
        """
        project_name = output_path.name
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        input_dir = self.settings.get("input_dir", "N/A")
        total_frames = self.df["frame"].nunique()

        lines = [
            "=" * 80,
            "DROPDROP ANALYSIS SUMMARY".center(80),
            "=" * 80,
            "",
            f"Project: {project_name}",
            f"Date: {timestamp}",
            f"Input: {input_dir} ({total_frames} frames)",
            "",
            "SETTINGS",
            "-" * 40,
            f"Poisson Analysis: {'ON' if self.use_poisson else 'OFF'}",
        ]

        if self.use_poisson:
            lines.extend([
                f"Stock Concentration: {self.bead_count:.2e} beads/uL",
                f"Dilution Factor: {self.dilution}x",
            ])

        lines.extend([
            "",
            "RESULTS",
            "-" * 40,
            f"Total Frames Processed: {total_frames}",
            f"Total Droplets Detected: {stats['total_droplets']:,}",
            f"Total Beads Detected: {stats['total_inclusions']:,}",
            "",
            "Droplet Statistics:",
            f"  Mean Diameter: {stats['mean_d']:.1f} um",
            f"  Median Diameter: {stats['median_d']:.1f} um",
            f"  Std Deviation: {stats['std_d']:.1f} um",
            "",
            "Bead Statistics:",
            f"  Mean per Droplet: {stats['total_inclusions'] / stats['total_droplets']:.2f}",
            f"  Droplets with Beads: {stats['with_inclusions']} ({stats['with_inclusions'] / stats['total_droplets'] * 100:.1f}%)",
        ])

        if self.use_poisson and stats.get("lambda_val") is not None:
            lines.extend([
                "",
                "POISSON ANALYSIS",
                "-" * 40,
                f"Theoretical Lambda: {stats['lambda_val']:.3f}",
            ])

            if stats.get("p_value") is not None:
                result = "FOLLOWS" if stats["p_value"] > 0.05 else "DEVIATES FROM"
                lines.extend([
                    f"Chi-squared: {stats['chi2']:.2f}",
                    f"P-value: {stats['p_value']:.4f}",
                    f"Result: Distribution {result} Poisson (p {'>' if stats['p_value'] > 0.05 else '<'} 0.05)",
                ])

        lines.extend([
            "",
            "=" * 80,
            "Generated by DropDrop",
            "=" * 80,
        ])

        summary_path = output_path / "summary.txt"
        with open(summary_path, "w") as f:
            f.write("\n".join(lines))