dropdrop 1.1.0__tar.gz → 1.2.0__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dropdrop
3
- Version: 1.1.0
3
+ Version: 1.2.0
4
4
  Summary: Python pipeline script for detecting droplets with beads and other inclusions via cellpose
5
5
  License-Expression: MIT
6
6
  License-File: LICENSE
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dropdrop"
3
- version = "1.1.0"
3
+ version = "1.2.0"
4
4
  description = """Python pipeline script for detecting droplets with beads and
5
5
  other inclusions via cellpose"""
6
6
  readme = "README.md"
@@ -229,8 +229,22 @@ def main():
229
229
  # Always generate statistics (after any interactive corrections)
230
230
  print("\nGenerating statistical analysis...")
231
231
  csv_path = output_dir / "data.csv"
232
+
233
+ # Extract sample frames for report (always available from pipeline)
234
+ sample_frames = None
235
+ if pipeline.sample_frames:
236
+ sample_frames = []
237
+ for idx in sorted(pipeline.sample_frames.keys()):
238
+ viz = pipeline.sample_frames[idx]
239
+ sample_frames.append({
240
+ "frame_idx": idx,
241
+ "image": viz["min_projection"],
242
+ "droplet_masks": viz.get("droplet_masks", []),
243
+ "inclusion_masks": viz.get("inclusion_masks", []),
244
+ })
245
+
232
246
  stats_module = DropletStatistics(csv_path, settings)
233
- stats_module.run_analysis(str(output_dir))
247
+ stats_module.run_analysis(str(output_dir), sample_frames)
234
248
 
235
249
  # Launch viewer if requested (no editing, just viewing)
236
250
  if args.view and pipeline.visualization_data:
@@ -13,31 +13,28 @@ from tqdm import tqdm
13
13
  from .cache import CacheManager
14
14
  from .config import load_config
15
15
 
16
- # Required: Cellpose
17
- try:
18
- from cellpose.models import CellposeModel
19
- except ImportError:
20
- print("You need to have cellpose for this pipeline to work!")
21
- sys.exit(1)
22
-
23
16
 
24
17
  class DropletInclusionPipeline:
25
18
  """Main pipeline for droplet and inclusion detection."""
26
19
 
27
- def __init__(self, config=None, store_visualizations=False, use_cache=True):
20
+ def __init__(self, config=None, store_visualizations=False, use_cache=True, sample_count=3):
28
21
  """Initialize pipeline with configuration.
29
22
 
30
23
  Args:
31
24
  config: Configuration dict. If None, loads from config.json.
32
25
  store_visualizations: Whether to store visualization data for UI.
33
26
  use_cache: Whether to use caching for expensive computations.
27
+ sample_count: Number of sample frames to store for report (default 3).
34
28
  """
35
29
  self.config = config if config else load_config()
36
30
  self.results_data = []
37
31
  self.store_visualizations = store_visualizations
38
32
  self.visualization_data = {} if store_visualizations else None
33
+ self.sample_count = sample_count
34
+ self.sample_frames = {} # Always store a few samples for report
39
35
  self.use_cache = use_cache
40
36
  self.cache = CacheManager(self.config) if use_cache else None
37
+ self._cellpose_model = None
41
38
 
42
39
  def parse_filename(self, filename):
43
40
  """Extract z-stack index and frame index from filename.
@@ -107,9 +104,17 @@ class DropletInclusionPipeline:
107
104
 
108
105
  def detect_droplets_cellpose(self, image):
109
106
  """Detect droplets using Cellpose."""
110
- model = CellposeModel(gpu=True)
111
-
112
- masks, flows, styles = model.eval(
107
+ # Lazy import and model caching
108
+ if self._cellpose_model is None:
109
+ try:
110
+ from cellpose.models import CellposeModel
111
+ except ImportError:
112
+ print("ERROR: Cellpose is required for droplet detection.")
113
+ print("Install with: pip install cellpose")
114
+ sys.exit(1)
115
+ self._cellpose_model = CellposeModel(gpu=True)
116
+
117
+ masks, flows, styles = self._cellpose_model.eval(
113
118
  image,
114
119
  normalize=True,
115
120
  flow_threshold=self.config["cellpose_flow_threshold"],
@@ -237,7 +242,11 @@ class DropletInclusionPipeline:
237
242
 
238
243
  def process_frame(self, frame_idx, min_projection, droplet_coords=None):
239
244
  """Process a single frame for droplets and inclusions."""
240
- if self.store_visualizations:
245
+ # Determine if we need to store viz data (for UI or sample)
246
+ is_sample = hasattr(self, "_sample_indices") and frame_idx in self._sample_indices
247
+ store_viz = self.store_visualizations or is_sample
248
+
249
+ if store_viz:
241
250
  frame_viz = {
242
251
  "min_projection": min_projection,
243
252
  "droplet_masks": [],
@@ -251,8 +260,11 @@ class DropletInclusionPipeline:
251
260
 
252
261
  if not droplet_coords:
253
262
  print(f" Frame {frame_idx}: No droplets detected")
254
- if self.store_visualizations:
255
- self.visualization_data[frame_idx] = frame_viz
263
+ if store_viz:
264
+ if self.store_visualizations:
265
+ self.visualization_data[frame_idx] = frame_viz
266
+ if is_sample:
267
+ self.sample_frames[frame_idx] = frame_viz
256
268
  return
257
269
 
258
270
  valid_droplet_idx = 0
@@ -287,7 +299,7 @@ class DropletInclusionPipeline:
287
299
  if np.sum(eroded_mask) == 0:
288
300
  continue
289
301
 
290
- if self.store_visualizations:
302
+ if store_viz:
291
303
  inclusion_mask, inclusion_count, blackhat = (
292
304
  self.detect_inclusions_in_droplet(
293
305
  min_projection, eroded_mask, store_masked=True
@@ -299,7 +311,7 @@ class DropletInclusionPipeline:
299
311
  min_projection, eroded_mask
300
312
  )
301
313
 
302
- if self.store_visualizations:
314
+ if store_viz:
303
315
  frame_viz["droplet_masks"].append({
304
316
  "mask": droplet_mask,
305
317
  "center": (cx, cy),
@@ -323,8 +335,11 @@ class DropletInclusionPipeline:
323
335
 
324
336
  valid_droplet_idx += 1
325
337
 
326
- if self.store_visualizations:
327
- self.visualization_data[frame_idx] = frame_viz
338
+ if store_viz:
339
+ if self.store_visualizations:
340
+ self.visualization_data[frame_idx] = frame_viz
341
+ if is_sample:
342
+ self.sample_frames[frame_idx] = frame_viz
328
343
 
329
344
  frame_data = [d for d in self.results_data if d["frame"] == frame_idx]
330
345
  total_inclusions = sum(d["inclusions"] for d in frame_data)
@@ -354,6 +369,11 @@ class DropletInclusionPipeline:
354
369
  f"Found {len(frame_groups)} frames total, processing {len(frame_indices)} frames\n"
355
370
  )
356
371
 
372
+ # Select sample frames for report (random subset)
373
+ import random
374
+ n_samples = min(self.sample_count, len(frame_indices))
375
+ self._sample_indices = set(random.sample(frame_indices, n_samples))
376
+
357
377
  cache_hits = 0
358
378
  for frame_idx in tqdm(frame_indices, desc="Processing frames"):
359
379
  z_stack_files = frame_groups[frame_idx]
@@ -3,6 +3,7 @@
3
3
  from datetime import datetime
4
4
  from pathlib import Path
5
5
 
6
+ import cv2
6
7
  import matplotlib.pyplot as plt
7
8
  import numpy as np
8
9
  import pandas as pd
@@ -114,11 +115,11 @@ class DropletStatistics:
114
115
  )
115
116
 
116
117
  if p_value is not None:
117
- result_text = f"χ² = {chi2:.2f}, p = {p_value:.4f}"
118
+ result_text = f"X2 = {chi2:.2f}, p = {p_value:.4f}"
118
119
  if p_value > 0.05:
119
- result_text += "\n✓ Follows Poisson"
120
+ result_text += "\nFollows Poisson"
120
121
  else:
121
- result_text += "\n✗ Deviates from Poisson"
122
+ result_text += "\nDeviates from Poisson"
122
123
  ax.text(
123
124
  0.98,
124
125
  0.85,
@@ -173,7 +174,180 @@ class DropletStatistics:
173
174
  chi2, p_value = stats.chisquare(observed_filtered, expected_filtered)
174
175
  return chi2, p_value
175
176
 
176
- def run_analysis(self, output_dir):
177
+ def create_report(self, output_path, stats_data, sample_frames=None):
178
+ """Create combined report image with plots, stats, and sample frames.
179
+
180
+ Args:
181
+ output_path: Path object for output directory.
182
+ stats_data: Dict with mean_d, median_d, std_d, lambda_val, chi2, p_value.
183
+ sample_frames: Optional list of dicts with 'frame_idx', 'image', 'droplet_masks'.
184
+ """
185
+ n_samples = len(sample_frames) if sample_frames else 0
186
+
187
+ if self.use_poisson:
188
+ # 2 rows: [size_dist, poisson, stats] + [sample frames]
189
+ n_cols = max(3, n_samples)
190
+ fig = plt.figure(figsize=(5 * n_cols, 10))
191
+ gs = fig.add_gridspec(2, n_cols, height_ratios=[1, 1])
192
+ ax_size = fig.add_subplot(gs[0, 0])
193
+ ax_poisson = fig.add_subplot(gs[0, 1])
194
+ ax_stats = fig.add_subplot(gs[0, 2])
195
+ else:
196
+ # 2 rows: [size_dist, stats] + [sample frames]
197
+ n_cols = max(2, n_samples)
198
+ fig = plt.figure(figsize=(5 * n_cols, 10))
199
+ gs = fig.add_gridspec(2, n_cols, height_ratios=[1, 1])
200
+ ax_size = fig.add_subplot(gs[0, 0])
201
+ ax_stats = fig.add_subplot(gs[0, 1])
202
+ ax_poisson = None
203
+
204
+ # Plot 1: Size distribution
205
+ diameters = self.df["diameter_um"].values
206
+ ax_size.hist(diameters, bins=25, color="steelblue", edgecolor="black", alpha=0.7)
207
+ ax_size.axvline(
208
+ stats_data["mean_d"], color="red", linestyle="--",
209
+ label=f"Mean: {stats_data['mean_d']:.1f}"
210
+ )
211
+ ax_size.axvline(
212
+ stats_data["median_d"], color="green", linestyle="--",
213
+ label=f"Median: {stats_data['median_d']:.1f}"
214
+ )
215
+ ax_size.set_xlabel("Diameter (µm)")
216
+ ax_size.set_ylabel("Count")
217
+ ax_size.set_title("Droplet Size Distribution")
218
+ ax_size.legend()
219
+ ax_size.grid(True, alpha=0.3)
220
+
221
+ # Plot 2: Poisson comparison (if enabled)
222
+ if ax_poisson is not None and stats_data.get("lambda_val") is not None:
223
+ median_d = self.df["diameter_um"].median()
224
+ x_range, theoretical, lambda_val = self.calculate_poisson(median_d)
225
+ actual = self.df["inclusions"].value_counts().sort_index()
226
+ n_droplets = len(self.df)
227
+
228
+ detected_pct = []
229
+ theoretical_pct = theoretical * 100
230
+ for i in x_range:
231
+ detected_pct.append(actual.get(i, 0) / n_droplets * 100)
232
+
233
+ x = np.arange(len(x_range))
234
+ width = 0.35
235
+ ax_poisson.bar(
236
+ x - width / 2, detected_pct, width,
237
+ label="Detected", color="royalblue", alpha=0.8
238
+ )
239
+ ax_poisson.bar(
240
+ x + width / 2, theoretical_pct[:len(x)], width,
241
+ label=f"Poisson (λ={lambda_val:.3f})", color="coral", alpha=0.8
242
+ )
243
+
244
+ if stats_data.get("p_value") is not None:
245
+ result_text = f"X2 = {stats_data['chi2']:.2f}, p = {stats_data['p_value']:.4f}"
246
+ result_text += "\nFollows Poisson" if stats_data["p_value"] > 0.05 else "\nDeviates"
247
+ ax_poisson.text(
248
+ 0.98, 0.85, result_text, transform=ax_poisson.transAxes,
249
+ ha="right", va="top", fontsize=10,
250
+ bbox=dict(boxstyle="round", facecolor="wheat", alpha=0.8)
251
+ )
252
+
253
+ ax_poisson.set_xlabel("Inclusions per Droplet")
254
+ ax_poisson.set_ylabel("Percentage (%)")
255
+ ax_poisson.set_title("Inclusion Distribution")
256
+ ax_poisson.set_xticks(x)
257
+ ax_poisson.set_xticklabels(x_range)
258
+ ax_poisson.legend()
259
+ ax_poisson.grid(True, alpha=0.3, axis="y")
260
+
261
+ # Stats text box
262
+ total_droplets = len(self.df)
263
+ total_inclusions = int(self.df["inclusions"].sum())
264
+ with_inclusions = int((self.df["inclusions"] > 0).sum())
265
+
266
+ project_name = output_path.name
267
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
268
+ total_frames = self.df["frame"].nunique()
269
+
270
+ stats_lines = [
271
+ f"Project: {project_name}",
272
+ f"Date: {timestamp}",
273
+ f"Frames: {total_frames}",
274
+ "",
275
+ f"Droplets: {total_droplets:,}",
276
+ f"Inclusions: {total_inclusions:,}",
277
+ f"Mean/droplet: {total_inclusions / total_droplets:.2f}",
278
+ f"With incl: {with_inclusions / total_droplets * 100:.1f}%",
279
+ "",
280
+ f"Diameter: {stats_data['mean_d']:.1f} ± {stats_data['std_d']:.1f} µm",
281
+ ]
282
+
283
+ if self.use_poisson and stats_data.get("lambda_val") is not None:
284
+ stats_lines.extend([
285
+ "",
286
+ f"Dilution: {self.dilution}x",
287
+ f"λ theoretical: {stats_data['lambda_val']:.4f}",
288
+ ])
289
+ if stats_data.get("p_value") is not None:
290
+ result = "FOLLOWS" if stats_data["p_value"] > 0.05 else "DEVIATES"
291
+ stats_lines.append(f"Result: {result} Poisson")
292
+
293
+ ax_stats.axis("off")
294
+ ax_stats.text(
295
+ 0.1, 0.95, "\n".join(stats_lines), transform=ax_stats.transAxes,
296
+ fontsize=11, verticalalignment="top", fontfamily="monospace",
297
+ bbox=dict(boxstyle="round", facecolor="lightgray", alpha=0.3)
298
+ )
299
+ ax_stats.set_title("Summary")
300
+
301
+ # Sample frames (bottom row)
302
+ if sample_frames:
303
+ for i, sample in enumerate(sample_frames[:n_cols]):
304
+ ax_sample = fig.add_subplot(gs[1, i])
305
+ self._draw_sample_frame(ax_sample, sample)
306
+
307
+ plt.suptitle("DropDrop Analysis Report", fontsize=14, fontweight="bold")
308
+ plt.tight_layout()
309
+ plt.savefig(output_path / "report.png", dpi=200, bbox_inches="tight")
310
+ plt.close()
311
+
312
+ def _draw_sample_frame(self, ax, sample):
313
+ """Draw a sample frame with detection overlay."""
314
+ frame_idx = sample["frame_idx"]
315
+ image = sample["image"]
316
+ droplet_masks = sample.get("droplet_masks", [])
317
+ inclusion_masks = sample.get("inclusion_masks", [])
318
+
319
+ # Convert grayscale to RGB for colored overlay
320
+ if len(image.shape) == 2:
321
+ display = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
322
+ else:
323
+ display = image.copy()
324
+
325
+ # Draw droplet contours in green
326
+ for droplet in droplet_masks:
327
+ mask = droplet.get("mask")
328
+ if mask is not None:
329
+ contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
330
+ cv2.drawContours(display, contours, -1, (0, 255, 0), 2)
331
+ # Draw inclusion count
332
+ center = droplet.get("center")
333
+ count = droplet.get("inclusions", 0)
334
+ if center:
335
+ cv2.putText(
336
+ display, str(count), (int(center[0]) - 10, int(center[1]) + 10),
337
+ cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 0), 2
338
+ )
339
+
340
+ # Draw inclusion masks in red
341
+ for inc_mask in inclusion_masks:
342
+ if inc_mask is not None:
343
+ contours, _ = cv2.findContours(inc_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
344
+ cv2.drawContours(display, contours, -1, (255, 0, 0), -1)
345
+
346
+ ax.imshow(display)
347
+ ax.set_title(f"Frame {frame_idx}")
348
+ ax.axis("off")
349
+
350
+ def run_analysis(self, output_dir, sample_frames=None):
177
351
  """Run analysis and print results."""
178
352
  output_path = Path(output_dir)
179
353
  output_path.mkdir(exist_ok=True)
@@ -202,6 +376,17 @@ class DropletStatistics:
202
376
  p_value=p_value,
203
377
  )
204
378
 
379
+ # Create combined report
380
+ stats_data = {
381
+ "mean_d": mean_d,
382
+ "median_d": median_d,
383
+ "std_d": std_d,
384
+ "lambda_val": lambda_val,
385
+ "chi2": chi2,
386
+ "p_value": p_value,
387
+ }
388
+ self.create_report(output_path, stats_data, sample_frames)
389
+
205
390
  print("\nSTATISTICAL SUMMARY")
206
391
  print("-" * 40)
207
392
  print(f"Droplets: {total_droplets}")
@@ -117,7 +117,7 @@ wheels = [
117
117
 
118
118
  [[package]]
119
119
  name = "dropdrop"
120
- version = "1.1.0"
120
+ version = "1.2.0"
121
121
  source = { editable = "." }
122
122
  dependencies = [
123
123
  { name = "cellpose" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes