napari-tmidas 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,490 @@
+ # processing_functions/timepoint_merger.py
+ """
+ Processing function for merging timepoint folders into time series stacks.
+
+ This module provides functionality to merge folders containing individual timepoint images
+ into single time series stacks (TZYX or TYX format). It can handle both 2D (YX) and 3D (ZYX)
+ input images and automatically sorts them by filename to maintain temporal order.
+
+ The function works by detecting when it's processing the first file from a folder,
+ then loading ALL files from that folder to create a merged time series.
+ """
+
+ import os
+ import re
+ from typing import List, Tuple
+
+ import numpy as np
+ import tifffile
+ from skimage.io import imread
+
+ from napari_tmidas._registry import BatchProcessingRegistry
+
+
+ def natural_sort_key(filename: str) -> List:
+     """
+     Generate a key for natural sorting of filenames containing numbers.
+
+     This ensures that files are sorted in the correct order:
+     file1.tif, file2.tif, ..., file10.tif, file11.tif
+     instead of: file1.tif, file10.tif, file11.tif, file2.tif
+     """
+     return [
+         int(text) if text.isdigit() else text.lower()
+         for text in re.split("([0-9]+)", filename)
+     ]
+
+
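As a quick illustration, the key turns numeric runs into integers so they compare numerically (filenames here are hypothetical):

    >>> names = ["file10.tif", "file2.tif", "file1.tif", "file11.tif"]
    >>> sorted(names)
    ['file1.tif', 'file10.tif', 'file11.tif', 'file2.tif']
    >>> sorted(names, key=natural_sort_key)
    ['file1.tif', 'file2.tif', 'file10.tif', 'file11.tif']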
+ def find_timepoint_images(
+     folder_path: str, file_extensions: List[str] = None
+ ) -> List[str]:
+     """
+     Find and sort image files in a folder.
+
+     Parameters:
+     -----------
+     folder_path : str
+         Path to the folder containing timepoint images
+     file_extensions : List[str], optional
+         List of file extensions to look for (default: ['.tif', '.tiff', '.png', '.jpg', '.jpeg'])
+
+     Returns:
+     --------
+     List[str]
+         Sorted list of image file paths
+     """
+     if file_extensions is None:
+         file_extensions = [".tif", ".tiff", ".png", ".jpg", ".jpeg"]
+
+     if not os.path.exists(folder_path):
+         raise ValueError(f"Folder does not exist: {folder_path}")
+
+     # Find all image files
+     image_files = []
+     for file in os.listdir(folder_path):
+         if any(file.lower().endswith(ext) for ext in file_extensions):
+             image_files.append(os.path.join(folder_path, file))
+
+     if not image_files:
+         raise ValueError(f"No image files found in folder: {folder_path}")
+
+     # Sort files naturally (handling numbers correctly)
+     image_files.sort(key=lambda x: natural_sort_key(os.path.basename(x)))
+
+     return image_files
+
+
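A minimal usage sketch (the folder path is hypothetical):

    # Collect the TIFF frames of one acquisition, in temporal order.
    files = find_timepoint_images("/data/experiment1/timepoints", [".tif"])
    print(len(files), os.path.basename(files[0]))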
+ def load_and_validate_images(image_files: List[str]) -> Tuple[np.ndarray, str]:
+     """
+     Load all images and validate they have consistent dimensions.
+
+     Parameters:
+     -----------
+     image_files : List[str]
+         List of image file paths
+
+     Returns:
+     --------
+     Tuple[np.ndarray, str]
+         Tuple of (stacked images array, dimension order string)
+     """
+     print(f"Loading {len(image_files)} timepoint images...")
+
+     # Load first image to determine dimensions and data type
+     first_image = imread(image_files[0])
+     print(
+         f"First image shape: {first_image.shape}, dtype: {first_image.dtype}"
+     )
+
+     # Determine dimension order
+     if len(first_image.shape) == 2:
+         # 2D image (YX)
+         dimension_order = "TYX"
+         expected_shape = first_image.shape
+     elif len(first_image.shape) == 3:
+         # 3D image (ZYX) - assuming Z is the first dimension
+         dimension_order = "TZYX"
+         expected_shape = first_image.shape
+     else:
+         raise ValueError(
+             f"Unsupported image dimensionality: {first_image.shape}"
+         )
+
+     # Pre-allocate array for all timepoints
+     stack_shape = (len(image_files),) + expected_shape
+     print(
+         f"Creating time series with shape: {stack_shape} ({dimension_order})"
+     )
+
+     # Use the same dtype as the first image
+     time_series = np.zeros(stack_shape, dtype=first_image.dtype)
+
+     # Load all images
+     time_series[0] = first_image
+
+     for i, image_file in enumerate(image_files[1:], 1):
+         try:
+             image = imread(image_file)
+
+             # Validate shape consistency
+             if image.shape != expected_shape:
+                 raise ValueError(
+                     f"Image {os.path.basename(image_file)} has shape {image.shape}, "
+                     f"expected {expected_shape}. All images must have the same dimensions."
+                 )
+
+             # Validate dtype consistency
+             if image.dtype != first_image.dtype:
+                 print(
+                     f"Warning: Converting {os.path.basename(image_file)} from {image.dtype} to {first_image.dtype}"
+                 )
+                 image = image.astype(first_image.dtype)
+
+             time_series[i] = image
+
+             if (i + 1) % 10 == 0 or i == len(image_files) - 1:
+                 print(f"Loaded {i + 1}/{len(image_files)} images")
+
+         except Exception as e:
+             raise ValueError(f"Error loading {image_file}: {str(e)}") from e
+
+     print(f"Successfully loaded all {len(image_files)} timepoints")
+     return time_series, dimension_order
+
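For instance, 50 two-dimensional frames of shape (512, 512) stack into a (50, 512, 512) TYX array, while 3D (ZYX) frames yield TZYX (path hypothetical):

    files = find_timepoint_images("/data/experiment1/timepoints")
    stack, order = load_and_validate_images(files)
    print(stack.shape, order)  # e.g. (50, 512, 512) TYX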
+
+ # Global variable to track which folders have been processed
+ _processed_folders = set()
+
+
+ # Advanced version with more options
+ @BatchProcessingRegistry.register(
+     name="Merge Timepoints",
+     suffix="_merge_timeseries",
+     description="Advanced timepoint merging with subsampling and memory optimization. IMPORTANT: Set thread count to 1!",
+     parameters={
+         "subsample_factor": {
+             "type": int,
+             "default": 1,
+             "min": 1,
+             "max": 10,
+             "description": "Take every Nth timepoint (1 = all timepoints, 2 = every other, etc.)",
+         },
+         "max_timepoints": {
+             "type": int,
+             "default": 0,
+             "min": 0,
+             "max": 10000,
+             "description": "Maximum number of timepoints to include (0 = no limit)",
+         },
+         "start_timepoint": {
+             "type": int,
+             "default": 0,
+             "min": 0,
+             "max": 1000,
+             "description": "Starting timepoint index (0-based)",
+         },
+         "memory_efficient": {
+             "type": bool,
+             "default": False,
+             "description": "Use memory-efficient loading for very large datasets",
+         },
+     },
+ )
+ def merge_timepoint_folder_advanced(
+     image: np.ndarray,
+     subsample_factor: int = 1,
+     max_timepoints: int = 0,
+     start_timepoint: int = 0,
+     memory_efficient: bool = False,
+ ) -> np.ndarray:
+     """
+     Advanced timepoint merging with additional options for large datasets.
+
+     This function provides additional control over the merging process, including
+     subsampling, time range selection, and memory-efficient processing for large datasets.
+
+     IMPORTANT: This function should be run with thread count = 1 in the batch processing
+     widget, as it processes entire folders at once.
+
+     Parameters:
+     -----------
+     image : numpy.ndarray
+         Input image (used to determine the current file being processed)
+     subsample_factor : int
+         Take every Nth timepoint (1 = all, 2 = every other, etc.)
+     max_timepoints : int
+         Maximum number of timepoints to include (0 = no limit)
+     start_timepoint : int
+         Starting timepoint index (0-based)
+     memory_efficient : bool
+         Use memory-efficient loading (loads images one at a time)
+
+     Returns:
+     --------
+     numpy.ndarray
+         Time series array with selected timepoints
+     """
+     global _processed_folders
+
+     # Get folder path and file suffix from batch processing context
+     import inspect
+
+     current_file = None
+     output_folder = None
+     input_suffix = None
+
+     # Walk up the call stack to recover the batch processor's state:
+     # the current file path and, if present, the widget's output folder and suffix
+     for frame_info in inspect.stack():
+         frame_locals = frame_info.frame.f_locals
+         if "filepath" in frame_locals:
+             current_file = frame_locals["filepath"]
+         if "self" in frame_locals:
+             obj = frame_locals["self"]
+             if hasattr(obj, "output_folder") and hasattr(obj, "input_suffix"):
+                 output_folder = obj.output_folder
+                 input_suffix = obj.input_suffix
+                 break
+
+     if current_file is None:
+         raise ValueError("Could not determine current file path")
+
+     folder_path = os.path.dirname(current_file)
+     folder_name = os.path.basename(folder_path)
+
+     if output_folder is None:
+         output_folder = folder_path
+
+     if input_suffix is None:
+         input_suffix = os.path.splitext(current_file)[1]
+
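The stack walk above assumes the batch runner keeps a local named "filepath" and a self carrying output_folder/input_suffix attributes. A stripped-down sketch of the same frame-inspection trick (all names here are illustrative, not the actual widget's):

    import inspect

    def _context_from_caller(name):
        # Return the first local variable `name` found in any calling frame.
        for frame_info in inspect.stack():
            if name in frame_info.frame.f_locals:
                return frame_info.frame.f_locals[name]
        return None

    def fake_batch_runner():
        filepath = "/data/experiment1/t000.tif"  # stand-in for the runner's local
        return _context_from_caller("filepath")

    print(fake_batch_runner())  # /data/experiment1/t000.tif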
+     # Check if already processed
+     advanced_key = f"{folder_path}_advanced"
+     if advanced_key in _processed_folders:
+         print(
+             f"Advanced processing for {folder_name} already completed, skipping..."
+         )
+         return image
+
+     _processed_folders.add(advanced_key)
+
+     print(f"🔄 ADVANCED PROCESSING FOLDER: {folder_name}")
+     print(f"Using file suffix: {input_suffix}")
+
+     # Use the same suffix from the batch processing widget
+     extensions = [input_suffix]
+
+     # Find all timepoint images
+     try:
+         image_files = find_timepoint_images(folder_path, extensions)
+         # (find_timepoint_images already returns a sorted list; this re-sort is a defensive no-op)
+         image_files.sort(key=lambda x: natural_sort_key(os.path.basename(x)))
+
+         print(f"Found {len(image_files)} total timepoints")
+
+         # Show all filenames for verification
+         print("📁 Complete file list:")
+         for i, file_path in enumerate(image_files):
+             print(f" {i:2d}: {os.path.basename(file_path)}")
+             # Truncate the listing after 20 files to avoid flooding the output
+             if i == 19 and len(image_files) > 25:
+                 print(
+                     f" ... (showing first 20 and last 5 of {len(image_files)} files)"
+                 )
+                 break
+
+         # If the listing was truncated, show the last few files
+         if len(image_files) > 25:
+             for i, file_path in enumerate(
+                 image_files[-5:], len(image_files) - 5
+             ):
+                 print(f" {i:2d}: {os.path.basename(file_path)}")
+
+         # Apply timepoint selection
+         if start_timepoint > 0:
+             if start_timepoint >= len(image_files):
+                 _processed_folders.discard(advanced_key)
+                 raise ValueError(
+                     f"start_timepoint ({start_timepoint}) >= total timepoints ({len(image_files)})"
+                 )
+             image_files = image_files[start_timepoint:]
+             print(
+                 f"Starting from timepoint {start_timepoint}: {len(image_files)} remaining"
+             )
+
+         # Apply subsampling
+         if subsample_factor > 1:
+             image_files = image_files[::subsample_factor]
+             print(
+                 f"Subsampling by factor {subsample_factor}: {len(image_files)} timepoints selected"
+             )
+
+         # Apply maximum timepoints limit
+         if max_timepoints > 0 and len(image_files) > max_timepoints:
+             image_files = image_files[:max_timepoints]
+             print(f"Limited to {max_timepoints} timepoints")
+
+         if len(image_files) < 1:
+             _processed_folders.discard(advanced_key)
+             raise ValueError("No timepoints selected after applying filters")
+
+         print(f"Final selection: {len(image_files)} timepoints")
+
+         # Show final selection if filtering was applied
+         if subsample_factor > 1 or max_timepoints > 0 or start_timepoint > 0:
+             print("📁 Final selected files:")
+             for i, file_path in enumerate(image_files):
+                 print(f" {i:2d}: {os.path.basename(file_path)}")
+
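The three filters compose as plain list slices, applied in the order start, then subsample, then cap. On a hypothetical list of 100 frames:

    frames = [f"t{i:03d}.tif" for i in range(100)]  # hypothetical filenames
    start_timepoint, subsample_factor, max_timepoints = 10, 3, 20

    selected = frames[start_timepoint:][::subsample_factor][:max_timepoints]
    print(len(selected), selected[:3])  # 20 ['t010.tif', 't013.tif', 't016.tif']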
+         # Load images based on memory efficiency setting
+         if memory_efficient and len(image_files) > 100:
+             print("Using memory-efficient loading...")
+
+             # Load first image to determine shape and dtype
+             first_image = imread(image_files[0])
+             stack_shape = (len(image_files),) + first_image.shape
+
+             # Create memory-mapped array if possible, otherwise regular array
+             temp_path = None
+             try:
+                 import tempfile
+
+                 # Create (and close) the backing file first, then map it
+                 with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+                     temp_path = temp_file.name
+                 time_series = np.memmap(
+                     temp_path,
+                     dtype=first_image.dtype,
+                     mode="w+",
+                     shape=stack_shape,
+                 )
+                 print(f"Created memory-mapped array: {stack_shape}")
+             except (OSError, ValueError):
+                 time_series = np.zeros(stack_shape, dtype=first_image.dtype)
+                 print(f"Created regular array: {stack_shape}")
+
+             time_series[0] = first_image
+
+             # Load remaining images one by one
+             for i, image_file in enumerate(image_files[1:], 1):
+                 if i % 50 == 0:
+                     print(f"Loading timepoint {i+1}/{len(image_files)}")
+
+                 img = imread(image_file)
+                 if img.shape != first_image.shape:
+                     _processed_folders.discard(advanced_key)
+                     raise ValueError(
+                         f"Shape mismatch at timepoint {i}: {img.shape} vs {first_image.shape}"
+                     )
+
+                 time_series[i] = img
+
+             # Convert back to a regular in-memory array if using memmap, and
+             # remove the backing temp file (it would otherwise be left on disk)
+             if isinstance(time_series, np.memmap):
+                 result = np.array(time_series)
+                 del time_series  # Clean up memmap
+                 time_series = result
+                 if temp_path is not None:
+                     os.remove(temp_path)
+         else:
+             # Use standard loading
+             time_series = load_and_validate_images(image_files)[0]
+
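For reference, a numpy memmap behaves like an ndarray backed by a file, so the per-frame writes above spill to disk rather than RAM. A minimal self-contained sketch:

    import os
    import tempfile

    import numpy as np

    fd, path = tempfile.mkstemp(suffix=".dat")
    os.close(fd)
    mm = np.memmap(path, dtype=np.uint16, mode="w+", shape=(4, 8, 8))
    mm[0] = 42                      # written through to the backing file
    arr = np.array(mm)              # materialize an in-memory copy
    del mm                          # close the mapping
    os.remove(path)                 # then the backing file can go
    print(arr.shape, arr[0, 0, 0])  # (4, 8, 8) 42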
+         # Save the advanced time series
+         output_filename = f"{folder_name}_merged_timepoints.tif"
+         output_path = os.path.join(output_folder, output_filename)
+
+         print(f"💾 Saving advanced time series to: {output_path}")
+
+         # Classic TIFF tops out at 4 GB; switch to BigTIFF well before that
+         size_gb = time_series.nbytes / (1024**3)
+         use_bigtiff = size_gb > 2.0
+
+         tifffile.imwrite(
+             output_path,
+             time_series,
+             compression="zlib",
+             bigtiff=use_bigtiff,
+         )
+
+         print("✅ Successfully saved advanced time series!")
+         print(f"📁 Output file: {output_filename}")
+         print(f"📊 Uncompressed size: {size_gb:.2f} GB")
+         print(f"📐 Final shape: {time_series.shape}")
+
+         # IMPORTANT: Return the original image unchanged so the batch processor
+         # doesn't save individual processed files. The merged file is already saved above.
+         return image
+
+     except Exception as e:
+         _processed_folders.discard(advanced_key)
+         raise ValueError(
+             f"Error in advanced timepoint merging: {str(e)}"
+         ) from e
+
+
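The module-level _processed_folders set is what makes this folder-at-once pattern survive a batch runner that feeds the function every file: the first file from a folder triggers the merge, and each later file from the same folder hits the early return. In miniature:

    import os

    _done = set()

    def process(path):
        folder = os.path.dirname(path)
        if folder in _done:
            return "skipped"
        _done.add(folder)
        return "merged"

    print([process(f"/data/exp1/t{i}.tif") for i in range(3)])
    # ['merged', 'skipped', 'skipped']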
+ # Command-line utility function
+ def merge_timepoints_cli():
+     """Command-line interface for merging timepoint folders."""
+     import argparse
+
+     parser = argparse.ArgumentParser(
+         description="Merge timepoint images into time series"
+     )
+     parser.add_argument(
+         "input_folder", help="Folder containing timepoint images"
+     )
+     parser.add_argument("output_file", help="Output file path")
+     parser.add_argument(
+         "--extensions",
+         default=".tif,.tiff,.png,.jpg",
+         help="File extensions to include (comma-separated)",
+     )
+     parser.add_argument(
+         "--subsample",
+         type=int,
+         default=1,
+         help="Subsample factor (take every Nth timepoint)",
+     )
+     parser.add_argument(
+         "--max-timepoints",
+         type=int,
+         default=0,
+         help="Maximum number of timepoints (0 = no limit)",
+     )
+     parser.add_argument(
+         "--start", type=int, default=0, help="Starting timepoint index"
+     )
+
+     args = parser.parse_args()
+
+     try:
+         # Parse extensions
+         extensions = [
+             ext.strip() for ext in args.extensions.split(",") if ext.strip()
+         ]
+
+         # Find and sort files
+         image_files = find_timepoint_images(args.input_folder, extensions)
+
+         # Apply filters
+         if args.start > 0:
+             image_files = image_files[args.start :]
+         if args.subsample > 1:
+             image_files = image_files[:: args.subsample]
+         if args.max_timepoints > 0:
+             image_files = image_files[: args.max_timepoints]
+
+         # Load and save
+         result = load_and_validate_images(image_files)[0]
+         tifffile.imwrite(args.output_file, result, compression="zlib")
+
+         print(f"Successfully saved time series to {args.output_file}")
+         print(f"Final shape: {result.shape}")
+         print(f"Data type: {result.dtype}")
+         print(
+             f"File size: {os.path.getsize(args.output_file) / (1024**2):.1f} MB"
+         )
+
+     except (ValueError, OSError, RuntimeError) as e:
+         print(f"Error: {str(e)}")
+         return 1
+
+     return 0
+
+
+ if __name__ == "__main__":
+     import sys
+
+     sys.exit(merge_timepoints_cli())
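Run from a shell, the CLI chains the same filters (paths hypothetical):

    python timepoint_merger.py /data/experiment1/timepoints merged.tif \
        --extensions .tif --start 10 --subsample 3 --max-timepoints 20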