isoview 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
isoview/io.py ADDED
@@ -0,0 +1,942 @@
+ """I/O operations for microscopy data."""
+
+ import re
+ from pathlib import Path
+ from typing import Optional
+
+ import numpy as np
+ import pyklb
+ import tifffile
+ import xmltodict
+
+
+ CAMERA_PIXEL_SIZE = 6.5
+
+
+ def read_volume(path: Path, dimensions: Optional[tuple[int, int, int]] = None) -> np.ndarray:
+     """Read 3D volume from file (klb, tiff, zarr, or stack).
+
+     Args:
+         path: file path
+         dimensions: (width, height, depth), required for .stack files (MATLAB convention)
+     """
+     path = Path(path)
+
+     if path.suffix == ".klb":
+         volume = pyklb.readfull(str(path))
+     elif path.suffix == ".zarr":
+         try:
+             import zarr
+         except ImportError as e:
+             raise ImportError(
+                 "zarr required for .zarr format. "
+                 "Install with: uv pip install 'isoview[parallel]'"
+             ) from e
+
+         # open zarr array (supports both ome-zarr and plain zarr)
+         store = zarr.storage.LocalStore(str(path))
+         root = zarr.open_group(store=store, mode="r")
+
+         # for ome-zarr, data is in resolution level "0"
+         if "0" in root:
+             arr = root["0"]
+         else:
+             # plain zarr array at root
+             arr = zarr.open_array(store=store, mode="r")
+
+         volume = arr[:]
+     elif path.suffix in (".tif", ".tiff"):
+         volume = tifffile.imread(path)
+     elif path.suffix == ".stack":
+         # get height and width from xml, calculate depth from file size
+         if dimensions is None:
+             xml_path = _find_xml_for_stack(path)
+             if xml_path:
+                 metadata = read_xml_metadata(xml_path)
+                 if "dimensions" in metadata:
+                     dims = metadata["dimensions"]
+                     if dims.ndim > 1:
+                         dims = dims[0]
+                     dimensions = tuple(dims)
+
+         if dimensions is None:
+             raise ValueError(f"dimensions required for .stack file: {path}")
+
+         # calculate actual depth from file size
+         # dimensions from XML are [width, height, depth] (MATLAB convention)
+         width, height, _ = dimensions
+         file_size = path.stat().st_size
+         total_pixels = file_size // 2  # uint16 = 2 bytes
+         depth = total_pixels // (height * width)
+
+         # read binary stack (uint16, bsq format, little-endian)
+         # bsq stores band-sequential: each z-slice stored row by row (y then x)
+         volume = np.fromfile(path, dtype=np.uint16)
+         volume = volume.reshape((depth, height, width))  # (z, y, x)
+     else:
+         raise ValueError(f"unsupported format: {path.suffix}")
+
+     return volume
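+
+ # Usage sketch (hypothetical path and sizes): reading a raw .stack without an
+ # XML sidecar; dimensions follow the MATLAB (width, height, depth) convention.
+ #
+ #     vol = read_volume(Path("TM00000/raw.stack"), dimensions=(2048, 1024, 100))
+ #     vol.shape  # -> (z, 1024, 2048); depth is recomputed from the file size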
+
+
+ def _find_xml_for_stack(stack_path: Path) -> Optional[Path]:
+     """Find associated XML metadata file for a stack file."""
+     # try same directory with channel xml
+     parent = stack_path.parent
+     xml_files = list(parent.glob("*.xml"))
+     if xml_files:
+         return xml_files[0]
+
+     # try parent directories
+     for _ in range(2):
+         parent = parent.parent
+         xml_files = list(parent.glob("*.xml"))
+         if xml_files:
+             return xml_files[0]
+
+     return None
+
+
+ def write_volume(
+     volume: np.ndarray,
+     path: Path,
+     pixel_spacing: Optional[np.ndarray] = None,
+     compression: Optional[str] = "blosc-zstd",
+     compression_level: int = 5,
+     chunks: Optional[tuple[int, int, int]] = None,
+     shards: Optional[tuple[int, int, int]] = None,
+     metadata: Optional[dict] = None,
+     camera_subpath: Optional[str] = None,
+     camera_metadata: Optional[dict] = None,
+ ) -> None:
+     """Write 3D volume to file (klb, tiff, or zarr).
+
+     Args:
+         volume: 3D array (z, y, x)
+         path: output file path
+         pixel_spacing: physical spacing in micrometers [z, y, x]
+         compression: compression type for zarr
+             - 'blosc-zstd': Blosc with Zstandard
+             - 'blosc-lz4': Blosc with LZ4
+             - 'blosc-lz4hc': Blosc with LZ4HC
+             - 'gzip': Standard gzip compression
+             - None: No compression (fastest I/O, largest files)
+         compression_level: compression level (0-9, higher = more compression)
+         chunks: inner chunk shape for zarr (z, y, x)
+             - With sharding: size of individually accessible sub-chunks
+             - Without sharding: standard chunk size
+             - Default with sharding: (1, full_y, full_x) - one frame per chunk
+             - Default without sharding: (10, 128, 128)
+         shards: outer chunk/shard shape for zarr (z, y, x)
+             - Enables Zarr v3 sharding to reduce file count
+             - Groups multiple chunks into single storage objects
+             - Recommended: (10, full_y, full_x) - 10 frames per shard file
+             - None: auto-enabled as (10, full_y, full_x) for volumes with
+               20 or more z-planes, disabled otherwise (default)
+         metadata: common microscope metadata dictionary (written to root)
+         camera_subpath: optional camera subgroup path (e.g., "camera_0") for
+             multi-camera consolidated zarr
+         camera_metadata: optional camera-specific metadata (written to camera subgroup)
+     """
+     path = Path(path)
+     path.parent.mkdir(parents=True, exist_ok=True)
+
+     if path.suffix == ".zarr":
+         try:
+             import zarr
+             from zarr.codecs import BloscCodec, GzipCodec
+         except ImportError as e:
+             raise ImportError(
+                 "zarr required for .zarr format. "
+                 "Install with: uv pip install 'isoview[parallel]'"
+             ) from e
+
+         # auto-calculate chunks and shards if not provided
+         if chunks is None:
+             if shards is not None:
+                 # with sharding: use full-frame inner chunks for optimal planar access
+                 chunks = (1, volume.shape[1], volume.shape[2])
+             else:
+                 # without sharding: use smaller chunks
+                 chunks = (
+                     min(10, volume.shape[0]),
+                     min(128, volume.shape[1]),
+                     min(128, volume.shape[2]),
+                 )
+
+         # auto-enable sharding for large z-stacks when shards was not specified
+         if shards is None and volume.shape[0] >= 20:
+             # enable sharding: 10 frames per shard file, full FOV
+             shards = (10, volume.shape[1], volume.shape[2])
+             # ensure chunks is full-frame for sharded access
+             if chunks != (1, volume.shape[1], volume.shape[2]):
+                 chunks = (1, volume.shape[1], volume.shape[2])
+
+         # build compressor list (zarr v3 uses the compressors parameter, not codecs)
+         compressors = []
+
+         if compression == "blosc-zstd":
+             compressors.append(
+                 BloscCodec(
+                     cname="zstd",
+                     clevel=compression_level,
+                     shuffle="bitshuffle",  # excellent for uint16
+                 )
+             )
+         elif compression == "blosc-lz4":
+             compressors.append(
+                 BloscCodec(
+                     cname="lz4",
+                     clevel=compression_level,
+                     shuffle="shuffle",
+                 )
+             )
+         elif compression == "blosc-lz4hc":
+             compressors.append(
+                 BloscCodec(
+                     cname="lz4hc",
+                     clevel=compression_level,
+                     shuffle="bitshuffle",
+                 )
+             )
+         elif compression == "gzip":
+             compressors.append(GzipCodec(level=compression_level))
+         elif compression is not None:
+             raise ValueError(
+                 f"unsupported compression: {compression}. "
+                 "use 'blosc-zstd', 'blosc-lz4', 'blosc-lz4hc', 'gzip', or None"
+             )
+
+         # create zarr store
+         store = zarr.storage.LocalStore(str(path))
+
+         # determine array path based on camera_subpath
+         if camera_subpath:
+             array_path = f"{camera_subpath}/0"
+         else:
+             array_path = "0"
+
+         # create array with optional sharding (use zarr.create_array for v3)
+         if shards is not None:
+             # validate that chunks evenly divide shards
+             for i, (shard_dim, chunk_dim) in enumerate(zip(shards, chunks)):
+                 if shard_dim % chunk_dim != 0:
+                     raise ValueError(
+                         f"shard dimension {shard_dim} must be evenly divisible by "
+                         f"chunk dimension {chunk_dim} at axis {i}"
+                     )
+
+             # high-level API with sharding
+             arr = zarr.create_array(
+                 store=store,
+                 name=array_path,
+                 shape=volume.shape,
+                 shards=shards,  # outer chunks (shard size)
+                 chunks=chunks,  # inner chunks (sub-chunk size)
+                 dtype=volume.dtype,
+                 compressors=compressors if compressors else None,
+                 zarr_format=3,
+                 overwrite=True,
+             )
+         else:
+             # standard zarr without sharding
+             arr = zarr.create_array(
+                 store=store,
+                 name=array_path,
+                 shape=volume.shape,
+                 chunks=chunks,
+                 dtype=volume.dtype,
+                 compressors=compressors if compressors else None,
+                 zarr_format=3,
+                 overwrite=True,
+             )
+
+         # write data
+         arr[:] = volume
+
+         # open group to add metadata
+         root = zarr.open_group(store=store, mode="r+")
+
+         # add ome-zarr metadata
+         if pixel_spacing is not None:
+             z_spacing, y_spacing, x_spacing = pixel_spacing[:3]
+         else:
+             z_spacing = y_spacing = x_spacing = 1.0
+
+         # Build multiscales metadata
+         multiscales_meta = {
+             "version": "0.5",
+             "axes": [
+                 {"name": "z", "type": "space", "unit": "micrometer"},
+                 {"name": "y", "type": "space", "unit": "micrometer"},
+                 {"name": "x", "type": "space", "unit": "micrometer"},
+             ],
+             "datasets": [
+                 {
+                     "path": "0",
+                     "coordinateTransformations": [
+                         {
+                             "type": "scale",
+                             "scale": [
+                                 float(z_spacing),
+                                 float(y_spacing),
+                                 float(x_spacing),
+                             ],
+                         }
+                     ],
+                 }
+             ],
+         }
+
+         # Add name from metadata if available
+         if metadata and "data_header" in metadata:
+             multiscales_meta["name"] = str(metadata["data_header"])
+
+         # Add metadata to appropriate group
+         if camera_subpath:
+             # Multi-camera mode: add multiscales to camera subgroup
+             camera_group = root[camera_subpath]
+             camera_group.attrs["multiscales"] = [multiscales_meta]
+
+             # Add common microscope metadata to root (once, shared across cameras)
+             # Only write if not already present
+             if metadata and "data_header" not in root.attrs:
+                 for key, value in metadata.items():
+                     if isinstance(value, np.ndarray):
+                         root.attrs[key] = value.tolist()
+                     elif isinstance(value, (np.integer, np.floating)):
+                         root.attrs[key] = value.item()
+                     else:
+                         root.attrs[key] = value
+
+             # Add camera-specific metadata to camera subgroup
+             if camera_metadata:
+                 for key, value in camera_metadata.items():
+                     if isinstance(value, np.ndarray):
+                         camera_group.attrs[key] = value.tolist()
+                     elif isinstance(value, (np.integer, np.floating)):
+                         camera_group.attrs[key] = value.item()
+                     else:
+                         camera_group.attrs[key] = value
+         else:
+             # Single-camera mode: add to root
+             root.attrs["multiscales"] = [multiscales_meta]
+
+             # Add microscope metadata directly to root attrs
+             if metadata:
+                 # Convert numpy arrays and scalars to JSON-serializable types
+                 for key, value in metadata.items():
+                     if isinstance(value, np.ndarray):
+                         root.attrs[key] = value.tolist()
+                     elif isinstance(value, (np.integer, np.floating)):
+                         root.attrs[key] = value.item()
+                     else:
+                         root.attrs[key] = value
+
+     elif path.suffix == ".klb":
+         if pixel_spacing is not None:
+             spacing = [1.0, 1.0] + list(pixel_spacing[:3])
+         else:
+             spacing = [1.0, 1.0, 1.0, 1.0, 1.0]
+         pyklb.writefull(volume, str(path), pixelspacing_tczyx=spacing)
+
+     elif path.suffix in (".tif", ".tiff"):
+         tiff_kwargs = {}
+         if pixel_spacing is not None:
+             # tiff resolution is pixels per unit; pixel_spacing is [z, y, x] um per pixel
+             tiff_kwargs["resolution"] = (1.0 / pixel_spacing[2], 1.0 / pixel_spacing[1])
+             tiff_kwargs["metadata"] = {"unit": "um"}
+         tifffile.imwrite(path, volume, **tiff_kwargs)
+
+     else:
+         raise ValueError(f"unsupported format: {path.suffix}")
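+
+ # Usage sketch (hypothetical values): writing a (z, y, x) uint16 volume as
+ # sharded OME-Zarr with 0.40625 x 0.40625 x 1.0 um spacing.
+ #
+ #     volume = np.zeros((100, 1024, 2048), dtype=np.uint16)
+ #     write_volume(
+ #         volume,
+ #         Path("out/data_TM000000_SPM00.zarr"),
+ #         pixel_spacing=np.array([1.0, 0.40625, 0.40625]),  # [z, y, x] um
+ #         shards=(10, 1024, 2048),  # 10 full frames per shard file
+ #     )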
+
+
+ def write_to_consolidated_zarr(
+     base_path: Path,
+     subpath: str,
+     volume: np.ndarray,
+     pixel_spacing: Optional[np.ndarray] = None,
+     compression: Optional[str] = "blosc-zstd",
+     compression_level: int = 9,
+     chunks: Optional[tuple[int, int, int]] = None,
+     shards: Optional[tuple[int, int, int]] = None,
+     label_metadata: Optional[dict] = None,
+     camera_subpath: Optional[str] = None,
+ ) -> None:
+     """Write array to a consolidated OME-Zarr structure.
+
+     Args:
+         base_path: Base Zarr group path (e.g., .../data_TM000000_SPM00.zarr)
+         subpath: Relative path within the Zarr group (e.g., "projections/xy", "labels/segmentation")
+         volume: Array data to write
+         pixel_spacing: Physical spacing in micrometers [z, y, x]
+         compression: Compression type
+         compression_level: Compression level (0-9)
+         chunks: Chunk shape
+         shards: Shard shape
+         label_metadata: Optional metadata for label images (colors, properties)
+         camera_subpath: Optional camera subgroup (e.g., "camera_0") for multi-camera structure
+     """
+     try:
+         import zarr
+         from zarr.codecs import BloscCodec, GzipCodec
+     except ImportError as e:
+         raise ImportError(
+             "zarr required for consolidated output. "
+             "Install with: uv pip install 'isoview[parallel]'"
+         ) from e
+
+     # Open or create the base group
+     store = zarr.storage.LocalStore(str(base_path))
+     root = zarr.open_group(store=store, mode="a")
+
+     # Navigate to camera subgroup if specified
+     if camera_subpath:
+         if camera_subpath not in root:
+             root.create_group(camera_subpath)
+         base_group = root[camera_subpath]
+     else:
+         base_group = root
+
+     # Create subgroup structure
+     parts = subpath.split("/")
+     current_group = base_group
+     for part in parts[:-1]:
+         if part not in current_group:
+             current_group = current_group.create_group(part)
+         else:
+             current_group = current_group[part]
+
+     # Prepare compression codecs
+     compressors = []
+     if compression == "blosc-zstd":
+         compressors.append(
+             BloscCodec(cname="zstd", clevel=compression_level, shuffle="bitshuffle")
+         )
+     elif compression == "blosc-lz4":
+         compressors.append(
+             BloscCodec(cname="lz4", clevel=compression_level, shuffle="shuffle")
+         )
+     elif compression == "blosc-lz4hc":
+         compressors.append(
+             BloscCodec(cname="lz4hc", clevel=compression_level, shuffle="bitshuffle")
+         )
+     elif compression == "gzip":
+         compressors.append(GzipCodec(level=compression_level))
+
+     # Auto-calculate chunks/shards
+     if chunks is None:
+         if shards is not None:
+             chunks = (1, volume.shape[1], volume.shape[2])
+         else:
+             chunks = (
+                 min(10, volume.shape[0]),
+                 min(128, volume.shape[1]),
+                 min(128, volume.shape[2]),
+             )
+
+     if shards is None and volume.shape[0] >= 20:
+         shards = (10, volume.shape[1], volume.shape[2])
+         if chunks != (1, volume.shape[1], volume.shape[2]):
+             chunks = (1, volume.shape[1], volume.shape[2])
+
+     # Create array
+     array_name = parts[-1]
+
+     # Build full path from root
+     path_parts = []
+     if camera_subpath:
+         path_parts.append(camera_subpath)
+     path_parts.extend(parts[:-1])
+     path_parts.append(array_name)
+     path_parts.append("0")
+     full_array_path = "/".join(path_parts)
+
+     if shards is not None:
+         arr = zarr.create_array(
+             store=store,
+             name=full_array_path,
+             shape=volume.shape,
+             shards=shards,
+             chunks=chunks,
+             dtype=volume.dtype,
+             compressors=compressors if compressors else None,
+             zarr_format=3,
+             overwrite=True,
+         )
+     else:
+         arr = zarr.create_array(
+             store=store,
+             name=full_array_path,
+             shape=volume.shape,
+             chunks=chunks,
+             dtype=volume.dtype,
+             compressors=compressors if compressors else None,
+             zarr_format=3,
+             overwrite=True,
+         )
+
+     # Write data
+     arr[:] = volume
+
+     # Add metadata to the array's parent group
+     if array_name not in current_group:
+         array_group = current_group.create_group(array_name)
+     else:
+         array_group = current_group[array_name]
+
+     # Add multiscales metadata (for images and projections, not labels)
+     if pixel_spacing is not None:
+         z_spacing, y_spacing, x_spacing = pixel_spacing[:3]
+     else:
+         z_spacing = y_spacing = x_spacing = 1.0
+
+     # Don't add multiscales for labels (they use image-label instead)
+     if not label_metadata:
+         array_group.attrs["multiscales"] = [
+             {
+                 "version": "0.5",
+                 "axes": [
+                     {"name": "z", "type": "space", "unit": "micrometer"},
+                     {"name": "y", "type": "space", "unit": "micrometer"},
+                     {"name": "x", "type": "space", "unit": "micrometer"},
+                 ],
+                 "datasets": [
+                     {
+                         "path": "0",
+                         "coordinateTransformations": [
+                             {"type": "scale", "scale": [float(z_spacing), float(y_spacing), float(x_spacing)]}
+                         ],
+                     }
+                 ],
+             }
+         ]
+
+     # Add label metadata if provided
+     if label_metadata:
+         array_group.attrs["image-label"] = label_metadata
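+
+ # Usage sketch (hypothetical names): adding a max-intensity projection next to
+ # the image data inside an existing consolidated store.
+ #
+ #     write_to_consolidated_zarr(
+ #         Path("out/data_TM000000_SPM00.zarr"),
+ #         "projections/xy",
+ #         volume.max(axis=0, keepdims=True),
+ #         pixel_spacing=np.array([1.0, 0.40625, 0.40625]),
+ #         camera_subpath="camera_0",
+ #     )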
+
+
+ def setup_consolidated_zarr(
+     base_path: Path,
+     metadata: Optional[dict] = None,
+     camera_subpath: Optional[str] = None,
+ ) -> None:
+     """Initialize a consolidated OME-Zarr file with proper structure.
+
+     Creates the base group with metadata and prepares for labels/projections.
+
+     Args:
+         base_path: Path to the .zarr file
+         metadata: Microscope metadata dictionary
+         camera_subpath: Optional camera subgroup (e.g., "camera_0") for multi-camera structure
+     """
+     try:
+         import zarr
+     except ImportError as e:
+         raise ImportError(
+             "zarr required for consolidated output. "
+             "Install with: uv pip install 'isoview[parallel]'"
+         ) from e
+
+     store = zarr.storage.LocalStore(str(base_path))
+     root = zarr.open_group(store=store, mode="a")
+
+     # Navigate to camera subgroup if specified
+     if camera_subpath:
+         if camera_subpath not in root:
+             root.create_group(camera_subpath)
+         base_group = root[camera_subpath]
+     else:
+         base_group = root
+
+     # Create labels group
+     if "labels" not in base_group:
+         labels_group = base_group.create_group("labels")
+         labels_group.attrs["labels"] = []  # Will be populated as labels are added
+
+     # Create projections group (custom, not part of OME-Zarr spec but useful)
+     if "projections" not in base_group:
+         base_group.create_group("projections")
+
+     # Add microscope metadata to root (shared across cameras, only write once)
+     if metadata and "data_header" not in root.attrs:
+         for key, value in metadata.items():
+             if isinstance(value, np.ndarray):
+                 root.attrs[key] = value.tolist()
+             elif isinstance(value, (np.integer, np.floating)):
+                 root.attrs[key] = value.item()
+             else:
+                 root.attrs[key] = value
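+
+ # Usage sketch (hypothetical names common_meta, vol0, vol1): initialize a
+ # two-camera store once, then write each camera's volume into its subgroup.
+ #
+ #     base = Path("out/data_TM000000_SPM00.zarr")
+ #     setup_consolidated_zarr(base, metadata=common_meta, camera_subpath="camera_0")
+ #     setup_consolidated_zarr(base, metadata=common_meta, camera_subpath="camera_1")
+ #     write_volume(vol0, base, metadata=common_meta, camera_subpath="camera_0")
+ #     write_volume(vol1, base, metadata=common_meta, camera_subpath="camera_1")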
+
+
+ def add_label_to_registry(
+     base_path: Path,
+     label_name: str,
+     camera_subpath: Optional[str] = None,
+ ) -> None:
+     """Add a label to the labels registry in OME-Zarr.
+
+     Args:
+         base_path: Path to the .zarr file
+         label_name: Name of the label (e.g., "segmentation", "xz_mask")
+         camera_subpath: Optional camera subgroup (e.g., "camera_0") for multi-camera structure
+     """
+     try:
+         import zarr
+     except ImportError:
+         return
+
+     store = zarr.storage.LocalStore(str(base_path))
+     root = zarr.open_group(store=store, mode="a")
+
+     # Navigate to camera subgroup if specified
+     if camera_subpath:
+         if camera_subpath in root:
+             base_group = root[camera_subpath]
+         else:
+             return
+     else:
+         base_group = root
+
+     if "labels" in base_group:
+         labels_group = base_group["labels"]
+         current_labels = list(labels_group.attrs.get("labels", []))
+         if label_name not in current_labels:
+             current_labels.append(label_name)
+             labels_group.attrs["labels"] = current_labels
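+
+ # Usage sketch: writing a segmentation and registering it so OME-Zarr viewers
+ # can discover it ("segmentation" and the color entry are example values).
+ #
+ #     write_to_consolidated_zarr(
+ #         base, "labels/segmentation", seg_volume,
+ #         label_metadata={"colors": [{"label-value": 1, "rgba": [255, 0, 0, 255]}]},
+ #     )
+ #     add_label_to_registry(base, "segmentation")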
+
+
+ def read_all_xml_metadata(input_dir: Path, specimen: int) -> tuple[dict, dict]:
+     """Read and merge metadata from all XML files for a specimen.
+
+     Args:
+         input_dir: Root directory containing specimen data
+         specimen: Specimen number
+
+     Returns:
+         Tuple of (common_metadata, per_camera_metadata) where:
+             - common_metadata: Metadata that's the same across all cameras
+             - per_camera_metadata: Dict mapping camera index to camera-specific metadata
+     """
+     # Find all XML files for this specimen
+     xml_files = []
+
+     # Check SPM00/ch*.xml pattern
+     spm_dir = input_dir / f"SPM{specimen:02d}"
+     if spm_dir.exists():
+         xml_files.extend(sorted(spm_dir.glob("ch*.xml")))
+
+     # Fallback: check root level
+     if not xml_files:
+         xml_files.extend(sorted(input_dir.glob(f"*spec{specimen:02d}*.xml")))
+
+     if not xml_files:
+         return {}, {}
+
+     # Read all XML files
+     all_metadata = []
+     for xml_file in xml_files:
+         try:
+             meta = read_xml_metadata(xml_file)
+             all_metadata.append(meta)
+         except Exception as e:
+             print(f"Warning: Failed to read {xml_file}: {e}")
+             continue
+
+     if not all_metadata:
+         return {}, {}
+
+     # Separate common and per-camera metadata
+     common_metadata = {}
+     per_camera_metadata = {}
+
+     # Get all keys from all metadata dicts
+     all_keys = set()
+     for meta in all_metadata:
+         all_keys.update(meta.keys())
+
+     # Helper function to check if values are equal
+     def values_equal(v1, v2):
+         # numpy arrays need elementwise comparison; mixing an array with a
+         # scalar under plain == would yield an ambiguous array result
+         if isinstance(v1, np.ndarray) or isinstance(v2, np.ndarray):
+             return (
+                 isinstance(v1, np.ndarray)
+                 and isinstance(v2, np.ndarray)
+                 and v1.shape == v2.shape
+                 and np.allclose(v1, v2)
+             )
+         return v1 == v2
+
+     for key in all_keys:
+         # Collect values for this key across all cameras
+         values = [meta.get(key) for meta in all_metadata if key in meta]
+
+         if not values:
+             continue
+
+         # Check if all values are the same
+         all_same = all(values_equal(values[0], v) for v in values[1:])
+
+         if all_same:
+             # Same across all cameras - add to common metadata
+             common_metadata[key] = values[0]
+         else:
+             # Different across cameras - add to per-camera metadata
+             for i, meta in enumerate(all_metadata):
+                 if key in meta:
+                     if i not in per_camera_metadata:
+                         per_camera_metadata[i] = {}
+                     per_camera_metadata[i][key] = meta[key]
+
+     return common_metadata, per_camera_metadata
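+
+ # Usage sketch: fields like "z_step" typically agree across cameras and land
+ # in common_meta, while "camera_roi" differs and lands in per_camera[i].
+ #
+ #     common_meta, per_camera = read_all_xml_metadata(Path("raw"), specimen=0)
+ #     per_camera.get(0, {}).get("camera_roi")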
+
+
+ def read_xml_metadata(xml_path: Path) -> dict:
+     """Parse microscope metadata from xml file.
+
+     Returns dictionary with both raw and calculated metadata fields.
+     """
+     with open(xml_path, "r") as f:
+         data = xmltodict.parse(f.read())
+
+     metadata = {}
+
+     # handle push_config format
+     config = data.get("push_config", {})
+     info_list = config.get("info", [])
+     if not isinstance(info_list, list):
+         info_list = [info_list]
+
+     # parse all info elements
+     for info in info_list:
+         # original fields to keep
+         if "@data_header" in info:
+             metadata["data_header"] = info["@data_header"]
+         if "@specimen_name" in info:
+             metadata["specimen_name"] = info["@specimen_name"]
+         if "@timestamp" in info:
+             metadata["timestamp"] = info["@timestamp"]
+         if "@time_point" in info:
+             metadata["time_point"] = int(info["@time_point"])
+         if "@specimen_XYZT" in info:
+             metadata["specimen_XYZT"] = info["@specimen_XYZT"]
+         if "@angle" in info:
+             metadata["angle"] = float(info["@angle"])
+         if "@camera_index" in info:
+             metadata["camera_index"] = info["@camera_index"]
+         if "@camera_type" in info:
+             metadata["camera_type"] = info["@camera_type"]
+         if "@camera_roi" in info:
+             metadata["camera_roi"] = info["@camera_roi"]
+         if "@wavelength" in info:
+             metadata["wavelength"] = info["@wavelength"]
+         if "@illumination_arms" in info:
+             metadata["illumination_arms"] = info["@illumination_arms"]
+         if "@illumination_filter" in info:
+             metadata["illumination_filter"] = info["@illumination_filter"]
+         if "@exposure_time" in info:
+             exposure_time = float(info["@exposure_time"])
+             metadata["exposure_time"] = exposure_time
+         if "@detection_filter" in info:
+             metadata["detection_filter"] = info["@detection_filter"]
+         if "@dimensions" in info:
+             dims_str = info["@dimensions"]
+             # split by comma for multiple cameras
+             camera_dims = []
+             for cam_dims in dims_str.split(","):
+                 dims = [int(x) for x in cam_dims.strip().split("x")]
+                 if len(dims) == 3:
+                     camera_dims.append(dims)
+             if camera_dims:
+                 metadata["dimensions"] = np.array(camera_dims)
+         if "@z_step" in info:
+             metadata["z_step"] = float(info["@z_step"])
+         if "@detection_objective" in info:
+             metadata["detection_objective"] = info["@detection_objective"]
+             obj_str = info["@detection_objective"].split(",")[0]  # first camera
+             # Extract magnification: look for number followed by 'x'
+             # e.g., "SpecialOptics 16x/0.70" -> 16
+             mag_match = re.search(r"(\d+(?:\.\d+)?)x", obj_str, re.IGNORECASE)
+             if mag_match:
+                 metadata["objective_mag"] = float(mag_match.group(1))
+
+     # calculate derived fields
+     if "dimensions" in metadata:
+         dims = metadata["dimensions"]
+         if dims.ndim > 1:
+             dims = dims[0]  # use first camera
+         metadata["zplanes"] = int(dims[-1])
+
+     if "exposure_time" in metadata and "zplanes" in metadata:
+         # fps = 1000 / exposure_time_ms
+         metadata["fps"] = 1000.0 / metadata["exposure_time"]
+         # vps = fps / zplanes (volumes per second)
+         metadata["vps"] = metadata["fps"] / metadata["zplanes"]
+
+     # pixel resolution calculation
+     # pixel size of C11440-22C camera = 6.5 um
+     metadata["camera_pixel_size_um"] = CAMERA_PIXEL_SIZE
+
+     if "objective_mag" in metadata:
+         # pixel resolution = camera pixel size / magnification
+         metadata["pixel_resolution_um"] = metadata["camera_pixel_size_um"] / metadata["objective_mag"]
+
+     return metadata
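+
+ # Worked example of the derived fields: exposure_time = 10 ms and 50 z-planes
+ # give fps = 1000 / 10 = 100 and vps = 100 / 50 = 2; a 16x objective gives
+ # pixel_resolution_um = 6.5 / 16 = 0.40625.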
+
+
+ def read_background_values(
+     input_dir: Path,
+     percentile: float = 3.0,
+ ) -> list[float]:
+     """Read background values from Background_*.tif files.
+
+     MATLAB computes backgroundValues by reading ALL Background_*.tif files
+     (sorted by name) and computing prctile(image(:), 3) on each. The values
+     are indexed by loop counter (1-indexed in MATLAB), not by camera number.
+
+     For cameras=[2], MATLAB uses backgroundValues(1) = Background_0.tif.
+
+     Args:
+         input_dir: directory containing Background_*.tif files
+         percentile: percentile for background estimation (default 3.0)
+
+     Returns:
+         list of background values in file sort order
+     """
+     from .corrections import percentile_interp
+
+     background_values = []
+     bg_files = sorted(input_dir.glob("Background_*.tif"))
+
+     for bg_file in bg_files:
+         img = tifffile.imread(bg_file)
+         bg = percentile_interp(img.ravel().astype(np.float64), percentile)
+         background_values.append(bg)
+
+     return background_values
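+
+ # Usage sketch (hypothetical directory): with Background_0.tif and
+ # Background_1.tif present, values[0] and values[1] follow file sort order,
+ # not camera numbering.
+ #
+ #     values = read_background_values(Path("raw/calibration"))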
+
+
+ def detect_timepoints(input_dir: Path, specimen: int) -> list[int]:
+     """Detect all available timepoints in input directory.
+
+     Args:
+         input_dir: root input directory
+         specimen: specimen number
+
+     Returns:
+         sorted list of timepoint indices
+     """
+     input_dir = Path(input_dir)
+     timepoints = set()
+
+     # check SPM##/TM##### structure
+     spm_dir = input_dir / f"SPM{specimen:02d}"
+     if spm_dir.exists():
+         for tm_dir in spm_dir.glob("TM*"):
+             if tm_dir.is_dir():
+                 try:
+                     tp = int(tm_dir.name[2:])
+                     timepoints.add(tp)
+                 except ValueError:
+                     continue
+
+     # check TM##### at root level
+     for tm_dir in input_dir.glob("TM*"):
+         if tm_dir.is_dir():
+             try:
+                 tp = int(tm_dir.name[2:])
+                 timepoints.add(tp)
+             except ValueError:
+                 continue
+
+     # check stack files directly (SPC##_TM#####_...)
+     for stack_file in input_dir.glob("SPC*_TM*_*.stack"):
+         name = stack_file.name
+         tm_idx = name.find("_TM")
+         if tm_idx >= 0:
+             try:
+                 tp_str = name[tm_idx + 3:tm_idx + 8]
+                 tp = int(tp_str)
+                 timepoints.add(tp)
+             except ValueError:
+                 continue
+
+     if not timepoints:
+         raise ValueError(f"no timepoints found in {input_dir}")
+
+     return sorted(timepoints)
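+
+ # Usage sketch: "TM00000" directories and "SPC00_TM00012_..." stack names both
+ # contribute, so a mixed layout still yields one sorted list, e.g. [0, 12].
+ #
+ #     timepoints = detect_timepoints(Path("raw"), specimen=0)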
+
+
+ def infer_file_structure(input_dir: Path, specimen: int, channels: list[int]) -> str:
+     """Automatically determine input file structure.
+
+     Returns:
+         'timepoint': organized by SPM##/TM##### folders
+         'channel': organized by channel folders
+     """
+     input_dir = Path(input_dir)
+
+     # check for specimen subdirectory structure
+     spm_dir = input_dir / f"SPM{specimen:02d}"
+     if spm_dir.exists():
+         tm_dirs = list(spm_dir.glob("TM*"))
+         if tm_dirs:
+             return "timepoint"
+
+     # check for timepoint structure at root
+     tm_dirs = list(input_dir.glob("TM*"))
+     if tm_dirs:
+         return "timepoint"
+
+     # check for channel structure
+     ch_files = list(input_dir.glob(f"ch{channels[0]:02d}_spec{specimen:02d}.xml"))
+     if ch_files:
+         return "channel"
+
+     # check for direct stack files
+     stack_files = list(input_dir.glob("*.stack"))
+     if stack_files:
+         return "channel"
+
+     raise ValueError(f"could not determine file structure in {input_dir}")
+
+
+ def find_stack_file(
+     input_dir: Path,
+     specimen: int,
+     timepoint: int,
+     camera: int,
+     channel: int,
+     structure: str,
+ ) -> Path:
+     """Locate stack file based on directory structure."""
+     input_dir = Path(input_dir)
+
+     if structure == "timepoint":
+         # try with SPM subdirectory first
+         spm_pattern = (
+             f"SPM{specimen:02d}/TM{timepoint:05d}/ANG000/"
+             f"SPC{specimen:02d}_TM{timepoint:05d}_ANG000_"
+             f"CM{camera}_CHN{channel:02d}_PH0.stack"
+         )
+         spm_path = input_dir / spm_pattern
+         if spm_path.exists():
+             return spm_path
+
+         # try without SPM subdirectory
+         pattern = (
+             f"TM{timepoint:05d}/ANG000/"
+             f"SPC{specimen:02d}_TM{timepoint:05d}_ANG000_"
+             f"CM{camera}_CHN{channel:02d}_PH0.stack"
+         )
+         stack_path = input_dir / pattern
+         if stack_path.exists():
+             return stack_path
+
+         raise FileNotFoundError(f"stack not found: {spm_path} or {stack_path}")
+     else:  # channel
+         pattern = (
+             f"SPC{specimen:02d}_TM{timepoint:05d}_ANG000_"
+             f"CM{camera}_CHN{channel:02d}_PH0.stack"
+         )
+         stack_path = input_dir / pattern
+         if not stack_path.exists():
+             raise FileNotFoundError(f"stack not found: {stack_path}")
+         return stack_path
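+
+ # End-to-end sketch (hypothetical layout), tying the pieces together:
+ #
+ #     structure = infer_file_structure(Path("raw"), specimen=0, channels=[0])
+ #     for tp in detect_timepoints(Path("raw"), specimen=0):
+ #         stack = find_stack_file(Path("raw"), 0, tp, camera=2, channel=0,
+ #                                 structure=structure)
+ #         vol = read_volume(stack)
+ #         write_volume(vol, Path(f"out/data_TM{tp:06d}_SPM00.zarr"))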