nnInteractive 2.3.1__tar.gz → 2.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {nninteractive-2.3.1 → nninteractive-2.3.3}/PKG-INFO +1 -1
  2. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/inference_session.py +142 -36
  3. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/remote/remote_session.py +34 -3
  4. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/server/app.py +64 -29
  5. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/server/main.py +10 -0
  6. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/interaction/point.py +5 -0
  7. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/crop.py +27 -1
  8. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/PKG-INFO +1 -1
  9. {nninteractive-2.3.1 → nninteractive-2.3.3}/pyproject.toml +1 -1
  10. {nninteractive-2.3.1 → nninteractive-2.3.3}/LICENSE +0 -0
  11. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/__init__.py +0 -0
  12. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/__init__.py +0 -0
  13. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/cvpr2025_challenge_baseline/__init__.py +0 -0
  14. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/cvpr2025_challenge_baseline/predict.py +0 -0
  15. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/remote/__init__.py +0 -0
  16. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/remote/_protocol.py +0 -0
  17. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/remote/serialization.py +0 -0
  18. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/server/__init__.py +0 -0
  19. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/interaction/__init__.py +0 -0
  20. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/setup.py +0 -0
  21. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/metadata.py +0 -0
  22. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/reader.py +0 -0
  23. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/run.py +0 -0
  24. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/__init__.py +0 -0
  25. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/__init__.py +0 -0
  26. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/automatic_mask_generator.py +0 -0
  27. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/benchmark.py +0 -0
  28. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/build_sam.py +0 -0
  29. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/__init__.py +0 -0
  30. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/__init__.py +0 -0
  31. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/hieradet.py +0 -0
  32. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/image_encoder.py +0 -0
  33. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/utils.py +0 -0
  34. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/memory_attention.py +0 -0
  35. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/memory_encoder.py +0 -0
  36. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/position_encoding.py +0 -0
  37. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/__init__.py +0 -0
  38. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/mask_decoder.py +0 -0
  39. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/prompt_encoder.py +0 -0
  40. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/transformer.py +0 -0
  41. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam2_base.py +0 -0
  42. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam2_utils.py +0 -0
  43. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/sam2_image_predictor.py +0 -0
  44. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/sam2_video_predictor.py +0 -0
  45. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/sam2_video_predictor_legacy.py +0 -0
  46. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/__init__.py +0 -0
  47. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/amg.py +0 -0
  48. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/misc.py +0 -0
  49. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/transforms.py +0 -0
  50. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/setup.py +0 -0
  51. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/__init__.py +0 -0
  52. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/__init__.py +0 -0
  53. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/sam2_datasets.py +0 -0
  54. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/transforms.py +0 -0
  55. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/utils.py +0 -0
  56. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_dataset.py +0 -0
  57. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_raw_dataset.py +0 -0
  58. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_sampler.py +0 -0
  59. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_segment_loader.py +0 -0
  60. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/loss_fns.py +0 -0
  61. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/model/__init__.py +0 -0
  62. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/model/sam2.py +0 -0
  63. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/optimizer.py +0 -0
  64. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/scripts/sav_frame_extraction_submitit.py +0 -0
  65. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/train.py +0 -0
  66. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/trainer.py +0 -0
  67. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/__init__.py +0 -0
  68. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/checkpoint_utils.py +0 -0
  69. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/data_utils.py +0 -0
  70. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/distributed.py +0 -0
  71. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/logger.py +0 -0
  72. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/train_utils.py +0 -0
  73. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/supervoxel.py +0 -0
  74. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/trainer/__init__.py +0 -0
  75. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/trainer/nnInteractiveTrainer.py +0 -0
  76. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/__init__.py +0 -0
  77. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/bboxes.py +0 -0
  78. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/checkpoint_cleansing.py +0 -0
  79. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/erosion_dilation.py +0 -0
  80. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/inference_helpers.py +0 -0
  81. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/os_shennanigans.py +0 -0
  82. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/rounding.py +0 -0
  83. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/SOURCES.txt +0 -0
  84. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/dependency_links.txt +0 -0
  85. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/entry_points.txt +0 -0
  86. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/requires.txt +0 -0
  87. {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/top_level.txt +0 -0
  88. {nninteractive-2.3.1 → nninteractive-2.3.3}/readme.md +0 -0
  89. {nninteractive-2.3.1 → nninteractive-2.3.3}/setup.cfg +0 -0
  90. {nninteractive-2.3.1 → nninteractive-2.3.3}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nnInteractive
3
- Version: 2.3.1
3
+ Version: 2.3.3
4
4
  Summary: Inference code for nnInteractive
5
5
  Author: Helmholtz Imaging Applied Computer Vision Lab
6
6
  Author-email: Fabian Isensee <f.isensee@dkfz-heidelberg.de>
@@ -31,12 +31,20 @@ from nnInteractive.utils.inference_helpers import (
31
31
  transform_coordinates_noresampling,
32
32
  version_to_tuple,
33
33
  )
34
+ from nnInteractive.utils.os_shennanigans import is_linux_kernel_6_11
34
35
  from nnInteractive.utils.rounding import round_to_nearest_odd
35
36
 
36
37
 
37
38
  class nnInteractiveInferenceSession:
38
39
  INFERENCE_SESSION_VERSION = nnInteractive.__version__
39
40
  REFINEMENT_CACHE_GPU_HEADROOM_BYTES = 4 * 1024**3
41
+ # Maximum adaptive zoom-out factor (see _predict). Also bounds the largest interaction crop,
42
+ # which sizes the reusable blosc2 decompression buffer.
43
+ MAX_AUTOZOOM_FACTOR = 4
44
+ # 'auto' interaction storage threshold: images with at most this many spatial voxels
45
+ # (512*512*1024) use the dense tensor backend; larger ones use blosc2 to bound RAM.
46
+ AUTO_TENSOR_MAX_VOXELS = 2**28
47
+ INTERACTIONS_STORAGE_OPTIONS = ("blosc2", "tensor", "auto")
40
48
  # Interactions implemented by this inference session.
41
49
  SUPPORTED_INTERACTION_KEYS = ("scribble", "lasso", "points", "bbox2d", "bbox3d")
42
50
 
@@ -47,6 +55,7 @@ class nnInteractiveInferenceSession:
47
55
  verbose: bool = False,
48
56
  torch_n_threads: int = 8,
49
57
  do_autozoom: bool = True,
58
+ interactions_storage: str = "auto",
50
59
  ):
51
60
  """
52
61
  Only intended to work with nnInteractiveTrainerV2 and its derivatives
@@ -57,7 +66,22 @@ class nnInteractiveInferenceSession:
57
66
  This is recommended for the persistent inference server, where the
58
67
  process is long-lived so the one-time compile cost is paid only once and
59
68
  amortized across the whole session lifetime.
69
+
70
+ ``interactions_storage``: storage backend for the interaction tensor, one of
71
+ ``"blosc2"``, ``"tensor"`` or ``"auto"`` (default).
72
+ ``"blosc2"`` keeps it as a compact blosc2 in-memory NDArray (low RAM, pays
73
+ (de)compression on every read/write). ``"tensor"`` stores it as a dense CPU
74
+ float16 ``torch.Tensor`` (more RAM, far lower per-access overhead; pinned memory
75
+ by default, skipped when ``device`` is not CUDA or on Linux kernel 6.11 where
76
+ pinning is buggy). ``"auto"`` decides per image at initialization from the
77
+ interaction tensor's voxel count: at most ``AUTO_TENSOR_MAX_VOXELS`` (512*512*1024)
78
+ spatial voxels uses ``"tensor"``, larger uses ``"blosc2"``.
60
79
  """
80
+ if interactions_storage not in self.INTERACTIONS_STORAGE_OPTIONS:
81
+ raise ValueError(
82
+ f"interactions_storage must be one of {self.INTERACTIONS_STORAGE_OPTIONS}, "
83
+ f"got {interactions_storage!r}."
84
+ )
61
85
  print("session initialized")
62
86
 
63
87
  self.network = None
@@ -69,6 +93,9 @@ class nnInteractiveInferenceSession:
69
93
  self._interactions_shape = None
70
94
  self.device = device
71
95
  self.use_torch_compile = use_torch_compile
96
+ self.interactions_storage = interactions_storage
97
+ # Concrete backend ("blosc2"/"tensor") resolved per image in _initialize_interactions.
98
+ self._interactions_storage_resolved: Optional[str] = None
72
99
  self.interaction_decay = None
73
100
  self.current_interaction_intensity: float = 1.0
74
101
  self._fp16_max_value = float(torch.finfo(torch.float16).max)
@@ -86,7 +113,10 @@ class nnInteractiveInferenceSession:
86
113
  self.license: Optional[str] = None
87
114
 
88
115
  # image specific
89
- self.interactions = None # blosc2.NDArray once initialized
116
+ self.interactions = None # blosc2.NDArray or dense torch.Tensor (see interactions_storage)
117
+ # Reusable, pre-faulted float16 buffer to decompress blosc2 interaction crops into (Path B).
118
+ # Allocated per image in _initialize_interactions; None for the dense-tensor backend.
119
+ self._interactions_read_buffer = None
90
120
  self.preprocessed_image: torch.Tensor = None
91
121
  self.preprocessed_props = None
92
122
  self.target_buffer: Union[np.ndarray, torch.Tensor] = None
@@ -303,19 +333,38 @@ class nnInteractiveInferenceSession:
303
333
 
304
334
  def _interactions_inplace_maximum(self, channel_idx: int, int_slicer, new_values) -> None:
305
335
  """In-place element-wise maximum for a subregion of a channel."""
336
+ full_slicer = (channel_idx, *int_slicer)
337
+ if isinstance(self.interactions, torch.Tensor):
338
+ # Dense torch backend: operate in place without a numpy round-trip.
339
+ if not isinstance(new_values, torch.Tensor):
340
+ new_values = torch.as_tensor(new_values)
341
+ view = self.interactions[full_slicer]
342
+ torch.maximum(view, new_values.to(view.dtype), out=view)
343
+ return
306
344
  if isinstance(new_values, torch.Tensor):
307
345
  new_values = new_values.cpu().numpy().astype(np.float16)
308
- full_slicer = (channel_idx, *int_slicer)
309
346
  current_sub = np.asarray(self.interactions[full_slicer])
310
347
  np.maximum(current_sub, new_values, out=current_sub)
311
348
  self.interactions[full_slicer] = current_sub
312
349
 
313
350
  def _write_interactions_channel(self, channel_idx: int, value) -> None:
314
351
  """Write a full channel. Handles torch→numpy for blosc2."""
352
+ if isinstance(self.interactions, torch.Tensor):
353
+ if not isinstance(value, torch.Tensor):
354
+ value = torch.as_tensor(value)
355
+ self.interactions[channel_idx] = value.to(self.interactions.dtype)
356
+ return
315
357
  if isinstance(value, torch.Tensor):
316
358
  value = value.cpu().numpy().astype(np.float16)
317
359
  self.interactions[channel_idx] = value
318
360
 
361
+ def _read_interactions_to_device(self, full_slicer, device) -> torch.Tensor:
362
+ """Read an interaction subregion as a torch.Tensor on ``device``, regardless of backend."""
363
+ sub = self.interactions[full_slicer]
364
+ if isinstance(sub, torch.Tensor):
365
+ return sub.to(device)
366
+ return torch.from_numpy(np.asarray(sub)).to(device)
367
+
319
368
  def _paste_prediction_to_target_buffer(self, prediction: torch.Tensor, bbox: List[List[int]]) -> None:
320
369
  target_bbox = self._interaction_bbox_to_target_bbox(bbox)
321
370
  if isinstance(self.target_buffer, torch.Tensor):
@@ -556,11 +605,30 @@ class nnInteractiveInferenceSession:
556
605
  self.original_image_shape = None
557
606
  self._last_paste_bbox = None
558
607
 
559
- def _initialize_interactions(self, image_torch: torch.Tensor):
560
- shape = (self.num_interaction_channels, *image_torch.shape[1:])
561
- if self.verbose:
562
- print("Initialize interactions with blosc2 in-memory compression")
563
- self.interactions = blosc2.zeros(
608
+ def _resolve_interactions_storage(self, spatial_shape) -> str:
609
+ """Resolve the configured storage to a concrete backend ("blosc2" or "tensor").
610
+
611
+ For "auto", pick "tensor" for images with at most AUTO_TENSOR_MAX_VOXELS spatial voxels
612
+ (lower per-access overhead) and "blosc2" for larger ones (to bound RAM).
613
+ """
614
+ if self.interactions_storage != "auto":
615
+ return self.interactions_storage
616
+ n_voxels = int(np.prod(spatial_shape, dtype=np.int64))
617
+ return "blosc2" if n_voxels > self.AUTO_TENSOR_MAX_VOXELS else "tensor"
618
+
619
+ def _new_interactions_array(self, shape, compression_nthreads: int):
620
+ """Allocate a zeroed interaction array using the resolved backend.
621
+
622
+ "tensor" selects a dense CPU float16 torch.Tensor (more RAM, lower per-access
623
+ overhead); "blosc2" uses a compact blosc2 in-memory NDArray.
624
+ """
625
+ if self._interactions_storage_resolved == "tensor":
626
+ # Pinning enables faster non-blocking host->device copies, but only helps for a
627
+ # CUDA target and is buggy on Linux kernel 6.11 (see utils/os_shennanigans).
628
+ pin = self.device.type == "cuda" and not is_linux_kernel_6_11()
629
+ tensor = torch.zeros(shape, dtype=torch.float16, device="cpu")
630
+ return tensor.pin_memory() if pin else tensor
631
+ return blosc2.zeros(
564
632
  shape,
565
633
  dtype=np.float16,
566
634
  chunks=(1, *[min(64, s) for s in shape[1:]]),
@@ -570,11 +638,49 @@ class nnInteractiveInferenceSession:
570
638
  "codec": blosc2.Codec.LZ4,
571
639
  "clevel": 5,
572
640
  "filters": [blosc2.Filter.NOFILTER],
573
- "nthreads": min(self.torch_n_threads, os.cpu_count()),
641
+ "nthreads": compression_nthreads,
574
642
  },
575
- dparams={"nthreads": 4},
643
+ # Decompression of this sparse interaction tensor is fastest single-threaded:
644
+ # blosc2's per-chunk thread sync costs more than it saves here, badly so on
645
+ # many-core/many-CCD servers (see benchmarks). Multithreading only hurts.
646
+ dparams={"nthreads": 1},
576
647
  )
648
+
649
+ def _initialize_interactions(self, image_torch: torch.Tensor):
650
+ shape = (self.num_interaction_channels, *image_torch.shape[1:])
651
+ self._interactions_storage_resolved = self._resolve_interactions_storage(shape[1:])
652
+ via_auto = self.interactions_storage == "auto"
653
+ if self.verbose or via_auto:
654
+ backend = "dense torch.Tensor" if self._interactions_storage_resolved == "tensor" else "blosc2 in-memory compression"
655
+ print(f"Initialize interactions with {backend}{' (auto)' if via_auto else ''}")
656
+ self.interactions = self._new_interactions_array(shape, min(self.torch_n_threads, os.cpu_count()))
577
657
  self._interactions_shape = shape
658
+ self._interactions_read_buffer = self._new_interactions_read_buffer(shape)
659
+
660
+ def _new_interactions_read_buffer(self, shape) -> Optional[np.ndarray]:
661
+ """Pre-faulted buffer to decompress blosc2 interaction crops into (Path B), or None.
662
+
663
+ Sized to the largest possible crop: the patch size scaled by the maximum autozoom factor,
664
+ capped to the image size. Only allocated for the blosc2 backend that exposes the
665
+ decompress-into-buffer method; the dense-tensor backend returns views and needs no buffer.
666
+ """
667
+ if self._interactions_storage_resolved != "blosc2":
668
+ return None
669
+ if not hasattr(self.interactions, "get_slice_numpy"):
670
+ print(
671
+ "WARNING: this blosc2 build has no NDArray.get_slice_numpy; cannot reuse a "
672
+ "decompression buffer for interaction crops. Falling back to a fresh allocation on "
673
+ "every read (slower). Consider updating blosc2."
674
+ )
675
+ return None
676
+ max_valid = [
677
+ min(round(p * self.MAX_AUTOZOOM_FACTOR), s)
678
+ for p, s in zip(self.configuration_manager.patch_size, shape[1:])
679
+ ]
680
+ n = self.num_interaction_channels * int(np.prod(max_valid, dtype=np.int64))
681
+ buffer = np.empty(n, dtype=np.float16)
682
+ buffer[:] = 0 # first-touch the pages once, up front
683
+ return buffer
578
684
 
579
685
  @torch.inference_mode()
580
686
  def _background_set_image(self, image: np.ndarray, image_properties: dict):
@@ -635,20 +741,7 @@ class nnInteractiveInferenceSession:
635
741
  """
636
742
  if self.interactions is not None:
637
743
  del self.interactions
638
- self.interactions = blosc2.zeros(
639
- self._interactions_shape,
640
- dtype=np.float16,
641
- chunks=(1, *[min(64, s) for s in self._interactions_shape[1:]]),
642
- blocks=(1, *[min(32, s) for s in self._interactions_shape[1:]]),
643
- # Interactions compress better with NOFILTER, which is also faster than SHUFFLE.
644
- cparams={
645
- "codec": blosc2.Codec.LZ4,
646
- "clevel": 5,
647
- "filters": [blosc2.Filter.NOFILTER],
648
- "nthreads": os.cpu_count(),
649
- },
650
- dparams={"nthreads": 4},
651
- )
744
+ self.interactions = self._new_interactions_array(self._interactions_shape, os.cpu_count())
652
745
  self.current_interaction_intensity = 1.0
653
746
 
654
747
  if self.target_buffer is not None:
@@ -980,7 +1073,9 @@ class nnInteractiveInferenceSession:
980
1073
  Returns:
981
1074
 
982
1075
  """
983
- print("Current cratio", self.interactions.cratio)
1076
+ if not isinstance(self.interactions, torch.Tensor):
1077
+ # cratio is a blosc2-only diagnostic; the dense tensor backend has no compression.
1078
+ print("Current cratio", self.interactions.cratio)
984
1079
 
985
1080
  assert self.pad_mode_data == "constant", "pad modes other than constant are not implemented here"
986
1081
  assert len(self.new_interaction_centers) == len(self.new_interaction_zoom_out_factors)
@@ -996,7 +1091,7 @@ class nnInteractiveInferenceSession:
996
1091
  "!!!WE NO LONGER RUN ONE PREDICTION PER CENTER AND ONLY USE THE LAST ADDED INTERACTION AS CENTER!!!"
997
1092
  )
998
1093
  prediction_center, zoom_out_factor = self.new_interaction_centers[-1], self.new_interaction_zoom_out_factors[-1]
999
- zoom_out_factor = min(4, zoom_out_factor)
1094
+ zoom_out_factor = min(self.MAX_AUTOZOOM_FACTOR, zoom_out_factor)
1000
1095
 
1001
1096
  start_predict = time()
1002
1097
  with torch.autocast(self.device.type, enabled=True) if self.device.type == "cuda" else dummy_context():
@@ -1005,7 +1100,9 @@ class nnInteractiveInferenceSession:
1005
1100
  input_for_predict, scaled_patch_size, scaled_bbox, previous_prediction = self._build_network_input(
1006
1101
  prediction_center, zoom_out_factor
1007
1102
  )
1008
- pred = self.network(input_for_predict[None])[0].argmax(0).detach()
1103
+ # .contiguous() is required for torch.compile: the input may be a non-contiguous
1104
+ # view (e.g. from the dense-tensor backend), and the compiled graph assumes contiguity.
1105
+ pred = self.network(input_for_predict[None].contiguous())[0].argmax(0).detach()
1009
1106
  del input_for_predict
1010
1107
 
1011
1108
  # detect changes at border. If there are, we enter autozoom
@@ -1022,17 +1119,19 @@ class nnInteractiveInferenceSession:
1022
1119
  start_zoomout = time()
1023
1120
  while has_change and self.do_autozoom:
1024
1121
  print(f"AutoZoom zoom out factor {zoom_out_factor}")
1025
- # we allow a max zoom out of 4
1026
- if zoom_out_factor >= 4:
1122
+ # we allow a max zoom out of MAX_AUTOZOOM_FACTOR
1123
+ if zoom_out_factor >= self.MAX_AUTOZOOM_FACTOR:
1027
1124
  break
1028
1125
  else:
1029
1126
  zoom_out_factor *= zoom_out_growth_factor
1030
- zoom_out_factor = min(4, zoom_out_factor)
1127
+ zoom_out_factor = min(self.MAX_AUTOZOOM_FACTOR, zoom_out_factor)
1031
1128
 
1032
1129
  input_for_predict, scaled_patch_size, scaled_bbox, previous_prediction_resized = (
1033
1130
  self._build_network_input(prediction_center, zoom_out_factor)
1034
1131
  )
1035
- pred = self.network(input_for_predict[None])[0].argmax(0).detach()
1132
+ # .contiguous() is required for torch.compile: the input may be a non-contiguous
1133
+ # view (e.g. from the dense-tensor backend), and the compiled graph assumes contiguity.
1134
+ pred = self.network(input_for_predict[None].contiguous())[0].argmax(0).detach()
1036
1135
  del input_for_predict
1037
1136
  empty_cache(self.device)
1038
1137
 
@@ -1077,7 +1176,9 @@ class nnInteractiveInferenceSession:
1077
1176
 
1078
1177
  # cropping happens on CPU, padding happens on GPU (later)
1079
1178
  crop_img, pad_image = crop_to_valid(self.preprocessed_image, scaled_bbox)
1080
- interactions_tensor, pad_interaction = crop_to_valid(self.interactions, scaled_bbox)
1179
+ interactions_tensor, pad_interaction = crop_to_valid(
1180
+ self.interactions, scaled_bbox, out=self._interactions_read_buffer
1181
+ )
1081
1182
  # For blosc2, crop_to_valid returns a numpy array; convert to torch (still on CPU).
1082
1183
  if not isinstance(interactions_tensor, torch.Tensor):
1083
1184
  interactions_tensor = torch.from_numpy(np.asarray(interactions_tensor))
@@ -1174,7 +1275,8 @@ class nnInteractiveInferenceSession:
1174
1275
  dim=0,
1175
1276
  )
1176
1277
 
1177
- pred = self.network(patch[None])[0].argmax(0).detach()
1278
+ # .contiguous(): see _predict — required for torch.compile with possibly non-contiguous input.
1279
+ pred = self.network(patch[None].contiguous())[0].argmax(0).detach()
1178
1280
  paste_tensor(
1179
1281
  cache_interactions,
1180
1282
  pred.to(cache_interactions.device, dtype=cache_interactions.dtype),
@@ -1261,7 +1363,7 @@ class nnInteractiveInferenceSession:
1261
1363
  pred_slicer = tuple(slice(lb, ub) for lb, ub in pred_bbox)
1262
1364
  local_slicer = tuple(slice(lb, ub) for lb, ub in local_seen_bbox)
1263
1365
 
1264
- prev_sub = torch.from_numpy(np.asarray(self.interactions[(prev_seg_ch, *seen_slicer)])).to(self.device)
1366
+ prev_sub = self._read_interactions_to_device((prev_seg_ch, *seen_slicer), self.device)
1265
1367
 
1266
1368
  diff_local[local_slicer] = (pred[pred_slicer] != prev_sub).to(diff_local.dtype)
1267
1369
  del prev_sub
@@ -1280,7 +1382,7 @@ class nnInteractiveInferenceSession:
1280
1382
  def _mark_prev_seg_in_local_diff(self, diff_local: torch.Tensor, planning_bbox: List[List[int]]) -> None:
1281
1383
  prev_seg_ch = self._get_prev_seg_channel()
1282
1384
  planning_slicer = tuple(slice(lb, ub) for lb, ub in planning_bbox)
1283
- prev_sub = torch.from_numpy(np.asarray(self.interactions[(prev_seg_ch, *planning_slicer)])).to(self.device)
1385
+ prev_sub = self._read_interactions_to_device((prev_seg_ch, *planning_slicer), self.device)
1284
1386
  diff_local[prev_sub > 0.5] = 1
1285
1387
  del prev_sub
1286
1388
 
@@ -1548,8 +1650,12 @@ class nnInteractiveInferenceSession:
1548
1650
  self.network = self.network.to(self.device)
1549
1651
 
1550
1652
  def __del__(self):
1551
- self._finish_preprocessing_and_initialize_interactions()
1552
- self.executor.shutdown()
1653
+ # Be robust to a partially-constructed instance (e.g. __init__ raised on bad arguments):
1654
+ # these attributes may not exist yet.
1655
+ if hasattr(self, "preprocess_future"):
1656
+ self._finish_preprocessing_and_initialize_interactions()
1657
+ if hasattr(self, "executor"):
1658
+ self.executor.shutdown()
1553
1659
 
1554
1660
 
1555
1661
  if __name__ == "__main__":
@@ -132,6 +132,13 @@ class nnInteractiveRemoteInferenceSession:
132
132
  the server before the client gives up. Default 60s matches observed
133
133
  prediction times (100ms..~10s) with headroom for slow links. On
134
134
  expiry: ``httpx.ReadTimeout``.
135
+ set_image_read_timeout:
136
+ Read timeout (seconds) used *only* for ``set_image``. After the volume
137
+ is uploaded, the server decompresses and preprocesses the full image
138
+ before responding, which can take far longer than a prediction on a
139
+ large volume. ``set_image`` therefore gets its own generous read
140
+ timeout instead of the much tighter ``read_timeout`` used for
141
+ predictions. On expiry: ``httpx.ReadTimeout``.
135
142
  write_timeout:
136
143
  Seconds to finish uploading the request body. ``set_image`` uploads
137
144
  the full 4D volume so this is the longest-running upload. On expiry:
@@ -146,6 +153,7 @@ class nnInteractiveRemoteInferenceSession:
146
153
  api_key: Optional[str] = None,
147
154
  connect_timeout: float = 10.0,
148
155
  read_timeout: float = 60.0,
156
+ set_image_read_timeout: float = 600.0,
149
157
  write_timeout: float = 120.0,
150
158
  pool_timeout: float = 10.0,
151
159
  ):
@@ -166,6 +174,15 @@ class nnInteractiveRemoteInferenceSession:
166
174
  ),
167
175
  headers=headers,
168
176
  )
177
+ # Per-request timeout override for set_image: same connect/write/pool as
178
+ # the client default, but a much longer read budget for server-side
179
+ # decompression + preprocessing of the full volume.
180
+ self._set_image_timeout = httpx.Timeout(
181
+ connect=connect_timeout,
182
+ read=set_image_read_timeout,
183
+ write=write_timeout,
184
+ pool=pool_timeout,
185
+ )
169
186
  self._lease_token: Optional[str] = None
170
187
 
171
188
  # Claim a session on the server. The lease token is then attached to
@@ -242,12 +259,21 @@ class nnInteractiveRemoteInferenceSession:
242
259
  resp.raise_for_status()
243
260
  return resp
244
261
 
245
- def _post_binary(self, path: str, meta: dict, array_bytes: bytes) -> httpx.Response:
262
+ def _post_binary(
263
+ self,
264
+ path: str,
265
+ meta: dict,
266
+ array_bytes: bytes,
267
+ timeout: Union[httpx.Timeout, float, None] = None,
268
+ ) -> httpx.Response:
246
269
  headers = {
247
270
  META_HEADER: json.dumps(_to_jsonable(meta), separators=(",", ":")),
248
271
  "Content-Type": CONTENT_TYPE_OCTET_STREAM,
249
272
  }
250
- resp = self._http.post(path, content=array_bytes, headers=headers)
273
+ # httpx treats timeout=None as "no override" only when the arg is
274
+ # omitted; pass it through explicitly only when a caller supplied one.
275
+ kwargs = {} if timeout is None else {"timeout": timeout}
276
+ resp = self._http.post(path, content=array_bytes, headers=headers, **kwargs)
251
277
  _raise_for_lease_errors(resp)
252
278
  resp.raise_for_status()
253
279
  return resp
@@ -300,7 +326,12 @@ class nnInteractiveRemoteInferenceSession:
300
326
  def set_image(self, image: np.ndarray, image_properties: Optional[dict] = None) -> None:
301
327
  assert image.ndim == 4, f"expected a 4d image as input, got {image.ndim}d. Shape {image.shape}"
302
328
  meta = {"image_properties": image_properties or {}}
303
- resp = self._post_binary(PATH_SET_IMAGE, meta, pack_array(image, nthreads=_compression_threads()))
329
+ resp = self._post_binary(
330
+ PATH_SET_IMAGE,
331
+ meta,
332
+ pack_array(image, nthreads=_compression_threads()),
333
+ timeout=self._set_image_timeout,
334
+ )
304
335
  info = resp.json()
305
336
  self.original_image_shape = tuple(info["original_image_shape"])
306
337
 
@@ -31,6 +31,15 @@ Concurrency model:
31
31
  prediction runs at a time.
32
32
  - The acquisition order is always (session lock → gpu lock) so there is no
33
33
  deadlock potential.
34
+ - The endpoints that carry large payloads (``set_image`` and the mask
35
+ interactions) are ``async`` so they can ``await`` the upload, but their
36
+ CPU-bound work (blosc2 decompression, image preprocessing, prediction,
37
+ response compression) is dispatched to a worker thread via
38
+ ``run_in_threadpool``. This keeps the event loop free during a long
39
+ ``set_image``/predict so lightweight endpoints — ``/heartbeat``,
40
+ ``/healthz`` — and the background reaper stay responsive, and so two
41
+ clients can genuinely preprocess concurrently. Acquiring a session/gpu
42
+ lock therefore also happens off the loop, never stalling it.
34
43
  """
35
44
 
36
45
  from __future__ import annotations
@@ -50,6 +59,7 @@ import blosc2
50
59
  import numpy as np
51
60
  import torch
52
61
  from fastapi import Depends, FastAPI, HTTPException, Header, Request, Response, status
62
+ from starlette.concurrency import run_in_threadpool
53
63
 
54
64
  from nnInteractive.inference.inference_session import nnInteractiveInferenceSession
55
65
  from nnInteractive.inference.remote._protocol import (
@@ -151,6 +161,7 @@ class SessionRegistry:
151
161
  torch_n_threads: int,
152
162
  do_autozoom: bool,
153
163
  use_torch_compile: bool,
164
+ interactions_storage: str,
154
165
  verbose: bool,
155
166
  ) -> None:
156
167
  self._artifacts = artifacts
@@ -161,6 +172,7 @@ class SessionRegistry:
161
172
  self._torch_n_threads = torch_n_threads
162
173
  self._do_autozoom = do_autozoom
163
174
  self._use_torch_compile = use_torch_compile
175
+ self._interactions_storage = interactions_storage
164
176
  self._verbose = verbose
165
177
  self._entries: dict[str, SessionEntry] = {}
166
178
  self._mu = threading.Lock()
@@ -189,6 +201,7 @@ class SessionRegistry:
189
201
  verbose=self._verbose,
190
202
  torch_n_threads=self._torch_n_threads,
191
203
  do_autozoom=self._do_autozoom,
204
+ interactions_storage=self._interactions_storage,
192
205
  )
193
206
  session.initialize_from_loaded_artifacts(self._artifacts)
194
207
  entry = SessionEntry(session)
@@ -290,6 +303,7 @@ def make_app(
290
303
  torch_n_threads: int = 8,
291
304
  do_autozoom: bool = True,
292
305
  use_torch_compile: bool = False,
306
+ interactions_storage: str = "auto",
293
307
  verbose: bool = False,
294
308
  api_key: Optional[str] = None,
295
309
  sweep_interval_seconds: float = 15.0,
@@ -303,6 +317,7 @@ def make_app(
303
317
  torch_n_threads=torch_n_threads,
304
318
  do_autozoom=do_autozoom,
305
319
  use_torch_compile=use_torch_compile,
320
+ interactions_storage=interactions_storage,
306
321
  verbose=verbose,
307
322
  )
308
323
  gpu_lock = threading.Lock()
@@ -353,6 +368,7 @@ def make_app(
353
368
  verbose=False,
354
369
  torch_n_threads=torch_n_threads,
355
370
  do_autozoom=do_autozoom,
371
+ interactions_storage=interactions_storage,
356
372
  )
357
373
  _capability_session.initialize_from_loaded_artifacts(artifacts)
358
374
  _capability_snapshot = _build_capability_snapshot(_capability_session)
@@ -570,17 +586,24 @@ def make_app(
570
586
  async def set_image(request: Request, entry: SessionEntry = lease) -> dict:
571
587
  meta = _parse_meta_header(request.headers.get(META_HEADER))
572
588
  body = await request.body()
573
- image = unpack_array(body)
574
589
  image_properties = meta.get("image_properties") or {}
575
590
 
576
- def _do(session):
577
- session.set_image(image, image_properties)
578
- # set_image preprocesses in a background thread; force completion so
579
- # subsequent calls can safely use original_image_shape.
580
- session._finish_preprocessing_and_initialize_interactions()
581
- return {"original_image_shape": list(session.original_image_shape)}
591
+ # Decompression + full-volume preprocessing are CPU-bound and can run
592
+ # for many seconds on a large image. Run them in a worker thread so the
593
+ # event loop keeps servicing heartbeats/healthz and the reaper.
594
+ def _work():
595
+ image = unpack_array(body)
582
596
 
583
- return _under_session_lock(entry, _do)
597
+ def _do(session):
598
+ session.set_image(image, image_properties)
599
+ # set_image preprocesses in a background thread; force completion
600
+ # so subsequent calls can safely use original_image_shape.
601
+ session._finish_preprocessing_and_initialize_interactions()
602
+ return {"original_image_shape": list(session.original_image_shape)}
603
+
604
+ return _under_session_lock(entry, _do)
605
+
606
+ return await run_in_threadpool(_work)
584
607
 
585
608
  @app.post(PATH_SET_TARGET_BUFFER, dependencies=[auth])
586
609
  def set_target_buffer(payload: dict, entry: SessionEntry = lease) -> dict:
@@ -652,41 +675,53 @@ def make_app(
652
675
  async def _handle_mask_interaction(request: Request, entry: SessionEntry, kind: str) -> Response:
653
676
  meta = _parse_meta_header(request.headers.get(META_HEADER))
654
677
  body = await request.body()
655
- mask = unpack_array(body)
656
678
  run_prediction = bool(meta.get("run_prediction", True))
657
679
  interaction_bbox = meta.get("interaction_bbox")
658
680
  if interaction_bbox is not None:
659
681
  interaction_bbox = [list(b) for b in interaction_bbox]
660
682
 
661
- def _do(session):
662
- method = session.add_scribble_interaction if kind == "scribble" else session.add_lasso_interaction
663
- method(
664
- mask,
665
- bool(meta["include_interaction"]),
666
- run_prediction=run_prediction,
667
- override_capability_checks=bool(meta.get("override_capability_checks", False)),
668
- interaction_bbox=interaction_bbox,
669
- )
670
- return _build_prediction_response(session, run_prediction)
683
+ # Decompression + prediction + response compression are CPU/GPU-bound;
684
+ # run them off the event loop (see set_image).
685
+ def _work():
686
+ mask = unpack_array(body)
671
687
 
672
- return _under_session_and_gpu_lock(entry, _do)
688
+ def _do(session):
689
+ method = session.add_scribble_interaction if kind == "scribble" else session.add_lasso_interaction
690
+ method(
691
+ mask,
692
+ bool(meta["include_interaction"]),
693
+ run_prediction=run_prediction,
694
+ override_capability_checks=bool(meta.get("override_capability_checks", False)),
695
+ interaction_bbox=interaction_bbox,
696
+ )
697
+ return _build_prediction_response(session, run_prediction)
698
+
699
+ return _under_session_and_gpu_lock(entry, _do)
700
+
701
+ return await run_in_threadpool(_work)
673
702
 
674
703
  @app.post(PATH_ADD_INITIAL_SEG, dependencies=[auth])
675
704
  async def add_initial_seg_interaction(request: Request, entry: SessionEntry = lease) -> Response:
676
705
  meta = _parse_meta_header(request.headers.get(META_HEADER))
677
706
  body = await request.body()
678
- initial_seg = unpack_array(body)
679
707
  run_prediction = bool(meta.get("run_prediction", False))
680
708
 
681
- def _do(session):
682
- session.add_initial_seg_interaction(
683
- initial_seg=initial_seg,
684
- run_prediction=run_prediction,
685
- override_capability_checks=bool(meta.get("override_capability_checks", False)),
686
- )
687
- return _build_prediction_response(session, run_prediction)
709
+ # Decompression + (optional) prediction are CPU/GPU-bound; run them off
710
+ # the event loop (see set_image).
711
+ def _work():
712
+ initial_seg = unpack_array(body)
688
713
 
689
- return _under_session_and_gpu_lock(entry, _do)
714
+ def _do(session):
715
+ session.add_initial_seg_interaction(
716
+ initial_seg=initial_seg,
717
+ run_prediction=run_prediction,
718
+ override_capability_checks=bool(meta.get("override_capability_checks", False)),
719
+ )
720
+ return _build_prediction_response(session, run_prediction)
721
+
722
+ return _under_session_and_gpu_lock(entry, _do)
723
+
724
+ return await run_in_threadpool(_work)
690
725
 
691
726
  return app
692
727
 
@@ -61,6 +61,15 @@ def _build_parser() -> argparse.ArgumentParser:
61
61
  "the long-lived process. Pass this flag to skip compilation (e.g. for faster startup or "
62
62
  "to work around a compile/backend issue).",
63
63
  )
64
+ p.add_argument(
65
+ "--interactions-storage",
66
+ choices=["blosc2", "tensor", "auto"],
67
+ default="auto",
68
+ help="Storage backend for the interaction tensor (default: auto). 'blosc2': compact "
69
+ "in-memory array (low RAM, pays (de)compression per read/write). 'tensor': dense pinned "
70
+ "CPU float16 torch.Tensor (more RAM, lower per-access overhead). 'auto': per image, use "
71
+ "'tensor' for images up to 512x512x1024 voxels and 'blosc2' for larger ones.",
72
+ )
64
73
  p.add_argument(
65
74
  "--no-autozoom",
66
75
  action="store_true",
@@ -222,6 +231,7 @@ def main(argv=None) -> int:
222
231
  torch_n_threads=args.torch_n_threads,
223
232
  do_autozoom=not args.no_autozoom,
224
233
  use_torch_compile=use_torch_compile,
234
+ interactions_storage=args.interactions_storage,
225
235
  verbose=args.verbose,
226
236
  api_key=api_key,
227
237
  )
@@ -124,6 +124,11 @@ class PointInteraction_stub:
124
124
  )
125
125
 
126
126
  target_slices = (channel_idx, *slices)
127
+ if isinstance(interaction_map, torch.Tensor):
128
+ # Dense torch backend: in-place maximum, no numpy round-trip.
129
+ view = interaction_map[target_slices]
130
+ torch.maximum(view, strel[structuring_slices].to(view.dtype), out=view)
131
+ return interaction_map
127
132
  current_sub = np.asarray(interaction_map[target_slices])
128
133
  strel_np = strel[structuring_slices].numpy().astype(current_sub.dtype)
129
134
  np.maximum(current_sub, strel_np, out=current_sub)
@@ -190,7 +190,7 @@ def paste_tensor(target, source, bbox, channel_idx=None):
190
190
  return target
191
191
 
192
192
 
193
- def crop_to_valid(img, bbox):
193
+ def crop_to_valid(img, bbox, out=None):
194
194
  """
195
195
  Crops the image to the part of the bounding box that lies within the image.
196
196
  Supports a 4D tensor of shape (C, X, Y, Z). The bounding box is specified as
@@ -200,6 +200,12 @@ def crop_to_valid(img, bbox):
200
200
  img: Input tensor (or blosc2 NDArray) of shape (C, X, Y, Z).
201
201
  bbox (list or tuple): Bounding box as a list of three intervals for spatial dims:
202
202
  [[x1, x2], [y1, y2], [z1, z2]].
203
+ out (np.ndarray, optional): A flat, pre-faulted float16 buffer to decompress a blosc2
204
+ crop into, avoiding a fresh allocation + page-fault on every call
205
+ ("Path B"). Only used when ``img`` is a blosc2 NDArray exposing
206
+ ``get_slice_numpy`` and the crop fits; otherwise ignored and a fresh
207
+ array is returned. When used, the returned crop is a VIEW into ``out``
208
+ and is only valid until the next call that reuses the same buffer.
203
209
 
204
210
  Returns:
205
211
  cropped: Cropped data of shape (C, cropped_x, cropped_y, cropped_z).
@@ -224,6 +230,26 @@ def crop_to_valid(img, bbox):
224
230
  pad_right = end - dim_size if end > dim_size else 0
225
231
  pad.append((pad_left, pad_right))
226
232
 
233
+ # Path B: decompress the blosc2 crop straight into a reused, pre-faulted buffer to avoid the
234
+ # per-call allocation + first-touch page-fault cost. get_slice_numpy is blosc2's internal
235
+ # decompress-into-buffer method (what __getitem__ calls under the hood); guarded since it is
236
+ # not a documented public API. Falls back to a fresh allocation if the crop would not fit.
237
+ if out is not None and not isinstance(img, torch.Tensor) and hasattr(img, "get_slice_numpy"):
238
+ valid_shape = [ce - cs for cs, ce in crop_indices]
239
+ output_shape = (img.shape[0], *valid_shape)
240
+ n = int(np.prod(output_shape, dtype=np.int64))
241
+ if n <= out.size:
242
+ view = out[:n].reshape(output_shape)
243
+ start = (0, *[cs for cs, _ in crop_indices])
244
+ stop = (img.shape[0], *[ce for _, ce in crop_indices])
245
+ img.get_slice_numpy(view, (start, stop))
246
+ return view, pad
247
+ print(
248
+ f"WARNING: interaction crop of {n} elements (shape {output_shape}) exceeds the reusable "
249
+ f"decompression buffer of {out.size} elements; this should never happen. Falling back to "
250
+ "a fresh allocation."
251
+ )
252
+
227
253
  # Crop the image on spatial dimensions, leaving the channel dimension intact.
228
254
  cropped = img[
229
255
  :,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nnInteractive
3
- Version: 2.3.1
3
+ Version: 2.3.3
4
4
  Summary: Inference code for nnInteractive
5
5
  Author: Helmholtz Imaging Applied Computer Vision Lab
6
6
  Author-email: Fabian Isensee <f.isensee@dkfz-heidelberg.de>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nnInteractive"
3
- version = "2.3.1"
3
+ version = "2.3.3"
4
4
  requires-python = ">=3.10"
5
5
  description = "Inference code for nnInteractive"
6
6
  readme = "readme.md"
File without changes
File without changes
File without changes
File without changes