nnInteractive 2.3.2__tar.gz → 2.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {nninteractive-2.3.2 → nninteractive-2.3.3}/PKG-INFO +1 -1
  2. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/inference/inference_session.py +142 -36
  3. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/inference/server/app.py +6 -0
  4. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/inference/server/main.py +10 -0
  5. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/interaction/point.py +5 -0
  6. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/utils/crop.py +27 -1
  7. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive.egg-info/PKG-INFO +1 -1
  8. {nninteractive-2.3.2 → nninteractive-2.3.3}/pyproject.toml +1 -1
  9. {nninteractive-2.3.2 → nninteractive-2.3.3}/LICENSE +0 -0
  10. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/__init__.py +0 -0
  11. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/inference/__init__.py +0 -0
  12. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/inference/cvpr2025_challenge_baseline/__init__.py +0 -0
  13. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/inference/cvpr2025_challenge_baseline/predict.py +0 -0
  14. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/inference/remote/__init__.py +0 -0
  15. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/inference/remote/_protocol.py +0 -0
  16. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/inference/remote/remote_session.py +0 -0
  17. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/inference/remote/serialization.py +0 -0
  18. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/inference/server/__init__.py +0 -0
  19. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/interaction/__init__.py +0 -0
  20. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/setup.py +0 -0
  21. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/metadata.py +0 -0
  22. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/reader.py +0 -0
  23. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/run.py +0 -0
  24. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/__init__.py +0 -0
  25. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/__init__.py +0 -0
  26. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/automatic_mask_generator.py +0 -0
  27. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/benchmark.py +0 -0
  28. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/build_sam.py +0 -0
  29. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/__init__.py +0 -0
  30. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/__init__.py +0 -0
  31. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/hieradet.py +0 -0
  32. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/image_encoder.py +0 -0
  33. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/utils.py +0 -0
  34. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/memory_attention.py +0 -0
  35. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/memory_encoder.py +0 -0
  36. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/position_encoding.py +0 -0
  37. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/__init__.py +0 -0
  38. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/mask_decoder.py +0 -0
  39. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/prompt_encoder.py +0 -0
  40. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/transformer.py +0 -0
  41. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam2_base.py +0 -0
  42. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam2_utils.py +0 -0
  43. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/sam2_image_predictor.py +0 -0
  44. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/sam2_video_predictor.py +0 -0
  45. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/sam2_video_predictor_legacy.py +0 -0
  46. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/__init__.py +0 -0
  47. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/amg.py +0 -0
  48. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/misc.py +0 -0
  49. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/transforms.py +0 -0
  50. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/setup.py +0 -0
  51. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/__init__.py +0 -0
  52. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/__init__.py +0 -0
  53. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/sam2_datasets.py +0 -0
  54. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/transforms.py +0 -0
  55. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/utils.py +0 -0
  56. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_dataset.py +0 -0
  57. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_raw_dataset.py +0 -0
  58. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_sampler.py +0 -0
  59. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_segment_loader.py +0 -0
  60. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/loss_fns.py +0 -0
  61. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/model/__init__.py +0 -0
  62. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/model/sam2.py +0 -0
  63. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/optimizer.py +0 -0
  64. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/scripts/sav_frame_extraction_submitit.py +0 -0
  65. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/train.py +0 -0
  66. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/trainer.py +0 -0
  67. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/__init__.py +0 -0
  68. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/checkpoint_utils.py +0 -0
  69. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/data_utils.py +0 -0
  70. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/distributed.py +0 -0
  71. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/logger.py +0 -0
  72. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/train_utils.py +0 -0
  73. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/supervoxel.py +0 -0
  74. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/trainer/__init__.py +0 -0
  75. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/trainer/nnInteractiveTrainer.py +0 -0
  76. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/utils/__init__.py +0 -0
  77. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/utils/bboxes.py +0 -0
  78. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/utils/checkpoint_cleansing.py +0 -0
  79. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/utils/erosion_dilation.py +0 -0
  80. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/utils/inference_helpers.py +0 -0
  81. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/utils/os_shennanigans.py +0 -0
  82. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive/utils/rounding.py +0 -0
  83. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive.egg-info/SOURCES.txt +0 -0
  84. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive.egg-info/dependency_links.txt +0 -0
  85. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive.egg-info/entry_points.txt +0 -0
  86. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive.egg-info/requires.txt +0 -0
  87. {nninteractive-2.3.2 → nninteractive-2.3.3}/nnInteractive.egg-info/top_level.txt +0 -0
  88. {nninteractive-2.3.2 → nninteractive-2.3.3}/readme.md +0 -0
  89. {nninteractive-2.3.2 → nninteractive-2.3.3}/setup.cfg +0 -0
  90. {nninteractive-2.3.2 → nninteractive-2.3.3}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nnInteractive
3
- Version: 2.3.2
3
+ Version: 2.3.3
4
4
  Summary: Inference code for nnInteractive
5
5
  Author: Helmholtz Imaging Applied Computer Vision Lab
6
6
  Author-email: Fabian Isensee <f.isensee@dkfz-heidelberg.de>
@@ -31,12 +31,20 @@ from nnInteractive.utils.inference_helpers import (
31
31
  transform_coordinates_noresampling,
32
32
  version_to_tuple,
33
33
  )
34
+ from nnInteractive.utils.os_shennanigans import is_linux_kernel_6_11
34
35
  from nnInteractive.utils.rounding import round_to_nearest_odd
35
36
 
36
37
 
37
38
  class nnInteractiveInferenceSession:
38
39
  INFERENCE_SESSION_VERSION = nnInteractive.__version__
39
40
  REFINEMENT_CACHE_GPU_HEADROOM_BYTES = 4 * 1024**3
41
+ # Maximum adaptive zoom-out factor (see _predict). Also bounds the largest interaction crop,
42
+ # which sizes the reusable blosc2 decompression buffer.
43
+ MAX_AUTOZOOM_FACTOR = 4
44
+ # 'auto' interaction storage threshold: images with at most this many spatial voxels
45
+ # (512*512*1024) use the dense tensor backend; larger ones use blosc2 to bound RAM.
46
+ AUTO_TENSOR_MAX_VOXELS = 2**28
47
+ INTERACTIONS_STORAGE_OPTIONS = ("blosc2", "tensor", "auto")
40
48
  # Interactions implemented by this inference session.
41
49
  SUPPORTED_INTERACTION_KEYS = ("scribble", "lasso", "points", "bbox2d", "bbox3d")
42
50
 
@@ -47,6 +55,7 @@ class nnInteractiveInferenceSession:
47
55
  verbose: bool = False,
48
56
  torch_n_threads: int = 8,
49
57
  do_autozoom: bool = True,
58
+ interactions_storage: str = "auto",
50
59
  ):
51
60
  """
52
61
  Only intended to work with nnInteractiveTrainerV2 and its derivatives
@@ -57,7 +66,22 @@ class nnInteractiveInferenceSession:
57
66
  This is recommended for the persistent inference server, where the
58
67
  process is long-lived so the one-time compile cost is paid only once and
59
68
  amortized across the whole session lifetime.
69
+
70
+ ``interactions_storage``: storage backend for the interaction tensor, one of
71
+ ``"blosc2"``, ``"tensor"`` or ``"auto"`` (default).
72
+ ``"blosc2"`` keeps it as a compact blosc2 in-memory NDArray (low RAM, pays
73
+ (de)compression on every read/write). ``"tensor"`` stores it as a dense CPU
74
+ float16 ``torch.Tensor`` (more RAM, far lower per-access overhead; pinned memory
75
+ by default, skipped when ``device`` is not CUDA or on Linux kernel 6.11 where
76
+ pinning is buggy). ``"auto"`` decides per image at initialization from the
77
+ interaction tensor's voxel count: at most ``AUTO_TENSOR_MAX_VOXELS`` (512*512*1024)
78
+ spatial voxels uses ``"tensor"``, larger uses ``"blosc2"``.
60
79
  """
80
+ if interactions_storage not in self.INTERACTIONS_STORAGE_OPTIONS:
81
+ raise ValueError(
82
+ f"interactions_storage must be one of {self.INTERACTIONS_STORAGE_OPTIONS}, "
83
+ f"got {interactions_storage!r}."
84
+ )
61
85
  print("session initialized")
62
86
 
63
87
  self.network = None
@@ -69,6 +93,9 @@ class nnInteractiveInferenceSession:
69
93
  self._interactions_shape = None
70
94
  self.device = device
71
95
  self.use_torch_compile = use_torch_compile
96
+ self.interactions_storage = interactions_storage
97
+ # Concrete backend ("blosc2"/"tensor") resolved per image in _initialize_interactions.
98
+ self._interactions_storage_resolved: Optional[str] = None
72
99
  self.interaction_decay = None
73
100
  self.current_interaction_intensity: float = 1.0
74
101
  self._fp16_max_value = float(torch.finfo(torch.float16).max)
@@ -86,7 +113,10 @@ class nnInteractiveInferenceSession:
86
113
  self.license: Optional[str] = None
87
114
 
88
115
  # image specific
89
- self.interactions = None # blosc2.NDArray once initialized
116
+ self.interactions = None # blosc2.NDArray or dense torch.Tensor (see interactions_storage)
117
+ # Reusable, pre-faulted float16 buffer to decompress blosc2 interaction crops into (Path B).
118
+ # Allocated per image in _initialize_interactions; None for the dense-tensor backend.
119
+ self._interactions_read_buffer = None
90
120
  self.preprocessed_image: torch.Tensor = None
91
121
  self.preprocessed_props = None
92
122
  self.target_buffer: Union[np.ndarray, torch.Tensor] = None
@@ -303,19 +333,38 @@ class nnInteractiveInferenceSession:
303
333
 
304
334
  def _interactions_inplace_maximum(self, channel_idx: int, int_slicer, new_values) -> None:
305
335
  """In-place element-wise maximum for a subregion of a channel."""
336
+ full_slicer = (channel_idx, *int_slicer)
337
+ if isinstance(self.interactions, torch.Tensor):
338
+ # Dense torch backend: operate in place without a numpy round-trip.
339
+ if not isinstance(new_values, torch.Tensor):
340
+ new_values = torch.as_tensor(new_values)
341
+ view = self.interactions[full_slicer]
342
+ torch.maximum(view, new_values.to(view.dtype), out=view)
343
+ return
306
344
  if isinstance(new_values, torch.Tensor):
307
345
  new_values = new_values.cpu().numpy().astype(np.float16)
308
- full_slicer = (channel_idx, *int_slicer)
309
346
  current_sub = np.asarray(self.interactions[full_slicer])
310
347
  np.maximum(current_sub, new_values, out=current_sub)
311
348
  self.interactions[full_slicer] = current_sub
312
349
 
313
350
  def _write_interactions_channel(self, channel_idx: int, value) -> None:
314
351
  """Write a full channel. Handles torch→numpy for blosc2."""
352
+ if isinstance(self.interactions, torch.Tensor):
353
+ if not isinstance(value, torch.Tensor):
354
+ value = torch.as_tensor(value)
355
+ self.interactions[channel_idx] = value.to(self.interactions.dtype)
356
+ return
315
357
  if isinstance(value, torch.Tensor):
316
358
  value = value.cpu().numpy().astype(np.float16)
317
359
  self.interactions[channel_idx] = value
318
360
 
361
+ def _read_interactions_to_device(self, full_slicer, device) -> torch.Tensor:
362
+ """Read an interaction subregion as a torch.Tensor on ``device``, regardless of backend."""
363
+ sub = self.interactions[full_slicer]
364
+ if isinstance(sub, torch.Tensor):
365
+ return sub.to(device)
366
+ return torch.from_numpy(np.asarray(sub)).to(device)
367
+
319
368
  def _paste_prediction_to_target_buffer(self, prediction: torch.Tensor, bbox: List[List[int]]) -> None:
320
369
  target_bbox = self._interaction_bbox_to_target_bbox(bbox)
321
370
  if isinstance(self.target_buffer, torch.Tensor):
@@ -556,11 +605,30 @@ class nnInteractiveInferenceSession:
556
605
  self.original_image_shape = None
557
606
  self._last_paste_bbox = None
558
607
 
559
- def _initialize_interactions(self, image_torch: torch.Tensor):
560
- shape = (self.num_interaction_channels, *image_torch.shape[1:])
561
- if self.verbose:
562
- print("Initialize interactions with blosc2 in-memory compression")
563
- self.interactions = blosc2.zeros(
608
+ def _resolve_interactions_storage(self, spatial_shape) -> str:
609
+ """Resolve the configured storage to a concrete backend ("blosc2" or "tensor").
610
+
611
+ For "auto", pick "tensor" for images with at most AUTO_TENSOR_MAX_VOXELS spatial voxels
612
+ (lower per-access overhead) and "blosc2" for larger ones (to bound RAM).
613
+ """
614
+ if self.interactions_storage != "auto":
615
+ return self.interactions_storage
616
+ n_voxels = int(np.prod(spatial_shape, dtype=np.int64))
617
+ return "blosc2" if n_voxels > self.AUTO_TENSOR_MAX_VOXELS else "tensor"
618
+
619
+ def _new_interactions_array(self, shape, compression_nthreads: int):
620
+ """Allocate a zeroed interaction array using the resolved backend.
621
+
622
+ "tensor" selects a dense CPU float16 torch.Tensor (more RAM, lower per-access
623
+ overhead); "blosc2" uses a compact blosc2 in-memory NDArray.
624
+ """
625
+ if self._interactions_storage_resolved == "tensor":
626
+ # Pinning enables faster non-blocking host->device copies, but only helps for a
627
+ # CUDA target and is buggy on Linux kernel 6.11 (see utils/os_shennanigans).
628
+ pin = self.device.type == "cuda" and not is_linux_kernel_6_11()
629
+ tensor = torch.zeros(shape, dtype=torch.float16, device="cpu")
630
+ return tensor.pin_memory() if pin else tensor
631
+ return blosc2.zeros(
564
632
  shape,
565
633
  dtype=np.float16,
566
634
  chunks=(1, *[min(64, s) for s in shape[1:]]),
@@ -570,11 +638,49 @@ class nnInteractiveInferenceSession:
570
638
  "codec": blosc2.Codec.LZ4,
571
639
  "clevel": 5,
572
640
  "filters": [blosc2.Filter.NOFILTER],
573
- "nthreads": min(self.torch_n_threads, os.cpu_count()),
641
+ "nthreads": compression_nthreads,
574
642
  },
575
- dparams={"nthreads": 4},
643
+ # Decompression of this sparse interaction tensor is fastest single-threaded:
644
+ # blosc2's per-chunk thread sync costs more than it saves here, badly so on
645
+ # many-core/many-CCD servers (see benchmarks). Multithreading only hurts.
646
+ dparams={"nthreads": 1},
576
647
  )
648
+
649
+ def _initialize_interactions(self, image_torch: torch.Tensor):
650
+ shape = (self.num_interaction_channels, *image_torch.shape[1:])
651
+ self._interactions_storage_resolved = self._resolve_interactions_storage(shape[1:])
652
+ via_auto = self.interactions_storage == "auto"
653
+ if self.verbose or via_auto:
654
+ backend = "dense torch.Tensor" if self._interactions_storage_resolved == "tensor" else "blosc2 in-memory compression"
655
+ print(f"Initialize interactions with {backend}{' (auto)' if via_auto else ''}")
656
+ self.interactions = self._new_interactions_array(shape, min(self.torch_n_threads, os.cpu_count()))
577
657
  self._interactions_shape = shape
658
+ self._interactions_read_buffer = self._new_interactions_read_buffer(shape)
659
+
660
+ def _new_interactions_read_buffer(self, shape) -> Optional[np.ndarray]:
661
+ """Pre-faulted buffer to decompress blosc2 interaction crops into (Path B), or None.
662
+
663
+ Sized to the largest possible crop: the patch size scaled by the maximum autozoom factor,
664
+ capped to the image size. Only allocated for the blosc2 backend that exposes the
665
+ decompress-into-buffer method; the dense-tensor backend returns views and needs no buffer.
666
+ """
667
+ if self._interactions_storage_resolved != "blosc2":
668
+ return None
669
+ if not hasattr(self.interactions, "get_slice_numpy"):
670
+ print(
671
+ "WARNING: this blosc2 build has no NDArray.get_slice_numpy; cannot reuse a "
672
+ "decompression buffer for interaction crops. Falling back to a fresh allocation on "
673
+ "every read (slower). Consider updating blosc2."
674
+ )
675
+ return None
676
+ max_valid = [
677
+ min(round(p * self.MAX_AUTOZOOM_FACTOR), s)
678
+ for p, s in zip(self.configuration_manager.patch_size, shape[1:])
679
+ ]
680
+ n = self.num_interaction_channels * int(np.prod(max_valid, dtype=np.int64))
681
+ buffer = np.empty(n, dtype=np.float16)
682
+ buffer[:] = 0 # first-touch the pages once, up front
683
+ return buffer
578
684
 
579
685
  @torch.inference_mode()
580
686
  def _background_set_image(self, image: np.ndarray, image_properties: dict):
@@ -635,20 +741,7 @@ class nnInteractiveInferenceSession:
635
741
  """
636
742
  if self.interactions is not None:
637
743
  del self.interactions
638
- self.interactions = blosc2.zeros(
639
- self._interactions_shape,
640
- dtype=np.float16,
641
- chunks=(1, *[min(64, s) for s in self._interactions_shape[1:]]),
642
- blocks=(1, *[min(32, s) for s in self._interactions_shape[1:]]),
643
- # Interactions compress better with NOFILTER, which is also faster than SHUFFLE.
644
- cparams={
645
- "codec": blosc2.Codec.LZ4,
646
- "clevel": 5,
647
- "filters": [blosc2.Filter.NOFILTER],
648
- "nthreads": os.cpu_count(),
649
- },
650
- dparams={"nthreads": 4},
651
- )
744
+ self.interactions = self._new_interactions_array(self._interactions_shape, os.cpu_count())
652
745
  self.current_interaction_intensity = 1.0
653
746
 
654
747
  if self.target_buffer is not None:
@@ -980,7 +1073,9 @@ class nnInteractiveInferenceSession:
980
1073
  Returns:
981
1074
 
982
1075
  """
983
- print("Current cratio", self.interactions.cratio)
1076
+ if not isinstance(self.interactions, torch.Tensor):
1077
+ # cratio is a blosc2-only diagnostic; the dense tensor backend has no compression.
1078
+ print("Current cratio", self.interactions.cratio)
984
1079
 
985
1080
  assert self.pad_mode_data == "constant", "pad modes other than constant are not implemented here"
986
1081
  assert len(self.new_interaction_centers) == len(self.new_interaction_zoom_out_factors)
@@ -996,7 +1091,7 @@ class nnInteractiveInferenceSession:
996
1091
  "!!!WE NO LONGER RUN ONE PREDICTION PER CENTER AND ONLY USE THE LAST ADDED INTERACTION AS CENTER!!!"
997
1092
  )
998
1093
  prediction_center, zoom_out_factor = self.new_interaction_centers[-1], self.new_interaction_zoom_out_factors[-1]
999
- zoom_out_factor = min(4, zoom_out_factor)
1094
+ zoom_out_factor = min(self.MAX_AUTOZOOM_FACTOR, zoom_out_factor)
1000
1095
 
1001
1096
  start_predict = time()
1002
1097
  with torch.autocast(self.device.type, enabled=True) if self.device.type == "cuda" else dummy_context():
@@ -1005,7 +1100,9 @@ class nnInteractiveInferenceSession:
1005
1100
  input_for_predict, scaled_patch_size, scaled_bbox, previous_prediction = self._build_network_input(
1006
1101
  prediction_center, zoom_out_factor
1007
1102
  )
1008
- pred = self.network(input_for_predict[None])[0].argmax(0).detach()
1103
+ # .contiguous() is required for torch.compile: the input may be a non-contiguous
1104
+ # view (e.g. from the dense-tensor backend), and the compiled graph assumes contiguity.
1105
+ pred = self.network(input_for_predict[None].contiguous())[0].argmax(0).detach()
1009
1106
  del input_for_predict
1010
1107
 
1011
1108
  # detect changes at border. If there are, we enter autozoom
@@ -1022,17 +1119,19 @@ class nnInteractiveInferenceSession:
1022
1119
  start_zoomout = time()
1023
1120
  while has_change and self.do_autozoom:
1024
1121
  print(f"AutoZoom zoom out factor {zoom_out_factor}")
1025
- # we allow a max zoom out of 4
1026
- if zoom_out_factor >= 4:
1122
+ # we allow a max zoom out of MAX_AUTOZOOM_FACTOR
1123
+ if zoom_out_factor >= self.MAX_AUTOZOOM_FACTOR:
1027
1124
  break
1028
1125
  else:
1029
1126
  zoom_out_factor *= zoom_out_growth_factor
1030
- zoom_out_factor = min(4, zoom_out_factor)
1127
+ zoom_out_factor = min(self.MAX_AUTOZOOM_FACTOR, zoom_out_factor)
1031
1128
 
1032
1129
  input_for_predict, scaled_patch_size, scaled_bbox, previous_prediction_resized = (
1033
1130
  self._build_network_input(prediction_center, zoom_out_factor)
1034
1131
  )
1035
- pred = self.network(input_for_predict[None])[0].argmax(0).detach()
1132
+ # .contiguous() is required for torch.compile: the input may be a non-contiguous
1133
+ # view (e.g. from the dense-tensor backend), and the compiled graph assumes contiguity.
1134
+ pred = self.network(input_for_predict[None].contiguous())[0].argmax(0).detach()
1036
1135
  del input_for_predict
1037
1136
  empty_cache(self.device)
1038
1137
 
@@ -1077,7 +1176,9 @@ class nnInteractiveInferenceSession:
1077
1176
 
1078
1177
  # cropping happens on CPU, padding happens on GPU (later)
1079
1178
  crop_img, pad_image = crop_to_valid(self.preprocessed_image, scaled_bbox)
1080
- interactions_tensor, pad_interaction = crop_to_valid(self.interactions, scaled_bbox)
1179
+ interactions_tensor, pad_interaction = crop_to_valid(
1180
+ self.interactions, scaled_bbox, out=self._interactions_read_buffer
1181
+ )
1081
1182
  # For blosc2, crop_to_valid returns a numpy array; convert to torch (still on CPU).
1082
1183
  if not isinstance(interactions_tensor, torch.Tensor):
1083
1184
  interactions_tensor = torch.from_numpy(np.asarray(interactions_tensor))
@@ -1174,7 +1275,8 @@ class nnInteractiveInferenceSession:
1174
1275
  dim=0,
1175
1276
  )
1176
1277
 
1177
- pred = self.network(patch[None])[0].argmax(0).detach()
1278
+ # .contiguous(): see _predict — required for torch.compile with possibly non-contiguous input.
1279
+ pred = self.network(patch[None].contiguous())[0].argmax(0).detach()
1178
1280
  paste_tensor(
1179
1281
  cache_interactions,
1180
1282
  pred.to(cache_interactions.device, dtype=cache_interactions.dtype),
@@ -1261,7 +1363,7 @@ class nnInteractiveInferenceSession:
1261
1363
  pred_slicer = tuple(slice(lb, ub) for lb, ub in pred_bbox)
1262
1364
  local_slicer = tuple(slice(lb, ub) for lb, ub in local_seen_bbox)
1263
1365
 
1264
- prev_sub = torch.from_numpy(np.asarray(self.interactions[(prev_seg_ch, *seen_slicer)])).to(self.device)
1366
+ prev_sub = self._read_interactions_to_device((prev_seg_ch, *seen_slicer), self.device)
1265
1367
 
1266
1368
  diff_local[local_slicer] = (pred[pred_slicer] != prev_sub).to(diff_local.dtype)
1267
1369
  del prev_sub
@@ -1280,7 +1382,7 @@ class nnInteractiveInferenceSession:
1280
1382
  def _mark_prev_seg_in_local_diff(self, diff_local: torch.Tensor, planning_bbox: List[List[int]]) -> None:
1281
1383
  prev_seg_ch = self._get_prev_seg_channel()
1282
1384
  planning_slicer = tuple(slice(lb, ub) for lb, ub in planning_bbox)
1283
- prev_sub = torch.from_numpy(np.asarray(self.interactions[(prev_seg_ch, *planning_slicer)])).to(self.device)
1385
+ prev_sub = self._read_interactions_to_device((prev_seg_ch, *planning_slicer), self.device)
1284
1386
  diff_local[prev_sub > 0.5] = 1
1285
1387
  del prev_sub
1286
1388
 
@@ -1548,8 +1650,12 @@ class nnInteractiveInferenceSession:
1548
1650
  self.network = self.network.to(self.device)
1549
1651
 
1550
1652
  def __del__(self):
1551
- self._finish_preprocessing_and_initialize_interactions()
1552
- self.executor.shutdown()
1653
+ # Be robust to a partially-constructed instance (e.g. __init__ raised on bad arguments):
1654
+ # these attributes may not exist yet.
1655
+ if hasattr(self, "preprocess_future"):
1656
+ self._finish_preprocessing_and_initialize_interactions()
1657
+ if hasattr(self, "executor"):
1658
+ self.executor.shutdown()
1553
1659
 
1554
1660
 
1555
1661
  if __name__ == "__main__":
@@ -161,6 +161,7 @@ class SessionRegistry:
161
161
  torch_n_threads: int,
162
162
  do_autozoom: bool,
163
163
  use_torch_compile: bool,
164
+ interactions_storage: str,
164
165
  verbose: bool,
165
166
  ) -> None:
166
167
  self._artifacts = artifacts
@@ -171,6 +172,7 @@ class SessionRegistry:
171
172
  self._torch_n_threads = torch_n_threads
172
173
  self._do_autozoom = do_autozoom
173
174
  self._use_torch_compile = use_torch_compile
175
+ self._interactions_storage = interactions_storage
174
176
  self._verbose = verbose
175
177
  self._entries: dict[str, SessionEntry] = {}
176
178
  self._mu = threading.Lock()
@@ -199,6 +201,7 @@ class SessionRegistry:
199
201
  verbose=self._verbose,
200
202
  torch_n_threads=self._torch_n_threads,
201
203
  do_autozoom=self._do_autozoom,
204
+ interactions_storage=self._interactions_storage,
202
205
  )
203
206
  session.initialize_from_loaded_artifacts(self._artifacts)
204
207
  entry = SessionEntry(session)
@@ -300,6 +303,7 @@ def make_app(
300
303
  torch_n_threads: int = 8,
301
304
  do_autozoom: bool = True,
302
305
  use_torch_compile: bool = False,
306
+ interactions_storage: str = "auto",
303
307
  verbose: bool = False,
304
308
  api_key: Optional[str] = None,
305
309
  sweep_interval_seconds: float = 15.0,
@@ -313,6 +317,7 @@ def make_app(
313
317
  torch_n_threads=torch_n_threads,
314
318
  do_autozoom=do_autozoom,
315
319
  use_torch_compile=use_torch_compile,
320
+ interactions_storage=interactions_storage,
316
321
  verbose=verbose,
317
322
  )
318
323
  gpu_lock = threading.Lock()
@@ -363,6 +368,7 @@ def make_app(
363
368
  verbose=False,
364
369
  torch_n_threads=torch_n_threads,
365
370
  do_autozoom=do_autozoom,
371
+ interactions_storage=interactions_storage,
366
372
  )
367
373
  _capability_session.initialize_from_loaded_artifacts(artifacts)
368
374
  _capability_snapshot = _build_capability_snapshot(_capability_session)
@@ -61,6 +61,15 @@ def _build_parser() -> argparse.ArgumentParser:
61
61
  "the long-lived process. Pass this flag to skip compilation (e.g. for faster startup or "
62
62
  "to work around a compile/backend issue).",
63
63
  )
64
+ p.add_argument(
65
+ "--interactions-storage",
66
+ choices=["blosc2", "tensor", "auto"],
67
+ default="auto",
68
+ help="Storage backend for the interaction tensor (default: auto). 'blosc2': compact "
69
+ "in-memory array (low RAM, pays (de)compression per read/write). 'tensor': dense pinned "
70
+ "CPU float16 torch.Tensor (more RAM, lower per-access overhead). 'auto': per image, use "
71
+ "'tensor' for images up to 512x512x1024 voxels and 'blosc2' for larger ones.",
72
+ )
64
73
  p.add_argument(
65
74
  "--no-autozoom",
66
75
  action="store_true",
@@ -222,6 +231,7 @@ def main(argv=None) -> int:
222
231
  torch_n_threads=args.torch_n_threads,
223
232
  do_autozoom=not args.no_autozoom,
224
233
  use_torch_compile=use_torch_compile,
234
+ interactions_storage=args.interactions_storage,
225
235
  verbose=args.verbose,
226
236
  api_key=api_key,
227
237
  )
@@ -124,6 +124,11 @@ class PointInteraction_stub:
124
124
  )
125
125
 
126
126
  target_slices = (channel_idx, *slices)
127
+ if isinstance(interaction_map, torch.Tensor):
128
+ # Dense torch backend: in-place maximum, no numpy round-trip.
129
+ view = interaction_map[target_slices]
130
+ torch.maximum(view, strel[structuring_slices].to(view.dtype), out=view)
131
+ return interaction_map
127
132
  current_sub = np.asarray(interaction_map[target_slices])
128
133
  strel_np = strel[structuring_slices].numpy().astype(current_sub.dtype)
129
134
  np.maximum(current_sub, strel_np, out=current_sub)
@@ -190,7 +190,7 @@ def paste_tensor(target, source, bbox, channel_idx=None):
190
190
  return target
191
191
 
192
192
 
193
- def crop_to_valid(img, bbox):
193
+ def crop_to_valid(img, bbox, out=None):
194
194
  """
195
195
  Crops the image to the part of the bounding box that lies within the image.
196
196
  Supports a 4D tensor of shape (C, X, Y, Z). The bounding box is specified as
@@ -200,6 +200,12 @@ def crop_to_valid(img, bbox):
200
200
  img: Input tensor (or blosc2 NDArray) of shape (C, X, Y, Z).
201
201
  bbox (list or tuple): Bounding box as a list of three intervals for spatial dims:
202
202
  [[x1, x2], [y1, y2], [z1, z2]].
203
+ out (np.ndarray, optional): A flat, pre-faulted float16 buffer to decompress a blosc2
204
+ crop into, avoiding a fresh allocation + page-fault on every call
205
+ ("Path B"). Only used when ``img`` is a blosc2 NDArray exposing
206
+ ``get_slice_numpy`` and the crop fits; otherwise ignored and a fresh
207
+ array is returned. When used, the returned crop is a VIEW into ``out``
208
+ and is only valid until the next call that reuses the same buffer.
203
209
 
204
210
  Returns:
205
211
  cropped: Cropped data of shape (C, cropped_x, cropped_y, cropped_z).
@@ -224,6 +230,26 @@ def crop_to_valid(img, bbox):
224
230
  pad_right = end - dim_size if end > dim_size else 0
225
231
  pad.append((pad_left, pad_right))
226
232
 
233
+ # Path B: decompress the blosc2 crop straight into a reused, pre-faulted buffer to avoid the
234
+ # per-call allocation + first-touch page-fault cost. get_slice_numpy is blosc2's internal
235
+ # decompress-into-buffer method (what __getitem__ calls under the hood); guarded since it is
236
+ # not a documented public API. Falls back to a fresh allocation if the crop would not fit.
237
+ if out is not None and not isinstance(img, torch.Tensor) and hasattr(img, "get_slice_numpy"):
238
+ valid_shape = [ce - cs for cs, ce in crop_indices]
239
+ output_shape = (img.shape[0], *valid_shape)
240
+ n = int(np.prod(output_shape, dtype=np.int64))
241
+ if n <= out.size:
242
+ view = out[:n].reshape(output_shape)
243
+ start = (0, *[cs for cs, _ in crop_indices])
244
+ stop = (img.shape[0], *[ce for _, ce in crop_indices])
245
+ img.get_slice_numpy(view, (start, stop))
246
+ return view, pad
247
+ print(
248
+ f"WARNING: interaction crop of {n} elements (shape {output_shape}) exceeds the reusable "
249
+ f"decompression buffer of {out.size} elements; this should never happen. Falling back to "
250
+ "a fresh allocation."
251
+ )
252
+
227
253
  # Crop the image on spatial dimensions, leaving the channel dimension intact.
228
254
  cropped = img[
229
255
  :,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nnInteractive
3
- Version: 2.3.2
3
+ Version: 2.3.3
4
4
  Summary: Inference code for nnInteractive
5
5
  Author: Helmholtz Imaging Applied Computer Vision Lab
6
6
  Author-email: Fabian Isensee <f.isensee@dkfz-heidelberg.de>
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "nnInteractive"
3
- version = "2.3.2"
3
+ version = "2.3.3"
4
4
  requires-python = ">=3.10"
5
5
  description = "Inference code for nnInteractive"
6
6
  readme = "readme.md"
File without changes
File without changes
File without changes
File without changes