nnInteractive 2.3.1__tar.gz → 2.3.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nninteractive-2.3.1 → nninteractive-2.3.3}/PKG-INFO +1 -1
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/inference_session.py +142 -36
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/remote/remote_session.py +34 -3
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/server/app.py +64 -29
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/server/main.py +10 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/interaction/point.py +5 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/crop.py +27 -1
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/PKG-INFO +1 -1
- {nninteractive-2.3.1 → nninteractive-2.3.3}/pyproject.toml +1 -1
- {nninteractive-2.3.1 → nninteractive-2.3.3}/LICENSE +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/cvpr2025_challenge_baseline/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/cvpr2025_challenge_baseline/predict.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/remote/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/remote/_protocol.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/remote/serialization.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/server/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/interaction/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/setup.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/metadata.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/reader.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/run.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/automatic_mask_generator.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/benchmark.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/build_sam.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/hieradet.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/image_encoder.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/backbones/utils.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/memory_attention.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/memory_encoder.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/position_encoding.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/mask_decoder.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/prompt_encoder.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam/transformer.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam2_base.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/modeling/sam2_utils.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/sam2_image_predictor.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/sam2_video_predictor.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/sam2_video_predictor_legacy.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/amg.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/misc.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/transforms.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/setup.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/sam2_datasets.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/transforms.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/utils.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_dataset.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_raw_dataset.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_sampler.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/dataset/vos_segment_loader.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/loss_fns.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/model/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/model/sam2.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/optimizer.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/scripts/sav_frame_extraction_submitit.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/train.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/trainer.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/checkpoint_utils.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/data_utils.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/distributed.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/logger.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/utils/train_utils.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/supervoxel.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/trainer/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/trainer/nnInteractiveTrainer.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/__init__.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/bboxes.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/checkpoint_cleansing.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/erosion_dilation.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/inference_helpers.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/os_shennanigans.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/utils/rounding.py +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/SOURCES.txt +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/dependency_links.txt +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/entry_points.txt +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/requires.txt +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive.egg-info/top_level.txt +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/readme.md +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/setup.cfg +0 -0
- {nninteractive-2.3.1 → nninteractive-2.3.3}/setup.py +0 -0
|
@@ -31,12 +31,20 @@ from nnInteractive.utils.inference_helpers import (
|
|
|
31
31
|
transform_coordinates_noresampling,
|
|
32
32
|
version_to_tuple,
|
|
33
33
|
)
|
|
34
|
+
from nnInteractive.utils.os_shennanigans import is_linux_kernel_6_11
|
|
34
35
|
from nnInteractive.utils.rounding import round_to_nearest_odd
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
class nnInteractiveInferenceSession:
|
|
38
39
|
INFERENCE_SESSION_VERSION = nnInteractive.__version__
|
|
39
40
|
REFINEMENT_CACHE_GPU_HEADROOM_BYTES = 4 * 1024**3
|
|
41
|
+
# Maximum adaptive zoom-out factor (see _predict). Also bounds the largest interaction crop,
|
|
42
|
+
# which sizes the reusable blosc2 decompression buffer.
|
|
43
|
+
MAX_AUTOZOOM_FACTOR = 4
|
|
44
|
+
# 'auto' interaction storage threshold: images with at most this many spatial voxels
|
|
45
|
+
# (512*512*1024) use the dense tensor backend; larger ones use blosc2 to bound RAM.
|
|
46
|
+
AUTO_TENSOR_MAX_VOXELS = 2**28
|
|
47
|
+
INTERACTIONS_STORAGE_OPTIONS = ("blosc2", "tensor", "auto")
|
|
40
48
|
# Interactions implemented by this inference session.
|
|
41
49
|
SUPPORTED_INTERACTION_KEYS = ("scribble", "lasso", "points", "bbox2d", "bbox3d")
|
|
42
50
|
|
|
@@ -47,6 +55,7 @@ class nnInteractiveInferenceSession:
|
|
|
47
55
|
verbose: bool = False,
|
|
48
56
|
torch_n_threads: int = 8,
|
|
49
57
|
do_autozoom: bool = True,
|
|
58
|
+
interactions_storage: str = "auto",
|
|
50
59
|
):
|
|
51
60
|
"""
|
|
52
61
|
Only intended to work with nnInteractiveTrainerV2 and its derivatives
|
|
@@ -57,7 +66,22 @@ class nnInteractiveInferenceSession:
|
|
|
57
66
|
This is recommended for the persistent inference server, where the
|
|
58
67
|
process is long-lived so the one-time compile cost is paid only once and
|
|
59
68
|
amortized across the whole session lifetime.
|
|
69
|
+
|
|
70
|
+
``interactions_storage``: storage backend for the interaction tensor, one of
|
|
71
|
+
``"blosc2"``, ``"tensor"`` or ``"auto"`` (default).
|
|
72
|
+
``"blosc2"`` keeps it as a compact blosc2 in-memory NDArray (low RAM, pays
|
|
73
|
+
(de)compression on every read/write). ``"tensor"`` stores it as a dense CPU
|
|
74
|
+
float16 ``torch.Tensor`` (more RAM, far lower per-access overhead; pinned memory
|
|
75
|
+
by default, skipped when ``device`` is not CUDA or on Linux kernel 6.11 where
|
|
76
|
+
pinning is buggy). ``"auto"`` decides per image at initialization from the
|
|
77
|
+
interaction tensor's voxel count: at most ``AUTO_TENSOR_MAX_VOXELS`` (512*512*1024)
|
|
78
|
+
spatial voxels uses ``"tensor"``, larger uses ``"blosc2"``.
|
|
60
79
|
"""
|
|
80
|
+
if interactions_storage not in self.INTERACTIONS_STORAGE_OPTIONS:
|
|
81
|
+
raise ValueError(
|
|
82
|
+
f"interactions_storage must be one of {self.INTERACTIONS_STORAGE_OPTIONS}, "
|
|
83
|
+
f"got {interactions_storage!r}."
|
|
84
|
+
)
|
|
61
85
|
print("session initialized")
|
|
62
86
|
|
|
63
87
|
self.network = None
|
|
@@ -69,6 +93,9 @@ class nnInteractiveInferenceSession:
|
|
|
69
93
|
self._interactions_shape = None
|
|
70
94
|
self.device = device
|
|
71
95
|
self.use_torch_compile = use_torch_compile
|
|
96
|
+
self.interactions_storage = interactions_storage
|
|
97
|
+
# Concrete backend ("blosc2"/"tensor") resolved per image in _initialize_interactions.
|
|
98
|
+
self._interactions_storage_resolved: Optional[str] = None
|
|
72
99
|
self.interaction_decay = None
|
|
73
100
|
self.current_interaction_intensity: float = 1.0
|
|
74
101
|
self._fp16_max_value = float(torch.finfo(torch.float16).max)
|
|
@@ -86,7 +113,10 @@ class nnInteractiveInferenceSession:
|
|
|
86
113
|
self.license: Optional[str] = None
|
|
87
114
|
|
|
88
115
|
# image specific
|
|
89
|
-
self.interactions = None # blosc2.NDArray
|
|
116
|
+
self.interactions = None # blosc2.NDArray or dense torch.Tensor (see interactions_storage)
|
|
117
|
+
# Reusable, pre-faulted float16 buffer to decompress blosc2 interaction crops into (Path B).
|
|
118
|
+
# Allocated per image in _initialize_interactions; None for the dense-tensor backend.
|
|
119
|
+
self._interactions_read_buffer = None
|
|
90
120
|
self.preprocessed_image: torch.Tensor = None
|
|
91
121
|
self.preprocessed_props = None
|
|
92
122
|
self.target_buffer: Union[np.ndarray, torch.Tensor] = None
|
|
@@ -303,19 +333,38 @@ class nnInteractiveInferenceSession:
|
|
|
303
333
|
|
|
304
334
|
def _interactions_inplace_maximum(self, channel_idx: int, int_slicer, new_values) -> None:
|
|
305
335
|
"""In-place element-wise maximum for a subregion of a channel."""
|
|
336
|
+
full_slicer = (channel_idx, *int_slicer)
|
|
337
|
+
if isinstance(self.interactions, torch.Tensor):
|
|
338
|
+
# Dense torch backend: operate in place without a numpy round-trip.
|
|
339
|
+
if not isinstance(new_values, torch.Tensor):
|
|
340
|
+
new_values = torch.as_tensor(new_values)
|
|
341
|
+
view = self.interactions[full_slicer]
|
|
342
|
+
torch.maximum(view, new_values.to(view.dtype), out=view)
|
|
343
|
+
return
|
|
306
344
|
if isinstance(new_values, torch.Tensor):
|
|
307
345
|
new_values = new_values.cpu().numpy().astype(np.float16)
|
|
308
|
-
full_slicer = (channel_idx, *int_slicer)
|
|
309
346
|
current_sub = np.asarray(self.interactions[full_slicer])
|
|
310
347
|
np.maximum(current_sub, new_values, out=current_sub)
|
|
311
348
|
self.interactions[full_slicer] = current_sub
|
|
312
349
|
|
|
313
350
|
def _write_interactions_channel(self, channel_idx: int, value) -> None:
|
|
314
351
|
"""Write a full channel. Handles torch→numpy for blosc2."""
|
|
352
|
+
if isinstance(self.interactions, torch.Tensor):
|
|
353
|
+
if not isinstance(value, torch.Tensor):
|
|
354
|
+
value = torch.as_tensor(value)
|
|
355
|
+
self.interactions[channel_idx] = value.to(self.interactions.dtype)
|
|
356
|
+
return
|
|
315
357
|
if isinstance(value, torch.Tensor):
|
|
316
358
|
value = value.cpu().numpy().astype(np.float16)
|
|
317
359
|
self.interactions[channel_idx] = value
|
|
318
360
|
|
|
361
|
+
def _read_interactions_to_device(self, full_slicer, device) -> torch.Tensor:
|
|
362
|
+
"""Read an interaction subregion as a torch.Tensor on ``device``, regardless of backend."""
|
|
363
|
+
sub = self.interactions[full_slicer]
|
|
364
|
+
if isinstance(sub, torch.Tensor):
|
|
365
|
+
return sub.to(device)
|
|
366
|
+
return torch.from_numpy(np.asarray(sub)).to(device)
|
|
367
|
+
|
|
319
368
|
def _paste_prediction_to_target_buffer(self, prediction: torch.Tensor, bbox: List[List[int]]) -> None:
|
|
320
369
|
target_bbox = self._interaction_bbox_to_target_bbox(bbox)
|
|
321
370
|
if isinstance(self.target_buffer, torch.Tensor):
|
|
@@ -556,11 +605,30 @@ class nnInteractiveInferenceSession:
|
|
|
556
605
|
self.original_image_shape = None
|
|
557
606
|
self._last_paste_bbox = None
|
|
558
607
|
|
|
559
|
-
def
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
608
|
+
def _resolve_interactions_storage(self, spatial_shape) -> str:
|
|
609
|
+
"""Resolve the configured storage to a concrete backend ("blosc2" or "tensor").
|
|
610
|
+
|
|
611
|
+
For "auto", pick "tensor" for images with at most AUTO_TENSOR_MAX_VOXELS spatial voxels
|
|
612
|
+
(lower per-access overhead) and "blosc2" for larger ones (to bound RAM).
|
|
613
|
+
"""
|
|
614
|
+
if self.interactions_storage != "auto":
|
|
615
|
+
return self.interactions_storage
|
|
616
|
+
n_voxels = int(np.prod(spatial_shape, dtype=np.int64))
|
|
617
|
+
return "blosc2" if n_voxels > self.AUTO_TENSOR_MAX_VOXELS else "tensor"
|
|
618
|
+
|
|
619
|
+
def _new_interactions_array(self, shape, compression_nthreads: int):
|
|
620
|
+
"""Allocate a zeroed interaction array using the resolved backend.
|
|
621
|
+
|
|
622
|
+
"tensor" selects a dense CPU float16 torch.Tensor (more RAM, lower per-access
|
|
623
|
+
overhead); "blosc2" uses a compact blosc2 in-memory NDArray.
|
|
624
|
+
"""
|
|
625
|
+
if self._interactions_storage_resolved == "tensor":
|
|
626
|
+
# Pinning enables faster non-blocking host->device copies, but only helps for a
|
|
627
|
+
# CUDA target and is buggy on Linux kernel 6.11 (see utils/os_shennanigans).
|
|
628
|
+
pin = self.device.type == "cuda" and not is_linux_kernel_6_11()
|
|
629
|
+
tensor = torch.zeros(shape, dtype=torch.float16, device="cpu")
|
|
630
|
+
return tensor.pin_memory() if pin else tensor
|
|
631
|
+
return blosc2.zeros(
|
|
564
632
|
shape,
|
|
565
633
|
dtype=np.float16,
|
|
566
634
|
chunks=(1, *[min(64, s) for s in shape[1:]]),
|
|
@@ -570,11 +638,49 @@ class nnInteractiveInferenceSession:
|
|
|
570
638
|
"codec": blosc2.Codec.LZ4,
|
|
571
639
|
"clevel": 5,
|
|
572
640
|
"filters": [blosc2.Filter.NOFILTER],
|
|
573
|
-
"nthreads":
|
|
641
|
+
"nthreads": compression_nthreads,
|
|
574
642
|
},
|
|
575
|
-
|
|
643
|
+
# Decompression of this sparse interaction tensor is fastest single-threaded:
|
|
644
|
+
# blosc2's per-chunk thread sync costs more than it saves here, badly so on
|
|
645
|
+
# many-core/many-CCD servers (see benchmarks). Multithreading only hurts.
|
|
646
|
+
dparams={"nthreads": 1},
|
|
576
647
|
)
|
|
648
|
+
|
|
649
|
+
def _initialize_interactions(self, image_torch: torch.Tensor):
|
|
650
|
+
shape = (self.num_interaction_channels, *image_torch.shape[1:])
|
|
651
|
+
self._interactions_storage_resolved = self._resolve_interactions_storage(shape[1:])
|
|
652
|
+
via_auto = self.interactions_storage == "auto"
|
|
653
|
+
if self.verbose or via_auto:
|
|
654
|
+
backend = "dense torch.Tensor" if self._interactions_storage_resolved == "tensor" else "blosc2 in-memory compression"
|
|
655
|
+
print(f"Initialize interactions with {backend}{' (auto)' if via_auto else ''}")
|
|
656
|
+
self.interactions = self._new_interactions_array(shape, min(self.torch_n_threads, os.cpu_count()))
|
|
577
657
|
self._interactions_shape = shape
|
|
658
|
+
self._interactions_read_buffer = self._new_interactions_read_buffer(shape)
|
|
659
|
+
|
|
660
|
+
def _new_interactions_read_buffer(self, shape) -> Optional[np.ndarray]:
|
|
661
|
+
"""Pre-faulted buffer to decompress blosc2 interaction crops into (Path B), or None.
|
|
662
|
+
|
|
663
|
+
Sized to the largest possible crop: the patch size scaled by the maximum autozoom factor,
|
|
664
|
+
capped to the image size. Only allocated for the blosc2 backend that exposes the
|
|
665
|
+
decompress-into-buffer method; the dense-tensor backend returns views and needs no buffer.
|
|
666
|
+
"""
|
|
667
|
+
if self._interactions_storage_resolved != "blosc2":
|
|
668
|
+
return None
|
|
669
|
+
if not hasattr(self.interactions, "get_slice_numpy"):
|
|
670
|
+
print(
|
|
671
|
+
"WARNING: this blosc2 build has no NDArray.get_slice_numpy; cannot reuse a "
|
|
672
|
+
"decompression buffer for interaction crops. Falling back to a fresh allocation on "
|
|
673
|
+
"every read (slower). Consider updating blosc2."
|
|
674
|
+
)
|
|
675
|
+
return None
|
|
676
|
+
max_valid = [
|
|
677
|
+
min(round(p * self.MAX_AUTOZOOM_FACTOR), s)
|
|
678
|
+
for p, s in zip(self.configuration_manager.patch_size, shape[1:])
|
|
679
|
+
]
|
|
680
|
+
n = self.num_interaction_channels * int(np.prod(max_valid, dtype=np.int64))
|
|
681
|
+
buffer = np.empty(n, dtype=np.float16)
|
|
682
|
+
buffer[:] = 0 # first-touch the pages once, up front
|
|
683
|
+
return buffer
|
|
578
684
|
|
|
579
685
|
@torch.inference_mode()
|
|
580
686
|
def _background_set_image(self, image: np.ndarray, image_properties: dict):
|
|
@@ -635,20 +741,7 @@ class nnInteractiveInferenceSession:
|
|
|
635
741
|
"""
|
|
636
742
|
if self.interactions is not None:
|
|
637
743
|
del self.interactions
|
|
638
|
-
self.interactions =
|
|
639
|
-
self._interactions_shape,
|
|
640
|
-
dtype=np.float16,
|
|
641
|
-
chunks=(1, *[min(64, s) for s in self._interactions_shape[1:]]),
|
|
642
|
-
blocks=(1, *[min(32, s) for s in self._interactions_shape[1:]]),
|
|
643
|
-
# Interactions compress better with NOFILTER, which is also faster than SHUFFLE.
|
|
644
|
-
cparams={
|
|
645
|
-
"codec": blosc2.Codec.LZ4,
|
|
646
|
-
"clevel": 5,
|
|
647
|
-
"filters": [blosc2.Filter.NOFILTER],
|
|
648
|
-
"nthreads": os.cpu_count(),
|
|
649
|
-
},
|
|
650
|
-
dparams={"nthreads": 4},
|
|
651
|
-
)
|
|
744
|
+
self.interactions = self._new_interactions_array(self._interactions_shape, os.cpu_count())
|
|
652
745
|
self.current_interaction_intensity = 1.0
|
|
653
746
|
|
|
654
747
|
if self.target_buffer is not None:
|
|
@@ -980,7 +1073,9 @@ class nnInteractiveInferenceSession:
|
|
|
980
1073
|
Returns:
|
|
981
1074
|
|
|
982
1075
|
"""
|
|
983
|
-
|
|
1076
|
+
if not isinstance(self.interactions, torch.Tensor):
|
|
1077
|
+
# cratio is a blosc2-only diagnostic; the dense tensor backend has no compression.
|
|
1078
|
+
print("Current cratio", self.interactions.cratio)
|
|
984
1079
|
|
|
985
1080
|
assert self.pad_mode_data == "constant", "pad modes other than constant are not implemented here"
|
|
986
1081
|
assert len(self.new_interaction_centers) == len(self.new_interaction_zoom_out_factors)
|
|
@@ -996,7 +1091,7 @@ class nnInteractiveInferenceSession:
|
|
|
996
1091
|
"!!!WE NO LONGER RUN ONE PREDICTION PER CENTER AND ONLY USE THE LAST ADDED INTERACTION AS CENTER!!!"
|
|
997
1092
|
)
|
|
998
1093
|
prediction_center, zoom_out_factor = self.new_interaction_centers[-1], self.new_interaction_zoom_out_factors[-1]
|
|
999
|
-
zoom_out_factor = min(
|
|
1094
|
+
zoom_out_factor = min(self.MAX_AUTOZOOM_FACTOR, zoom_out_factor)
|
|
1000
1095
|
|
|
1001
1096
|
start_predict = time()
|
|
1002
1097
|
with torch.autocast(self.device.type, enabled=True) if self.device.type == "cuda" else dummy_context():
|
|
@@ -1005,7 +1100,9 @@ class nnInteractiveInferenceSession:
|
|
|
1005
1100
|
input_for_predict, scaled_patch_size, scaled_bbox, previous_prediction = self._build_network_input(
|
|
1006
1101
|
prediction_center, zoom_out_factor
|
|
1007
1102
|
)
|
|
1008
|
-
|
|
1103
|
+
# .contiguous() is required for torch.compile: the input may be a non-contiguous
|
|
1104
|
+
# view (e.g. from the dense-tensor backend), and the compiled graph assumes contiguity.
|
|
1105
|
+
pred = self.network(input_for_predict[None].contiguous())[0].argmax(0).detach()
|
|
1009
1106
|
del input_for_predict
|
|
1010
1107
|
|
|
1011
1108
|
# detect changes at border. If there are, we enter autozoom
|
|
@@ -1022,17 +1119,19 @@ class nnInteractiveInferenceSession:
|
|
|
1022
1119
|
start_zoomout = time()
|
|
1023
1120
|
while has_change and self.do_autozoom:
|
|
1024
1121
|
print(f"AutoZoom zoom out factor {zoom_out_factor}")
|
|
1025
|
-
# we allow a max zoom out of
|
|
1026
|
-
if zoom_out_factor >=
|
|
1122
|
+
# we allow a max zoom out of MAX_AUTOZOOM_FACTOR
|
|
1123
|
+
if zoom_out_factor >= self.MAX_AUTOZOOM_FACTOR:
|
|
1027
1124
|
break
|
|
1028
1125
|
else:
|
|
1029
1126
|
zoom_out_factor *= zoom_out_growth_factor
|
|
1030
|
-
zoom_out_factor = min(
|
|
1127
|
+
zoom_out_factor = min(self.MAX_AUTOZOOM_FACTOR, zoom_out_factor)
|
|
1031
1128
|
|
|
1032
1129
|
input_for_predict, scaled_patch_size, scaled_bbox, previous_prediction_resized = (
|
|
1033
1130
|
self._build_network_input(prediction_center, zoom_out_factor)
|
|
1034
1131
|
)
|
|
1035
|
-
|
|
1132
|
+
# .contiguous() is required for torch.compile: the input may be a non-contiguous
|
|
1133
|
+
# view (e.g. from the dense-tensor backend), and the compiled graph assumes contiguity.
|
|
1134
|
+
pred = self.network(input_for_predict[None].contiguous())[0].argmax(0).detach()
|
|
1036
1135
|
del input_for_predict
|
|
1037
1136
|
empty_cache(self.device)
|
|
1038
1137
|
|
|
@@ -1077,7 +1176,9 @@ class nnInteractiveInferenceSession:
|
|
|
1077
1176
|
|
|
1078
1177
|
# cropping happens on CPU, padding happens on GPU (later)
|
|
1079
1178
|
crop_img, pad_image = crop_to_valid(self.preprocessed_image, scaled_bbox)
|
|
1080
|
-
interactions_tensor, pad_interaction = crop_to_valid(
|
|
1179
|
+
interactions_tensor, pad_interaction = crop_to_valid(
|
|
1180
|
+
self.interactions, scaled_bbox, out=self._interactions_read_buffer
|
|
1181
|
+
)
|
|
1081
1182
|
# For blosc2, crop_to_valid returns a numpy array; convert to torch (still on CPU).
|
|
1082
1183
|
if not isinstance(interactions_tensor, torch.Tensor):
|
|
1083
1184
|
interactions_tensor = torch.from_numpy(np.asarray(interactions_tensor))
|
|
@@ -1174,7 +1275,8 @@ class nnInteractiveInferenceSession:
|
|
|
1174
1275
|
dim=0,
|
|
1175
1276
|
)
|
|
1176
1277
|
|
|
1177
|
-
|
|
1278
|
+
# .contiguous(): see _predict — required for torch.compile with possibly non-contiguous input.
|
|
1279
|
+
pred = self.network(patch[None].contiguous())[0].argmax(0).detach()
|
|
1178
1280
|
paste_tensor(
|
|
1179
1281
|
cache_interactions,
|
|
1180
1282
|
pred.to(cache_interactions.device, dtype=cache_interactions.dtype),
|
|
@@ -1261,7 +1363,7 @@ class nnInteractiveInferenceSession:
|
|
|
1261
1363
|
pred_slicer = tuple(slice(lb, ub) for lb, ub in pred_bbox)
|
|
1262
1364
|
local_slicer = tuple(slice(lb, ub) for lb, ub in local_seen_bbox)
|
|
1263
1365
|
|
|
1264
|
-
prev_sub =
|
|
1366
|
+
prev_sub = self._read_interactions_to_device((prev_seg_ch, *seen_slicer), self.device)
|
|
1265
1367
|
|
|
1266
1368
|
diff_local[local_slicer] = (pred[pred_slicer] != prev_sub).to(diff_local.dtype)
|
|
1267
1369
|
del prev_sub
|
|
@@ -1280,7 +1382,7 @@ class nnInteractiveInferenceSession:
|
|
|
1280
1382
|
def _mark_prev_seg_in_local_diff(self, diff_local: torch.Tensor, planning_bbox: List[List[int]]) -> None:
|
|
1281
1383
|
prev_seg_ch = self._get_prev_seg_channel()
|
|
1282
1384
|
planning_slicer = tuple(slice(lb, ub) for lb, ub in planning_bbox)
|
|
1283
|
-
prev_sub =
|
|
1385
|
+
prev_sub = self._read_interactions_to_device((prev_seg_ch, *planning_slicer), self.device)
|
|
1284
1386
|
diff_local[prev_sub > 0.5] = 1
|
|
1285
1387
|
del prev_sub
|
|
1286
1388
|
|
|
@@ -1548,8 +1650,12 @@ class nnInteractiveInferenceSession:
|
|
|
1548
1650
|
self.network = self.network.to(self.device)
|
|
1549
1651
|
|
|
1550
1652
|
def __del__(self):
|
|
1551
|
-
|
|
1552
|
-
|
|
1653
|
+
# Be robust to a partially-constructed instance (e.g. __init__ raised on bad arguments):
|
|
1654
|
+
# these attributes may not exist yet.
|
|
1655
|
+
if hasattr(self, "preprocess_future"):
|
|
1656
|
+
self._finish_preprocessing_and_initialize_interactions()
|
|
1657
|
+
if hasattr(self, "executor"):
|
|
1658
|
+
self.executor.shutdown()
|
|
1553
1659
|
|
|
1554
1660
|
|
|
1555
1661
|
if __name__ == "__main__":
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/inference/remote/remote_session.py
RENAMED
|
@@ -132,6 +132,13 @@ class nnInteractiveRemoteInferenceSession:
|
|
|
132
132
|
the server before the client gives up. Default 60s matches observed
|
|
133
133
|
prediction times (100ms..~10s) with headroom for slow links. On
|
|
134
134
|
expiry: ``httpx.ReadTimeout``.
|
|
135
|
+
set_image_read_timeout:
|
|
136
|
+
Read timeout (seconds) used *only* for ``set_image``. After the volume
|
|
137
|
+
is uploaded, the server decompresses and preprocesses the full image
|
|
138
|
+
before responding, which can take far longer than a prediction on a
|
|
139
|
+
large volume. ``set_image`` therefore gets its own generous read
|
|
140
|
+
timeout instead of the much tighter ``read_timeout`` used for
|
|
141
|
+
predictions. On expiry: ``httpx.ReadTimeout``.
|
|
135
142
|
write_timeout:
|
|
136
143
|
Seconds to finish uploading the request body. ``set_image`` uploads
|
|
137
144
|
the full 4D volume so this is the longest-running upload. On expiry:
|
|
@@ -146,6 +153,7 @@ class nnInteractiveRemoteInferenceSession:
|
|
|
146
153
|
api_key: Optional[str] = None,
|
|
147
154
|
connect_timeout: float = 10.0,
|
|
148
155
|
read_timeout: float = 60.0,
|
|
156
|
+
set_image_read_timeout: float = 600.0,
|
|
149
157
|
write_timeout: float = 120.0,
|
|
150
158
|
pool_timeout: float = 10.0,
|
|
151
159
|
):
|
|
@@ -166,6 +174,15 @@ class nnInteractiveRemoteInferenceSession:
|
|
|
166
174
|
),
|
|
167
175
|
headers=headers,
|
|
168
176
|
)
|
|
177
|
+
# Per-request timeout override for set_image: same connect/write/pool as
|
|
178
|
+
# the client default, but a much longer read budget for server-side
|
|
179
|
+
# decompression + preprocessing of the full volume.
|
|
180
|
+
self._set_image_timeout = httpx.Timeout(
|
|
181
|
+
connect=connect_timeout,
|
|
182
|
+
read=set_image_read_timeout,
|
|
183
|
+
write=write_timeout,
|
|
184
|
+
pool=pool_timeout,
|
|
185
|
+
)
|
|
169
186
|
self._lease_token: Optional[str] = None
|
|
170
187
|
|
|
171
188
|
# Claim a session on the server. The lease token is then attached to
|
|
@@ -242,12 +259,21 @@ class nnInteractiveRemoteInferenceSession:
|
|
|
242
259
|
resp.raise_for_status()
|
|
243
260
|
return resp
|
|
244
261
|
|
|
245
|
-
def _post_binary(
|
|
262
|
+
def _post_binary(
|
|
263
|
+
self,
|
|
264
|
+
path: str,
|
|
265
|
+
meta: dict,
|
|
266
|
+
array_bytes: bytes,
|
|
267
|
+
timeout: Union[httpx.Timeout, float, None] = None,
|
|
268
|
+
) -> httpx.Response:
|
|
246
269
|
headers = {
|
|
247
270
|
META_HEADER: json.dumps(_to_jsonable(meta), separators=(",", ":")),
|
|
248
271
|
"Content-Type": CONTENT_TYPE_OCTET_STREAM,
|
|
249
272
|
}
|
|
250
|
-
|
|
273
|
+
# httpx treats timeout=None as "no override" only when the arg is
|
|
274
|
+
# omitted; pass it through explicitly only when a caller supplied one.
|
|
275
|
+
kwargs = {} if timeout is None else {"timeout": timeout}
|
|
276
|
+
resp = self._http.post(path, content=array_bytes, headers=headers, **kwargs)
|
|
251
277
|
_raise_for_lease_errors(resp)
|
|
252
278
|
resp.raise_for_status()
|
|
253
279
|
return resp
|
|
@@ -300,7 +326,12 @@ class nnInteractiveRemoteInferenceSession:
|
|
|
300
326
|
def set_image(self, image: np.ndarray, image_properties: Optional[dict] = None) -> None:
|
|
301
327
|
assert image.ndim == 4, f"expected a 4d image as input, got {image.ndim}d. Shape {image.shape}"
|
|
302
328
|
meta = {"image_properties": image_properties or {}}
|
|
303
|
-
resp = self._post_binary(
|
|
329
|
+
resp = self._post_binary(
|
|
330
|
+
PATH_SET_IMAGE,
|
|
331
|
+
meta,
|
|
332
|
+
pack_array(image, nthreads=_compression_threads()),
|
|
333
|
+
timeout=self._set_image_timeout,
|
|
334
|
+
)
|
|
304
335
|
info = resp.json()
|
|
305
336
|
self.original_image_shape = tuple(info["original_image_shape"])
|
|
306
337
|
|
|
@@ -31,6 +31,15 @@ Concurrency model:
|
|
|
31
31
|
prediction runs at a time.
|
|
32
32
|
- The acquisition order is always (session lock → gpu lock) so there is no
|
|
33
33
|
deadlock potential.
|
|
34
|
+
- The endpoints that carry large payloads (``set_image`` and the mask
|
|
35
|
+
interactions) are ``async`` so they can ``await`` the upload, but their
|
|
36
|
+
CPU-bound work (blosc2 decompression, image preprocessing, prediction,
|
|
37
|
+
response compression) is dispatched to a worker thread via
|
|
38
|
+
``run_in_threadpool``. This keeps the event loop free during a long
|
|
39
|
+
``set_image``/predict so lightweight endpoints — ``/heartbeat``,
|
|
40
|
+
``/healthz`` — and the background reaper stay responsive, and so two
|
|
41
|
+
clients can genuinely preprocess concurrently. Acquiring a session/gpu
|
|
42
|
+
lock therefore also happens off the loop, never stalling it.
|
|
34
43
|
"""
|
|
35
44
|
|
|
36
45
|
from __future__ import annotations
|
|
@@ -50,6 +59,7 @@ import blosc2
|
|
|
50
59
|
import numpy as np
|
|
51
60
|
import torch
|
|
52
61
|
from fastapi import Depends, FastAPI, HTTPException, Header, Request, Response, status
|
|
62
|
+
from starlette.concurrency import run_in_threadpool
|
|
53
63
|
|
|
54
64
|
from nnInteractive.inference.inference_session import nnInteractiveInferenceSession
|
|
55
65
|
from nnInteractive.inference.remote._protocol import (
|
|
@@ -151,6 +161,7 @@ class SessionRegistry:
|
|
|
151
161
|
torch_n_threads: int,
|
|
152
162
|
do_autozoom: bool,
|
|
153
163
|
use_torch_compile: bool,
|
|
164
|
+
interactions_storage: str,
|
|
154
165
|
verbose: bool,
|
|
155
166
|
) -> None:
|
|
156
167
|
self._artifacts = artifacts
|
|
@@ -161,6 +172,7 @@ class SessionRegistry:
|
|
|
161
172
|
self._torch_n_threads = torch_n_threads
|
|
162
173
|
self._do_autozoom = do_autozoom
|
|
163
174
|
self._use_torch_compile = use_torch_compile
|
|
175
|
+
self._interactions_storage = interactions_storage
|
|
164
176
|
self._verbose = verbose
|
|
165
177
|
self._entries: dict[str, SessionEntry] = {}
|
|
166
178
|
self._mu = threading.Lock()
|
|
@@ -189,6 +201,7 @@ class SessionRegistry:
|
|
|
189
201
|
verbose=self._verbose,
|
|
190
202
|
torch_n_threads=self._torch_n_threads,
|
|
191
203
|
do_autozoom=self._do_autozoom,
|
|
204
|
+
interactions_storage=self._interactions_storage,
|
|
192
205
|
)
|
|
193
206
|
session.initialize_from_loaded_artifacts(self._artifacts)
|
|
194
207
|
entry = SessionEntry(session)
|
|
@@ -290,6 +303,7 @@ def make_app(
|
|
|
290
303
|
torch_n_threads: int = 8,
|
|
291
304
|
do_autozoom: bool = True,
|
|
292
305
|
use_torch_compile: bool = False,
|
|
306
|
+
interactions_storage: str = "auto",
|
|
293
307
|
verbose: bool = False,
|
|
294
308
|
api_key: Optional[str] = None,
|
|
295
309
|
sweep_interval_seconds: float = 15.0,
|
|
@@ -303,6 +317,7 @@ def make_app(
|
|
|
303
317
|
torch_n_threads=torch_n_threads,
|
|
304
318
|
do_autozoom=do_autozoom,
|
|
305
319
|
use_torch_compile=use_torch_compile,
|
|
320
|
+
interactions_storage=interactions_storage,
|
|
306
321
|
verbose=verbose,
|
|
307
322
|
)
|
|
308
323
|
gpu_lock = threading.Lock()
|
|
@@ -353,6 +368,7 @@ def make_app(
|
|
|
353
368
|
verbose=False,
|
|
354
369
|
torch_n_threads=torch_n_threads,
|
|
355
370
|
do_autozoom=do_autozoom,
|
|
371
|
+
interactions_storage=interactions_storage,
|
|
356
372
|
)
|
|
357
373
|
_capability_session.initialize_from_loaded_artifacts(artifacts)
|
|
358
374
|
_capability_snapshot = _build_capability_snapshot(_capability_session)
|
|
@@ -570,17 +586,24 @@ def make_app(
|
|
|
570
586
|
async def set_image(request: Request, entry: SessionEntry = lease) -> dict:
|
|
571
587
|
meta = _parse_meta_header(request.headers.get(META_HEADER))
|
|
572
588
|
body = await request.body()
|
|
573
|
-
image = unpack_array(body)
|
|
574
589
|
image_properties = meta.get("image_properties") or {}
|
|
575
590
|
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
return {"original_image_shape": list(session.original_image_shape)}
|
|
591
|
+
# Decompression + full-volume preprocessing are CPU-bound and can run
|
|
592
|
+
# for many seconds on a large image. Run them in a worker thread so the
|
|
593
|
+
# event loop keeps servicing heartbeats/healthz and the reaper.
|
|
594
|
+
def _work():
|
|
595
|
+
image = unpack_array(body)
|
|
582
596
|
|
|
583
|
-
|
|
597
|
+
def _do(session):
|
|
598
|
+
session.set_image(image, image_properties)
|
|
599
|
+
# set_image preprocesses in a background thread; force completion
|
|
600
|
+
# so subsequent calls can safely use original_image_shape.
|
|
601
|
+
session._finish_preprocessing_and_initialize_interactions()
|
|
602
|
+
return {"original_image_shape": list(session.original_image_shape)}
|
|
603
|
+
|
|
604
|
+
return _under_session_lock(entry, _do)
|
|
605
|
+
|
|
606
|
+
return await run_in_threadpool(_work)
|
|
584
607
|
|
|
585
608
|
@app.post(PATH_SET_TARGET_BUFFER, dependencies=[auth])
|
|
586
609
|
def set_target_buffer(payload: dict, entry: SessionEntry = lease) -> dict:
|
|
@@ -652,41 +675,53 @@ def make_app(
|
|
|
652
675
|
async def _handle_mask_interaction(request: Request, entry: SessionEntry, kind: str) -> Response:
|
|
653
676
|
meta = _parse_meta_header(request.headers.get(META_HEADER))
|
|
654
677
|
body = await request.body()
|
|
655
|
-
mask = unpack_array(body)
|
|
656
678
|
run_prediction = bool(meta.get("run_prediction", True))
|
|
657
679
|
interaction_bbox = meta.get("interaction_bbox")
|
|
658
680
|
if interaction_bbox is not None:
|
|
659
681
|
interaction_bbox = [list(b) for b in interaction_bbox]
|
|
660
682
|
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
bool(meta["include_interaction"]),
|
|
666
|
-
run_prediction=run_prediction,
|
|
667
|
-
override_capability_checks=bool(meta.get("override_capability_checks", False)),
|
|
668
|
-
interaction_bbox=interaction_bbox,
|
|
669
|
-
)
|
|
670
|
-
return _build_prediction_response(session, run_prediction)
|
|
683
|
+
# Decompression + prediction + response compression are CPU/GPU-bound;
|
|
684
|
+
# run them off the event loop (see set_image).
|
|
685
|
+
def _work():
|
|
686
|
+
mask = unpack_array(body)
|
|
671
687
|
|
|
672
|
-
|
|
688
|
+
def _do(session):
|
|
689
|
+
method = session.add_scribble_interaction if kind == "scribble" else session.add_lasso_interaction
|
|
690
|
+
method(
|
|
691
|
+
mask,
|
|
692
|
+
bool(meta["include_interaction"]),
|
|
693
|
+
run_prediction=run_prediction,
|
|
694
|
+
override_capability_checks=bool(meta.get("override_capability_checks", False)),
|
|
695
|
+
interaction_bbox=interaction_bbox,
|
|
696
|
+
)
|
|
697
|
+
return _build_prediction_response(session, run_prediction)
|
|
698
|
+
|
|
699
|
+
return _under_session_and_gpu_lock(entry, _do)
|
|
700
|
+
|
|
701
|
+
return await run_in_threadpool(_work)
|
|
673
702
|
|
|
674
703
|
@app.post(PATH_ADD_INITIAL_SEG, dependencies=[auth])
|
|
675
704
|
async def add_initial_seg_interaction(request: Request, entry: SessionEntry = lease) -> Response:
|
|
676
705
|
meta = _parse_meta_header(request.headers.get(META_HEADER))
|
|
677
706
|
body = await request.body()
|
|
678
|
-
initial_seg = unpack_array(body)
|
|
679
707
|
run_prediction = bool(meta.get("run_prediction", False))
|
|
680
708
|
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
override_capability_checks=bool(meta.get("override_capability_checks", False)),
|
|
686
|
-
)
|
|
687
|
-
return _build_prediction_response(session, run_prediction)
|
|
709
|
+
# Decompression + (optional) prediction are CPU/GPU-bound; run them off
|
|
710
|
+
# the event loop (see set_image).
|
|
711
|
+
def _work():
|
|
712
|
+
initial_seg = unpack_array(body)
|
|
688
713
|
|
|
689
|
-
|
|
714
|
+
def _do(session):
|
|
715
|
+
session.add_initial_seg_interaction(
|
|
716
|
+
initial_seg=initial_seg,
|
|
717
|
+
run_prediction=run_prediction,
|
|
718
|
+
override_capability_checks=bool(meta.get("override_capability_checks", False)),
|
|
719
|
+
)
|
|
720
|
+
return _build_prediction_response(session, run_prediction)
|
|
721
|
+
|
|
722
|
+
return _under_session_and_gpu_lock(entry, _do)
|
|
723
|
+
|
|
724
|
+
return await run_in_threadpool(_work)
|
|
690
725
|
|
|
691
726
|
return app
|
|
692
727
|
|
|
@@ -61,6 +61,15 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
61
61
|
"the long-lived process. Pass this flag to skip compilation (e.g. for faster startup or "
|
|
62
62
|
"to work around a compile/backend issue).",
|
|
63
63
|
)
|
|
64
|
+
p.add_argument(
|
|
65
|
+
"--interactions-storage",
|
|
66
|
+
choices=["blosc2", "tensor", "auto"],
|
|
67
|
+
default="auto",
|
|
68
|
+
help="Storage backend for the interaction tensor (default: auto). 'blosc2': compact "
|
|
69
|
+
"in-memory array (low RAM, pays (de)compression per read/write). 'tensor': dense pinned "
|
|
70
|
+
"CPU float16 torch.Tensor (more RAM, lower per-access overhead). 'auto': per image, use "
|
|
71
|
+
"'tensor' for images up to 512x512x1024 voxels and 'blosc2' for larger ones.",
|
|
72
|
+
)
|
|
64
73
|
p.add_argument(
|
|
65
74
|
"--no-autozoom",
|
|
66
75
|
action="store_true",
|
|
@@ -222,6 +231,7 @@ def main(argv=None) -> int:
|
|
|
222
231
|
torch_n_threads=args.torch_n_threads,
|
|
223
232
|
do_autozoom=not args.no_autozoom,
|
|
224
233
|
use_torch_compile=use_torch_compile,
|
|
234
|
+
interactions_storage=args.interactions_storage,
|
|
225
235
|
verbose=args.verbose,
|
|
226
236
|
api_key=api_key,
|
|
227
237
|
)
|
|
@@ -124,6 +124,11 @@ class PointInteraction_stub:
|
|
|
124
124
|
)
|
|
125
125
|
|
|
126
126
|
target_slices = (channel_idx, *slices)
|
|
127
|
+
if isinstance(interaction_map, torch.Tensor):
|
|
128
|
+
# Dense torch backend: in-place maximum, no numpy round-trip.
|
|
129
|
+
view = interaction_map[target_slices]
|
|
130
|
+
torch.maximum(view, strel[structuring_slices].to(view.dtype), out=view)
|
|
131
|
+
return interaction_map
|
|
127
132
|
current_sub = np.asarray(interaction_map[target_slices])
|
|
128
133
|
strel_np = strel[structuring_slices].numpy().astype(current_sub.dtype)
|
|
129
134
|
np.maximum(current_sub, strel_np, out=current_sub)
|
|
@@ -190,7 +190,7 @@ def paste_tensor(target, source, bbox, channel_idx=None):
|
|
|
190
190
|
return target
|
|
191
191
|
|
|
192
192
|
|
|
193
|
-
def crop_to_valid(img, bbox):
|
|
193
|
+
def crop_to_valid(img, bbox, out=None):
|
|
194
194
|
"""
|
|
195
195
|
Crops the image to the part of the bounding box that lies within the image.
|
|
196
196
|
Supports a 4D tensor of shape (C, X, Y, Z). The bounding box is specified as
|
|
@@ -200,6 +200,12 @@ def crop_to_valid(img, bbox):
|
|
|
200
200
|
img: Input tensor (or blosc2 NDArray) of shape (C, X, Y, Z).
|
|
201
201
|
bbox (list or tuple): Bounding box as a list of three intervals for spatial dims:
|
|
202
202
|
[[x1, x2], [y1, y2], [z1, z2]].
|
|
203
|
+
out (np.ndarray, optional): A flat, pre-faulted float16 buffer to decompress a blosc2
|
|
204
|
+
crop into, avoiding a fresh allocation + page-fault on every call
|
|
205
|
+
("Path B"). Only used when ``img`` is a blosc2 NDArray exposing
|
|
206
|
+
``get_slice_numpy`` and the crop fits; otherwise ignored and a fresh
|
|
207
|
+
array is returned. When used, the returned crop is a VIEW into ``out``
|
|
208
|
+
and is only valid until the next call that reuses the same buffer.
|
|
203
209
|
|
|
204
210
|
Returns:
|
|
205
211
|
cropped: Cropped data of shape (C, cropped_x, cropped_y, cropped_z).
|
|
@@ -224,6 +230,26 @@ def crop_to_valid(img, bbox):
|
|
|
224
230
|
pad_right = end - dim_size if end > dim_size else 0
|
|
225
231
|
pad.append((pad_left, pad_right))
|
|
226
232
|
|
|
233
|
+
# Path B: decompress the blosc2 crop straight into a reused, pre-faulted buffer to avoid the
|
|
234
|
+
# per-call allocation + first-touch page-fault cost. get_slice_numpy is blosc2's internal
|
|
235
|
+
# decompress-into-buffer method (what __getitem__ calls under the hood); guarded since it is
|
|
236
|
+
# not a documented public API. Falls back to a fresh allocation if the crop would not fit.
|
|
237
|
+
if out is not None and not isinstance(img, torch.Tensor) and hasattr(img, "get_slice_numpy"):
|
|
238
|
+
valid_shape = [ce - cs for cs, ce in crop_indices]
|
|
239
|
+
output_shape = (img.shape[0], *valid_shape)
|
|
240
|
+
n = int(np.prod(output_shape, dtype=np.int64))
|
|
241
|
+
if n <= out.size:
|
|
242
|
+
view = out[:n].reshape(output_shape)
|
|
243
|
+
start = (0, *[cs for cs, _ in crop_indices])
|
|
244
|
+
stop = (img.shape[0], *[ce for _, ce in crop_indices])
|
|
245
|
+
img.get_slice_numpy(view, (start, stop))
|
|
246
|
+
return view, pad
|
|
247
|
+
print(
|
|
248
|
+
f"WARNING: interaction crop of {n} elements (shape {output_shape}) exceeds the reusable "
|
|
249
|
+
f"decompression buffer of {out.size} elements; this should never happen. Falling back to "
|
|
250
|
+
"a fresh allocation."
|
|
251
|
+
)
|
|
252
|
+
|
|
227
253
|
# Crop the image on spatial dimensions, leaving the channel dimension intact.
|
|
228
254
|
cropped = img[
|
|
229
255
|
:,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/benchmark.py
RENAMED
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/build_sam.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/__init__.py
RENAMED
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/amg.py
RENAMED
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/sam2/utils/misc.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/loss_fns.py
RENAMED
|
File without changes
|
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/model/sam2.py
RENAMED
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/optimizer.py
RENAMED
|
File without changes
|
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/train.py
RENAMED
|
File without changes
|
{nninteractive-2.3.1 → nninteractive-2.3.3}/nnInteractive/supervoxel/src/sam2/training/trainer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|