PyPI - nnInteractive - Versions diffs - 2.0.0__tar.gz → 2.2.0__tar.gz - Mend

nnInteractive 2.0.0__tar.gz → 2.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

{nninteractive-2.0.0 → nninteractive-2.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nnInteractive
-Version: 2.0.0
+Version: 2.2.0
 Summary: Inference code for nnInteractive
 Author: Helmholtz Imaging Applied Computer Vision Lab
 Author-email: Fabian Isensee <f.isensee@dkfz-heidelberg.de>
@@ -223,10 +223,17 @@ Requires-Dist: nnunetv2>=2.7.0
 Requires-Dist: torch!=2.9.*,>=2.1.2
 Requires-Dist: acvl-utils<0.3,>=0.2.3
 Requires-Dist: batchgenerators>=0.25.1
+Requires-Dist: fastapi>=0.110
+Requires-Dist: uvicorn[standard]>=0.27
+Requires-Dist: httpx>=0.27
+Requires-Dist: blosc2
 Provides-Extra: dev
 Requires-Dist: black; extra == "dev"
 Requires-Dist: ruff; extra == "dev"
 Requires-Dist: pre-commit; extra == "dev"
+Provides-Extra: client
+Requires-Dist: httpx>=0.27; extra == "client"
+Requires-Dist: blosc2; extra == "client"
 Dynamic: license-file
 <img src="imgs/nnInteractive_header_white.png">
@@ -417,14 +424,42 @@ session.add_point_interaction(POINT_COORDINATES, include_interaction=False)
 session.add_bbox_interaction(BBOX_COORDINATES, include_interaction=True)
 # Example: Add a scribble interaction
-# - A 3D image of the same shape as img where one slice (any axis-aligned orientation) contains a hand-drawn scribble.
 # - Background must be 0, and scribble must be 1.
 # - Use session.preferred_scribble_thickness for optimal results.
-session.add_scribble_interaction(SCRIBBLE_IMAGE, include_interaction=True)
+#
+# ✅ RECOMMENDED (v2): pass a small 2D crop plus its location.
+# Scribbles live on a single axis-aligned slice, so one of the three bbox
+# dimensions is always size 1 and the in-plane extent typically covers only
+# a small region. The cropped array is ORDERS OF MAGNITUDE
+# smaller than a full-volume mask for typical annotations, which makes this
+# path dramatically faster. Please prefer this
+# form in new integrations.
+#
+# SCRIBBLE_CROP.shape must equal the bbox size. INTERACTION_BBOX uses
+# half-open intervals [[x1,x2],[y1,y2],[z1,z2]] in original-image coordinates.
+# Example: a scribble drawn on axial slice z=64, covering x∈[100,140), y∈[80,150):
+#   SCRIBBLE_CROP    = <ndarray of shape (40, 70, 1), values 0 or 1>
+#   INTERACTION_BBOX = [[100, 140], [80, 150], [64, 65]]
+session.add_scribble_interaction(
+    SCRIBBLE_CROP,
+    include_interaction=True,
+    interaction_bbox=INTERACTION_BBOX,
+)
+# Legacy form (still supported, but discouraged): a 3D array matching the
+# full original image shape with the scribble baked into one slice.
+# session.add_scribble_interaction(SCRIBBLE_IMAGE, include_interaction=True)
 # Example: Add a lasso interaction
-# - Similarly to scribble a 3D image with a single slice containing a **closed contour** representing the selection.
-session.add_lasso_interaction(LASSO_IMAGE, include_interaction=True)
+# - Like scribble but the single slice contains a **closed contour** for the selection.
+# - Same recommendation applies: pass a 2D crop + interaction_bbox for a large speedup.
+session.add_lasso_interaction(
+    LASSO_CROP,
+    include_interaction=True,
+    interaction_bbox=INTERACTION_BBOX,
+)
+# Legacy full-volume form (discouraged):
+# session.add_lasso_interaction(LASSO_IMAGE, include_interaction=True)
 # You can combine any number of interactions as needed.
 # The model refines the segmentation result incrementally with each new interaction.
@@ -452,6 +487,41 @@ session.set_target_buffer(torch.zeros(NEW_IMAGE.shape[1:], dtype=torch.uint8))
 # Enjoy!
 ```
+## Running inference on a remote GPU (client / server)
+If the machine running your GUI does not have a powerful GPU, you can run the
+model on a remote box and drive it over HTTP with
+**`nnInteractiveRemoteInferenceSession`** — a drop-in replacement with the same
+public API as the local session. The server loads the model once at startup and
+hosts multiple concurrent client sessions; each client keeps its own image,
+target buffer, and interaction state.
+Start the server on the GPU box:
+```bash
+nninteractive-server \
+    --model-dir /path/to/checkpoint_folder --fold all \
+    --host 0.0.0.0 --port 1527 \
+    --api-key "$(openssl rand -hex 32)"
+```
+And in the client code, swap the local session for the remote one:
+```python
+from nnInteractive.inference.remote import nnInteractiveRemoteInferenceSession
+session = nnInteractiveRemoteInferenceSession(
+    server_url="http://gpu-box.lab:1527",
+    api_key="…",
+)
+# From here on, the API is identical to nnInteractiveInferenceSession.
+```
+For full details — installation, authentication, single-user SSH-tunnel setup,
+multi-user deployment behind a reverse proxy, concurrency/session model, idle
+expiry and heartbeats, GUI integration notes, and troubleshooting — see
+[`SERVER_CLIENT.md`](SERVER_CLIENT.md).
 ## nnInteractive SuperVoxels
 As part of the `nnInteractive` framework, we provide a dedicated module for **supervoxel generation** based on [SAM](https://github.com/facebookresearch/segment-anything) and [SAM2](https://github.com/facebookresearch/sam2). This replaces traditional superpixel methods (e.g., SLIC) with **foundation model–powered 3D pseudo-labels**.

{nninteractive-2.0.0 → nninteractive-2.2.0}/nnInteractive/inference/inference_session.py RENAMED Viewed

@@ -50,14 +50,16 @@ class nnInteractiveInferenceSession:
     ):
         """
         Only intended to work with nnInteractiveTrainerV2 and its derivatives
+        ``use_torch_compile``: compile the network with ``torch.compile``. The
+        first prediction after enabling this is slow (compilation happens lazily
+        on the first forward pass), but every subsequent prediction is faster.
+        This is recommended for the persistent inference server, where the
+        process is long-lived so the one-time compile cost is paid only once and
+        amortized across the whole session lifetime.
         """
         print("session initialized")
-        # set as part of initialization
-        assert use_torch_compile is False, (
-            "torch.compile is not supported. The blosc2-backed interaction tensor "
-            "requires numpy↔torch round-trips that break compile tracing."
-        )
         self.network = None
         self.label_manager = None
         self.dataset_json = None
@@ -83,6 +85,10 @@ class nnInteractiveInferenceSession:
         self.preprocessed_image: torch.Tensor = None
         self.preprocessed_props = None
         self.target_buffer: Union[np.ndarray, torch.Tensor] = None
+        # Bbox (in original-image coordinates) of the most recent target_buffer write.
+        # Captured inside _paste_prediction_to_target_buffer so remote callers can
+        # fetch just the touched region without diffing.
+        self._last_paste_bbox: Optional[List[List[int]]] = None
         # this will be set when loading the model (initialize_from_trained_model_folder)
         self.pad_mode_data = self.preferred_scribble_thickness = self.point_interaction = None
@@ -287,6 +293,7 @@ class nnInteractiveInferenceSession:
         else:
             pred_for_target = prediction.to("cpu")
         paste_tensor(self.target_buffer, pred_for_target, target_bbox)
+        self._last_paste_bbox = target_bbox
     def _estimate_refinement_cache_nbytes(self, cache_bbox: List[List[int]]) -> int:
         cache_voxels = int(np.prod(self._bbox_size(cache_bbox), dtype=np.int64))
@@ -517,6 +524,7 @@ class nnInteractiveInferenceSession:
         self.current_interaction_intensity = 1.0
         empty_cache(self.device)
         self.original_image_shape = None
+        self._last_paste_bbox = None
     def _initialize_interactions(self, image_torch: torch.Tensor):
         shape = (self.num_interaction_channels, *image_torch.shape[1:])
@@ -606,6 +614,7 @@ class nnInteractiveInferenceSession:
                 self.target_buffer.fill(0)
             elif isinstance(self.target_buffer, torch.Tensor):
                 self.target_buffer.zero_()
+        self._last_paste_bbox = None
         empty_cache(self.device)
     def add_bbox_interaction(
@@ -876,6 +885,42 @@ class nnInteractiveInferenceSession:
         else:
             del initial_seg
+    @torch.inference_mode()
+    def warmup(self) -> bool:
+        """Run a single dummy forward pass to trigger lazy ``torch.compile`` compilation up front.
+        With ``torch.compile`` enabled the network is compiled lazily on its first
+        forward pass, which would otherwise make the user's *first* real prediction
+        slow. Every prediction path — the initial coarse pass, the zoom-out
+        iterations, and the refinement patches — feeds the network an input of
+        identical shape ``[1, num_input_channels + num_interaction_channels,
+        *patch_size]`` (``_build_network_input`` always resizes the crop to
+        ``patch_size``, and refinement crops at exactly ``patch_size``). So a single
+        dummy pass at that shape populates the compile cache and every subsequent
+        real prediction is fast.
+        Returns ``True`` if a warmup pass was run, ``False`` if it was a no-op
+        (network not compiled — there is nothing to pre-compile, so a dummy pass
+        would not save the user any time). Mirrors ``_predict``'s autocast/
+        inference-mode context and the float32 input dtype that ``torch.cat``
+        produces when concatenating the float32 image with the fp16 interactions.
+        """
+        if self.network is None or self.configuration_manager is None:
+            raise RuntimeError("warmup() requires an initialized network; call initialize_* first")
+        if not isinstance(self.network, OptimizedModule):
+            return False
+        num_input_channels = (
+            determine_num_input_channels(self.plans_manager, self.configuration_manager, self.dataset_json)
+            + self.num_interaction_channels
+        )
+        patch_size = self.configuration_manager.patch_size
+        dummy = torch.zeros((1, num_input_channels, *patch_size), dtype=torch.float32, device=self.device)
+        with torch.autocast(self.device.type, enabled=True) if self.device.type == "cuda" else dummy_context():
+            self.network(dummy)
+        del dummy
+        empty_cache(self.device)
+        return True
     @torch.inference_mode()
     def _predict(self, force_full_refine: bool = False):
         """
@@ -1287,6 +1332,36 @@ class nnInteractiveInferenceSession:
         """
         This is used when making predictions with a trained model
         """
+        artifacts = self._load_model_artifacts_from_disk(model_training_output_dir, use_fold, checkpoint_name)
+        self.initialize_from_loaded_artifacts(artifacts)
+    def _load_model_artifacts_from_disk(
+        self,
+        model_training_output_dir: str,
+        use_fold: Union[int, str] = None,
+        checkpoint_name: str = "checkpoint_final.pth",
+    ) -> dict:
+        """Read all model artifacts from disk and build the network on ``self.device``.
+        Returns an artifact dict that can be applied to this or any other freshly
+        constructed session via :meth:`initialize_from_loaded_artifacts`. The
+        returned values are the actual objects (the ``nn.Module`` with its
+        weights and buffers, the plans/configuration managers, the dataset
+        json, the label manager) — not copies. Multiple sessions calling
+        :meth:`initialize_from_loaded_artifacts` with the same dict will all
+        end up with ``self.network`` pointing at the same module instance and
+        the same weight tensors on the GPU. This is safe as long as callers
+        treat these objects as read-only after construction; in the multi-
+        session server that is enforced by running inference under
+        ``@torch.inference_mode()`` and serializing predict calls with a
+        global GPU lock.
+        Note: this also mutates ``self`` (applies capability, sets pad/decay/
+        thickness) because ``num_interaction_channels`` is required to build the
+        network. The caller should follow up with
+        :meth:`initialize_from_loaded_artifacts` (this is what
+        :meth:`initialize_from_trained_model_folder` does).
+        """
         point_interaction_use_etd = True
         (
             capability_content,
@@ -1353,12 +1428,41 @@ class nnInteractiveInferenceSession:
         ).to(self.device)
         network.load_state_dict(parameters)
-        self.plans_manager = plans_manager
-        self.configuration_manager = configuration_manager
-        self.network = network
-        self.dataset_json = dataset_json
-        self.trainer_name = trainer_name
-        self.label_manager = plans_manager.get_label_manager(dataset_json)
+        return {
+            "capability_content": capability_content,
+            "point_interaction": self.point_interaction,
+            "preferred_scribble_thickness": self.preferred_scribble_thickness,
+            "interaction_decay": self.interaction_decay,
+            "pad_mode_data": self.pad_mode_data,
+            "network": network,
+            "plans_manager": plans_manager,
+            "configuration_manager": configuration_manager,
+            "dataset_json": dataset_json,
+            "trainer_name": trainer_name,
+            "label_manager": plans_manager.get_label_manager(dataset_json),
+        }
+    def initialize_from_loaded_artifacts(self, artifacts: dict):
+        """Apply pre-loaded artifacts to this session instance.
+        ``artifacts`` is the dict returned by :meth:`_load_model_artifacts_from_disk`.
+        Useful for spawning multiple sessions that share one loaded model (e.g.
+        the multi-session inference server). All artifact entries — including
+        ``self.network`` — are stored by reference; passing the same dict to
+        multiple sessions does not duplicate the network or its weights in
+        memory.
+        """
+        self.preferred_scribble_thickness = artifacts["preferred_scribble_thickness"]
+        self.interaction_decay = artifacts["interaction_decay"]
+        self.pad_mode_data = artifacts["pad_mode_data"]
+        self.point_interaction = artifacts["point_interaction"]
+        self._apply_capability(artifacts["capability_content"])
+        self.plans_manager = artifacts["plans_manager"]
+        self.configuration_manager = artifacts["configuration_manager"]
+        self.network = artifacts["network"]
+        self.dataset_json = artifacts["dataset_json"]
+        self.trainer_name = artifacts["trainer_name"]
+        self.label_manager = artifacts["label_manager"]
         if self.use_torch_compile and not isinstance(self.network, OptimizedModule):
             print("Using torch.compile")
             self.network = torch.compile(self.network)

nninteractive-2.2.0/nnInteractive/inference/remote/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+from nnInteractive.inference.remote.remote_session import (
+    ServerAtCapacityError,
+    SessionExpiredError,
+    nnInteractiveRemoteInferenceSession,
+)
+__all__ = [
+    "nnInteractiveRemoteInferenceSession",
+    "SessionExpiredError",
+    "ServerAtCapacityError",
+]

nninteractive-2.2.0/nnInteractive/inference/remote/_protocol.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""Shared constants for the nnInteractive client/server HTTP protocol."""
+# HTTP header used to carry JSON-encoded metadata alongside a binary array body.
+META_HEADER = "X-Meta"
+# HTTP header used to carry a per-client lease token identifying which session
+# on the (multi-session) server the request applies to.
+LEASE_HEADER = "X-Lease-Token"
+# Endpoint paths.
+PATH_HEALTHZ = "/healthz"
+PATH_CAPABILITIES = "/capabilities"
+PATH_CLAIM = "/claim"
+PATH_RELEASE = "/release"
+PATH_HEARTBEAT = "/heartbeat"
+PATH_LEASE_STATUS = "/lease_status"
+PATH_SET_IMAGE = "/set_image"
+PATH_SET_TARGET_BUFFER = "/set_target_buffer"
+PATH_RESET_INTERACTIONS = "/reset_interactions"
+PATH_SET_DO_AUTOZOOM = "/set_do_autozoom"
+PATH_ADD_BBOX = "/add_bbox_interaction"
+PATH_ADD_POINT = "/add_point_interaction"
+PATH_ADD_SCRIBBLE = "/add_scribble_interaction"
+PATH_ADD_LASSO = "/add_lasso_interaction"
+PATH_ADD_INITIAL_SEG = "/add_initial_seg_interaction"
+# Body content type for endpoints that ship a packed numpy array.
+CONTENT_TYPE_OCTET_STREAM = "application/octet-stream"

nnInteractive 2.0.0__tar.gz → 2.2.0__tar.gz

nnInteractive 2.0.0__tar.gz → 2.2.0__tar.gz