PyPI - nnInteractive - Versions diffs - 2.3.0__tar.gz → 2.3.2__tar.gz - Mend

nnInteractive 2.3.0.tar.gz → 2.3.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

{nninteractive-2.3.0 → nninteractive-2.3.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nnInteractive
-Version: 2.3.0
+Version: 2.3.2
 Summary: Inference code for nnInteractive
 Author: Helmholtz Imaging Applied Computer Vision Lab
 Author-email: Fabian Isensee <f.isensee@dkfz-heidelberg.de>

{nninteractive-2.3.0 → nninteractive-2.3.2}/nnInteractive/inference/remote/remote_session.py RENAMED Viewed

@@ -42,6 +42,16 @@ from nnInteractive.inference.remote._protocol import (
 from nnInteractive.inference.remote.serialization import pack_array, unpack_array
+def _compression_threads() -> int:
+    """blosc2 thread count for client-side upload compression.
+    Full logical CPU count: blosc2 scales measurably onto SMT siblings, so use them all to
+    minimize upload latency. Per-call only (passed to pack_array → compress2), so it never
+    mutates blosc2's global nthreads.
+    """
+    return max(1, os.cpu_count() or 1)
 class SessionExpiredError(RuntimeError):
     """Raised when the server reports the client's lease no longer exists.
@@ -122,6 +132,13 @@ class nnInteractiveRemoteInferenceSession:
         the server before the client gives up. Default 60s matches observed
         prediction times (100ms..~10s) with headroom for slow links. On
         expiry: ``httpx.ReadTimeout``.
+    set_image_read_timeout:
+        Read timeout (seconds) used *only* for ``set_image``. After the volume
+        is uploaded, the server decompresses and preprocesses the full image
+        before responding, which can take far longer than a prediction on a
+        large volume. ``set_image`` therefore gets its own generous read
+        timeout instead of the much tighter ``read_timeout`` used for
+        predictions. On expiry: ``httpx.ReadTimeout``.
     write_timeout:
         Seconds to finish uploading the request body. ``set_image`` uploads
         the full 4D volume so this is the longest-running upload. On expiry:
@@ -136,6 +153,7 @@ class nnInteractiveRemoteInferenceSession:
         api_key: Optional[str] = None,
         connect_timeout: float = 10.0,
         read_timeout: float = 60.0,
+        set_image_read_timeout: float = 600.0,
         write_timeout: float = 120.0,
         pool_timeout: float = 10.0,
     ):
@@ -156,6 +174,15 @@ class nnInteractiveRemoteInferenceSession:
             ),
             headers=headers,
         )
+        # Per-request timeout override for set_image: same connect/write/pool as
+        # the client default, but a much longer read budget for server-side
+        # decompression + preprocessing of the full volume.
+        self._set_image_timeout = httpx.Timeout(
+            connect=connect_timeout,
+            read=set_image_read_timeout,
+            write=write_timeout,
+            pool=pool_timeout,
+        )
         self._lease_token: Optional[str] = None
         # Claim a session on the server. The lease token is then attached to
@@ -232,12 +259,21 @@ class nnInteractiveRemoteInferenceSession:
         resp.raise_for_status()
         return resp
-    def _post_binary(self, path: str, meta: dict, array_bytes: bytes) -> httpx.Response:
+    def _post_binary(
+        self,
+        path: str,
+        meta: dict,
+        array_bytes: bytes,
+        timeout: Union[httpx.Timeout, float, None] = None,
+    ) -> httpx.Response:
         headers = {
             META_HEADER: json.dumps(_to_jsonable(meta), separators=(",", ":")),
             "Content-Type": CONTENT_TYPE_OCTET_STREAM,
         }
-        resp = self._http.post(path, content=array_bytes, headers=headers)
+        # In httpx an explicit timeout=None disables timeouts entirely; only omitting
+        # the argument keeps the client default, so forward it only when supplied.
+        kwargs = {} if timeout is None else {"timeout": timeout}
+        resp = self._http.post(path, content=array_bytes, headers=headers, **kwargs)
         _raise_for_lease_errors(resp)
         resp.raise_for_status()
         return resp
@@ -290,7 +326,12 @@ class nnInteractiveRemoteInferenceSession:
     def set_image(self, image: np.ndarray, image_properties: Optional[dict] = None) -> None:
         assert image.ndim == 4, f"expected a 4d image as input, got {image.ndim}d. Shape {image.shape}"
         meta = {"image_properties": image_properties or {}}
-        resp = self._post_binary(PATH_SET_IMAGE, meta, pack_array(image))
+        resp = self._post_binary(
+            PATH_SET_IMAGE,
+            meta,
+            pack_array(image, nthreads=_compression_threads()),
+            timeout=self._set_image_timeout,
+        )
         info = resp.json()
         self.original_image_shape = tuple(info["original_image_shape"])
@@ -402,7 +443,9 @@ class nnInteractiveRemoteInferenceSession:
             "interaction_bbox": ([list(b) for b in interaction_bbox] if interaction_bbox is not None else None),
         }
         # Interactions (scribble/lasso masks) compress best with NOFILTER; skip auto-selection.
-        resp = self._post_binary(path, meta, pack_array(mask_image, filters=[blosc2.Filter.NOFILTER]))
+        resp = self._post_binary(
+            path, meta, pack_array(mask_image, filters=[blosc2.Filter.NOFILTER], nthreads=_compression_threads())
+        )
         self._apply_prediction_response(resp)
     def add_initial_seg_interaction(
@@ -428,7 +471,11 @@ class nnInteractiveRemoteInferenceSession:
             "override_capability_checks": bool(override_capability_checks),
         }
         # Segmentations compress best with NOFILTER; skip auto-selection.
-        resp = self._post_binary(PATH_ADD_INITIAL_SEG, meta, pack_array(initial_seg, filters=[blosc2.Filter.NOFILTER]))
+        resp = self._post_binary(
+            PATH_ADD_INITIAL_SEG,
+            meta,
+            pack_array(initial_seg, filters=[blosc2.Filter.NOFILTER], nthreads=_compression_threads()),
+        )
         self._apply_prediction_response(resp)
     # ------------------------------------------------------------------ #

{nninteractive-2.3.0 → nninteractive-2.3.2}/nnInteractive/inference/remote/serialization.py RENAMED Viewed

@@ -51,18 +51,21 @@ _ID_CODEC = {v: k for k, v in _CODEC_ID.items()}
 _SELECT_FILTER_CROP_FRACTION = 0.25
-def _compress_all(raw: memoryview, total: int, codec: blosc2.Codec, clevel: int, filters: list) -> int:
+def _compress_all(
+    raw: memoryview, total: int, codec: blosc2.Codec, clevel: int, filters: list, nthreads: Optional[int]
+) -> int:
     """Compressed byte length of ``raw`` under ``filters``, chunked exactly as pack_array does."""
+    extra = {} if nthreads is None else {"nthreads": nthreads}
     size = 0
     nchunks = (total + _CHUNK_SIZE - 1) // _CHUNK_SIZE
     for i in range(nchunks):
         start = i * _CHUNK_SIZE
         end = min(start + _CHUNK_SIZE, total)
-        size += len(blosc2.compress2(raw[start:end], codec=codec, clevel=clevel, filters=filters))
+        size += len(blosc2.compress2(raw[start:end], codec=codec, clevel=clevel, filters=filters, **extra))
     return size
-def _select_filter(arr: np.ndarray, codec: blosc2.Codec, clevel: int) -> "blosc2.Filter":
+def _select_filter(arr: np.ndarray, codec: blosc2.Codec, clevel: int, nthreads: Optional[int]) -> "blosc2.Filter":
     """Pick NOFILTER vs SHUFFLE for ``arr`` by trial-compressing a small centered crop.
     Uses ``compress2`` on the raw bytes — exactly the path pack_array takes — so the decision
@@ -79,7 +82,7 @@ def _select_filter(arr: np.ndarray, codec: blosc2.Codec, clevel: int) -> "blosc2
         best_filter, best_bytes = blosc2.Filter.NOFILTER, None
         for f in (blosc2.Filter.NOFILTER, blosc2.Filter.SHUFFLE):
-            cb = _compress_all(raw, total, codec, clevel, [f])
+            cb = _compress_all(raw, total, codec, clevel, [f], nthreads)
             if best_bytes is None or cb < best_bytes:
                 best_bytes, best_filter = cb, f
         return best_filter
@@ -95,6 +98,7 @@ def pack_array(
     codec: blosc2.Codec = blosc2.Codec.ZSTD,
     clevel: int = 3,
     filters: Optional[list] = None,
+    nthreads: Optional[int] = None,
 ) -> bytes:
     """Serialize a numpy array to a self-describing compressed byte string.
@@ -104,6 +108,10 @@ def pack_array(
     know the optimum (interactions and segmentations compress best with NOFILTER) should pass
     ``[blosc2.Filter.NOFILTER]`` to skip the selection. The chosen filter is self-describing
     inside the blosc2 frame, so unpack_array (decompress2) needs no changes.
+    ``nthreads`` is the per-call blosc2 thread count for compression. ``None`` (the default)
+    inherits blosc2's global ``nthreads`` (= core count). Passing an explicit value overrides
+    it for this call only, without mutating global state.
     """
     arr = np.ascontiguousarray(arr)
     dtype_str = arr.dtype.str.lstrip("<>|=").encode("ascii")
@@ -137,13 +145,14 @@ def pack_array(
     if filters is None:
         # Auto-select the better filter from a small centered crop, using the same
         # compress2 path as below for consistency.
-        filters = [_select_filter(arr, codec, clevel)]
+        filters = [_select_filter(arr, codec, clevel, nthreads)]
+    extra = {} if nthreads is None else {"nthreads": nthreads}
     parts = [header, shape_bytes, struct.pack("<I", nchunks)]
     for i in range(nchunks):
         start = i * _CHUNK_SIZE
         end = min(start + _CHUNK_SIZE, total)
-        chunk = blosc2.compress2(raw[start:end], codec=codec, clevel=clevel, filters=filters)
+        chunk = blosc2.compress2(raw[start:end], codec=codec, clevel=clevel, filters=filters, **extra)
         parts.append(struct.pack("<QQ", end - start, len(chunk)))
         parts.append(chunk)
     return b"".join(parts)

{nninteractive-2.3.0 → nninteractive-2.3.2}/nnInteractive/inference/server/app.py RENAMED Viewed

@@ -31,6 +31,15 @@ Concurrency model:
     prediction runs at a time.
   - The acquisition order is always (session lock → gpu lock) so there is no
     deadlock potential.
+  - The endpoints that carry large payloads (``set_image`` and the mask
+    interactions) are ``async`` so they can ``await`` the upload, but their
+    CPU-bound work (blosc2 decompression, image preprocessing, prediction,
+    response compression) is dispatched to a worker thread via
+    ``run_in_threadpool``. This keeps the event loop free during a long
+    ``set_image``/predict so lightweight endpoints — ``/heartbeat``,
+    ``/healthz`` — and the background reaper stay responsive, and so two
+    clients can genuinely preprocess concurrently. Acquiring a session/gpu
+    lock therefore also happens off the loop, never stalling it.
 """
 from __future__ import annotations
@@ -50,6 +59,7 @@ import blosc2
 import numpy as np
 import torch
 from fastapi import Depends, FastAPI, HTTPException, Header, Request, Response, status
+from starlette.concurrency import run_in_threadpool
 from nnInteractive.inference.inference_session import nnInteractiveInferenceSession
 from nnInteractive.inference.remote._protocol import (
@@ -443,7 +453,9 @@ def make_app(
         session._last_paste_bbox = None
         return Response(
             # Segmentations compress best with NOFILTER; skip auto-selection.
-            content=pack_array(sub, filters=[blosc2.Filter.NOFILTER]),
+            content=pack_array(
+                sub, filters=[blosc2.Filter.NOFILTER], nthreads=min(session.torch_n_threads, os.cpu_count())
+            ),
             media_type=CONTENT_TYPE_OCTET_STREAM,
             headers={META_HEADER: json.dumps(meta, separators=(",", ":"))},
         )
@@ -568,17 +580,24 @@ def make_app(
     async def set_image(request: Request, entry: SessionEntry = lease) -> dict:
         meta = _parse_meta_header(request.headers.get(META_HEADER))
         body = await request.body()
-        image = unpack_array(body)
         image_properties = meta.get("image_properties") or {}
-        def _do(session):
-            session.set_image(image, image_properties)
-            # set_image preprocesses in a background thread; force completion so
-            # subsequent calls can safely use original_image_shape.
-            session._finish_preprocessing_and_initialize_interactions()
-            return {"original_image_shape": list(session.original_image_shape)}
+        # Decompression + full-volume preprocessing are CPU-bound and can run
+        # for many seconds on a large image. Run them in a worker thread so the
+        # event loop keeps servicing heartbeats/healthz and the reaper.
+        def _work():
+            image = unpack_array(body)
-        return _under_session_lock(entry, _do)
+            def _do(session):
+                session.set_image(image, image_properties)
+                # set_image preprocesses in a background thread; force completion
+                # so subsequent calls can safely use original_image_shape.
+                session._finish_preprocessing_and_initialize_interactions()
+                return {"original_image_shape": list(session.original_image_shape)}
+            return _under_session_lock(entry, _do)
+        return await run_in_threadpool(_work)
     @app.post(PATH_SET_TARGET_BUFFER, dependencies=[auth])
     def set_target_buffer(payload: dict, entry: SessionEntry = lease) -> dict:
@@ -650,41 +669,53 @@ def make_app(
     async def _handle_mask_interaction(request: Request, entry: SessionEntry, kind: str) -> Response:
         meta = _parse_meta_header(request.headers.get(META_HEADER))
         body = await request.body()
-        mask = unpack_array(body)
         run_prediction = bool(meta.get("run_prediction", True))
         interaction_bbox = meta.get("interaction_bbox")
         if interaction_bbox is not None:
             interaction_bbox = [list(b) for b in interaction_bbox]
-        def _do(session):
-            method = session.add_scribble_interaction if kind == "scribble" else session.add_lasso_interaction
-            method(
-                mask,
-                bool(meta["include_interaction"]),
-                run_prediction=run_prediction,
-                override_capability_checks=bool(meta.get("override_capability_checks", False)),
-                interaction_bbox=interaction_bbox,
-            )
-            return _build_prediction_response(session, run_prediction)
+        # Decompression + prediction + response compression are CPU/GPU-bound;
+        # run them off the event loop (see set_image).
+        def _work():
+            mask = unpack_array(body)
-        return _under_session_and_gpu_lock(entry, _do)
+            def _do(session):
+                method = session.add_scribble_interaction if kind == "scribble" else session.add_lasso_interaction
+                method(
+                    mask,
+                    bool(meta["include_interaction"]),
+                    run_prediction=run_prediction,
+                    override_capability_checks=bool(meta.get("override_capability_checks", False)),
+                    interaction_bbox=interaction_bbox,
+                )
+                return _build_prediction_response(session, run_prediction)
+            return _under_session_and_gpu_lock(entry, _do)
+        return await run_in_threadpool(_work)
     @app.post(PATH_ADD_INITIAL_SEG, dependencies=[auth])
     async def add_initial_seg_interaction(request: Request, entry: SessionEntry = lease) -> Response:
         meta = _parse_meta_header(request.headers.get(META_HEADER))
         body = await request.body()
-        initial_seg = unpack_array(body)
         run_prediction = bool(meta.get("run_prediction", False))
-        def _do(session):
-            session.add_initial_seg_interaction(
-                initial_seg=initial_seg,
-                run_prediction=run_prediction,
-                override_capability_checks=bool(meta.get("override_capability_checks", False)),
-            )
-            return _build_prediction_response(session, run_prediction)
+        # Decompression + (optional) prediction are CPU/GPU-bound; run them off
+        # the event loop (see set_image).
+        def _work():
+            initial_seg = unpack_array(body)
-        return _under_session_and_gpu_lock(entry, _do)
+            def _do(session):
+                session.add_initial_seg_interaction(
+                    initial_seg=initial_seg,
+                    run_prediction=run_prediction,
+                    override_capability_checks=bool(meta.get("override_capability_checks", False)),
+                )
+                return _build_prediction_response(session, run_prediction)
+            return _under_session_and_gpu_lock(entry, _do)
+        return await run_in_threadpool(_work)
     return app

{nninteractive-2.3.0 → nninteractive-2.3.2}/nnInteractive.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nnInteractive
-Version: 2.3.0
+Version: 2.3.2
 Summary: Inference code for nnInteractive
 Author: Helmholtz Imaging Applied Computer Vision Lab
 Author-email: Fabian Isensee <f.isensee@dkfz-heidelberg.de>

{nninteractive-2.3.0 → nninteractive-2.3.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "nnInteractive"
-version = "2.3.0"
+version = "2.3.2"
 requires-python = ">=3.10"
 description = "Inference code for nnInteractive"
 readme = "readme.md"