PyPI - nv-ingest-api - Versions diffs - 2025.10.28.dev20251028__tar.gz → 2025.11.8.dev20251108__tar.gz - Mend

nv-ingest-api 2025.10.28.dev20251028__tar.gz → 2025.11.8.dev20251108__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (182) hide show

{nv_ingest_api-2025.10.28.dev20251028/src/nv_ingest_api.egg-info → nv_ingest_api-2025.11.8.dev20251108}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nv-ingest-api
-Version: 2025.10.28.dev20251028
+Version: 2025.11.8.dev20251108
 Summary: Python module with core document ingestion functions.
 Author-email: Jeremy Dyer <jdyer@nvidia.com>
 License:                                  Apache License

{nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/nim_client.py RENAMED Viewed

@@ -5,6 +5,7 @@
 import hashlib
 import json
 import logging
+import re
 import threading
 import time
 import queue
@@ -24,6 +25,12 @@ from nv_ingest_api.util.string_processing import generate_url
 logger = logging.getLogger(__name__)
+# Regex pattern to detect CUDA-related errors in Triton gRPC responses.
+# It is applied with .search() to the message of gRPC INTERNAL errors, so a match
+# anywhere in the message counts; matching is case-insensitive (re.IGNORECASE).
+# NOTE(review): the last alternative ("TritonModelException: failed to copy data: ...")
+# appears to be already covered by "failed to (copy|load|perform) .*: .*" - confirm
+# before simplifying.
+CUDA_ERROR_REGEX = re.compile(
+    r"(model reload|illegal memory access|illegal instruction|invalid argument|failed to (copy|load|perform) .*: .*|TritonModelException: failed to copy data: .*)",  # noqa: E501
+    re.IGNORECASE,
+)
 # A simple structure to hold a request's data and its Future for the result
 InferenceRequest = namedtuple("InferenceRequest", ["data", "future", "model_name", "dims", "kwargs"])
@@ -40,7 +47,7 @@ class NimClient:
         endpoints: Tuple[str, str],
         auth_token: Optional[str] = None,
         timeout: float = 120.0,
-        max_retries: int = 5,
+        max_retries: int = 10,
         max_429_retries: int = 5,
         enable_dynamic_batching: bool = False,
         dynamic_batch_timeout: float = 0.1,  # 100 milliseconds
@@ -60,11 +67,11 @@ class NimClient:
         auth_token : str, optional
             Authorization token for HTTP requests (default: None).
         timeout : float, optional
-            Timeout for HTTP requests in seconds (default: 30.0).
+            Timeout for HTTP requests in seconds (default: 120.0).
         max_retries : int, optional
-            The maximum number of retries for non-429 server-side errors (default: 5).
+            The maximum number of retries for non-429 server-side errors (default: 10).
         max_429_retries : int, optional
-            The maximum number of retries specifically for 429 errors (default: 10).
+            The maximum number of retries specifically for 429 errors (default: 5).
         Raises
         ------
@@ -323,7 +330,7 @@ class NimClient:
         outputs = [grpcclient.InferRequestedOutput(output_name) for output_name in output_names]
-        base_delay = 0.5
+        base_delay = 2.0
         attempt = 0
         retries_429 = 0
         max_grpc_retries = self.max_429_retries
@@ -342,8 +349,58 @@ class NimClient:
                     return [response.as_numpy(output.name()) for output in outputs]
             except grpcclient.InferenceServerException as e:
-                status = e.status()
-                if status == "StatusCode.UNAVAILABLE" and "Exceeds maximum queue size".lower() in e.message().lower():
+                status = str(e.status())
+                message = e.message()
+                # Handle CUDA memory errors
+                if status == "StatusCode.INTERNAL":
+                    if CUDA_ERROR_REGEX.search(message):
+                        logger.warning(
+                            f"Received gRPC INTERNAL error with CUDA-related message for model '{model_name}'. "
+                            f"Attempt {attempt + 1} of {self.max_retries}. Message (truncated): {message[:500]}"
+                        )
+                        if attempt >= self.max_retries - 1:
+                            logger.error(f"Max retries exceeded for CUDA errors on model '{model_name}'.")
+                            raise e
+                        # Try to reload models before retrying
+                        model_reload_succeeded = reload_models(client=self.client, client_timeout=self.timeout)
+                        if not model_reload_succeeded:
+                            logger.error(f"Failed to reload models for model '{model_name}'.")
+                    else:
+                        logger.warning(
+                            f"Received gRPC INTERNAL error for model '{model_name}'. "
+                            f"Attempt {attempt + 1} of {self.max_retries}. Message (truncated): {message[:500]}"
+                        )
+                        if attempt >= self.max_retries - 1:
+                            logger.error(f"Max retries exceeded for INTERNAL error on model '{model_name}'.")
+                            raise e
+                    # Common retry logic for both CUDA and non-CUDA INTERNAL errors
+                    backoff_time = base_delay * (2**attempt)
+                    time.sleep(backoff_time)
+                    attempt += 1
+                    continue
+                # Handle errors that can occur after model reload (NOT_FOUND, model not loaded)
+                if status == "StatusCode.NOT_FOUND":
+                    logger.warning(
+                        f"Received gRPC {status} error for model '{model_name}'. "
+                        f"Attempt {attempt + 1} of {self.max_retries}. Message: {message[:500]}"
+                    )
+                    if attempt >= self.max_retries - 1:
+                        logger.error(f"Max retries exceeded for model not found errors on model '{model_name}'.")
+                        raise e
+                    # Retry with exponential backoff WITHOUT reloading
+                    backoff_time = base_delay * (2**attempt)
+                    logger.info(
+                        f"Retrying after {backoff_time}s backoff for model not found error on model '{model_name}'."
+                    )
+                    time.sleep(backoff_time)
+                    attempt += 1
+                    continue
+                if status == "StatusCode.UNAVAILABLE" and "Exceeds maximum queue size".lower() in message.lower():
                     retries_429 += 1
                     logger.warning(
                         f"Received gRPC {status} for model '{model_name}'. "
@@ -357,13 +414,12 @@ class NimClient:
                     time.sleep(backoff_time)
                     continue
-                else:
-                    # For other server-side errors (e.g., INVALID_ARGUMENT, NOT_FOUND),
-                    # retrying will not help. We should fail fast.
-                    logger.error(
-                        f"Received non-retryable gRPC error from Triton for model '{model_name}': {e.message()}"
-                    )
-                    raise
+                # For other server-side errors (e.g., INVALID_ARGUMENT, etc.),
+                # fail fast as retrying will not help
+                logger.error(
+                    f"Received non-retryable gRPC error {status} from Triton for model '{model_name}': {message}"
+                )
+                raise
             except Exception as e:
                 # Catch any other unexpected exceptions (e.g., network issues not caught by Triton client)
@@ -681,3 +737,57 @@ class NimClientManager:
 def get_nim_client_manager(*args, **kwargs) -> NimClientManager:
     """Returns the singleton instance of the NimClientManager."""
     return NimClientManager(*args, **kwargs)
+def reload_models(
+    client: grpcclient.InferenceServerClient,
+    exclude: Optional[list[str]] = None,
+    client_timeout: int = 120,
+) -> bool:
+    """
+    Reloads all models in the Triton server except for the models in the exclude list.
+    Triton client errors raised while listing, unloading, loading or probing models are
+    logged and reported through the return value instead of being raised, so a caller
+    that invokes this from inside its own error handler keeps control of its retry loop.
+    Parameters
+    ----------
+    client : grpcclient.InferenceServerClient
+        The gRPC client connected to the Triton server.
+    exclude : list[str], optional
+        A list of model names to exclude from reloading (default: None, exclude nothing).
+    client_timeout : int, optional
+        Timeout for client operations in seconds (default: 120).
+    Returns
+    -------
+    bool
+        True if all models were successfully reloaded, False otherwise.
+    """
+    # None instead of a shared mutable default list; the set gives O(1) membership tests.
+    excluded = set(exclude or ())
+    try:
+        model_index = client.get_model_repository_index()
+    except grpcclient.InferenceServerException as e:
+        logger.error(f"[ERROR] Failed to list models for reload: {e.message()}")
+        return False
+    names = [m.name for m in model_index.models if m.name not in excluded]
+    logger.info(f"Reloading {len(names)} model(s): {', '.join(names) if names else '(none)'}")
+    # 1) Unload
+    for name in names:
+        try:
+            client.unload_model(name)
+        except grpcclient.InferenceServerException as e:
+            msg = e.message()
+            if "explicit model load / unload" in msg.lower():
+                status = e.status()
+                logger.warning(
+                    f"[SKIP Model Reload] Explicit model control disabled; cannot unload '{name}'. Status: {status}."
+                )
+            else:
+                logger.error(f"[ERROR] Failed to unload '{name}': {msg}")
+            return False
+    # 2) Load
+    # Keep going after a failed load: stopping early would leave every remaining model
+    # unloaded, which is worse than the state we started from.
+    failed_to_load = []
+    for name in names:
+        try:
+            client.load_model(name)
+        except grpcclient.InferenceServerException as e:
+            logger.error(f"[ERROR] Failed to load '{name}': {e.message()}")
+            failed_to_load.append(name)
+    if failed_to_load:
+        return False
+    # 3) Readiness check
+    for name in names:
+        try:
+            ready = client.is_model_ready(model_name=name, client_timeout=client_timeout)
+        except grpcclient.InferenceServerException as e:
+            logger.error(f"[ERROR] Readiness check failed for '{name}': {e.message()}")
+            return False
+        if not ready:
+            logger.warning(f"[Warning] Triton Not ready: {name}")
+            return False
+    logger.info("✅ Reload of models complete.")
+    return True

{nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py RENAMED Viewed

@@ -43,6 +43,24 @@ class PdfConfigSchema(BaseModelNoExt):
     split_page_count: Annotated[int, Field(ge=1)] = 32
+class RoutingOptionsSchema(BaseModelNoExt):
+    """
+    Routing options attached to an ingest job.
+    """
+    # Optional name of the queue class the QoS scheduler should route the job to.
+    queue_hint: Optional[str] = None
+    @field_validator("queue_hint")
+    @classmethod
+    def validate_queue_hint(cls, v):
+        """
+        Normalize ``queue_hint`` to lower case and reject unknown queue names.
+        ``None`` passes through unchanged.
+        """
+        if v is None:
+            return v
+        if not isinstance(v, str):
+            raise ValueError("queue_hint must be a string")
+        normalized = v.lower()
+        if normalized in {"default", "immediate", "micro", "small", "medium", "large"}:
+            return normalized
+        raise ValueError("queue_hint must be one of: default, immediate, micro, small, medium, large")
 # Ingest Task Schemas
@@ -128,6 +146,7 @@ class IngestTaskEmbedSchema(BaseModelNoExt):
     audio_elements_modality: Optional[str] = None
     custom_content_field: Optional[str] = None
     result_target_field: Optional[str] = None
+    dimensions: Optional[int] = None
 class IngestTaskVdbUploadSchema(BaseModelNoExt):
@@ -283,8 +302,27 @@ class IngestJobSchema(BaseModelNoExt):
     job_id: Union[str, int]
     tasks: List[IngestTaskSchema]
     tracing_options: Optional[TracingOptionsSchema] = None
+    routing_options: Optional[RoutingOptionsSchema] = None
     pdf_config: Optional[PdfConfigSchema] = None
+    @model_validator(mode="before")
+    @classmethod
+    def migrate_queue_hint(cls, values):
+        """
+        Backward-compatibility shim: if a legacy client sends
+        tracing_options.queue_hint, move it into routing_options.queue_hint.
+        The input is returned untouched when it is not a dict, when tracing_options or
+        routing_options is not a dict, when there is no legacy hint, or when
+        routing_options already carries its own queue_hint. Otherwise a shallow copy
+        with the hint moved is returned; the caller's dicts are never mutated.
+        """
+        # "before" validators see the raw input, which is not guaranteed to be a dict.
+        if not isinstance(values, dict):
+            return values
+        topt = values.get("tracing_options") or {}
+        ropt = values.get("routing_options") or {}
+        # Only plain dicts can be rewritten. Checking up front replaces the former
+        # blanket "except Exception: pass", which could pop the hint out of
+        # tracing_options and then silently lose it when routing_options was not a dict.
+        if not isinstance(topt, dict) or not isinstance(ropt, dict):
+            return values
+        if "queue_hint" not in topt or "queue_hint" in ropt:
+            return values
+        new_topt = dict(topt)
+        new_ropt = dict(ropt)
+        new_ropt["queue_hint"] = new_topt.pop("queue_hint")
+        migrated = dict(values)
+        migrated["routing_options"] = new_ropt
+        migrated["tracing_options"] = new_topt
+        return migrated
 # ------------------------------------------------------------------------------
 # Utility Functions

{nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py RENAMED Viewed

@@ -244,7 +244,7 @@ class TableMetadataSchema(BaseModelNoExt):
 class ChartMetadataSchema(BaseModelNoExt):
     """
-    The schema for extracted chart content.
+    The schema for table content extracted from charts.
     """
     caption: str = ""

{nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py RENAMED Viewed

@@ -10,7 +10,7 @@ class ImageCaptionExtractionSchema(BaseModel):
     api_key: str = Field(default="", repr=False)
     endpoint_url: str = "https://integrate.api.nvidia.com/v1/chat/completions"
     prompt: str = "Caption the content of this image:"
-    model_name: str = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
+    model_name: str = "nvidia/nemotron-nano-12b-v2-vl"
     raise_on_failure: bool = False
     model_config = ConfigDict(extra="forbid")

{nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py RENAMED Viewed

@@ -30,6 +30,7 @@ class TextEmbeddingSchema(BaseModel):
     audio_elements_modality: str = Field(default="text")
     custom_content_field: Optional[str] = None
     result_target_field: Optional[str] = None
+    dimensions: Optional[int] = None
     model_config = ConfigDict(extra="forbid")

{nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/transform/embed_text.py RENAMED Viewed

@@ -40,6 +40,7 @@ def _make_async_request(
     truncate: str,
     filter_errors: bool,
     modalities: Optional[List[str]] = None,
+    dimensions: Optional[int] = None,
 ) -> list:
     """
     Interacts directly with the NIM embedding service to calculate embeddings for a batch of prompts.
@@ -96,6 +97,7 @@ def _make_async_request(
             model=embedding_model,
             encoding_format=encoding_format,
             extra_body=extra_body,
+            dimensions=dimensions,
         )
         response["embedding"] = resp.data
@@ -124,6 +126,7 @@ def _async_request_handler(
     truncate: str,
     filter_errors: bool,
     modalities: Optional[List[str]] = None,
+    dimensions: Optional[int] = None,
 ) -> List[dict]:
     """
     Gathers calculated embedding results from the NIM embedding service concurrently.
@@ -168,6 +171,7 @@ def _async_request_handler(
                 truncate=truncate,
                 filter_errors=filter_errors,
                 modalities=modality_batch,
+                dimensions=dimensions,
             )
             for prompt_batch, modality_batch in zip(prompts, modalities)
         ]
@@ -186,6 +190,7 @@ def _async_runner(
     truncate: str,
     filter_errors: bool,
     modalities: Optional[List[str]] = None,
+    dimensions: Optional[int] = None,
 ) -> dict:
     """
     Concurrently launches all NIM embedding requests and flattens the results.
@@ -224,6 +229,7 @@ def _async_runner(
         truncate,
         filter_errors,
         modalities=modalities,
+        dimensions=dimensions,
     )
     flat_results = {"embeddings": [], "info_msgs": []}
@@ -562,6 +568,7 @@ def transform_create_text_embeddings_internal(
     endpoint_url = task_config.get("endpoint_url") or transform_config.embedding_nim_endpoint
     model_name = task_config.get("model_name") or transform_config.embedding_model
     custom_content_field = task_config.get("custom_content_field") or transform_config.custom_content_field
+    dimensions = task_config.get("dimensions") or transform_config.dimensions
     if execution_trace_log is None:
         execution_trace_log = {}
@@ -636,6 +643,7 @@ def transform_create_text_embeddings_internal(
                 transform_config.truncate,
                 False,
                 modalities=modality_batches,
+                dimensions=dimensions,
             )
             # Build a simple row index -> embedding map
             embeddings_dict = dict(
@@ -680,6 +688,7 @@ def transform_create_text_embeddings_internal(
                 transform_config.input_type,
                 transform_config.truncate,
                 False,
+                dimensions=dimensions,
             )
             custom_embeddings_dict = dict(
                 zip(

nv_ingest_api-2025.11.8.dev20251108/src/nv_ingest_api/util/message_brokers/qos_scheduler.py ADDED Viewed

@@ -0,0 +1,283 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+from typing import Dict, Optional
+import logging
+import time
+import random
+class _SchedulingStrategy:
+    """
+    Interface shared by the scheduling strategies in this module. A strategy makes one
+    attempt to obtain a job from the broker queues and hands back the job, or None
+    when nothing was available.
+    """
+    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
+        """
+        Make a single attempt to fetch a job; subclasses must override this.
+        Parameters
+        ----------
+        client
+            Broker client; implementations call ``client.fetch_message(queue_name, timeout)``.
+        queues : Dict[str, str]
+            Maps a logical queue key (e.g. "default", "immediate") to the broker queue name.
+        order : list[str]
+            Logical keys of the non-immediate queues to consider.
+        Raises
+        ------
+        NotImplementedError
+            Always, in this base class.
+        """
+        raise NotImplementedError
+class _LotteryStrategy(_SchedulingStrategy):
+    """
+    Lottery scheduling with fixed weights.
+    Weights: micro=4, small=2, large=1, medium=1, default=1
+    Each sweep draws queues at random, weighted as above and without replacement,
+    until one yields a job or every queue in the order has been tried once.
+    """
+    def __init__(self, prioritize_immediate: bool = True) -> None:
+        self._weights: Dict[str, int] = dict(micro=4, small=2, large=1, medium=1, default=1)
+        self._prioritize_immediate: bool = bool(prioritize_immediate)
+    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
+        """
+        Run one non-blocking lottery sweep and return the first job found, else None.
+        """
+        # The immediate queue, when prioritized, is polled before any draw.
+        if self._prioritize_immediate:
+            try:
+                urgent = client.fetch_message(queues["immediate"], 0)
+            except TimeoutError:
+                urgent = None
+            if urgent is not None:
+                return urgent
+        remaining = list(order)
+        tickets = [self._weights[name] for name in remaining]
+        while remaining:
+            winner = random.choices(remaining, weights=tickets, k=1)[0]
+            # Retire the drawn queue up front so it is polled at most once per sweep.
+            pos = remaining.index(winner)
+            remaining.pop(pos)
+            tickets.pop(pos)
+            try:
+                job = client.fetch_message(queues[winner], 0)
+            except TimeoutError:
+                continue
+            if job is not None:
+                return job
+        return None
+class _SimpleStrategy(_SchedulingStrategy):
+    """
+    Simple strategy: a single blocking fetch from the 'default' queue.
+    Unlike the other strategies this one blocks, for up to ``block_timeout`` seconds,
+    and it never looks at the immediate queue or at ``order``.
+    Parameters
+    ----------
+    block_timeout : float, optional
+        Maximum time in seconds to block on the default queue (default: 30.0).
+    """
+    def __init__(self, block_timeout: float = 30.0) -> None:
+        self._block_timeout: float = float(block_timeout)
+    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
+        """
+        Block on the default queue and return the first available job, or None if the
+        client raises TimeoutError.
+        """
+        try:
+            return client.fetch_message(queues["default"], self._block_timeout)
+        except TimeoutError:
+            return None
+class _RoundRobinStrategy(_SchedulingStrategy):
+    """
+    Plain round-robin over the queue order supplied at construction time.
+    The rotation position is kept on the instance, so it carries over between calls.
+    """
+    def __init__(self, order: list[str], prioritize_immediate: bool = True) -> None:
+        self._order = list(order)
+        self._len = len(self._order)
+        self._idx = 0
+        self._prioritize_immediate: bool = bool(prioritize_immediate)
+    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
+        """
+        Poll each queue once without blocking, starting at the saved rotation position.
+        The ``order`` argument is not consulted; the order given to ``__init__`` is used.
+        """
+        # The immediate queue, when prioritized, is polled before the rotation.
+        if self._prioritize_immediate:
+            try:
+                urgent = client.fetch_message(queues["immediate"], 0)
+            except TimeoutError:
+                urgent = None
+            if urgent is not None:
+                return urgent
+        base = self._idx
+        for offset in range(self._len):
+            pos = (base + offset) % self._len
+            name = self._order[pos]
+            try:
+                job = client.fetch_message(queues[name], 0)
+            except TimeoutError:
+                continue
+            if job is None:
+                continue
+            # The next call resumes with the queue after the one that produced a job.
+            self._idx = (pos + 1) % self._len
+            return job
+        return None
+class _WeightedRoundRobinStrategy(_SchedulingStrategy):
+    """
+    Smooth Weighted Round Robin (SWRR) using weights micro=4, small=2, large=1, medium=1, default=1.
+    Maintains current weights across calls.
+    One call to ``try_once`` optionally polls the immediate queue, then makes up to
+    ``len(order)`` weighted selections (dropping queues that turn out to be empty), and
+    finally polls every queue in ``order`` once more before giving up.
+    """
+    def __init__(self, prioritize_immediate: bool = True) -> None:
+        # Fixed per-queue weights; every name passed in ``order`` must be a key here.
+        self._weights: Dict[str, int] = {
+            "micro": 4,
+            "small": 2,
+            "large": 1,
+            "medium": 1,
+            "default": 1,
+        }
+        # Running per-queue counters; kept on the instance so they persist between calls.
+        self._current: Dict[str, int] = {k: 0 for k in self._weights.keys()}
+        # Sum of all weights, subtracted from the counter of whichever queue is selected.
+        self._total: int = sum(self._weights.values())
+        self._prioritize_immediate: bool = bool(prioritize_immediate)
+    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
+        """
+        Return the first job found by one weighted sweep over ``order``, or None.
+        All fetches use a timeout of 0; a TimeoutError from the client is treated the
+        same as "no job".
+        """
+        # Immediate-first if enabled (non-blocking)
+        if self._prioritize_immediate:
+            try:
+                job = client.fetch_message(queues["immediate"], 0)
+                if job is not None:
+                    return job
+            except TimeoutError:
+                pass
+        # Attempt up to len(order) selections per sweep, excluding queues that prove empty
+        active = list(order)
+        for _ in range(len(order)):
+            if not active:
+                break
+            # Credit every still-active queue with its weight, then pick the largest counter.
+            for q in active:
+                self._current[q] += self._weights[q]
+            chosen = max(active, key=lambda q: self._current[q])
+            # The selected queue is charged the full weight total even if it turns out
+            # to be empty below.
+            # NOTE(review): the total covers all five queues even when fewer are active -
+            # confirm this is the intended fairness behaviour.
+            self._current[chosen] -= self._total
+            try:
+                job = client.fetch_message(queues[chosen], 0)
+                if job is not None:
+                    return job
+            except TimeoutError:
+                job = None
+            # If no job available from chosen, exclude it for the remainder of this sweep
+            if job is None and chosen in active:
+                active.remove(chosen)
+        # Fallback: single non-blocking attempt for each queue in order
+        # NOTE(review): by this point every queue in ``order`` has already been polled
+        # once by the loop above, so this is a second poll of each queue.
+        for q in order:
+            try:
+                job = client.fetch_message(queues[q], 0)
+                if job is not None:
+                    return job
+            except TimeoutError:
+                continue
+        return None
+class QosScheduler:
+    """
+    Fetches jobs from a family of broker queues derived from one base queue name.
+    The derived queues are ``<base>`` (default) plus ``<base>_immediate``, ``_micro``,
+    ``_small``, ``_medium`` and ``_large``. Which queue is polled next is decided by the
+    selected strategy:
+    - "lottery" (default): weighted random draw per sweep.
+    - "round_robin": rotating order across calls.
+    - "weighted_round_robin": smooth weighted round robin.
+    - "simple": blocking fetch from the default queue only.
+    Any other strategy name falls back to "lottery" and logs a warning.
+    Parameters
+    ----------
+    base_queue : str
+        Broker queue name that all derived queue names are built from.
+    total_buffer_capacity, num_prefetch_threads, prefetch_poll_interval, prefetch_non_immediate
+        Stored on the instance but not otherwise used by this class.
+    strategy : str, optional
+        Name of the scheduling strategy (default: "lottery").
+    prioritize_immediate : bool, optional
+        Whether non-simple strategies poll the immediate queue first (default: True).
+    """
+    def __init__(
+        self,
+        base_queue: str,
+        total_buffer_capacity: int = 1,
+        num_prefetch_threads: int = 0,
+        prefetch_poll_interval: float = 0.0,
+        prefetch_non_immediate: bool = False,
+        strategy: str = "lottery",
+        prioritize_immediate: bool = True,
+    ) -> None:
+        self.base_queue = base_queue
+        # Logical queue key -> broker queue name
+        self.queues: Dict[str, str] = {
+            "default": f"{base_queue}",
+            "immediate": f"{base_queue}_immediate",
+            "micro": f"{base_queue}_micro",
+            "small": f"{base_queue}_small",
+            "medium": f"{base_queue}_medium",
+            "large": f"{base_queue}_large",
+        }
+        # Priority order for multi-queue fetching; "immediate" always first
+        self._priority_order = [
+            "immediate",
+            "micro",
+            "small",
+            "medium",
+            "large",
+            "default",
+        ]
+        # Non-immediate queue order handed to the strategies
+        self._non_immediate_order = ["micro", "small", "large", "medium", "default"]
+        # Logger
+        self._logger = logging.getLogger(__name__)
+        # Prefetch settings are recorded only; this class starts no background threads.
+        self._total_buffer_capacity: int = int(total_buffer_capacity)
+        self._num_prefetch_threads: int = int(num_prefetch_threads)
+        self._prefetch_poll_interval: float = float(prefetch_poll_interval)
+        self._prefetch_non_immediate: bool = bool(prefetch_non_immediate)
+        # Strategy selection
+        self._simple_mode: bool = False
+        if strategy == "simple":
+            self._strategy_impl: _SchedulingStrategy = _SimpleStrategy()
+            self._simple_mode = True
+        elif strategy == "round_robin":
+            self._strategy_impl = _RoundRobinStrategy(self._non_immediate_order, prioritize_immediate)
+        elif strategy == "weighted_round_robin":
+            self._strategy_impl = _WeightedRoundRobinStrategy(prioritize_immediate)
+        else:
+            if strategy != "lottery":
+                # Keep the historical fallback, but make a misspelled strategy visible.
+                self._logger.warning(f"Unknown QoS scheduling strategy '{strategy}'; falling back to 'lottery'.")
+            self._strategy_impl = _LotteryStrategy(prioritize_immediate)
+    # Context manager helpers for clean shutdown
+    def __enter__(self) -> "QosScheduler":
+        return self
+    def __exit__(self, exc_type, exc, tb) -> None:
+        self.close()
+    # ---------------------------- Public API ----------------------------
+    def close(self) -> None:
+        """
+        Cleanly close the scheduler. No-op for the current implementation
+        since we do not spin background threads.
+        """
+        return None
+    def fetch_next(self, client, timeout: float = 0.0) -> Optional[dict]:
+        """
+        Fetch the next job using the configured strategy.
+        Behavior:
+        - Simple mode: one call to the simple strategy, which blocks on the default
+          queue for its own fixed duration; ``timeout`` is ignored.
+        - Other strategies: run one sweep (the strategy polls 'immediate' first when
+          prioritize_immediate was enabled). If no job is found in a full pass:
+          - If timeout <= 0: return None.
+          - Else: sleep in increments of at most 0.5s and retry until the elapsed
+            time reaches timeout, then return None.
+        Parameters
+        ----------
+        client
+            Broker client passed through to the strategy.
+        timeout : float, optional
+            Maximum time in seconds to keep retrying (default: 0.0, single sweep).
+        Returns
+        -------
+        Optional[dict]
+            The job returned by the broker client, or None if none was found in time.
+        """
+        if self._simple_mode:
+            return self._strategy_impl.try_once(client, self.queues, self._non_immediate_order)
+        start = time.monotonic()
+        while True:
+            job = self._strategy_impl.try_once(client, self.queues, self._non_immediate_order)
+            if job is not None:
+                return job
+            # No job found in this sweep
+            if timeout <= 0:
+                return None
+            remaining = timeout - (time.monotonic() - start)
+            if remaining <= 0:
+                return None
+            # Sleep up to 0.5s, but not beyond remaining timeout
+            time.sleep(min(0.5, remaining))

{nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py RENAMED Viewed

@@ -35,6 +35,7 @@ class SimpleClient(MessageBrokerClientBase):
         connection_timeout: int = 300,
         max_pool_size: int = 128,
         use_ssl: bool = False,
+        api_version: str = "v1",
     ):
         """
         Initialize the SimpleClient with configuration parameters.

{nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py RENAMED Viewed

@@ -5,8 +5,9 @@
 import logging
 import math
-import multiprocessing as mp
 import os
+import sys
+import multiprocessing as mp
 from threading import Lock
 from typing import Any, Callable, Optional
@@ -103,7 +104,12 @@ class ProcessWorkerPoolSingleton:
             The total number of worker processes to start.
         """
         self._total_workers = total_max_workers
-        self._context: mp.context.ForkContext = mp.get_context("fork")
+        start_method = "fork"
+        if sys.platform.lower() == "darwin":
+            start_method = "spawn"
+        self._context: mp.context.ForkContext = mp.get_context(start_method)
         # Bounded task queue: maximum tasks queued = 2 * total_max_workers.
         self._task_queue: mp.Queue = self._context.Queue(maxsize=2 * total_max_workers)
         self._next_task_id: int = 0

nv-ingest-api 2025.10.28.dev20251028__tar.gz → 2025.11.8.dev20251108__tar.gz

nv-ingest-api 2025.10.28.dev20251028__tar.gz → 2025.11.8.dev20251108__tar.gz