PyPI - nv-ingest - Versions diffs - 2025.10.22.dev20251022__py3-none-any.whl → 2025.11.19.dev20251119__py3-none-any.whl - Mend

nv-ingest 2025.10.22.dev20251022__py3-none-any.whl → 2025.11.19.dev20251119__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

nv_ingest/framework/orchestration/process/termination.py CHANGED Viewed

@@ -19,20 +19,45 @@ logger = logging.getLogger(__name__)
 def _safe_log(level: int, msg: str) -> None:
-    """Best-effort logging that won't crash during interpreter shutdown."""
+    """Best-effort logging that won't emit handler tracebacks on closed streams.
+    Temporarily disables logging.raiseExceptions to prevent the logging module
+    from printing "--- Logging error ---" to stderr if a handler's stream is
+    already closed (common during process teardown). Falls back to writing to
+    sys.__stderr__ if available.
+    """
     try:
-        logger.log(level, msg)
+        import logging as _logging
+        prev = getattr(_logging, "raiseExceptions", True)
+        # Suppress handler errors being printed to stderr
+        _logging.raiseExceptions = False
+        # If there are no handlers, skip and use stderr fallback
+        if logger.handlers:
+            logger.log(level, msg)
+            return
     except Exception:
+        # Intentionally ignore and try stderr fallback
+        pass
+    finally:
         try:
-            # Fallback to stderr if available
-            import sys
+            import logging as _logging  # re-import safe even if earlier failed
-            if hasattr(sys, "__stderr__") and sys.__stderr__:
-                sys.__stderr__.write(msg + "\n")
-                sys.__stderr__.flush()
+            _logging.raiseExceptions = prev  # type: ignore[name-defined]
         except Exception:
             pass
+    # Fallback to stderr if available
+    try:
+        import sys
+        if hasattr(sys, "__stderr__") and sys.__stderr__:
+            sys.__stderr__.write(msg + "\n")
+            sys.__stderr__.flush()
+    except Exception:
+        pass
 def kill_pipeline_process_group(process) -> None:
     """
@@ -74,7 +99,17 @@ def kill_pipeline_process_group(process) -> None:
     try:
         # Send graceful termination to the entire process group
-        os.killpg(os.getpgid(pid), signal.SIGTERM)
+        try:
+            pgid = os.getpgid(pid)
+        except Exception:
+            # Process already gone
+            _safe_log(logging.DEBUG, f"Process group for PID {pid} not found during SIGTERM phase")
+            return
+        try:
+            os.killpg(pgid, signal.SIGTERM)
+        except ProcessLookupError:
+            _safe_log(logging.DEBUG, f"Process group for PID {pid} no longer exists (SIGTERM)")
+            return
         # If we have a Process handle, give it a chance to exit cleanly
         if proc is not None and hasattr(proc, "join"):
@@ -95,7 +130,12 @@ def kill_pipeline_process_group(process) -> None:
         if still_alive:
             _safe_log(logging.WARNING, "Process group did not terminate gracefully, using SIGKILL")
             try:
-                os.killpg(os.getpgid(pid), signal.SIGKILL)
+                try:
+                    pgid2 = os.getpgid(pid)
+                except Exception:
+                    _safe_log(logging.DEBUG, f"Process group for PID {pid} vanished before SIGKILL")
+                    return
+                os.killpg(pgid2, signal.SIGKILL)
             finally:
                 if proc is not None and hasattr(proc, "join"):
                     try:

nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py CHANGED Viewed

@@ -152,11 +152,11 @@ if __name__ == "__main__":
     os.environ["OCR_MODEL_NAME"] = "paddle"
     os.environ["NEMORETRIEVER_PARSE_HTTP_ENDPOINT"] = "https://integrate.api.nvidia.com/v1/chat/completions"
     os.environ["VLM_CAPTION_ENDPOINT"] = "https://integrate.api.nvidia.com/v1/chat/completions"
-    os.environ["VLM_CAPTION_MODEL_NAME"] = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
+    os.environ["VLM_CAPTION_MODEL_NAME"] = "nvidia/nemotron-nano-12b-v2-vl"
     logger.info("Environment variables set.")
     image_caption_endpoint_url = "https://integrate.api.nvidia.com/v1/chat/completions"
-    model_name = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
+    model_name = "nvidia/nemotron-nano-12b-v2-vl"
     yolox_grpc, yolox_http, yolox_auth, yolox_protocol = get_nim_service("yolox")
     (
         yolox_table_structure_grpc,

nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py CHANGED Viewed

@@ -5,7 +5,6 @@
 import logging
 from typing import Optional
 import ray
 from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
@@ -67,7 +66,6 @@ class AudioExtractorStage(RayActorStage):
         # Extract the DataFrame payload.
         df_ledger = control_message.payload()
         self._logger.debug("Extracted payload with %d rows.", len(df_ledger))
         # Remove the "audio_data_extract" task from the message to obtain task-specific configuration.
         task_config = remove_task_by_type(control_message, "extract")
         self._logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))

nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py ADDED Viewed

@@ -0,0 +1,71 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
+# All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+import logging
+import ray
+from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
+from nv_ingest.framework.util.flow_control import filter_by_task
+from nv_ingest_api.internal.extract.image.ocr_extractor import extract_text_data_from_image_internal
+from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage, remove_task_by_type
+from nv_ingest_api.internal.primitives.tracing.tagging import traceable, set_trace_timestamps_with_parent_context
+from nv_ingest_api.internal.schemas.extract.extract_ocr_schema import OCRExtractorSchema
+from nv_ingest_api.util.exception_handlers.decorators import nv_ingest_node_failure_try_except
+from typing import Optional
+from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
+logger = logging.getLogger(__name__)
+@ray.remote
+class OCRExtractorStage(RayActorStage):
+    """
+    A Ray actor stage that extracts text data from image content.
+    It expects an IngestControlMessage containing a DataFrame with image data. It then:
+      1. Removes the "text_data_extract" task from the message.
+      2. Calls the text extraction logic using a validated configuration.
+      3. Updates the message payload with the extracted text DataFrame.
+    """
+    def __init__(self, config: OCRExtractorSchema, stage_name: Optional[str] = None) -> None:
+        super().__init__(config, log_to_stdout=False, stage_name=stage_name)
+        try:
+            self.validated_config = config
+            self._logger.info("OCRExtractorStage configuration validated successfully.")
+        except Exception as e:
+            self._logger.exception(f"Error validating Text extractor config: {e}")
+            raise
+    @nv_ingest_node_failure_try_except()
+    @traceable()
+    @udf_intercept_hook()
+    @filter_by_task(required_tasks=["ocr_data_extract"])
+    def on_data(self, control_message: IngestControlMessage) -> IngestControlMessage:
+        # Extract DataFrame payload
+        df_ledger = control_message.payload()
+        if df_ledger.empty:
+            return control_message
+        # Remove the "text_data_extract" task from the message
+        task_config = remove_task_by_type(control_message, "ocr_data_extract")
+        execution_trace_log = {}
+        new_df, extraction_info = extract_text_data_from_image_internal(
+            df_extraction_ledger=df_ledger,
+            task_config=task_config,
+            extraction_config=self.validated_config,
+            execution_trace_log=execution_trace_log,
+        )
+        control_message.payload(new_df)
+        control_message.set_metadata("ocr_extraction_info", extraction_info)
+        do_trace_tagging = control_message.get_metadata("config::add_trace_tagging") is True
+        if do_trace_tagging and execution_trace_log:
+            parent_name = self.stage_name if self.stage_name else "ocr_extractor"
+            set_trace_timestamps_with_parent_context(control_message, execution_trace_log, parent_name, logger)
+        return control_message

nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py CHANGED Viewed

@@ -30,6 +30,7 @@ from nv_ingest_api.internal.schemas.meta.ingest_job_schema import validate_inges
 from nv_ingest_api.util.message_brokers.simple_message_broker.simple_client import SimpleClient
 from nv_ingest_api.util.service_clients.redis.redis_client import RedisClient
 from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
+from nv_ingest_api.util.message_brokers.qos_scheduler import QosScheduler
 logger = logging.getLogger(__name__)
@@ -89,8 +90,10 @@ class MessageBrokerTaskSourceConfig(BaseModel):
     # Use the discriminated union for broker_client
     broker_client: Union[RedisClientConfig, SimpleClientConfig] = Field(..., discriminator="client_type")
-    task_queue: str = Field(..., description="The name of the queue to fetch tasks from.")
-    poll_interval: float = Field(default=0.1, gt=0, description="Polling interval in seconds.")
+    task_queue: str = Field(
+        ..., description="The base name of the queue to fetch tasks from. Derives sub-queues for fair scheduling."
+    )
+    poll_interval: float = Field(default=0.0, gt=0, description="Polling interval in seconds.")
 @ray.remote
@@ -134,7 +137,29 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
         self._current_backoff_sleep: float = 0.0
         self._last_backoff_log_time: float = 0.0
-        self._logger.debug("MessageBrokerTaskSourceStage initialized. Task queue: %s", self.task_queue)
+        # Initialize QoS scheduler. Use a simple base-queue strategy for SimpleClient.
+        strategy = "simple" if isinstance(self.client, SimpleClient) else "lottery"
+        self.scheduler = QosScheduler(
+            self.task_queue,
+            num_prefetch_threads=6,  # one per category (no-op for simple strategy)
+            total_buffer_capacity=96,  # e.g., ~16 per thread
+            prefetch_poll_interval=0.002,  # faster polling for responsiveness
+            prefetch_non_immediate=True,  # enable prefetch for non-immediate categories
+            strategy=strategy,
+        )
+        self._logger.info(
+            "MessageBrokerTaskSourceStage initialized. Base task queue: %s | Derived queues: %s",
+            self.task_queue,
+            {
+                "immediate": f"{self.task_queue}_immediate",
+                "micro": f"{self.task_queue}_micro",
+                "small": f"{self.task_queue}_small",
+                "medium": f"{self.task_queue}_medium",
+                "large": f"{self.task_queue}_large",
+                "default": f"{self.task_queue}",
+            },
+        )
     # --- Private helper methods ---
     def _create_client(self):
@@ -265,14 +290,21 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
         return control_message
-    def _fetch_message(self, timeout=100):
+    def _fetch_message(self, timeout=0):
         """
-        Fetch a message from the message broker.
+        Fetch a message from the message broker using fair scheduling across derived queues.
+        This is a non-blocking sweep across all queues for the current scheduling cycle. If no
+        message is found across any queue, return None so the caller can sleep briefly.
         """
         try:
-            job = self.client.fetch_message(self.task_queue, timeout)
+            # Use scheduler to fetch next. In simple strategy this will block up to poll_interval on base queue.
+            job = self.scheduler.fetch_next(self.client, timeout=self.config.poll_interval)
             if job is None:
-                self._logger.debug("No message received from '%s'", self.task_queue)
+                self._logger.debug(
+                    "No message received from derived queues for base "
+                    "'%s' (immediate, micro, small, medium, large, default)",
+                    self.task_queue,
+                )
                 # Do not treat normal empty polls as failures
                 self._fetch_failure_count = 0
                 self._current_backoff_sleep = 0.0
@@ -336,7 +368,8 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
         Instead of reading from an input edge, fetch a message from the broker.
         """
         self._logger.debug("read_input: calling _fetch_message()")
-        job = self._fetch_message(timeout=100)
+        # Perform a non-blocking sweep across all queues for this cycle
+        job = self._fetch_message(timeout=0)
         if job is None:
             # Sleep for either the configured poll interval or the current backoff, whichever is larger
             sleep_time = max(self.config.poll_interval, getattr(self, "_current_backoff_sleep", 0.0))

nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py CHANGED Viewed

@@ -218,12 +218,33 @@ class RedisIngestService(IngestServiceMeta):
             ttl_for_result: Optional[int] = (
                 self._result_data_ttl_seconds if self._fetch_mode == FetchMode.NON_DESTRUCTIVE else None
             )
+            # Determine target queue based on optional QoS hint
+            queue_hint = None
+            try:
+                routing_opts = job_spec.get("routing_options") or {}
+                tracing_opts = job_spec.get("tracing_options") or {}
+                queue_hint = routing_opts.get("queue_hint") or tracing_opts.get("queue_hint")
+            except Exception:
+                queue_hint = None
+            allowed = {"default", "immediate", "micro", "small", "medium", "large"}
+            if isinstance(queue_hint, str) and queue_hint in allowed:
+                if queue_hint == "default":
+                    channel_name = self._redis_task_queue
+                else:
+                    channel_name = f"{self._redis_task_queue}_{queue_hint}"
+            else:
+                channel_name = self._redis_task_queue
+            logger.debug(
+                f"Submitting job {trace_id} to queue '{channel_name}' (hint={queue_hint}) "
+                f"with result TTL: {ttl_for_result}"
+            )
             logger.debug(
                 f"Submitting job {trace_id} to queue '{self._redis_task_queue}' with result TTL: {ttl_for_result}"
             )
             await self._run_bounded_to_thread(
                 self._ingest_client.submit_message,
-                channel_name=self._redis_task_queue,
+                channel_name=channel_name,
                 message=job_spec_json,
                 ttl_seconds=ttl_for_result,
             )
@@ -436,12 +457,13 @@ class RedisIngestService(IngestServiceMeta):
         metadata_key = f"parent:{parent_job_id}:metadata"
         try:
-            # Store subjob IDs as a set
-            await self._run_bounded_to_thread(
-                self._ingest_client.get_client().sadd,
-                parent_key,
-                *subjob_ids,
-            )
+            # Store subjob IDs as a set (only if there are subjobs)
+            if subjob_ids:
+                await self._run_bounded_to_thread(
+                    self._ingest_client.get_client().sadd,
+                    parent_key,
+                    *subjob_ids,
+                )
             # Store metadata as hash (including original subjob ordering for deterministic fetches)
             metadata_to_store = dict(metadata)
@@ -500,21 +522,21 @@ class RedisIngestService(IngestServiceMeta):
         metadata_key = f"parent:{parent_job_id}:metadata"
         try:
-            # Check if this is a parent job
+            # Check if this is a parent job (check metadata_key since non-split PDFs may not have parent_key)
             exists = await self._run_bounded_to_thread(
                 self._ingest_client.get_client().exists,
-                parent_key,
+                metadata_key,  # Check metadata instead of parent_key for non-split PDF support
             )
             if not exists:
                 return None
-            # Get subjob IDs
+            # Get subjob IDs (may be empty for non-split PDFs)
             subjob_ids_bytes = await self._run_bounded_to_thread(
                 self._ingest_client.get_client().smembers,
                 parent_key,
             )
-            subjob_id_set = {id.decode("utf-8") for id in subjob_ids_bytes}
+            subjob_id_set = {id.decode("utf-8") for id in subjob_ids_bytes} if subjob_ids_bytes else set()
             # Get metadata
             metadata_dict = await self._run_bounded_to_thread(

nv_ingest/pipeline/default_libmode_pipeline_impl.py CHANGED Viewed

@@ -318,8 +318,8 @@ stages:
     actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
     config:
       api_key: $NGC_API_KEY|$NVIDIA_API_KEY
-      endpoint_url: $VLM_CAPTION_ENDPOINT|"https://integrate.api.nvidia.com/v1/chat/completions"
-      model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
+      endpoint_url: $VLM_CAPTION_ENDPOINT|"http://vlm:8000/v1/chat/completions"
+      model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/nemotron-nano-12b-v2-vl"
       prompt: "Caption the content of this image:"
     replicas:
       min_replicas: 0

nv_ingest/pipeline/default_pipeline_impl.py CHANGED Viewed

@@ -192,6 +192,27 @@ stages:
         strategy: "static"
         value: 1
+  - name: "ocr_extractor"
+    type: "stage"
+    phase: 1  # EXTRACTION
+    actor: "nv_ingest.framework.orchestration.ray.stages.extractors.ocr_extractor:OCRExtractorStage"
+    config:
+      endpoint_config:
+        ocr_endpoints: [
+          $OCR_GRPC_ENDPOINT|"ocr:8001",
+          $OCR_HTTP_ENDPOINT|"http://ocr:8000/v1/infer",
+        ]
+        ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
+    replicas:
+      min_replicas: 0
+      max_replicas:
+        strategy: "static"
+        value: 4
+      static_replicas:
+        strategy: "static"
+        value: 3
   - name: "infographic_extractor"
     type: "stage"
     phase: 1  # EXTRACTION
@@ -317,7 +338,8 @@ stages:
     actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
     config:
       api_key: $NGC_API_KEY|$NVIDIA_API_KEY
-      model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
+      model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/nemotron-nano-12b-v2-vl"
+      endpoint_url: $VLM_CAPTION_ENDPOINT|"http://vlm:8000/v1/chat/completions"
       prompt: "Caption the content of this image:"
     replicas:
       min_replicas: 0
@@ -427,76 +449,79 @@ edges:
   # Intake
   - from: "source_stage"
     to: "metadata_injector"
-    queue_size: 32
+    queue_size: 4
   # Document Extractors
   - from: "metadata_injector"
     to: "pdf_extractor"
-    queue_size: 32
+    queue_size: 8
   - from: "pdf_extractor"
     to: "audio_extractor"
-    queue_size: 32
+    queue_size: 4
   - from: "audio_extractor"
     to: "docx_extractor"
-    queue_size: 32
+    queue_size: 4
   - from: "docx_extractor"
     to: "pptx_extractor"
-    queue_size: 32
+    queue_size: 4
   - from: "pptx_extractor"
     to: "image_extractor"
-    queue_size: 32
+    queue_size: 4
   - from: "image_extractor"
     to: "html_extractor"
-    queue_size: 32
+    queue_size: 4
   - from: "html_extractor"
     to: "infographic_extractor"
-    queue_size: 32
+    queue_size: 4
   # Primitive Extractors
   - from: "infographic_extractor"
     to: "table_extractor"
-    queue_size: 32
+    queue_size: 4
   - from: "table_extractor"
     to: "chart_extractor"
-    queue_size: 32
+    queue_size: 4
   - from: "chart_extractor"
+    to: "ocr_extractor"
+    queue_size: 8
+  - from: "ocr_extractor"
     to: "image_filter"
-    queue_size: 32
+    queue_size: 4
   # Primitive Mutators
   - from: "image_filter"
     to: "image_dedup"
-    queue_size: 32
+    queue_size: 4
   - from: "image_dedup"
     to: "text_splitter"
-    queue_size: 32
+    queue_size: 4
   # Primitive Transforms
   - from: "text_splitter"
     to: "image_caption"
-    queue_size: 32
+    queue_size: 4
   - from: "image_caption"
     to: "text_embedder"
-    queue_size: 32
+    queue_size: 4
   - from: "text_embedder"
     to: "image_storage"
-    queue_size: 32
+    queue_size: 4
   # Primitive Storage
   - from: "image_storage"
     to: "embedding_storage"
-    queue_size: 32
+    queue_size: 4
   - from: "embedding_storage"
     to: "broker_response"
-    queue_size: 32
+    queue_size: 4
   # Response and Telemetry
   - from: "broker_response"
     to: "otel_tracer"
-    queue_size: 32
+    queue_size: 4
   - from: "otel_tracer"
     to: "default_drain"
-    queue_size: 32
+    queue_size: 4
 # Pipeline Runtime Configuration
 pipeline:

{nv_ingest-2025.10.22.dev20251022.dist-info → nv_ingest-2025.11.19.dev20251119.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nv-ingest
-Version: 2025.10.22.dev20251022
+Version: 2025.11.19.dev20251119
 Summary: Python module for multimodal document ingestion
 Author-email: Jeremy Dyer <jdyer@nvidia.com>
 License:                                  Apache License
@@ -226,7 +226,6 @@ Requires-Dist: isodate>=0.7.2
 Requires-Dist: langdetect>=1.0.9
 Requires-Dist: minio>=7.2.12
 Requires-Dist: librosa>=0.10.2
-Requires-Dist: openai>=1.82.0
 Requires-Dist: opentelemetry-api>=1.27.0
 Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
 Requires-Dist: opentelemetry-sdk>=1.27.0

{nv_ingest-2025.10.22.dev20251022.dist-info → nv_ingest-2025.11.19.dev20251119.dist-info}/RECORD RENAMED Viewed

@@ -7,27 +7,27 @@ nv_ingest/api/v1/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,
 nv_ingest/api/v1/health.py,sha256=pV-RoVq5y0iBPp0qZoLzd1xKpd0JiHAi0UMyMj99LqU,4740
 nv_ingest/api/v1/ingest.py,sha256=LWk3LN4lBd3uO8h30EN42g3LHCVcO00avVd5ohVK7NI,19392
 nv_ingest/api/v1/metrics.py,sha256=ZGVRApYLnzc2f2C7wRgGd7deqiXan-jxfA-33a16clY,981
-nv_ingest/api/v2/README.md,sha256=tbQOcD_67YWedboAcDRlZJgjvVZZTW1-ZodcqP0iynk,7133
+nv_ingest/api/v2/README.md,sha256=VhpdjEmCyr3qIOhwqISFx9C5WezJFcxYc-NB9S98HMg,7562
 nv_ingest/api/v2/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest/api/v2/ingest.py,sha256=v5l1c1BdmgyPqMzRj8CezI3dR6HpKOuevfomT1v4RGc,37313
+nv_ingest/api/v2/ingest.py,sha256=ikbZE2eAjSnFmt5CcpTduY1t9DsUQBhnBQlsd3HaBww,53103
 nv_ingest/framework/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/orchestration/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/orchestration/execution/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/orchestration/execution/helpers.py,sha256=-F8SZh7ISWtzJz6X1O2LQ133t-17Jxi8lL-NHz4rwj0,2818
 nv_ingest/framework/orchestration/execution/options.py,sha256=Ms1t4591EIv4ZrMRdhsCYPgLnMVXJosG3MURCbPXUoA,3983
 nv_ingest/framework/orchestration/process/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest/framework/orchestration/process/dependent_services.py,sha256=ERf2M4O6pvbLDFrvayBHHL7M-FIwECeDEDTY3bi7MBg,2940
+nv_ingest/framework/orchestration/process/dependent_services.py,sha256=s0j_rsFtCKHFIuvOkBe9NEAkPNPhSYse_ApeHka8gyg,3032
 nv_ingest/framework/orchestration/process/execution.py,sha256=P1kzpYV23e4QYrKw9Td1TCZK3CK1ENVqqnI_axRCqBk,19814
 nv_ingest/framework/orchestration/process/lifecycle.py,sha256=L5NDwnzSMQPGjqJDC8jC75L1YqWey-dtK8N_HgBzb0E,8001
-nv_ingest/framework/orchestration/process/strategies.py,sha256=D7fdTPA7uuteoj6McA6hm1J5ArqoDdSZ7W6_ONDX7N0,7845
-nv_ingest/framework/orchestration/process/termination.py,sha256=_aI2ZzCasGfqwu0fcvufOlr1BGAay_Noxq5pAu67gv4,3593
+nv_ingest/framework/orchestration/process/strategies.py,sha256=Q1Q04PPseF775omeS0FoXfK187NiS_bbqTaaJRwzKn8,7972
+nv_ingest/framework/orchestration/process/termination.py,sha256=PAogFeW0FATFS6Mcp_UkZgq_SbWV18RtdZN-0NbComw,5042
 nv_ingest/framework/orchestration/ray/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/orchestration/ray/edges/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py,sha256=PQliU_kyGbO9o42njpb8FrDMLrbLqwZzmBNXifxyG5Y,2312
 nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py,sha256=VFii2yxJuikimOxie3edKq5JN06g78AF8bdHSHVX8p8,2677
 nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py,sha256=N6NH4KgZJ60e_JkGRcSmfQtX37qtX4TMcavOR-n3heE,2549
 nv_ingest/framework/orchestration/ray/examples/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py,sha256=hnRLybIpVTj3mXkLW0ErWVn4vRsInjNZmA80JqDiQuw,16473
+nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py,sha256=Bn4rjkO14BwvvUNG_HBCSVXetYk7DKqRRsYHJADWqjc,16455
 nv_ingest/framework/orchestration/ray/examples/task_source_harness.py,sha256=Yt7uxThg7s8WuMiaHLKC8r1XAG7QixegfkT-juE5oNw,1953
 nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py,sha256=XkvsoIzH5ftXvAZ4ox7mxbx7ESVx6D8Xupcwbqgd52w,3277
 nv_ingest/framework/orchestration/ray/primitives/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
@@ -38,12 +38,13 @@ nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py,sha256=t9lf6zTj
 nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py,sha256=GGY6_i6_g5xTFzdo9Qmsu9i4knMTq6pJfgm-aaPEt_o,17226
 nv_ingest/framework/orchestration/ray/stages/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py,sha256=4SdgvzI9oJ_OK5oWGir9wXVIPV4Pont2EKv9mwcWMC0,3631
+nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py,sha256=UVp_kDmkaBlfO0Mbl_IxKq6imzLvs4-DKHgUHJIh3mo,3629
 nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py,sha256=rfaDx6PqRCguhSYkJI6iVmMMtAlJNxzKfUrLmw_fKqs,4381
 nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py,sha256=R4vshPcAUN2U6BIv8BCZQ862wLx8RJhCGXfpQ3K09Bs,3627
 nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py,sha256=7JrZSVIrK4_wr2s7TOTss7pgTY2F9GPQ7Ze3F_WFlKU,3642
 nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py,sha256=iY9fEfucfgCmO2ixX6qwn418J97nJz_FQGh7B6yziVo,3980
 nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py,sha256=v5J7dnJBEaDfjoTz_N_yC3RAt6lwMLgLT28V-ahquLE,3261
+nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py,sha256=pwVoA5-CF9GVWusoFZOMGBvSyW5udD9bdxVJXA_SghE,3188
 nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py,sha256=QagIA99AsHLihjRbXm-2BphdoQGHwzOHlqLyz7oDOSk,4992
 nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py,sha256=RMbbl7Cuj4BT-TcgUx_0k8R-DLdw-o3fHxcIBIgrWt4,3776
 nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py,sha256=p71ktv6v5T-9npYpCbgbwW6-fS-65UWS7rCm8OWr2Bc,4170
@@ -61,7 +62,7 @@ nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py,sha256=wQSlVx3T14
 nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py,sha256=_USW1Vq8G2Wn-QFdPfFQCrtKG46hHeJvkEGbBxdpbVM,1488
 nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py,sha256=QcvMQXIJ7EWIxty76Mo5Xv38Oj6X2KuS8qXQlf7E1uA,11676
 nv_ingest/framework/orchestration/ray/stages/sources/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=Qm9XtTNX2CcUAlZRw33BS3Ql0djcsMGp52FPA2zHu3Q,22340
+nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py,sha256=LrqaWpWyuiAHlpXWKYSyHZJBFegGXfNlpCXrucbK5NM,24067
 nv_ingest/framework/orchestration/ray/stages/storage/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py,sha256=WZN_-3Li-izDaPtk8IMrtn2os1ckT3U8Rb2PsfOWrcI,4009
 nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py,sha256=EUtwhSDf-qGLVEhWEInr1VaLsvpcHUSyzCmHQVai-Ps,3547
@@ -103,22 +104,22 @@ nv_ingest/framework/util/flow_control/udf_intercept.py,sha256=zQ9uuCcHLEd0P52Eiw
 nv_ingest/framework/util/service/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/util/service/impl/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/util/service/impl/ingest/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py,sha256=OuGC3FFhkLQLR3x4s-tyxGguYYn8ORKr2xkzMy2br0g,22552
+nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py,sha256=59P-BMWnFY37GJm5w23-TMxgLhiZGZpJogC0gjDBaTA,23835
 nv_ingest/framework/util/service/meta/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/util/service/meta/ingest/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py,sha256=QS3uNxWBl5dIcmIpJKNe8_TLcTUuN2vcKyHeAwa-eSo,1589
 nv_ingest/framework/util/telemetry/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/framework/util/telemetry/global_stats.py,sha256=nq65pEEdiwjAfGiqsxG1CeQMC96O3CfQxsZuGFCY-ds,4554
 nv_ingest/pipeline/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
-nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=MiyKe8RS18PNYwEVvrASiHFpynR_BavOe0hhVnUdbEc,15618
-nv_ingest/pipeline/default_pipeline_impl.py,sha256=irVm_wmJW5a7a3xTJd18AFZfwLheERkhCty-0XZrIMY,15288
+nv_ingest/pipeline/default_libmode_pipeline_impl.py,sha256=yNJtjfHQyxtasGa1hQrvgX7UrPa7BAd0oog8EIN8Y_w,15592
+nv_ingest/pipeline/default_pipeline_impl.py,sha256=DhClC17lWUvtBIi2mCC4WkLWT0lxY-CFY0n6nriAxas,16017
 nv_ingest/pipeline/ingest_pipeline.py,sha256=wHAJhqAM2s8nbY-8itVogmSU-yVN4PZONGWcKnhzgfg,17794
 nv_ingest/pipeline/pipeline_schema.py,sha256=rLZZz2It2o2hVNWrZUJU8CarrqRei1fho3ZEMkkoBcg,17940
 nv_ingest/pipeline/config/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest/pipeline/config/loaders.py,sha256=75Yr9WYO7j7ghvKTnYLfZXQZEH3J3VEZo5J4TunC_Us,7590
 nv_ingest/pipeline/config/replica_resolver.py,sha256=3zjh8gmepEYORFZRM4inq7GoBW0YL3gzUDiixUugjzQ,8899
-nv_ingest-2025.10.22.dev20251022.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-nv_ingest-2025.10.22.dev20251022.dist-info/METADATA,sha256=fBAiUkJijOoKO-QsdNYEpDF9X1ovQ2BBSBBhLP-Yykw,15122
-nv_ingest-2025.10.22.dev20251022.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nv_ingest-2025.10.22.dev20251022.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
-nv_ingest-2025.10.22.dev20251022.dist-info/RECORD,,
+nv_ingest-2025.11.19.dev20251119.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+nv_ingest-2025.11.19.dev20251119.dist-info/METADATA,sha256=arJTf3Axy2qKAFDlP4lsKCftTw4vnJp3EECP6hmylYU,15092
+nv_ingest-2025.11.19.dev20251119.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nv_ingest-2025.11.19.dev20251119.dist-info/top_level.txt,sha256=sjb0ajIsgn3YgftSjZHlYO0HjYAIIhNuXG_AmywCvaU,10
+nv_ingest-2025.11.19.dev20251119.dist-info/RECORD,,

{nv_ingest-2025.10.22.dev20251022.dist-info → nv_ingest-2025.11.19.dev20251119.dist-info}/WHEEL RENAMED Viewed

File without changes

{nv_ingest-2025.10.22.dev20251022.dist-info → nv_ingest-2025.11.19.dev20251119.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{nv_ingest-2025.10.22.dev20251022.dist-info → nv_ingest-2025.11.19.dev20251119.dist-info}/top_level.txt RENAMED Viewed

File without changes

nv-ingest 2025.10.22.dev20251022__py3-none-any.whl → 2025.11.19.dev20251119__py3-none-any.whl

nv-ingest 2025.10.22.dev20251022__py3-none-any.whl → 2025.11.19.dev20251119__py3-none-any.whl