nv-ingest 2025.7.22.dev20250722__tar.gz → 2025.7.24.dev20250724__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/PKG-INFO +2 -5
- nv_ingest-2025.7.24.dev20250724/nv_ingest/framework/orchestration/ray/util/env_config.py +75 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +21 -10
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +19 -4
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest.egg-info/PKG-INFO +2 -5
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest.egg-info/SOURCES.txt +1 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest.egg-info/requires.txt +1 -4
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/pyproject.toml +1 -4
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/LICENSE +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/MANIFEST.in +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/api/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/api/main.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/api/v1/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/api/v1/health.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/api/v1/ingest.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/api/v1/metrics.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/service/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/version.py +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest.egg-info/dependency_links.txt +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest.egg-info/top_level.txt +0 -0
- {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nv-ingest
|
|
3
|
-
Version: 2025.7.22.dev20250722
|
|
3
|
+
Version: 2025.7.24.dev20250724
|
|
4
4
|
Summary: Python module for multimodal document ingestion
|
|
5
5
|
Author-email: Jeremy Dyer <jdyer@nvidia.com>
|
|
6
6
|
License: Apache License
|
|
@@ -231,7 +231,7 @@ Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
|
|
|
231
231
|
Requires-Dist: opentelemetry-sdk>=1.27.0
|
|
232
232
|
Requires-Dist: pydantic>2.0.0
|
|
233
233
|
Requires-Dist: pydantic-settings>2.0.0
|
|
234
|
-
Requires-Dist: pypdfium2==4.30.
|
|
234
|
+
Requires-Dist: pypdfium2==4.30.0
|
|
235
235
|
Requires-Dist: pytest>=8.0.2
|
|
236
236
|
Requires-Dist: pytest-mock>=3.14.0
|
|
237
237
|
Requires-Dist: pytest-cov>=6.0.0
|
|
@@ -240,7 +240,6 @@ Requires-Dist: python-docx>=1.1.2
|
|
|
240
240
|
Requires-Dist: python-dotenv>=1.0.1
|
|
241
241
|
Requires-Dist: python-pptx>=1.0.2
|
|
242
242
|
Requires-Dist: prometheus-client
|
|
243
|
-
Requires-Dist: torch>=2.4.1
|
|
244
243
|
Requires-Dist: ray[all]>=2.37.0
|
|
245
244
|
Requires-Dist: redis>=5.2.1
|
|
246
245
|
Requires-Dist: requests>=2.28.2
|
|
@@ -248,8 +247,6 @@ Requires-Dist: scikit-learn>=1.6.0
|
|
|
248
247
|
Requires-Dist: scipy>=1.15.1
|
|
249
248
|
Requires-Dist: setuptools>=78.1.1
|
|
250
249
|
Requires-Dist: tabulate>=0.9.0
|
|
251
|
-
Requires-Dist: torchvision
|
|
252
|
-
Requires-Dist: torchaudio
|
|
253
250
|
Requires-Dist: transformers>=4.47.0
|
|
254
251
|
Requires-Dist: tqdm>=4.67.1
|
|
255
252
|
Requires-Dist: uvicorn
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
logger = logging.getLogger(__name__)
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def str_to_bool(value: str) -> bool:
|
|
8
|
+
"""
|
|
9
|
+
Convert string to boolean value.
|
|
10
|
+
|
|
11
|
+
Parameters
|
|
12
|
+
----------
|
|
13
|
+
value : str
|
|
14
|
+
String value to convert
|
|
15
|
+
|
|
16
|
+
Returns
|
|
17
|
+
-------
|
|
18
|
+
bool
|
|
19
|
+
Boolean representation of the string
|
|
20
|
+
"""
|
|
21
|
+
return value.strip().lower() in {"1", "true", "yes", "on"}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_env_var(name: str, default, var_type=None):
|
|
25
|
+
"""
|
|
26
|
+
Get environment variable with type conversion and default value.
|
|
27
|
+
|
|
28
|
+
Parameters
|
|
29
|
+
----------
|
|
30
|
+
name : str
|
|
31
|
+
Environment variable name
|
|
32
|
+
default : Any
|
|
33
|
+
Default value if environment variable is not set
|
|
34
|
+
var_type : type, optional
|
|
35
|
+
Type to convert to. If None, infers from default value type
|
|
36
|
+
|
|
37
|
+
Returns
|
|
38
|
+
-------
|
|
39
|
+
Any
|
|
40
|
+
Environment variable value converted to the appropriate type
|
|
41
|
+
"""
|
|
42
|
+
value = os.environ.get(name)
|
|
43
|
+
if value is None:
|
|
44
|
+
return default
|
|
45
|
+
|
|
46
|
+
# Determine type from default if not explicitly provided
|
|
47
|
+
target_type = var_type or type(default)
|
|
48
|
+
|
|
49
|
+
# Handle boolean conversion specially
|
|
50
|
+
if target_type is bool:
|
|
51
|
+
return str_to_bool(value)
|
|
52
|
+
|
|
53
|
+
# For other types, use direct conversion
|
|
54
|
+
try:
|
|
55
|
+
return target_type(value)
|
|
56
|
+
except (ValueError, TypeError) as e:
|
|
57
|
+
logger.warning(
|
|
58
|
+
f"Failed to convert environment variable {name}='{value}' to \
|
|
59
|
+
{target_type.__name__}. Using default: {default}, error: {e}"
|
|
60
|
+
)
|
|
61
|
+
return default
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Dynamic Memory Scaling Configuration
|
|
65
|
+
DISABLE_DYNAMIC_SCALING = get_env_var("INGEST_DISABLE_DYNAMIC_SCALING", False, bool)
|
|
66
|
+
DYNAMIC_MEMORY_THRESHOLD = get_env_var("INGEST_DYNAMIC_MEMORY_THRESHOLD", 0.75, float)
|
|
67
|
+
DYNAMIC_MEMORY_KP = get_env_var("INGEST_DYNAMIC_MEMORY_KP", 0.2, float)
|
|
68
|
+
DYNAMIC_MEMORY_KI = get_env_var("INGEST_DYNAMIC_MEMORY_KI", 0.01, float)
|
|
69
|
+
DYNAMIC_MEMORY_EMA_ALPHA = get_env_var("INGEST_DYNAMIC_MEMORY_EMA_ALPHA", 0.1, float)
|
|
70
|
+
DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH = get_env_var("INGEST_DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH", 0, int)
|
|
71
|
+
DYNAMIC_MEMORY_PENALTY_FACTOR = get_env_var("INGEST_DYNAMIC_MEMORY_PENALTY_FACTOR", 0.1, float)
|
|
72
|
+
DYNAMIC_MEMORY_ERROR_BOOST_FACTOR = get_env_var("INGEST_DYNAMIC_MEMORY_ERROR_BOOST_FACTOR", 1.5, float)
|
|
73
|
+
DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION = get_env_var(
|
|
74
|
+
"INGEST_DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION", 0.15, float
|
|
75
|
+
)
|
|
@@ -23,18 +23,21 @@ from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
|
|
|
23
23
|
RayPipelineInterface,
|
|
24
24
|
)
|
|
25
25
|
from nv_ingest.framework.orchestration.ray.util.pipeline.pipeline_builders import setup_ingestion_pipeline
|
|
26
|
+
from nv_ingest.framework.orchestration.ray.util.env_config import (
|
|
27
|
+
DISABLE_DYNAMIC_SCALING,
|
|
28
|
+
DYNAMIC_MEMORY_THRESHOLD,
|
|
29
|
+
DYNAMIC_MEMORY_KP,
|
|
30
|
+
DYNAMIC_MEMORY_KI,
|
|
31
|
+
DYNAMIC_MEMORY_EMA_ALPHA,
|
|
32
|
+
DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
|
|
33
|
+
DYNAMIC_MEMORY_PENALTY_FACTOR,
|
|
34
|
+
DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
|
|
35
|
+
DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
|
|
36
|
+
)
|
|
26
37
|
|
|
27
38
|
logger = logging.getLogger(__name__)
|
|
28
39
|
|
|
29
40
|
|
|
30
|
-
def str_to_bool(value: str) -> bool:
|
|
31
|
-
return value.strip().lower() in {"1", "true", "yes", "on"}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
DISABLE_DYNAMIC_SCALING = str_to_bool(os.environ.get("INGEST_DISABLE_DYNAMIC_SCALING", "false"))
|
|
35
|
-
DYNAMIC_MEMORY_THRESHOLD = float(os.environ.get("INGEST_DYNAMIC_MEMORY_THRESHOLD", 0.75))
|
|
36
|
-
|
|
37
|
-
|
|
38
41
|
class PipelineCreationSchema(BaseModel):
|
|
39
42
|
"""
|
|
40
43
|
Schema for pipeline creation configuration.
|
|
@@ -88,7 +91,7 @@ class PipelineCreationSchema(BaseModel):
|
|
|
88
91
|
# Vision language model settings
|
|
89
92
|
vlm_caption_endpoint: str = os.getenv(
|
|
90
93
|
"VLM_CAPTION_ENDPOINT",
|
|
91
|
-
"https://
|
|
94
|
+
"https://integrate.api.nvidia.com/v1/chat/completions",
|
|
92
95
|
)
|
|
93
96
|
vlm_caption_model_name: str = os.getenv("VLM_CAPTION_MODEL_NAME", "nvidia/llama-3.1-nemotron-nano-vl-8b-v1")
|
|
94
97
|
|
|
@@ -235,7 +238,15 @@ def _launch_pipeline(
|
|
|
235
238
|
dynamic_memory_threshold = dynamic_memory_threshold if dynamic_memory_threshold else DYNAMIC_MEMORY_THRESHOLD
|
|
236
239
|
|
|
237
240
|
scaling_config = ScalingConfig(
|
|
238
|
-
dynamic_memory_scaling=dynamic_memory_scaling,
|
|
241
|
+
dynamic_memory_scaling=dynamic_memory_scaling,
|
|
242
|
+
dynamic_memory_threshold=dynamic_memory_threshold,
|
|
243
|
+
pid_kp=DYNAMIC_MEMORY_KP,
|
|
244
|
+
pid_ki=DYNAMIC_MEMORY_KI,
|
|
245
|
+
pid_ema_alpha=DYNAMIC_MEMORY_EMA_ALPHA,
|
|
246
|
+
pid_target_queue_depth=DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
|
|
247
|
+
pid_penalty_factor=DYNAMIC_MEMORY_PENALTY_FACTOR,
|
|
248
|
+
pid_error_boost_factor=DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
|
|
249
|
+
rcm_memory_safety_buffer_fraction=DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
|
|
239
250
|
)
|
|
240
251
|
|
|
241
252
|
pipeline = RayPipeline(scaling_config=scaling_config)
|
|
@@ -57,6 +57,7 @@ from nv_ingest_api.internal.schemas.transform.transform_image_filter_schema impo
|
|
|
57
57
|
from nv_ingest_api.internal.schemas.transform.transform_text_embedding_schema import TextEmbeddingSchema
|
|
58
58
|
from nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema import TextSplitterSchema
|
|
59
59
|
from nv_ingest_api.util.system.hardware_info import SystemResourceProbe
|
|
60
|
+
from nv_ingest.framework.orchestration.ray.util.env_config import DYNAMIC_MEMORY_THRESHOLD
|
|
60
61
|
|
|
61
62
|
logger = logging.getLogger(__name__)
|
|
62
63
|
|
|
@@ -178,7 +179,7 @@ def add_pdf_extractor_stage(pipeline, default_cpu_count, stage_name="pdf_extract
|
|
|
178
179
|
total_memory_mb = psutil.virtual_memory().total / (1024**2)
|
|
179
180
|
|
|
180
181
|
# Allocate up to 75% of memory to this stage, using a 10GB high watermark per worker.
|
|
181
|
-
allocatable_memory_for_stage_mb = total_memory_mb * 0.75
|
|
182
|
+
allocatable_memory_for_stage_mb = total_memory_mb * DYNAMIC_MEMORY_THRESHOLD
|
|
182
183
|
memory_based_replicas = int(allocatable_memory_for_stage_mb / 10_000.0)
|
|
183
184
|
|
|
184
185
|
# Cap the number of replicas by the number of available CPU cores.
|
|
@@ -522,7 +523,7 @@ def add_text_embedding_stage(pipeline, default_cpu_count, stage_name="text_embed
|
|
|
522
523
|
stage_actor=TextEmbeddingTransformStage,
|
|
523
524
|
config=config,
|
|
524
525
|
min_replicas=0,
|
|
525
|
-
max_replicas=
|
|
526
|
+
max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.07, replica_limit=6),
|
|
526
527
|
)
|
|
527
528
|
|
|
528
529
|
return stage_name
|
|
@@ -627,8 +628,22 @@ def add_source_stage(pipeline, default_cpu_count, source_name="pipeline_source")
|
|
|
627
628
|
return source_name
|
|
628
629
|
|
|
629
630
|
|
|
630
|
-
def _get_max_replicas(default_cpu_count=None, percentage_of_cpu=0.14):
|
|
631
|
+
def _get_max_replicas(default_cpu_count=None, percentage_of_cpu=0.14, replica_limit=None):
|
|
632
|
+
"""
|
|
633
|
+
Calculate max replicas based on CPU percentage with optional upper limit.
|
|
634
|
+
|
|
635
|
+
Args:
|
|
636
|
+
default_cpu_count (int, optional): CPU cores to use. Auto-detected if None.
|
|
637
|
+
percentage_of_cpu (float, optional): CPU percentage to allocate. Defaults to 0.14.
|
|
638
|
+
replica_limit (int, optional): Upper bound for replicas. Defaults to None.
|
|
639
|
+
|
|
640
|
+
Returns:
|
|
641
|
+
int: Maximum replicas, at least 1.
|
|
642
|
+
"""
|
|
631
643
|
if default_cpu_count is None:
|
|
632
644
|
default_cpu_count = _system_resource_probe.get_cpu_count()
|
|
633
645
|
|
|
634
|
-
|
|
646
|
+
_max_replicas = int(max(1, (default_cpu_count * percentage_of_cpu)))
|
|
647
|
+
if replica_limit is not None:
|
|
648
|
+
_max_replicas = min(_max_replicas, replica_limit)
|
|
649
|
+
return _max_replicas
|
{nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nv-ingest
|
|
3
|
-
Version: 2025.7.22.dev20250722
|
|
3
|
+
Version: 2025.7.24.dev20250724
|
|
4
4
|
Summary: Python module for multimodal document ingestion
|
|
5
5
|
Author-email: Jeremy Dyer <jdyer@nvidia.com>
|
|
6
6
|
License: Apache License
|
|
@@ -231,7 +231,7 @@ Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
|
|
|
231
231
|
Requires-Dist: opentelemetry-sdk>=1.27.0
|
|
232
232
|
Requires-Dist: pydantic>2.0.0
|
|
233
233
|
Requires-Dist: pydantic-settings>2.0.0
|
|
234
|
-
Requires-Dist: pypdfium2==4.30.
|
|
234
|
+
Requires-Dist: pypdfium2==4.30.0
|
|
235
235
|
Requires-Dist: pytest>=8.0.2
|
|
236
236
|
Requires-Dist: pytest-mock>=3.14.0
|
|
237
237
|
Requires-Dist: pytest-cov>=6.0.0
|
|
@@ -240,7 +240,6 @@ Requires-Dist: python-docx>=1.1.2
|
|
|
240
240
|
Requires-Dist: python-dotenv>=1.0.1
|
|
241
241
|
Requires-Dist: python-pptx>=1.0.2
|
|
242
242
|
Requires-Dist: prometheus-client
|
|
243
|
-
Requires-Dist: torch>=2.4.1
|
|
244
243
|
Requires-Dist: ray[all]>=2.37.0
|
|
245
244
|
Requires-Dist: redis>=5.2.1
|
|
246
245
|
Requires-Dist: requests>=2.28.2
|
|
@@ -248,8 +247,6 @@ Requires-Dist: scikit-learn>=1.6.0
|
|
|
248
247
|
Requires-Dist: scipy>=1.15.1
|
|
249
248
|
Requires-Dist: setuptools>=78.1.1
|
|
250
249
|
Requires-Dist: tabulate>=0.9.0
|
|
251
|
-
Requires-Dist: torchvision
|
|
252
|
-
Requires-Dist: torchaudio
|
|
253
250
|
Requires-Dist: transformers>=4.47.0
|
|
254
251
|
Requires-Dist: tqdm>=4.67.1
|
|
255
252
|
Requires-Dist: uvicorn
|
{nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest.egg-info/SOURCES.txt
RENAMED
|
@@ -71,6 +71,7 @@ nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py
|
|
|
71
71
|
nv_ingest/framework/orchestration/ray/stages/utility/__init__.py
|
|
72
72
|
nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py
|
|
73
73
|
nv_ingest/framework/orchestration/ray/util/__init__.py
|
|
74
|
+
nv_ingest/framework/orchestration/ray/util/env_config.py
|
|
74
75
|
nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py
|
|
75
76
|
nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py
|
|
76
77
|
nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py
|
{nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest.egg-info/requires.txt
RENAMED
|
@@ -16,7 +16,7 @@ opentelemetry-exporter-otlp>=1.27.0
|
|
|
16
16
|
opentelemetry-sdk>=1.27.0
|
|
17
17
|
pydantic>2.0.0
|
|
18
18
|
pydantic-settings>2.0.0
|
|
19
|
-
pypdfium2==4.30.
|
|
19
|
+
pypdfium2==4.30.0
|
|
20
20
|
pytest>=8.0.2
|
|
21
21
|
pytest-mock>=3.14.0
|
|
22
22
|
pytest-cov>=6.0.0
|
|
@@ -25,7 +25,6 @@ python-docx>=1.1.2
|
|
|
25
25
|
python-dotenv>=1.0.1
|
|
26
26
|
python-pptx>=1.0.2
|
|
27
27
|
prometheus-client
|
|
28
|
-
torch>=2.4.1
|
|
29
28
|
ray[all]>=2.37.0
|
|
30
29
|
redis>=5.2.1
|
|
31
30
|
requests>=2.28.2
|
|
@@ -33,8 +32,6 @@ scikit-learn>=1.6.0
|
|
|
33
32
|
scipy>=1.15.1
|
|
34
33
|
setuptools>=78.1.1
|
|
35
34
|
tabulate>=0.9.0
|
|
36
|
-
torchvision
|
|
37
|
-
torchaudio
|
|
38
35
|
transformers>=4.47.0
|
|
39
36
|
tqdm>=4.67.1
|
|
40
37
|
uvicorn
|
|
@@ -35,7 +35,7 @@ dependencies = [
|
|
|
35
35
|
"opentelemetry-sdk>=1.27.0",
|
|
36
36
|
"pydantic>2.0.0",
|
|
37
37
|
"pydantic-settings>2.0.0",
|
|
38
|
-
"pypdfium2==4.30.
|
|
38
|
+
"pypdfium2==4.30.0",
|
|
39
39
|
"pytest>=8.0.2",
|
|
40
40
|
"pytest-mock>=3.14.0",
|
|
41
41
|
"pytest-cov>=6.0.0",
|
|
@@ -44,7 +44,6 @@ dependencies = [
|
|
|
44
44
|
"python-dotenv>=1.0.1",
|
|
45
45
|
"python-pptx>=1.0.2",
|
|
46
46
|
"prometheus-client",
|
|
47
|
-
"torch>=2.4.1",
|
|
48
47
|
"ray[all]>=2.37.0",
|
|
49
48
|
"redis>=5.2.1",
|
|
50
49
|
"requests>=2.28.2",
|
|
@@ -52,8 +51,6 @@ dependencies = [
|
|
|
52
51
|
"scipy>=1.15.1",
|
|
53
52
|
"setuptools>=78.1.1",
|
|
54
53
|
"tabulate>=0.9.0",
|
|
55
|
-
"torchvision",
|
|
56
|
-
"torchaudio",
|
|
57
54
|
"transformers>=4.47.0",
|
|
58
55
|
"tqdm>=4.67.1",
|
|
59
56
|
"uvicorn",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/api/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/api/v1/__init__.py
RENAMED
|
File without changes
|
{nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/api/v1/health.py
RENAMED
|
File without changes
|
{nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/api/v1/ingest.py
RENAMED
|
File without changes
|
{nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/api/v1/metrics.py
RENAMED
|
File without changes
|
{nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest/framework/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.24.dev20250724}/nv_ingest.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|