nv-ingest 2025.7.22.dev20250722__tar.gz → 2025.7.23.dev20250723__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

Files changed (109)
  1. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/PKG-INFO +2 -5
  2. nv_ingest-2025.7.23.dev20250723/nv_ingest/framework/orchestration/ray/util/env_config.py +75 -0
  3. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +20 -9
  4. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +19 -4
  5. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest.egg-info/PKG-INFO +2 -5
  6. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest.egg-info/SOURCES.txt +1 -0
  7. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest.egg-info/requires.txt +1 -4
  8. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/pyproject.toml +1 -4
  9. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/LICENSE +0 -0
  10. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/MANIFEST.in +0 -0
  11. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/__init__.py +0 -0
  12. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/api/__init__.py +0 -0
  13. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/api/main.py +0 -0
  14. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/api/v1/__init__.py +0 -0
  15. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/api/v1/health.py +0 -0
  16. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/api/v1/ingest.py +0 -0
  17. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/api/v1/metrics.py +0 -0
  18. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/__init__.py +0 -0
  19. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/__init__.py +0 -0
  20. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  21. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  22. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  23. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  24. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  25. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  26. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
  27. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  28. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  29. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  30. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  31. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  32. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
  33. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
  34. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
  35. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  36. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  37. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  38. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  39. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  40. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  41. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  42. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  43. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  44. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  45. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  46. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  47. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
  48. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  49. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  50. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  51. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  52. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
  53. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  54. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  55. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  56. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  57. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  58. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  59. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  60. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
  61. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  62. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  63. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  64. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  65. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  66. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  67. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  68. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  69. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  70. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
  71. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
  72. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  73. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  74. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  75. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  76. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  77. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -0
  78. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  79. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  80. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  81. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  82. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/__init__.py +0 -0
  83. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  84. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  85. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  86. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  87. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  88. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  89. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  90. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  91. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  92. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  93. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  94. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/__init__.py +0 -0
  95. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  96. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  97. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/service/__init__.py +0 -0
  98. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  99. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  100. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
  101. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  102. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  103. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  104. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  105. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  106. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest/version.py +0 -0
  107. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest.egg-info/dependency_links.txt +0 -0
  108. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/nv_ingest.egg-info/top_level.txt +0 -0
  109. {nv_ingest-2025.7.22.dev20250722 → nv_ingest-2025.7.23.dev20250723}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.7.22.dev20250722
3
+ Version: 2025.7.23.dev20250723
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -231,7 +231,7 @@ Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
231
231
  Requires-Dist: opentelemetry-sdk>=1.27.0
232
232
  Requires-Dist: pydantic>2.0.0
233
233
  Requires-Dist: pydantic-settings>2.0.0
234
- Requires-Dist: pypdfium2==4.30.1
234
+ Requires-Dist: pypdfium2==4.30.0
235
235
  Requires-Dist: pytest>=8.0.2
236
236
  Requires-Dist: pytest-mock>=3.14.0
237
237
  Requires-Dist: pytest-cov>=6.0.0
@@ -240,7 +240,6 @@ Requires-Dist: python-docx>=1.1.2
240
240
  Requires-Dist: python-dotenv>=1.0.1
241
241
  Requires-Dist: python-pptx>=1.0.2
242
242
  Requires-Dist: prometheus-client
243
- Requires-Dist: torch>=2.4.1
244
243
  Requires-Dist: ray[all]>=2.37.0
245
244
  Requires-Dist: redis>=5.2.1
246
245
  Requires-Dist: requests>=2.28.2
@@ -248,8 +247,6 @@ Requires-Dist: scikit-learn>=1.6.0
248
247
  Requires-Dist: scipy>=1.15.1
249
248
  Requires-Dist: setuptools>=78.1.1
250
249
  Requires-Dist: tabulate>=0.9.0
251
- Requires-Dist: torchvision
252
- Requires-Dist: torchaudio
253
250
  Requires-Dist: transformers>=4.47.0
254
251
  Requires-Dist: tqdm>=4.67.1
255
252
  Requires-Dist: uvicorn
@@ -0,0 +1,75 @@
1
+ import os
2
+ import logging
3
+
4
+ logger = logging.getLogger(__name__)
5
+
6
+
7
+ def str_to_bool(value: str) -> bool:
8
+ """
9
+ Convert string to boolean value.
10
+
11
+ Parameters
12
+ ----------
13
+ value : str
14
+ String value to convert
15
+
16
+ Returns
17
+ -------
18
+ bool
19
+ Boolean representation of the string
20
+ """
21
+ return value.strip().lower() in {"1", "true", "yes", "on"}
22
+
23
+
24
+ def get_env_var(name: str, default, var_type=None):
25
+ """
26
+ Get environment variable with type conversion and default value.
27
+
28
+ Parameters
29
+ ----------
30
+ name : str
31
+ Environment variable name
32
+ default : Any
33
+ Default value if environment variable is not set
34
+ var_type : type, optional
35
+ Type to convert to. If None, infers from default value type
36
+
37
+ Returns
38
+ -------
39
+ Any
40
+ Environment variable value converted to the appropriate type
41
+ """
42
+ value = os.environ.get(name)
43
+ if value is None:
44
+ return default
45
+
46
+ # Determine type from default if not explicitly provided
47
+ target_type = var_type or type(default)
48
+
49
+ # Handle boolean conversion specially
50
+ if target_type is bool:
51
+ return str_to_bool(value)
52
+
53
+ # For other types, use direct conversion
54
+ try:
55
+ return target_type(value)
56
+ except (ValueError, TypeError) as e:
57
+ logger.warning(
58
+ f"Failed to convert environment variable {name}='{value}' to \
59
+ {target_type.__name__}. Using default: {default}, error: {e}"
60
+ )
61
+ return default
62
+
63
+
64
+ # Dynamic Memory Scaling Configuration
65
+ DISABLE_DYNAMIC_SCALING = get_env_var("INGEST_DISABLE_DYNAMIC_SCALING", False, bool)
66
+ DYNAMIC_MEMORY_THRESHOLD = get_env_var("INGEST_DYNAMIC_MEMORY_THRESHOLD", 0.75, float)
67
+ DYNAMIC_MEMORY_KP = get_env_var("INGEST_DYNAMIC_MEMORY_KP", 0.2, float)
68
+ DYNAMIC_MEMORY_KI = get_env_var("INGEST_DYNAMIC_MEMORY_KI", 0.01, float)
69
+ DYNAMIC_MEMORY_EMA_ALPHA = get_env_var("INGEST_DYNAMIC_MEMORY_EMA_ALPHA", 0.1, float)
70
+ DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH = get_env_var("INGEST_DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH", 0, int)
71
+ DYNAMIC_MEMORY_PENALTY_FACTOR = get_env_var("INGEST_DYNAMIC_MEMORY_PENALTY_FACTOR", 0.1, float)
72
+ DYNAMIC_MEMORY_ERROR_BOOST_FACTOR = get_env_var("INGEST_DYNAMIC_MEMORY_ERROR_BOOST_FACTOR", 1.5, float)
73
+ DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION = get_env_var(
74
+ "INGEST_DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION", 0.15, float
75
+ )
@@ -23,18 +23,21 @@ from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
23
23
  RayPipelineInterface,
24
24
  )
25
25
  from nv_ingest.framework.orchestration.ray.util.pipeline.pipeline_builders import setup_ingestion_pipeline
26
+ from nv_ingest.framework.orchestration.ray.util.env_config import (
27
+ DISABLE_DYNAMIC_SCALING,
28
+ DYNAMIC_MEMORY_THRESHOLD,
29
+ DYNAMIC_MEMORY_KP,
30
+ DYNAMIC_MEMORY_KI,
31
+ DYNAMIC_MEMORY_EMA_ALPHA,
32
+ DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
33
+ DYNAMIC_MEMORY_PENALTY_FACTOR,
34
+ DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
35
+ DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
36
+ )
26
37
 
27
38
  logger = logging.getLogger(__name__)
28
39
 
29
40
 
30
- def str_to_bool(value: str) -> bool:
31
- return value.strip().lower() in {"1", "true", "yes", "on"}
32
-
33
-
34
- DISABLE_DYNAMIC_SCALING = str_to_bool(os.environ.get("INGEST_DISABLE_DYNAMIC_SCALING", "false"))
35
- DYNAMIC_MEMORY_THRESHOLD = float(os.environ.get("INGEST_DYNAMIC_MEMORY_THRESHOLD", 0.75))
36
-
37
-
38
41
  class PipelineCreationSchema(BaseModel):
39
42
  """
40
43
  Schema for pipeline creation configuration.
@@ -235,7 +238,15 @@ def _launch_pipeline(
235
238
  dynamic_memory_threshold = dynamic_memory_threshold if dynamic_memory_threshold else DYNAMIC_MEMORY_THRESHOLD
236
239
 
237
240
  scaling_config = ScalingConfig(
238
- dynamic_memory_scaling=dynamic_memory_scaling, dynamic_memory_threshold=dynamic_memory_threshold
241
+ dynamic_memory_scaling=dynamic_memory_scaling,
242
+ dynamic_memory_threshold=dynamic_memory_threshold,
243
+ pid_kp=DYNAMIC_MEMORY_KP,
244
+ pid_ki=DYNAMIC_MEMORY_KI,
245
+ pid_ema_alpha=DYNAMIC_MEMORY_EMA_ALPHA,
246
+ pid_target_queue_depth=DYNAMIC_MEMORY_TARGET_QUEUE_DEPTH,
247
+ pid_penalty_factor=DYNAMIC_MEMORY_PENALTY_FACTOR,
248
+ pid_error_boost_factor=DYNAMIC_MEMORY_ERROR_BOOST_FACTOR,
249
+ rcm_memory_safety_buffer_fraction=DYNAMIC_MEMORY_RCM_MEMORY_SAFETY_BUFFER_FRACTION,
239
250
  )
240
251
 
241
252
  pipeline = RayPipeline(scaling_config=scaling_config)
@@ -57,6 +57,7 @@ from nv_ingest_api.internal.schemas.transform.transform_image_filter_schema impo
57
57
  from nv_ingest_api.internal.schemas.transform.transform_text_embedding_schema import TextEmbeddingSchema
58
58
  from nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema import TextSplitterSchema
59
59
  from nv_ingest_api.util.system.hardware_info import SystemResourceProbe
60
+ from nv_ingest.framework.orchestration.ray.util.env_config import DYNAMIC_MEMORY_THRESHOLD
60
61
 
61
62
  logger = logging.getLogger(__name__)
62
63
 
@@ -178,7 +179,7 @@ def add_pdf_extractor_stage(pipeline, default_cpu_count, stage_name="pdf_extract
178
179
  total_memory_mb = psutil.virtual_memory().total / (1024**2)
179
180
 
180
181
  # Allocate up to 75% of memory to this stage, using a 10GB high watermark per worker.
181
- allocatable_memory_for_stage_mb = total_memory_mb * 0.75
182
+ allocatable_memory_for_stage_mb = total_memory_mb * DYNAMIC_MEMORY_THRESHOLD
182
183
  memory_based_replicas = int(allocatable_memory_for_stage_mb / 10_000.0)
183
184
 
184
185
  # Cap the number of replicas by the number of available CPU cores.
@@ -522,7 +523,7 @@ def add_text_embedding_stage(pipeline, default_cpu_count, stage_name="text_embed
522
523
  stage_actor=TextEmbeddingTransformStage,
523
524
  config=config,
524
525
  min_replicas=0,
525
- max_replicas=2,
526
+ max_replicas=_get_max_replicas(default_cpu_count, percentage_of_cpu=0.07, replica_limit=6),
526
527
  )
527
528
 
528
529
  return stage_name
@@ -627,8 +628,22 @@ def add_source_stage(pipeline, default_cpu_count, source_name="pipeline_source")
627
628
  return source_name
628
629
 
629
630
 
630
- def _get_max_replicas(default_cpu_count=None, percentage_of_cpu=0.14):
631
+ def _get_max_replicas(default_cpu_count=None, percentage_of_cpu=0.14, replica_limit=None):
632
+ """
633
+ Calculate max replicas based on CPU percentage with optional upper limit.
634
+
635
+ Args:
636
+ default_cpu_count (int, optional): CPU cores to use. Auto-detected if None.
637
+ percentage_of_cpu (float, optional): CPU percentage to allocate. Defaults to 0.14.
638
+ replica_limit (int, optional): Upper bound for replicas. Defaults to None.
639
+
640
+ Returns:
641
+ int: Maximum replicas, at least 1.
642
+ """
631
643
  if default_cpu_count is None:
632
644
  default_cpu_count = _system_resource_probe.get_cpu_count()
633
645
 
634
- return int(max(1, (default_cpu_count * percentage_of_cpu)))
646
+ _max_replicas = int(max(1, (default_cpu_count * percentage_of_cpu)))
647
+ if replica_limit is not None:
648
+ _max_replicas = min(_max_replicas, replica_limit)
649
+ return _max_replicas
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.7.22.dev20250722
3
+ Version: 2025.7.23.dev20250723
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -231,7 +231,7 @@ Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
231
231
  Requires-Dist: opentelemetry-sdk>=1.27.0
232
232
  Requires-Dist: pydantic>2.0.0
233
233
  Requires-Dist: pydantic-settings>2.0.0
234
- Requires-Dist: pypdfium2==4.30.1
234
+ Requires-Dist: pypdfium2==4.30.0
235
235
  Requires-Dist: pytest>=8.0.2
236
236
  Requires-Dist: pytest-mock>=3.14.0
237
237
  Requires-Dist: pytest-cov>=6.0.0
@@ -240,7 +240,6 @@ Requires-Dist: python-docx>=1.1.2
240
240
  Requires-Dist: python-dotenv>=1.0.1
241
241
  Requires-Dist: python-pptx>=1.0.2
242
242
  Requires-Dist: prometheus-client
243
- Requires-Dist: torch>=2.4.1
244
243
  Requires-Dist: ray[all]>=2.37.0
245
244
  Requires-Dist: redis>=5.2.1
246
245
  Requires-Dist: requests>=2.28.2
@@ -248,8 +247,6 @@ Requires-Dist: scikit-learn>=1.6.0
248
247
  Requires-Dist: scipy>=1.15.1
249
248
  Requires-Dist: setuptools>=78.1.1
250
249
  Requires-Dist: tabulate>=0.9.0
251
- Requires-Dist: torchvision
252
- Requires-Dist: torchaudio
253
250
  Requires-Dist: transformers>=4.47.0
254
251
  Requires-Dist: tqdm>=4.67.1
255
252
  Requires-Dist: uvicorn
@@ -71,6 +71,7 @@ nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py
71
71
  nv_ingest/framework/orchestration/ray/stages/utility/__init__.py
72
72
  nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py
73
73
  nv_ingest/framework/orchestration/ray/util/__init__.py
74
+ nv_ingest/framework/orchestration/ray/util/env_config.py
74
75
  nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py
75
76
  nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py
76
77
  nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py
@@ -16,7 +16,7 @@ opentelemetry-exporter-otlp>=1.27.0
16
16
  opentelemetry-sdk>=1.27.0
17
17
  pydantic>2.0.0
18
18
  pydantic-settings>2.0.0
19
- pypdfium2==4.30.1
19
+ pypdfium2==4.30.0
20
20
  pytest>=8.0.2
21
21
  pytest-mock>=3.14.0
22
22
  pytest-cov>=6.0.0
@@ -25,7 +25,6 @@ python-docx>=1.1.2
25
25
  python-dotenv>=1.0.1
26
26
  python-pptx>=1.0.2
27
27
  prometheus-client
28
- torch>=2.4.1
29
28
  ray[all]>=2.37.0
30
29
  redis>=5.2.1
31
30
  requests>=2.28.2
@@ -33,8 +32,6 @@ scikit-learn>=1.6.0
33
32
  scipy>=1.15.1
34
33
  setuptools>=78.1.1
35
34
  tabulate>=0.9.0
36
- torchvision
37
- torchaudio
38
35
  transformers>=4.47.0
39
36
  tqdm>=4.67.1
40
37
  uvicorn
@@ -35,7 +35,7 @@ dependencies = [
35
35
  "opentelemetry-sdk>=1.27.0",
36
36
  "pydantic>2.0.0",
37
37
  "pydantic-settings>2.0.0",
38
- "pypdfium2==4.30.1",
38
+ "pypdfium2==4.30.0",
39
39
  "pytest>=8.0.2",
40
40
  "pytest-mock>=3.14.0",
41
41
  "pytest-cov>=6.0.0",
@@ -44,7 +44,6 @@ dependencies = [
44
44
  "python-dotenv>=1.0.1",
45
45
  "python-pptx>=1.0.2",
46
46
  "prometheus-client",
47
- "torch>=2.4.1",
48
47
  "ray[all]>=2.37.0",
49
48
  "redis>=5.2.1",
50
49
  "requests>=2.28.2",
@@ -52,8 +51,6 @@ dependencies = [
52
51
  "scipy>=1.15.1",
53
52
  "setuptools>=78.1.1",
54
53
  "tabulate>=0.9.0",
55
- "torchvision",
56
- "torchaudio",
57
54
  "transformers>=4.47.0",
58
55
  "tqdm>=4.67.1",
59
56
  "uvicorn",