nv-ingest 2025.8.14.dev20250814__tar.gz → 2025.8.15.dev20250815__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. More details are available via the link on the original diff page.

Files changed (126)
  1. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/PKG-INFO +1 -1
  2. nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/execution/helpers.py +85 -0
  3. nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/execution/options.py +112 -0
  4. nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/process/dependent_services.py +55 -0
  5. nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/process/execution.py +497 -0
  6. nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/process/lifecycle.py +122 -0
  7. nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/process/strategies.py +182 -0
  8. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +1 -1
  9. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +4 -4
  10. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +23 -23
  11. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +5 -5
  12. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +8 -4
  13. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +16 -16
  14. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +9 -5
  15. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +8 -4
  16. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +10 -6
  17. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +22 -10
  18. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +18 -17
  19. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +10 -5
  20. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +14 -13
  21. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +15 -13
  22. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +3 -0
  23. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +3 -3
  24. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +22 -13
  25. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +10 -7
  26. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +10 -8
  27. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +4 -4
  28. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +5 -2
  29. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +71 -61
  30. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +7 -5
  31. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +8 -4
  32. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +8 -4
  33. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +17 -7
  34. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +7 -5
  35. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +13 -14
  36. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +18 -12
  37. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +11 -3
  38. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +1 -2
  39. nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +100 -0
  40. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +13 -3
  41. nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/util/flow_control/udf_intercept.py +352 -0
  42. nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/util/service/meta/ingest/__init__.py +3 -0
  43. nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/util/telemetry/__init__.py +3 -0
  44. nv_ingest-2025.8.15.dev20250815/nv_ingest/pipeline/__init__.py +3 -0
  45. nv_ingest-2025.8.15.dev20250815/nv_ingest/pipeline/config/__init__.py +3 -0
  46. nv_ingest-2025.8.15.dev20250815/nv_ingest/pipeline/config/loaders.py +198 -0
  47. nv_ingest-2025.8.15.dev20250815/nv_ingest/pipeline/config/replica_resolver.py +227 -0
  48. nv_ingest-2025.8.15.dev20250815/nv_ingest/pipeline/default_pipeline_impl.py +517 -0
  49. nv_ingest-2025.8.15.dev20250815/nv_ingest/pipeline/ingest_pipeline.py +389 -0
  50. nv_ingest-2025.8.15.dev20250815/nv_ingest/pipeline/pipeline_schema.py +398 -0
  51. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest.egg-info/PKG-INFO +1 -1
  52. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest.egg-info/SOURCES.txt +17 -3
  53. nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -359
  54. nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -393
  55. nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +0 -649
  56. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/LICENSE +0 -0
  57. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/MANIFEST.in +0 -0
  58. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/__init__.py +0 -0
  59. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/api/__init__.py +0 -0
  60. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/api/main.py +0 -0
  61. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/api/v1/__init__.py +0 -0
  62. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/api/v1/health.py +0 -0
  63. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/api/v1/ingest.py +0 -0
  64. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/api/v1/metrics.py +0 -0
  65. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/__init__.py +0 -0
  66. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/__init__.py +0 -0
  67. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/execution}/__init__.py +0 -0
  68. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/edges → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/process}/__init__.py +0 -0
  69. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/examples → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray}/__init__.py +0 -0
  70. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/primitives → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/edges}/__init__.py +0 -0
  71. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  72. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  73. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  74. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/stages → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/examples}/__init__.py +0 -0
  75. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  76. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  77. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/stages/extractors → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/primitives}/__init__.py +0 -0
  78. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  79. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  80. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/stages/injectors → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/stages}/__init__.py +0 -0
  81. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/stages/meta → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/stages/extractors}/__init__.py +0 -0
  82. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/stages/mutate → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/stages/injectors}/__init__.py +0 -0
  83. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/stages/sinks → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/stages/meta}/__init__.py +0 -0
  84. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  85. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/stages/sources → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/stages/mutate}/__init__.py +0 -0
  86. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/stages/storage → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/stages/sinks}/__init__.py +0 -0
  87. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/stages/telemetry → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/stages/sources}/__init__.py +0 -0
  88. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/stages/transforms → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/stages/storage}/__init__.py +0 -0
  89. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/stages/utility → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/stages/telemetry}/__init__.py +0 -0
  90. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  91. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/util → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/stages/transforms}/__init__.py +0 -0
  92. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/util/pipeline → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/stages/utility}/__init__.py +0 -0
  93. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/orchestration/ray/util/system_tools → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/util}/__init__.py +0 -0
  94. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
  95. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/util → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/util/pipeline}/__init__.py +0 -0
  96. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/util/service → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/orchestration/ray/util/system_tools}/__init__.py +0 -0
  97. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  98. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  99. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/__init__.py +0 -0
  100. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  101. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  102. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  103. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  104. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  105. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  106. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  107. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  108. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  109. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  110. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  111. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/util/service/impl → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/util}/__init__.py +0 -0
  112. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  113. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  114. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/util/service/impl/ingest → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/util/service}/__init__.py +0 -0
  115. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/util/service/meta → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/util/service/impl}/__init__.py +0 -0
  116. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/util/service/meta → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/util/service/impl}/ingest/__init__.py +0 -0
  117. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
  118. {nv_ingest-2025.8.14.dev20250814/nv_ingest/framework/util/telemetry → nv_ingest-2025.8.15.dev20250815/nv_ingest/framework/util/service/meta}/__init__.py +0 -0
  119. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  120. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  121. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest/version.py +0 -0
  122. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest.egg-info/dependency_links.txt +0 -0
  123. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest.egg-info/requires.txt +0 -0
  124. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/nv_ingest.egg-info/top_level.txt +0 -0
  125. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/pyproject.toml +0 -0
  126. {nv_ingest-2025.8.14.dev20250814 → nv_ingest-2025.8.15.dev20250815}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.8.14.dev20250814
3
+ Version: 2025.8.15.dev20250815
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -0,0 +1,85 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ """
6
+ Helper functions for pipeline execution configuration.
7
+
8
+ This module contains generic helper functions for converting individual parameters
9
+ into structured configuration objects, supporting the declarative execution architecture.
10
+ """
11
+
12
+ from typing import Optional, TextIO
13
+
14
+ from nv_ingest.framework.orchestration.execution.options import PipelineRuntimeOverrides, ExecutionOptions
15
+ from nv_ingest.framework.orchestration.process.strategies import ProcessExecutionStrategy, create_execution_strategy
16
+
17
+
18
+ def create_runtime_overrides(
19
+ disable_dynamic_scaling: Optional[bool], dynamic_memory_threshold: Optional[float]
20
+ ) -> PipelineRuntimeOverrides:
21
+ """
22
+ Create runtime override object from individual parameters.
23
+
24
+ This function converts the individual override parameters into
25
+ a structured PipelineRuntimeOverrides object for declarative processing.
26
+
27
+ Parameters
28
+ ----------
29
+ disable_dynamic_scaling : Optional[bool]
30
+ Dynamic scaling override value.
31
+ dynamic_memory_threshold : Optional[float]
32
+ Memory threshold override value.
33
+
34
+ Returns
35
+ -------
36
+ PipelineRuntimeOverrides
37
+ Structured override object containing the provided values.
38
+ """
39
+ return PipelineRuntimeOverrides(
40
+ disable_dynamic_scaling=disable_dynamic_scaling, dynamic_memory_threshold=dynamic_memory_threshold
41
+ )
42
+
43
+
44
+ def create_execution_options(block: bool, stdout: Optional[TextIO], stderr: Optional[TextIO]) -> ExecutionOptions:
45
+ """
46
+ Create execution options object from individual parameters.
47
+
48
+ This function converts individual execution parameters into
49
+ a structured ExecutionOptions object for declarative processing.
50
+
51
+ Parameters
52
+ ----------
53
+ block : bool
54
+ Whether to block until pipeline completion.
55
+ stdout : Optional[TextIO]
56
+ Output stream for subprocess redirection.
57
+ stderr : Optional[TextIO]
58
+ Error stream for subprocess redirection.
59
+
60
+ Returns
61
+ -------
62
+ ExecutionOptions
63
+ Structured options object containing the provided values.
64
+ """
65
+ return ExecutionOptions(block=block, stdout=stdout, stderr=stderr)
66
+
67
+
68
+ def select_execution_strategy(run_in_subprocess: bool) -> ProcessExecutionStrategy:
69
+ """
70
+ Select appropriate execution strategy based on parameters.
71
+
72
+ This function encapsulates the logic for choosing between
73
+ in-process and subprocess execution strategies.
74
+
75
+ Parameters
76
+ ----------
77
+ run_in_subprocess : bool
78
+ Whether to run in a subprocess.
79
+
80
+ Returns
81
+ -------
82
+ ProcessExecutionStrategy
83
+ Configured execution strategy instance.
84
+ """
85
+ return create_execution_strategy(run_in_subprocess=run_in_subprocess)
@@ -0,0 +1,112 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ """
6
+ Data classes for pipeline execution configuration and options.
7
+
8
+ This module defines declarative data structures for configuring pipeline execution,
9
+ replacing imperative parameter passing with structured configuration objects.
10
+ """
11
+
12
+ from dataclasses import dataclass
13
+ from typing import Optional, TextIO, Union
14
+
15
+ from nv_ingest.framework.orchestration.ray.primitives.ray_pipeline import (
16
+ RayPipelineInterface,
17
+ RayPipelineSubprocessInterface,
18
+ )
19
+
20
+
21
+ @dataclass
22
+ class PipelineRuntimeOverrides:
23
+ """
24
+ Runtime parameter overrides for pipeline configuration.
25
+
26
+ These overrides are applied to the base pipeline configuration
27
+ to customize runtime behavior without modifying the source config.
28
+
29
+ Attributes
30
+ ----------
31
+ disable_dynamic_scaling : Optional[bool]
32
+ Override for dynamic scaling behavior. If provided, overrides
33
+ the pipeline config's disable_dynamic_scaling setting.
34
+ dynamic_memory_threshold : Optional[float]
35
+ Override for memory threshold used in dynamic scaling decisions.
36
+ Must be between 0.0 and 1.0 if provided.
37
+ """
38
+
39
+ disable_dynamic_scaling: Optional[bool] = None
40
+ dynamic_memory_threshold: Optional[float] = None
41
+
42
+ def __post_init__(self):
43
+ """Validate override values."""
44
+ if self.dynamic_memory_threshold is not None:
45
+ if not (0.0 <= self.dynamic_memory_threshold <= 1.0):
46
+ raise ValueError(
47
+ f"dynamic_memory_threshold must be between 0.0 and 1.0, " f"got {self.dynamic_memory_threshold}"
48
+ )
49
+
50
+
51
+ @dataclass
52
+ class ExecutionOptions:
53
+ """
54
+ Options controlling pipeline execution behavior.
55
+
56
+ These options determine how the pipeline is executed (blocking vs non-blocking)
57
+ and where output is directed for subprocess execution.
58
+
59
+ Attributes
60
+ ----------
61
+ block : bool
62
+ If True, blocks until pipeline completes. If False, returns
63
+ immediately with a control interface.
64
+ stdout : Optional[TextIO]
65
+ Stream for subprocess stdout redirection. Only used when
66
+ run_in_subprocess=True. If None, redirected to /dev/null.
67
+ stderr : Optional[TextIO]
68
+ Stream for subprocess stderr redirection. Only used when
69
+ run_in_subprocess=True. If None, redirected to /dev/null.
70
+ """
71
+
72
+ block: bool = True
73
+ stdout: Optional[TextIO] = None
74
+ stderr: Optional[TextIO] = None
75
+
76
+
77
+ @dataclass
78
+ class ExecutionResult:
79
+ """
80
+ Result of pipeline execution containing interface and timing information.
81
+
82
+ This class encapsulates the results of pipeline execution and provides
83
+ methods to convert to the legacy return format for backward compatibility.
84
+
85
+ Attributes
86
+ ----------
87
+ interface : Union[RayPipelineInterface, RayPipelineSubprocessInterface, None]
88
+ Pipeline control interface. None for blocking subprocess execution.
89
+ elapsed_time : Optional[float]
90
+ Total execution time in seconds. Only set for blocking execution.
91
+ """
92
+
93
+ interface: Union[RayPipelineInterface, RayPipelineSubprocessInterface, None]
94
+ elapsed_time: Optional[float] = None
95
+
96
+ def get_return_value(self) -> Union[RayPipelineInterface, float, RayPipelineSubprocessInterface]:
97
+ """
98
+ Convert to legacy return format for backward compatibility.
99
+
100
+ Returns
101
+ -------
102
+ Union[RayPipelineInterface, float, RayPipelineSubprocessInterface]
103
+ - If blocking execution: returns elapsed time (float)
104
+ - If non-blocking execution: returns pipeline interface
105
+ """
106
+ if self.elapsed_time is not None:
107
+ return self.elapsed_time
108
+ elif self.interface is not None:
109
+ return self.interface
110
+ else:
111
+ # This should not happen in normal execution
112
+ raise RuntimeError("ExecutionResult has neither interface nor elapsed_time")
@@ -0,0 +1,55 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ """
6
+ Dependent services management for pipeline orchestration.
7
+
8
+ This module contains utilities for starting and managing dependent services
9
+ that the pipeline requires, such as message brokers and other infrastructure.
10
+ """
11
+
12
+ import logging
13
+ import multiprocessing
14
+ import socket
15
+ from nv_ingest_api.util.message_brokers.simple_message_broker.broker import SimpleMessageBroker
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ def start_simple_message_broker(broker_client: dict) -> multiprocessing.Process:
21
+ """
22
+ Starts a SimpleMessageBroker server in a separate process.
23
+
24
+ Parameters
25
+ ----------
26
+ broker_client : dict
27
+ Broker configuration. Expected keys include:
28
+ - "port": the port to bind the server to,
29
+ - "broker_params": optionally including "max_queue_size",
30
+ - and any other parameters required by SimpleMessageBroker.
31
+
32
+ Returns
33
+ -------
34
+ multiprocessing.Process
35
+ The process running the SimpleMessageBroker server.
36
+ """
37
+
38
+ def broker_server():
39
+ # Use max_queue_size from broker_params or default to 10000.
40
+ broker_params = broker_client.get("broker_params", {})
41
+ max_queue_size = broker_params.get("max_queue_size", 10000)
42
+ server_host = broker_client.get("host", "0.0.0.0")
43
+ server_port = broker_client.get("port", 7671)
44
+ # Optionally, set socket options here for reuse.
45
+ server = SimpleMessageBroker(server_host, server_port, max_queue_size)
46
+ # Enable address reuse on the server socket.
47
+ server.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
48
+ server.serve_forever()
49
+
50
+ p = multiprocessing.Process(target=broker_server)
51
+ p.daemon = False
52
+ p.start()
53
+ logger.info(f"Started SimpleMessageBroker server in separate process on port {broker_client.get('port', 7671)}")
54
+
55
+ return p