nv-ingest 2025.10.24.dev20251024.tar.gz → 2025.11.9.dev20251109.tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Files changed (129)
  1. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/PKG-INFO +1 -1
  2. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/api/v2/README.md +44 -18
  3. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/api/v2/ingest.py +190 -6
  4. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/process/dependent_services.py +17 -10
  5. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/process/strategies.py +6 -2
  6. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/process/termination.py +49 -9
  7. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +2 -2
  8. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +41 -8
  9. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +26 -5
  10. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +2 -2
  11. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/pipeline/default_pipeline_impl.py +21 -21
  12. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest.egg-info/PKG-INFO +1 -1
  13. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/LICENSE +0 -0
  14. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/MANIFEST.in +0 -0
  15. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/__init__.py +0 -0
  16. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/api/__init__.py +0 -0
  17. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/api/main.py +0 -0
  18. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/api/tracing.py +0 -0
  19. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/api/v1/__init__.py +0 -0
  20. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/api/v1/health.py +0 -0
  21. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/api/v1/ingest.py +0 -0
  22. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/api/v1/metrics.py +0 -0
  23. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/api/v2/__init__.py +0 -0
  24. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/__init__.py +0 -0
  25. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/__init__.py +0 -0
  26. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
  27. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
  28. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/execution/options.py +0 -0
  29. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
  30. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/process/execution.py +0 -0
  31. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
  32. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  33. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  34. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  35. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  36. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  37. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  38. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  39. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  40. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  41. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  42. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  43. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
  44. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
  45. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
  46. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  47. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  48. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  49. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  50. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  51. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  52. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  53. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  54. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  55. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  56. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  57. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  58. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
  59. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  60. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  61. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  62. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  63. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
  64. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  65. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  66. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  67. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  68. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  69. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  70. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  71. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  72. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  73. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  74. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  75. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  76. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  77. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  78. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  79. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  80. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
  81. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
  82. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  83. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  84. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  85. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
  86. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  87. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  88. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
  89. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  90. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  91. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  92. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  93. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/__init__.py +0 -0
  94. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  95. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  96. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  97. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  98. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  99. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  100. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  101. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  102. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  103. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  104. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  105. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/__init__.py +0 -0
  106. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  107. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  108. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
  109. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/service/__init__.py +0 -0
  110. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  111. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  112. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  113. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  114. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  115. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  116. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  117. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/pipeline/__init__.py +0 -0
  118. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/pipeline/config/__init__.py +0 -0
  119. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/pipeline/config/loaders.py +0 -0
  120. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
  121. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
  122. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/pipeline/pipeline_schema.py +0 -0
  123. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest/version.py +0 -0
  124. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest.egg-info/SOURCES.txt +0 -0
  125. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest.egg-info/dependency_links.txt +0 -0
  126. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest.egg-info/requires.txt +0 -0
  127. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/nv_ingest.egg-info/top_level.txt +0 -0
  128. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/pyproject.toml +0 -0
  129. {nv_ingest-2025.10.24.dev20251024 → nv_ingest-2025.11.9.dev20251109}/setup.cfg +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nv-ingest
- Version: 2025.10.24.dev20251024
+ Version: 2025.11.9.dev20251109
  Summary: Python module for multimodal document ingestion
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
  License: Apache License
nv_ingest/api/v2/README.md
@@ -11,15 +11,6 @@ The V2 API introduces automatic PDF splitting at the REST layer to improve proce
  3. **Transparent Aggregation**: Results are automatically aggregated when fetching parent jobs
  4. **Backward Compatible**: PDFs with page counts ≤ `PDF_SPLIT_PAGE_COUNT` behave identical to V1
 
- ## Tracing & Aggregated Metadata
-
- - V2 endpoints open an OpenTelemetry span using the shared `traced_endpoint` decorator. The span name defaults to the function name, or can be overridden when applying the decorator.
- - `submit_job_v2` records the parent span's `trace_id` into each subjob's `tracing_options`, enabling downstream Ray stages (e.g., the message broker sink) to attach chunk-level telemetry consistently.
- - Response headers still return `x-trace-id` derived from the active span context, allowing clients to correlate downstream work.
- - When `/v2/fetch_job/{parent_id}` aggregates completed chunks, it captures any `trace` / `annotations` dictionaries emitted by the sink for each subjob and includes them in the response payload (see "Aggregated response" below).
-
- This behaviour matches the V1 tracing model and sets the foundation for adding W3C `traceparent` propagation in future changes.
-
  ## How It Works
 
  1. **Submit**: When a PDF with pages exceeding `PDF_SPLIT_PAGE_COUNT` is submitted to `/v2/submit_job`:
@@ -36,6 +27,33 @@ This behaviour matches the V1 tracing model and sets the foundation for adding W
  - Pending work returns 202 (processing)
  - Failed chunks are noted without failing the entire job; metadata records which chunks failed
 
+
+ ## Client Library Features
+
+ ### Accessing Trace Metrics
+
+ The Python client library provides convenient access to trace metrics via the `return_traces` parameter:
+
+ ```python
+ from nv_ingest_client.client import Ingestor
+
+ ingestor = Ingestor(
+     message_client_hostname="localhost",
+     message_client_port=7670,
+     message_client_kwargs={"api_version": "v2"}
+ ).files("/path/to/pdfs").extract().embed()
+
+ # Get results with trace metrics
+ results, traces = ingestor.ingest(return_traces=True)
+
+ # Access timing for first document
+ pdf_time = traces[0]["trace::resident_time::pdf_extractor"] / 1e9
+ table_time = traces[0]["trace::resident_time::table_extractor"] / 1e9
+ print(f"PDF: {pdf_time:.2f}s, Tables: {table_time:.2f}s")
+ ```
+
+ **Note:** For split PDFs, `resident_time` represents aggregated compute time across all chunks. For non-split PDFs, it is computed client-side from entry/exit pairs.
+
  ### Aggregated response
 
  The fetch endpoint returns a JSON body shaped like the following:
@@ -163,15 +181,23 @@ For split PDFs, parent-level metrics are automatically computed for each stage (
  - Failed chunk entries remain in `failed_subjobs`; missing chunks indicate the sink did not emit telemetry
  - **To access chunk traces:** Use `metadata.trace_segments[]` - each segment contains the full trace dict for that chunk
 
- ## Testing
+ ### Advanced: Accessing Full Metadata
 
- Use the V2 test script with environment variable:
- ```bash
- # Run with V2 endpoints
- DATASET_DIR=/data/splits python scripts/tests/cases/dc20_v2_e2e.py
- ```
+ For advanced use cases requiring per-chunk trace breakdown or full metadata, use `include_parent_trace_ids`:
+
+ ```python
+ results, traces, parent_trace_ids = ingestor.ingest(
+     return_traces=True,
+     include_parent_trace_ids=True
+ )
+
+ # Fetch full parent job metadata (including trace_segments)
+ import requests
+ response = requests.get(f"http://localhost:7670/v2/fetch_job/{parent_trace_ids[0]}")
+ metadata = response.json()["metadata"]
 
- Or set the API version for any existing code:
- ```bash
- export NV_INGEST_API_VERSION=v2
+ # Access per-chunk traces
+ for segment in metadata["trace_segments"]:
+     print(f"Chunk {segment['chunk_index']}: pages {segment['start_page']}-{segment['end_page']}")
+     print(f" Traces: {len(segment['trace'])} entries")
  ```
nv_ingest/api/v2/ingest.py
@@ -12,6 +12,7 @@ import logging
  import os
  import time
  import uuid
+ import random
 
  from fastapi import APIRouter, Request, Response
  from fastapi import HTTPException
@@ -44,6 +45,42 @@ router = APIRouter()
 
  DEFAULT_PDF_SPLIT_PAGE_COUNT = 32
 
+ # Default QoS thresholds (pages). Tunable via environment variables:
+ # QOS_MAX_PAGES_MICRO, QOS_MAX_PAGES_SMALL, QOS_MAX_PAGES_MEDIUM
+ _QOS_DEFAULTS = {
+     "micro": 8,
+     "small": 64,
+     "medium": 256,
+ }
+
+
+ def get_qos_tier_for_page_count(page_count: int) -> str:
+     """
+     Select QoS tier for a document based on its total page count.
+     Tiers: 'micro', 'small', 'medium', 'large', 'default'
+     Thresholds can be tuned via environment variables:
+     - QOS_MAX_PAGES_MICRO (default: 4)
+     - QOS_MAX_PAGES_SMALL (default: 16)
+     - QOS_MAX_PAGES_MEDIUM (default: 64)
+     Anything above MEDIUM is 'large'. Non-positive page_count returns 'default'.
+     """
+     try:
+         micro_max = int(os.getenv("QOS_MAX_PAGES_MICRO", str(_QOS_DEFAULTS["micro"])))
+         small_max = int(os.getenv("QOS_MAX_PAGES_SMALL", str(_QOS_DEFAULTS["small"])))
+         medium_max = int(os.getenv("QOS_MAX_PAGES_MEDIUM", str(_QOS_DEFAULTS["medium"])))
+     except ValueError:
+         micro_max, small_max, medium_max = _QOS_DEFAULTS["micro"], _QOS_DEFAULTS["small"], _QOS_DEFAULTS["medium"]
+
+     if page_count <= 0:
+         return "default"
+     if page_count <= micro_max:
+         return "micro"
+     if page_count <= small_max:
+         return "small"
+     if page_count <= medium_max:
+         return "medium"
+     return "large"
+
 
  def get_pdf_split_page_count(client_override: Optional[int] = None) -> int:
      """
@@ -432,6 +469,76 @@ def _extract_ray_telemetry(result: Dict[str, Any]) -> Tuple[Optional[Dict[str, A
      return trace_dict, annotations_dict
 
 
+ def _normalize_chunk_records(
+     records: Optional[List[Any]],
+     descriptor: Dict[str, Any],
+     parent_metadata: Dict[str, Any],
+ ) -> List[Any]:
+     """Re-map chunk-local metadata to document-level context for aggregation."""
+
+     if not isinstance(records, list):
+         return []
+
+     total_pages = parent_metadata.get("total_pages")
+     original_source_id = parent_metadata.get("original_source_id")
+     original_source_name = parent_metadata.get("original_source_name")
+
+     start_page = descriptor.get("start_page")
+     page_offset = start_page - 1 if isinstance(start_page, int) and start_page > 0 else 0
+
+     normalized_entries: List[Any] = []
+
+     for entry in records:
+         if not isinstance(entry, dict):
+             normalized_entries.append(entry)
+             continue
+
+         normalized_entry = entry.copy()
+         original_metadata = entry.get("metadata")
+
+         if isinstance(original_metadata, dict):
+             normalized_metadata = original_metadata.copy()
+             normalized_entry["metadata"] = normalized_metadata
+
+             original_source_meta = original_metadata.get("source_metadata")
+             if isinstance(original_source_meta, dict):
+                 normalized_source_meta = original_source_meta.copy()
+                 normalized_metadata["source_metadata"] = normalized_source_meta
+
+                 if original_source_id:
+                     normalized_source_meta["source_id"] = original_source_id
+                 if original_source_name:
+                     normalized_source_meta["source_name"] = original_source_name
+
+             original_content_meta = original_metadata.get("content_metadata")
+             if isinstance(original_content_meta, dict):
+                 normalized_content_meta = original_content_meta.copy()
+                 normalized_metadata["content_metadata"] = normalized_content_meta
+
+                 page_number = normalized_content_meta.get("page_number")
+                 if isinstance(page_number, int) and page_number >= 0:
+                     normalized_content_meta["page_number"] = page_number + page_offset
+
+                 if isinstance(total_pages, int) and isinstance(normalized_content_meta.get("page_count"), int):
+                     # Ensure optional per-record page count reflects the full document
+                     normalized_content_meta["page_count"] = total_pages
+
+                 original_hierarchy = original_content_meta.get("hierarchy")
+                 if isinstance(original_hierarchy, dict):
+                     normalized_hierarchy = original_hierarchy.copy()
+                     normalized_content_meta["hierarchy"] = normalized_hierarchy
+
+                     hierarchy_page = normalized_hierarchy.get("page")
+                     if isinstance(hierarchy_page, int) and hierarchy_page >= 0:
+                         normalized_hierarchy["page"] = hierarchy_page + page_offset
+                     if isinstance(total_pages, int):
+                         normalized_hierarchy["page_count"] = total_pages
+
+         normalized_entries.append(normalized_entry)
+
+     return normalized_entries
+
+
  def _aggregate_parent_traces(chunk_traces: Dict[str, Any]) -> Dict[str, Any]:
      """
      Aggregate chunk-level traces into parent-level metrics.
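The remapping above amounts to shifting chunk-local page references by `start_page - 1` and restamping source identity and total page count. A toy record (all field values hypothetical), run through the private helper purely for illustration, shows the effect:

```python
# Illustration only: calling the private helper with made-up data.
from nv_ingest.api.v2.ingest import _normalize_chunk_records

descriptor = {"start_page": 33, "end_page": 64}
parent_metadata = {
    "total_pages": 100,
    "original_source_id": "report.pdf",
    "original_source_name": "report.pdf",
}
records = [
    {
        "metadata": {
            "source_metadata": {"source_id": "report.pdf#chunk-2"},
            "content_metadata": {"page_number": 2, "hierarchy": {"page": 2, "page_count": 32}},
        }
    }
]

cm = _normalize_chunk_records(records, descriptor, parent_metadata)[0]["metadata"]["content_metadata"]
print(cm["page_number"], cm["hierarchy"]["page"], cm["hierarchy"]["page_count"])
# -> 34 34 100 (offset = start_page - 1 = 32; hierarchy.page_count becomes total_pages)
```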
@@ -574,7 +681,8 @@ def _build_aggregated_response(
      if result is not None:
          # Add page data to aggregated result
          if "data" in result:
-             aggregated_result["data"].extend(result["data"])
+             normalized_records = _normalize_chunk_records(result.get("data"), descriptor, metadata)
+             aggregated_result["data"].extend(normalized_records)
          chunk_entry = dict(descriptor)
          aggregated_result["metadata"]["chunks"].append(chunk_entry)
 
@@ -631,6 +739,51 @@
      return aggregated_result
 
 
+ # ---------------------------------------------------------------------------
+ # Bursty submission helpers (fairness without long-lived in-flight tasks)
+ # ---------------------------------------------------------------------------
+
+
+ def _get_submit_burst_params() -> Tuple[int, int, int]:
+     """
+     Returns (burst_size, pause_ms, jitter_ms) from environment with sane defaults.
+     - V2_SUBMIT_BURST_SIZE (default: 16)
+     - V2_SUBMIT_BURST_PAUSE_MS (default: 25)
+     - V2_SUBMIT_BURST_JITTER_MS (default: 10)
+     """
+     burst_size = int(os.getenv("V2_SUBMIT_BURST_SIZE", "16"))
+     pause_ms = int(os.getenv("V2_SUBMIT_BURST_PAUSE_MS", "50"))
+     jitter_ms = int(os.getenv("V2_SUBMIT_BURST_JITTER_MS", "15"))
+
+     return max(1, burst_size), max(0, pause_ms), max(0, jitter_ms)
+
+
+ async def _submit_subjobs_in_bursts(
+     items: List[Tuple[str, MessageWrapper]],
+     ingest_service: "INGEST_SERVICE_T",
+     *,
+     burst_size: int,
+     pause_ms: int,
+     jitter_ms: int,
+ ) -> None:
+     """
+     Submit subjobs in sequential bursts and await each burst to completion.
+     This avoids keeping a large number of pending tasks in the REST handler
+     and allows other concurrent requests to interleave enqueue work between bursts.
+     """
+     for offset in range(0, len(items), burst_size):
+         burst = items[offset : offset + burst_size]
+         tasks = [ingest_service.submit_job(wrapper, subjob_id) for (subjob_id, wrapper) in burst]
+         # Propagate any errors from this burst
+         await asyncio.gather(*tasks)
+
+         # Pause with jitter to yield to other request handlers before next burst
+         if offset + burst_size < len(items):
+             delay_ms = pause_ms + (random.randint(0, jitter_ms) if jitter_ms > 0 else 0)
+             if delay_ms > 0:
+                 await asyncio.sleep(delay_ms / 1000.0)
+
+
  # POST /v2/submit_job
  @router.post(
      "/submit_job",
@@ -681,22 +834,24 @@ async def submit_job_v2(
          pdf_content = base64.b64decode(payloads[0])
          page_count = get_pdf_page_count(pdf_content)
          pdf_page_count_cache = page_count # Cache for later use
+         qos_tier = get_qos_tier_for_page_count(page_count)
          pages_per_chunk = get_pdf_split_page_count(client_override=client_split_page_count)
 
          # Split if the document has more pages than our chunk size
          if page_count > pages_per_chunk:
              logger.warning(
-                 "Splitting PDF %s into %s-page chunks (total pages: %s)",
+                 "Splitting PDF %s into %s-page chunks (total pages: %s) -> (qos_tier: %s)",
                  original_source_name,
                  pages_per_chunk,
                  page_count,
+                 qos_tier,
              )
 
              chunks = split_pdf_to_chunks(pdf_content, pages_per_chunk)
 
              subjob_ids: List[str] = []
              subjob_descriptors: List[Dict[str, Any]] = []
-             submission_tasks = []
+             submission_items: List[Tuple[str, MessageWrapper]] = []
 
              try:
                  parent_uuid = uuid.UUID(parent_job_id)
@@ -717,7 +872,19 @@
                          original_source_id=original_source_id,
                          original_source_name=original_source_name,
                      )
-                     submission_tasks.append(ingest_service.submit_job(subjob_wrapper, subjob_id))
+
+                     # Inject QoS routing hint into subjob routing_options (keeps API and service loosely coupled)
+                     try:
+                         sub_spec = json.loads(subjob_wrapper.payload)
+                         routing_opts = sub_spec.get("routing_options") or {}
+                         routing_opts["queue_hint"] = qos_tier
+                         sub_spec["routing_options"] = routing_opts
+                         subjob_wrapper = MessageWrapper(payload=json.dumps(sub_spec))
+                     except Exception:
+                         # Best-effort; if we cannot inject, fall back to default routing
+                         pass
+
+                     submission_items.append((subjob_id, subjob_wrapper))
                      subjob_ids.append(subjob_id)
                      subjob_descriptors.append(
                          {
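The hint itself is just a key inside the job spec's `routing_options`. The snippet below reproduces the injection step in isolation on a stripped-down payload (the `job_id` and the rest of the spec are hypothetical placeholders, not the real job-spec schema):

```python
# Illustration only: what the injection step does to a subjob payload.
import json

payload = json.dumps({"job_id": "subjob-0000", "routing_options": {}})  # hypothetical spec

sub_spec = json.loads(payload)
routing_opts = sub_spec.get("routing_options") or {}
routing_opts["queue_hint"] = "medium"  # the qos_tier computed from the parent's page count
sub_spec["routing_options"] = routing_opts

print(json.dumps(sub_spec))
# -> {"job_id": "subjob-0000", "routing_options": {"queue_hint": "medium"}}
```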
@@ -729,8 +896,15 @@
                          }
                      )
 
-             if submission_tasks:
-                 await asyncio.gather(*submission_tasks)
+             if submission_items:
+                 burst_size, pause_ms, jitter_ms = _get_submit_burst_params()
+                 await _submit_subjobs_in_bursts(
+                     submission_items,
+                     ingest_service,
+                     burst_size=burst_size,
+                     pause_ms=pause_ms,
+                     jitter_ms=jitter_ms,
+                 )
 
              parent_metadata: Dict[str, Any] = {
                  "total_pages": page_count,
@@ -758,6 +932,16 @@
          if "tracing_options" not in job_spec_dict:
              job_spec_dict["tracing_options"] = {"trace": True}
          job_spec_dict["tracing_options"]["trace_id"] = str(current_trace_id)
+         # If this was a PDF and we computed page_count, route the single job using the same QoS tier
+         try:
+             if (
+                 document_types
+                 and document_types[0].lower() == "pdf"
+                 and "queue_hint" not in (job_spec_dict.get("routing_options") or {})
+             ):
+                 job_spec_dict.setdefault("routing_options", {})["queue_hint"] = qos_tier
+         except Exception:
+             pass
          updated_job_spec = MessageWrapper(payload=json.dumps(job_spec_dict))
 
          span.add_event("Submitting as single job (no split needed)")
nv_ingest/framework/orchestration/process/dependent_services.py
@@ -18,6 +18,18 @@ from nv_ingest_api.util.message_brokers.simple_message_broker.broker import Simp
  logger = logging.getLogger(__name__)
 
 
+ def _broker_server_target(host, port, max_queue_size):
+     """
+     Target function to be run in a separate process for the SimpleMessageBroker.
+     """
+     server = SimpleMessageBroker(host, port, max_queue_size)
+     try:
+         server.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+     except Exception:
+         pass
+     server.serve_forever()
+
+
  def start_simple_message_broker(broker_client: dict) -> multiprocessing.Process:
      """
      Starts a SimpleMessageBroker server in a separate process.
@@ -58,16 +70,11 @@ def start_simple_message_broker(broker_client: dict) -> multiprocessing.Process:
          f"continuing to spawn a broker process (tests expect a Process to be returned)"
      )
 
-     def broker_server():
-         # Optionally, set socket options here for reuse (note: binding occurs in server __init__).
-         server = SimpleMessageBroker(server_host, server_port, max_queue_size)
-         try:
-             server.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
-         except Exception:
-             pass
-         server.serve_forever()
-
-     p = multiprocessing.Process(target=broker_server)
+     p = multiprocessing.Process(
+         target=_broker_server_target,
+         args=(server_host, server_port, max_queue_size),
+         daemon=True,
+     )
      # If we're launching from inside the pipeline subprocess, mark daemon so the
      # broker dies automatically when the subprocess exits.
      p.daemon = os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS") == "1"
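Moving the broker target from a nested closure to the module-level `_broker_server_target` matters once the start method can be `spawn` (see the strategies.py hunk below): `spawn` pickles the process target, and locally defined closures cannot be pickled. A minimal sketch, with a hypothetical worker and port:

```python
# Minimal sketch (hypothetical worker, hypothetical port): a module-level target
# works with both "fork" and "spawn"; a nested closure would fail to pickle under "spawn".
import multiprocessing


def _worker(host: str, port: int) -> None:
    print(f"serving on {host}:{port}")


if __name__ == "__main__":
    ctx = multiprocessing.get_context("spawn")
    p = ctx.Process(target=_worker, args=("127.0.0.1", 7671), daemon=True)
    p.start()
    p.join()
```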
nv_ingest/framework/orchestration/process/strategies.py
@@ -11,9 +11,10 @@ Strategy pattern for clean separation of execution concerns.
  """
 
  import atexit
- import os
  import logging
  import multiprocessing
+ import os
+ import sys
  import time
  from abc import ABC, abstractmethod
 
@@ -132,7 +133,10 @@ class SubprocessStrategy(ProcessExecutionStrategy):
          logger.info("Launching pipeline in Python subprocess using multiprocessing.")
 
          # Create subprocess using fork context
-         ctx = multiprocessing.get_context("fork")
+         start_method = "fork"
+         if sys.platform.lower() == "darwin":
+             start_method = "spawn"
+         ctx = multiprocessing.get_context(start_method)
          process = ctx.Process(
              target=run_pipeline_process,
              args=(
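The same start-method concern drives the change above: `fork` stays the default elsewhere, while on macOS the strategy switches to `spawn`. A small sketch (not the project's code) of a defensive variant that also checks what the running interpreter supports:

```python
# Sketch only: pick a start method per platform, falling back to what is available.
import multiprocessing
import sys


def pick_start_method() -> str:
    preferred = "spawn" if sys.platform == "darwin" else "fork"
    available = multiprocessing.get_all_start_methods()
    return preferred if preferred in available else available[0]


method = pick_start_method()
ctx = multiprocessing.get_context(method)
print("using start method:", method)
```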
nv_ingest/framework/orchestration/process/termination.py
@@ -19,20 +19,45 @@ logger = logging.getLogger(__name__)
 
 
  def _safe_log(level: int, msg: str) -> None:
-     """Best-effort logging that won't crash during interpreter shutdown."""
+     """Best-effort logging that won't emit handler tracebacks on closed streams.
+
+     Temporarily disables logging.raiseExceptions to prevent the logging module
+     from printing "--- Logging error ---" to stderr if a handler's stream is
+     already closed (common during process teardown). Falls back to writing to
+     sys.__stderr__ if available.
+     """
      try:
-         logger.log(level, msg)
+         import logging as _logging
+
+         prev = getattr(_logging, "raiseExceptions", True)
+         # Suppress handler errors being printed to stderr
+         _logging.raiseExceptions = False
+
+         # If there are no handlers, skip and use stderr fallback
+         if logger.handlers:
+             logger.log(level, msg)
+             return
      except Exception:
+         # Intentionally ignore and try stderr fallback
+         pass
+     finally:
          try:
-             # Fallback to stderr if available
-             import sys
+             import logging as _logging # re-import safe even if earlier failed
 
-             if hasattr(sys, "__stderr__") and sys.__stderr__:
-                 sys.__stderr__.write(msg + "\n")
-                 sys.__stderr__.flush()
+             _logging.raiseExceptions = prev # type: ignore[name-defined]
          except Exception:
              pass
 
+     # Fallback to stderr if available
+     try:
+         import sys
+
+         if hasattr(sys, "__stderr__") and sys.__stderr__:
+             sys.__stderr__.write(msg + "\n")
+             sys.__stderr__.flush()
+     except Exception:
+         pass
+
 
  def kill_pipeline_process_group(process) -> None:
      """
@@ -74,7 +99,17 @@
 
      try:
          # Send graceful termination to the entire process group
-         os.killpg(os.getpgid(pid), signal.SIGTERM)
+         try:
+             pgid = os.getpgid(pid)
+         except Exception:
+             # Process already gone
+             _safe_log(logging.DEBUG, f"Process group for PID {pid} not found during SIGTERM phase")
+             return
+         try:
+             os.killpg(pgid, signal.SIGTERM)
+         except ProcessLookupError:
+             _safe_log(logging.DEBUG, f"Process group for PID {pid} no longer exists (SIGTERM)")
+             return
 
          # If we have a Process handle, give it a chance to exit cleanly
          if proc is not None and hasattr(proc, "join"):
@@ -95,7 +130,12 @@
          if still_alive:
              _safe_log(logging.WARNING, "Process group did not terminate gracefully, using SIGKILL")
              try:
-                 os.killpg(os.getpgid(pid), signal.SIGKILL)
+                 try:
+                     pgid2 = os.getpgid(pid)
+                 except Exception:
+                     _safe_log(logging.DEBUG, f"Process group for PID {pid} vanished before SIGKILL")
+                     return
+                 os.killpg(pgid2, signal.SIGKILL)
              finally:
                  if proc is not None and hasattr(proc, "join"):
                      try:
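Both kill phases now tolerate the process group disappearing between the `getpgid` lookup and the `killpg` call. The same escalation pattern, reduced to a standalone helper for illustration (real callers should keep using `kill_pipeline_process_group` from termination.py), looks like this:

```python
# Simplified, POSIX-only sketch of the SIGTERM -> wait -> SIGKILL escalation.
import os
import signal
import time


def stop_process_group(pid: int, grace_s: float = 5.0) -> None:
    try:
        pgid = os.getpgid(pid)
    except ProcessLookupError:
        return  # already gone
    try:
        os.killpg(pgid, signal.SIGTERM)
    except ProcessLookupError:
        return
    deadline = time.monotonic() + grace_s
    while time.monotonic() < deadline:
        try:
            os.killpg(pgid, 0)  # signal 0 only probes for existence
        except ProcessLookupError:
            return  # group exited gracefully
        time.sleep(0.1)
    try:
        os.killpg(pgid, signal.SIGKILL)
    except ProcessLookupError:
        pass
```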
nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py
@@ -152,11 +152,11 @@ if __name__ == "__main__":
      os.environ["OCR_MODEL_NAME"] = "paddle"
      os.environ["NEMORETRIEVER_PARSE_HTTP_ENDPOINT"] = "https://integrate.api.nvidia.com/v1/chat/completions"
      os.environ["VLM_CAPTION_ENDPOINT"] = "https://integrate.api.nvidia.com/v1/chat/completions"
-     os.environ["VLM_CAPTION_MODEL_NAME"] = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
+     os.environ["VLM_CAPTION_MODEL_NAME"] = "nvidia/nemotron-nano-12b-v2-vl"
      logger.info("Environment variables set.")
 
      image_caption_endpoint_url = "https://integrate.api.nvidia.com/v1/chat/completions"
-     model_name = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
+     model_name = "nvidia/nemotron-nano-12b-v2-vl"
      yolox_grpc, yolox_http, yolox_auth, yolox_protocol = get_nim_service("yolox")
      (
          yolox_table_structure_grpc,
nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py
@@ -30,6 +30,7 @@ from nv_ingest_api.internal.schemas.meta.ingest_job_schema import validate_inges
  from nv_ingest_api.util.message_brokers.simple_message_broker.simple_client import SimpleClient
  from nv_ingest_api.util.service_clients.redis.redis_client import RedisClient
  from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
+ from nv_ingest_api.util.message_brokers.qos_scheduler import QosScheduler
 
  logger = logging.getLogger(__name__)
 
@@ -89,8 +90,10 @@
 
      # Use the discriminated union for broker_client
      broker_client: Union[RedisClientConfig, SimpleClientConfig] = Field(..., discriminator="client_type")
-     task_queue: str = Field(..., description="The name of the queue to fetch tasks from.")
-     poll_interval: float = Field(default=0.1, gt=0, description="Polling interval in seconds.")
+     task_queue: str = Field(
+         ..., description="The base name of the queue to fetch tasks from. Derives sub-queues for fair scheduling."
+     )
+     poll_interval: float = Field(default=0.0, gt=0, description="Polling interval in seconds.")
 
 
  @ray.remote
@@ -134,7 +137,29 @@
          self._current_backoff_sleep: float = 0.0
          self._last_backoff_log_time: float = 0.0
 
-         self._logger.debug("MessageBrokerTaskSourceStage initialized. Task queue: %s", self.task_queue)
+         # Initialize QoS scheduler. Use a simple base-queue strategy for SimpleClient.
+         strategy = "simple" if isinstance(self.client, SimpleClient) else "lottery"
+         self.scheduler = QosScheduler(
+             self.task_queue,
+             num_prefetch_threads=6, # one per category (no-op for simple strategy)
+             total_buffer_capacity=96, # e.g., ~16 per thread
+             prefetch_poll_interval=0.002, # faster polling for responsiveness
+             prefetch_non_immediate=True, # enable prefetch for non-immediate categories
+             strategy=strategy,
+         )
+
+         self._logger.info(
+             "MessageBrokerTaskSourceStage initialized. Base task queue: %s | Derived queues: %s",
+             self.task_queue,
+             {
+                 "immediate": f"{self.task_queue}_immediate",
+                 "micro": f"{self.task_queue}_micro",
+                 "small": f"{self.task_queue}_small",
+                 "medium": f"{self.task_queue}_medium",
+                 "large": f"{self.task_queue}_large",
+                 "default": f"{self.task_queue}",
+             },
+         )
 
      # --- Private helper methods ---
      def _create_client(self):
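The log statement above spells out the queue fan-out: one derived queue per tier, with the bare base name serving as the default. `QosScheduler` itself (its simple and lottery strategies and prefetch threads) lives in nv_ingest_api and is not part of this diff, so only the naming convention is sketched below; presumably the submission side (redis_ingest_service.py, whose hunks are not shown here) maps each job's `queue_hint` onto one of these names. The base queue name in the example is hypothetical.

```python
# Sketch of the derived-queue naming shown in the log statement above.
from typing import Optional

TIERS = {"immediate", "micro", "small", "medium", "large"}


def derived_queue(base: str, queue_hint: Optional[str]) -> str:
    if queue_hint in TIERS:
        return f"{base}_{queue_hint}"
    return base  # "default" or a missing hint falls back to the base queue


print(derived_queue("ingest_task_queue", "medium"))   # ingest_task_queue_medium
print(derived_queue("ingest_task_queue", "default"))  # ingest_task_queue
```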
@@ -265,14 +290,21 @@
 
          return control_message
 
-     def _fetch_message(self, timeout=100):
+     def _fetch_message(self, timeout=0):
          """
-         Fetch a message from the message broker.
+         Fetch a message from the message broker using fair scheduling across derived queues.
+         This is a non-blocking sweep across all queues for the current scheduling cycle. If no
+         message is found across any queue, return None so the caller can sleep briefly.
          """
          try:
-             job = self.client.fetch_message(self.task_queue, timeout)
+             # Use scheduler to fetch next. In simple strategy this will block up to poll_interval on base queue.
+             job = self.scheduler.fetch_next(self.client, timeout=self.config.poll_interval)
              if job is None:
-                 self._logger.debug("No message received from '%s'", self.task_queue)
+                 self._logger.debug(
+                     "No message received from derived queues for base "
+                     "'%s' (immediate, micro, small, medium, large, default)",
+                     self.task_queue,
+                 )
                  # Do not treat normal empty polls as failures
                  self._fetch_failure_count = 0
                  self._current_backoff_sleep = 0.0
@@ -336,7 +368,8 @@
          Instead of reading from an input edge, fetch a message from the broker.
          """
          self._logger.debug("read_input: calling _fetch_message()")
-         job = self._fetch_message(timeout=100)
+         # Perform a non-blocking sweep across all queues for this cycle
+         job = self._fetch_message(timeout=0)
          if job is None:
              # Sleep for either the configured poll interval or the current backoff, whichever is larger
              sleep_time = max(self.config.poll_interval, getattr(self, "_current_backoff_sleep", 0.0))