nv-ingest 2025.10.28.dev20251028__tar.gz → 2025.10.29.dev20251029__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

Files changed (129)
  1. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/PKG-INFO +1 -1
  2. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v2/ingest.py +72 -1
  3. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/dependent_services.py +17 -10
  4. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/strategies.py +6 -2
  5. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +4 -4
  6. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest.egg-info/PKG-INFO +1 -1
  7. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/LICENSE +0 -0
  8. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/MANIFEST.in +0 -0
  9. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/__init__.py +0 -0
  10. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/__init__.py +0 -0
  11. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/main.py +0 -0
  12. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/tracing.py +0 -0
  13. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/__init__.py +0 -0
  14. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/health.py +0 -0
  15. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/ingest.py +0 -0
  16. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v1/metrics.py +0 -0
  17. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v2/README.md +0 -0
  18. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/api/v2/__init__.py +0 -0
  19. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/__init__.py +0 -0
  20. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/__init__.py +0 -0
  21. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
  22. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
  23. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/execution/options.py +0 -0
  24. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
  25. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/execution.py +0 -0
  26. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
  27. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/process/termination.py +0 -0
  28. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  29. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  30. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  31. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  32. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  33. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  34. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
  35. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  36. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  37. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  38. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  39. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  40. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
  41. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
  42. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
  43. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  44. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  45. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  46. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  47. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  48. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  49. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  50. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  51. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  52. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  53. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  54. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  55. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
  56. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  57. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  58. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  59. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  60. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
  61. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  62. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  63. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  64. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  65. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  66. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  67. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  68. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
  69. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  70. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  71. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  72. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  73. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  74. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  75. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  76. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  77. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  78. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
  79. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
  80. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  81. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  82. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  83. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
  84. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  85. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  86. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
  87. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  88. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  89. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  90. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  91. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/__init__.py +0 -0
  92. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  93. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  94. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  95. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  96. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  97. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  98. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  99. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  100. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  101. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  102. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  103. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/__init__.py +0 -0
  104. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  105. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  106. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
  107. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/__init__.py +0 -0
  108. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  109. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  110. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  111. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  112. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  113. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  114. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  115. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/__init__.py +0 -0
  116. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/config/__init__.py +0 -0
  117. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/config/loaders.py +0 -0
  118. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
  119. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +0 -0
  120. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/default_pipeline_impl.py +0 -0
  121. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
  122. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/pipeline/pipeline_schema.py +0 -0
  123. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest/version.py +0 -0
  124. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest.egg-info/SOURCES.txt +0 -0
  125. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest.egg-info/dependency_links.txt +0 -0
  126. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest.egg-info/requires.txt +0 -0
  127. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/nv_ingest.egg-info/top_level.txt +0 -0
  128. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/pyproject.toml +0 -0
  129. {nv_ingest-2025.10.28.dev20251028 → nv_ingest-2025.10.29.dev20251029}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.10.28.dev20251028
3
+ Version: 2025.10.29.dev20251029
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -432,6 +432,76 @@ def _extract_ray_telemetry(result: Dict[str, Any]) -> Tuple[Optional[Dict[str, A
432
432
  return trace_dict, annotations_dict
433
433
 
434
434
 
435
+ def _normalize_chunk_records(
436
+ records: Optional[List[Any]],
437
+ descriptor: Dict[str, Any],
438
+ parent_metadata: Dict[str, Any],
439
+ ) -> List[Any]:
440
+ """Re-map chunk-local metadata to document-level context for aggregation."""
441
+
442
+ if not isinstance(records, list):
443
+ return []
444
+
445
+ total_pages = parent_metadata.get("total_pages")
446
+ original_source_id = parent_metadata.get("original_source_id")
447
+ original_source_name = parent_metadata.get("original_source_name")
448
+
449
+ start_page = descriptor.get("start_page")
450
+ page_offset = start_page - 1 if isinstance(start_page, int) and start_page > 0 else 0
451
+
452
+ normalized_entries: List[Any] = []
453
+
454
+ for entry in records:
455
+ if not isinstance(entry, dict):
456
+ normalized_entries.append(entry)
457
+ continue
458
+
459
+ normalized_entry = entry.copy()
460
+ original_metadata = entry.get("metadata")
461
+
462
+ if isinstance(original_metadata, dict):
463
+ normalized_metadata = original_metadata.copy()
464
+ normalized_entry["metadata"] = normalized_metadata
465
+
466
+ original_source_meta = original_metadata.get("source_metadata")
467
+ if isinstance(original_source_meta, dict):
468
+ normalized_source_meta = original_source_meta.copy()
469
+ normalized_metadata["source_metadata"] = normalized_source_meta
470
+
471
+ if original_source_id:
472
+ normalized_source_meta["source_id"] = original_source_id
473
+ if original_source_name:
474
+ normalized_source_meta["source_name"] = original_source_name
475
+
476
+ original_content_meta = original_metadata.get("content_metadata")
477
+ if isinstance(original_content_meta, dict):
478
+ normalized_content_meta = original_content_meta.copy()
479
+ normalized_metadata["content_metadata"] = normalized_content_meta
480
+
481
+ page_number = normalized_content_meta.get("page_number")
482
+ if isinstance(page_number, int) and page_number >= 0:
483
+ normalized_content_meta["page_number"] = page_number + page_offset
484
+
485
+ if isinstance(total_pages, int) and isinstance(normalized_content_meta.get("page_count"), int):
486
+ # Ensure optional per-record page count reflects the full document
487
+ normalized_content_meta["page_count"] = total_pages
488
+
489
+ original_hierarchy = original_content_meta.get("hierarchy")
490
+ if isinstance(original_hierarchy, dict):
491
+ normalized_hierarchy = original_hierarchy.copy()
492
+ normalized_content_meta["hierarchy"] = normalized_hierarchy
493
+
494
+ hierarchy_page = normalized_hierarchy.get("page")
495
+ if isinstance(hierarchy_page, int) and hierarchy_page >= 0:
496
+ normalized_hierarchy["page"] = hierarchy_page + page_offset
497
+ if isinstance(total_pages, int):
498
+ normalized_hierarchy["page_count"] = total_pages
499
+
500
+ normalized_entries.append(normalized_entry)
501
+
502
+ return normalized_entries
503
+
504
+
435
505
  def _aggregate_parent_traces(chunk_traces: Dict[str, Any]) -> Dict[str, Any]:
436
506
  """
437
507
  Aggregate chunk-level traces into parent-level metrics.
@@ -574,7 +644,8 @@ def _build_aggregated_response(
574
644
  if result is not None:
575
645
  # Add page data to aggregated result
576
646
  if "data" in result:
577
- aggregated_result["data"].extend(result["data"])
647
+ normalized_records = _normalize_chunk_records(result.get("data"), descriptor, metadata)
648
+ aggregated_result["data"].extend(normalized_records)
578
649
  chunk_entry = dict(descriptor)
579
650
  aggregated_result["metadata"]["chunks"].append(chunk_entry)
580
651
 
@@ -18,6 +18,18 @@ from nv_ingest_api.util.message_brokers.simple_message_broker.broker import Simp
18
18
  logger = logging.getLogger(__name__)
19
19
 
20
20
 
21
+ def _broker_server_target(host, port, max_queue_size):
22
+ """
23
+ Target function to be run in a separate process for the SimpleMessageBroker.
24
+ """
25
+ server = SimpleMessageBroker(host, port, max_queue_size)
26
+ try:
27
+ server.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
28
+ except Exception:
29
+ pass
30
+ server.serve_forever()
31
+
32
+
21
33
  def start_simple_message_broker(broker_client: dict) -> multiprocessing.Process:
22
34
  """
23
35
  Starts a SimpleMessageBroker server in a separate process.
@@ -58,16 +70,11 @@ def start_simple_message_broker(broker_client: dict) -> multiprocessing.Process:
58
70
  f"continuing to spawn a broker process (tests expect a Process to be returned)"
59
71
  )
60
72
 
61
- def broker_server():
62
- # Optionally, set socket options here for reuse (note: binding occurs in server __init__).
63
- server = SimpleMessageBroker(server_host, server_port, max_queue_size)
64
- try:
65
- server.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
66
- except Exception:
67
- pass
68
- server.serve_forever()
69
-
70
- p = multiprocessing.Process(target=broker_server)
73
+ p = multiprocessing.Process(
74
+ target=_broker_server_target,
75
+ args=(server_host, server_port, max_queue_size),
76
+ daemon=True,
77
+ )
71
78
  # If we're launching from inside the pipeline subprocess, mark daemon so the
72
79
  # broker dies automatically when the subprocess exits.
73
80
  p.daemon = os.environ.get("NV_INGEST_BROKER_IN_SUBPROCESS") == "1"
@@ -11,9 +11,10 @@ Strategy pattern for clean separation of execution concerns.
11
11
  """
12
12
 
13
13
  import atexit
14
- import os
15
14
  import logging
16
15
  import multiprocessing
16
+ import os
17
+ import sys
17
18
  import time
18
19
  from abc import ABC, abstractmethod
19
20
 
@@ -132,7 +133,10 @@ class SubprocessStrategy(ProcessExecutionStrategy):
132
133
  logger.info("Launching pipeline in Python subprocess using multiprocessing.")
133
134
 
134
135
  # Create subprocess using fork context
135
- ctx = multiprocessing.get_context("fork")
136
+ start_method = "fork"
137
+ if sys.platform.lower() == "darwin":
138
+ start_method = "spawn"
139
+ ctx = multiprocessing.get_context(start_method)
136
140
  process = ctx.Process(
137
141
  target=run_pipeline_process,
138
142
  args=(
@@ -501,21 +501,21 @@ class RedisIngestService(IngestServiceMeta):
501
501
  metadata_key = f"parent:{parent_job_id}:metadata"
502
502
 
503
503
  try:
504
- # Check if this is a parent job
504
+ # Check if this is a parent job (check metadata_key since non-split PDFs may not have parent_key)
505
505
  exists = await self._run_bounded_to_thread(
506
506
  self._ingest_client.get_client().exists,
507
- parent_key,
507
+ metadata_key, # Check metadata instead of parent_key for non-split PDF support
508
508
  )
509
509
 
510
510
  if not exists:
511
511
  return None
512
512
 
513
- # Get subjob IDs
513
+ # Get subjob IDs (may be empty for non-split PDFs)
514
514
  subjob_ids_bytes = await self._run_bounded_to_thread(
515
515
  self._ingest_client.get_client().smembers,
516
516
  parent_key,
517
517
  )
518
- subjob_id_set = {id.decode("utf-8") for id in subjob_ids_bytes}
518
+ subjob_id_set = {id.decode("utf-8") for id in subjob_ids_bytes} if subjob_ids_bytes else set()
519
519
 
520
520
  # Get metadata
521
521
  metadata_dict = await self._run_bounded_to_thread(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.10.28.dev20251028
3
+ Version: 2025.10.29.dev20251029
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License