nv-ingest 2025.10.14.dev20251014__tar.gz → 2025.10.16.dev20251016__tar.gz

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

Files changed (129)
  1. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/PKG-INFO +1 -1
  2. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v2/ingest.py +30 -4
  3. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +14 -14
  4. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/default_pipeline_impl.py +11 -11
  5. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest.egg-info/PKG-INFO +1 -1
  6. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/LICENSE +0 -0
  7. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/MANIFEST.in +0 -0
  8. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/__init__.py +0 -0
  9. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/__init__.py +0 -0
  10. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/main.py +0 -0
  11. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/tracing.py +0 -0
  12. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/__init__.py +0 -0
  13. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/health.py +0 -0
  14. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/ingest.py +0 -0
  15. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v1/metrics.py +0 -0
  16. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v2/README.md +0 -0
  17. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/api/v2/__init__.py +0 -0
  18. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/__init__.py +0 -0
  19. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/__init__.py +0 -0
  20. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
  21. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
  22. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/execution/options.py +0 -0
  23. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
  24. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/dependent_services.py +0 -0
  25. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/execution.py +0 -0
  26. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
  27. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/strategies.py +0 -0
  28. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/process/termination.py +0 -0
  29. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  30. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  31. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  32. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  33. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  34. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  35. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
  36. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  37. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  38. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  39. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  40. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  41. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
  42. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
  43. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
  44. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  45. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  46. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  47. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  48. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  49. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  50. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  51. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  52. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  53. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  54. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  55. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  56. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
  57. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  58. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  59. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  60. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  61. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
  62. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  63. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  64. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  65. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  66. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  67. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  68. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  69. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
  70. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  71. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  72. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  73. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  74. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  75. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  76. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  77. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  78. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  79. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
  80. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
  81. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  82. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  83. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  84. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
  85. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  86. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  87. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
  88. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  89. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  90. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  91. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  92. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/__init__.py +0 -0
  93. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  94. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  95. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  96. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  97. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  98. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  99. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  100. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  101. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  102. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  103. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  104. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/__init__.py +0 -0
  105. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  106. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  107. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
  108. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/__init__.py +0 -0
  109. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  110. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  111. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
  112. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  113. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  114. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  115. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  116. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  117. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/__init__.py +0 -0
  118. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/config/__init__.py +0 -0
  119. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/config/loaders.py +0 -0
  120. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
  121. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
  122. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/pipeline/pipeline_schema.py +0 -0
  123. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest/version.py +0 -0
  124. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest.egg-info/SOURCES.txt +0 -0
  125. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest.egg-info/dependency_links.txt +0 -0
  126. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest.egg-info/requires.txt +0 -0
  127. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/nv_ingest.egg-info/top_level.txt +0 -0
  128. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/pyproject.toml +0 -0
  129. {nv_ingest-2025.10.14.dev20251014 → nv_ingest-2025.10.16.dev20251016}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.10.14.dev20251014
3
+ Version: 2025.10.16.dev20251016
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -45,9 +45,30 @@ router = APIRouter()
45
45
  DEFAULT_PDF_SPLIT_PAGE_COUNT = 32
46
46
 
47
47
 
48
- def get_pdf_split_page_count() -> int:
49
- """Resolve the configured page chunk size for PDF splitting."""
48
+ def get_pdf_split_page_count(client_override: Optional[int] = None) -> int:
49
+ """
50
+ Resolve the page chunk size for PDF splitting with client override support.
50
51
 
52
+ Priority: client_override (clamped) > env var > default (32)
53
+ Enforces boundaries: min=1, max=128
54
+ """
55
+ MIN_PAGES = 1
56
+ MAX_PAGES = 128
57
+
58
+ # Client override takes precedence if provided
59
+ if client_override is not None:
60
+ clamped = max(MIN_PAGES, min(client_override, MAX_PAGES))
61
+ if clamped != client_override:
62
+ logger.warning(
63
+ "Client requested split_page_count=%s; clamped to %s (min=%s, max=%s)",
64
+ client_override,
65
+ clamped,
66
+ MIN_PAGES,
67
+ MAX_PAGES,
68
+ )
69
+ return clamped
70
+
71
+ # Fall back to environment variable
51
72
  raw_value = os.environ.get("PDF_SPLIT_PAGE_COUNT")
52
73
  if raw_value is None:
53
74
  return DEFAULT_PDF_SPLIT_PAGE_COUNT
@@ -530,6 +551,10 @@ async def submit_job_v2(
530
551
  # Parse job spec
531
552
  job_spec_dict = json.loads(job_spec.payload)
532
553
 
554
+ # Extract PDF configuration if provided by client
555
+ pdf_config = job_spec_dict.get("pdf_config", {})
556
+ client_split_page_count = pdf_config.get("split_page_count") if pdf_config else None
557
+
533
558
  # Extract document type and payload from the proper structure
534
559
  job_payload = job_spec_dict.get("job_payload", {})
535
560
  document_types = job_payload.get("document_type", [])
@@ -546,12 +571,12 @@ async def submit_job_v2(
546
571
  # Decode the payload to check page count
547
572
  pdf_content = base64.b64decode(payloads[0])
548
573
  page_count = get_pdf_page_count(pdf_content)
549
- pages_per_chunk = get_pdf_split_page_count()
574
+ pages_per_chunk = get_pdf_split_page_count(client_override=client_split_page_count)
550
575
 
551
576
  # Split if the document has more pages than our chunk size
552
577
  if page_count > pages_per_chunk:
553
578
  logger.warning(
554
- "[dev-reload-check] Splitting PDF %s into %s-page chunks (total pages: %s)",
579
+ "Splitting PDF %s into %s-page chunks (total pages: %s)",
555
580
  original_source_name,
556
581
  pages_per_chunk,
557
582
  page_count,
@@ -599,6 +624,7 @@ async def submit_job_v2(
599
624
 
600
625
  parent_metadata: Dict[str, Any] = {
601
626
  "total_pages": page_count,
627
+ "pages_per_chunk": pages_per_chunk,
602
628
  "original_source_id": original_source_id,
603
629
  "original_source_name": original_source_name,
604
630
  "document_type": document_types[0] if document_types else "pdf",
@@ -65,14 +65,14 @@ stages:
65
65
  actor: "nv_ingest.framework.orchestration.ray.stages.extractors.pdf_extractor:PDFExtractorStage"
66
66
  config:
67
67
  pdfium_config:
68
- auth_token: $NGC_API_KEY|""
68
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
69
69
  yolox_endpoints: [
70
70
  $YOLOX_GRPC_ENDPOINT|"",
71
71
  $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
72
72
  ]
73
73
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
74
74
  nemoretriever_parse_config:
75
- auth_token: $NGC_API_KEY|""
75
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
76
76
  nemoretriever_parse_endpoints: [
77
77
  $NEMORETRIEVER_PARSE_GRPC_ENDPOINT|"",
78
78
  $NEMORETRIEVER_PARSE_HTTP_ENDPOINT|"https://integrate.api.nvidia.com/v1/chat/completions"
@@ -106,7 +106,7 @@ stages:
106
106
  ]
107
107
  function_id: $AUDIO_FUNCTION_ID|"1598d209-5e27-4d3c-8079-4751568b1081"
108
108
  audio_infer_protocol: $AUDIO_INFER_PROTOCOL|grpc
109
- auth_token: $NGC_API_KEY|""
109
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
110
110
  replicas:
111
111
  min_replicas: 0
112
112
  max_replicas:
@@ -127,7 +127,7 @@ stages:
127
127
  $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
128
128
  ]
129
129
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
130
- auth_token: $NGC_API_KEY|""
130
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
131
131
  replicas:
132
132
  min_replicas: 0
133
133
  max_replicas:
@@ -148,7 +148,7 @@ stages:
148
148
  $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
149
149
  ]
150
150
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
151
- auth_token: $NGC_API_KEY|""
151
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
152
152
  replicas:
153
153
  min_replicas: 0
154
154
  max_replicas:
@@ -169,7 +169,7 @@ stages:
169
169
  $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
170
170
  ]
171
171
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
172
- auth_token: $NGC_API_KEY|""
172
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
173
173
  replicas:
174
174
  min_replicas: 0
175
175
  max_replicas:
@@ -200,11 +200,11 @@ stages:
200
200
  config:
201
201
  endpoint_config:
202
202
  ocr_endpoints: [
203
- $OCR_GRPC_ENDPOINT|"grpc.nvcf.nvidia.com:443",
204
- $OCR_HTTP_ENDPOINT|""
203
+ $OCR_GRPC_ENDPOINT|"",
204
+ $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/baidu/paddleocr"
205
205
  ]
206
- ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
207
- auth_token: $NGC_API_KEY|""
206
+ ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
207
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
208
208
  replicas:
209
209
  min_replicas: 0
210
210
  max_replicas:
@@ -230,7 +230,7 @@ stages:
230
230
  $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/baidu/paddleocr"
231
231
  ]
232
232
  ocr_infer_protocol: $PADDLE_INFER_PROTOCOL|"http"
233
- auth_token: $NGC_API_KEY|""
233
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
234
234
  replicas:
235
235
  min_replicas: 0
236
236
  max_replicas:
@@ -257,7 +257,7 @@ stages:
257
257
  $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/baidu/paddleocr"
258
258
  ]
259
259
  ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
260
- auth_token: $NGC_API_KEY|""
260
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
261
261
  replicas:
262
262
  min_replicas: 0
263
263
  max_replicas:
@@ -317,7 +317,7 @@ stages:
317
317
  phase: 4 # TRANSFORM
318
318
  actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
319
319
  config:
320
- api_key: $NGC_API_KEY|""
320
+ api_key: $NGC_API_KEY|$NVIDIA_API_KEY
321
321
  endpoint_url: $VLM_CAPTION_ENDPOINT|"https://integrate.api.nvidia.com/v1/chat/completions"
322
322
  model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
323
323
  prompt: "Caption the content of this image:"
@@ -335,7 +335,7 @@ stages:
335
335
  phase: 4 # TRANSFORM
336
336
  actor: "nv_ingest.framework.orchestration.ray.stages.transforms.text_embed:TextEmbeddingTransformStage"
337
337
  config:
338
- api_key: $NGC_API_KEY|""
338
+ api_key: $NGC_API_KEY|$NVIDIA_API_KEY
339
339
  embedding_model: $EMBEDDING_NIM_MODEL_NAME|"nvidia/llama-3.2-nv-embedqa-1b-v2"
340
340
  embedding_nim_endpoint: $EMBEDDING_NIM_ENDPOINT|"https://integrate.api.nvidia.com/v1"
341
341
  replicas:
@@ -64,14 +64,14 @@ stages:
64
64
  actor: "nv_ingest.framework.orchestration.ray.stages.extractors.pdf_extractor:PDFExtractorStage"
65
65
  config:
66
66
  pdfium_config:
67
- auth_token: $NGC_API_KEY|""
67
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
68
68
  yolox_endpoints: [
69
69
  $YOLOX_GRPC_ENDPOINT|"page-elements:8001",
70
70
  $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
71
71
  ]
72
72
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
73
73
  nemoretriever_parse_config:
74
- auth_token: $NGC_API_KEY|""
74
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
75
75
  nemoretriever_parse_endpoints: [
76
76
  $NEMORETRIEVER_PARSE_GRPC_ENDPOINT|"",
77
77
  $NEMORETRIEVER_PARSE_HTTP_ENDPOINT|"http://nemoretriever-parse:8000/v1/chat/completions",
@@ -105,7 +105,7 @@ stages:
105
105
  ]
106
106
  function_id: $AUDIO_FUNCTION_ID|""
107
107
  audio_infer_protocol: $AUDIO_INFER_PROTOCOL|grpc
108
- auth_token: $NGC_API_KEY|""
108
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
109
109
  replicas:
110
110
  min_replicas: 0
111
111
  max_replicas:
@@ -126,7 +126,7 @@ stages:
126
126
  $YOLOX_HTTP_ENDPOINT|"",
127
127
  ]
128
128
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
129
- auth_token: $NGC_API_KEY|""
129
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
130
130
  replicas:
131
131
  min_replicas: 0
132
132
  max_replicas:
@@ -147,7 +147,7 @@ stages:
147
147
  $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
148
148
  ]
149
149
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
150
- auth_token: $NGC_API_KEY|""
150
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
151
151
  replicas:
152
152
  min_replicas: 0
153
153
  max_replicas:
@@ -168,7 +168,7 @@ stages:
168
168
  $YOLOX_HTTP_ENDPOINT|"http://page-elements:8000/v1/infer",
169
169
  ]
170
170
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|grpc
171
- auth_token: $NGC_API_KEY|""
171
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
172
172
  replicas:
173
173
  min_replicas: 0
174
174
  max_replicas:
@@ -203,7 +203,7 @@ stages:
203
203
  $OCR_HTTP_ENDPOINT|"http://ocr:8000/v1/infer",
204
204
  ]
205
205
  ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
206
- auth_token: $NGC_API_KEY|""
206
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
207
207
  replicas:
208
208
  min_replicas: 0
209
209
  max_replicas:
@@ -229,7 +229,7 @@ stages:
229
229
  $OCR_HTTP_ENDPOINT|"http://ocr:8000/v1/infer",
230
230
  ]
231
231
  ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
232
- auth_token: $NGC_API_KEY|""
232
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
233
233
  replicas:
234
234
  min_replicas: 0
235
235
  max_replicas:
@@ -256,7 +256,7 @@ stages:
256
256
  $OCR_HTTP_ENDPOINT|""
257
257
  ]
258
258
  ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
259
- auth_token: $NGC_API_KEY|""
259
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
260
260
  replicas:
261
261
  min_replicas: 0
262
262
  max_replicas:
@@ -316,7 +316,7 @@ stages:
316
316
  phase: 4 # TRANSFORM
317
317
  actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
318
318
  config:
319
- api_key: $NGC_API_KEY|""
319
+ api_key: $NGC_API_KEY|$NVIDIA_API_KEY
320
320
  model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
321
321
  prompt: "Caption the content of this image:"
322
322
  replicas:
@@ -333,7 +333,7 @@ stages:
333
333
  phase: 4 # TRANSFORM
334
334
  actor: "nv_ingest.framework.orchestration.ray.stages.transforms.text_embed:TextEmbeddingTransformStage"
335
335
  config:
336
- api_key: $NGC_API_KEY|""
336
+ api_key: $NGC_API_KEY|$NVIDIA_API_KEY
337
337
  embedding_model: $EMBEDDING_NIM_MODEL_NAME|"nvidia/llama-3.2-nv-embedqa-1b-v2"
338
338
  embedding_nim_endpoint: $EMBEDDING_NIM_ENDPOINT|"http://embedding:8000/v1"
339
339
  replicas:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.10.14.dev20251014
3
+ Version: 2025.10.16.dev20251016
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License