nv-ingest 2025.10.22.dev20251022__tar.gz → 2025.10.23.dev20251023__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

Files changed (129)
  1. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/PKG-INFO +1 -1
  2. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v2/ingest.py +58 -0
  3. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/default_pipeline_impl.py +1 -0
  4. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest.egg-info/PKG-INFO +1 -1
  5. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/LICENSE +0 -0
  6. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/MANIFEST.in +0 -0
  7. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/__init__.py +0 -0
  8. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/__init__.py +0 -0
  9. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/main.py +0 -0
  10. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/tracing.py +0 -0
  11. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/__init__.py +0 -0
  12. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/health.py +0 -0
  13. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/ingest.py +0 -0
  14. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v1/metrics.py +0 -0
  15. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v2/README.md +0 -0
  16. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/api/v2/__init__.py +0 -0
  17. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/__init__.py +0 -0
  18. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/__init__.py +0 -0
  19. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
  20. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
  21. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/execution/options.py +0 -0
  22. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
  23. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/dependent_services.py +0 -0
  24. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/execution.py +0 -0
  25. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
  26. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/strategies.py +0 -0
  27. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/process/termination.py +0 -0
  28. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  29. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  30. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  31. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  32. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  33. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  34. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
  35. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  36. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  37. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  38. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  39. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  40. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
  41. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
  42. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
  43. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  44. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  45. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  46. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  47. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  48. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  49. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  50. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  51. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  52. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  53. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  54. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  55. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
  56. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  57. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  58. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  59. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  60. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
  61. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  62. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  63. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  64. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  65. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  66. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  67. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  68. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
  69. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  70. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  71. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  72. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  73. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  74. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  75. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  76. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  77. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  78. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
  79. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
  80. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  81. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  82. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  83. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
  84. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  85. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  86. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
  87. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  88. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  89. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  90. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  91. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/__init__.py +0 -0
  92. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  93. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  94. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  95. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  96. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  97. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  98. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  99. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  100. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  101. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  102. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  103. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/__init__.py +0 -0
  104. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  105. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  106. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
  107. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/__init__.py +0 -0
  108. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  109. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  110. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
  111. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  112. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  113. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  114. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  115. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  116. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/__init__.py +0 -0
  117. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/config/__init__.py +0 -0
  118. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/config/loaders.py +0 -0
  119. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
  120. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +0 -0
  121. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
  122. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/pipeline/pipeline_schema.py +0 -0
  123. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest/version.py +0 -0
  124. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest.egg-info/SOURCES.txt +0 -0
  125. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest.egg-info/dependency_links.txt +0 -0
  126. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest.egg-info/requires.txt +0 -0
  127. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/nv_ingest.egg-info/top_level.txt +0 -0
  128. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/pyproject.toml +0 -0
  129. {nv_ingest-2025.10.22.dev20251022 → nv_ingest-2025.10.23.dev20251023}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.10.22.dev20251022
3
+ Version: 2025.10.23.dev20251023
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -672,11 +672,15 @@ async def submit_job_v2(
672
672
  original_source_id = source_ids[0] if source_ids else "unknown_source.pdf"
673
673
  original_source_name = source_names[0] if source_names else "unknown_source.pdf"
674
674
 
675
+ # Track page count for all PDFs (used for both splitting logic and metadata)
676
+ pdf_page_count_cache = None
677
+
675
678
  # Check if this is a PDF that needs splitting
676
679
  if document_types and payloads and document_types[0].lower() == "pdf":
677
680
  # Decode the payload to check page count
678
681
  pdf_content = base64.b64decode(payloads[0])
679
682
  page_count = get_pdf_page_count(pdf_content)
683
+ pdf_page_count_cache = page_count # Cache for later use
680
684
  pages_per_chunk = get_pdf_split_page_count(client_override=client_split_page_count)
681
685
 
682
686
  # Split if the document has more pages than our chunk size
@@ -762,6 +766,34 @@ async def submit_job_v2(
762
766
  await ingest_service.submit_job(updated_job_spec, parent_job_id)
763
767
  await ingest_service.set_job_state(parent_job_id, STATE_SUBMITTED)
764
768
 
769
+ # If this was a PDF (even if not split), store page count metadata for tracking
770
+ if pdf_page_count_cache is not None:
771
+ try:
772
+ # Use cached page count from earlier check to avoid re-decoding
773
+ # Store minimal metadata for non-split PDFs (consistent with split PDFs)
774
+ single_pdf_metadata: Dict[str, Any] = {
775
+ "total_pages": pdf_page_count_cache,
776
+ "pages_per_chunk": pdf_page_count_cache, # Single chunk = entire document
777
+ "original_source_id": original_source_id,
778
+ "original_source_name": original_source_name,
779
+ "document_type": document_types[0],
780
+ "subjob_order": [], # No subjobs for non-split PDFs
781
+ }
782
+
783
+ # Store as parent job metadata with empty subjob list for consistency
784
+ await ingest_service.set_parent_job_mapping(
785
+ parent_job_id,
786
+ [], # Empty subjob list
787
+ single_pdf_metadata,
788
+ subjob_descriptors=[],
789
+ )
790
+ logger.debug(
791
+ f"Stored page count metadata for non-split PDF {original_source_name}: {pdf_page_count_cache} pages"
792
+ )
793
+ except Exception as metadata_err:
794
+ # Don't fail the job if metadata storage fails
795
+ logger.warning(f"Failed to store page count metadata for {parent_job_id}: {metadata_err}")
796
+
765
797
  response.headers["x-trace-id"] = trace.format_trace_id(current_trace_id)
766
798
  return parent_job_id
767
799
 
@@ -898,6 +930,32 @@ async def fetch_job_v2(job_id: str, ingest_service: INGEST_SERVICE_T):
898
930
 
899
931
  logger.debug(f"Parent job {job_id} has {len(subjob_ids)} subjobs")
900
932
 
933
+ # Special case: Non-split PDFs have metadata but no subjobs
934
+ # Fetch the result directly and augment with page count metadata
935
+ if len(subjob_ids) == 0:
936
+ logger.debug(f"Job {job_id} is a non-split PDF, fetching result directly")
937
+ try:
938
+ job_response = await ingest_service.fetch_job(job_id)
939
+
940
+ # Augment response with page count metadata
941
+ if isinstance(job_response, dict):
942
+ if "metadata" not in job_response:
943
+ job_response["metadata"] = {}
944
+ job_response["metadata"]["total_pages"] = metadata.get("total_pages")
945
+ job_response["metadata"]["original_source_id"] = metadata.get("original_source_id")
946
+ job_response["metadata"]["original_source_name"] = metadata.get("original_source_name")
947
+
948
+ # Update job state after successful fetch
949
+ await _update_job_state_after_fetch(job_id, ingest_service)
950
+
951
+ return _stream_json_response(job_response)
952
+ except (TimeoutError, RedisError, ConnectionError):
953
+ logger.debug(f"Job {job_id} (non-split PDF) not ready yet")
954
+ raise HTTPException(status_code=202, detail="Job is processing. Retry later.")
955
+ except Exception as e:
956
+ logger.exception(f"Error fetching non-split PDF job {job_id}: {e}")
957
+ raise HTTPException(status_code=500, detail="Internal server error during job fetch.")
958
+
901
959
  # Build ordered descriptors for subjobs
902
960
  stored_descriptors = subjob_info.get("subjob_descriptors") or []
903
961
  descriptor_lookup = {entry.get("job_id"): entry for entry in stored_descriptors if isinstance(entry, dict)}
@@ -318,6 +318,7 @@ stages:
318
318
  config:
319
319
  api_key: $NGC_API_KEY|$NVIDIA_API_KEY
320
320
  model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
321
+ endpoint_url: $VLM_CAPTION_ENDPOINT|"http://vlm:8000/v1/chat/completions"
321
322
  prompt: "Caption the content of this image:"
322
323
  replicas:
323
324
  min_replicas: 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.10.22.dev20251022
3
+ Version: 2025.10.23.dev20251023
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License