nv-ingest 2025.10.8.dev20251008__tar.gz → 2025.10.10.dev20251010__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

Files changed (129) hide show
  1. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/PKG-INFO +2 -1
  2. nv_ingest-2025.10.10.dev20251010/nv_ingest/api/__init__.py +9 -0
  3. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/api/main.py +2 -0
  4. nv_ingest-2025.10.10.dev20251010/nv_ingest/api/tracing.py +82 -0
  5. nv_ingest-2025.10.10.dev20251010/nv_ingest/api/v2/README.md +104 -0
  6. nv_ingest-2025.10.10.dev20251010/nv_ingest/api/v2/ingest.py +816 -0
  7. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +192 -10
  8. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest.egg-info/PKG-INFO +2 -1
  9. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest.egg-info/SOURCES.txt +4 -0
  10. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest.egg-info/requires.txt +1 -0
  11. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/pyproject.toml +1 -0
  12. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/LICENSE +0 -0
  13. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/MANIFEST.in +0 -0
  14. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/__init__.py +0 -0
  15. {nv_ingest-2025.10.8.dev20251008/nv_ingest/api → nv_ingest-2025.10.10.dev20251010/nv_ingest/api/v1}/__init__.py +0 -0
  16. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/api/v1/health.py +0 -0
  17. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/api/v1/ingest.py +0 -0
  18. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/api/v1/metrics.py +0 -0
  19. {nv_ingest-2025.10.8.dev20251008/nv_ingest/api/v1 → nv_ingest-2025.10.10.dev20251010/nv_ingest/api/v2}/__init__.py +0 -0
  20. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/__init__.py +0 -0
  21. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/__init__.py +0 -0
  22. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
  23. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
  24. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/execution/options.py +0 -0
  25. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
  26. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/process/dependent_services.py +0 -0
  27. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/process/execution.py +0 -0
  28. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
  29. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/process/strategies.py +0 -0
  30. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/process/termination.py +0 -0
  31. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  32. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  33. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  34. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  35. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  36. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  37. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
  38. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  39. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  40. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  41. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  42. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  43. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
  44. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
  45. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
  46. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  47. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  48. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  49. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  50. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  51. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  52. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  53. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  54. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  55. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  56. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  57. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  58. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
  59. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  60. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  61. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  62. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  63. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
  64. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  65. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  66. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  67. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  68. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  69. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  70. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  71. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
  72. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  73. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  74. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  75. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  76. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  77. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  78. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  79. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  80. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  81. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
  82. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
  83. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  84. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  85. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  86. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
  87. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  88. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  89. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
  90. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  91. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  92. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  93. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  94. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/__init__.py +0 -0
  95. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  96. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  97. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  98. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  99. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  100. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  101. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  102. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  103. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  104. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  105. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  106. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/__init__.py +0 -0
  107. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  108. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  109. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
  110. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/service/__init__.py +0 -0
  111. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  112. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  113. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  114. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  115. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  116. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  117. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  118. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/pipeline/__init__.py +0 -0
  119. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/pipeline/config/__init__.py +0 -0
  120. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/pipeline/config/loaders.py +0 -0
  121. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
  122. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +0 -0
  123. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/pipeline/default_pipeline_impl.py +0 -0
  124. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
  125. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/pipeline/pipeline_schema.py +0 -0
  126. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest/version.py +0 -0
  127. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest.egg-info/dependency_links.txt +0 -0
  128. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/nv_ingest.egg-info/top_level.txt +0 -0
  129. {nv_ingest-2025.10.8.dev20251008 → nv_ingest-2025.10.10.dev20251010}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.10.8.dev20251008
3
+ Version: 2025.10.10.dev20251010
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -230,6 +230,7 @@ Requires-Dist: openai>=1.82.0
230
230
  Requires-Dist: opentelemetry-api>=1.27.0
231
231
  Requires-Dist: opentelemetry-exporter-otlp>=1.27.0
232
232
  Requires-Dist: opentelemetry-sdk>=1.27.0
233
+ Requires-Dist: psutil>=7.1.0
233
234
  Requires-Dist: pydantic>2.0.0
234
235
  Requires-Dist: pydantic-settings>2.0.0
235
236
  Requires-Dist: pypdfium2==4.30.0
@@ -0,0 +1,9 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ """nv_ingest.api package."""
6
+
7
+ from .tracing import traced_endpoint # re-export for convenience
8
+
9
+ __all__ = ["traced_endpoint"]
@@ -15,6 +15,7 @@ from opentelemetry.sdk.trace.export import BatchSpanProcessor
15
15
  from .v1.health import router as HealthApiRouter
16
16
  from .v1.ingest import router as IngestApiRouter
17
17
  from .v1.metrics import router as MetricsApiRouter
18
+ from .v2.ingest import router as IngestApiRouterV2
18
19
 
19
20
  logger = logging.getLogger(__name__)
20
21
 
@@ -33,6 +34,7 @@ app = FastAPI(
33
34
  app.include_router(IngestApiRouter, prefix="/v1")
34
35
  app.include_router(HealthApiRouter, prefix="/v1/health")
35
36
  app.include_router(MetricsApiRouter, prefix="/v1")
37
+ app.include_router(IngestApiRouterV2, prefix="/v2")
36
38
 
37
39
  # Set up the tracer provider and add a processor for exporting traces
38
40
  resource = Resource(attributes={"service.name": "nv-ingest"})
@@ -0,0 +1,82 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ """HTTP endpoint tracing utilities."""
6
+
7
+ from functools import wraps
8
+ from inspect import iscoroutinefunction
9
+ from typing import Any, Callable, Optional, TypeVar
10
+
11
+ from fastapi import Request, Response
12
+ from opentelemetry import trace
13
+
14
+ F = TypeVar("F", bound=Callable[..., Any])
15
+
16
+ tracer = trace.get_tracer(__name__)
17
+
18
+
19
+ def traced_endpoint(name: Optional[str] = None) -> Callable[[F], F]:
20
+ """Wrap a FastAPI endpoint with a span whose name defaults to the function name.
21
+
22
+ The decorator preserves the wrapped callable's signature so FastAPI can continue
23
+ to perform dependency injection and generate OpenAPI documentation correctly.
24
+ """
25
+
26
+ def decorator(func: F) -> F:
27
+ span_name = name or func.__name__
28
+
29
+ if iscoroutinefunction(func):
30
+
31
+ @wraps(func)
32
+ async def async_wrapper(*args: Any, **kwargs: Any) -> Any:
33
+ with tracer.start_as_current_span(span_name) as span:
34
+ span.set_attribute("nv_ingest.endpoint", func.__qualname__)
35
+ _record_http_request(span, args, kwargs)
36
+ response = await func(*args, **kwargs)
37
+ _record_http_response(span, response)
38
+ return response
39
+
40
+ return async_wrapper # type: ignore[return-value]
41
+
42
+ @wraps(func)
43
+ def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
44
+ with tracer.start_as_current_span(span_name) as span:
45
+ span.set_attribute("nv_ingest.endpoint", func.__qualname__)
46
+ _record_http_request(span, args, kwargs)
47
+ result = func(*args, **kwargs)
48
+ _record_http_response(span, result)
49
+ return result
50
+
51
+ return sync_wrapper # type: ignore[return-value]
52
+
53
+ return decorator
54
+
55
+
56
+ def _record_http_request(span, args: tuple[Any, ...], kwargs: dict[str, Any]) -> None:
57
+ request = _find_type(Request, args, kwargs)
58
+ if request is None:
59
+ return
60
+ span.set_attribute("http.method", request.method)
61
+ span.set_attribute("http.url", str(request.url))
62
+
63
+
64
+ def _record_http_response(span, response: Any) -> None:
65
+ maybe_response = response if isinstance(response, Response) else None
66
+ if maybe_response is None:
67
+ maybe_response = _find_type(Response, (response,), {})
68
+ if maybe_response is None:
69
+ return
70
+ span.set_attribute("http.status_code", maybe_response.status_code)
71
+
72
+
73
+ def _find_type(expected_type: type, args: tuple[Any, ...], kwargs: dict[str, Any]) -> Optional[Any]:
74
+ """Return the first argument matching ``expected_type`` from args or kwargs."""
75
+
76
+ for arg in args:
77
+ if isinstance(arg, expected_type):
78
+ return arg
79
+ for value in kwargs.values():
80
+ if isinstance(value, expected_type):
81
+ return value
82
+ return None
@@ -0,0 +1,104 @@
1
+ # NV-Ingest V2 API
2
+
3
+ ## Overview
4
+
5
+ The V2 API introduces automatic PDF splitting at the REST layer to improve processing throughput. When a submitted PDF has more pages than the configured `PDF_SPLIT_PAGE_COUNT` (default 32), it is automatically split into multi-page chunks of that size before being sent to the Redis service, which then communicates with the Ray processing backend.
6
+
7
+ ## Key Changes from V1
8
+
9
+ 1. **Automatic PDF Splitting**: PDFs over the configured `PDF_SPLIT_PAGE_COUNT` are automatically split into multi-page chunks
10
+ 2. **Parent-Child Job Tracking**: Parent jobs maintain relationships with their subjobs via Redis
11
+ 3. **Transparent Aggregation**: Results are automatically aggregated when fetching parent jobs
12
+ 4. **Backward Compatible**: PDFs with page counts ≤ `PDF_SPLIT_PAGE_COUNT` behave identically to V1
13
+
14
+ ## Tracing & Aggregated Metadata
15
+
16
+ - V2 endpoints open an OpenTelemetry span using the shared `traced_endpoint` decorator. The span name defaults to the function name, or can be overridden when applying the decorator.
17
+ - `submit_job_v2` records the parent span's `trace_id` into each subjob's `tracing_options`, enabling downstream Ray stages (e.g., the message broker sink) to attach chunk-level telemetry consistently.
18
+ - Response headers still return `x-trace-id` derived from the active span context, allowing clients to correlate downstream work.
19
+ - When `/v2/fetch_job/{parent_id}` aggregates completed chunks, it captures any `trace` / `annotations` dictionaries emitted by the sink for each subjob and includes them in the response payload (see "Aggregated response" below).
20
+
21
+ This behaviour matches the V1 tracing model and lays the foundation for adding W3C `traceparent` propagation in future changes.
22
+
23
+ ## How It Works
24
+
25
+ 1. **Submit**: When a PDF with pages exceeding `PDF_SPLIT_PAGE_COUNT` is submitted to `/v2/submit_job`:
26
+ - The PDF is split into page chunks (size determined by `PDF_SPLIT_PAGE_COUNT`)
27
+ - Each chunk becomes a subjob with deterministic IDs derived from the parent
28
+ - Source IDs are modified to maintain association: `document.pdf#page_1`
29
+ - Parent-child mapping is stored in Redis
30
+
31
+ 2. **Processing**: Each subjob is processed independently by Ray, appearing as chunk-sized PDFs that honor the configured `PDF_SPLIT_PAGE_COUNT`
32
+
33
+ 3. **Fetch**: When fetching the parent job via `/v2/fetch_job/{parent_id}`:
34
+ - Subjob states and results are retrieved concurrently (bounded by the Redis connection pool)
35
+ - If all complete, results are aggregated in original page order
36
+ - Pending work returns 202 (processing)
37
+ - Failed chunks are noted without failing the entire job; metadata records which chunks failed
38
+
39
+ ### Aggregated response
40
+
41
+ The fetch endpoint returns a JSON body shaped like the following:
42
+
43
+ ```
44
+ {
45
+ "data": [...],
46
+ "status": "success",
47
+ "metadata": {
48
+ "parent_job_id": "<uuid>",
49
+ "total_pages": 320,
50
+ "pages_per_chunk": 32,
51
+ "original_source_id": "document.pdf",
52
+ "subjob_ids": ["...", "..."],
53
+ "subjobs_failed": 0,
54
+ "failed_subjobs": [],
55
+ "chunks": [
56
+ {
57
+ "job_id": "...",
58
+ "chunk_index": 1,
59
+ "start_page": 1,
60
+ "end_page": 32,
61
+ "page_count": 32
62
+ }
63
+ // ... additional chunks ...
64
+ ],
65
+ "trace_segments": [
66
+ {
67
+ "job_id": "...",
68
+ "chunk_index": 1,
69
+ "start_page": 1,
70
+ "end_page": 32,
71
+ "trace": {"trace::sink_push": 1.7285796e+18, ...}
72
+ }
73
+ // ...
74
+ ],
75
+ "annotation_segments": [
76
+ {
77
+ "job_id": "...",
78
+ "chunk_index": 1,
79
+ "start_page": 1,
80
+ "end_page": 32,
81
+ "annotations": {"annotation::stage": "sink", ...}
82
+ }
83
+ // ...
84
+ ]
85
+ }
86
+ }
87
+ ```
88
+
89
+ - `trace_segments` and `annotation_segments` appear only when the sink emits telemetry for a given chunk.
90
+ - Clients can correlate chunk data by matching `job_id` or `chunk_index` across `chunks`, `trace_segments`, and `annotation_segments`.
91
+ - Failed chunk entries remain in `failed_subjobs`; if a chunk is missing from the telemetry arrays, the sink did not emit trace/annotation payloads for that chunk.
92
+
93
+ ## Testing
94
+
95
+ Run the V2 test script with the `DATASET_DIR` environment variable set:
96
+ ```bash
97
+ # Run with V2 endpoints
98
+ DATASET_DIR=/data/splits python scripts/tests/cases/dc20_v2_e2e.py
99
+ ```
100
+
101
+ Or set the API version for any existing code:
102
+ ```bash
103
+ export NV_INGEST_API_VERSION=v2
104
+ ```