nv-ingest 2025.12.2.dev20251202__tar.gz → 2025.12.4.dev20251204__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/PKG-INFO +3 -1
  2. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +72 -6
  3. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/pipeline/default_pipeline_impl.py +3 -0
  4. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest.egg-info/PKG-INFO +3 -1
  5. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest.egg-info/requires.txt +2 -0
  6. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/pyproject.toml +2 -0
  7. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/LICENSE +0 -0
  8. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/MANIFEST.in +0 -0
  9. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/__init__.py +0 -0
  10. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/api/__init__.py +0 -0
  11. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/api/main.py +0 -0
  12. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/api/tracing.py +0 -0
  13. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/api/v1/__init__.py +0 -0
  14. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/api/v1/health.py +0 -0
  15. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/api/v1/ingest.py +0 -0
  16. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/api/v1/metrics.py +0 -0
  17. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/api/v2/README.md +0 -0
  18. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/api/v2/__init__.py +0 -0
  19. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/api/v2/ingest.py +0 -0
  20. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/__init__.py +0 -0
  21. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/__init__.py +0 -0
  22. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
  23. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
  24. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/execution/options.py +0 -0
  25. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
  26. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/process/dependent_services.py +0 -0
  27. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/process/execution.py +0 -0
  28. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
  29. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/process/strategies.py +0 -0
  30. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/process/termination.py +0 -0
  31. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  32. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  33. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  34. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  35. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  36. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  37. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
  38. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  39. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  40. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  41. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  42. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  43. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
  44. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
  45. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
  46. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  47. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  48. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  49. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  50. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  51. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  52. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  53. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  54. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py +0 -0
  55. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  56. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  57. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  58. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  59. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
  60. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  61. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  62. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  63. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  64. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
  65. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  66. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  67. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  68. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  69. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  70. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  71. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  72. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +0 -0
  73. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  74. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  75. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  76. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  77. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  78. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  79. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  80. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  81. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
  82. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
  83. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  84. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  85. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  86. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
  87. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  88. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  89. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
  90. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  91. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  92. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  93. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  94. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/__init__.py +0 -0
  95. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  96. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  97. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  98. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  99. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  100. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  101. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  102. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  103. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  104. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  105. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  106. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/__init__.py +0 -0
  107. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  108. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  109. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
  110. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/service/__init__.py +0 -0
  111. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  112. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  113. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
  114. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  115. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  116. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  117. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  118. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  119. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/pipeline/__init__.py +0 -0
  120. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/pipeline/config/__init__.py +0 -0
  121. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/pipeline/config/loaders.py +0 -0
  122. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
  123. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +0 -0
  124. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
  125. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/pipeline/pipeline_schema.py +0 -0
  126. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest/version.py +0 -0
  127. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest.egg-info/SOURCES.txt +0 -0
  128. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest.egg-info/dependency_links.txt +0 -0
  129. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/nv_ingest.egg-info/top_level.txt +0 -0
  130. {nv_ingest-2025.12.2.dev20251202 → nv_ingest-2025.12.4.dev20251204}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.12.2.dev20251202
3
+ Version: 2025.12.4.dev20251204
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -219,6 +219,8 @@ Requires-Dist: diskcache>=5.6.3
219
219
  Requires-Dist: fastapi>=0.115.6
220
220
  Requires-Dist: fastparquet>=2024.11.0
221
221
  Requires-Dist: fsspec>=2024.10.0
222
+ Requires-Dist: universal_pathlib>=0.2.6
223
+ Requires-Dist: s3fs>=2024.10.0
222
224
  Requires-Dist: gunicorn
223
225
  Requires-Dist: h11>=0.16.0
224
226
  Requires-Dist: httpx>=0.28.1
@@ -3,7 +3,9 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  import logging
6
+ import os
6
7
  from typing import Dict, Any, Optional
8
+ from urllib.parse import urlparse
7
9
 
8
10
  import pandas as pd
9
11
  import ray
@@ -26,7 +28,8 @@ logger = logging.getLogger(__name__)
26
28
  @ray.remote
27
29
  class ImageStorageStage(RayActorStage):
28
30
  """
29
- A Ray actor stage that stores images or structured content in MinIO and updates metadata with storage URLs.
31
+ A Ray actor stage that stores images or structured content using an fsspec-compatible backend and updates
32
+ metadata with storage URLs.
30
33
 
31
34
  This stage uses the validated configuration (ImageStorageModuleSchema) to process and store the DataFrame
32
35
  payload and updates the control message accordingly.
@@ -69,8 +72,16 @@ class ImageStorageStage(RayActorStage):
69
72
  task_config = remove_task_by_type(control_message, "store")
70
73
  # logger.debug("ImageStorageStage: Task configuration extracted: %s", pprint.pformat(task_config))
71
74
 
72
- store_structured: bool = task_config.get("structured", True)
73
- store_unstructured: bool = task_config.get("images", False)
75
+ stage_defaults = {
76
+ "structured": self.validated_config.structured,
77
+ "images": self.validated_config.images,
78
+ "storage_uri": self.validated_config.storage_uri,
79
+ "storage_options": self.validated_config.storage_options,
80
+ "public_base_url": self.validated_config.public_base_url,
81
+ }
82
+
83
+ store_structured: bool = task_config.get("structured", stage_defaults["structured"])
84
+ store_unstructured: bool = task_config.get("images", stage_defaults["images"])
74
85
 
75
86
  content_types: Dict[Any, Any] = {}
76
87
  if store_structured:
@@ -80,14 +91,34 @@ class ImageStorageStage(RayActorStage):
80
91
  content_types[ContentTypeEnum.IMAGE] = store_unstructured
81
92
 
82
93
  params: Dict[str, Any] = task_config.get("params", {})
83
- params["content_types"] = content_types
84
94
 
85
- logger.debug(f"Processing storage task with parameters: {params}")
95
+ storage_uri = task_config.get("storage_uri") or params.get("storage_uri") or stage_defaults["storage_uri"]
96
+ storage_options = {
97
+ **(stage_defaults["storage_options"] or {}),
98
+ **(task_config.get("storage_options") or {}),
99
+ **params.get("storage_options", {}),
100
+ }
101
+ if "public_base_url" in task_config:
102
+ public_base_url = task_config["public_base_url"]
103
+ else:
104
+ public_base_url = params.get("public_base_url", stage_defaults["public_base_url"])
105
+
106
+ storage_options = self._inject_storage_defaults(storage_uri, storage_options)
107
+
108
+ storage_params: Dict[str, Any] = {
109
+ "content_types": content_types,
110
+ "storage_uri": storage_uri,
111
+ "storage_options": storage_options,
112
+ }
113
+ if public_base_url:
114
+ storage_params["public_base_url"] = public_base_url
115
+
116
+ logger.debug("Processing storage task with parameters: %s", storage_params)
86
117
 
87
118
  # Store images or structured content.
88
119
  df_storage_ledger: pd.DataFrame = store_images_to_minio_internal(
89
120
  df_storage_ledger=df_payload,
90
- task_config=params,
121
+ task_config=storage_params,
91
122
  storage_config={},
92
123
  execution_trace_log=None,
93
124
  )
@@ -98,3 +129,38 @@ class ImageStorageStage(RayActorStage):
98
129
  control_message.payload(df_storage_ledger)
99
130
 
100
131
  return control_message
132
+
133
+ @staticmethod
134
+ def _inject_storage_defaults(storage_uri: str, storage_options: Dict[str, Any]) -> Dict[str, Any]:
135
+ """
136
+ Populate storage options for common backends (e.g., MinIO/S3) using environment defaults.
137
+ """
138
+ parsed_scheme = urlparse(storage_uri).scheme.lower()
139
+ merged_options: Dict[str, Any] = {k: v for k, v in storage_options.items() if v is not None}
140
+
141
+ if parsed_scheme not in {"s3", "s3a", "s3n"}:
142
+ return merged_options
143
+
144
+ def _set_if_absent(key: str, env_var: str) -> None:
145
+ if key not in merged_options and env_var in os.environ:
146
+ merged_options[key] = os.environ[env_var]
147
+
148
+ _set_if_absent("key", "MINIO_ACCESS_KEY")
149
+ _set_if_absent("secret", "MINIO_SECRET_KEY")
150
+ if "token" not in merged_options and os.environ.get("MINIO_SESSION_TOKEN"):
151
+ merged_options["token"] = os.environ["MINIO_SESSION_TOKEN"]
152
+
153
+ client_kwargs = dict(merged_options.get("client_kwargs", {}))
154
+ endpoint = os.environ.get("MINIO_INTERNAL_ADDRESS")
155
+ if not endpoint:
156
+ endpoint = "http://minio:9000"
157
+ if endpoint and not endpoint.startswith(("http://", "https://")):
158
+ endpoint = f"http://{endpoint}"
159
+ client_kwargs.setdefault("endpoint_url", endpoint)
160
+ region = os.environ.get("MINIO_REGION")
161
+ if region:
162
+ client_kwargs.setdefault("region_name", region)
163
+ if client_kwargs:
164
+ merged_options["client_kwargs"] = client_kwargs
165
+
166
+ return merged_options
@@ -372,6 +372,9 @@ stages:
372
372
  type: "stage"
373
373
  phase: 5 # RESPONSE
374
374
  actor: "nv_ingest.framework.orchestration.ray.stages.storage.image_storage:ImageStorageStage"
375
+ config:
376
+ storage_uri: $IMAGE_STORAGE_URI|"s3://nv-ingest/artifacts/store/images"
377
+ public_base_url: $IMAGE_STORAGE_PUBLIC_BASE_URL|""
375
378
  replicas:
376
379
  min_replicas: 0
377
380
  max_replicas:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.12.2.dev20251202
3
+ Version: 2025.12.4.dev20251204
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -219,6 +219,8 @@ Requires-Dist: diskcache>=5.6.3
219
219
  Requires-Dist: fastapi>=0.115.6
220
220
  Requires-Dist: fastparquet>=2024.11.0
221
221
  Requires-Dist: fsspec>=2024.10.0
222
+ Requires-Dist: universal_pathlib>=0.2.6
223
+ Requires-Dist: s3fs>=2024.10.0
222
224
  Requires-Dist: gunicorn
223
225
  Requires-Dist: h11>=0.16.0
224
226
  Requires-Dist: httpx>=0.28.1
@@ -4,6 +4,8 @@ diskcache>=5.6.3
4
4
  fastapi>=0.115.6
5
5
  fastparquet>=2024.11.0
6
6
  fsspec>=2024.10.0
7
+ universal_pathlib>=0.2.6
8
+ s3fs>=2024.10.0
7
9
  gunicorn
8
10
  h11>=0.16.0
9
11
  httpx>=0.28.1
@@ -26,6 +26,8 @@ dependencies = [
26
26
  "fastapi>=0.115.6",
27
27
  "fastparquet>=2024.11.0",
28
28
  "fsspec>=2024.10.0",
29
+ "universal_pathlib>=0.2.6",
30
+ "s3fs>=2024.10.0",
29
31
  "gunicorn",
30
32
  "h11>=0.16.0", # Must pin at or above 0.16.0 for CVE mitigation
31
33
  "httpx>=0.28.1",