nv-ingest 2025.8.20.dev20250820__tar.gz → 2025.8.21.dev20250821__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

Files changed (125)
  1. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/PKG-INFO +1 -1
  2. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +2 -1
  3. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +5 -2
  4. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +2 -1
  5. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +2 -1
  6. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +2 -1
  7. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +5 -2
  8. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +2 -1
  9. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +2 -1
  10. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +2 -1
  11. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +2 -1
  12. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +5 -1
  13. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +2 -1
  14. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +5 -1
  15. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +5 -1
  16. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +4 -3
  17. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest.egg-info/PKG-INFO +1 -1
  18. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/LICENSE +0 -0
  19. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/MANIFEST.in +0 -0
  20. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/__init__.py +0 -0
  21. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/api/__init__.py +0 -0
  22. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/api/main.py +0 -0
  23. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/api/v1/__init__.py +0 -0
  24. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/api/v1/health.py +0 -0
  25. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/api/v1/ingest.py +0 -0
  26. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/api/v1/metrics.py +0 -0
  27. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/__init__.py +0 -0
  28. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/__init__.py +0 -0
  29. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/execution/__init__.py +0 -0
  30. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/execution/helpers.py +0 -0
  31. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/execution/options.py +0 -0
  32. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/process/__init__.py +0 -0
  33. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/process/dependent_services.py +0 -0
  34. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/process/execution.py +0 -0
  35. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/process/lifecycle.py +0 -0
  36. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/process/strategies.py +0 -0
  37. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/process/termination.py +0 -0
  38. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  39. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  40. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  41. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  42. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  43. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  44. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +0 -0
  45. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  46. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  47. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  48. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  49. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  50. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
  51. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
  52. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
  53. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  54. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  55. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  56. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  57. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  58. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  59. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  60. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  61. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  62. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
  63. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  64. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  65. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  66. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  67. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  68. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  69. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  70. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  71. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  72. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  73. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  74. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  75. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  76. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  77. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  78. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/util/env_config.py +0 -0
  79. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  80. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  81. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +0 -0
  82. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  83. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  84. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  85. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  86. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/__init__.py +0 -0
  87. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  88. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  89. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  90. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  91. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  92. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  93. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  94. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  95. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  96. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  97. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  98. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/__init__.py +0 -0
  99. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  100. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  101. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/flow_control/udf_intercept.py +0 -0
  102. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/service/__init__.py +0 -0
  103. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  104. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  105. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
  106. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  107. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  108. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  109. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  110. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  111. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/pipeline/__init__.py +0 -0
  112. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/pipeline/config/__init__.py +0 -0
  113. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/pipeline/config/loaders.py +0 -0
  114. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/pipeline/config/replica_resolver.py +0 -0
  115. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/pipeline/default_libmode_pipeline_impl.py +0 -0
  116. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/pipeline/default_pipeline_impl.py +0 -0
  117. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/pipeline/ingest_pipeline.py +0 -0
  118. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/pipeline/pipeline_schema.py +0 -0
  119. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest/version.py +0 -0
  120. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest.egg-info/SOURCES.txt +0 -0
  121. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest.egg-info/dependency_links.txt +0 -0
  122. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest.egg-info/requires.txt +0 -0
  123. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/nv_ingest.egg-info/top_level.txt +0 -0
  124. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/pyproject.toml +0 -0
  125. {nv_ingest-2025.8.20.dev20250820 → nv_ingest-2025.8.21.dev20250821}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.8.20.dev20250820
3
+ Version: 2025.8.21.dev20250821
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -17,6 +17,7 @@ from nv_ingest_api.internal.schemas.extract.extract_audio_schema import AudioExt
17
17
  from nv_ingest_api.util.exception_handlers.decorators import (
18
18
  nv_ingest_node_failure_try_except,
19
19
  )
20
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
20
21
 
21
22
  from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
22
23
 
@@ -69,7 +70,7 @@ class AudioExtractorStage(RayActorStage):
69
70
 
70
71
  # Remove the "audio_data_extract" task from the message to obtain task-specific configuration.
71
72
  task_config = remove_task_by_type(control_message, "extract")
72
- self._logger.debug("Extracted task config: %s", task_config)
73
+ self._logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
73
74
 
74
75
  # Perform audio text extraction.
75
76
  new_df, extraction_info = extract_text_from_audio_internal(
@@ -13,8 +13,11 @@ from nv_ingest.framework.util.flow_control import filter_by_task
13
13
  from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
14
14
  from nv_ingest_api.internal.primitives.tracing.tagging import traceable
15
15
  from nv_ingest_api.internal.schemas.extract.extract_chart_schema import ChartExtractorSchema
16
- from nv_ingest_api.util.exception_handlers.decorators import nv_ingest_node_failure_try_except
17
16
  from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
17
+ from nv_ingest_api.util.exception_handlers.decorators import (
18
+ nv_ingest_node_failure_try_except,
19
+ )
20
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
18
21
 
19
22
  logger = logging.getLogger(__name__)
20
23
 
@@ -66,7 +69,7 @@ class ChartExtractorStage(RayActorStage):
66
69
 
67
70
  # Remove the "chart_data_extract" task to obtain task-specific configuration.
68
71
  task_config = remove_task_by_type(control_message, "chart_data_extract")
69
- logger.debug("ChartExtractorStage: Task config extracted: %s", task_config)
72
+ logger.debug("ChartExtractorStage: Task config extracted: %s", sanitize_for_logging(task_config))
70
73
 
71
74
  # Perform chart data extraction.
72
75
  execution_trace_log = {}
@@ -16,6 +16,7 @@ from nv_ingest_api.internal.schemas.extract.extract_docx_schema import DocxExtra
16
16
  from nv_ingest_api.util.exception_handlers.decorators import (
17
17
  nv_ingest_node_failure_try_except,
18
18
  )
19
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
19
20
 
20
21
  from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
21
22
 
@@ -68,7 +69,7 @@ class DocxExtractorStage(RayActorStage):
68
69
 
69
70
  # Remove the "docx-extract" task from the message to obtain task-specific configuration.
70
71
  task_config = remove_task_by_type(control_message, "extract")
71
- self._logger.debug("Extracted task config: %s", task_config)
72
+ self._logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
72
73
 
73
74
  # Perform DOCX content extraction.
74
75
  new_df, extraction_info = extract_primitives_from_docx_internal(
@@ -17,6 +17,7 @@ from nv_ingest_api.internal.schemas.extract.extract_html_schema import HtmlExtra
17
17
  from nv_ingest_api.util.exception_handlers.decorators import (
18
18
  nv_ingest_node_failure_try_except,
19
19
  )
20
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
20
21
 
21
22
  from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
22
23
 
@@ -69,7 +70,7 @@ class HtmlExtractorStage(RayActorStage):
69
70
 
70
71
  # Remove the "html_content_extract" task from the message to obtain task-specific configuration.
71
72
  task_config = remove_task_by_type(control_message, "extract")
72
- self._logger.debug("Extracted task config: %s", task_config)
73
+ self._logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
73
74
 
74
75
  # Perform html content extraction.
75
76
  new_df, extraction_info = extract_markdown_from_html_internal(
@@ -16,6 +16,7 @@ from nv_ingest_api.internal.schemas.extract.extract_image_schema import ImageExt
16
16
  from nv_ingest_api.util.exception_handlers.decorators import (
17
17
  nv_ingest_node_failure_try_except,
18
18
  )
19
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
19
20
 
20
21
  from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
21
22
 
@@ -68,7 +69,7 @@ class ImageExtractorStage(RayActorStage):
68
69
 
69
70
  # Remove the "extract" task from the message to obtain task-specific configuration.
70
71
  task_config = remove_task_by_type(control_message, "extract")
71
- logger.debug("Extracted task config: %s", task_config)
72
+ logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
72
73
 
73
74
  # Perform image primitives extraction.
74
75
  new_df, extraction_info = extract_primitives_from_image_internal(
@@ -15,7 +15,10 @@ from nv_ingest_api.internal.primitives.tracing.tagging import set_trace_timestam
15
15
  from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
16
16
  from nv_ingest.framework.util.flow_control import filter_by_task
17
17
  from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
18
- from nv_ingest_api.util.exception_handlers.decorators import nv_ingest_node_failure_try_except
18
+ from nv_ingest_api.util.exception_handlers.decorators import (
19
+ nv_ingest_node_failure_try_except,
20
+ )
21
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
19
22
 
20
23
  logger = logging.getLogger(__name__)
21
24
 
@@ -87,7 +90,7 @@ class PDFExtractorStage(RayActorStage):
87
90
 
88
91
  # Remove the "extract" task from the message to obtain task-specific configuration.
89
92
  task_config = remove_task_by_type(control_message, "extract")
90
- logger.debug("Extracted task config: %s", task_config)
93
+ logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
91
94
 
92
95
  # Perform PDF extraction.
93
96
  execution_trace_log = {}
@@ -16,6 +16,7 @@ from nv_ingest_api.internal.schemas.extract.extract_table_schema import TableExt
16
16
  from nv_ingest_api.util.exception_handlers.decorators import (
17
17
  nv_ingest_node_failure_try_except,
18
18
  )
19
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
19
20
 
20
21
  logger = logging.getLogger(__name__)
21
22
 
@@ -65,7 +66,7 @@ class TableExtractorStage(RayActorStage):
65
66
 
66
67
  # Remove the "table_data_extract" task to obtain task-specific configuration.
67
68
  task_config = remove_task_by_type(control_message, "table_data_extract")
68
- logger.debug("Extracted task configuration: %s", task_config)
69
+ logger.debug("Extracted task configuration: %s", sanitize_for_logging(task_config))
69
70
 
70
71
  # Perform table data extraction.
71
72
  execution_trace_log = {}
@@ -25,6 +25,7 @@ from nv_ingest_api.util.exception_handlers.decorators import (
25
25
  nv_ingest_node_failure_try_except,
26
26
  )
27
27
  from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
28
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
28
29
 
29
30
  logger = logging.getLogger(__name__)
30
31
 
@@ -42,7 +43,7 @@ class MetadataInjectionStage(RayActorStage):
42
43
  # Call the base initializer to set attributes like self._running.
43
44
  super().__init__(config, stage_name=stage_name)
44
45
  # Additional initialization can be added here if necessary.
45
- self._logger.debug("MetadataInjectionStage initialized with config: %s", config)
46
+ self._logger.debug("MetadataInjectionStage initialized with config: %s", sanitize_for_logging(config))
46
47
 
47
48
  @nv_ingest_node_failure_try_except()
48
49
  @traceable()
@@ -18,6 +18,7 @@ from nv_ingest_api.internal.schemas.mutate.mutate_image_dedup_schema import Imag
18
18
  from nv_ingest_api.util.exception_handlers.decorators import (
19
19
  nv_ingest_node_failure_try_except,
20
20
  )
21
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
21
22
 
22
23
  logger = logging.getLogger(__name__)
23
24
 
@@ -68,7 +69,7 @@ class ImageDedupStage(RayActorStage):
68
69
 
69
70
  # Remove the "dedup" task from the message to obtain task-specific configuration.
70
71
  task_config = remove_task_by_type(control_message, "dedup")
71
- logger.debug("Extracted task config: %s", task_config)
72
+ logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
72
73
 
73
74
  # Perform image deduplication.
74
75
  new_df = deduplicate_images_internal(
@@ -17,6 +17,7 @@ from nv_ingest_api.internal.schemas.transform.transform_image_filter_schema impo
17
17
  from nv_ingest_api.util.exception_handlers.decorators import (
18
18
  nv_ingest_node_failure_try_except,
19
19
  )
20
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
20
21
 
21
22
  logger = logging.getLogger(__name__)
22
23
 
@@ -67,7 +68,7 @@ class ImageFilterStage(RayActorStage):
67
68
 
68
69
  # Remove the "filter" task from the message to obtain task-specific configuration.
69
70
  task_config = remove_task_by_type(control_message, "filter")
70
- logger.debug("Extracted task config: %s", task_config)
71
+ logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
71
72
 
72
73
  task_params: Dict[str, Any] = task_config.get("params", {})
73
74
 
@@ -29,6 +29,7 @@ from nv_ingest_api.internal.schemas.meta.ingest_job_schema import validate_inges
29
29
  # Import clients
30
30
  from nv_ingest_api.util.message_brokers.simple_message_broker.simple_client import SimpleClient
31
31
  from nv_ingest_api.util.service_clients.redis.redis_client import RedisClient
32
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
32
33
 
33
34
  logger = logging.getLogger(__name__)
34
35
 
@@ -104,8 +105,11 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
104
105
  def __init__(self, config: MessageBrokerTaskSourceConfig, stage_name: Optional[str] = None) -> None:
105
106
  super().__init__(config, log_to_stdout=False, stage_name=stage_name)
106
107
  self.config: MessageBrokerTaskSourceConfig # Add a type hint for self.config
108
+
109
+ # Sanitize config before logging to avoid leaking secrets
110
+ _sanitized = sanitize_for_logging(config)
107
111
  self._logger.debug(
108
- "Initializing MessageBrokerTaskSourceStage with config: %s", config.model_dump()
112
+ "Initializing MessageBrokerTaskSourceStage with config: %s", _sanitized
109
113
  ) # Log validated config
110
114
 
111
115
  # Access validated configuration directly via self.config
@@ -16,6 +16,7 @@ from nv_ingest_api.internal.store.embed_text_upload import store_text_embeddings
16
16
  from nv_ingest_api.util.exception_handlers.decorators import (
17
17
  nv_ingest_node_failure_try_except,
18
18
  )
19
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
19
20
 
20
21
  from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
21
22
 
@@ -68,7 +69,7 @@ class EmbeddingStorageStage(RayActorStage):
68
69
 
69
70
  # Remove the "store_embedding" task from the message to obtain task-specific configuration.
70
71
  task_config = remove_task_by_type(control_message, "store_embedding")
71
- logger.debug("Extracted task config: %s", task_config)
72
+ logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
72
73
 
73
74
  # Perform embedding storage.
74
75
  new_df = store_text_embeddings_internal(
@@ -18,6 +18,7 @@ from nv_ingest_api.internal.transform.caption_image import transform_image_creat
18
18
  from nv_ingest_api.util.exception_handlers.decorators import (
19
19
  nv_ingest_node_failure_try_except,
20
20
  )
21
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
21
22
 
22
23
  logger = logging.getLogger(__name__)
23
24
 
@@ -67,7 +68,10 @@ class ImageCaptionTransformStage(RayActorStage):
67
68
 
68
69
  # Remove the "caption" task to obtain task-specific configuration.
69
70
  task_config = remove_task_by_type(control_message, "caption")
70
- logger.debug("ImageCaptionTransformStage: Task configuration extracted: %s", pprint.pformat(task_config))
71
+ logger.debug(
72
+ "ImageCaptionTransformStage: Task configuration extracted: %s",
73
+ pprint.pformat(sanitize_for_logging(task_config)),
74
+ )
71
75
 
72
76
  # Call the caption extraction function.
73
77
  new_df = transform_image_create_vlm_caption_internal(
@@ -15,6 +15,7 @@ from nv_ingest_api.internal.transform.embed_text import transform_create_text_em
15
15
  from nv_ingest_api.util.exception_handlers.decorators import (
16
16
  nv_ingest_node_failure_try_except,
17
17
  )
18
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
18
19
 
19
20
  from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
20
21
 
@@ -62,7 +63,10 @@ class TextEmbeddingTransformStage(RayActorStage):
62
63
 
63
64
  # Remove the "embed" task to obtain task-specific configuration.
64
65
  task_config = remove_task_by_type(control_message, "embed")
65
- self._logger.debug("TextEmbeddingTransformStage: Task configuration extracted: %s", pprint.pformat(task_config))
66
+ self._logger.debug(
67
+ "TextEmbeddingTransformStage: Task configuration extracted: %s",
68
+ pprint.pformat(sanitize_for_logging(task_config)),
69
+ )
66
70
 
67
71
  # Call the text embedding extraction function.
68
72
  new_df, execution_trace_log = transform_create_text_embeddings_internal(
@@ -16,6 +16,7 @@ from nv_ingest_api.internal.transform.split_text import transform_text_split_and
16
16
  from nv_ingest_api.util.exception_handlers.decorators import (
17
17
  nv_ingest_node_failure_try_except,
18
18
  )
19
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
19
20
 
20
21
  from nv_ingest.framework.util.flow_control.udf_intercept import udf_intercept_hook
21
22
 
@@ -36,7 +37,7 @@ class TextSplitterStage(RayActorStage):
36
37
  super().__init__(config, stage_name=stage_name)
37
38
  # Store the validated configuration (assumed to be an instance of TextSplitterSchema)
38
39
  self.validated_config: TextSplitterSchema = config
39
- logger.debug("TextSplitterStage initialized with config: %s", config)
40
+ logger.info("TextSplitterStage initialized with config: %s", sanitize_for_logging(config))
40
41
 
41
42
  @nv_ingest_node_failure_try_except()
42
43
  @traceable()
@@ -63,7 +64,7 @@ class TextSplitterStage(RayActorStage):
63
64
 
64
65
  # Remove the "split" task to obtain task-specific configuration.
65
66
  task_config = remove_task_by_type(message, "split")
66
- logger.debug("Extracted task config: %s", task_config)
67
+ logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
67
68
 
68
69
  # Transform the DataFrame (split text and tokenize).
69
70
  df_updated = transform_text_split_and_tokenize_internal(
@@ -107,7 +108,7 @@ def text_splitter_fn(control_message: IngestControlMessage, stage_config: TextSp
107
108
 
108
109
  # Remove the "split" task to obtain task-specific configuration.
109
110
  task_config = remove_task_by_type(control_message, "split")
110
- logger.debug("Extracted task config: %s", task_config)
111
+ logger.debug("Extracted task config: %s", sanitize_for_logging(task_config))
111
112
 
112
113
  # Transform the DataFrame (split text and tokenize).
113
114
  df_updated = transform_text_split_and_tokenize_internal(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.8.20.dev20250820
3
+ Version: 2025.8.21.dev20250821
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License