nv-ingest-api 2025.8.20.dev20250820__tar.gz → 2025.8.22.dev20250822__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (176) hide show
  1. {nv_ingest_api-2025.8.20.dev20250820/src/nv_ingest_api.egg-info → nv_ingest_api-2025.8.22.dev20250822}/PKG-INFO +1 -1
  2. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/interface/__init__.py +14 -11
  3. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +3 -2
  4. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +3 -3
  5. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +2 -2
  6. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +2 -2
  7. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +2 -2
  8. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +2 -2
  9. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +3 -3
  10. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +2 -2
  11. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +2 -2
  12. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +4 -4
  13. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +2 -2
  14. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +1 -1
  15. nv_ingest_api-2025.8.22.dev20250822/src/nv_ingest_api/util/logging/sanitize.py +84 -0
  16. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
  17. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api.egg-info/SOURCES.txt +1 -0
  18. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/LICENSE +0 -0
  19. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/MANIFEST.in +0 -0
  20. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/README.md +0 -0
  21. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/pyproject.toml +0 -0
  22. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/setup.cfg +0 -0
  23. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/__init__.py +0 -0
  24. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/interface/extract.py +0 -0
  25. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/interface/mutate.py +0 -0
  26. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/interface/store.py +0 -0
  27. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/interface/transform.py +0 -0
  28. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/interface/utility.py +0 -0
  29. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/__init__.py +0 -0
  30. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
  31. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/enums/common.py +0 -0
  32. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
  33. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
  34. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
  35. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
  36. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
  37. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  38. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
  39. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
  40. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
  41. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
  42. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
  43. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
  44. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
  45. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
  46. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
  47. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
  48. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
  49. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
  50. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
  51. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
  52. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
  53. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
  54. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
  55. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
  56. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
  57. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
  58. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
  59. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
  60. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  61. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
  62. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
  63. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/meta/__init__.py +0 -0
  64. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/meta/udf.py +0 -0
  65. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
  66. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
  67. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
  68. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
  69. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
  70. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
  71. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
  72. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
  73. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
  74. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
  75. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
  76. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
  77. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
  78. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
  79. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +0 -0
  80. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
  81. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
  82. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
  83. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
  84. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
  85. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
  86. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  87. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
  88. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
  89. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
  90. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
  91. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
  92. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
  93. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
  94. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
  95. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
  96. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
  97. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
  98. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
  99. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -0
  100. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/meta/udf.py +0 -0
  101. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
  102. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
  103. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
  104. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
  105. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
  106. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
  107. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
  108. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
  109. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/store/__init__.py +0 -0
  110. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
  111. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
  112. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
  113. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
  114. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/transform/embed_text.py +0 -0
  115. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
  116. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/__init__.py +0 -0
  117. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
  118. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/control_message/validators.py +0 -0
  119. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/converters/__init__.py +0 -0
  120. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
  121. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/converters/containers.py +0 -0
  122. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/converters/datetools.py +0 -0
  123. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/converters/dftools.py +0 -0
  124. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/converters/formats.py +0 -0
  125. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
  126. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
  127. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/detectors/language.py +0 -0
  128. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
  129. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
  130. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
  131. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
  132. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
  133. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
  134. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
  135. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
  136. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
  137. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
  138. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
  139. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/imports/__init__.py +0 -0
  140. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/imports/callable_signatures.py +0 -0
  141. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/imports/dynamic_resolvers.py +0 -0
  142. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/introspection/__init__.py +0 -0
  143. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/introspection/class_inspect.py +0 -0
  144. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/introspection/function_inspect.py +0 -0
  145. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/logging/__init__.py +0 -0
  146. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/logging/configuration.py +0 -0
  147. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
  148. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
  149. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
  150. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
  151. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
  152. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
  153. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
  154. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
  155. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
  156. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/nim/__init__.py +0 -0
  157. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
  158. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
  159. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/schema/__init__.py +0 -0
  160. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
  161. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
  162. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
  163. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
  164. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
  165. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
  166. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  167. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
  168. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
  169. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/string_processing/configuration.py +0 -0
  170. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/string_processing/yaml.py +0 -0
  171. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/system/__init__.py +0 -0
  172. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
  173. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
  174. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api.egg-info/requires.txt +0 -0
  175. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
  176. {nv_ingest_api-2025.8.20.dev20250820 → nv_ingest_api-2025.8.22.dev20250822}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.8.20.dev20250820
3
+ Version: 2025.8.22.dev20250822
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -11,6 +11,7 @@ from typing import Dict, Any, Optional, List
11
11
  from pydantic import BaseModel
12
12
 
13
13
  from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import PDFiumConfigSchema, NemoRetrieverParseConfigSchema
14
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
14
15
 
15
16
  logger = logging.getLogger(__name__)
16
17
 
@@ -180,29 +181,31 @@ def extraction_interface_relay_constructor(api_fn, task_keys: Optional[List[str]
180
181
  if extractor_schema is None:
181
182
  extractor_schema = {f"{extract_method}_config": extraction_config_dict}
182
183
 
183
- # Log the task and extractor configurations for debugging
184
+ # Log the task and extractor configurations for debugging (sanitized)
184
185
  logger.debug("\n" + "=" * 80)
185
186
  logger.debug(f"DEBUG - API Function: {api_fn.__name__}")
186
187
  logger.debug(f"DEBUG - Extract Method: {extract_method}")
187
188
  logger.debug("-" * 80)
188
189
 
189
- # Format the task config as a string and log it
190
- task_config_str = pprint.pformat(task_config, width=100, sort_dicts=False)
191
- logger.debug(f"DEBUG - Task Config:\n{task_config_str}")
190
+ # Sanitize and format the task config as a string and log it
191
+ sanitized_task_config = sanitize_for_logging(task_config)
192
+ task_config_str = pprint.pformat(sanitized_task_config, width=100, sort_dicts=False)
193
+ logger.debug(f"DEBUG - Task Config (sanitized):\n{task_config_str}")
192
194
  logger.debug("-" * 80)
193
195
 
194
- # Format the extractor config as a string and log it
196
+ # Sanitize and format the extractor config as a string and log it
195
197
  if hasattr(extractor_schema, "model_dump"):
196
- extractor_config_str = pprint.pformat(extractor_schema.model_dump(), width=100, sort_dicts=False)
198
+ sanitized_extractor_config = sanitize_for_logging(extractor_schema.model_dump())
197
199
  else:
198
- extractor_config_str = pprint.pformat(extractor_schema, width=100, sort_dicts=False)
200
+ sanitized_extractor_config = sanitize_for_logging(extractor_schema)
201
+ extractor_config_str = pprint.pformat(sanitized_extractor_config, width=100, sort_dicts=False)
199
202
  logger.debug(f"DEBUG - Extractor Config Type: {type(extractor_schema)}")
200
- logger.debug(f"DEBUG - Extractor Config:\n{extractor_config_str}")
203
+ logger.debug(f"DEBUG - Extractor Config (sanitized):\n{extractor_config_str}")
201
204
  logger.debug("=" * 80 + "\n")
202
205
 
203
- # Call the backend API function.
204
- pprint.pprint(task_config)
205
- pprint.pprint(extractor_schema)
206
+ # Call the backend API function. Print sanitized configs for any debug consumers of stdout.
207
+ pprint.pprint(sanitized_task_config)
208
+ pprint.pprint(sanitized_extractor_config)
206
209
  result = api_fn(ledger, task_config, extractor_schema, execution_trace_log)
207
210
 
208
211
  # If the result is a tuple, return only the first element
@@ -11,6 +11,7 @@ from typing import Any
11
11
  from typing import Dict
12
12
  from typing import List
13
13
  from typing import Optional
14
+ from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
14
15
 
15
16
  import pandas as pd
16
17
  from nv_ingest_api.internal.extract.pdf.engines import adobe_extractor
@@ -131,7 +132,7 @@ def _orchestrate_row_extraction(
131
132
  method_config = extractor_config[config_key]
132
133
  else:
133
134
  # If no matching config is found, log a warning but don't fail
134
- logger.warning(f"No {config_key} found in extractor_config: {extractor_config}")
135
+ logger.warning(f"No {config_key} found in extractor_config: {sanitize_for_logging(extractor_config)}")
135
136
  method_config = None
136
137
 
137
138
  # Add the method-specific config to the parameters if available
@@ -141,7 +142,7 @@ def _orchestrate_row_extraction(
141
142
 
142
143
  # The resulting parameters constitute the complete extractor_config
143
144
  extractor_config = params
144
- logger.debug(f"Final extractor_config: {extractor_config}")
145
+ logger.debug(f"Final extractor_config: {sanitize_for_logging(extractor_config)}")
145
146
 
146
147
  result = _work_extract_pdf(
147
148
  pdf_stream=pdf_stream,
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import BaseModel
10
+ from pydantic import BaseModel, Field
11
11
  from pydantic import root_validator
12
12
 
13
13
  logger = logging.getLogger(__name__)
@@ -42,12 +42,12 @@ class AudioConfigSchema(BaseModel):
42
42
  Pydantic config option to forbid extra fields.
43
43
  """
44
44
 
45
- auth_token: Optional[str] = None
45
+ auth_token: Optional[str] = Field(default=None, repr=False)
46
46
  audio_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
47
47
  audio_infer_protocol: Optional[str] = None
48
48
  function_id: Optional[str] = None
49
49
  use_ssl: Optional[bool] = None
50
- ssl_cert: Optional[str] = None
50
+ ssl_cert: Optional[str] = Field(default=None, repr=False)
51
51
  segment_audio: Optional[bool] = None
52
52
 
53
53
  @root_validator(pre=True)
@@ -6,7 +6,7 @@ import logging
6
6
  from typing import Optional
7
7
  from typing import Tuple
8
8
 
9
- from pydantic import field_validator, model_validator, ConfigDict, BaseModel
9
+ from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
@@ -44,7 +44,7 @@ class ChartExtractorConfigSchema(BaseModel):
44
44
  Pydantic config option to forbid extra fields.
45
45
  """
46
46
 
47
- auth_token: Optional[str] = None
47
+ auth_token: Optional[str] = Field(default=None, repr=False)
48
48
 
49
49
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
50
50
  yolox_infer_protocol: str = ""
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import model_validator, ConfigDict, BaseModel
10
+ from pydantic import model_validator, ConfigDict, BaseModel, Field
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
@@ -41,7 +41,7 @@ class DocxConfigSchema(BaseModel):
41
41
  Pydantic config option to forbid extra fields.
42
42
  """
43
43
 
44
- auth_token: Optional[str] = None
44
+ auth_token: Optional[str] = Field(default=None, repr=False)
45
45
 
46
46
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
47
47
  yolox_infer_protocol: str = ""
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import model_validator, ConfigDict, BaseModel
10
+ from pydantic import model_validator, ConfigDict, BaseModel, Field
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
@@ -41,7 +41,7 @@ class ImageConfigSchema(BaseModel):
41
41
  Pydantic config option to forbid extra fields.
42
42
  """
43
43
 
44
- auth_token: Optional[str] = None
44
+ auth_token: Optional[str] = Field(default=None, repr=False)
45
45
 
46
46
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
47
47
  yolox_infer_protocol: str = ""
@@ -6,7 +6,7 @@ import logging
6
6
  from typing import Optional
7
7
  from typing import Tuple
8
8
 
9
- from pydantic import field_validator, model_validator, ConfigDict, BaseModel
9
+ from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
10
10
 
11
11
  logger = logging.getLogger(__name__)
12
12
 
@@ -40,7 +40,7 @@ class InfographicExtractorConfigSchema(BaseModel):
40
40
  Pydantic config option to forbid extra fields.
41
41
  """
42
42
 
43
- auth_token: Optional[str] = None
43
+ auth_token: Optional[str] = Field(default=None, repr=False)
44
44
 
45
45
  ocr_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
46
46
  ocr_infer_protocol: str = ""
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import model_validator, ConfigDict, BaseModel
10
+ from pydantic import model_validator, ConfigDict, BaseModel, Field
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
@@ -41,7 +41,7 @@ class PDFiumConfigSchema(BaseModel):
41
41
  Pydantic config option to forbid extra fields.
42
42
  """
43
43
 
44
- auth_token: Optional[str] = None
44
+ auth_token: Optional[str] = Field(default=None, repr=False)
45
45
 
46
46
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
47
47
  yolox_infer_protocol: str = ""
@@ -123,7 +123,7 @@ class NemoRetrieverParseConfigSchema(BaseModel):
123
123
  Pydantic config option to forbid extra fields.
124
124
  """
125
125
 
126
- auth_token: Optional[str] = None
126
+ auth_token: Optional[str] = Field(default=None, repr=False)
127
127
 
128
128
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
129
129
  yolox_infer_protocol: str = ""
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import model_validator, ConfigDict, BaseModel
10
+ from pydantic import model_validator, ConfigDict, BaseModel, Field
11
11
 
12
12
  logger = logging.getLogger(__name__)
13
13
 
@@ -41,7 +41,7 @@ class PPTXConfigSchema(BaseModel):
41
41
  Pydantic config option to forbid extra fields.
42
42
  """
43
43
 
44
- auth_token: Optional[str] = None
44
+ auth_token: Optional[str] = Field(default=None, repr=False)
45
45
 
46
46
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
47
47
  yolox_infer_protocol: str = ""
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import Optional
8
8
  from typing import Tuple
9
9
 
10
- from pydantic import field_validator, model_validator, ConfigDict, BaseModel
10
+ from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
11
11
 
12
12
 
13
13
  logger = logging.getLogger(__name__)
@@ -42,7 +42,7 @@ class TableExtractorConfigSchema(BaseModel):
42
42
  Pydantic config option to forbid extra fields.
43
43
  """
44
44
 
45
- auth_token: Optional[str] = None
45
+ auth_token: Optional[str] = Field(default=None, repr=False)
46
46
 
47
47
  yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
48
48
  yolox_infer_protocol: str = ""
@@ -73,7 +73,7 @@ class IngestTaskStoreSchema(BaseModelNoExt):
73
73
 
74
74
  # Captioning: All fields are optional and override default parameters.
75
75
  class IngestTaskCaptionSchema(BaseModelNoExt):
76
- api_key: Optional[str] = None
76
+ api_key: Optional[str] = Field(default=None, repr=False)
77
77
  endpoint_url: Optional[str] = None
78
78
  prompt: Optional[str] = None
79
79
  model_name: Optional[str] = None
@@ -105,7 +105,7 @@ class IngestTaskDedupSchema(BaseModelNoExt):
105
105
  class IngestTaskEmbedSchema(BaseModelNoExt):
106
106
  endpoint_url: Optional[str] = None
107
107
  model_name: Optional[str] = None
108
- api_key: Optional[str] = None
108
+ api_key: Optional[str] = Field(default=None, repr=False)
109
109
  filter_errors: bool = False
110
110
  text_elements_modality: Optional[str] = None
111
111
  image_elements_modality: Optional[str] = None
@@ -121,13 +121,13 @@ class IngestTaskVdbUploadSchema(BaseModelNoExt):
121
121
 
122
122
 
123
123
  class IngestTaskAudioExtraction(BaseModelNoExt):
124
- auth_token: Optional[str] = None
124
+ auth_token: Optional[str] = Field(default=None, repr=False)
125
125
  grpc_endpoint: Optional[str] = None
126
126
  http_endpoint: Optional[str] = None
127
127
  infer_protocol: Optional[str] = None
128
128
  function_id: Optional[str] = None
129
129
  use_ssl: Optional[bool] = None
130
- ssl_cert: Optional[str] = None
130
+ ssl_cert: Optional[str] = Field(default=None, repr=False)
131
131
  segment_audio: Optional[bool] = None
132
132
 
133
133
 
@@ -3,11 +3,11 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
 
6
- from pydantic import ConfigDict, BaseModel, model_validator, field_validator
6
+ from pydantic import ConfigDict, BaseModel, model_validator, field_validator, Field
7
7
 
8
8
 
9
9
  class ImageCaptionExtractionSchema(BaseModel):
10
- api_key: str = ""
10
+ api_key: str = Field(default="", repr=False)
11
11
  endpoint_url: str = "https://integrate.api.nvidia.com/v1/chat/completions"
12
12
  prompt: str = "Caption the content of this image:"
13
13
  model_name: str = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
@@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
13
13
 
14
14
 
15
15
  class TextEmbeddingSchema(BaseModel):
16
- api_key: str = Field(default="")
16
+ api_key: str = Field(default="", repr=False)
17
17
  batch_size: int = Field(default=4)
18
18
  embedding_model: str = Field(default="nvidia/llama-3.2-nv-embedqa-1b-v2")
19
19
  embedding_nim_endpoint: str = Field(default="http://embedding:8000/v1")
@@ -0,0 +1,84 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Any, Mapping, MutableMapping, Sequence, Set
8
+
9
try:
    # Pydantic is optional at runtime for this helper; import if available
    from pydantic import BaseModel  # type: ignore
except Exception:  # pragma: no cover - pydantic always present in this repo
    BaseModel = None  # type: ignore


# Keys whose values are replaced before logging. Matching is case-insensitive
# and exact (no substring matching); see sanitize_for_logging().
_DEFAULT_SENSITIVE_KEYS: Set[str] = {
    "access_token",
    "api_key",
    "authorization",
    "auth_token",
    "client_secret",
    "hf_access_token",
    "hugging_face_access_token",
    "password",
    "refresh_token",
    "secret",
    "ssl_cert",
    "x-api-key",
}

# Placeholder written in place of every redacted value.
_REDACTION = "***REDACTED***"


def _is_mapping(obj: Any) -> bool:
    """Return True when *obj* is a ``Mapping``; any error during the check counts as False."""
    try:
        return isinstance(obj, Mapping)
    except Exception:
        # Defensive: a failing isinstance() check is treated as "not a mapping".
        return False


def _is_sequence(obj: Any) -> bool:
    """Return True for sequences that should be traversed element by element."""
    # Exclude strings/bytes from sequences we want to traverse
    return isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray))


def _sanitize_mapping(data: Any, keys: Set[str], redaction: str) -> Any:
    """Return a sanitized copy of mapping *data*, keeping its type when it can be rebuilt."""
    cleaned = {
        k: (redaction if str(k).lower() in keys else _sanitize(v, keys, redaction))
        for k, v in data.items()
    }
    if type(data) is dict:
        return cleaned
    try:
        # Preserve the mapping type where possible (OrderedDict, defaultdict, ...).
        rebuilt = type(data)()
        for k, v in cleaned.items():
            rebuilt[k] = v
        return rebuilt
    except Exception:
        # Read-only mappings or constructors that need arguments (for example
        # MappingProxyType) cannot be rebuilt this way. A logging helper must
        # not crash its caller, so fall back to the plain dict.
        return cleaned


def _sanitize_sequence(data: Any, keys: Set[str], redaction: str) -> Any:
    """Return a sanitized copy of sequence *data*, keeping its type when it can be rebuilt."""
    cleaned = [_sanitize(v, keys, redaction) for v in data]
    kind = type(data)
    if kind is list:
        return cleaned
    is_tuple = isinstance(data, tuple)
    try:
        if is_tuple and hasattr(kind, "_make"):
            # namedtuple classes take positional fields, not a single iterable.
            return kind._make(cleaned)
        return kind(cleaned)
    except Exception:
        # Types that cannot be built from an iterable (for example range)
        # degrade to a plain tuple/list instead of raising.
        return tuple(cleaned) if is_tuple else cleaned


def _sanitize(data: Any, keys: Set[str], redaction: str) -> Any:
    """Recursive worker for sanitize_for_logging(); *keys* must already be lower-cased."""
    if BaseModel is not None and isinstance(data, BaseModel):  # type: ignore[arg-type]
        try:
            dumped = data.model_dump()
        except Exception:
            # Best effort: fall through to the generic handling below, which
            # returns the model unchanged if it is neither mapping nor sequence.
            pass
        else:
            return _sanitize(dumped, keys, redaction)

    # Dict-like
    if _is_mapping(data):
        return _sanitize_mapping(data, keys, redaction)

    # List/Tuple/Sequence
    if _is_sequence(data):
        return _sanitize_sequence(data, keys, redaction)

    # Fallback: return as-is
    return data


def sanitize_for_logging(
    data: Any,
    sensitive_keys: Set[str] | None = None,
    redaction: str = _REDACTION,
) -> Any:
    """
    Recursively sanitize common secret fields from dicts, lists, tuples, and Pydantic models.

    - Key comparison is case-insensitive and matches exact keys only.
    - Does not mutate input; containers are rebuilt, leaf values are reused as-is.
    - For Pydantic BaseModel instances, uses model_dump() before redaction.
    - Container types are preserved when they can be rebuilt; otherwise the result
      degrades to a plain dict (mappings) or list/tuple (sequences) instead of raising.

    Parameters
    ----------
    data : Any
        Value to sanitize. Non-container values are returned unchanged.
    sensitive_keys : Set[str] | None
        Keys to redact. When None or empty, the module default key set is used.
    redaction : str
        Replacement written in place of each sensitive value.

    Returns
    -------
    Any
        Sanitized structure that mirrors the shape of ``data``.
    """
    # Normalize the key set once here rather than on every recursive call.
    keys = {k.lower() for k in (sensitive_keys or _DEFAULT_SENSITIVE_KEYS)}
    return _sanitize(data, keys, redaction)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.8.20.dev20250820
3
+ Version: 2025.8.22.dev20250822
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -145,6 +145,7 @@ src/nv_ingest_api/util/introspection/class_inspect.py
145
145
  src/nv_ingest_api/util/introspection/function_inspect.py
146
146
  src/nv_ingest_api/util/logging/__init__.py
147
147
  src/nv_ingest_api/util/logging/configuration.py
148
+ src/nv_ingest_api/util/logging/sanitize.py
148
149
  src/nv_ingest_api/util/message_brokers/__init__.py
149
150
  src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py
150
151
  src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py