nv-ingest-api 2025.6.24.dev20250625__tar.gz → 2025.7.8.dev20250708__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (167):
  1. {nv_ingest_api-2025.6.24.dev20250625/src/nv_ingest_api.egg-info → nv_ingest_api-2025.7.8.dev20250708}/PKG-INFO +1 -1
  2. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/interface/utility.py +2 -16
  3. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +10 -0
  4. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +2 -2
  5. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/transform/embed_text.py +1 -1
  6. nv_ingest_api-2025.7.8.dev20250708/src/nv_ingest_api/util/imports/callable_signatures.py +50 -0
  7. nv_ingest_api-2025.7.8.dev20250708/src/nv_ingest_api/util/imports/dynamic_resolvers.py +110 -0
  8. nv_ingest_api-2025.7.8.dev20250708/src/nv_ingest_api/util/service_clients/redis/__init__.py +3 -0
  9. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
  10. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api.egg-info/SOURCES.txt +3 -0
  11. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/LICENSE +0 -0
  12. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/MANIFEST.in +0 -0
  13. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/README.md +0 -0
  14. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/pyproject.toml +0 -0
  15. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/setup.cfg +0 -0
  16. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/__init__.py +0 -0
  17. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/interface/__init__.py +0 -0
  18. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/interface/extract.py +0 -0
  19. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/interface/mutate.py +0 -0
  20. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/interface/store.py +0 -0
  21. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/interface/transform.py +0 -0
  22. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/__init__.py +0 -0
  23. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
  24. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/enums/common.py +0 -0
  25. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
  26. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
  27. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
  28. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
  29. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
  30. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  31. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
  32. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
  33. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
  34. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
  35. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
  36. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
  37. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
  38. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
  39. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
  40. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
  41. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
  42. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
  43. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
  44. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
  45. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
  46. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
  47. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
  48. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
  49. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
  50. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
  51. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
  52. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
  53. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
  54. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  55. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
  56. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
  57. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
  58. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
  59. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
  60. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
  61. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
  62. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
  63. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
  64. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
  65. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
  66. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
  67. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
  68. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
  69. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
  70. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
  71. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +0 -0
  72. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
  73. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
  74. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
  75. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
  76. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
  77. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
  78. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  79. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
  80. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
  81. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
  82. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
  83. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
  84. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -0
  85. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
  86. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
  87. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
  88. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
  89. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
  90. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -0
  91. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
  92. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
  93. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
  94. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
  95. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
  96. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
  97. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
  98. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
  99. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +0 -0
  100. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
  101. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
  102. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
  103. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
  104. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
  105. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
  106. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
  107. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -0
  108. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
  109. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/store/__init__.py +0 -0
  110. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
  111. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
  112. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
  113. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
  114. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
  115. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/__init__.py +0 -0
  116. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
  117. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/control_message/validators.py +0 -0
  118. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/converters/__init__.py +0 -0
  119. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
  120. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/converters/containers.py +0 -0
  121. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/converters/datetools.py +0 -0
  122. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/converters/dftools.py +0 -0
  123. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/converters/formats.py +0 -0
  124. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
  125. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
  126. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/detectors/language.py +0 -0
  127. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
  128. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
  129. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
  130. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
  131. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
  132. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
  133. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
  134. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
  135. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
  136. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
  137. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
  138. {nv_ingest_api-2025.6.24.dev20250625/src/nv_ingest_api/util/message_brokers → nv_ingest_api-2025.7.8.dev20250708/src/nv_ingest_api/util/imports}/__init__.py +0 -0
  139. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/logging/__init__.py +0 -0
  140. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/logging/configuration.py +0 -0
  141. {nv_ingest_api-2025.6.24.dev20250625/src/nv_ingest_api/util/schema → nv_ingest_api-2025.7.8.dev20250708/src/nv_ingest_api/util/message_brokers}/__init__.py +0 -0
  142. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
  143. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
  144. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
  145. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
  146. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
  147. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
  148. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
  149. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
  150. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/nim/__init__.py +0 -0
  151. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
  152. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
  153. {nv_ingest_api-2025.6.24.dev20250625/src/nv_ingest_api/util/service_clients → nv_ingest_api-2025.7.8.dev20250708/src/nv_ingest_api/util/schema}/__init__.py +0 -0
  154. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
  155. {nv_ingest_api-2025.6.24.dev20250625/src/nv_ingest_api/util/service_clients/redis → nv_ingest_api-2025.7.8.dev20250708/src/nv_ingest_api/util/service_clients}/__init__.py +0 -0
  156. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
  157. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
  158. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
  159. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  160. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
  161. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
  162. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/system/__init__.py +0 -0
  163. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
  164. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
  165. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api.egg-info/requires.txt +0 -0
  166. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
  167. {nv_ingest_api-2025.6.24.dev20250625 → nv_ingest_api-2025.7.8.dev20250708}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.6.24.dev20250625
3
+ Version: 2025.7.8.dev20250708
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -3,7 +3,6 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  import base64
6
- import os
7
6
  from io import BytesIO
8
7
 
9
8
  import pandas as pd
@@ -33,21 +32,6 @@ DOCUMENT_TO_CONTENT_MAPPING = {
33
32
  }
34
33
 
35
34
 
36
- # ------------------------------------------------------------------------------
37
- # Helper function to get the document type from a file extension.
38
- # ------------------------------------------------------------------------------
39
- def get_document_type_from_extension(file_path: str) -> str:
40
- ext = os.path.splitext(file_path)[1].lower()
41
- mapping = {
42
- ".png": DocumentTypeEnum.PNG,
43
- ".jpg": DocumentTypeEnum.JPEG,
44
- ".jpeg": DocumentTypeEnum.JPEG,
45
- ".tiff": DocumentTypeEnum.TIFF,
46
- ".svg": DocumentTypeEnum.SVG,
47
- }
48
- return mapping.get(ext, DocumentTypeEnum.UNKNOWN)
49
-
50
-
51
35
  # ------------------------------------------------------------------------------
52
36
  # Helper function to read a file and return its base64-encoded string.
53
37
  # ------------------------------------------------------------------------------
@@ -99,6 +83,7 @@ def create_source_metadata(source_name: str, source_id: str, document_type: str)
99
83
  "summary": "",
100
84
  "partition_id": -1,
101
85
  "access_level": "unknown", # You may wish to adjust this if needed.
86
+ "custom_content": {},
102
87
  }
103
88
 
104
89
 
@@ -130,6 +115,7 @@ def create_content_metadata(document_type: str) -> dict:
130
115
  },
131
116
  },
132
117
  "subtype": "",
118
+ "custom_content": {},
133
119
  }
134
120
 
135
121
 
@@ -46,6 +46,7 @@ class SourceMetadataSchema(BaseModelNoExt):
46
46
  summary: str = ""
47
47
  partition_id: int = -1
48
48
  access_level: Union[AccessLevelEnum, int] = AccessLevelEnum.UNKNOWN
49
+ custom_content: Optional[Dict[str, Any]] = None
49
50
 
50
51
  @field_validator("date_created", "last_modified")
51
52
  @classmethod
@@ -99,6 +100,7 @@ class ContentMetadataSchema(BaseModelNoExt):
99
100
  subtype: Union[ContentTypeEnum, str] = ""
100
101
  start_time: int = -1
101
102
  end_time: int = -1
103
+ custom_content: Optional[Dict[str, Any]] = None
102
104
 
103
105
 
104
106
  class TextMetadataSchema(BaseModelNoExt):
@@ -108,6 +110,7 @@ class TextMetadataSchema(BaseModelNoExt):
108
110
  language: LanguageEnum = "en" # default to Unknown? Maybe do some kind of heuristic check
109
111
  text_location: tuple = (0, 0, 0, 0)
110
112
  text_location_max_dimensions: tuple = (0, 0, 0, 0)
113
+ custom_content: Optional[Dict[str, Any]] = None
111
114
 
112
115
 
113
116
  class ImageMetadataSchema(BaseModelNoExt):
@@ -120,6 +123,7 @@ class ImageMetadataSchema(BaseModelNoExt):
120
123
  uploaded_image_url: str = ""
121
124
  width: int = 0
122
125
  height: int = 0
126
+ custom_content: Optional[Dict[str, Any]] = None
123
127
 
124
128
  @field_validator("image_type")
125
129
  def validate_image_type(cls, v):
@@ -143,6 +147,7 @@ class TableMetadataSchema(BaseModelNoExt):
143
147
  table_location: tuple = (0, 0, 0, 0)
144
148
  table_location_max_dimensions: tuple = (0, 0)
145
149
  uploaded_image_uri: str = ""
150
+ custom_content: Optional[Dict[str, Any]] = None
146
151
 
147
152
 
148
153
  class ChartMetadataSchema(BaseModelNoExt):
@@ -153,11 +158,13 @@ class ChartMetadataSchema(BaseModelNoExt):
153
158
  table_location: tuple = (0, 0, 0, 0)
154
159
  table_location_max_dimensions: tuple = (0, 0)
155
160
  uploaded_image_uri: str = ""
161
+ custom_content: Optional[Dict[str, Any]] = None
156
162
 
157
163
 
158
164
  class AudioMetadataSchema(BaseModelNoExt):
159
165
  audio_transcript: str = ""
160
166
  audio_type: str = ""
167
+ custom_content: Optional[Dict[str, Any]] = None
161
168
 
162
169
 
163
170
  # TODO consider deprecating this in favor of info msg...
@@ -166,6 +173,7 @@ class ErrorMetadataSchema(BaseModelNoExt):
166
173
  status: StatusEnum
167
174
  source_id: str = ""
168
175
  error_msg: str
176
+ custom_content: Optional[Dict[str, Any]] = None
169
177
 
170
178
 
171
179
  class InfoMessageMetadataSchema(BaseModelNoExt):
@@ -173,6 +181,7 @@ class InfoMessageMetadataSchema(BaseModelNoExt):
173
181
  status: StatusEnum
174
182
  message: str
175
183
  filter: bool
184
+ custom_content: Optional[Dict[str, Any]] = None
176
185
 
177
186
 
178
187
  # Main metadata schema
@@ -191,6 +200,7 @@ class MetadataSchema(BaseModelNoExt):
191
200
  info_message_metadata: Optional[InfoMessageMetadataSchema] = None
192
201
  debug_metadata: Optional[Dict[str, Any]] = None
193
202
  raise_on_failure: bool = False
203
+ custom_content: Optional[Dict[str, Any]] = None
194
204
 
195
205
  @model_validator(mode="before")
196
206
  @classmethod
@@ -8,8 +8,8 @@ from pydantic import ConfigDict, BaseModel
8
8
 
9
9
  class ImageCaptionExtractionSchema(BaseModel):
10
10
  api_key: str = "api_key"
11
- endpoint_url: str = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct/chat/completions"
11
+ endpoint_url: str = "https://ai.api.nvidia.com/v1/gr/nvidia/llama-3.1-nemotron-nano-vl-8b-v1/chat/completions"
12
12
  prompt: str = "Caption the content of this image:"
13
- model_name: str = "meta/llama-3.2-11b-vision-instruct"
13
+ model_name: str = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
14
14
  raise_on_failure: bool = False
15
15
  model_config = ConfigDict(extra="forbid")
@@ -466,7 +466,7 @@ def transform_create_text_embeddings_internal(
466
466
 
467
467
  for content_type, content_getter in pandas_content_extractor.items():
468
468
  if not content_getter:
469
- logger.debug(f"Skipping unsupported content type: {content_type}")
469
+ logger.warning(f"Skipping text_embedding generation for unsupported content type: {content_type}")
470
470
  continue
471
471
 
472
472
  # Get rows matching the content type
@@ -0,0 +1,50 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+
6
+ import inspect
7
+ from pydantic import BaseModel
8
+
9
+ from nv_ingest_api.internal.primitives.ingest_control_message import IngestControlMessage
10
+
11
+
12
+ def ingest_stage_callable_signature(sig: inspect.Signature):
13
+ """
14
+ Validates that a callable has the signature:
15
+ (IngestControlMessage, BaseModel) -> IngestControlMessage
16
+
17
+ Raises
18
+ ------
19
+ TypeError
20
+ If the signature does not match the expected pattern.
21
+ """
22
+ params = list(sig.parameters.values())
23
+
24
+ if len(params) != 2:
25
+ raise TypeError(f"Expected exactly 2 parameters, got {len(params)}")
26
+
27
+ if params[0].name != "control_message" or params[1].name != "stage_config":
28
+ raise TypeError("Expected parameter names: 'control_message', 'config'")
29
+
30
+ first_param = params[0].annotation
31
+ second_param = params[1].annotation
32
+ return_type = sig.return_annotation
33
+
34
+ if first_param is inspect.Parameter.empty:
35
+ raise TypeError("First parameter must be annotated with IngestControlMessage")
36
+
37
+ if second_param is inspect.Parameter.empty:
38
+ raise TypeError("Second parameter must be annotated with a subclass of BaseModel")
39
+
40
+ if return_type is inspect.Signature.empty:
41
+ raise TypeError("Return type must be annotated with IngestControlMessage")
42
+
43
+ if not issubclass(first_param, IngestControlMessage):
44
+ raise TypeError(f"First parameter must be IngestControlMessage, got {first_param}")
45
+
46
+ if not (issubclass(second_param, BaseModel)):
47
+ raise TypeError(f"Second parameter must be a subclass of BaseModel, got {second_param}")
48
+
49
+ if not issubclass(return_type, IngestControlMessage):
50
+ raise TypeError(f"Return type must be IngestControlMessage, got {return_type}")
@@ -0,0 +1,110 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import importlib
6
+ import inspect
7
+ from typing import Callable, Union, List, Optional
8
+
9
+
10
def resolve_obj_from_path(path: str, allowed_base_paths: Optional[List[str]] = None) -> object:
    """
    Import and return an object from a string path of the form 'module.sub:attr'.

    To enhance security, this function can restrict imports to a list of allowed base module paths.

    Parameters
    ----------
    path : str
        Target in the format 'module.sub:attr'. Only the first ':' separates the
        module path from the attribute name.
    allowed_base_paths : Optional[List[str]]
        Base module paths from which imports are permitted. A module is accepted when
        it equals a base path or is a dotted descendant of one. When None or empty,
        no restriction is applied.

    Returns
    -------
    object
        The attribute looked up on the imported module.

    Raises
    ------
    ValueError
        If the path lacks a ':' separator or has an empty module or attribute part.
    ImportError
        If the module is outside the allowed base paths or cannot be found.
    AttributeError
        If the module has no attribute with the requested name.
    """
    module_path, separator, attr_name = path.partition(":")
    # Reject 'module', ':attr' and 'module:' up front so that malformed input always
    # surfaces as the documented ValueError instead of an importlib/getattr error.
    if not separator or not module_path or not attr_name:
        raise ValueError(f"Invalid path '{path}': expected format 'module.sub:attr'")

    # Security check: only allow imports from specified base paths if provided.
    if allowed_base_paths:
        # Compare on dotted boundaries so that base 'pkg' does not admit 'pkg_evil'.
        is_allowed = any(module_path == base or module_path.startswith(base + ".") for base in allowed_base_paths)
        if not is_allowed:
            raise ImportError(
                f"Module '{module_path}' is not in the list of allowed base paths. "
                f"Allowed paths: {allowed_base_paths}"
            )

    try:
        mod = importlib.import_module(module_path)
    except ModuleNotFoundError as e:
        raise ImportError(f"Could not import module '{module_path}'") from e

    try:
        return getattr(mod, attr_name)
    except AttributeError as e:
        raise AttributeError(f"Module '{module_path}' has no attribute '{attr_name}'") from e
38
+
39
+
40
def resolve_callable_from_path(
    callable_path: str,
    signature_schema: Union[List[str], Callable[[inspect.Signature], None], str],
    allowed_base_paths: Optional[List[str]] = None,
) -> Callable:
    """
    Import and return a callable from a module path string like 'module.submodule:callable_name',
    and validate its signature using the required signature_schema (callable or path to callable).

    Parameters
    ----------
    callable_path : str
        The module path and callable in the format 'module.sub:callable'.
    signature_schema : Union[List[str], Callable, str]
        Either:
          - A list of parameter names to require.
          - A callable that takes an inspect.Signature and raises on failure.
          - A string path to such a callable ('module.sub:schema_checker').
    allowed_base_paths : Optional[List[str]]
        An optional list of base module paths from which imports are allowed.
        If provided, both the callable and any signature schema specified by path
        must reside within one of these paths.

    Returns
    -------
    Callable
        The resolved and validated callable.

    Raises
    ------
    ValueError
        If the path is not correctly formatted.
    ImportError
        If the module cannot be imported or is not in the allowed paths.
    AttributeError
        If the attribute does not exist in the module.
    TypeError
        If the resolved attribute is not callable, its signature cannot be
        introspected, the signature does not match, or signature_schema is invalid.
    """
    obj = resolve_obj_from_path(callable_path, allowed_base_paths=allowed_base_paths)
    if not callable(obj):
        raise TypeError(f"Object '{callable_path}' is not callable")

    # Load/check signature_schema
    schema_checker = signature_schema
    if isinstance(signature_schema, str):
        # When loading the schema checker, apply the same security restrictions.
        schema_checker = resolve_obj_from_path(signature_schema, allowed_base_paths=allowed_base_paths)

    try:
        sig = inspect.signature(obj)
    except (ValueError, TypeError) as e:
        # inspect.signature() cannot describe every callable (e.g. some builtins).
        # Surface that as a signature problem (TypeError) rather than leaking a
        # ValueError, which callers would read as a malformed path.
        raise TypeError(f"Could not determine the signature of callable at '{callable_path}': {e}") from e

    if isinstance(schema_checker, list):
        # Name-based schema: every listed parameter must be present on the callable.
        actual_params = list(sig.parameters.keys())
        missing = [p for p in schema_checker if p not in actual_params]
        if missing:
            raise TypeError(
                f"Callable at '{callable_path}' is missing required parameters: {missing}\n"
                f"Actual parameters: {actual_params}"
            )
    elif callable(schema_checker):
        try:
            schema_checker(sig)
        except Exception as e:
            # Any failure raised by the checker is normalized to TypeError and chained.
            raise TypeError(
                f"Callable at '{callable_path}' failed custom signature validation:\n"
                f"  Signature: {sig}\n"
                f"  Error: {e}"
            ) from e
    elif isinstance(signature_schema, str):
        # The path resolved, but to something unusable; report the resolved type
        # instead of the misleading "<class 'str'>" of the path itself.
        raise TypeError(
            f"Invalid signature_schema: path '{signature_schema}' resolved to {type(schema_checker)}, "
            f"expected a list or callable"
        )
    else:
        raise TypeError(f"Invalid signature_schema: expected list, callable, or str, got {type(signature_schema)}")

    return obj
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.6.24.dev20250625
3
+ Version: 2025.7.8.dev20250708
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -134,6 +134,9 @@ src/nv_ingest_api/util/image_processing/clustering.py
134
134
  src/nv_ingest_api/util/image_processing/processing.py
135
135
  src/nv_ingest_api/util/image_processing/table_and_chart.py
136
136
  src/nv_ingest_api/util/image_processing/transforms.py
137
+ src/nv_ingest_api/util/imports/__init__.py
138
+ src/nv_ingest_api/util/imports/callable_signatures.py
139
+ src/nv_ingest_api/util/imports/dynamic_resolvers.py
137
140
  src/nv_ingest_api/util/logging/__init__.py
138
141
  src/nv_ingest_api/util/logging/configuration.py
139
142
  src/nv_ingest_api/util/message_brokers/__init__.py