nv-ingest-api 2025.7.14.dev20250714__tar.gz → 2025.7.16.dev20250716__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (168) hide show
  1. {nv_ingest_api-2025.7.14.dev20250714/src/nv_ingest_api.egg-info → nv_ingest_api-2025.7.16.dev20250716}/PKG-INFO +2 -1
  2. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/pyproject.toml +1 -0
  3. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/enums/common.py +6 -0
  4. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +2 -1
  5. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +32 -20
  6. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +25 -1
  7. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +1 -0
  8. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +7 -12
  9. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +5 -0
  10. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +4 -0
  11. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/transform/embed_text.py +103 -12
  12. nv_ingest_api-2025.7.16.dev20250716/src/nv_ingest_api/util/image_processing/transforms.py +660 -0
  13. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/metadata/aggregators.py +4 -1
  14. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/pdf/pdfium.py +5 -13
  15. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716/src/nv_ingest_api.egg-info}/PKG-INFO +2 -1
  16. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api.egg-info/requires.txt +1 -0
  17. nv_ingest_api-2025.7.14.dev20250714/src/nv_ingest_api/util/image_processing/transforms.py +0 -407
  18. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/LICENSE +0 -0
  19. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/MANIFEST.in +0 -0
  20. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/README.md +0 -0
  21. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/setup.cfg +0 -0
  22. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/__init__.py +0 -0
  23. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/interface/__init__.py +0 -0
  24. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/interface/extract.py +0 -0
  25. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/interface/mutate.py +0 -0
  26. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/interface/store.py +0 -0
  27. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/interface/transform.py +0 -0
  28. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/interface/utility.py +0 -0
  29. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/__init__.py +0 -0
  30. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
  31. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
  32. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
  33. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
  34. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
  35. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
  36. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  37. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
  38. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
  39. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
  40. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
  41. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
  42. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
  43. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
  44. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
  45. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
  46. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
  47. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
  48. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
  49. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
  50. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
  51. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
  52. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
  53. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
  54. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
  55. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
  56. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
  57. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  58. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
  59. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
  60. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
  61. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
  62. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
  63. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
  64. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
  65. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
  66. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
  67. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
  68. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
  69. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
  70. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
  71. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
  72. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
  73. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +0 -0
  74. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
  75. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
  76. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
  77. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
  78. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
  79. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  80. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
  81. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
  82. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
  83. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
  84. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
  85. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -0
  86. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
  87. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
  88. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
  89. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
  90. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
  91. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -0
  92. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
  93. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
  94. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
  95. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
  96. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
  97. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
  98. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
  99. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
  100. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -0
  101. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
  102. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
  103. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
  104. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
  105. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
  106. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
  107. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -0
  108. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
  109. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
  110. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/store/__init__.py +0 -0
  111. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
  112. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
  113. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
  114. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
  115. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
  116. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/__init__.py +0 -0
  117. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
  118. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/control_message/validators.py +0 -0
  119. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/converters/__init__.py +0 -0
  120. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
  121. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/converters/containers.py +0 -0
  122. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/converters/datetools.py +0 -0
  123. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/converters/dftools.py +0 -0
  124. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/converters/formats.py +0 -0
  125. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
  126. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
  127. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/detectors/language.py +0 -0
  128. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
  129. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
  130. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
  131. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
  132. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
  133. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
  134. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
  135. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
  136. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
  137. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
  138. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/imports/__init__.py +0 -0
  139. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/imports/callable_signatures.py +0 -0
  140. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/imports/dynamic_resolvers.py +0 -0
  141. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/logging/__init__.py +0 -0
  142. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/logging/configuration.py +0 -0
  143. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
  144. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
  145. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
  146. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
  147. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
  148. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
  149. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
  150. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
  151. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/nim/__init__.py +0 -0
  152. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
  153. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/schema/__init__.py +0 -0
  154. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
  155. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
  156. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
  157. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
  158. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
  159. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
  160. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  161. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
  162. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
  163. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/system/__init__.py +0 -0
  164. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
  165. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api.egg-info/SOURCES.txt +0 -0
  166. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
  167. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
  168. {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.16.dev20250716}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.7.14.dev20250714
3
+ Version: 2025.7.16.dev20250716
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -217,6 +217,7 @@ Requires-Dist: backoff==2.2.1
217
217
  Requires-Dist: pandas>=2.0
218
218
  Requires-Dist: pydantic>2.0.0
219
219
  Requires-Dist: pydantic-settings>2.0.0
220
+ Requires-Dist: tritonclient
220
221
  Dynamic: license-file
221
222
 
222
223
  # nv-ingest-api
@@ -21,6 +21,7 @@ dependencies = [
21
21
  "pandas>=2.0",
22
22
  "pydantic>2.0.0",
23
23
  "pydantic-settings>2.0.0",
24
+ "tritonclient",
24
25
  ]
25
26
 
26
27
  [project.urls]
@@ -52,6 +52,8 @@ class ContentDescriptionEnum(str, Enum):
52
52
  Description for image extracted from PDF document.
53
53
  PDF_INFOGRAPHIC : str
54
54
  Description for structured infographic extracted from PDF document.
55
+ PDF_PAGE_IMAGE : str
56
+ Description for a full-page image rendered from a PDF document.
55
57
  PDF_TABLE : str
56
58
  Description for structured table extracted from PDF document.
57
59
  PDF_TEXT : str
@@ -70,6 +72,7 @@ class ContentDescriptionEnum(str, Enum):
70
72
  PDF_CHART: str = "Structured chart extracted from PDF document."
71
73
  PDF_IMAGE: str = "Image extracted from PDF document."
72
74
  PDF_INFOGRAPHIC: str = "Structured infographic extracted from PDF document."
75
+ PDF_PAGE_IMAGE: str = "Full-page image rendered from a PDF document."
73
76
  PDF_TABLE: str = "Structured table extracted from PDF document."
74
77
  PDF_TEXT: str = "Unstructured text from PDF document."
75
78
  PPTX_IMAGE: str = "Image extracted from PPTX presentation."
@@ -94,6 +97,8 @@ class ContentTypeEnum(str, Enum):
94
97
  Represents image content.
95
98
  INFO_MSG : str
96
99
  Represents an informational message.
100
+ PAGE_IMAGE : str
101
+ Represents a full-page image rendered from a document.
97
102
  STRUCTURED : str
98
103
  Represents structured content.
99
104
  TEXT : str
@@ -111,6 +116,7 @@ class ContentTypeEnum(str, Enum):
111
116
  INFOGRAPHIC: str = "infographic"
112
117
  INFO_MSG: str = "info_message"
113
118
  NONE: str = "none"
119
+ PAGE_IMAGE: str = "page_image"
114
120
  STRUCTURED: str = "structured"
115
121
  TABLE: str = "table"
116
122
  TEXT: str = "text"
@@ -40,6 +40,7 @@ from nv_ingest_api.internal.schemas.meta.metadata_schema import validate_metadat
40
40
  from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
41
41
  YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
42
42
  YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
43
+ YOLOX_PAGE_IMAGE_FORMAT,
43
44
  )
44
45
  from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import NemoRetrieverParseConfigSchema
45
46
  from nv_ingest_api.util.metadata.aggregators import (
@@ -355,7 +356,7 @@ def nemoretriever_parse_extractor(
355
356
  img_numpy = crop_image(page_image, transformed_bbox)
356
357
 
357
358
  if img_numpy is not None:
358
- base64_img = numpy_to_base64(img_numpy)
359
+ base64_img = numpy_to_base64(img_numpy, format=YOLOX_PAGE_IMAGE_FORMAT)
359
360
  image = Base64Image(
360
361
  image=base64_img,
361
362
  bbox=transformed_bbox,
@@ -4,20 +4,21 @@
4
4
  # Copyright (c) 2024, NVIDIA CORPORATION.
5
5
 
6
6
  import base64
7
+ import inspect
7
8
  import io
8
-
9
- import pandas as pd
10
- from typing import Any, Dict, List, Optional
11
9
  import logging
10
+ from typing import Any
11
+ from typing import Dict
12
+ from typing import List
13
+ from typing import Optional
12
14
 
13
- from nv_ingest_api.internal.extract.pdf.engines import (
14
- adobe_extractor,
15
- llama_parse_extractor,
16
- nemoretriever_parse_extractor,
17
- pdfium_extractor,
18
- tika_extractor,
19
- unstructured_io_extractor,
20
- )
15
+ import pandas as pd
16
+ from nv_ingest_api.internal.extract.pdf.engines import adobe_extractor
17
+ from nv_ingest_api.internal.extract.pdf.engines import llama_parse_extractor
18
+ from nv_ingest_api.internal.extract.pdf.engines import nemoretriever_parse_extractor
19
+ from nv_ingest_api.internal.extract.pdf.engines import pdfium_extractor
20
+ from nv_ingest_api.internal.extract.pdf.engines import tika_extractor
21
+ from nv_ingest_api.internal.extract.pdf.engines import unstructured_io_extractor
21
22
  from nv_ingest_api.util.exception_handlers.decorators import unified_exception_handler
22
23
 
23
24
  # Import extraction functions for different engines.
@@ -43,6 +44,7 @@ def _work_extract_pdf(
43
44
  extract_infographics: bool,
44
45
  extract_tables: bool,
45
46
  extract_charts: bool,
47
+ extract_page_as_image: bool,
46
48
  extractor_config: dict,
47
49
  execution_trace_log=None,
48
50
  ) -> Any:
@@ -52,17 +54,25 @@ def _work_extract_pdf(
52
54
 
53
55
  extract_method = extractor_config["extract_method"]
54
56
  extractor_fn = EXTRACTOR_LOOKUP.get(extract_method, pdfium_extractor)
55
- return extractor_fn(
56
- pdf_stream,
57
- extract_text,
58
- extract_images,
59
- extract_infographics,
60
- extract_tables,
61
- extract_charts,
62
- extractor_config,
63
- execution_trace_log,
57
+
58
+ extractor_fn_args = dict(
59
+ pdf_stream=pdf_stream,
60
+ extract_text=extract_text,
61
+ extract_images=extract_images,
62
+ extract_infographics=extract_infographics,
63
+ extract_tables=extract_tables,
64
+ extract_charts=extract_charts,
65
+ extractor_config=extractor_config,
66
+ execution_trace_log=execution_trace_log,
64
67
  )
65
68
 
69
+ if "extract_page_as_image" in inspect.signature(extractor_fn).parameters:
70
+ extractor_fn_args["extract_page_as_image"] = extract_page_as_image
71
+ elif extract_page_as_image:
72
+ logger.warning(f"`extract_page_as_image` is set to True, but {extract_method} does not support it.")
73
+
74
+ return extractor_fn(**extractor_fn_args)
75
+
66
76
 
67
77
  @unified_exception_handler
68
78
  def _orchestrate_row_extraction(
@@ -97,6 +107,7 @@ def _orchestrate_row_extraction(
97
107
  extract_tables = params.pop("extract_tables", False)
98
108
  extract_charts = params.pop("extract_charts", False)
99
109
  extract_infographics = params.pop("extract_infographics", False)
110
+ extract_page_as_image = params.pop("extract_page_as_image", False)
100
111
  extract_method = params.get("extract_method", "pdfium")
101
112
  except KeyError as e:
102
113
  raise ValueError(f"Missing required extraction flag: {e}")
@@ -137,6 +148,7 @@ def _orchestrate_row_extraction(
137
148
  extract_text=extract_text,
138
149
  extract_images=extract_images,
139
150
  extract_infographics=extract_infographics,
151
+ extract_page_as_image=extract_page_as_image,
140
152
  extract_tables=extract_tables,
141
153
  extract_charts=extract_charts,
142
154
  extractor_config=extractor_config,
@@ -24,16 +24,19 @@ import numpy as np
24
24
  import pandas as pd
25
25
  import pypdfium2 as libpdfium
26
26
 
27
+ from nv_ingest_api.internal.enums.common import ContentTypeEnum
27
28
  from nv_ingest_api.internal.primitives.nim.default_values import YOLOX_MAX_BATCH_SIZE
28
29
  from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
29
30
  YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
30
31
  YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
32
+ YOLOX_PAGE_IMAGE_FORMAT,
31
33
  get_yolox_model_name,
32
34
  YoloxPageElementsModelInterface,
33
35
  )
34
36
  from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import PDFiumConfigSchema
35
37
  from nv_ingest_api.internal.enums.common import TableFormatEnum, TextTypeEnum, AccessLevelEnum
36
38
  from nv_ingest_api.util.metadata.aggregators import (
39
+ construct_image_metadata_from_base64,
37
40
  construct_image_metadata_from_pdf_image,
38
41
  extract_pdf_metadata,
39
42
  construct_text_metadata,
@@ -46,6 +49,7 @@ from nv_ingest_api.util.pdf.pdfium import (
46
49
  extract_image_like_objects_from_pdfium_page,
47
50
  )
48
51
  from nv_ingest_api.util.pdf.pdfium import pdfium_pages_to_numpy
52
+ from nv_ingest_api.util.image_processing import scale_image_to_encoding_size
49
53
  from nv_ingest_api.util.image_processing.transforms import numpy_to_base64, crop_image
50
54
 
51
55
  logger = logging.getLogger(__name__)
@@ -186,7 +190,7 @@ def _extract_page_element_images(
186
190
  if cropped is None:
187
191
  continue
188
192
 
189
- base64_img = numpy_to_base64(cropped)
193
+ base64_img = numpy_to_base64(cropped, format=YOLOX_PAGE_IMAGE_FORMAT)
190
194
 
191
195
  bbox_in_orig_coord = (
192
196
  int(w1) - pad_width,
@@ -384,6 +388,7 @@ def pdfium_extractor(
384
388
  extract_infographics: bool,
385
389
  extract_tables: bool,
386
390
  extract_charts: bool,
391
+ extract_page_as_image: bool,
387
392
  extractor_config: dict,
388
393
  execution_trace_log: Optional[List[Any]] = None,
389
394
  ) -> pd.DataFrame:
@@ -524,6 +529,24 @@ def pdfium_extractor(
524
529
  )
525
530
  extracted_data.extend(image_data)
526
531
 
532
+ # Full page image extraction
533
+ if extract_page_as_image:
534
+ page_text = _extract_page_text(page)
535
+ image, _ = pdfium_pages_to_numpy([page], scale_tuple=(16384, 16384), trace_info=execution_trace_log)
536
+ base64_image = numpy_to_base64(image[0])
537
+ if len(base64_image) > 2**24 - 1:
538
+ base64_image, _ = scale_image_to_encoding_size(base64_image, max_base64_size=2**24 - 1)
539
+ image_meta = construct_image_metadata_from_base64(
540
+ base64_image,
541
+ page_idx,
542
+ page_count,
543
+ source_metadata,
544
+ base_unified_metadata,
545
+ subtype=ContentTypeEnum.PAGE_IMAGE,
546
+ text=page_text,
547
+ )
548
+ extracted_data.append(image_meta)
549
+
527
550
  # If we want tables or charts, rasterize the page and store it
528
551
  if extract_tables or extract_charts or extract_infographics:
529
552
  image, padding_offsets = pdfium_pages_to_numpy(
@@ -574,6 +597,7 @@ def pdfium_extractor(
574
597
  execution_trace_log=execution_trace_log,
575
598
  )
576
599
  futures.append(future)
600
+
577
601
  pages_for_tables.clear()
578
602
 
579
603
  # Wait for all asynchronous jobs to complete.
@@ -120,6 +120,7 @@ class NemoRetrieverParseModelInterface(ModelInterface):
120
120
  logger.debug("Formatting input for HTTP NemoRetrieverParse model")
121
121
  # Prepare payload for HTTP request
122
122
 
123
+ ## TODO: Ask @Edward Kim if we want to switch to JPEG/PNG here
123
124
  if "images" in data:
124
125
  base64_list = [numpy_to_base64(img) for img in data["images"]]
125
126
  else:
@@ -2,9 +2,7 @@
2
2
  # All rights reserved.
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
-
6
- import base64
7
- import io
5
+ import os
8
6
  import logging
9
7
  import warnings
10
8
  from math import log
@@ -20,11 +18,11 @@ import packaging
20
18
  import pandas as pd
21
19
  import torch
22
20
  import torchvision
23
- from PIL import Image
24
21
 
25
22
  from nv_ingest_api.internal.primitives.nim import ModelInterface
26
23
  from nv_ingest_api.internal.primitives.nim.model_interface.helpers import get_model_name
27
24
  from nv_ingest_api.util.image_processing import scale_image_to_encoding_size
25
+ from nv_ingest_api.util.image_processing.transforms import numpy_to_base64
28
26
 
29
27
  logger = logging.getLogger(__name__)
30
28
 
@@ -35,6 +33,7 @@ YOLOX_PAGE_MIN_SCORE = 0.1
35
33
  YOLOX_PAGE_NIM_MAX_IMAGE_SIZE = 512_000
36
34
  YOLOX_PAGE_IMAGE_PREPROC_HEIGHT = 1024
37
35
  YOLOX_PAGE_IMAGE_PREPROC_WIDTH = 1024
36
+ YOLOX_PAGE_IMAGE_FORMAT = os.getenv("YOLOX_PAGE_IMAGE_FORMAT", "PNG")
38
37
 
39
38
  # yolox-page-elements-v1 contants
40
39
  YOLOX_PAGE_V1_NUM_CLASSES = 4
@@ -239,15 +238,11 @@ class YoloxModelInterfaceBase(ModelInterface):
239
238
  # Convert to uint8 if needed.
240
239
  if image.dtype != np.uint8:
241
240
  image = (image * 255).astype(np.uint8)
242
- # Convert the numpy array to a PIL Image.
243
- image_pil = Image.fromarray(image)
244
- original_size = image_pil.size
245
-
246
- # Save the image to a buffer and encode to base64.
247
- buffered = io.BytesIO()
248
- image_pil.save(buffered, format="PNG")
249
- image_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
250
241
 
242
+ # Get original size directly from numpy array (width, height)
243
+ original_size = (image.shape[1], image.shape[0])
244
+ # Convert numpy array directly to base64 using OpenCV
245
+ image_b64 = numpy_to_base64(image, format=YOLOX_PAGE_IMAGE_FORMAT)
251
246
  # Scale the image if necessary.
252
247
  scaled_image_b64, new_size = scale_image_to_encoding_size(
253
248
  image_b64, max_base64_size=self.nim_max_image_size
@@ -107,6 +107,10 @@ class IngestTaskEmbedSchema(BaseModelNoExt):
107
107
  model_name: Optional[str] = None
108
108
  api_key: Optional[str] = None
109
109
  filter_errors: bool = False
110
+ text_elements_modality: Optional[str] = None
111
+ image_elements_modality: Optional[str] = None
112
+ structured_elements_modality: Optional[str] = None
113
+ audio_elements_modality: Optional[str] = None
110
114
 
111
115
 
112
116
  class IngestTaskVdbUploadSchema(BaseModelNoExt):
@@ -195,6 +199,7 @@ class IngestTaskSchema(BaseModelNoExt):
195
199
  validated_task_properties = expected_schema_cls(**task_properties)
196
200
  values["type"] = task_type # ensure type is now always the enum
197
201
  values["task_properties"] = validated_task_properties
202
+
198
203
  return values
199
204
 
200
205
  @field_validator("type", mode="before")
@@ -22,5 +22,9 @@ class TextEmbeddingSchema(BaseModel):
22
22
  input_type: str = Field(default="passage")
23
23
  raise_on_failure: bool = Field(default=False)
24
24
  truncate: str = Field(default="END")
25
+ text_elements_modality: str = Field(default="text")
26
+ image_elements_modality: str = Field(default="text")
27
+ structured_elements_modality: str = Field(default="text")
28
+ audio_elements_modality: str = Field(default="text")
25
29
 
26
30
  model_config = ConfigDict(extra="forbid")
@@ -4,6 +4,7 @@
4
4
 
5
5
  import logging
6
6
  from concurrent.futures import ThreadPoolExecutor
7
+ from functools import partial
7
8
  from typing import Any, Dict, Tuple, Optional, Iterable, List
8
9
 
9
10
  import pandas as pd
@@ -19,6 +20,9 @@ from nv_ingest_api.util.schema.schema_validator import validate_schema
19
20
  logger = logging.getLogger(__name__)
20
21
 
21
22
 
23
+ MULTI_MODAL_MODELS = ["llama-3.2-nemoretriever-1b-vlm-embed-v1"]
24
+
25
+
22
26
  # ------------------------------------------------------------------------------
23
27
  # Asynchronous Embedding Requests
24
28
  # ------------------------------------------------------------------------------
@@ -33,6 +37,7 @@ def _make_async_request(
33
37
  input_type: str,
34
38
  truncate: str,
35
39
  filter_errors: bool,
40
+ modalities: Optional[List[str]] = None,
36
41
  ) -> list:
37
42
  """
38
43
  Interacts directly with the NIM embedding service to calculate embeddings for a batch of prompts.
@@ -74,11 +79,18 @@ def _make_async_request(
74
79
  base_url=embedding_nim_endpoint,
75
80
  )
76
81
 
82
+ extra_body = {
83
+ "input_type": input_type,
84
+ "truncate": truncate,
85
+ }
86
+ if modalities:
87
+ extra_body["modality"] = modalities
88
+
77
89
  resp = client.embeddings.create(
78
90
  input=prompts,
79
91
  model=embedding_model,
80
92
  encoding_format=encoding_format,
81
- extra_body={"input_type": input_type, "truncate": truncate},
93
+ extra_body=extra_body,
82
94
  )
83
95
 
84
96
  response["embedding"] = resp.data
@@ -110,6 +122,7 @@ def _async_request_handler(
110
122
  input_type: str,
111
123
  truncate: str,
112
124
  filter_errors: bool,
125
+ modalities: Optional[List[str]] = None,
113
126
  ) -> List[dict]:
114
127
  """
115
128
  Gathers calculated embedding results from the NIM embedding service concurrently.
@@ -138,6 +151,9 @@ def _async_request_handler(
138
151
  List[dict]
139
152
  A list of response dictionaries from the embedding service.
140
153
  """
154
+ if modalities is None:
155
+ modalities = [None] * len(prompts)
156
+
141
157
  with ThreadPoolExecutor() as executor:
142
158
  futures = [
143
159
  executor.submit(
@@ -150,8 +166,9 @@ def _async_request_handler(
150
166
  input_type=input_type,
151
167
  truncate=truncate,
152
168
  filter_errors=filter_errors,
169
+ modalities=modality_batch,
153
170
  )
154
- for prompt_batch in prompts
171
+ for prompt_batch, modality_batch in zip(prompts, modalities)
155
172
  ]
156
173
  results = [future.result() for future in futures]
157
174
 
@@ -167,6 +184,7 @@ def _async_runner(
167
184
  input_type: str,
168
185
  truncate: str,
169
186
  filter_errors: bool,
187
+ modalities: Optional[List[str]] = None,
170
188
  ) -> dict:
171
189
  """
172
190
  Concurrently launches all NIM embedding requests and flattens the results.
@@ -204,6 +222,7 @@ def _async_runner(
204
222
  input_type,
205
223
  truncate,
206
224
  filter_errors,
225
+ modalities=modalities,
207
226
  )
208
227
 
209
228
  flat_results = {"embeddings": [], "info_msgs": []}
@@ -263,7 +282,19 @@ def _add_embeddings(row, embeddings, info_msgs):
263
282
  return row
264
283
 
265
284
 
266
- def _get_pandas_text_content(row):
285
+ def _format_image_input_string(image_b64: Optional[str]) -> str:
286
+ if not image_b64:
287
+ return
288
+ return f"data:image/png;base64,{image_b64}"
289
+
290
+
291
+ def _format_text_image_pair_input_string(text: Optional[str], image_b64: Optional[str]) -> str:
292
+ if (not text) or (not text.strip()) or (not image_b64):
293
+ return
294
+ return f"{text.strip()} {_format_image_input_string(image_b64)}"
295
+
296
+
297
+ def _get_pandas_text_content(row, modality="text"):
267
298
  """
268
299
  Extracts text content from a DataFrame row.
269
300
 
@@ -280,7 +311,7 @@ def _get_pandas_text_content(row):
280
311
  return row["content"]
281
312
 
282
313
 
283
- def _get_pandas_table_content(row):
314
+ def _get_pandas_table_content(row, modality="text"):
284
315
  """
285
316
  Extracts table/chart content from a DataFrame row.
286
317
 
@@ -294,10 +325,19 @@ def _get_pandas_table_content(row):
294
325
  str
295
326
  The table/chart content from the row.
296
327
  """
297
- return row.get("table_metadata", {}).get("table_content")
328
+ if modality == "text":
329
+ content = row.get("table_metadata", {}).get("table_content")
330
+ elif modality == "image":
331
+ content = _format_image_input_string(row.get("content"))
332
+ elif modality == "text_image":
333
+ text = row.get("table_metadata", {}).get("table_content")
334
+ image = row.get("content")
335
+ content = _format_text_image_pair_input_string(text, image)
336
+
337
+ return content
298
338
 
299
339
 
300
- def _get_pandas_image_content(row):
340
+ def _get_pandas_image_content(row, modality="text"):
301
341
  """
302
342
  Extracts image caption content from a DataFrame row.
303
343
 
@@ -311,10 +351,28 @@ def _get_pandas_image_content(row):
311
351
  str
312
352
  The image caption from the row.
313
353
  """
314
- return row.get("image_metadata", {}).get("caption")
354
+ subtype = row.get("content_metadata", {}).get("subtype")
355
+ if modality == "text":
356
+ if subtype == "page_image":
357
+ content = row.get("image_metadata", {}).get("text")
358
+ else:
359
+ content = row.get("image_metadata", {}).get("caption")
360
+ elif modality == "image":
361
+ content = _format_image_input_string(row.get("content"))
362
+ elif modality == "text_image":
363
+ if subtype == "page_image":
364
+ text = row.get("image_metadata", {}).get("text")
365
+ else:
366
+ text = row.get("image_metadata", {}).get("caption")
367
+ image = row.get("content")
368
+ content = _format_text_image_pair_input_string(text, image)
315
369
 
370
+ # A workaround to save memory.
371
+ row["content"] = ""
372
+ return content
316
373
 
317
- def _get_pandas_audio_content(row):
374
+
375
+ def _get_pandas_audio_content(row, modality="text"):
318
376
  """
319
377
  A pandas UDF used to select extracted audio transcription to be used to create embeddings.
320
378
  """
@@ -408,6 +466,23 @@ def _concatenate_extractions_pandas(
408
466
  # ------------------------------------------------------------------------------
409
467
 
410
468
 
469
+ def does_model_support_multimodal_embeddings(model: str) -> bool:
470
+ """
471
+ Checks if a given model supports multi-modal embeddings.
472
+
473
+ Parameters
474
+ ----------
475
+ model : str
476
+ The name of the model.
477
+
478
+ Returns
479
+ -------
480
+ bool
481
+ True if the model supports multi-modal embeddings, False otherwise.
482
+ """
483
+ return model in MULTI_MODAL_MODELS
484
+
485
+
411
486
  def transform_create_text_embeddings_internal(
412
487
  df_transform_ledger: pd.DataFrame,
413
488
  task_config: Dict[str, Any],
@@ -460,6 +535,15 @@ def transform_create_text_embeddings_internal(
460
535
  ContentTypeEnum.AUDIO: _get_pandas_audio_content,
461
536
  ContentTypeEnum.VIDEO: lambda x: None, # Not supported yet.
462
537
  }
538
+ task_type_to_modality = {
539
+ ContentTypeEnum.TEXT: task_config.get("text_elements_modality") or transform_config.text_elements_modality,
540
+ ContentTypeEnum.STRUCTURED: (
541
+ task_config.get("structured_elements_modality") or transform_config.structured_elements_modality
542
+ ),
543
+ ContentTypeEnum.IMAGE: task_config.get("image_elements_modality") or transform_config.image_elements_modality,
544
+ ContentTypeEnum.AUDIO: task_config.get("audio_elements_modality") or transform_config.audio_elements_modality,
545
+ ContentTypeEnum.VIDEO: lambda x: None, # Not supported yet.
546
+ }
463
547
 
464
548
  def _content_type_getter(row):
465
549
  return row["content_metadata"]["type"]
@@ -480,7 +564,7 @@ def transform_create_text_embeddings_internal(
480
564
  # Extract content and normalize empty or non-str to None
481
565
  extracted_content = (
482
566
  df_content["metadata"]
483
- .apply(content_getter)
567
+ .apply(partial(content_getter, modality=task_type_to_modality[content_type]))
484
568
  .apply(lambda x: x.strip() if isinstance(x, str) and x.strip() else None)
485
569
  )
486
570
  df_content["_content"] = extracted_content
@@ -488,9 +572,15 @@ def transform_create_text_embeddings_internal(
488
572
  # Prepare batches for only valid (non-None) content
489
573
  valid_content_mask = df_content["_content"].notna()
490
574
  if valid_content_mask.any():
491
- filtered_content_batches = _generate_batches(
492
- df_content.loc[valid_content_mask, "_content"].tolist(), batch_size=transform_config.batch_size
493
- )
575
+ filtered_content_list = df_content.loc[valid_content_mask, "_content"].tolist()
576
+ filtered_content_batches = _generate_batches(filtered_content_list, batch_size=transform_config.batch_size)
577
+
578
+ if model_name in MULTI_MODAL_MODELS:
579
+ modality_list = [task_type_to_modality[content_type]] * len(filtered_content_list)
580
+ modality_batches = _generate_batches(modality_list, batch_size=transform_config.batch_size)
581
+ else:
582
+ modality_batches = None
583
+
494
584
  content_embeddings = _async_runner(
495
585
  filtered_content_batches,
496
586
  api_key,
@@ -500,6 +590,7 @@ def transform_create_text_embeddings_internal(
500
590
  transform_config.input_type,
501
591
  transform_config.truncate,
502
592
  False,
593
+ modalities=modality_batches,
503
594
  )
504
595
  # Build a simple row index -> embedding map
505
596
  embeddings_dict = dict(