nv-ingest-api 2025.5.11.dev20250511__tar.gz → 2025.5.13.dev20250513__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (163)
  1. {nv_ingest_api-2025.5.11.dev20250511/src/nv_ingest_api.egg-info → nv_ingest_api-2025.5.13.dev20250513}/PKG-INFO +1 -1
  2. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/transform.py +1 -1
  3. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +3 -3
  4. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/image_extractor.py +5 -5
  5. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +1 -1
  6. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +44 -17
  7. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +1 -1
  8. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +35 -38
  9. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +7 -1
  10. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +17 -9
  11. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +20 -16
  12. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +1 -1
  13. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +2 -2
  14. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +1 -1
  15. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/transform/caption_image.py +1 -1
  16. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/transform/embed_text.py +75 -56
  17. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/exception_handlers/converters.py +1 -1
  18. nv_ingest_api-2025.5.13.dev20250513/src/nv_ingest_api/util/exception_handlers/decorators.py +481 -0
  19. nv_ingest_api-2025.5.13.dev20250513/src/nv_ingest_api/util/logging/configuration.py +38 -0
  20. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/pdf/pdfium.py +1 -1
  21. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +1 -1
  22. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +1 -1
  23. nv_ingest_api-2025.5.13.dev20250513/src/nv_ingest_api/util/system/__init__.py +0 -0
  24. nv_ingest_api-2025.5.13.dev20250513/src/nv_ingest_api/util/system/hardware_info.py +426 -0
  25. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
  26. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api.egg-info/SOURCES.txt +3 -1
  27. nv_ingest_api-2025.5.11.dev20250511/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -223
  28. nv_ingest_api-2025.5.11.dev20250511/src/nv_ingest_api/util/logging/configuration.py +0 -31
  29. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/LICENSE +0 -0
  30. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/MANIFEST.in +0 -0
  31. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/README.md +0 -0
  32. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/pyproject.toml +0 -0
  33. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/setup.cfg +0 -0
  34. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/__init__.py +0 -0
  35. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/__init__.py +0 -0
  36. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/extract.py +0 -0
  37. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/mutate.py +0 -0
  38. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/store.py +0 -0
  39. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/utility.py +0 -0
  40. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/__init__.py +0 -0
  41. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
  42. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/enums/common.py +0 -0
  43. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
  44. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
  45. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
  46. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
  47. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  48. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
  49. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
  50. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
  51. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
  52. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
  53. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
  54. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
  55. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
  56. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
  57. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
  58. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
  59. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
  60. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
  61. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
  62. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
  63. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
  64. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
  65. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
  66. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
  67. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  68. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
  69. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
  70. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
  71. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
  72. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
  73. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
  74. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
  75. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
  76. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
  77. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
  78. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
  79. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
  80. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
  81. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
  82. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +0 -0
  83. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
  84. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
  85. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
  86. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  87. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
  88. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
  89. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
  90. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
  91. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -0
  92. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
  93. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
  94. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
  95. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
  96. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
  97. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
  98. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
  99. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
  100. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
  101. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
  102. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
  103. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
  104. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -0
  105. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
  106. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
  107. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
  108. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
  109. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
  110. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
  111. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
  112. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -0
  113. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
  114. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/store/__init__.py +0 -0
  115. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
  116. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
  117. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
  118. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
  119. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/__init__.py +0 -0
  120. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
  121. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/control_message/validators.py +0 -0
  122. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/__init__.py +0 -0
  123. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
  124. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/containers.py +0 -0
  125. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/datetools.py +0 -0
  126. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/dftools.py +0 -0
  127. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/formats.py +0 -0
  128. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
  129. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
  130. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/detectors/language.py +0 -0
  131. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
  132. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
  133. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
  134. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
  135. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
  136. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
  137. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
  138. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
  139. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
  140. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/logging/__init__.py +0 -0
  141. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
  142. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
  143. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
  144. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
  145. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
  146. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
  147. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
  148. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
  149. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
  150. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/nim/__init__.py +0 -0
  151. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
  152. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/schema/__init__.py +0 -0
  153. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
  154. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
  155. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
  156. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
  157. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
  158. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  159. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
  160. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
  161. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api.egg-info/requires.txt +0 -0
  162. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
  163. {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.5.11.dev20250511
3
+ Version: 2025.5.13.dev20250513
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -207,7 +207,7 @@ def transform_image_create_vlm_caption(
207
207
  "api_key": api_key,
208
208
  "prompt": prompt,
209
209
  "endpoint_url": endpoint_url,
210
- "model_name": model_name,
210
+ "image_caption_model_name": model_name,
211
211
  }
212
212
  filtered_task_config: Dict[str, str] = {k: v for k, v in task_config.items() if v is not None}
213
213
 
@@ -7,7 +7,7 @@ import base64
7
7
  import functools
8
8
  import io
9
9
  import logging
10
- from typing import Optional, Dict, Any, Union
10
+ from typing import Optional, Dict, Any, Union, Tuple
11
11
 
12
12
  import pandas as pd
13
13
  from pydantic import BaseModel
@@ -146,7 +146,7 @@ def extract_primitives_from_docx_internal(
146
146
  task_config: Union[Dict[str, Any], BaseModel],
147
147
  extraction_config: DocxExtractorSchema,
148
148
  execution_trace_log: Optional[Dict[str, Any]] = None,
149
- ) -> pd.DataFrame:
149
+ ) -> Tuple[pd.DataFrame, Union[Dict, None]]:
150
150
  """
151
151
  Processes a pandas DataFrame containing DOCX files encoded in base64, extracting text from
152
152
  each document and replacing the original content with the extracted text.
@@ -202,4 +202,4 @@ def extract_primitives_from_docx_internal(
202
202
  else:
203
203
  extracted_df = pd.DataFrame({"document_type": [], "metadata": [], "uuid": []})
204
204
 
205
- return extracted_df
205
+ return extracted_df, {}
@@ -16,7 +16,7 @@ import pandas as pd
16
16
  from pydantic import BaseModel
17
17
 
18
18
  from nv_ingest_api.internal.extract.image.image_helpers.common import unstructured_image_extractor
19
- from nv_ingest_api.internal.schemas.extract.extract_image_schema import ImageExtractorSchema
19
+ from nv_ingest_api.internal.schemas.extract.extract_image_schema import ImageConfigSchema
20
20
  from nv_ingest_api.util.exception_handlers.decorators import unified_exception_handler
21
21
 
22
22
  logger = logging.getLogger(__name__)
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
26
26
  def _decode_and_extract_from_image(
27
27
  base64_row: pd.Series,
28
28
  task_config: Dict[str, Any],
29
- validated_extraction_config: ImageExtractorSchema,
29
+ validated_extraction_config: ImageConfigSchema,
30
30
  execution_trace_log: Optional[List[Any]] = None,
31
31
  ) -> Any:
32
32
  """
@@ -106,10 +106,10 @@ def _decode_and_extract_from_image(
106
106
 
107
107
  logger.debug(
108
108
  f"decode_and_extract: Extracting image content using image_extraction_config: "
109
- f"{validated_extraction_config.image_extraction_config}"
109
+ f"{validated_extraction_config}"
110
110
  )
111
- if validated_extraction_config.image_extraction_config is not None:
112
- extract_params["image_extraction_config"] = validated_extraction_config.image_extraction_config
111
+ if validated_extraction_config is not None:
112
+ extract_params["image_extraction_config"] = validated_extraction_config
113
113
 
114
114
  if execution_trace_log is not None:
115
115
  extract_params["trace_info"] = execution_trace_log
@@ -476,7 +476,7 @@ def _extract_text_and_bounding_boxes(
476
476
 
477
477
  def _create_clients(nemoretriever_parse_config):
478
478
  model_interface = nemoretriever_parse_utils.NemoRetrieverParseModelInterface(
479
- model_name=nemoretriever_parse_config.model_name,
479
+ model_name=nemoretriever_parse_config.nemoretriever_parse_model_name,
480
480
  )
481
481
  nemoretriever_parse_client = create_inference_client(
482
482
  nemoretriever_parse_config.nemoretriever_parse_endpoints,
@@ -17,7 +17,6 @@
17
17
 
18
18
  import logging
19
19
  import io
20
- import operator
21
20
  import re
22
21
  import uuid
23
22
  from collections import defaultdict
@@ -155,6 +154,12 @@ def _finalize_images(
155
154
  extracted_data.append(image_entry)
156
155
 
157
156
 
157
+ def _safe_position(shape):
158
+ top = shape.top if shape.top is not None else float("inf")
159
+ left = shape.left if shape.left is not None else float("inf")
160
+ return (top, left)
161
+
162
+
158
163
  # -----------------------------------------------------------------------------
159
164
  # Helper Function: Recursive Image Extraction
160
165
  # -----------------------------------------------------------------------------
@@ -283,7 +288,7 @@ def python_pptx(
283
288
 
284
289
  for slide_idx, slide in enumerate(presentation.slides):
285
290
  # Obtain a flat list of shapes (ungrouped) sorted by top then left.
286
- shapes = sorted(ungroup_shapes(slide.shapes), key=operator.attrgetter("top", "left"))
291
+ shapes = sorted(ungroup_shapes(slide.shapes), key=_safe_position)
287
292
 
288
293
  page_nearby_blocks = {
289
294
  "text": {"content": [], "bbox": []},
@@ -656,21 +661,43 @@ def get_bbox(
656
661
  shape_object: Optional[Slide] = None,
657
662
  text_depth: Optional[TextTypeEnum] = None,
658
663
  ):
659
- bbox = (-1, -1, -1, -1)
660
- if text_depth == TextTypeEnum.DOCUMENT:
661
- bbox = (-1, -1, -1, -1)
662
- elif text_depth == TextTypeEnum.PAGE:
663
- top = left = 0
664
- width = presentation_object.slide_width
665
- height = presentation_object.slide_height
666
- bbox = (top, left, top + height, left + width)
667
- elif shape_object:
668
- top = shape_object.top
669
- left = shape_object.left
670
- width = shape_object.width
671
- height = shape_object.height
672
- bbox = (top, left, top + height, left + width)
673
- return bbox
664
+ """
665
+ Safely computes bounding box for a slide, shape, or document.
666
+ Ensures that missing or None values are gracefully handled.
667
+
668
+ Returns
669
+ -------
670
+ Tuple[int, int, int, int]
671
+ Bounding box as (top, left, bottom, right).
672
+ Defaults to (-1, -1, -1, -1) if invalid or unsupported.
673
+ """
674
+ try:
675
+ if text_depth == TextTypeEnum.DOCUMENT:
676
+ return (-1, -1, -1, -1)
677
+
678
+ elif text_depth == TextTypeEnum.PAGE and presentation_object:
679
+ top = left = 0
680
+ width = presentation_object.slide_width
681
+ height = presentation_object.slide_height
682
+ return (top, left, top + height, left + width)
683
+
684
+ elif shape_object:
685
+ top = shape_object.top if shape_object.top is not None else -1
686
+ left = shape_object.left if shape_object.left is not None else -1
687
+ width = shape_object.width if shape_object.width is not None else -1
688
+ height = shape_object.height if shape_object.height is not None else -1
689
+
690
+ # If all are valid, return normally, else return placeholder
691
+ if -1 in [top, left, width, height]:
692
+ return (-1, -1, -1, -1)
693
+
694
+ return (top, left, top + height, left + width)
695
+
696
+ except Exception as e:
697
+ logger.warning(f"get_bbox: Failed to compute bbox due to {e}")
698
+ return (-1, -1, -1, -1)
699
+
700
+ return (-1, -1, -1, -1)
674
701
 
675
702
 
676
703
  def ungroup_shapes(shapes):
@@ -184,4 +184,4 @@ def extract_primitives_from_pptx_internal(
184
184
  else:
185
185
  extracted_df = pd.DataFrame({"document_type": [], "metadata": [], "uuid": []})
186
186
 
187
- return extracted_df
187
+ return extracted_df, {}
@@ -5,9 +5,9 @@
5
5
  from typing import Any, Dict, List, Optional, Tuple
6
6
 
7
7
  from nv_ingest_api.internal.primitives.nim import ModelInterface
8
+ import numpy as np
8
9
 
9
10
 
10
- # Assume ModelInterface is defined elsewhere in the project.
11
11
  class EmbeddingModelInterface(ModelInterface):
12
12
  """
13
13
  An interface for handling inference with an embedding model endpoint.
@@ -22,20 +22,13 @@ class EmbeddingModelInterface(ModelInterface):
22
22
 
23
23
  def prepare_data_for_inference(self, data: Dict[str, Any]) -> Dict[str, Any]:
24
24
  """
25
- Prepare input data for embedding inference. Ensures that a 'prompts' key is provided
26
- and that its value is a list.
27
-
28
- Raises
29
- ------
30
- KeyError
31
- If the 'prompts' key is missing.
25
+ Prepare input data for embedding inference. Returns a list of strings representing the text to be embedded.
32
26
  """
33
27
  if "prompts" not in data:
34
28
  raise KeyError("Input data must include 'prompts'.")
35
- # Ensure the prompts are in list format.
36
29
  if not isinstance(data["prompts"], list):
37
30
  data["prompts"] = [data["prompts"]]
38
- return data
31
+ return {"prompts": data["prompts"]}
39
32
 
40
33
  def format_input(
41
34
  self, data: Dict[str, Any], protocol: str, max_batch_size: int, **kwargs
@@ -63,29 +56,32 @@ class EmbeddingModelInterface(ModelInterface):
63
56
  - payloads is a list of JSON-serializable payload dictionaries.
64
57
  - batch_data_list is a list of dictionaries containing the key "prompts" corresponding to each batch.
65
58
  """
66
- if protocol != "http":
67
- raise ValueError("EmbeddingModelInterface only supports HTTP protocol.")
68
-
69
- prompts = data.get("prompts", [])
70
59
 
71
60
  def chunk_list(lst, chunk_size):
61
+ lst = lst["prompts"]
72
62
  return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]
73
63
 
74
- batches = chunk_list(prompts, max_batch_size)
75
- payloads = []
76
- batch_data_list = []
77
- for batch in batches:
78
- payload = {
79
- "model": kwargs.get("model_name"),
80
- "input": batch,
81
- "encoding_format": kwargs.get("encoding_format", "float"),
82
- "extra_body": {
83
- "input_type": kwargs.get("input_type", "query"),
64
+ batches = chunk_list(data, max_batch_size)
65
+ if protocol == "http":
66
+ payloads = []
67
+ batch_data_list = []
68
+ for batch in batches:
69
+ payload = {
70
+ "model": kwargs.get("model_name"),
71
+ "input": batch,
72
+ "encoding_format": kwargs.get("encoding_format", "float"),
73
+ "input_type": kwargs.get("input_type", "passage"),
84
74
  "truncate": kwargs.get("truncate", "NONE"),
85
- },
86
- }
87
- payloads.append(payload)
88
- batch_data_list.append({"prompts": batch})
75
+ }
76
+ payloads.append(payload)
77
+ batch_data_list.append({"prompts": batch})
78
+ elif protocol == "grpc":
79
+ payloads = []
80
+ batch_data_list = []
81
+ for batch in batches:
82
+ text_np = np.array([[text.encode("utf-8")] for text in batch], dtype=np.object_)
83
+ payloads.append(text_np)
84
+ batch_data_list.append({"prompts": batch})
89
85
  return payloads, batch_data_list
90
86
 
91
87
  def parse_output(self, response: Any, protocol: str, data: Optional[Dict[str, Any]] = None, **kwargs) -> Any:
@@ -108,16 +104,17 @@ class EmbeddingModelInterface(ModelInterface):
108
104
  list
109
105
  A list of generated embeddings extracted from the response.
110
106
  """
111
- if protocol != "http":
112
- raise ValueError("EmbeddingModelInterface only supports HTTP protocol.")
113
- if isinstance(response, dict):
114
- embeddings = response.get("data")
115
- if not embeddings:
116
- raise RuntimeError("Unexpected response format: 'data' key is missing or empty.")
117
- # Each item in embeddings is expected to have an 'embedding' field.
118
- return [item.get("embedding", None) for item in embeddings]
119
- else:
120
- return [str(response)]
107
+ if protocol == "http":
108
+ if isinstance(response, dict):
109
+ embeddings = response.get("data")
110
+ if not embeddings:
111
+ raise RuntimeError("Unexpected response format: 'data' key is missing or empty.")
112
+ # Each item in embeddings is expected to have an 'embedding' field.
113
+ return [item.get("embedding", None) for item in embeddings]
114
+ else:
115
+ return [str(response)]
116
+ elif protocol == "grpc":
117
+ return [res.flatten() for res in response]
121
118
 
122
119
  def process_inference_results(self, output: Any, protocol: str, **kwargs) -> Any:
123
120
  """
@@ -709,7 +709,13 @@ def postprocess_results(
709
709
  raise ValueError(f"Error in postprocessing {result.shape} and {original_image_shape}: {e}")
710
710
 
711
711
  for box, score, label in zip(bboxes, scores, labels):
712
- class_name = class_labels[int(label)]
712
+ # TODO(Devin): Sometimes we get back unexpected class labels?
713
+ if (label < 0) or (label >= len(class_labels)):
714
+ logger.warning(f"Invalid class label {label} found in postprocessing")
715
+ continue
716
+ else:
717
+ class_name = class_labels[int(label)]
718
+
713
719
  annotation_dict[class_name].append([round(float(x), 4) for x in np.concatenate((box, [score]))])
714
720
 
715
721
  out.append(annotation_dict)
@@ -129,7 +129,7 @@ class NimClient:
129
129
  """
130
130
  if self.protocol == "grpc":
131
131
  logger.debug("Performing gRPC inference for a batch...")
132
- response = self._grpc_infer(batch_input, model_name)
132
+ response = self._grpc_infer(batch_input, model_name, **kwargs)
133
133
  logger.debug("gRPC inference received response for a batch")
134
134
  elif self.protocol == "http":
135
135
  logger.debug("Performing HTTP inference for a batch...")
@@ -221,7 +221,7 @@ class NimClient:
221
221
 
222
222
  return all_results
223
223
 
224
- def _grpc_infer(self, formatted_input: np.ndarray, model_name: str) -> np.ndarray:
224
+ def _grpc_infer(self, formatted_input: np.ndarray, model_name: str, **kwargs) -> np.ndarray:
225
225
  """
226
226
  Perform inference using the gRPC protocol.
227
227
 
@@ -238,16 +238,24 @@ class NimClient:
238
238
  The output of the model as a numpy array.
239
239
  """
240
240
 
241
- input_tensors = [grpcclient.InferInput("input", formatted_input.shape, datatype="FP32")]
242
- input_tensors[0].set_data_from_numpy(formatted_input)
241
+ parameters = kwargs.get("parameters", {})
242
+ output_names = kwargs.get("outputs", ["output"])
243
+ dtype = kwargs.get("dtype", "FP32")
244
+ input_name = kwargs.get("input_name", "input")
243
245
 
244
- outputs = [grpcclient.InferRequestedOutput("output")]
245
- response = self.client.infer(model_name=model_name, inputs=input_tensors, outputs=outputs)
246
- logger.debug(f"gRPC inference response: {response}")
246
+ input_tensors = grpcclient.InferInput(input_name, formatted_input.shape, datatype=dtype)
247
+ input_tensors.set_data_from_numpy(formatted_input)
247
248
 
248
- # TODO(self.client.has_error(response)) => raise error
249
+ outputs = [grpcclient.InferRequestedOutput(output_name) for output_name in output_names]
250
+ response = self.client.infer(
251
+ model_name=model_name, parameters=parameters, inputs=[input_tensors], outputs=outputs
252
+ )
253
+ logger.debug(f"gRPC inference response: {response}")
249
254
 
250
- return response.as_numpy("output")
255
+ if len(outputs) == 1:
256
+ return response.as_numpy(outputs[0].name())
257
+ else:
258
+ return [response.as_numpy(output.name()) for output in outputs]
251
259
 
252
260
  def _http_infer(self, formatted_input: dict) -> dict:
253
261
  """
@@ -31,13 +31,15 @@ def traceable(trace_name=None):
31
31
 
32
32
  Notes
33
33
  -----
34
- The decorated function must accept a IngestControlMessage object as its first argument. The
35
- IngestControlMessage object must implement `has_metadata`, `get_metadata`, and `set_metadata`
36
- methods used by the decorator to check for the trace tagging flag and to add trace metadata.
34
+ The decorated function must accept a IngestControlMessage object as one of its arguments.
35
+ For a regular function, this is expected to be the first argument; for a class method,
36
+ this is expected to be the second argument (after 'self'). The IngestControlMessage object
37
+ must implement `has_metadata`, `get_metadata`, and `set_metadata` methods used by the decorator
38
+ to check for the trace tagging flag and to add trace metadata.
37
39
 
38
40
  The trace metadata added by the decorator includes two entries:
39
- - 'trace::entry::<trace_name>': The monotonic timestamp marking the function's entry.
40
- - 'trace::exit::<trace_name>': The monotonic timestamp marking the function's exit.
41
+ - 'trace::entry::<trace_name>': The timestamp marking the function's entry.
42
+ - 'trace::exit::<trace_name>': The timestamp marking the function's exit.
41
43
 
42
44
  Example
43
45
  -------
@@ -47,23 +49,25 @@ def traceable(trace_name=None):
47
49
  ... def process_message(message):
48
50
  ... pass
49
51
 
50
- Applying the decorator with a custom trace name:
51
-
52
- >>> @traceable(custom_trace_name="CustomTraceName")
53
- ... def process_message(message):
54
- ... pass
55
-
56
- In both examples, `process_message` will have entry and exit timestamps added to the
57
- IngestControlMessage's metadata if 'config::add_trace_tagging' is True.
52
+ Applying the decorator with a custom trace name on a class method:
58
53
 
54
+ >>> class Processor:
55
+ ... @traceable(trace_name="CustomTrace")
56
+ ... def process(self, message):
57
+ ... pass
59
58
  """
60
59
 
61
60
  def decorator_trace_tagging(func):
62
61
  @functools.wraps(func)
63
62
  def wrapper_trace_tagging(*args, **kwargs):
64
- # Assuming the first argument is always the message
65
63
  ts_fetched = datetime.now()
66
- message = args[0]
64
+ # Determine which argument is the message.
65
+ if hasattr(args[0], "has_metadata"):
66
+ message = args[0]
67
+ elif len(args) > 1 and hasattr(args[1], "has_metadata"):
68
+ message = args[1]
69
+ else:
70
+ raise ValueError("traceable decorator could not find a message argument with 'has_metadata()'")
67
71
 
68
72
  do_trace_tagging = (message.has_metadata("config::add_trace_tagging") is True) and (
69
73
  message.get_metadata("config::add_trace_tagging") is True
@@ -79,7 +83,7 @@ def traceable(trace_name=None):
79
83
  message.set_timestamp(f"trace::entry::{trace_prefix}_channel_in", ts_send)
80
84
  message.set_timestamp(f"trace::exit::{trace_prefix}_channel_in", ts_fetched)
81
85
 
82
- # Call the decorated function
86
+ # Call the decorated function.
83
87
  result = func(*args, **kwargs)
84
88
 
85
89
  if do_trace_tagging:
@@ -131,7 +131,7 @@ class NemoRetrieverParseConfigSchema(BaseModel):
131
131
  nemoretriever_parse_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
132
132
  nemoretriever_parse_infer_protocol: str = ""
133
133
 
134
- model_name: str = "nvidia/nemoretriever-parse"
134
+ nemoretriever_parse_model_name: str = "nvidia/nemoretriever-parse"
135
135
 
136
136
  timeout: float = 300.0
137
137
 
@@ -76,7 +76,7 @@ class IngestTaskCaptionSchema(BaseModelNoExt):
76
76
  api_key: Optional[str] = None
77
77
  endpoint_url: Optional[str] = None
78
78
  prompt: Optional[str] = None
79
- model_name: Optional[str] = None
79
+ caption_model_name: Optional[str] = None
80
80
 
81
81
 
82
82
  class IngestTaskFilterParamsSchema(BaseModelNoExt):
@@ -104,7 +104,7 @@ class IngestTaskDedupSchema(BaseModelNoExt):
104
104
 
105
105
  class IngestTaskEmbedSchema(BaseModelNoExt):
106
106
  endpoint_url: Optional[str] = None
107
- model_name: Optional[str] = None
107
+ embedding_model_name: Optional[str] = None
108
108
  api_key: Optional[str] = None
109
109
  filter_errors: bool = False
110
110
 
@@ -10,6 +10,6 @@ class ImageCaptionExtractionSchema(BaseModel):
10
10
  api_key: str = "api_key"
11
11
  endpoint_url: str = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct/chat/completions"
12
12
  prompt: str = "Caption the content of this image:"
13
- model_name: str = "meta/llama-3.2-11b-vision-instruct"
13
+ image_caption_model_name: str = "meta/llama-3.2-11b-vision-instruct"
14
14
  raise_on_failure: bool = False
15
15
  model_config = ConfigDict(extra="forbid")
@@ -173,7 +173,7 @@ def transform_image_create_vlm_caption_internal(
173
173
  api_key: str = task_config.get("api_key") or transform_config.api_key
174
174
  prompt: str = task_config.get("prompt") or transform_config.prompt
175
175
  endpoint_url: str = task_config.get("endpoint_url") or transform_config.endpoint_url
176
- model_name: str = task_config.get("model_name") or transform_config.model_name
176
+ model_name: str = task_config.get("image_caption_model_name") or transform_config.image_caption_model_name
177
177
 
178
178
  # Create a mask for rows where the content type is "image".
179
179
  df_mask: pd.Series = df_transform_ledger["metadata"].apply(