nv-ingest-api 2025.10.28.dev20251028__tar.gz → 2025.11.8.dev20251108__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182)
  1. {nv_ingest_api-2025.10.28.dev20251028/src/nv_ingest_api.egg-info → nv_ingest_api-2025.11.8.dev20251108}/PKG-INFO +1 -1
  2. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +124 -14
  3. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +38 -0
  4. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +1 -1
  5. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +1 -1
  6. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +1 -0
  7. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/transform/embed_text.py +9 -0
  8. nv_ingest_api-2025.11.8.dev20251108/src/nv_ingest_api/util/message_brokers/qos_scheduler.py +283 -0
  9. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +1 -0
  10. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +8 -2
  11. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/nim/__init__.py +7 -1
  12. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +160 -0
  13. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
  14. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api.egg-info/SOURCES.txt +1 -0
  15. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/LICENSE +0 -0
  16. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/MANIFEST.in +0 -0
  17. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/README.md +0 -0
  18. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/pyproject.toml +0 -0
  19. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/setup.cfg +0 -0
  20. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/__init__.py +0 -0
  21. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/__init__.py +0 -0
  22. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/extract.py +0 -0
  23. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/mutate.py +0 -0
  24. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/store.py +0 -0
  25. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/transform.py +0 -0
  26. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/utility.py +0 -0
  27. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/__init__.py +0 -0
  28. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
  29. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/enums/common.py +0 -0
  30. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
  31. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
  32. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
  33. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
  34. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
  35. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  36. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
  37. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
  38. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
  39. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
  40. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
  41. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
  42. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
  43. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
  44. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
  45. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
  46. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
  47. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
  48. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
  49. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
  50. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
  51. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
  52. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
  53. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
  54. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
  55. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
  56. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
  57. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
  58. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
  59. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  60. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
  61. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
  62. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/meta/__init__.py +0 -0
  63. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/meta/udf.py +0 -0
  64. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
  65. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
  66. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
  67. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
  68. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
  69. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
  70. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
  71. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
  72. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
  73. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
  74. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
  75. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
  76. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
  77. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
  78. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +0 -0
  79. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
  80. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
  81. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
  82. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
  83. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
  84. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  85. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
  86. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
  87. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
  88. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
  89. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
  90. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -0
  91. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
  92. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
  93. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
  94. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
  95. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
  96. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -0
  97. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
  98. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
  99. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
  100. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
  101. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
  102. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
  103. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
  104. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
  105. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/udf.py +0 -0
  106. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/mixins.py +0 -0
  107. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
  108. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
  109. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
  110. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
  111. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
  112. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
  113. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
  114. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
  115. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/store/__init__.py +0 -0
  116. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
  117. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
  118. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
  119. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
  120. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
  121. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/__init__.py +0 -0
  122. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
  123. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/control_message/validators.py +0 -0
  124. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/__init__.py +0 -0
  125. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
  126. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/containers.py +0 -0
  127. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/datetools.py +0 -0
  128. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/dftools.py +0 -0
  129. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/formats.py +0 -0
  130. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
  131. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/dataloader/__init__.py +0 -0
  132. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/dataloader/dataloader.py +0 -0
  133. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
  134. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/detectors/language.py +0 -0
  135. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
  136. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
  137. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
  138. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
  139. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
  140. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
  141. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
  142. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
  143. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
  144. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
  145. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
  146. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/imports/__init__.py +0 -0
  147. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/imports/callable_signatures.py +0 -0
  148. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/imports/dynamic_resolvers.py +0 -0
  149. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/introspection/__init__.py +0 -0
  150. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/introspection/class_inspect.py +0 -0
  151. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/introspection/function_inspect.py +0 -0
  152. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/logging/__init__.py +0 -0
  153. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/logging/configuration.py +0 -0
  154. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/logging/sanitize.py +0 -0
  155. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
  156. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
  157. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
  158. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
  159. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
  160. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
  161. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
  162. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
  163. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
  164. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/schema/__init__.py +0 -0
  165. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
  166. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
  167. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
  168. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
  169. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
  170. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  171. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
  172. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
  173. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/string_processing/configuration.py +0 -0
  174. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/string_processing/yaml.py +0 -0
  175. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/system/__init__.py +0 -0
  176. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
  177. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
  178. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api.egg-info/requires.txt +0 -0
  179. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
  180. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/udfs/__init__.py +0 -0
  181. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/udfs/llm_summarizer_udf.py +0 -0
  182. {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.10.28.dev20251028
3
+ Version: 2025.11.8.dev20251108
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -5,6 +5,7 @@
5
5
  import hashlib
6
6
  import json
7
7
  import logging
8
+ import re
8
9
  import threading
9
10
  import time
10
11
  import queue
@@ -24,6 +25,12 @@ from nv_ingest_api.util.string_processing import generate_url
24
25
 
25
26
  logger = logging.getLogger(__name__)
26
27
 
28
+ # Regex pattern to detect CUDA-related errors in Triton gRPC responses
29
+ CUDA_ERROR_REGEX = re.compile(
30
+ r"(model reload|illegal memory access|illegal instruction|invalid argument|failed to (copy|load|perform) .*: .*|TritonModelException: failed to copy data: .*)", # noqa: E501
31
+ re.IGNORECASE,
32
+ )
33
+
27
34
  # A simple structure to hold a request's data and its Future for the result
28
35
  InferenceRequest = namedtuple("InferenceRequest", ["data", "future", "model_name", "dims", "kwargs"])
29
36
 
@@ -40,7 +47,7 @@ class NimClient:
40
47
  endpoints: Tuple[str, str],
41
48
  auth_token: Optional[str] = None,
42
49
  timeout: float = 120.0,
43
- max_retries: int = 5,
50
+ max_retries: int = 10,
44
51
  max_429_retries: int = 5,
45
52
  enable_dynamic_batching: bool = False,
46
53
  dynamic_batch_timeout: float = 0.1, # 100 milliseconds
@@ -60,11 +67,11 @@ class NimClient:
60
67
  auth_token : str, optional
61
68
  Authorization token for HTTP requests (default: None).
62
69
  timeout : float, optional
63
- Timeout for HTTP requests in seconds (default: 30.0).
70
+ Timeout for HTTP requests in seconds (default: 120.0).
64
71
  max_retries : int, optional
65
- The maximum number of retries for non-429 server-side errors (default: 5).
72
+ The maximum number of retries for non-429 server-side errors (default: 10).
66
73
  max_429_retries : int, optional
67
- The maximum number of retries specifically for 429 errors (default: 10).
74
+ The maximum number of retries specifically for 429 errors (default: 5).
68
75
 
69
76
  Raises
70
77
  ------
@@ -323,7 +330,7 @@ class NimClient:
323
330
 
324
331
  outputs = [grpcclient.InferRequestedOutput(output_name) for output_name in output_names]
325
332
 
326
- base_delay = 0.5
333
+ base_delay = 2.0
327
334
  attempt = 0
328
335
  retries_429 = 0
329
336
  max_grpc_retries = self.max_429_retries
@@ -342,8 +349,58 @@ class NimClient:
342
349
  return [response.as_numpy(output.name()) for output in outputs]
343
350
 
344
351
  except grpcclient.InferenceServerException as e:
345
- status = e.status()
346
- if status == "StatusCode.UNAVAILABLE" and "Exceeds maximum queue size".lower() in e.message().lower():
352
+ status = str(e.status())
353
+ message = e.message()
354
+
355
+ # Handle CUDA memory errors
356
+ if status == "StatusCode.INTERNAL":
357
+ if CUDA_ERROR_REGEX.search(message):
358
+ logger.warning(
359
+ f"Received gRPC INTERNAL error with CUDA-related message for model '{model_name}'. "
360
+ f"Attempt {attempt + 1} of {self.max_retries}. Message (truncated): {message[:500]}"
361
+ )
362
+ if attempt >= self.max_retries - 1:
363
+ logger.error(f"Max retries exceeded for CUDA errors on model '{model_name}'.")
364
+ raise e
365
+ # Try to reload models before retrying
366
+ model_reload_succeeded = reload_models(client=self.client, client_timeout=self.timeout)
367
+ if not model_reload_succeeded:
368
+ logger.error(f"Failed to reload models for model '{model_name}'.")
369
+ else:
370
+ logger.warning(
371
+ f"Received gRPC INTERNAL error for model '{model_name}'. "
372
+ f"Attempt {attempt + 1} of {self.max_retries}. Message (truncated): {message[:500]}"
373
+ )
374
+ if attempt >= self.max_retries - 1:
375
+ logger.error(f"Max retries exceeded for INTERNAL error on model '{model_name}'.")
376
+ raise e
377
+
378
+ # Common retry logic for both CUDA and non-CUDA INTERNAL errors
379
+ backoff_time = base_delay * (2**attempt)
380
+ time.sleep(backoff_time)
381
+ attempt += 1
382
+ continue
383
+
384
+ # Handle errors that can occur after model reload (NOT_FOUND, model not loaded)
385
+ if status == "StatusCode.NOT_FOUND":
386
+ logger.warning(
387
+ f"Received gRPC {status} error for model '{model_name}'. "
388
+ f"Attempt {attempt + 1} of {self.max_retries}. Message: {message[:500]}"
389
+ )
390
+ if attempt >= self.max_retries - 1:
391
+ logger.error(f"Max retries exceeded for model not found errors on model '{model_name}'.")
392
+ raise e
393
+
394
+ # Retry with exponential backoff WITHOUT reloading
395
+ backoff_time = base_delay * (2**attempt)
396
+ logger.info(
397
+ f"Retrying after {backoff_time}s backoff for model not found error on model '{model_name}'."
398
+ )
399
+ time.sleep(backoff_time)
400
+ attempt += 1
401
+ continue
402
+
403
+ if status == "StatusCode.UNAVAILABLE" and "Exceeds maximum queue size".lower() in message.lower():
347
404
  retries_429 += 1
348
405
  logger.warning(
349
406
  f"Received gRPC {status} for model '{model_name}'. "
@@ -357,13 +414,12 @@ class NimClient:
357
414
  time.sleep(backoff_time)
358
415
  continue
359
416
 
360
- else:
361
- # For other server-side errors (e.g., INVALID_ARGUMENT, NOT_FOUND),
362
- # retrying will not help. We should fail fast.
363
- logger.error(
364
- f"Received non-retryable gRPC error from Triton for model '{model_name}': {e.message()}"
365
- )
366
- raise
417
+ # For other server-side errors (e.g., INVALID_ARGUMENT, etc.),
418
+ # fail fast as retrying will not help
419
+ logger.error(
420
+ f"Received non-retryable gRPC error {status} from Triton for model '{model_name}': {message}"
421
+ )
422
+ raise
367
423
 
368
424
  except Exception as e:
369
425
  # Catch any other unexpected exceptions (e.g., network issues not caught by Triton client)
@@ -681,3 +737,57 @@ class NimClientManager:
681
737
def get_nim_client_manager(*args, **kwargs) -> NimClientManager:
    """
    Return the NimClientManager singleton.

    Every positional and keyword argument is forwarded unchanged to the
    NimClientManager constructor.
    """
    manager = NimClientManager(*args, **kwargs)
    return manager
740
+
741
+
742
def _load_models_best_effort(client: grpcclient.InferenceServerClient, names: list[str]) -> list[str]:
    """
    Try to load every model in ``names`` and report the ones that failed.

    Parameters
    ----------
    client : grpcclient.InferenceServerClient
        The gRPC client connected to the Triton server.
    names : list[str]
        Model names to load, in order.

    Returns
    -------
    list[str]
        Names whose ``load_model`` call raised ``InferenceServerException``.
        Empty when every load request succeeded.
    """
    failed = []
    for name in names:
        try:
            client.load_model(name)
        except grpcclient.InferenceServerException as e:
            # Keep going: one broken model should not leave the others unloaded.
            logger.error(f"[ERROR] Failed to load '{name}': {e.message()}")
            failed.append(name)
    return failed


def reload_models(
    client: grpcclient.InferenceServerClient,
    exclude: "list[str] | None" = None,
    client_timeout: int = 120,
) -> bool:
    """
    Reloads all models in the Triton server except for the models in the exclude list.

    The reload runs in three phases: unload every selected model, load them
    again, then check readiness of each one. Any Triton client error in any
    phase is logged and reported as ``False`` instead of being raised.

    Parameters
    ----------
    client : grpcclient.InferenceServerClient
        The gRPC client connected to the Triton server.
    exclude : list[str], optional
        A list of model names to exclude from reloading. ``None`` (the default)
        excludes nothing.
    client_timeout : int, optional
        Timeout for client operations in seconds (default: 120). Passed to the
        readiness check.

    Returns
    -------
    bool
        True if all models were successfully reloaded, False otherwise.
    """
    model_index = client.get_model_repository_index()
    excluded = set(exclude) if exclude else set()
    names = [m.name for m in model_index.models if m.name not in excluded]

    logger.info(f"Reloading {len(names)} model(s): {', '.join(names) if names else '(none)'}")

    # 1) Unload
    unloaded = []
    for name in names:
        try:
            client.unload_model(name)
        except grpcclient.InferenceServerException as e:
            msg = e.message() or ""
            if "explicit model load / unload" in msg.lower():
                logger.warning(
                    f"[SKIP Model Reload] Explicit model control disabled; cannot unload '{name}'. "
                    f"Status: {e.status()}."
                )
            else:
                logger.error(f"[ERROR] Failed to unload '{name}': {msg}")
            # Do not leave the server half-unloaded: bring back whatever was
            # already unloaded before reporting the failure.
            _load_models_best_effort(client, unloaded)
            return False
        unloaded.append(name)

    # 2) Load (attempt every model even if one of them fails)
    if _load_models_best_effort(client, names):
        return False

    # 3) Readiness check
    for name in names:
        try:
            ready = client.is_model_ready(model_name=name, client_timeout=client_timeout)
        except grpcclient.InferenceServerException as e:
            logger.warning(f"[Warning] Triton readiness check failed for '{name}': {e.message()}")
            return False
        if not ready:
            logger.warning(f"[Warning] Triton Not ready: {name}")
            return False

    logger.info("✅ Reload of models complete.")
    return True
@@ -43,6 +43,24 @@ class PdfConfigSchema(BaseModelNoExt):
43
43
  split_page_count: Annotated[int, Field(ge=1)] = 32
44
44
 
45
45
 
46
class RoutingOptionsSchema(BaseModelNoExt):
    """
    Job routing options.

    Holds the optional queue hint; when provided, the hint is normalised to
    lower case and must name one of the known queues.
    """

    # Queue routing hint for QoS scheduler
    queue_hint: Optional[str] = None

    @field_validator("queue_hint")
    @classmethod
    def validate_queue_hint(cls, v):
        """
        Validate and normalise ``queue_hint``.

        ``None`` passes through untouched. Any other value must be a string
        whose lower-cased form is one of the recognised queue names; the
        lower-cased form is what gets stored.
        """
        if v is None:
            return v
        if not isinstance(v, str):
            raise ValueError("queue_hint must be a string")

        normalized = v.lower()
        if normalized in {"default", "immediate", "micro", "small", "medium", "large"}:
            return normalized
        raise ValueError("queue_hint must be one of: default, immediate, micro, small, medium, large")
62
+
63
+
46
64
  # Ingest Task Schemas
47
65
 
48
66
 
@@ -128,6 +146,7 @@ class IngestTaskEmbedSchema(BaseModelNoExt):
128
146
  audio_elements_modality: Optional[str] = None
129
147
  custom_content_field: Optional[str] = None
130
148
  result_target_field: Optional[str] = None
149
+ dimensions: Optional[int] = None
131
150
 
132
151
 
133
152
  class IngestTaskVdbUploadSchema(BaseModelNoExt):
@@ -283,8 +302,27 @@ class IngestJobSchema(BaseModelNoExt):
283
302
  job_id: Union[str, int]
284
303
  tasks: List[IngestTaskSchema]
285
304
  tracing_options: Optional[TracingOptionsSchema] = None
305
+ routing_options: Optional[RoutingOptionsSchema] = None
286
306
  pdf_config: Optional[PdfConfigSchema] = None
287
307
 
308
@model_validator(mode="before")
@classmethod
def migrate_queue_hint(cls, values):
    """
    Backward-compatibility shim: if a legacy client sends
    tracing_options.queue_hint, move it into routing_options.queue_hint.

    The move happens only when the raw input is a dict, ``tracing_options``
    is a dict that contains ``queue_hint``, and ``routing_options`` is
    missing/empty or a dict without its own ``queue_hint``. In every other
    case the input is returned untouched and normal field validation decides
    what to do with it.

    On migration the nested ``tracing_options`` dict is modified in place
    (the key is removed) and ``routing_options`` is set on ``values``.
    """
    if not isinstance(values, dict):
        return values

    topt = values.get("tracing_options")
    if not isinstance(topt, dict) or "queue_hint" not in topt:
        return values

    ropt = values.get("routing_options") or {}
    # Check the destination *before* popping from the source, so the hint is
    # never dropped when routing_options cannot accept it (e.g. it is already
    # a model instance rather than a dict).
    if not isinstance(ropt, dict) or "queue_hint" in ropt:
        return values

    ropt["queue_hint"] = topt.pop("queue_hint")
    values["routing_options"] = ropt
    values["tracing_options"] = topt
    return values
325
+
288
326
 
289
327
  # ------------------------------------------------------------------------------
290
328
  # Utility Functions
@@ -244,7 +244,7 @@ class TableMetadataSchema(BaseModelNoExt):
244
244
 
245
245
  class ChartMetadataSchema(BaseModelNoExt):
246
246
  """
247
- The schema for extracted chart content.
247
+ The schema for table content extracted from charts.
248
248
  """
249
249
 
250
250
  caption: str = ""
@@ -10,7 +10,7 @@ class ImageCaptionExtractionSchema(BaseModel):
10
10
  api_key: str = Field(default="", repr=False)
11
11
  endpoint_url: str = "https://integrate.api.nvidia.com/v1/chat/completions"
12
12
  prompt: str = "Caption the content of this image:"
13
- model_name: str = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
13
+ model_name: str = "nvidia/nemotron-nano-12b-v2-vl"
14
14
  raise_on_failure: bool = False
15
15
  model_config = ConfigDict(extra="forbid")
16
16
 
@@ -30,6 +30,7 @@ class TextEmbeddingSchema(BaseModel):
30
30
  audio_elements_modality: str = Field(default="text")
31
31
  custom_content_field: Optional[str] = None
32
32
  result_target_field: Optional[str] = None
33
+ dimensions: Optional[int] = None
33
34
 
34
35
  model_config = ConfigDict(extra="forbid")
35
36
 
@@ -40,6 +40,7 @@ def _make_async_request(
40
40
  truncate: str,
41
41
  filter_errors: bool,
42
42
  modalities: Optional[List[str]] = None,
43
+ dimensions: Optional[int] = None,
43
44
  ) -> list:
44
45
  """
45
46
  Interacts directly with the NIM embedding service to calculate embeddings for a batch of prompts.
@@ -96,6 +97,7 @@ def _make_async_request(
96
97
  model=embedding_model,
97
98
  encoding_format=encoding_format,
98
99
  extra_body=extra_body,
100
+ dimensions=dimensions,
99
101
  )
100
102
 
101
103
  response["embedding"] = resp.data
@@ -124,6 +126,7 @@ def _async_request_handler(
124
126
  truncate: str,
125
127
  filter_errors: bool,
126
128
  modalities: Optional[List[str]] = None,
129
+ dimensions: Optional[int] = None,
127
130
  ) -> List[dict]:
128
131
  """
129
132
  Gathers calculated embedding results from the NIM embedding service concurrently.
@@ -168,6 +171,7 @@ def _async_request_handler(
168
171
  truncate=truncate,
169
172
  filter_errors=filter_errors,
170
173
  modalities=modality_batch,
174
+ dimensions=dimensions,
171
175
  )
172
176
  for prompt_batch, modality_batch in zip(prompts, modalities)
173
177
  ]
@@ -186,6 +190,7 @@ def _async_runner(
186
190
  truncate: str,
187
191
  filter_errors: bool,
188
192
  modalities: Optional[List[str]] = None,
193
+ dimensions: Optional[int] = None,
189
194
  ) -> dict:
190
195
  """
191
196
  Concurrently launches all NIM embedding requests and flattens the results.
@@ -224,6 +229,7 @@ def _async_runner(
224
229
  truncate,
225
230
  filter_errors,
226
231
  modalities=modalities,
232
+ dimensions=dimensions,
227
233
  )
228
234
 
229
235
  flat_results = {"embeddings": [], "info_msgs": []}
@@ -562,6 +568,7 @@ def transform_create_text_embeddings_internal(
562
568
  endpoint_url = task_config.get("endpoint_url") or transform_config.embedding_nim_endpoint
563
569
  model_name = task_config.get("model_name") or transform_config.embedding_model
564
570
  custom_content_field = task_config.get("custom_content_field") or transform_config.custom_content_field
571
+ dimensions = task_config.get("dimensions") or transform_config.dimensions
565
572
 
566
573
  if execution_trace_log is None:
567
574
  execution_trace_log = {}
@@ -636,6 +643,7 @@ def transform_create_text_embeddings_internal(
636
643
  transform_config.truncate,
637
644
  False,
638
645
  modalities=modality_batches,
646
+ dimensions=dimensions,
639
647
  )
640
648
  # Build a simple row index -> embedding map
641
649
  embeddings_dict = dict(
@@ -680,6 +688,7 @@ def transform_create_text_embeddings_internal(
680
688
  transform_config.input_type,
681
689
  transform_config.truncate,
682
690
  False,
691
+ dimensions=dimensions,
683
692
  )
684
693
  custom_embeddings_dict = dict(
685
694
  zip(
@@ -0,0 +1,283 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Dict, Optional
8
+ import logging
9
+ import time
10
+ import random
11
+
12
+
13
+ class _SchedulingStrategy:
14
+ """
15
+ Base scheduling strategy interface. Implementations must provide a non-blocking
16
+ single-sweep attempt over non-immediate queues and return a job or None.
17
+ """
18
+
19
+ def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
20
+ raise NotImplementedError
21
+
22
+
23
class _LotteryStrategy(_SchedulingStrategy):
    """
    Lottery scheduling with fixed weights.
    Weights: micro=4, small=2, large=1, medium=1, default=1

    Each sweep draws queues at random (probability proportional to weight)
    without replacement until one yields a job or every queue in ``order``
    has been polled once.
    """

    # Weight used for a queue name that has no entry in the weight table.
    _FALLBACK_WEIGHT = 1

    def __init__(self, prioritize_immediate: bool = True) -> None:
        self._weights: Dict[str, int] = {
            "micro": 4,
            "small": 2,
            "large": 1,
            "medium": 1,
            "default": 1,
        }
        self._prioritize_immediate: bool = bool(prioritize_immediate)

    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
        """
        Perform one non-blocking sweep and return the first job found, else None.

        ``client.fetch_message(queue_name, 0)`` is used for every poll; a
        ``TimeoutError`` from the client is treated the same as "no job".
        """
        # Immediate-first if enabled (non-blocking)
        if self._prioritize_immediate:
            try:
                job = client.fetch_message(queues["immediate"], 0)
                if job is not None:
                    return job
            except TimeoutError:
                pass

        candidates = list(order)
        # Unknown queue names get the fallback weight instead of raising KeyError.
        weights = [self._weights.get(q, self._FALLBACK_WEIGHT) for q in candidates]
        while candidates:
            # Draw an index (not a name) so the pick can be removed directly,
            # and do it outside the try so a failure here is never masked.
            idx = random.choices(range(len(candidates)), weights=weights, k=1)[0]
            chosen = candidates.pop(idx)
            del weights[idx]
            try:
                job = client.fetch_message(queues[chosen], 0)
            except TimeoutError:
                continue
            if job is not None:
                return job
        return None
63
+
64
+
65
class _SimpleStrategy(_SchedulingStrategy):
    """
    Single-queue strategy: blocks on the base 'default' queue only.

    The wait time is fixed per strategy instance (30 seconds unless another
    value is passed to the constructor); no other queue is consulted.
    """

    def __init__(self, block_timeout: float = 30.0) -> None:
        """
        Parameters
        ----------
        block_timeout : float, optional
            Seconds to pass to ``client.fetch_message`` as the timeout for the
            default queue (default: 30.0).
        """
        self._block_timeout: float = float(block_timeout)

    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
        """
        Fetch from ``queues["default"]`` with the configured timeout.

        ``order`` is ignored. Returns the fetched job, or None when the client
        raises ``TimeoutError``.
        """
        try:
            return client.fetch_message(queues["default"], self._block_timeout)
        except TimeoutError:
            return None
77
+
78
+
79
class _RoundRobinStrategy(_SchedulingStrategy):
    """
    Plain round-robin across the non-immediate queues.

    The rotation position is kept on the instance, so consecutive calls pick
    up where the previous successful fetch left off.
    """

    def __init__(self, order: list[str], prioritize_immediate: bool = True) -> None:
        self._prioritize_immediate: bool = bool(prioritize_immediate)
        self._order = list(order)
        self._len = len(self._order)
        self._idx = 0

    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
        """
        Poll each queue once (non-blocking), starting at the saved rotation index.

        The rotation uses the order given to the constructor; the ``order``
        argument of this call is not consulted. Returns the first job found,
        or None after a full pass without one.
        """
        # Optional fast path: the immediate queue wins over the rotation.
        if self._prioritize_immediate:
            try:
                urgent = client.fetch_message(queues["immediate"], 0)
            except TimeoutError:
                urgent = None
            if urgent is not None:
                return urgent

        count = self._len
        first = self._idx
        for offset in range(count):
            position = (first + offset) % count
            queue_key = self._order[position]
            try:
                job = client.fetch_message(queues[queue_key], 0)
            except TimeoutError:
                continue
            if job is None:
                continue
            # Next call starts right after the queue that just produced a job.
            self._idx = (position + 1) % count
            return job
        return None
112
+
113
+
114
class _WeightedRoundRobinStrategy(_SchedulingStrategy):
    """
    Smooth Weighted Round Robin (SWRR) with weights
    micro=4, small=2, large=1, medium=1, default=1.

    The running "current" weight of every queue lives on the instance and is
    carried over from one call to the next.
    """

    def __init__(self, prioritize_immediate: bool = True) -> None:
        self._weights: Dict[str, int] = {
            "micro": 4,
            "small": 2,
            "large": 1,
            "medium": 1,
            "default": 1,
        }
        self._current: Dict[str, int] = dict.fromkeys(self._weights, 0)
        self._total: int = sum(self._weights.values())
        self._prioritize_immediate: bool = bool(prioritize_immediate)

    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
        """
        Run one non-blocking sweep and return the first job found, else None.

        Up to ``len(order)`` weighted selections are made; a queue that yields
        nothing is dropped from the remaining selections of this sweep.
        Afterwards every queue in ``order`` is polled once more in list order.
        """
        # Optional fast path: the immediate queue is polled before anything else.
        if self._prioritize_immediate:
            try:
                urgent = client.fetch_message(queues["immediate"], 0)
            except TimeoutError:
                urgent = None
            if urgent is not None:
                return urgent

        remaining = list(order)
        for _ in range(len(order)):
            if not remaining:
                break
            # Credit every still-eligible queue with its weight, then pick the
            # one with the highest running total and charge it the full total.
            for name in remaining:
                self._current[name] += self._weights[name]
            pick = max(remaining, key=self._current.__getitem__)
            self._current[pick] -= self._total
            try:
                job = client.fetch_message(queues[pick], 0)
            except TimeoutError:
                job = None
            if job is not None:
                return job
            # Nothing there: leave this queue out for the rest of the sweep.
            remaining.remove(pick)

        # Final pass: one more non-blocking poll per queue, in the given order.
        # NOTE(review): these queues were all polled in the sweep above already.
        for name in order:
            try:
                job = client.fetch_message(queues[name], 0)
            except TimeoutError:
                continue
            if job is not None:
                return job
        return None
168
+
169
+
170
class QosScheduler:
    """
    QoS scheduler that pulls jobs from a family of broker queues derived from
    a single base queue name.

    The derived queues are ``<base>`` (default), ``<base>_immediate``,
    ``<base>_micro``, ``<base>_small``, ``<base>_medium`` and ``<base>_large``.
    Which queue is polled next is decided by the configured strategy:
    ``"lottery"`` (default), ``"round_robin"``, ``"weighted_round_robin"`` or
    ``"simple"`` (default queue only).

    The prefetch-related constructor arguments are stored but no background
    threads are started by this class.
    """

    def __init__(
        self,
        base_queue: str,
        total_buffer_capacity: int = 1,
        num_prefetch_threads: int = 0,
        prefetch_poll_interval: float = 0.0,
        prefetch_non_immediate: bool = False,
        strategy: str = "lottery",
        prioritize_immediate: bool = True,
    ) -> None:
        """
        Parameters
        ----------
        base_queue : str
            Broker queue name from which all derived queue names are built.
        total_buffer_capacity, num_prefetch_threads, prefetch_poll_interval, prefetch_non_immediate
            Prefetch settings; recorded on the instance only.
        strategy : str, optional
            One of "lottery", "round_robin", "weighted_round_robin", "simple".
            Any other value falls back to "lottery" (a warning is logged).
        prioritize_immediate : bool, optional
            Passed to the strategy; when true the strategy polls the
            'immediate' queue before any other queue.
        """
        self.base_queue = base_queue

        # Define all derived queues; default behavior still uses only "default"
        self.queues: Dict[str, str] = {
            "default": f"{base_queue}",
            "immediate": f"{base_queue}_immediate",
            "micro": f"{base_queue}_micro",
            "small": f"{base_queue}_small",
            "medium": f"{base_queue}_medium",
            "large": f"{base_queue}_large",
        }

        # Priority order for multi-queue fetching; "immediate" always first
        self._priority_order = [
            "immediate",
            "micro",
            "small",
            "medium",
            "large",
            "default",
        ]

        # Non-immediate queue order reference
        self._non_immediate_order = ["micro", "small", "large", "medium", "default"]

        # Logger
        self._logger = logging.getLogger(__name__)

        # No prefetching - just direct calls
        self._total_buffer_capacity: int = int(total_buffer_capacity)
        self._num_prefetch_threads: int = int(num_prefetch_threads)
        self._prefetch_poll_interval: float = float(prefetch_poll_interval)
        self._prefetch_non_immediate: bool = bool(prefetch_non_immediate)

        # Strategy selection
        self._simple_mode: bool = False
        if strategy == "simple":
            self._strategy_impl: _SchedulingStrategy = _SimpleStrategy()
            self._simple_mode = True
        elif strategy == "round_robin":
            self._strategy_impl = _RoundRobinStrategy(self._non_immediate_order, prioritize_immediate)
        elif strategy == "weighted_round_robin":
            self._strategy_impl = _WeightedRoundRobinStrategy(prioritize_immediate)
        else:
            if strategy != "lottery":
                # A misspelt strategy name used to be accepted without a trace.
                self._logger.warning("Unknown QoS scheduling strategy %r; falling back to 'lottery'.", strategy)
            self._strategy_impl = _LotteryStrategy(prioritize_immediate)

    # Context manager helpers for clean shutdown
    def __enter__(self) -> "QosScheduler":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    # ---------------------------- Public API ----------------------------
    def close(self) -> None:
        """
        Cleanly close the scheduler. No-op for the current implementation
        since we do not spin background threads.
        """
        return None

    def fetch_next(self, client, timeout: float = 0.0) -> Optional[dict]:
        """
        Fetch the next job according to the configured strategy.

        Behavior:
          - Simple mode: a single call to the strategy, which blocks on the
            default queue for its own timeout (30s); ``timeout`` is not used.
          - Otherwise the strategy performs one non-blocking sweep (checking
            'immediate' first when it was built with prioritize_immediate).
          - If no job is found in a full sweep:
              - If timeout <= 0: return None.
              - Else: sleep in increments of at most 0.5s and retry until the
                elapsed time reaches ``timeout``, then return None.

        Returns the job dict produced by the broker client, or None.
        """
        # Simple mode: delegate to the strategy (blocks up to 30s on base queue)
        if getattr(self, "_simple_mode", False):
            return self._strategy_impl.try_once(client, self.queues, self._non_immediate_order)

        start = time.monotonic()
        while True:
            # Strategy-based attempt (strategy may include immediate priority internally)
            job = self._strategy_impl.try_once(client, self.queues, self._non_immediate_order)
            if job is not None:
                return job

            # No job found in this sweep
            if timeout <= 0:
                return None

            remaining = timeout - (time.monotonic() - start)
            if remaining <= 0:
                return None

            # Sleep up to 0.5s, but not beyond remaining timeout
            time.sleep(min(0.5, remaining))
@@ -35,6 +35,7 @@ class SimpleClient(MessageBrokerClientBase):
35
35
  connection_timeout: int = 300,
36
36
  max_pool_size: int = 128,
37
37
  use_ssl: bool = False,
38
+ api_version: str = "v1",
38
39
  ):
39
40
  """
40
41
  Initialize the SimpleClient with configuration parameters.
@@ -5,8 +5,9 @@
5
5
 
6
6
  import logging
7
7
  import math
8
- import multiprocessing as mp
9
8
  import os
9
+ import sys
10
+ import multiprocessing as mp
10
11
  from threading import Lock
11
12
  from typing import Any, Callable, Optional
12
13
 
@@ -103,7 +104,12 @@ class ProcessWorkerPoolSingleton:
103
104
  The total number of worker processes to start.
104
105
  """
105
106
  self._total_workers = total_max_workers
106
- self._context: mp.context.ForkContext = mp.get_context("fork")
107
+
108
+ start_method = "fork"
109
+ if sys.platform.lower() == "darwin":
110
+ start_method = "spawn"
111
+ self._context: mp.context.ForkContext = mp.get_context(start_method)
112
+
107
113
  # Bounded task queue: maximum tasks queued = 2 * total_max_workers.
108
114
  self._task_queue: mp.Queue = self._context.Queue(maxsize=2 * total_max_workers)
109
115
  self._next_task_id: int = 0