nv-ingest-api 2025.10.28.dev20251028__tar.gz → 2025.11.8.dev20251108__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nv_ingest_api-2025.10.28.dev20251028/src/nv_ingest_api.egg-info → nv_ingest_api-2025.11.8.dev20251108}/PKG-INFO +1 -1
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +124 -14
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +38 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +1 -1
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +1 -1
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +1 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/transform/embed_text.py +9 -0
- nv_ingest_api-2025.11.8.dev20251108/src/nv_ingest_api/util/message_brokers/qos_scheduler.py +283 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +1 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +8 -2
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/nim/__init__.py +7 -1
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +160 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api.egg-info/SOURCES.txt +1 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/LICENSE +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/MANIFEST.in +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/README.md +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/pyproject.toml +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/setup.cfg +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/extract.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/mutate.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/store.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/transform.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/interface/utility.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/enums/common.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/meta/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/meta/udf.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/meta/udf.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/mixins.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/store/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/control_message/validators.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/containers.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/datetools.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/dftools.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/formats.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/dataloader/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/dataloader/dataloader.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/detectors/language.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/imports/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/imports/callable_signatures.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/imports/dynamic_resolvers.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/introspection/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/introspection/class_inspect.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/introspection/function_inspect.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/logging/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/logging/configuration.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/logging/sanitize.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/schema/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/string_processing/configuration.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/string_processing/yaml.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/system/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api.egg-info/requires.txt +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/udfs/__init__.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/udfs/llm_summarizer_udf.py +0 -0
- {nv_ingest_api-2025.10.28.dev20251028 → nv_ingest_api-2025.11.8.dev20251108}/src/version.py +0 -0
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
import hashlib
|
|
6
6
|
import json
|
|
7
7
|
import logging
|
|
8
|
+
import re
|
|
8
9
|
import threading
|
|
9
10
|
import time
|
|
10
11
|
import queue
|
|
@@ -24,6 +25,12 @@ from nv_ingest_api.util.string_processing import generate_url
|
|
|
24
25
|
|
|
25
26
|
logger = logging.getLogger(__name__)
|
|
26
27
|
|
|
28
|
+
# Regex pattern to detect CUDA-related errors in Triton gRPC responses
|
|
29
|
+
CUDA_ERROR_REGEX = re.compile(
|
|
30
|
+
r"(model reload|illegal memory access|illegal instruction|invalid argument|failed to (copy|load|perform) .*: .*|TritonModelException: failed to copy data: .*)", # noqa: E501
|
|
31
|
+
re.IGNORECASE,
|
|
32
|
+
)
|
|
33
|
+
|
|
27
34
|
# A simple structure to hold a request's data and its Future for the result
|
|
28
35
|
InferenceRequest = namedtuple("InferenceRequest", ["data", "future", "model_name", "dims", "kwargs"])
|
|
29
36
|
|
|
@@ -40,7 +47,7 @@ class NimClient:
|
|
|
40
47
|
endpoints: Tuple[str, str],
|
|
41
48
|
auth_token: Optional[str] = None,
|
|
42
49
|
timeout: float = 120.0,
|
|
43
|
-
max_retries: int =
|
|
50
|
+
max_retries: int = 10,
|
|
44
51
|
max_429_retries: int = 5,
|
|
45
52
|
enable_dynamic_batching: bool = False,
|
|
46
53
|
dynamic_batch_timeout: float = 0.1, # 100 milliseconds
|
|
@@ -60,11 +67,11 @@ class NimClient:
|
|
|
60
67
|
auth_token : str, optional
|
|
61
68
|
Authorization token for HTTP requests (default: None).
|
|
62
69
|
timeout : float, optional
|
|
63
|
-
Timeout for HTTP requests in seconds (default:
|
|
70
|
+
Timeout for HTTP requests in seconds (default: 120.0).
|
|
64
71
|
max_retries : int, optional
|
|
65
|
-
The maximum number of retries for non-429 server-side errors (default:
|
|
72
|
+
The maximum number of retries for non-429 server-side errors (default: 10).
|
|
66
73
|
max_429_retries : int, optional
|
|
67
|
-
The maximum number of retries specifically for 429 errors (default:
|
|
74
|
+
The maximum number of retries specifically for 429 errors (default: 5).
|
|
68
75
|
|
|
69
76
|
Raises
|
|
70
77
|
------
|
|
@@ -323,7 +330,7 @@ class NimClient:
|
|
|
323
330
|
|
|
324
331
|
outputs = [grpcclient.InferRequestedOutput(output_name) for output_name in output_names]
|
|
325
332
|
|
|
326
|
-
base_delay = 0
|
|
333
|
+
base_delay = 2.0
|
|
327
334
|
attempt = 0
|
|
328
335
|
retries_429 = 0
|
|
329
336
|
max_grpc_retries = self.max_429_retries
|
|
@@ -342,8 +349,58 @@ class NimClient:
|
|
|
342
349
|
return [response.as_numpy(output.name()) for output in outputs]
|
|
343
350
|
|
|
344
351
|
except grpcclient.InferenceServerException as e:
|
|
345
|
-
status = e.status()
|
|
346
|
-
|
|
352
|
+
status = str(e.status())
|
|
353
|
+
message = e.message()
|
|
354
|
+
|
|
355
|
+
# Handle CUDA memory errors
|
|
356
|
+
if status == "StatusCode.INTERNAL":
|
|
357
|
+
if CUDA_ERROR_REGEX.search(message):
|
|
358
|
+
logger.warning(
|
|
359
|
+
f"Received gRPC INTERNAL error with CUDA-related message for model '{model_name}'. "
|
|
360
|
+
f"Attempt {attempt + 1} of {self.max_retries}. Message (truncated): {message[:500]}"
|
|
361
|
+
)
|
|
362
|
+
if attempt >= self.max_retries - 1:
|
|
363
|
+
logger.error(f"Max retries exceeded for CUDA errors on model '{model_name}'.")
|
|
364
|
+
raise e
|
|
365
|
+
# Try to reload models before retrying
|
|
366
|
+
model_reload_succeeded = reload_models(client=self.client, client_timeout=self.timeout)
|
|
367
|
+
if not model_reload_succeeded:
|
|
368
|
+
logger.error(f"Failed to reload models for model '{model_name}'.")
|
|
369
|
+
else:
|
|
370
|
+
logger.warning(
|
|
371
|
+
f"Received gRPC INTERNAL error for model '{model_name}'. "
|
|
372
|
+
f"Attempt {attempt + 1} of {self.max_retries}. Message (truncated): {message[:500]}"
|
|
373
|
+
)
|
|
374
|
+
if attempt >= self.max_retries - 1:
|
|
375
|
+
logger.error(f"Max retries exceeded for INTERNAL error on model '{model_name}'.")
|
|
376
|
+
raise e
|
|
377
|
+
|
|
378
|
+
# Common retry logic for both CUDA and non-CUDA INTERNAL errors
|
|
379
|
+
backoff_time = base_delay * (2**attempt)
|
|
380
|
+
time.sleep(backoff_time)
|
|
381
|
+
attempt += 1
|
|
382
|
+
continue
|
|
383
|
+
|
|
384
|
+
# Handle errors that can occur after model reload (NOT_FOUND, model not loaded)
|
|
385
|
+
if status == "StatusCode.NOT_FOUND":
|
|
386
|
+
logger.warning(
|
|
387
|
+
f"Received gRPC {status} error for model '{model_name}'. "
|
|
388
|
+
f"Attempt {attempt + 1} of {self.max_retries}. Message: {message[:500]}"
|
|
389
|
+
)
|
|
390
|
+
if attempt >= self.max_retries - 1:
|
|
391
|
+
logger.error(f"Max retries exceeded for model not found errors on model '{model_name}'.")
|
|
392
|
+
raise e
|
|
393
|
+
|
|
394
|
+
# Retry with exponential backoff WITHOUT reloading
|
|
395
|
+
backoff_time = base_delay * (2**attempt)
|
|
396
|
+
logger.info(
|
|
397
|
+
f"Retrying after {backoff_time}s backoff for model not found error on model '{model_name}'."
|
|
398
|
+
)
|
|
399
|
+
time.sleep(backoff_time)
|
|
400
|
+
attempt += 1
|
|
401
|
+
continue
|
|
402
|
+
|
|
403
|
+
if status == "StatusCode.UNAVAILABLE" and "Exceeds maximum queue size".lower() in message.lower():
|
|
347
404
|
retries_429 += 1
|
|
348
405
|
logger.warning(
|
|
349
406
|
f"Received gRPC {status} for model '{model_name}'. "
|
|
@@ -357,13 +414,12 @@ class NimClient:
|
|
|
357
414
|
time.sleep(backoff_time)
|
|
358
415
|
continue
|
|
359
416
|
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
raise
|
|
417
|
+
# For other server-side errors (e.g., INVALID_ARGUMENT, etc.),
|
|
418
|
+
# fail fast as retrying will not help
|
|
419
|
+
logger.error(
|
|
420
|
+
f"Received non-retryable gRPC error {status} from Triton for model '{model_name}': {message}"
|
|
421
|
+
)
|
|
422
|
+
raise
|
|
367
423
|
|
|
368
424
|
except Exception as e:
|
|
369
425
|
# Catch any other unexpected exceptions (e.g., network issues not caught by Triton client)
|
|
@@ -681,3 +737,57 @@ class NimClientManager:
|
|
|
681
737
|
def get_nim_client_manager(*args, **kwargs) -> NimClientManager:
|
|
682
738
|
"""Returns the singleton instance of the NimClientManager."""
|
|
683
739
|
return NimClientManager(*args, **kwargs)
|
|
740
|
+
|
|
741
|
+
|
|
742
|
+
def reload_models(
    client: grpcclient.InferenceServerClient,
    exclude: "list[str] | None" = None,
    client_timeout: int = 120,
) -> bool:
    """
    Reloads all models in the Triton server except for the models in the exclude list.

    The reload is done in three phases: unload every selected model, load the
    models that were unloaded, then check that every selected model is ready.
    If an unload fails part-way through, the models that were already unloaded
    are still loaded again so the server is not left with models missing.

    Parameters
    ----------
    client : grpcclient.InferenceServerClient
        The gRPC client connected to the Triton server.
    exclude : list[str], optional
        A list of model names to exclude from reloading. ``None`` (the default)
        excludes nothing.
    client_timeout : int, optional
        Timeout for client operations in seconds (default: 120). It is passed to
        the readiness check.

    Returns
    -------
    bool
        True if all models were successfully reloaded, False otherwise.

    Raises
    ------
    grpcclient.InferenceServerException
        Only if the initial repository index query fails; unload, load and
        readiness errors are logged and reported through the return value.
    """
    excluded = set(exclude or ())
    model_index = client.get_model_repository_index()
    names = [m.name for m in model_index.models if m.name not in excluded]

    logger.info(f"Reloading {len(names)} model(s): {', '.join(names) if names else '(none)'}")

    success = True

    # 1) Unload, remembering what was actually unloaded so it can be restored.
    unloaded = []
    for name in names:
        try:
            client.unload_model(name)
        except grpcclient.InferenceServerException as e:
            # message() may be None; normalise so the substring test cannot raise.
            msg = e.message() or ""
            if "explicit model load / unload" in msg.lower():
                status = e.status()
                logger.warning(
                    f"[SKIP Model Reload] Explicit model control disabled; cannot unload '{name}'. Status: {status}."
                )
            else:
                logger.error(f"[ERROR] Failed to unload '{name}': {msg}")
            success = False
            break
        unloaded.append(name)

    # 2) Load every model that was unloaded, even after an unload failure, and
    #    keep going after a load failure so the remaining models are restored.
    for name in unloaded:
        try:
            client.load_model(name)
        except grpcclient.InferenceServerException as e:
            logger.error(f"[ERROR] Failed to load '{name}': {e.message()}")
            success = False

    if not success:
        return False

    # 3) Readiness check
    for name in names:
        try:
            ready = client.is_model_ready(model_name=name, client_timeout=client_timeout)
        except grpcclient.InferenceServerException as e:
            logger.error(f"[ERROR] Readiness check failed for '{name}': {e.message()}")
            return False
        if not ready:
            logger.warning(f"[Warning] Triton Not ready: {name}")
            return False

    logger.info("✅ Reload of models complete.")
    return True
|
|
@@ -43,6 +43,24 @@ class PdfConfigSchema(BaseModelNoExt):
|
|
|
43
43
|
split_page_count: Annotated[int, Field(ge=1)] = 32
|
|
44
44
|
|
|
45
45
|
|
|
46
|
+
class RoutingOptionsSchema(BaseModelNoExt):
    """Routing options for an ingest job."""

    # Queue routing hint for the QoS scheduler; None means no hint was given.
    queue_hint: Optional[str] = None

    @field_validator("queue_hint")
    @classmethod
    def validate_queue_hint(cls, v):
        """
        Normalize ``queue_hint`` to lower case and reject unknown queue names.

        ``None`` is passed through unchanged. Any other value must be a string
        naming one of the known queues (compared case-insensitively); the
        lower-cased name is returned.

        Raises
        ------
        ValueError
            If the value is not a string or does not name a known queue.
        """
        if v is None:
            return None
        if not isinstance(v, str):
            raise ValueError("queue_hint must be a string")

        normalized = v.lower()
        if normalized in ("default", "immediate", "micro", "small", "medium", "large"):
            return normalized
        raise ValueError("queue_hint must be one of: default, immediate, micro, small, medium, large")
|
|
62
|
+
|
|
63
|
+
|
|
46
64
|
# Ingest Task Schemas
|
|
47
65
|
|
|
48
66
|
|
|
@@ -128,6 +146,7 @@ class IngestTaskEmbedSchema(BaseModelNoExt):
|
|
|
128
146
|
audio_elements_modality: Optional[str] = None
|
|
129
147
|
custom_content_field: Optional[str] = None
|
|
130
148
|
result_target_field: Optional[str] = None
|
|
149
|
+
dimensions: Optional[int] = None
|
|
131
150
|
|
|
132
151
|
|
|
133
152
|
class IngestTaskVdbUploadSchema(BaseModelNoExt):
|
|
@@ -283,8 +302,27 @@ class IngestJobSchema(BaseModelNoExt):
|
|
|
283
302
|
job_id: Union[str, int]
|
|
284
303
|
tasks: List[IngestTaskSchema]
|
|
285
304
|
tracing_options: Optional[TracingOptionsSchema] = None
|
|
305
|
+
routing_options: Optional[RoutingOptionsSchema] = None
|
|
286
306
|
pdf_config: Optional[PdfConfigSchema] = None
|
|
287
307
|
|
|
308
|
+
@model_validator(mode="before")
@classmethod
def migrate_queue_hint(cls, values):
    """
    Backward-compatibility shim: if a legacy client sends
    tracing_options.queue_hint, move it into routing_options.queue_hint.

    The migration only happens when the raw input is a dict, its
    ``tracing_options`` is a dict containing ``queue_hint``, and
    ``routing_options`` is missing/empty or a dict without ``queue_hint``.
    In every other case the input is returned untouched, so an explicitly
    provided routing hint always wins and nothing is dropped.

    The caller's dictionaries are never modified; a migrated shallow copy is
    returned instead.
    """
    # Non-dict input (e.g. an already-built model instance) has nothing to migrate.
    if not isinstance(values, dict):
        return values

    tracing = values.get("tracing_options")
    if not isinstance(tracing, dict) or "queue_hint" not in tracing:
        return values

    routing = values.get("routing_options") or {}
    # A non-dict routing_options cannot receive the hint; leave the payload
    # as-is rather than popping the legacy key and losing it.
    if not isinstance(routing, dict) or "queue_hint" in routing:
        return values

    # Work on copies so the caller's payload is left intact.
    tracing = dict(tracing)
    routing = dict(routing)
    routing["queue_hint"] = tracing.pop("queue_hint")

    migrated = dict(values)
    migrated["routing_options"] = routing
    migrated["tracing_options"] = tracing
    return migrated
|
|
325
|
+
|
|
288
326
|
|
|
289
327
|
# ------------------------------------------------------------------------------
|
|
290
328
|
# Utility Functions
|
|
@@ -10,7 +10,7 @@ class ImageCaptionExtractionSchema(BaseModel):
|
|
|
10
10
|
api_key: str = Field(default="", repr=False)
|
|
11
11
|
endpoint_url: str = "https://integrate.api.nvidia.com/v1/chat/completions"
|
|
12
12
|
prompt: str = "Caption the content of this image:"
|
|
13
|
-
model_name: str = "nvidia/
|
|
13
|
+
model_name: str = "nvidia/nemotron-nano-12b-v2-vl"
|
|
14
14
|
raise_on_failure: bool = False
|
|
15
15
|
model_config = ConfigDict(extra="forbid")
|
|
16
16
|
|
|
@@ -30,6 +30,7 @@ class TextEmbeddingSchema(BaseModel):
|
|
|
30
30
|
audio_elements_modality: str = Field(default="text")
|
|
31
31
|
custom_content_field: Optional[str] = None
|
|
32
32
|
result_target_field: Optional[str] = None
|
|
33
|
+
dimensions: Optional[int] = None
|
|
33
34
|
|
|
34
35
|
model_config = ConfigDict(extra="forbid")
|
|
35
36
|
|
|
@@ -40,6 +40,7 @@ def _make_async_request(
|
|
|
40
40
|
truncate: str,
|
|
41
41
|
filter_errors: bool,
|
|
42
42
|
modalities: Optional[List[str]] = None,
|
|
43
|
+
dimensions: Optional[int] = None,
|
|
43
44
|
) -> list:
|
|
44
45
|
"""
|
|
45
46
|
Interacts directly with the NIM embedding service to calculate embeddings for a batch of prompts.
|
|
@@ -96,6 +97,7 @@ def _make_async_request(
|
|
|
96
97
|
model=embedding_model,
|
|
97
98
|
encoding_format=encoding_format,
|
|
98
99
|
extra_body=extra_body,
|
|
100
|
+
dimensions=dimensions,
|
|
99
101
|
)
|
|
100
102
|
|
|
101
103
|
response["embedding"] = resp.data
|
|
@@ -124,6 +126,7 @@ def _async_request_handler(
|
|
|
124
126
|
truncate: str,
|
|
125
127
|
filter_errors: bool,
|
|
126
128
|
modalities: Optional[List[str]] = None,
|
|
129
|
+
dimensions: Optional[int] = None,
|
|
127
130
|
) -> List[dict]:
|
|
128
131
|
"""
|
|
129
132
|
Gathers calculated embedding results from the NIM embedding service concurrently.
|
|
@@ -168,6 +171,7 @@ def _async_request_handler(
|
|
|
168
171
|
truncate=truncate,
|
|
169
172
|
filter_errors=filter_errors,
|
|
170
173
|
modalities=modality_batch,
|
|
174
|
+
dimensions=dimensions,
|
|
171
175
|
)
|
|
172
176
|
for prompt_batch, modality_batch in zip(prompts, modalities)
|
|
173
177
|
]
|
|
@@ -186,6 +190,7 @@ def _async_runner(
|
|
|
186
190
|
truncate: str,
|
|
187
191
|
filter_errors: bool,
|
|
188
192
|
modalities: Optional[List[str]] = None,
|
|
193
|
+
dimensions: Optional[int] = None,
|
|
189
194
|
) -> dict:
|
|
190
195
|
"""
|
|
191
196
|
Concurrently launches all NIM embedding requests and flattens the results.
|
|
@@ -224,6 +229,7 @@ def _async_runner(
|
|
|
224
229
|
truncate,
|
|
225
230
|
filter_errors,
|
|
226
231
|
modalities=modalities,
|
|
232
|
+
dimensions=dimensions,
|
|
227
233
|
)
|
|
228
234
|
|
|
229
235
|
flat_results = {"embeddings": [], "info_msgs": []}
|
|
@@ -562,6 +568,7 @@ def transform_create_text_embeddings_internal(
|
|
|
562
568
|
endpoint_url = task_config.get("endpoint_url") or transform_config.embedding_nim_endpoint
|
|
563
569
|
model_name = task_config.get("model_name") or transform_config.embedding_model
|
|
564
570
|
custom_content_field = task_config.get("custom_content_field") or transform_config.custom_content_field
|
|
571
|
+
dimensions = task_config.get("dimensions") or transform_config.dimensions
|
|
565
572
|
|
|
566
573
|
if execution_trace_log is None:
|
|
567
574
|
execution_trace_log = {}
|
|
@@ -636,6 +643,7 @@ def transform_create_text_embeddings_internal(
|
|
|
636
643
|
transform_config.truncate,
|
|
637
644
|
False,
|
|
638
645
|
modalities=modality_batches,
|
|
646
|
+
dimensions=dimensions,
|
|
639
647
|
)
|
|
640
648
|
# Build a simple row index -> embedding map
|
|
641
649
|
embeddings_dict = dict(
|
|
@@ -680,6 +688,7 @@ def transform_create_text_embeddings_internal(
|
|
|
680
688
|
transform_config.input_type,
|
|
681
689
|
transform_config.truncate,
|
|
682
690
|
False,
|
|
691
|
+
dimensions=dimensions,
|
|
683
692
|
)
|
|
684
693
|
custom_embeddings_dict = dict(
|
|
685
694
|
zip(
|
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Dict, Optional
|
|
8
|
+
import logging
|
|
9
|
+
import time
|
|
10
|
+
import random
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Relative selection weights shared by the lottery and weighted round-robin
# strategies, kept in one place so the two cannot drift apart.
_DEFAULT_WEIGHTS: Dict[str, int] = {
    "micro": 4,
    "small": 2,
    "large": 1,
    "medium": 1,
    "default": 1,
}


class _SchedulingStrategy:
    """
    Base scheduling strategy interface.

    Implementations provide ``try_once``: a single attempt to pull one job from
    the broker, returning the job or ``None`` when nothing was available.
    """

    # Strategies that support immediate-first polling set this in ``__init__``.
    _prioritize_immediate: bool = False

    @staticmethod
    def _poll(client, queue_name: str, timeout: float = 0) -> Optional[dict]:
        """
        Fetch one message from ``queue_name``.

        A ``TimeoutError`` raised by the client is treated as "no job" and
        mapped to ``None``; any other exception propagates.
        """
        try:
            return client.fetch_message(queue_name, timeout)
        except TimeoutError:
            return None

    def _poll_immediate(self, client, queues: Dict[str, str]) -> Optional[dict]:
        """
        Poll the 'immediate' queue without blocking, if immediate priority is on.

        Returns ``None`` without touching the broker when the strategy was
        created with ``prioritize_immediate=False``.
        """
        if not self._prioritize_immediate:
            return None
        return self._poll(client, queues["immediate"])

    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
        """
        Make one attempt to fetch a job.

        Parameters
        ----------
        client
            Broker client exposing ``fetch_message(queue_name, timeout)``.
        queues : Dict[str, str]
            Mapping from logical queue key (e.g. ``"micro"``) to broker queue name.
        order : list[str]
            Logical keys of the non-immediate queues to consider.

        Returns
        -------
        Optional[dict]
            The fetched job, or ``None`` if no queue yielded one.
        """
        raise NotImplementedError


class _LotteryStrategy(_SchedulingStrategy):
    """
    Lottery scheduling with fixed weights.
    Weights: micro=4, small=2, large=1, medium=1, default=1
    """

    def __init__(self, prioritize_immediate: bool = True) -> None:
        self._weights: Dict[str, int] = dict(_DEFAULT_WEIGHTS)
        self._prioritize_immediate: bool = bool(prioritize_immediate)

    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
        """
        Poll 'immediate' first (if enabled), then draw queues by weighted lottery.

        Each queue in ``order`` is polled at most once, non-blocking; a queue
        that yields nothing is removed from the draw and the lottery is repeated
        among the remaining queues. Raises ``KeyError`` if ``order`` names a
        queue that has no weight.
        """
        job = self._poll_immediate(client, queues)
        if job is not None:
            return job

        candidates = list(order)
        weights = [self._weights[q] for q in candidates]
        while candidates:
            # Draw a position rather than a name so the candidate and its
            # weight are removed together without a second lookup.
            idx = random.choices(range(len(candidates)), weights=weights, k=1)[0]
            chosen = candidates.pop(idx)
            del weights[idx]
            job = self._poll(client, queues[chosen])
            if job is not None:
                return job
        return None


class _SimpleStrategy(_SchedulingStrategy):
    """
    Simple strategy: read only from the base 'default' queue.

    ``try_once`` blocks on that queue for a fixed 30 seconds; it takes no
    timeout argument, so the timeout given to ``QosScheduler.fetch_next`` does
    not apply in simple mode.
    """

    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
        """Block up to 30s on the default queue and return the first available job, else None."""
        return self._poll(client, queues["default"], 30.0)


class _RoundRobinStrategy(_SchedulingStrategy):
    """
    Simple round-robin over non-immediate queues. Maintains rotation across calls.

    The rotation order is fixed at construction time; the ``order`` argument of
    ``try_once`` is not consulted.
    """

    def __init__(self, order: list[str], prioritize_immediate: bool = True) -> None:
        self._order = list(order)
        self._len = len(self._order)
        self._idx = 0  # position where the next sweep starts
        self._prioritize_immediate: bool = bool(prioritize_immediate)

    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
        """
        Poll 'immediate' first (if enabled), then sweep the queues once in rotation.

        The sweep starts at the saved rotation position and polls each queue
        non-blocking. When a job is found the rotation advances to the queue
        after the one that produced it; an empty sweep leaves the rotation
        unchanged.
        """
        job = self._poll_immediate(client, queues)
        if job is not None:
            return job

        start_idx = self._idx
        for step in range(self._len):
            i = (start_idx + step) % self._len
            job = self._poll(client, queues[self._order[i]])
            if job is not None:
                # advance rotation to the position after the chosen one
                self._idx = (i + 1) % self._len
                return job
        return None


class _WeightedRoundRobinStrategy(_SchedulingStrategy):
    """
    Smooth Weighted Round Robin (SWRR) using weights micro=4, small=2, large=1, medium=1, default=1.
    Maintains current weights across calls.
    """

    def __init__(self, prioritize_immediate: bool = True) -> None:
        self._weights: Dict[str, int] = dict(_DEFAULT_WEIGHTS)
        self._current: Dict[str, int] = {name: 0 for name in self._weights}
        self._total: int = sum(self._weights.values())
        self._prioritize_immediate: bool = bool(prioritize_immediate)

    def try_once(self, client, queues: Dict[str, str], order: list[str]) -> Optional[dict]:
        """
        Poll 'immediate' first (if enabled), then select queues by smooth weighted round robin.

        In each round every still-active queue gains its weight, the queue with
        the highest current weight is selected and charged the total weight,
        and it is polled non-blocking. A queue that yields nothing is excluded
        for the rest of the sweep. If the sweep finds nothing, every queue in
        ``order`` is polled once more before giving up.
        """
        job = self._poll_immediate(client, queues)
        if job is not None:
            return job

        # Each round removes exactly one queue, so this runs at most len(order) times.
        active = list(order)
        while active:
            for q in active:
                self._current[q] += self._weights[q]
            chosen = max(active, key=lambda q: self._current[q])
            # NOTE(review): the charge is the total over all weights even when
            # some queues have been excluded this sweep - confirm this is intended.
            self._current[chosen] -= self._total
            job = self._poll(client, queues[chosen])
            if job is not None:
                return job
            active.remove(chosen)

        # Fallback: single non-blocking attempt for each queue in order
        # (presumably to pick up jobs that arrived during the sweep).
        for q in order:
            job = self._poll(client, queues[q])
            if job is not None:
                return job
        return None


class QosScheduler:
    """
    Scheduler that picks the next job from a family of queues derived from one base queue.

    The queues are the base queue itself ("default") plus ``<base>_immediate``,
    ``<base>_micro``, ``<base>_small``, ``<base>_medium`` and ``<base>_large``.
    Which queue is polled next is decided by the configured strategy:

    - ``"lottery"`` (default): weighted random draw among non-immediate queues.
    - ``"round_robin"``: plain rotation among non-immediate queues.
    - ``"weighted_round_robin"``: smooth weighted round robin.
    - ``"simple"``: read only the base queue, blocking up to 30 seconds.

    Any other strategy name falls back to ``"lottery"`` (a warning is logged).

    Parameters
    ----------
    base_queue : str
        Name of the base broker queue; derived queue names are built from it.
    total_buffer_capacity, num_prefetch_threads, prefetch_poll_interval, prefetch_non_immediate
        Prefetch settings. They are stored but not used by this implementation,
        which performs no prefetching.
    strategy : str
        Scheduling strategy name (see above).
    prioritize_immediate : bool
        When True, the non-simple strategies poll the 'immediate' queue before
        any other queue. When False, those strategies do not poll 'immediate'.
    """

    def __init__(
        self,
        base_queue: str,
        total_buffer_capacity: int = 1,
        num_prefetch_threads: int = 0,
        prefetch_poll_interval: float = 0.0,
        prefetch_non_immediate: bool = False,
        strategy: str = "lottery",
        prioritize_immediate: bool = True,
    ) -> None:
        self.base_queue = base_queue

        # Define all derived queues; "default" is the base queue itself
        self.queues: Dict[str, str] = {
            "default": f"{base_queue}",
            "immediate": f"{base_queue}_immediate",
            "micro": f"{base_queue}_micro",
            "small": f"{base_queue}_small",
            "medium": f"{base_queue}_medium",
            "large": f"{base_queue}_large",
        }

        # Priority order for multi-queue fetching; "immediate" always first
        self._priority_order = [
            "immediate",
            "micro",
            "small",
            "medium",
            "large",
            "default",
        ]

        # Non-immediate queue order reference
        self._non_immediate_order = ["micro", "small", "large", "medium", "default"]

        # Logger
        self._logger = logging.getLogger(__name__)

        # No prefetching - just direct calls; the settings are only recorded
        self._total_buffer_capacity: int = int(total_buffer_capacity)
        self._num_prefetch_threads: int = int(num_prefetch_threads)
        self._prefetch_poll_interval: float = float(prefetch_poll_interval)
        self._prefetch_non_immediate: bool = bool(prefetch_non_immediate)

        # Strategy selection
        self._simple_mode: bool = False
        if strategy == "simple":
            self._strategy_impl: _SchedulingStrategy = _SimpleStrategy()
            self._simple_mode = True
        elif strategy == "round_robin":
            self._strategy_impl = _RoundRobinStrategy(self._non_immediate_order, prioritize_immediate)
        elif strategy == "weighted_round_robin":
            self._strategy_impl = _WeightedRoundRobinStrategy(prioritize_immediate)
        else:
            if strategy != "lottery":
                # Keep the historical fallback, but make the misconfiguration visible.
                self._logger.warning("Unknown QoS scheduling strategy %r; falling back to 'lottery'.", strategy)
            self._strategy_impl = _LotteryStrategy(prioritize_immediate)

    # Context manager helpers for clean shutdown
    def __enter__(self) -> "QosScheduler":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    # ---------------------------- Public API ----------------------------
    def close(self) -> None:
        """
        Cleanly close the scheduler. No-op for the current implementation
        since we do not spin background threads.
        """
        return None

    def fetch_next(self, client, timeout: float = 0.0) -> Optional[dict]:
        """
        Fetch the next job using the configured strategy.

        Behavior:
        - Simple mode: a single strategy call that blocks up to 30s on the base
          queue; ``timeout`` is not used.
        - Otherwise the strategy makes one non-blocking sweep (checking
          'immediate' first when ``prioritize_immediate`` is enabled).
        - If no job is found in a full pass:
          - If timeout <= 0: return None.
          - Else: sleep in increments of at most 0.5s and retry until the
            accumulated elapsed time reaches ``timeout``, then return None.

        Parameters
        ----------
        client
            Broker client exposing ``fetch_message(queue_name, timeout)``.
        timeout : float
            Maximum time in seconds to keep retrying in non-simple modes.

        Returns
        -------
        Optional[dict]
            The fetched job, or ``None`` if none became available in time.
        """
        # Simple mode: delegate to the strategy (blocks up to 30s on base queue)
        if self._simple_mode:
            return self._strategy_impl.try_once(client, self.queues, self._non_immediate_order)

        start = time.monotonic()
        while True:
            # Strategy-based attempt (strategy may include immediate priority internally)
            job = self._strategy_impl.try_once(client, self.queues, self._non_immediate_order)
            if job is not None:
                return job

            # No job found in this sweep
            if timeout <= 0:
                return None

            remaining = timeout - (time.monotonic() - start)
            # "not > 0" (rather than "<= 0") also stops on a NaN timeout.
            if not remaining > 0:
                return None

            # Sleep up to 0.5s, but not beyond remaining timeout
            time.sleep(0.5 if remaining > 0.5 else remaining)
|
|
@@ -5,8 +5,9 @@
|
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
7
|
import math
|
|
8
|
-
import multiprocessing as mp
|
|
9
8
|
import os
|
|
9
|
+
import sys
|
|
10
|
+
import multiprocessing as mp
|
|
10
11
|
from threading import Lock
|
|
11
12
|
from typing import Any, Callable, Optional
|
|
12
13
|
|
|
@@ -103,7 +104,12 @@ class ProcessWorkerPoolSingleton:
|
|
|
103
104
|
The total number of worker processes to start.
|
|
104
105
|
"""
|
|
105
106
|
self._total_workers = total_max_workers
|
|
106
|
-
|
|
107
|
+
|
|
108
|
+
start_method = "fork"
|
|
109
|
+
if sys.platform.lower() == "darwin":
|
|
110
|
+
start_method = "spawn"
|
|
111
|
+
self._context: mp.context.ForkContext = mp.get_context(start_method)
|
|
112
|
+
|
|
107
113
|
# Bounded task queue: maximum tasks queued = 2 * total_max_workers.
|
|
108
114
|
self._task_queue: mp.Queue = self._context.Queue(maxsize=2 * total_max_workers)
|
|
109
115
|
self._next_task_id: int = 0
|