nv-ingest-api 2025.5.11.dev20250511__tar.gz → 2025.5.13.dev20250513__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- {nv_ingest_api-2025.5.11.dev20250511/src/nv_ingest_api.egg-info → nv_ingest_api-2025.5.13.dev20250513}/PKG-INFO +1 -1
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/transform.py +1 -1
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +3 -3
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/image_extractor.py +5 -5
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +1 -1
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +44 -17
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +1 -1
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +35 -38
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +7 -1
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +17 -9
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +20 -16
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +1 -1
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +2 -2
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +1 -1
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/transform/caption_image.py +1 -1
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/transform/embed_text.py +75 -56
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/exception_handlers/converters.py +1 -1
- nv_ingest_api-2025.5.13.dev20250513/src/nv_ingest_api/util/exception_handlers/decorators.py +481 -0
- nv_ingest_api-2025.5.13.dev20250513/src/nv_ingest_api/util/logging/configuration.py +38 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/pdf/pdfium.py +1 -1
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +1 -1
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +1 -1
- nv_ingest_api-2025.5.13.dev20250513/src/nv_ingest_api/util/system/__init__.py +0 -0
- nv_ingest_api-2025.5.13.dev20250513/src/nv_ingest_api/util/system/hardware_info.py +426 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api.egg-info/SOURCES.txt +3 -1
- nv_ingest_api-2025.5.11.dev20250511/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -223
- nv_ingest_api-2025.5.11.dev20250511/src/nv_ingest_api/util/logging/configuration.py +0 -31
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/LICENSE +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/MANIFEST.in +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/README.md +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/pyproject.toml +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/setup.cfg +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/extract.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/mutate.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/store.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/interface/utility.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/enums/common.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/store/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/control_message/validators.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/containers.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/datetools.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/dftools.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/formats.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/detectors/language.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/logging/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/nim/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/schema/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api.egg-info/requires.txt +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
- {nv_ingest_api-2025.5.11.dev20250511 → nv_ingest_api-2025.5.13.dev20250513}/src/version.py +0 -0
|
@@ -207,7 +207,7 @@ def transform_image_create_vlm_caption(
|
|
|
207
207
|
"api_key": api_key,
|
|
208
208
|
"prompt": prompt,
|
|
209
209
|
"endpoint_url": endpoint_url,
|
|
210
|
-
"
|
|
210
|
+
"image_caption_model_name": model_name,
|
|
211
211
|
}
|
|
212
212
|
filtered_task_config: Dict[str, str] = {k: v for k, v in task_config.items() if v is not None}
|
|
213
213
|
|
|
@@ -7,7 +7,7 @@ import base64
|
|
|
7
7
|
import functools
|
|
8
8
|
import io
|
|
9
9
|
import logging
|
|
10
|
-
from typing import Optional, Dict, Any, Union
|
|
10
|
+
from typing import Optional, Dict, Any, Union, Tuple
|
|
11
11
|
|
|
12
12
|
import pandas as pd
|
|
13
13
|
from pydantic import BaseModel
|
|
@@ -146,7 +146,7 @@ def extract_primitives_from_docx_internal(
|
|
|
146
146
|
task_config: Union[Dict[str, Any], BaseModel],
|
|
147
147
|
extraction_config: DocxExtractorSchema,
|
|
148
148
|
execution_trace_log: Optional[Dict[str, Any]] = None,
|
|
149
|
-
) -> pd.DataFrame:
|
|
149
|
+
) -> Tuple[pd.DataFrame, Union[Dict, None]]:
|
|
150
150
|
"""
|
|
151
151
|
Processes a pandas DataFrame containing DOCX files encoded in base64, extracting text from
|
|
152
152
|
each document and replacing the original content with the extracted text.
|
|
@@ -202,4 +202,4 @@ def extract_primitives_from_docx_internal(
|
|
|
202
202
|
else:
|
|
203
203
|
extracted_df = pd.DataFrame({"document_type": [], "metadata": [], "uuid": []})
|
|
204
204
|
|
|
205
|
-
return extracted_df
|
|
205
|
+
return extracted_df, {}
|
|
@@ -16,7 +16,7 @@ import pandas as pd
|
|
|
16
16
|
from pydantic import BaseModel
|
|
17
17
|
|
|
18
18
|
from nv_ingest_api.internal.extract.image.image_helpers.common import unstructured_image_extractor
|
|
19
|
-
from nv_ingest_api.internal.schemas.extract.extract_image_schema import
|
|
19
|
+
from nv_ingest_api.internal.schemas.extract.extract_image_schema import ImageConfigSchema
|
|
20
20
|
from nv_ingest_api.util.exception_handlers.decorators import unified_exception_handler
|
|
21
21
|
|
|
22
22
|
logger = logging.getLogger(__name__)
|
|
@@ -26,7 +26,7 @@ logger = logging.getLogger(__name__)
|
|
|
26
26
|
def _decode_and_extract_from_image(
|
|
27
27
|
base64_row: pd.Series,
|
|
28
28
|
task_config: Dict[str, Any],
|
|
29
|
-
validated_extraction_config:
|
|
29
|
+
validated_extraction_config: ImageConfigSchema,
|
|
30
30
|
execution_trace_log: Optional[List[Any]] = None,
|
|
31
31
|
) -> Any:
|
|
32
32
|
"""
|
|
@@ -106,10 +106,10 @@ def _decode_and_extract_from_image(
|
|
|
106
106
|
|
|
107
107
|
logger.debug(
|
|
108
108
|
f"decode_and_extract: Extracting image content using image_extraction_config: "
|
|
109
|
-
f"{validated_extraction_config
|
|
109
|
+
f"{validated_extraction_config}"
|
|
110
110
|
)
|
|
111
|
-
if validated_extraction_config
|
|
112
|
-
extract_params["image_extraction_config"] = validated_extraction_config
|
|
111
|
+
if validated_extraction_config is not None:
|
|
112
|
+
extract_params["image_extraction_config"] = validated_extraction_config
|
|
113
113
|
|
|
114
114
|
if execution_trace_log is not None:
|
|
115
115
|
extract_params["trace_info"] = execution_trace_log
|
|
@@ -476,7 +476,7 @@ def _extract_text_and_bounding_boxes(
|
|
|
476
476
|
|
|
477
477
|
def _create_clients(nemoretriever_parse_config):
|
|
478
478
|
model_interface = nemoretriever_parse_utils.NemoRetrieverParseModelInterface(
|
|
479
|
-
model_name=nemoretriever_parse_config.
|
|
479
|
+
model_name=nemoretriever_parse_config.nemoretriever_parse_model_name,
|
|
480
480
|
)
|
|
481
481
|
nemoretriever_parse_client = create_inference_client(
|
|
482
482
|
nemoretriever_parse_config.nemoretriever_parse_endpoints,
|
|
@@ -17,7 +17,6 @@
|
|
|
17
17
|
|
|
18
18
|
import logging
|
|
19
19
|
import io
|
|
20
|
-
import operator
|
|
21
20
|
import re
|
|
22
21
|
import uuid
|
|
23
22
|
from collections import defaultdict
|
|
@@ -155,6 +154,12 @@ def _finalize_images(
|
|
|
155
154
|
extracted_data.append(image_entry)
|
|
156
155
|
|
|
157
156
|
|
|
157
|
+
def _safe_position(shape):
|
|
158
|
+
top = shape.top if shape.top is not None else float("inf")
|
|
159
|
+
left = shape.left if shape.left is not None else float("inf")
|
|
160
|
+
return (top, left)
|
|
161
|
+
|
|
162
|
+
|
|
158
163
|
# -----------------------------------------------------------------------------
|
|
159
164
|
# Helper Function: Recursive Image Extraction
|
|
160
165
|
# -----------------------------------------------------------------------------
|
|
@@ -283,7 +288,7 @@ def python_pptx(
|
|
|
283
288
|
|
|
284
289
|
for slide_idx, slide in enumerate(presentation.slides):
|
|
285
290
|
# Obtain a flat list of shapes (ungrouped) sorted by top then left.
|
|
286
|
-
shapes = sorted(ungroup_shapes(slide.shapes), key=
|
|
291
|
+
shapes = sorted(ungroup_shapes(slide.shapes), key=_safe_position)
|
|
287
292
|
|
|
288
293
|
page_nearby_blocks = {
|
|
289
294
|
"text": {"content": [], "bbox": []},
|
|
@@ -656,21 +661,43 @@ def get_bbox(
|
|
|
656
661
|
shape_object: Optional[Slide] = None,
|
|
657
662
|
text_depth: Optional[TextTypeEnum] = None,
|
|
658
663
|
):
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
664
|
+
"""
|
|
665
|
+
Safely computes bounding box for a slide, shape, or document.
|
|
666
|
+
Ensures that missing or None values are gracefully handled.
|
|
667
|
+
|
|
668
|
+
Returns
|
|
669
|
+
-------
|
|
670
|
+
Tuple[int, int, int, int]
|
|
671
|
+
Bounding box as (top, left, bottom, right).
|
|
672
|
+
Defaults to (-1, -1, -1, -1) if invalid or unsupported.
|
|
673
|
+
"""
|
|
674
|
+
try:
|
|
675
|
+
if text_depth == TextTypeEnum.DOCUMENT:
|
|
676
|
+
return (-1, -1, -1, -1)
|
|
677
|
+
|
|
678
|
+
elif text_depth == TextTypeEnum.PAGE and presentation_object:
|
|
679
|
+
top = left = 0
|
|
680
|
+
width = presentation_object.slide_width
|
|
681
|
+
height = presentation_object.slide_height
|
|
682
|
+
return (top, left, top + height, left + width)
|
|
683
|
+
|
|
684
|
+
elif shape_object:
|
|
685
|
+
top = shape_object.top if shape_object.top is not None else -1
|
|
686
|
+
left = shape_object.left if shape_object.left is not None else -1
|
|
687
|
+
width = shape_object.width if shape_object.width is not None else -1
|
|
688
|
+
height = shape_object.height if shape_object.height is not None else -1
|
|
689
|
+
|
|
690
|
+
# If all are valid, return normally, else return placeholder
|
|
691
|
+
if -1 in [top, left, width, height]:
|
|
692
|
+
return (-1, -1, -1, -1)
|
|
693
|
+
|
|
694
|
+
return (top, left, top + height, left + width)
|
|
695
|
+
|
|
696
|
+
except Exception as e:
|
|
697
|
+
logger.warning(f"get_bbox: Failed to compute bbox due to {e}")
|
|
698
|
+
return (-1, -1, -1, -1)
|
|
699
|
+
|
|
700
|
+
return (-1, -1, -1, -1)
|
|
674
701
|
|
|
675
702
|
|
|
676
703
|
def ungroup_shapes(shapes):
|
|
@@ -5,9 +5,9 @@
|
|
|
5
5
|
from typing import Any, Dict, List, Optional, Tuple
|
|
6
6
|
|
|
7
7
|
from nv_ingest_api.internal.primitives.nim import ModelInterface
|
|
8
|
+
import numpy as np
|
|
8
9
|
|
|
9
10
|
|
|
10
|
-
# Assume ModelInterface is defined elsewhere in the project.
|
|
11
11
|
class EmbeddingModelInterface(ModelInterface):
|
|
12
12
|
"""
|
|
13
13
|
An interface for handling inference with an embedding model endpoint.
|
|
@@ -22,20 +22,13 @@ class EmbeddingModelInterface(ModelInterface):
|
|
|
22
22
|
|
|
23
23
|
def prepare_data_for_inference(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
|
24
24
|
"""
|
|
25
|
-
Prepare input data for embedding inference.
|
|
26
|
-
and that its value is a list.
|
|
27
|
-
|
|
28
|
-
Raises
|
|
29
|
-
------
|
|
30
|
-
KeyError
|
|
31
|
-
If the 'prompts' key is missing.
|
|
25
|
+
Prepare input data for embedding inference. Returns a list of strings representing the text to be embedded.
|
|
32
26
|
"""
|
|
33
27
|
if "prompts" not in data:
|
|
34
28
|
raise KeyError("Input data must include 'prompts'.")
|
|
35
|
-
# Ensure the prompts are in list format.
|
|
36
29
|
if not isinstance(data["prompts"], list):
|
|
37
30
|
data["prompts"] = [data["prompts"]]
|
|
38
|
-
return data
|
|
31
|
+
return {"prompts": data["prompts"]}
|
|
39
32
|
|
|
40
33
|
def format_input(
|
|
41
34
|
self, data: Dict[str, Any], protocol: str, max_batch_size: int, **kwargs
|
|
@@ -63,29 +56,32 @@ class EmbeddingModelInterface(ModelInterface):
|
|
|
63
56
|
- payloads is a list of JSON-serializable payload dictionaries.
|
|
64
57
|
- batch_data_list is a list of dictionaries containing the key "prompts" corresponding to each batch.
|
|
65
58
|
"""
|
|
66
|
-
if protocol != "http":
|
|
67
|
-
raise ValueError("EmbeddingModelInterface only supports HTTP protocol.")
|
|
68
|
-
|
|
69
|
-
prompts = data.get("prompts", [])
|
|
70
59
|
|
|
71
60
|
def chunk_list(lst, chunk_size):
|
|
61
|
+
lst = lst["prompts"]
|
|
72
62
|
return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]
|
|
73
63
|
|
|
74
|
-
batches = chunk_list(
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
"input_type": kwargs.get("input_type", "
|
|
64
|
+
batches = chunk_list(data, max_batch_size)
|
|
65
|
+
if protocol == "http":
|
|
66
|
+
payloads = []
|
|
67
|
+
batch_data_list = []
|
|
68
|
+
for batch in batches:
|
|
69
|
+
payload = {
|
|
70
|
+
"model": kwargs.get("model_name"),
|
|
71
|
+
"input": batch,
|
|
72
|
+
"encoding_format": kwargs.get("encoding_format", "float"),
|
|
73
|
+
"input_type": kwargs.get("input_type", "passage"),
|
|
84
74
|
"truncate": kwargs.get("truncate", "NONE"),
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
75
|
+
}
|
|
76
|
+
payloads.append(payload)
|
|
77
|
+
batch_data_list.append({"prompts": batch})
|
|
78
|
+
elif protocol == "grpc":
|
|
79
|
+
payloads = []
|
|
80
|
+
batch_data_list = []
|
|
81
|
+
for batch in batches:
|
|
82
|
+
text_np = np.array([[text.encode("utf-8")] for text in batch], dtype=np.object_)
|
|
83
|
+
payloads.append(text_np)
|
|
84
|
+
batch_data_list.append({"prompts": batch})
|
|
89
85
|
return payloads, batch_data_list
|
|
90
86
|
|
|
91
87
|
def parse_output(self, response: Any, protocol: str, data: Optional[Dict[str, Any]] = None, **kwargs) -> Any:
|
|
@@ -108,16 +104,17 @@ class EmbeddingModelInterface(ModelInterface):
|
|
|
108
104
|
list
|
|
109
105
|
A list of generated embeddings extracted from the response.
|
|
110
106
|
"""
|
|
111
|
-
if protocol
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
107
|
+
if protocol == "http":
|
|
108
|
+
if isinstance(response, dict):
|
|
109
|
+
embeddings = response.get("data")
|
|
110
|
+
if not embeddings:
|
|
111
|
+
raise RuntimeError("Unexpected response format: 'data' key is missing or empty.")
|
|
112
|
+
# Each item in embeddings is expected to have an 'embedding' field.
|
|
113
|
+
return [item.get("embedding", None) for item in embeddings]
|
|
114
|
+
else:
|
|
115
|
+
return [str(response)]
|
|
116
|
+
elif protocol == "grpc":
|
|
117
|
+
return [res.flatten() for res in response]
|
|
121
118
|
|
|
122
119
|
def process_inference_results(self, output: Any, protocol: str, **kwargs) -> Any:
|
|
123
120
|
"""
|
|
@@ -709,7 +709,13 @@ def postprocess_results(
|
|
|
709
709
|
raise ValueError(f"Error in postprocessing {result.shape} and {original_image_shape}: {e}")
|
|
710
710
|
|
|
711
711
|
for box, score, label in zip(bboxes, scores, labels):
|
|
712
|
-
|
|
712
|
+
# TODO(Devin): Sometimes we get back unexpected class labels?
|
|
713
|
+
if (label < 0) or (label >= len(class_labels)):
|
|
714
|
+
logger.warning(f"Invalid class label {label} found in postprocessing")
|
|
715
|
+
continue
|
|
716
|
+
else:
|
|
717
|
+
class_name = class_labels[int(label)]
|
|
718
|
+
|
|
713
719
|
annotation_dict[class_name].append([round(float(x), 4) for x in np.concatenate((box, [score]))])
|
|
714
720
|
|
|
715
721
|
out.append(annotation_dict)
|
|
@@ -129,7 +129,7 @@ class NimClient:
|
|
|
129
129
|
"""
|
|
130
130
|
if self.protocol == "grpc":
|
|
131
131
|
logger.debug("Performing gRPC inference for a batch...")
|
|
132
|
-
response = self._grpc_infer(batch_input, model_name)
|
|
132
|
+
response = self._grpc_infer(batch_input, model_name, **kwargs)
|
|
133
133
|
logger.debug("gRPC inference received response for a batch")
|
|
134
134
|
elif self.protocol == "http":
|
|
135
135
|
logger.debug("Performing HTTP inference for a batch...")
|
|
@@ -221,7 +221,7 @@ class NimClient:
|
|
|
221
221
|
|
|
222
222
|
return all_results
|
|
223
223
|
|
|
224
|
-
def _grpc_infer(self, formatted_input: np.ndarray, model_name: str) -> np.ndarray:
|
|
224
|
+
def _grpc_infer(self, formatted_input: np.ndarray, model_name: str, **kwargs) -> np.ndarray:
|
|
225
225
|
"""
|
|
226
226
|
Perform inference using the gRPC protocol.
|
|
227
227
|
|
|
@@ -238,16 +238,24 @@ class NimClient:
|
|
|
238
238
|
The output of the model as a numpy array.
|
|
239
239
|
"""
|
|
240
240
|
|
|
241
|
-
|
|
242
|
-
|
|
241
|
+
parameters = kwargs.get("parameters", {})
|
|
242
|
+
output_names = kwargs.get("outputs", ["output"])
|
|
243
|
+
dtype = kwargs.get("dtype", "FP32")
|
|
244
|
+
input_name = kwargs.get("input_name", "input")
|
|
243
245
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
logger.debug(f"gRPC inference response: {response}")
|
|
246
|
+
input_tensors = grpcclient.InferInput(input_name, formatted_input.shape, datatype=dtype)
|
|
247
|
+
input_tensors.set_data_from_numpy(formatted_input)
|
|
247
248
|
|
|
248
|
-
|
|
249
|
+
outputs = [grpcclient.InferRequestedOutput(output_name) for output_name in output_names]
|
|
250
|
+
response = self.client.infer(
|
|
251
|
+
model_name=model_name, parameters=parameters, inputs=[input_tensors], outputs=outputs
|
|
252
|
+
)
|
|
253
|
+
logger.debug(f"gRPC inference response: {response}")
|
|
249
254
|
|
|
250
|
-
|
|
255
|
+
if len(outputs) == 1:
|
|
256
|
+
return response.as_numpy(outputs[0].name())
|
|
257
|
+
else:
|
|
258
|
+
return [response.as_numpy(output.name()) for output in outputs]
|
|
251
259
|
|
|
252
260
|
def _http_infer(self, formatted_input: dict) -> dict:
|
|
253
261
|
"""
|
|
@@ -31,13 +31,15 @@ def traceable(trace_name=None):
|
|
|
31
31
|
|
|
32
32
|
Notes
|
|
33
33
|
-----
|
|
34
|
-
The decorated function must accept a IngestControlMessage object as its
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
The decorated function must accept a IngestControlMessage object as one of its arguments.
|
|
35
|
+
For a regular function, this is expected to be the first argument; for a class method,
|
|
36
|
+
this is expected to be the second argument (after 'self'). The IngestControlMessage object
|
|
37
|
+
must implement `has_metadata`, `get_metadata`, and `set_metadata` methods used by the decorator
|
|
38
|
+
to check for the trace tagging flag and to add trace metadata.
|
|
37
39
|
|
|
38
40
|
The trace metadata added by the decorator includes two entries:
|
|
39
|
-
- 'trace::entry::<trace_name>': The
|
|
40
|
-
- 'trace::exit::<trace_name>': The
|
|
41
|
+
- 'trace::entry::<trace_name>': The timestamp marking the function's entry.
|
|
42
|
+
- 'trace::exit::<trace_name>': The timestamp marking the function's exit.
|
|
41
43
|
|
|
42
44
|
Example
|
|
43
45
|
-------
|
|
@@ -47,23 +49,25 @@ def traceable(trace_name=None):
|
|
|
47
49
|
... def process_message(message):
|
|
48
50
|
... pass
|
|
49
51
|
|
|
50
|
-
Applying the decorator with a custom trace name:
|
|
51
|
-
|
|
52
|
-
>>> @traceable(custom_trace_name="CustomTraceName")
|
|
53
|
-
... def process_message(message):
|
|
54
|
-
... pass
|
|
55
|
-
|
|
56
|
-
In both examples, `process_message` will have entry and exit timestamps added to the
|
|
57
|
-
IngestControlMessage's metadata if 'config::add_trace_tagging' is True.
|
|
52
|
+
Applying the decorator with a custom trace name on a class method:
|
|
58
53
|
|
|
54
|
+
>>> class Processor:
|
|
55
|
+
... @traceable(trace_name="CustomTrace")
|
|
56
|
+
... def process(self, message):
|
|
57
|
+
... pass
|
|
59
58
|
"""
|
|
60
59
|
|
|
61
60
|
def decorator_trace_tagging(func):
|
|
62
61
|
@functools.wraps(func)
|
|
63
62
|
def wrapper_trace_tagging(*args, **kwargs):
|
|
64
|
-
# Assuming the first argument is always the message
|
|
65
63
|
ts_fetched = datetime.now()
|
|
66
|
-
|
|
64
|
+
# Determine which argument is the message.
|
|
65
|
+
if hasattr(args[0], "has_metadata"):
|
|
66
|
+
message = args[0]
|
|
67
|
+
elif len(args) > 1 and hasattr(args[1], "has_metadata"):
|
|
68
|
+
message = args[1]
|
|
69
|
+
else:
|
|
70
|
+
raise ValueError("traceable decorator could not find a message argument with 'has_metadata()'")
|
|
67
71
|
|
|
68
72
|
do_trace_tagging = (message.has_metadata("config::add_trace_tagging") is True) and (
|
|
69
73
|
message.get_metadata("config::add_trace_tagging") is True
|
|
@@ -79,7 +83,7 @@ def traceable(trace_name=None):
|
|
|
79
83
|
message.set_timestamp(f"trace::entry::{trace_prefix}_channel_in", ts_send)
|
|
80
84
|
message.set_timestamp(f"trace::exit::{trace_prefix}_channel_in", ts_fetched)
|
|
81
85
|
|
|
82
|
-
# Call the decorated function
|
|
86
|
+
# Call the decorated function.
|
|
83
87
|
result = func(*args, **kwargs)
|
|
84
88
|
|
|
85
89
|
if do_trace_tagging:
|
|
@@ -131,7 +131,7 @@ class NemoRetrieverParseConfigSchema(BaseModel):
|
|
|
131
131
|
nemoretriever_parse_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
132
132
|
nemoretriever_parse_infer_protocol: str = ""
|
|
133
133
|
|
|
134
|
-
|
|
134
|
+
nemoretriever_parse_model_name: str = "nvidia/nemoretriever-parse"
|
|
135
135
|
|
|
136
136
|
timeout: float = 300.0
|
|
137
137
|
|
|
@@ -76,7 +76,7 @@ class IngestTaskCaptionSchema(BaseModelNoExt):
|
|
|
76
76
|
api_key: Optional[str] = None
|
|
77
77
|
endpoint_url: Optional[str] = None
|
|
78
78
|
prompt: Optional[str] = None
|
|
79
|
-
|
|
79
|
+
caption_model_name: Optional[str] = None
|
|
80
80
|
|
|
81
81
|
|
|
82
82
|
class IngestTaskFilterParamsSchema(BaseModelNoExt):
|
|
@@ -104,7 +104,7 @@ class IngestTaskDedupSchema(BaseModelNoExt):
|
|
|
104
104
|
|
|
105
105
|
class IngestTaskEmbedSchema(BaseModelNoExt):
|
|
106
106
|
endpoint_url: Optional[str] = None
|
|
107
|
-
|
|
107
|
+
embedding_model_name: Optional[str] = None
|
|
108
108
|
api_key: Optional[str] = None
|
|
109
109
|
filter_errors: bool = False
|
|
110
110
|
|
|
@@ -10,6 +10,6 @@ class ImageCaptionExtractionSchema(BaseModel):
|
|
|
10
10
|
api_key: str = "api_key"
|
|
11
11
|
endpoint_url: str = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct/chat/completions"
|
|
12
12
|
prompt: str = "Caption the content of this image:"
|
|
13
|
-
|
|
13
|
+
image_caption_model_name: str = "meta/llama-3.2-11b-vision-instruct"
|
|
14
14
|
raise_on_failure: bool = False
|
|
15
15
|
model_config = ConfigDict(extra="forbid")
|
|
@@ -173,7 +173,7 @@ def transform_image_create_vlm_caption_internal(
|
|
|
173
173
|
api_key: str = task_config.get("api_key") or transform_config.api_key
|
|
174
174
|
prompt: str = task_config.get("prompt") or transform_config.prompt
|
|
175
175
|
endpoint_url: str = task_config.get("endpoint_url") or transform_config.endpoint_url
|
|
176
|
-
model_name: str = task_config.get("
|
|
176
|
+
model_name: str = task_config.get("image_caption_model_name") or transform_config.image_caption_model_name
|
|
177
177
|
|
|
178
178
|
# Create a mask for rows where the content type is "image".
|
|
179
179
|
df_mask: pd.Series = df_transform_ledger["metadata"].apply(
|