nv-ingest-api 2025.10.26.dev20251026__tar.gz → 2025.10.27.dev20251027__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nv_ingest_api-2025.10.26.dev20251026/src/nv_ingest_api.egg-info → nv_ingest_api-2025.10.27.dev20251027}/PKG-INFO +2 -1
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/pyproject.toml +1 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +4 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +4 -2
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +10 -1
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +4 -2
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +4 -2
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +10 -1
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +6 -4
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +4 -2
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +9 -1
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +2 -0
- nv_ingest_api-2025.10.27.dev20251027/src/nv_ingest_api/internal/schemas/mixins.py +39 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +4 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/transform/embed_text.py +82 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027/src/nv_ingest_api.egg-info}/PKG-INFO +2 -1
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api.egg-info/SOURCES.txt +1 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api.egg-info/requires.txt +1 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/LICENSE +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/MANIFEST.in +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/README.md +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/setup.cfg +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/interface/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/interface/extract.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/interface/mutate.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/interface/store.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/interface/transform.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/interface/utility.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/enums/common.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/meta/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/meta/udf.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/meta/udf.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/store/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/control_message/validators.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/converters/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/converters/containers.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/converters/datetools.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/converters/dftools.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/converters/formats.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/dataloader/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/dataloader/dataloader.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/detectors/language.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/imports/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/imports/callable_signatures.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/imports/dynamic_resolvers.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/introspection/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/introspection/class_inspect.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/introspection/function_inspect.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/logging/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/logging/configuration.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/logging/sanitize.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/nim/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/schema/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/string_processing/configuration.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/string_processing/yaml.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/system/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/udfs/__init__.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/udfs/llm_summarizer_udf.py +0 -0
- {nv_ingest_api-2025.10.26.dev20251026 → nv_ingest_api-2025.10.27.dev20251027}/src/version.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nv-ingest-api
|
|
3
|
-
Version: 2025.10.26.dev20251026
|
|
3
|
+
Version: 2025.10.27.dev20251027
|
|
4
4
|
Summary: Python module with core document ingestion functions.
|
|
5
5
|
Author-email: Jeremy Dyer <jdyer@nvidia.com>
|
|
6
6
|
License: Apache License
|
|
@@ -222,6 +222,7 @@ Requires-Dist: fsspec>=2025.5.1
|
|
|
222
222
|
Requires-Dist: universal_pathlib>=0.2.6
|
|
223
223
|
Requires-Dist: ffmpeg-python==0.2.0
|
|
224
224
|
Requires-Dist: tritonclient
|
|
225
|
+
Requires-Dist: glom
|
|
225
226
|
Dynamic: license-file
|
|
226
227
|
|
|
227
228
|
# nv-ingest-api
|
|
@@ -355,6 +355,10 @@ def create_audio_inference_client(
|
|
|
355
355
|
if (infer_protocol is None) and (grpc_endpoint and grpc_endpoint.strip()):
|
|
356
356
|
infer_protocol = "grpc"
|
|
357
357
|
|
|
358
|
+
# Normalize protocol to lowercase for case-insensitive comparison
|
|
359
|
+
if infer_protocol:
|
|
360
|
+
infer_protocol = infer_protocol.lower()
|
|
361
|
+
|
|
358
362
|
if infer_protocol == "http":
|
|
359
363
|
raise ValueError("`http` endpoints are not supported for audio. Use `grpc`.")
|
|
360
364
|
|
|
@@ -10,10 +10,12 @@ from typing import Tuple
|
|
|
10
10
|
from pydantic import BaseModel, Field
|
|
11
11
|
from pydantic import root_validator
|
|
12
12
|
|
|
13
|
+
from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
|
|
14
|
+
|
|
13
15
|
logger = logging.getLogger(__name__)
|
|
14
16
|
|
|
15
17
|
|
|
16
|
-
class AudioConfigSchema(BaseModel):
|
|
18
|
+
class AudioConfigSchema(LowercaseProtocolMixin):
|
|
17
19
|
"""
|
|
18
20
|
Configuration schema for audio extraction endpoints and options.
|
|
19
21
|
|
|
@@ -87,13 +89,13 @@ class AudioConfigSchema(BaseModel):
|
|
|
87
89
|
|
|
88
90
|
values[endpoint_name] = (grpc_service, http_service)
|
|
89
91
|
|
|
92
|
+
# Auto-infer protocol from endpoints if not specified
|
|
90
93
|
protocol_name = "audio_infer_protocol"
|
|
91
94
|
protocol_value = values.get(protocol_name)
|
|
92
95
|
|
|
93
96
|
if not protocol_value:
|
|
94
97
|
protocol_value = "http" if http_service else "grpc" if grpc_service else ""
|
|
95
98
|
|
|
96
|
-
protocol_value = protocol_value.lower()
|
|
97
99
|
values[protocol_name] = protocol_value
|
|
98
100
|
|
|
99
101
|
return values
|
|
@@ -8,10 +8,12 @@ from typing import Tuple
|
|
|
8
8
|
|
|
9
9
|
from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
|
|
10
10
|
|
|
11
|
+
from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
|
|
12
|
+
|
|
11
13
|
logger = logging.getLogger(__name__)
|
|
12
14
|
|
|
13
15
|
|
|
14
|
-
class ChartExtractorConfigSchema(BaseModel):
|
|
16
|
+
class ChartExtractorConfigSchema(LowercaseProtocolMixin):
|
|
15
17
|
"""
|
|
16
18
|
Configuration schema for chart extraction service endpoints and options.
|
|
17
19
|
|
|
@@ -96,6 +98,13 @@ class ChartExtractorConfigSchema(BaseModel):
|
|
|
96
98
|
|
|
97
99
|
values[endpoint_name] = (grpc_service, http_service)
|
|
98
100
|
|
|
101
|
+
# Auto-infer protocol from endpoints if not specified
|
|
102
|
+
protocol_name = endpoint_name.replace("_endpoints", "_infer_protocol")
|
|
103
|
+
protocol_value = values.get(protocol_name)
|
|
104
|
+
if not protocol_value:
|
|
105
|
+
protocol_value = "http" if http_service else "grpc" if grpc_service else ""
|
|
106
|
+
values[protocol_name] = protocol_value
|
|
107
|
+
|
|
99
108
|
return values
|
|
100
109
|
|
|
101
110
|
model_config = ConfigDict(extra="forbid")
|
|
@@ -9,10 +9,12 @@ from typing import Tuple
|
|
|
9
9
|
|
|
10
10
|
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
|
+
from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
|
|
13
|
+
|
|
12
14
|
logger = logging.getLogger(__name__)
|
|
13
15
|
|
|
14
16
|
|
|
15
|
-
class DocxConfigSchema(BaseModel):
|
|
17
|
+
class DocxConfigSchema(LowercaseProtocolMixin):
|
|
16
18
|
"""
|
|
17
19
|
Configuration schema for docx extraction endpoints and options.
|
|
18
20
|
|
|
@@ -85,11 +87,11 @@ class DocxConfigSchema(BaseModel):
|
|
|
85
87
|
|
|
86
88
|
values[endpoint_name] = (grpc_service, http_service)
|
|
87
89
|
|
|
90
|
+
# Auto-infer protocol from endpoints if not specified
|
|
88
91
|
protocol_name = f"{model_name}_infer_protocol"
|
|
89
92
|
protocol_value = values.get(protocol_name)
|
|
90
93
|
if not protocol_value:
|
|
91
94
|
protocol_value = "http" if http_service else "grpc" if grpc_service else ""
|
|
92
|
-
protocol_value = protocol_value.lower()
|
|
93
95
|
values[protocol_name] = protocol_value
|
|
94
96
|
|
|
95
97
|
return values
|
|
@@ -9,10 +9,12 @@ from typing import Tuple
|
|
|
9
9
|
|
|
10
10
|
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
|
+
from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
|
|
13
|
+
|
|
12
14
|
logger = logging.getLogger(__name__)
|
|
13
15
|
|
|
14
16
|
|
|
15
|
-
class ImageConfigSchema(BaseModel):
|
|
17
|
+
class ImageConfigSchema(LowercaseProtocolMixin):
|
|
16
18
|
"""
|
|
17
19
|
Configuration schema for image extraction endpoints and options.
|
|
18
20
|
|
|
@@ -85,11 +87,11 @@ class ImageConfigSchema(BaseModel):
|
|
|
85
87
|
|
|
86
88
|
values[endpoint_name] = (grpc_service, http_service)
|
|
87
89
|
|
|
90
|
+
# Auto-infer protocol from endpoints if not specified
|
|
88
91
|
protocol_name = f"{model_name}_infer_protocol"
|
|
89
92
|
protocol_value = values.get(protocol_name)
|
|
90
93
|
if not protocol_value:
|
|
91
94
|
protocol_value = "http" if http_service else "grpc" if grpc_service else ""
|
|
92
|
-
protocol_value = protocol_value.lower()
|
|
93
95
|
values[protocol_name] = protocol_value
|
|
94
96
|
|
|
95
97
|
return values
|
|
@@ -8,10 +8,12 @@ from typing import Tuple
|
|
|
8
8
|
|
|
9
9
|
from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
|
|
10
10
|
|
|
11
|
+
from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
|
|
12
|
+
|
|
11
13
|
logger = logging.getLogger(__name__)
|
|
12
14
|
|
|
13
15
|
|
|
14
|
-
class InfographicExtractorConfigSchema(BaseModel):
|
|
16
|
+
class InfographicExtractorConfigSchema(LowercaseProtocolMixin):
|
|
15
17
|
"""
|
|
16
18
|
Configuration schema for infographic extraction service endpoints and options.
|
|
17
19
|
|
|
@@ -89,6 +91,13 @@ class InfographicExtractorConfigSchema(BaseModel):
|
|
|
89
91
|
|
|
90
92
|
values[endpoint_name] = (grpc_service, http_service)
|
|
91
93
|
|
|
94
|
+
# Auto-infer protocol from endpoints if not specified
|
|
95
|
+
protocol_name = endpoint_name.replace("_endpoints", "_infer_protocol")
|
|
96
|
+
protocol_value = values.get(protocol_name)
|
|
97
|
+
if not protocol_value:
|
|
98
|
+
protocol_value = "http" if http_service else "grpc" if grpc_service else ""
|
|
99
|
+
values[protocol_name] = protocol_value
|
|
100
|
+
|
|
92
101
|
return values
|
|
93
102
|
|
|
94
103
|
model_config = ConfigDict(extra="forbid")
|
|
@@ -9,10 +9,12 @@ from typing import Tuple
|
|
|
9
9
|
|
|
10
10
|
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
|
+
from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
|
|
13
|
+
|
|
12
14
|
logger = logging.getLogger(__name__)
|
|
13
15
|
|
|
14
16
|
|
|
15
|
-
class PDFiumConfigSchema(BaseModel):
|
|
17
|
+
class PDFiumConfigSchema(LowercaseProtocolMixin):
|
|
16
18
|
"""
|
|
17
19
|
Configuration schema for PDFium endpoints and options.
|
|
18
20
|
|
|
@@ -82,11 +84,11 @@ class PDFiumConfigSchema(BaseModel):
|
|
|
82
84
|
|
|
83
85
|
values[endpoint_name] = (grpc_service, http_service)
|
|
84
86
|
|
|
87
|
+
# Auto-infer protocol from endpoints if not specified
|
|
85
88
|
protocol_name = f"{model_name}_infer_protocol"
|
|
86
89
|
protocol_value = values.get(protocol_name)
|
|
87
90
|
if not protocol_value:
|
|
88
91
|
protocol_value = "http" if http_service else "grpc" if grpc_service else ""
|
|
89
|
-
protocol_value = protocol_value.lower()
|
|
90
92
|
values[protocol_name] = protocol_value
|
|
91
93
|
|
|
92
94
|
return values
|
|
@@ -94,7 +96,7 @@ class PDFiumConfigSchema(BaseModel):
|
|
|
94
96
|
model_config = ConfigDict(extra="forbid")
|
|
95
97
|
|
|
96
98
|
|
|
97
|
-
class NemoRetrieverParseConfigSchema(BaseModel):
|
|
99
|
+
class NemoRetrieverParseConfigSchema(LowercaseProtocolMixin):
|
|
98
100
|
"""
|
|
99
101
|
Configuration schema for NemoRetrieverParse endpoints and options.
|
|
100
102
|
|
|
@@ -170,11 +172,11 @@ class NemoRetrieverParseConfigSchema(BaseModel):
|
|
|
170
172
|
|
|
171
173
|
values[endpoint_name] = (grpc_service, http_service)
|
|
172
174
|
|
|
175
|
+
# Auto-infer protocol from endpoints if not specified
|
|
173
176
|
protocol_name = f"{model_name}_infer_protocol"
|
|
174
177
|
protocol_value = values.get(protocol_name)
|
|
175
178
|
if not protocol_value:
|
|
176
179
|
protocol_value = "http" if http_service else "grpc" if grpc_service else ""
|
|
177
|
-
protocol_value = protocol_value.lower()
|
|
178
180
|
values[protocol_name] = protocol_value
|
|
179
181
|
|
|
180
182
|
return values
|
|
@@ -9,10 +9,12 @@ from typing import Tuple
|
|
|
9
9
|
|
|
10
10
|
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
|
+
from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
|
|
13
|
+
|
|
12
14
|
logger = logging.getLogger(__name__)
|
|
13
15
|
|
|
14
16
|
|
|
15
|
-
class PPTXConfigSchema(BaseModel):
|
|
17
|
+
class PPTXConfigSchema(LowercaseProtocolMixin):
|
|
16
18
|
"""
|
|
17
19
|
Configuration schema for pptx extraction endpoints and options.
|
|
18
20
|
|
|
@@ -85,11 +87,11 @@ class PPTXConfigSchema(BaseModel):
|
|
|
85
87
|
|
|
86
88
|
values[endpoint_name] = (grpc_service, http_service)
|
|
87
89
|
|
|
90
|
+
# Auto-infer protocol from endpoints if not specified
|
|
88
91
|
protocol_name = f"{model_name}_infer_protocol"
|
|
89
92
|
protocol_value = values.get(protocol_name)
|
|
90
93
|
if not protocol_value:
|
|
91
94
|
protocol_value = "http" if http_service else "grpc" if grpc_service else ""
|
|
92
|
-
protocol_value = protocol_value.lower()
|
|
93
95
|
values[protocol_name] = protocol_value
|
|
94
96
|
|
|
95
97
|
return values
|
|
@@ -9,11 +9,12 @@ from typing import Tuple
|
|
|
9
9
|
|
|
10
10
|
from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
|
+
from nv_ingest_api.internal.schemas.mixins import LowercaseProtocolMixin
|
|
12
13
|
|
|
13
14
|
logger = logging.getLogger(__name__)
|
|
14
15
|
|
|
15
16
|
|
|
16
|
-
class TableExtractorConfigSchema(BaseModel):
|
|
17
|
+
class TableExtractorConfigSchema(LowercaseProtocolMixin):
|
|
17
18
|
"""
|
|
18
19
|
Configuration schema for the table extraction stage settings.
|
|
19
20
|
|
|
@@ -91,6 +92,13 @@ class TableExtractorConfigSchema(BaseModel):
|
|
|
91
92
|
|
|
92
93
|
values[endpoint_name] = (grpc_service, http_service)
|
|
93
94
|
|
|
95
|
+
# Auto-infer protocol from endpoints if not specified
|
|
96
|
+
protocol_name = endpoint_name.replace("_endpoints", "_infer_protocol")
|
|
97
|
+
protocol_value = values.get(protocol_name)
|
|
98
|
+
if not protocol_value:
|
|
99
|
+
protocol_value = "http" if http_service else "grpc" if grpc_service else ""
|
|
100
|
+
values[protocol_name] = protocol_value
|
|
101
|
+
|
|
94
102
|
return values
|
|
95
103
|
|
|
96
104
|
model_config = ConfigDict(extra="forbid")
|
|
@@ -126,6 +126,8 @@ class IngestTaskEmbedSchema(BaseModelNoExt):
|
|
|
126
126
|
image_elements_modality: Optional[str] = None
|
|
127
127
|
structured_elements_modality: Optional[str] = None
|
|
128
128
|
audio_elements_modality: Optional[str] = None
|
|
129
|
+
custom_content_field: Optional[str] = None
|
|
130
|
+
result_target_field: Optional[str] = None
|
|
129
131
|
|
|
130
132
|
|
|
131
133
|
class IngestTaskVdbUploadSchema(BaseModelNoExt):
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
Shared mixins for Pydantic schemas.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import Any
|
|
10
|
+
from pydantic import BaseModel, field_validator
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LowercaseProtocolMixin(BaseModel):
    """
    Pydantic mixin that normalizes every field whose name ends with '_infer_protocol'.

    A wildcard "before"-mode field validator is registered on the inheriting schema.
    For matching fields, a non-None value is converted to ``str``, stripped of
    surrounding whitespace, and lowercased (e.g., "HTTP" -> "http"). ``None`` values
    and values of all other fields are passed through unchanged.

    Examples
    --------
    >>> class MyConfigSchema(LowercaseProtocolMixin):
    ...     yolox_infer_protocol: str = ""
    ...     ocr_infer_protocol: str = ""
    >>>
    >>> config = MyConfigSchema(yolox_infer_protocol="GRPC", ocr_infer_protocol="HTTP")
    >>> config.yolox_infer_protocol
    'grpc'
    >>> config.ocr_infer_protocol
    'http'
    """

    @field_validator("*", mode="before")
    @classmethod
    def _lowercase_protocol_fields(cls, v: Any, info):
        """Return ``v`` stripped and lowercased when it belongs to a protocol field."""
        is_protocol_field = info.field_name.endswith("_infer_protocol")
        if not is_protocol_field or v is None:
            # Not a protocol field, or nothing to normalize: hand the value back untouched.
            return v

        stripped = str(v).strip()
        return stripped.lower()
|
|
@@ -7,6 +7,8 @@ import logging
|
|
|
7
7
|
|
|
8
8
|
from pydantic import ConfigDict, BaseModel, Field, model_validator, field_validator
|
|
9
9
|
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
10
12
|
from nv_ingest_api.util.logging.configuration import LogLevel
|
|
11
13
|
|
|
12
14
|
logger = logging.getLogger(__name__)
|
|
@@ -26,6 +28,8 @@ class TextEmbeddingSchema(BaseModel):
|
|
|
26
28
|
image_elements_modality: str = Field(default="text")
|
|
27
29
|
structured_elements_modality: str = Field(default="text")
|
|
28
30
|
audio_elements_modality: str = Field(default="text")
|
|
31
|
+
custom_content_field: Optional[str] = None
|
|
32
|
+
result_target_field: Optional[str] = None
|
|
29
33
|
|
|
30
34
|
model_config = ConfigDict(extra="forbid")
|
|
31
35
|
|
|
@@ -7,6 +7,7 @@ from concurrent.futures import ThreadPoolExecutor
|
|
|
7
7
|
from functools import partial
|
|
8
8
|
from typing import Any, Dict, Tuple, Optional, Iterable, List
|
|
9
9
|
|
|
10
|
+
import glom
|
|
10
11
|
import pandas as pd
|
|
11
12
|
from openai import OpenAI
|
|
12
13
|
|
|
@@ -282,6 +283,33 @@ def _add_embeddings(row, embeddings, info_msgs):
|
|
|
282
283
|
return row
|
|
283
284
|
|
|
284
285
|
|
|
286
|
+
def _add_custom_embeddings(row, embeddings, result_target_field):
    """
    Write a row's custom-content embedding into the row's metadata.

    Parameters
    ----------
    row : pandas.Series
        A row of the DataFrame; ``row.name`` is the key looked up in ``embeddings``.
    embeddings : dict
        Dictionary mapping row indices to embeddings.
    result_target_field : str
        Path below ``custom_content`` in the row's metadata that receives the embedding.

    Returns
    -------
    pandas.Series
        The row; its "metadata" entry is reassigned only when an embedding was found.
    """
    row_embedding = embeddings.get(row.name)
    if row_embedding is None:
        # No embedding was produced for this row, so leave it as it is.
        return row

    metadata = row["metadata"]
    target_path = "custom_content." + result_target_field
    # missing=dict asks glom to create absent intermediate containers along the path.
    row["metadata"] = glom.assign(metadata, target_path, row_embedding, missing=dict)
    return row
|
|
311
|
+
|
|
312
|
+
|
|
285
313
|
def _format_image_input_string(image_b64: Optional[str]) -> str:
|
|
286
314
|
if not image_b64:
|
|
287
315
|
return
|
|
@@ -381,6 +409,20 @@ def _get_pandas_audio_content(row, modality="text"):
|
|
|
381
409
|
return row.get("audio_metadata", {}).get("audio_transcript")
|
|
382
410
|
|
|
383
411
|
|
|
412
|
+
def _get_pandas_custom_content(row, custom_content_field):
    """
    Read a user-specified field from a row's custom content and return it as a string.

    Parameters
    ----------
    row : mapping
        Object exposing ``get``; its "custom_content" entry (default ``{}``) is searched.
    custom_content_field : str
        glom path of the value to read from the custom content.

    Returns
    -------
    str or None
        ``str()`` of the looked-up value. None is returned, and a warning is logged,
        when the lookup yields None or the value cannot be converted to a string.
    """
    found = glom.glom(row.get("custom_content", {}), custom_content_field, default=None)
    if found is None:
        logger.warning(f"Custom content field: {custom_content_field} not found")
        return None

    try:
        text = str(found)
    except (TypeError, ValueError):
        logger.warning(f"Cannot convert custom content field: {custom_content_field} to string")
        return None
    return text
|
|
424
|
+
|
|
425
|
+
|
|
384
426
|
# ------------------------------------------------------------------------------
|
|
385
427
|
# Batch Processing Utilities
|
|
386
428
|
# ------------------------------------------------------------------------------
|
|
@@ -519,6 +561,7 @@ def transform_create_text_embeddings_internal(
|
|
|
519
561
|
api_key = task_config.get("api_key") or transform_config.api_key
|
|
520
562
|
endpoint_url = task_config.get("endpoint_url") or transform_config.embedding_nim_endpoint
|
|
521
563
|
model_name = task_config.get("model_name") or transform_config.embedding_model
|
|
564
|
+
custom_content_field = task_config.get("custom_content_field") or transform_config.custom_content_field
|
|
522
565
|
|
|
523
566
|
if execution_trace_log is None:
|
|
524
567
|
execution_trace_log = {}
|
|
@@ -612,4 +655,43 @@ def transform_create_text_embeddings_internal(
|
|
|
612
655
|
content_masks.append(content_mask)
|
|
613
656
|
|
|
614
657
|
combined_df = _concatenate_extractions_pandas(df_transform_ledger, embedding_dataframes, content_masks)
|
|
658
|
+
|
|
659
|
+
# Embed custom content
|
|
660
|
+
if custom_content_field is not None:
|
|
661
|
+
result_target_field = task_config.get("result_target_field") or custom_content_field + "_embedding"
|
|
662
|
+
|
|
663
|
+
extracted_custom_content = (
|
|
664
|
+
combined_df["metadata"]
|
|
665
|
+
.apply(partial(_get_pandas_custom_content, custom_content_field=custom_content_field))
|
|
666
|
+
.apply(lambda x: x.strip() if isinstance(x, str) and x.strip() else None)
|
|
667
|
+
)
|
|
668
|
+
|
|
669
|
+
valid_custom_content_mask = extracted_custom_content.notna()
|
|
670
|
+
if valid_custom_content_mask.any():
|
|
671
|
+
custom_content_list = extracted_custom_content[valid_custom_content_mask].to_list()
|
|
672
|
+
custom_content_batches = _generate_batches(custom_content_list, batch_size=transform_config.batch_size)
|
|
673
|
+
|
|
674
|
+
custom_content_embeddings = _async_runner(
|
|
675
|
+
custom_content_batches,
|
|
676
|
+
api_key,
|
|
677
|
+
endpoint_url,
|
|
678
|
+
model_name,
|
|
679
|
+
transform_config.encoding_format,
|
|
680
|
+
transform_config.input_type,
|
|
681
|
+
transform_config.truncate,
|
|
682
|
+
False,
|
|
683
|
+
)
|
|
684
|
+
custom_embeddings_dict = dict(
|
|
685
|
+
zip(
|
|
686
|
+
extracted_custom_content.loc[valid_custom_content_mask].index,
|
|
687
|
+
custom_content_embeddings.get("embeddings", []),
|
|
688
|
+
)
|
|
689
|
+
)
|
|
690
|
+
else:
|
|
691
|
+
custom_embeddings_dict = {}
|
|
692
|
+
|
|
693
|
+
combined_df = combined_df.apply(
|
|
694
|
+
_add_custom_embeddings, embeddings=custom_embeddings_dict, result_target_field=result_target_field, axis=1
|
|
695
|
+
)
|
|
696
|
+
|
|
615
697
|
return combined_df, {"trace_info": execution_trace_log}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nv-ingest-api
|
|
3
|
-
Version: 2025.10.26.dev20251026
|
|
3
|
+
Version: 2025.10.27.dev20251027
|
|
4
4
|
Summary: Python module with core document ingestion functions.
|
|
5
5
|
Author-email: Jeremy Dyer <jdyer@nvidia.com>
|
|
6
6
|
License: Apache License
|
|
@@ -222,6 +222,7 @@ Requires-Dist: fsspec>=2025.5.1
|
|
|
222
222
|
Requires-Dist: universal_pathlib>=0.2.6
|
|
223
223
|
Requires-Dist: ffmpeg-python==0.2.0
|
|
224
224
|
Requires-Dist: tritonclient
|
|
225
|
+
Requires-Dist: glom
|
|
225
226
|
Dynamic: license-file
|
|
226
227
|
|
|
227
228
|
# nv-ingest-api
|
|
@@ -78,6 +78,7 @@ src/nv_ingest_api/internal/primitives/tracing/latency.py
|
|
|
78
78
|
src/nv_ingest_api/internal/primitives/tracing/logging.py
|
|
79
79
|
src/nv_ingest_api/internal/primitives/tracing/tagging.py
|
|
80
80
|
src/nv_ingest_api/internal/schemas/__init__.py
|
|
81
|
+
src/nv_ingest_api/internal/schemas/mixins.py
|
|
81
82
|
src/nv_ingest_api/internal/schemas/extract/__init__.py
|
|
82
83
|
src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py
|
|
83
84
|
src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|