nv-ingest-api 2025.8.19.dev20250819__tar.gz → 2025.8.21.dev20250821__tar.gz
This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- {nv_ingest_api-2025.8.19.dev20250819/src/nv_ingest_api.egg-info → nv_ingest_api-2025.8.21.dev20250821}/PKG-INFO +1 -1
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/interface/__init__.py +14 -11
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +4 -1
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +3 -2
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +3 -3
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +2 -2
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +2 -2
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +2 -2
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +2 -2
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +3 -3
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +2 -2
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +2 -2
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +4 -4
- nv_ingest_api-2025.8.21.dev20250821/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +33 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +15 -2
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/transform/embed_text.py +4 -1
- nv_ingest_api-2025.8.21.dev20250821/src/nv_ingest_api/util/logging/sanitize.py +84 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api.egg-info/SOURCES.txt +1 -0
- nv_ingest_api-2025.8.19.dev20250819/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -15
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/LICENSE +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/MANIFEST.in +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/README.md +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/pyproject.toml +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/setup.cfg +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/interface/extract.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/interface/mutate.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/interface/store.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/interface/transform.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/interface/utility.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/enums/common.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/meta/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/meta/udf.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/meta/udf.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/store/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/control_message/validators.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/converters/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/converters/containers.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/converters/datetools.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/converters/dftools.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/converters/formats.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/detectors/language.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/imports/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/imports/callable_signatures.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/imports/dynamic_resolvers.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/introspection/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/introspection/class_inspect.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/introspection/function_inspect.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/logging/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/logging/configuration.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/nim/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/schema/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/string_processing/configuration.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/string_processing/yaml.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/system/__init__.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api.egg-info/requires.txt +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
- {nv_ingest_api-2025.8.19.dev20250819 → nv_ingest_api-2025.8.21.dev20250821}/src/version.py +0 -0
|
@@ -11,6 +11,7 @@ from typing import Dict, Any, Optional, List
|
|
|
11
11
|
from pydantic import BaseModel
|
|
12
12
|
|
|
13
13
|
from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import PDFiumConfigSchema, NemoRetrieverParseConfigSchema
|
|
14
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
14
15
|
|
|
15
16
|
logger = logging.getLogger(__name__)
|
|
16
17
|
|
|
@@ -180,29 +181,31 @@ def extraction_interface_relay_constructor(api_fn, task_keys: Optional[List[str]
|
|
|
180
181
|
if extractor_schema is None:
|
|
181
182
|
extractor_schema = {f"{extract_method}_config": extraction_config_dict}
|
|
182
183
|
|
|
183
|
-
# Log the task and extractor configurations for debugging
|
|
184
|
+
# Log the task and extractor configurations for debugging (sanitized)
|
|
184
185
|
logger.debug("\n" + "=" * 80)
|
|
185
186
|
logger.debug(f"DEBUG - API Function: {api_fn.__name__}")
|
|
186
187
|
logger.debug(f"DEBUG - Extract Method: {extract_method}")
|
|
187
188
|
logger.debug("-" * 80)
|
|
188
189
|
|
|
189
|
-
#
|
|
190
|
-
|
|
191
|
-
|
|
190
|
+
# Sanitize and format the task config as a string and log it
|
|
191
|
+
sanitized_task_config = sanitize_for_logging(task_config)
|
|
192
|
+
task_config_str = pprint.pformat(sanitized_task_config, width=100, sort_dicts=False)
|
|
193
|
+
logger.debug(f"DEBUG - Task Config (sanitized):\n{task_config_str}")
|
|
192
194
|
logger.debug("-" * 80)
|
|
193
195
|
|
|
194
|
-
#
|
|
196
|
+
# Sanitize and format the extractor config as a string and log it
|
|
195
197
|
if hasattr(extractor_schema, "model_dump"):
|
|
196
|
-
|
|
198
|
+
sanitized_extractor_config = sanitize_for_logging(extractor_schema.model_dump())
|
|
197
199
|
else:
|
|
198
|
-
|
|
200
|
+
sanitized_extractor_config = sanitize_for_logging(extractor_schema)
|
|
201
|
+
extractor_config_str = pprint.pformat(sanitized_extractor_config, width=100, sort_dicts=False)
|
|
199
202
|
logger.debug(f"DEBUG - Extractor Config Type: {type(extractor_schema)}")
|
|
200
|
-
logger.debug(f"DEBUG - Extractor Config:\n{extractor_config_str}")
|
|
203
|
+
logger.debug(f"DEBUG - Extractor Config (sanitized):\n{extractor_config_str}")
|
|
201
204
|
logger.debug("=" * 80 + "\n")
|
|
202
205
|
|
|
203
|
-
# Call the backend API function.
|
|
204
|
-
pprint.pprint(
|
|
205
|
-
pprint.pprint(
|
|
206
|
+
# Call the backend API function. Print sanitized configs for any debug consumers of stdout.
|
|
207
|
+
pprint.pprint(sanitized_task_config)
|
|
208
|
+
pprint.pprint(sanitized_extractor_config)
|
|
206
209
|
result = api_fn(ledger, task_config, extractor_schema, execution_trace_log)
|
|
207
210
|
|
|
208
211
|
# If the result is a tuple, return only the first element
|
|
@@ -193,7 +193,10 @@ async def async_llama_parse(
|
|
|
193
193
|
A string of extracted text.
|
|
194
194
|
"""
|
|
195
195
|
base_url = "https://api.cloud.llamaindex.ai/api/parsing"
|
|
196
|
-
|
|
196
|
+
# Normalize in case api_key contains only whitespace; avoid sending an empty bearer token
|
|
197
|
+
_token = (api_key or "").strip()
|
|
198
|
+
_auth_value = f"Bearer {_token}" if _token else "Bearer <no key provided>"
|
|
199
|
+
headers = {"Authorization": _auth_value}
|
|
197
200
|
mime_type = "application/pdf"
|
|
198
201
|
|
|
199
202
|
try:
|
|
@@ -11,6 +11,7 @@ from typing import Any
|
|
|
11
11
|
from typing import Dict
|
|
12
12
|
from typing import List
|
|
13
13
|
from typing import Optional
|
|
14
|
+
from nv_ingest_api.util.logging.sanitize import sanitize_for_logging
|
|
14
15
|
|
|
15
16
|
import pandas as pd
|
|
16
17
|
from nv_ingest_api.internal.extract.pdf.engines import adobe_extractor
|
|
@@ -131,7 +132,7 @@ def _orchestrate_row_extraction(
|
|
|
131
132
|
method_config = extractor_config[config_key]
|
|
132
133
|
else:
|
|
133
134
|
# If no matching config is found, log a warning but don't fail
|
|
134
|
-
logger.warning(f"No {config_key} found in extractor_config: {extractor_config}")
|
|
135
|
+
logger.warning(f"No {config_key} found in extractor_config: {sanitize_for_logging(extractor_config)}")
|
|
135
136
|
method_config = None
|
|
136
137
|
|
|
137
138
|
# Add the method-specific config to the parameters if available
|
|
@@ -141,7 +142,7 @@ def _orchestrate_row_extraction(
|
|
|
141
142
|
|
|
142
143
|
# The resulting parameters constitute the complete extractor_config
|
|
143
144
|
extractor_config = params
|
|
144
|
-
logger.debug(f"Final extractor_config: {extractor_config}")
|
|
145
|
+
logger.debug(f"Final extractor_config: {sanitize_for_logging(extractor_config)}")
|
|
145
146
|
|
|
146
147
|
result = _work_extract_pdf(
|
|
147
148
|
pdf_stream=pdf_stream,
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import BaseModel
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
11
|
from pydantic import root_validator
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
@@ -42,12 +42,12 @@ class AudioConfigSchema(BaseModel):
|
|
|
42
42
|
Pydantic config option to forbid extra fields.
|
|
43
43
|
"""
|
|
44
44
|
|
|
45
|
-
auth_token: Optional[str] = None
|
|
45
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
46
46
|
audio_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
47
47
|
audio_infer_protocol: Optional[str] = None
|
|
48
48
|
function_id: Optional[str] = None
|
|
49
49
|
use_ssl: Optional[bool] = None
|
|
50
|
-
ssl_cert: Optional[str] = None
|
|
50
|
+
ssl_cert: Optional[str] = Field(default=None, repr=False)
|
|
51
51
|
segment_audio: Optional[bool] = None
|
|
52
52
|
|
|
53
53
|
@root_validator(pre=True)
|
|
@@ -6,7 +6,7 @@ import logging
|
|
|
6
6
|
from typing import Optional
|
|
7
7
|
from typing import Tuple
|
|
8
8
|
|
|
9
|
-
from pydantic import field_validator, model_validator, ConfigDict, BaseModel
|
|
9
|
+
from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
@@ -44,7 +44,7 @@ class ChartExtractorConfigSchema(BaseModel):
|
|
|
44
44
|
Pydantic config option to forbid extra fields.
|
|
45
45
|
"""
|
|
46
46
|
|
|
47
|
-
auth_token: Optional[str] = None
|
|
47
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
48
48
|
|
|
49
49
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
50
50
|
yolox_infer_protocol: str = ""
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import model_validator, ConfigDict, BaseModel
|
|
10
|
+
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -41,7 +41,7 @@ class DocxConfigSchema(BaseModel):
|
|
|
41
41
|
Pydantic config option to forbid extra fields.
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
|
-
auth_token: Optional[str] = None
|
|
44
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
45
45
|
|
|
46
46
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
47
47
|
yolox_infer_protocol: str = ""
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import model_validator, ConfigDict, BaseModel
|
|
10
|
+
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -41,7 +41,7 @@ class ImageConfigSchema(BaseModel):
|
|
|
41
41
|
Pydantic config option to forbid extra fields.
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
|
-
auth_token: Optional[str] = None
|
|
44
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
45
45
|
|
|
46
46
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
47
47
|
yolox_infer_protocol: str = ""
|
|
@@ -6,7 +6,7 @@ import logging
|
|
|
6
6
|
from typing import Optional
|
|
7
7
|
from typing import Tuple
|
|
8
8
|
|
|
9
|
-
from pydantic import field_validator, model_validator, ConfigDict, BaseModel
|
|
9
|
+
from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
|
|
10
10
|
|
|
11
11
|
logger = logging.getLogger(__name__)
|
|
12
12
|
|
|
@@ -40,7 +40,7 @@ class InfographicExtractorConfigSchema(BaseModel):
|
|
|
40
40
|
Pydantic config option to forbid extra fields.
|
|
41
41
|
"""
|
|
42
42
|
|
|
43
|
-
auth_token: Optional[str] = None
|
|
43
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
44
44
|
|
|
45
45
|
ocr_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
46
46
|
ocr_infer_protocol: str = ""
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import model_validator, ConfigDict, BaseModel
|
|
10
|
+
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -41,7 +41,7 @@ class PDFiumConfigSchema(BaseModel):
|
|
|
41
41
|
Pydantic config option to forbid extra fields.
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
|
-
auth_token: Optional[str] = None
|
|
44
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
45
45
|
|
|
46
46
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
47
47
|
yolox_infer_protocol: str = ""
|
|
@@ -123,7 +123,7 @@ class NemoRetrieverParseConfigSchema(BaseModel):
|
|
|
123
123
|
Pydantic config option to forbid extra fields.
|
|
124
124
|
"""
|
|
125
125
|
|
|
126
|
-
auth_token: Optional[str] = None
|
|
126
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
127
127
|
|
|
128
128
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
129
129
|
yolox_infer_protocol: str = ""
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import model_validator, ConfigDict, BaseModel
|
|
10
|
+
from pydantic import model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
@@ -41,7 +41,7 @@ class PPTXConfigSchema(BaseModel):
|
|
|
41
41
|
Pydantic config option to forbid extra fields.
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
|
-
auth_token: Optional[str] = None
|
|
44
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
45
45
|
|
|
46
46
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
47
47
|
yolox_infer_protocol: str = ""
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import Optional
|
|
8
8
|
from typing import Tuple
|
|
9
9
|
|
|
10
|
-
from pydantic import field_validator, model_validator, ConfigDict, BaseModel
|
|
10
|
+
from pydantic import field_validator, model_validator, ConfigDict, BaseModel, Field
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
@@ -42,7 +42,7 @@ class TableExtractorConfigSchema(BaseModel):
|
|
|
42
42
|
Pydantic config option to forbid extra fields.
|
|
43
43
|
"""
|
|
44
44
|
|
|
45
|
-
auth_token: Optional[str] = None
|
|
45
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
46
46
|
|
|
47
47
|
yolox_endpoints: Tuple[Optional[str], Optional[str]] = (None, None)
|
|
48
48
|
yolox_infer_protocol: str = ""
|
|
@@ -73,7 +73,7 @@ class IngestTaskStoreSchema(BaseModelNoExt):
|
|
|
73
73
|
|
|
74
74
|
# Captioning: All fields are optional and override default parameters.
|
|
75
75
|
class IngestTaskCaptionSchema(BaseModelNoExt):
|
|
76
|
-
api_key: Optional[str] = None
|
|
76
|
+
api_key: Optional[str] = Field(default=None, repr=False)
|
|
77
77
|
endpoint_url: Optional[str] = None
|
|
78
78
|
prompt: Optional[str] = None
|
|
79
79
|
model_name: Optional[str] = None
|
|
@@ -105,7 +105,7 @@ class IngestTaskDedupSchema(BaseModelNoExt):
|
|
|
105
105
|
class IngestTaskEmbedSchema(BaseModelNoExt):
|
|
106
106
|
endpoint_url: Optional[str] = None
|
|
107
107
|
model_name: Optional[str] = None
|
|
108
|
-
api_key: Optional[str] = None
|
|
108
|
+
api_key: Optional[str] = Field(default=None, repr=False)
|
|
109
109
|
filter_errors: bool = False
|
|
110
110
|
text_elements_modality: Optional[str] = None
|
|
111
111
|
image_elements_modality: Optional[str] = None
|
|
@@ -121,13 +121,13 @@ class IngestTaskVdbUploadSchema(BaseModelNoExt):
|
|
|
121
121
|
|
|
122
122
|
|
|
123
123
|
class IngestTaskAudioExtraction(BaseModelNoExt):
|
|
124
|
-
auth_token: Optional[str] = None
|
|
124
|
+
auth_token: Optional[str] = Field(default=None, repr=False)
|
|
125
125
|
grpc_endpoint: Optional[str] = None
|
|
126
126
|
http_endpoint: Optional[str] = None
|
|
127
127
|
infer_protocol: Optional[str] = None
|
|
128
128
|
function_id: Optional[str] = None
|
|
129
129
|
use_ssl: Optional[bool] = None
|
|
130
|
-
ssl_cert: Optional[str] = None
|
|
130
|
+
ssl_cert: Optional[str] = Field(default=None, repr=False)
|
|
131
131
|
segment_audio: Optional[bool] = None
|
|
132
132
|
|
|
133
133
|
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
from pydantic import ConfigDict, BaseModel, model_validator, field_validator, Field
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ImageCaptionExtractionSchema(BaseModel):
    """Configuration for image caption extraction.

    Every field has a default, and unknown fields are rejected
    (``extra="forbid"``). A ``None`` or missing ``api_key`` is normalised to
    the empty string before validation.
    """

    # repr=False keeps the key out of the model's repr()/str() output.
    api_key: str = Field(default="", repr=False)
    endpoint_url: str = "https://integrate.api.nvidia.com/v1/chat/completions"
    prompt: str = "Caption the content of this image:"
    model_name: str = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
    raise_on_failure: bool = False
    model_config = ConfigDict(extra="forbid")

    @field_validator("api_key", mode="before")
    @classmethod
    def _coerce_api_key_none(cls, v):
        """Map ``api_key=None`` to ``""`` before the ``str`` type check runs."""
        return "" if v is None else v

    @model_validator(mode="before")
    @classmethod
    def _coerce_none_to_empty(cls, values):
        """Allow None for string fields where empty string is acceptable.

        Specifically, convert api_key=None (or a missing api_key) to
        api_key="" so validation passes when no API key is supplied.

        The replacement is made on a shallow copy: ``values`` may be the very
        dict the caller handed to ``model_validate``, and validation should
        not modify the caller's data. Non-dict inputs are returned unchanged.
        """
        if isinstance(values, dict) and values.get("api_key") is None:
            return {**values, "api_key": ""}
        return values
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
7
|
|
|
8
|
-
from pydantic import ConfigDict, BaseModel, Field
|
|
8
|
+
from pydantic import ConfigDict, BaseModel, Field, model_validator, field_validator
|
|
9
9
|
|
|
10
10
|
from nv_ingest_api.util.logging.configuration import LogLevel
|
|
11
11
|
|
|
@@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class TextEmbeddingSchema(BaseModel):
|
|
16
|
-
api_key: str = Field(default="api_key")
|
|
16
|
+
api_key: str = Field(default="", repr=False)
|
|
17
17
|
batch_size: int = Field(default=4)
|
|
18
18
|
embedding_model: str = Field(default="nvidia/llama-3.2-nv-embedqa-1b-v2")
|
|
19
19
|
embedding_nim_endpoint: str = Field(default="http://embedding:8000/v1")
|
|
@@ -28,3 +28,16 @@ class TextEmbeddingSchema(BaseModel):
|
|
|
28
28
|
audio_elements_modality: str = Field(default="text")
|
|
29
29
|
|
|
30
30
|
model_config = ConfigDict(extra="forbid")
|
|
31
|
+
|
|
32
|
+
@field_validator("api_key", mode="before")
@classmethod
def _coerce_api_key_none(cls, v):
    """Replace a ``None`` api_key with the empty string; pass any other value through."""
    if v is None:
        return ""
    return v
|
|
36
|
+
|
|
37
|
+
@model_validator(mode="before")
@classmethod
def _coerce_none_to_empty(cls, values):
    """Convert api_key=None to empty string so validation passes when key is omitted.

    For dict input, a missing or ``None`` ``api_key`` becomes ``""``. The
    change is applied to a shallow copy so that a dict passed directly to
    ``model_validate`` is not modified as a side effect of validation.
    Non-dict inputs are returned unchanged.
    """
    if isinstance(values, dict) and values.get("api_key") is None:
        return {**values, "api_key": ""}
    return values
|
|
@@ -75,8 +75,11 @@ def _make_async_request(
|
|
|
75
75
|
response = {}
|
|
76
76
|
|
|
77
77
|
try:
|
|
78
|
+
# Normalize API key to avoid sending an empty bearer token via SDK internals
|
|
79
|
+
_token = (api_key or "").strip()
|
|
80
|
+
_api_key = _token if _token else "<no key provided>"
|
|
78
81
|
client = OpenAI(
|
|
79
|
-
api_key=api_key,
|
|
82
|
+
api_key=_api_key,
|
|
80
83
|
base_url=embedding_nim_endpoint,
|
|
81
84
|
)
|
|
82
85
|
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
+
# All rights reserved.
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
from typing import Any, Mapping, MutableMapping, Sequence, Set
|
|
8
|
+
|
|
9
|
+
try:
    # Pydantic is optional at runtime for this helper; import if available
    from pydantic import BaseModel  # type: ignore
except Exception:  # pragma: no cover - pydantic always present in this repo
    BaseModel = None  # type: ignore


# Key names whose values are redacted by default. Matching is exact and
# case-insensitive (keys are lower-cased before comparison).
_DEFAULT_SENSITIVE_KEYS: Set[str] = {
    "access_token",
    "api_key",
    "authorization",
    "auth_token",
    "client_secret",
    "hf_access_token",
    "hugging_face_access_token",
    "password",
    "refresh_token",
    "secret",
    "ssl_cert",
    "x-api-key",
}

# Placeholder written in place of a redacted value.
_REDACTION = "***REDACTED***"


def _is_mapping(obj: Any) -> bool:
    """Return True if *obj* is a ``Mapping``; False if the check itself raises."""
    try:
        return isinstance(obj, Mapping)
    except Exception:
        return False


def _is_sequence(obj: Any) -> bool:
    """Return True for sequences that should be traversed element by element."""
    # Exclude strings/bytes from sequences we want to traverse
    return isinstance(obj, Sequence) and not isinstance(obj, (str, bytes, bytearray))


def _rebuild_mapping(original: Any, items: dict) -> Any:
    """Return *items* as ``type(original)`` when possible, else as a plain dict."""
    cls = type(original)
    if cls is dict:
        return items
    try:
        out = cls()  # preserve mapping type where possible
        for key, value in items.items():
            out[key] = value
        return out
    except Exception:
        # Read-only mappings (e.g. MappingProxyType) or mappings whose
        # constructor needs arguments cannot be rebuilt; a plain dict is safe
        # to log and must not make the logging call fail.
        return items


def _rebuild_sequence(original: Any, items: list) -> Any:
    """Return *items* as ``type(original)`` when possible, else as a plain list."""
    cls = type(original)
    if cls is list:
        return items
    try:
        return cls(items)
    except Exception:
        # e.g. range, or tuple subclasses with a custom constructor.
        return items


def _sanitize(data: Any, keys: Set[str], redaction: str) -> Any:
    """Recursive worker for ``sanitize_for_logging``; *keys* is already lower-cased."""
    # Handle Pydantic models without requiring pydantic to be installed
    if BaseModel is not None and isinstance(data, BaseModel):  # type: ignore[arg-type]
        try:
            return _sanitize(data.model_dump(), keys, redaction)
        except Exception:
            # Deliberate best effort: fall through and try generic handling below
            pass

    # Dict-like
    if _is_mapping(data):
        cleaned = {
            k: redaction if str(k).lower() in keys else _sanitize(v, keys, redaction)
            for k, v in data.items()
        }
        return _rebuild_mapping(data, cleaned)

    # List/Tuple/Sequence
    if _is_sequence(data):
        cls = type(data)
        fields = getattr(cls, "_fields", None)
        if isinstance(data, tuple) and fields is not None and hasattr(cls, "_make"):
            # namedtuple: its constructor takes one positional argument per
            # field, so rebuild through _make(); field names are treated like
            # mapping keys for redaction purposes.
            return cls._make(
                redaction if str(name).lower() in keys else _sanitize(value, keys, redaction)
                for name, value in zip(fields, data)
            )
        return _rebuild_sequence(data, [_sanitize(v, keys, redaction) for v in data])

    # Fallback: return as-is
    return data


def sanitize_for_logging(
    data: Any,
    sensitive_keys: Set[str] | None = None,
    redaction: str = _REDACTION,
) -> Any:
    """
    Recursively sanitize common secret fields from dicts, lists, tuples, and Pydantic models.

    - Key comparison is case-insensitive and matches exact keys only.
    - Does not mutate input; returns a sanitized deep copy.
    - For Pydantic BaseModel instances, uses model_dump() before redaction.
    - Container types are preserved where they can be rebuilt; containers that
      cannot be reconstructed (read-only mappings, ``range``, ...) are returned
      as a plain ``dict`` / ``list`` instead of raising.
    - namedtuple fields whose name is a sensitive key are redacted.

    Parameters
    ----------
    data : Any
        Value to sanitize. Anything that is not a mapping, a non-string
        sequence, or a Pydantic model is returned unchanged.
    sensitive_keys : Set[str] | None
        Key names to redact. ``None`` or an empty set selects the built-in
        default key list.
    redaction : str
        Replacement written in place of each sensitive value.

    Returns
    -------
    Any
        A sanitized copy of ``data``.
    """
    # Normalise once here rather than on every recursive call.
    keys = {k.lower() for k in (sensitive_keys or _DEFAULT_SENSITIVE_KEYS)}
    return _sanitize(data, keys, redaction)
|
|
@@ -145,6 +145,7 @@ src/nv_ingest_api/util/introspection/class_inspect.py
|
|
|
145
145
|
src/nv_ingest_api/util/introspection/function_inspect.py
|
|
146
146
|
src/nv_ingest_api/util/logging/__init__.py
|
|
147
147
|
src/nv_ingest_api/util/logging/configuration.py
|
|
148
|
+
src/nv_ingest_api/util/logging/sanitize.py
|
|
148
149
|
src/nv_ingest_api/util/message_brokers/__init__.py
|
|
149
150
|
src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py
|
|
150
151
|
src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
|
|
2
|
-
# All rights reserved.
|
|
3
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
from pydantic import ConfigDict, BaseModel
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class ImageCaptionExtractionSchema(BaseModel):
|
|
10
|
-
api_key: str = "api_key"
|
|
11
|
-
endpoint_url: str = "https://integrate.api.nvidia.com/v1/chat/completions"
|
|
12
|
-
prompt: str = "Caption the content of this image:"
|
|
13
|
-
model_name: str = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
|
|
14
|
-
raise_on_failure: bool = False
|
|
15
|
-
model_config = ConfigDict(extra="forbid")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|