nv-ingest-api 2025.7.14.dev20250714__tar.gz → 2025.7.15.dev20250715__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- {nv_ingest_api-2025.7.14.dev20250714/src/nv_ingest_api.egg-info → nv_ingest_api-2025.7.15.dev20250715}/PKG-INFO +2 -1
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/pyproject.toml +1 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +2 -1
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +2 -1
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +1 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +7 -12
- nv_ingest_api-2025.7.15.dev20250715/src/nv_ingest_api/util/image_processing/transforms.py +657 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/pdf/pdfium.py +5 -13
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715/src/nv_ingest_api.egg-info}/PKG-INFO +2 -1
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api.egg-info/requires.txt +1 -0
- nv_ingest_api-2025.7.14.dev20250714/src/nv_ingest_api/util/image_processing/transforms.py +0 -407
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/LICENSE +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/MANIFEST.in +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/README.md +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/setup.cfg +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/interface/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/interface/extract.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/interface/mutate.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/interface/store.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/interface/transform.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/interface/utility.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/enums/common.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/store/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/transform/embed_text.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/control_message/validators.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/converters/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/converters/containers.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/converters/datetools.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/converters/dftools.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/converters/formats.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/detectors/language.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/imports/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/imports/callable_signatures.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/imports/dynamic_resolvers.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/logging/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/logging/configuration.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/nim/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/schema/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/system/__init__.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api.egg-info/SOURCES.txt +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
- {nv_ingest_api-2025.7.14.dev20250714 → nv_ingest_api-2025.7.15.dev20250715}/src/version.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nv-ingest-api
|
|
3
|
-
Version: 2025.7.14.dev20250714
|
|
3
|
+
Version: 2025.7.15.dev20250715
|
|
4
4
|
Summary: Python module with core document ingestion functions.
|
|
5
5
|
Author-email: Jeremy Dyer <jdyer@nvidia.com>
|
|
6
6
|
License: Apache License
|
|
@@ -217,6 +217,7 @@ Requires-Dist: backoff==2.2.1
|
|
|
217
217
|
Requires-Dist: pandas>=2.0
|
|
218
218
|
Requires-Dist: pydantic>2.0.0
|
|
219
219
|
Requires-Dist: pydantic-settings>2.0.0
|
|
220
|
+
Requires-Dist: tritonclient
|
|
220
221
|
Dynamic: license-file
|
|
221
222
|
|
|
222
223
|
# nv-ingest-api
|
|
@@ -40,6 +40,7 @@ from nv_ingest_api.internal.schemas.meta.metadata_schema import validate_metadat
|
|
|
40
40
|
from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
|
|
41
41
|
YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
|
|
42
42
|
YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
|
|
43
|
+
YOLOX_PAGE_IMAGE_FORMAT,
|
|
43
44
|
)
|
|
44
45
|
from nv_ingest_api.internal.schemas.extract.extract_pdf_schema import NemoRetrieverParseConfigSchema
|
|
45
46
|
from nv_ingest_api.util.metadata.aggregators import (
|
|
@@ -355,7 +356,7 @@ def nemoretriever_parse_extractor(
|
|
|
355
356
|
img_numpy = crop_image(page_image, transformed_bbox)
|
|
356
357
|
|
|
357
358
|
if img_numpy is not None:
|
|
358
|
-
base64_img = numpy_to_base64(img_numpy)
|
|
359
|
+
base64_img = numpy_to_base64(img_numpy, format=YOLOX_PAGE_IMAGE_FORMAT)
|
|
359
360
|
image = Base64Image(
|
|
360
361
|
image=base64_img,
|
|
361
362
|
bbox=transformed_bbox,
|
|
@@ -28,6 +28,7 @@ from nv_ingest_api.internal.primitives.nim.default_values import YOLOX_MAX_BATCH
|
|
|
28
28
|
from nv_ingest_api.internal.primitives.nim.model_interface.yolox import (
|
|
29
29
|
YOLOX_PAGE_IMAGE_PREPROC_WIDTH,
|
|
30
30
|
YOLOX_PAGE_IMAGE_PREPROC_HEIGHT,
|
|
31
|
+
YOLOX_PAGE_IMAGE_FORMAT,
|
|
31
32
|
get_yolox_model_name,
|
|
32
33
|
YoloxPageElementsModelInterface,
|
|
33
34
|
)
|
|
@@ -186,7 +187,7 @@ def _extract_page_element_images(
|
|
|
186
187
|
if cropped is None:
|
|
187
188
|
continue
|
|
188
189
|
|
|
189
|
-
base64_img = numpy_to_base64(cropped)
|
|
190
|
+
base64_img = numpy_to_base64(cropped, format=YOLOX_PAGE_IMAGE_FORMAT)
|
|
190
191
|
|
|
191
192
|
bbox_in_orig_coord = (
|
|
192
193
|
int(w1) - pad_width,
|
|
@@ -120,6 +120,7 @@ class NemoRetrieverParseModelInterface(ModelInterface):
|
|
|
120
120
|
logger.debug("Formatting input for HTTP NemoRetrieverParse model")
|
|
121
121
|
# Prepare payload for HTTP request
|
|
122
122
|
|
|
123
|
+
## TODO: Ask @Edward Kim if we want to switch to JPEG/PNG here
|
|
123
124
|
if "images" in data:
|
|
124
125
|
base64_list = [numpy_to_base64(img) for img in data["images"]]
|
|
125
126
|
else:
|
|
@@ -2,9 +2,7 @@
|
|
|
2
2
|
# All rights reserved.
|
|
3
3
|
# SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
import base64
|
|
7
|
-
import io
|
|
5
|
+
import os
|
|
8
6
|
import logging
|
|
9
7
|
import warnings
|
|
10
8
|
from math import log
|
|
@@ -20,11 +18,11 @@ import packaging
|
|
|
20
18
|
import pandas as pd
|
|
21
19
|
import torch
|
|
22
20
|
import torchvision
|
|
23
|
-
from PIL import Image
|
|
24
21
|
|
|
25
22
|
from nv_ingest_api.internal.primitives.nim import ModelInterface
|
|
26
23
|
from nv_ingest_api.internal.primitives.nim.model_interface.helpers import get_model_name
|
|
27
24
|
from nv_ingest_api.util.image_processing import scale_image_to_encoding_size
|
|
25
|
+
from nv_ingest_api.util.image_processing.transforms import numpy_to_base64
|
|
28
26
|
|
|
29
27
|
logger = logging.getLogger(__name__)
|
|
30
28
|
|
|
@@ -35,6 +33,7 @@ YOLOX_PAGE_MIN_SCORE = 0.1
|
|
|
35
33
|
YOLOX_PAGE_NIM_MAX_IMAGE_SIZE = 512_000
|
|
36
34
|
YOLOX_PAGE_IMAGE_PREPROC_HEIGHT = 1024
|
|
37
35
|
YOLOX_PAGE_IMAGE_PREPROC_WIDTH = 1024
|
|
36
|
+
YOLOX_PAGE_IMAGE_FORMAT = os.getenv("YOLOX_PAGE_IMAGE_FORMAT", "PNG")
|
|
38
37
|
|
|
39
38
|
# yolox-page-elements-v1 contants
|
|
40
39
|
YOLOX_PAGE_V1_NUM_CLASSES = 4
|
|
@@ -239,15 +238,11 @@ class YoloxModelInterfaceBase(ModelInterface):
|
|
|
239
238
|
# Convert to uint8 if needed.
|
|
240
239
|
if image.dtype != np.uint8:
|
|
241
240
|
image = (image * 255).astype(np.uint8)
|
|
242
|
-
# Convert the numpy array to a PIL Image.
|
|
243
|
-
image_pil = Image.fromarray(image)
|
|
244
|
-
original_size = image_pil.size
|
|
245
|
-
|
|
246
|
-
# Save the image to a buffer and encode to base64.
|
|
247
|
-
buffered = io.BytesIO()
|
|
248
|
-
image_pil.save(buffered, format="PNG")
|
|
249
|
-
image_b64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
|
|
250
241
|
|
|
242
|
+
# Get original size directly from numpy array (width, height)
|
|
243
|
+
original_size = (image.shape[1], image.shape[0])
|
|
244
|
+
# Convert numpy array directly to base64 using OpenCV
|
|
245
|
+
image_b64 = numpy_to_base64(image, format=YOLOX_PAGE_IMAGE_FORMAT)
|
|
251
246
|
# Scale the image if necessary.
|
|
252
247
|
scaled_image_b64, new_size = scale_image_to_encoding_size(
|
|
253
248
|
image_b64, max_base64_size=self.nim_max_image_size
|