nv-ingest-api 25.6.2__tar.gz → 25.6.26.dev20250626__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- {nv_ingest_api-25.6.2/src/nv_ingest_api.egg-info → nv_ingest_api-25.6.26.dev20250626}/PKG-INFO +1 -1
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +50 -14
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +1 -1
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +1 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +1 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +2 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/transform/split_text.py +19 -5
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/version.py +0 -8
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/LICENSE +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/MANIFEST.in +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/README.md +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/pyproject.toml +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/setup.cfg +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/extract.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/mutate.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/store.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/transform.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/utility.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/enums/common.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/store/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/transform/embed_text.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/control_message/validators.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/converters/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/converters/containers.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/converters/datetools.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/converters/dftools.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/converters/formats.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/detectors/language.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/logging/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/logging/configuration.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/nim/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/schema/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/system/__init__.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api.egg-info/SOURCES.txt +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api.egg-info/requires.txt +0 -0
- {nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
import logging
|
|
6
6
|
|
|
7
7
|
import pandas as pd
|
|
8
|
+
import functools
|
|
9
|
+
import uuid
|
|
8
10
|
from typing import Any
|
|
9
11
|
from typing import Dict
|
|
10
12
|
from typing import Optional
|
|
@@ -21,7 +23,7 @@ logger = logging.getLogger(__name__)
|
|
|
21
23
|
|
|
22
24
|
|
|
23
25
|
@unified_exception_handler
|
|
24
|
-
def _update_audio_metadata(row: pd.Series, audio_client: Any, trace_info: Dict) -> Dict:
|
|
26
|
+
def _extract_from_audio(row: pd.Series, audio_client: Any, trace_info: Dict, segment_audio: bool = False) -> Dict:
|
|
25
27
|
"""
|
|
26
28
|
Modifies the metadata of a row if the conditions for audio extraction are met.
|
|
27
29
|
|
|
@@ -56,24 +58,42 @@ def _update_audio_metadata(row: pd.Series, audio_client: Any, trace_info: Dict)
|
|
|
56
58
|
base64_audio = metadata.pop("content")
|
|
57
59
|
content_metadata = metadata.get("content_metadata", {})
|
|
58
60
|
|
|
59
|
-
# Only
|
|
61
|
+
# Only extract transcript if content type is audio
|
|
60
62
|
if (content_metadata.get("type") != ContentTypeEnum.AUDIO) or (base64_audio in (None, "")):
|
|
61
|
-
return
|
|
63
|
+
return [row.to_list()]
|
|
62
64
|
|
|
63
|
-
#
|
|
64
|
-
|
|
65
|
+
# Get the result from the inference model
|
|
66
|
+
segments, transcript = audio_client.infer(
|
|
65
67
|
base64_audio,
|
|
66
68
|
model_name="parakeet",
|
|
67
69
|
trace_info=trace_info, # traceable_func arg
|
|
68
70
|
stage_name="audio_extraction",
|
|
69
71
|
)
|
|
70
72
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
73
|
+
extracted_data = []
|
|
74
|
+
if segment_audio:
|
|
75
|
+
for segment in segments:
|
|
76
|
+
segment_metadata = metadata.copy()
|
|
77
|
+
audio_metadata = {"audio_transcript": segment["text"]}
|
|
78
|
+
segment_metadata["audio_metadata"] = validate_schema(audio_metadata, AudioMetadataSchema).model_dump()
|
|
79
|
+
segment_metadata["content_metadata"]["start_time"] = segment["start"]
|
|
80
|
+
segment_metadata["content_metadata"]["end_time"] = segment["end"]
|
|
81
|
+
|
|
82
|
+
extracted_data.append(
|
|
83
|
+
[
|
|
84
|
+
ContentTypeEnum.AUDIO,
|
|
85
|
+
validate_schema(segment_metadata, MetadataSchema).model_dump(),
|
|
86
|
+
str(uuid.uuid4()),
|
|
87
|
+
]
|
|
88
|
+
)
|
|
89
|
+
else:
|
|
90
|
+
audio_metadata = {"audio_transcript": transcript}
|
|
91
|
+
metadata["audio_metadata"] = validate_schema(audio_metadata, AudioMetadataSchema).model_dump()
|
|
92
|
+
extracted_data.append(
|
|
93
|
+
[ContentTypeEnum.AUDIO, validate_schema(metadata, MetadataSchema).model_dump(), str(uuid.uuid4())]
|
|
94
|
+
)
|
|
75
95
|
|
|
76
|
-
return
|
|
96
|
+
return extracted_data
|
|
77
97
|
|
|
78
98
|
|
|
79
99
|
def extract_text_from_audio_internal(
|
|
@@ -121,6 +141,7 @@ def extract_text_from_audio_internal(
|
|
|
121
141
|
function_id = extract_params.get("function_id") or audio_extraction_config.function_id
|
|
122
142
|
use_ssl = extract_params.get("use_ssl") or audio_extraction_config.use_ssl
|
|
123
143
|
ssl_cert = extract_params.get("ssl_cert") or audio_extraction_config.ssl_cert
|
|
144
|
+
segment_audio = extract_params.get("segment_audio") or audio_extraction_config.segment_audio
|
|
124
145
|
|
|
125
146
|
parakeet_client = create_audio_inference_client(
|
|
126
147
|
(grpc_endpoint, http_endpoint),
|
|
@@ -136,12 +157,27 @@ def extract_text_from_audio_internal(
|
|
|
136
157
|
logger.debug("No trace_info provided. Initialized empty trace_info dictionary.")
|
|
137
158
|
|
|
138
159
|
try:
|
|
139
|
-
#
|
|
140
|
-
|
|
141
|
-
|
|
160
|
+
# Create a partial function to extract using the provided configurations.
|
|
161
|
+
_extract_from_audio_partial = functools.partial(
|
|
162
|
+
_extract_from_audio,
|
|
163
|
+
audio_client=parakeet_client,
|
|
164
|
+
trace_info=execution_trace_log,
|
|
165
|
+
segment_audio=segment_audio,
|
|
142
166
|
)
|
|
143
167
|
|
|
144
|
-
|
|
168
|
+
# Apply the _extract_from_audio_partial function to each row in the DataFrame
|
|
169
|
+
extraction_series = df_extraction_ledger.apply(_extract_from_audio_partial, axis=1)
|
|
170
|
+
|
|
171
|
+
# Explode the results if the extraction returns lists.
|
|
172
|
+
extraction_series = extraction_series.explode().dropna()
|
|
173
|
+
|
|
174
|
+
# Convert the extracted results into a DataFrame.
|
|
175
|
+
if not extraction_series.empty:
|
|
176
|
+
extracted_df = pd.DataFrame(extraction_series.to_list(), columns=["document_type", "metadata", "uuid"])
|
|
177
|
+
else:
|
|
178
|
+
extracted_df = pd.DataFrame({"document_type": [], "metadata": [], "uuid": []})
|
|
179
|
+
|
|
180
|
+
return extracted_df, execution_trace_log
|
|
145
181
|
|
|
146
182
|
except Exception as e:
|
|
147
183
|
logger.exception(f"Error occurred while extracting audio data: {e}", exc_info=True)
|
|
@@ -101,7 +101,7 @@ class ParakeetClient:
|
|
|
101
101
|
segments, transcript = process_transcription_response(response)
|
|
102
102
|
logger.debug("Processing Parakeet inference results (pass-through).")
|
|
103
103
|
|
|
104
|
-
return transcript
|
|
104
|
+
return segments, transcript
|
|
105
105
|
|
|
106
106
|
def transcribe(
|
|
107
107
|
self,
|
|
@@ -124,6 +124,7 @@ class IngestTaskAudioExtraction(BaseModelNoExt):
|
|
|
124
124
|
function_id: Optional[str] = None
|
|
125
125
|
use_ssl: Optional[bool] = None
|
|
126
126
|
ssl_cert: Optional[str] = None
|
|
127
|
+
segment_audio: Optional[bool] = None
|
|
127
128
|
|
|
128
129
|
|
|
129
130
|
class IngestTaskTableExtraction(BaseModelNoExt):
|
|
@@ -97,6 +97,8 @@ class ContentMetadataSchema(BaseModelNoExt):
|
|
|
97
97
|
page_number: int = -1
|
|
98
98
|
hierarchy: ContentHierarchySchema = ContentHierarchySchema()
|
|
99
99
|
subtype: Union[ContentTypeEnum, str] = ""
|
|
100
|
+
start_time: int = -1
|
|
101
|
+
end_time: int = -1
|
|
100
102
|
|
|
101
103
|
|
|
102
104
|
class TextMetadataSchema(BaseModelNoExt):
|
|
@@ -31,9 +31,16 @@ def _build_split_documents(row, chunks: List[str]) -> List[dict[str, Any]]:
|
|
|
31
31
|
metadata = row.metadata if hasattr(row, "metadata") and isinstance(row.metadata, dict) else {}
|
|
32
32
|
metadata = copy.deepcopy(metadata)
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
if row.document_type == ContentTypeEnum.AUDIO:
|
|
35
|
+
metadata["audio_metadata"]["audio_transcript"] = text
|
|
36
|
+
documents.append(
|
|
37
|
+
{"document_type": ContentTypeEnum.AUDIO.value, "metadata": metadata, "uuid": str(uuid.uuid4())}
|
|
38
|
+
)
|
|
39
|
+
else:
|
|
40
|
+
metadata["content"] = text
|
|
41
|
+
documents.append(
|
|
42
|
+
{"document_type": ContentTypeEnum.TEXT.value, "metadata": metadata, "uuid": str(uuid.uuid4())}
|
|
43
|
+
)
|
|
37
44
|
|
|
38
45
|
return documents
|
|
39
46
|
|
|
@@ -118,7 +125,7 @@ def transform_text_split_and_tokenize_internal(
|
|
|
118
125
|
)
|
|
119
126
|
|
|
120
127
|
# Filter to documents with text content.
|
|
121
|
-
text_type_condition = df_transform_ledger["document_type"] == ContentTypeEnum.TEXT
|
|
128
|
+
text_type_condition = df_transform_ledger["document_type"].isin([ContentTypeEnum.TEXT, ContentTypeEnum.AUDIO])
|
|
122
129
|
|
|
123
130
|
normalized_meta_df = pd.json_normalize(df_transform_ledger["metadata"], errors="ignore")
|
|
124
131
|
if "source_metadata.source_type" in normalized_meta_df.columns:
|
|
@@ -147,7 +154,14 @@ def transform_text_split_and_tokenize_internal(
|
|
|
147
154
|
|
|
148
155
|
split_docs: List[Dict[str, Any]] = []
|
|
149
156
|
for _, row in df_filtered.iterrows():
|
|
150
|
-
|
|
157
|
+
if row["document_type"] == ContentTypeEnum.AUDIO:
|
|
158
|
+
content: str = (
|
|
159
|
+
row["metadata"]["audio_metadata"]["audio_transcript"]
|
|
160
|
+
if row["metadata"]["audio_metadata"]["audio_transcript"] is not None
|
|
161
|
+
else ""
|
|
162
|
+
)
|
|
163
|
+
else:
|
|
164
|
+
content: str = row["metadata"]["content"] if row["metadata"]["content"] is not None else ""
|
|
151
165
|
chunks: List[str] = _split_into_chunks(content, tokenizer_model, chunk_size, chunk_overlap)
|
|
152
166
|
split_docs.extend(_build_split_documents(row, chunks))
|
|
153
167
|
|
|
@@ -5,7 +5,6 @@
|
|
|
5
5
|
|
|
6
6
|
import datetime
|
|
7
7
|
import os
|
|
8
|
-
import re
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
def get_version():
|
|
@@ -16,13 +15,6 @@ def get_version():
|
|
|
16
15
|
if not version:
|
|
17
16
|
version = f"{datetime.datetime.now().strftime('%Y.%m.%d')}"
|
|
18
17
|
|
|
19
|
-
# We only check this for dev, we assume for release the user knows what they are doing
|
|
20
|
-
if release_type != "release":
|
|
21
|
-
# Ensure the version is PEP 440 compatible
|
|
22
|
-
pep440_regex = r"^\d{4}\.\d{1,2}\.\d{1,2}$"
|
|
23
|
-
if not re.match(pep440_regex, version):
|
|
24
|
-
raise ValueError(f"Version '{version}' is not PEP 440 compatible")
|
|
25
|
-
|
|
26
18
|
# Construct the final version string
|
|
27
19
|
if release_type == "dev":
|
|
28
20
|
# If rev is not specified and defaults to 0 lets create a more meaningful development
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/__init__.py
RENAMED
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/extract.py
RENAMED
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/mutate.py
RENAMED
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/store.py
RENAMED
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/transform.py
RENAMED
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/interface/utility.py
RENAMED
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/internal/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/nim/__init__.py
RENAMED
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/pdf/__init__.py
RENAMED
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/pdf/pdfium.py
RENAMED
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/schema/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api/util/system/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api.egg-info/requires.txt
RENAMED
|
File without changes
|
{nv_ingest_api-25.6.2 → nv_ingest_api-25.6.26.dev20250626}/src/nv_ingest_api.egg-info/top_level.txt
RENAMED
|
File without changes
|