nv-ingest-api 2025.6.15.dev20250615__tar.gz → 2025.6.17.dev20250617__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-api might be problematic. Click here for more details.
- {nv_ingest_api-2025.6.15.dev20250615/src/nv_ingest_api.egg-info → nv_ingest_api-2025.6.17.dev20250617}/PKG-INFO +1 -1
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/transform/split_text.py +19 -5
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/LICENSE +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/MANIFEST.in +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/README.md +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/pyproject.toml +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/setup.cfg +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/interface/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/interface/extract.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/interface/mutate.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/interface/store.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/interface/transform.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/interface/utility.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/enums/common.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/store/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/internal/transform/embed_text.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/control_message/validators.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/converters/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/converters/containers.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/converters/datetools.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/converters/dftools.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/converters/formats.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/detectors/language.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/logging/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/logging/configuration.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/nim/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/schema/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/system/__init__.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api.egg-info/SOURCES.txt +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api.egg-info/requires.txt +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
- {nv_ingest_api-2025.6.15.dev20250615 → nv_ingest_api-2025.6.17.dev20250617}/src/version.py +0 -0
|
@@ -31,9 +31,16 @@ def _build_split_documents(row, chunks: List[str]) -> List[dict[str, Any]]:
|
|
|
31
31
|
metadata = row.metadata if hasattr(row, "metadata") and isinstance(row.metadata, dict) else {}
|
|
32
32
|
metadata = copy.deepcopy(metadata)
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
34
|
+
if row.document_type == ContentTypeEnum.AUDIO:
|
|
35
|
+
metadata["audio_metadata"]["audio_transcript"] = text
|
|
36
|
+
documents.append(
|
|
37
|
+
{"document_type": ContentTypeEnum.AUDIO.value, "metadata": metadata, "uuid": str(uuid.uuid4())}
|
|
38
|
+
)
|
|
39
|
+
else:
|
|
40
|
+
metadata["content"] = text
|
|
41
|
+
documents.append(
|
|
42
|
+
{"document_type": ContentTypeEnum.TEXT.value, "metadata": metadata, "uuid": str(uuid.uuid4())}
|
|
43
|
+
)
|
|
37
44
|
|
|
38
45
|
return documents
|
|
39
46
|
|
|
@@ -118,7 +125,7 @@ def transform_text_split_and_tokenize_internal(
|
|
|
118
125
|
)
|
|
119
126
|
|
|
120
127
|
# Filter to documents with text content.
|
|
121
|
-
text_type_condition = df_transform_ledger["document_type"]
|
|
128
|
+
text_type_condition = df_transform_ledger["document_type"].isin([ContentTypeEnum.TEXT, ContentTypeEnum.AUDIO])
|
|
122
129
|
|
|
123
130
|
normalized_meta_df = pd.json_normalize(df_transform_ledger["metadata"], errors="ignore")
|
|
124
131
|
if "source_metadata.source_type" in normalized_meta_df.columns:
|
|
@@ -147,7 +154,14 @@ def transform_text_split_and_tokenize_internal(
|
|
|
147
154
|
|
|
148
155
|
split_docs: List[Dict[str, Any]] = []
|
|
149
156
|
for _, row in df_filtered.iterrows():
|
|
150
|
-
|
|
157
|
+
if row["document_type"] == ContentTypeEnum.AUDIO:
|
|
158
|
+
content: str = (
|
|
159
|
+
row["metadata"]["audio_metadata"]["audio_transcript"]
|
|
160
|
+
if row["metadata"]["audio_metadata"]["audio_transcript"] is not None
|
|
161
|
+
else ""
|
|
162
|
+
)
|
|
163
|
+
else:
|
|
164
|
+
content: str = row["metadata"]["content"] if row["metadata"]["content"] is not None else ""
|
|
151
165
|
chunks: List[str] = _split_into_chunks(content, tokenizer_model, chunk_size, chunk_overlap)
|
|
152
166
|
split_docs.extend(_build_split_documents(row, chunks))
|
|
153
167
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|