nv-ingest-api 2025.6.3.dev20250603__tar.gz → 2025.6.5.dev20250605__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (164)
  1. {nv_ingest_api-2025.6.3.dev20250603/src/nv_ingest_api.egg-info → nv_ingest_api-2025.6.5.dev20250605}/PKG-INFO +1 -1
  2. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +50 -14
  3. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +1 -1
  4. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +1 -0
  5. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +1 -0
  6. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +2 -0
  7. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
  8. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/LICENSE +0 -0
  9. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/MANIFEST.in +0 -0
  10. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/README.md +0 -0
  11. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/pyproject.toml +0 -0
  12. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/setup.cfg +0 -0
  13. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/__init__.py +0 -0
  14. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/interface/__init__.py +0 -0
  15. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/interface/extract.py +0 -0
  16. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/interface/mutate.py +0 -0
  17. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/interface/store.py +0 -0
  18. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/interface/transform.py +0 -0
  19. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/interface/utility.py +0 -0
  20. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/__init__.py +0 -0
  21. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
  22. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/enums/common.py +0 -0
  23. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
  24. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
  25. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
  26. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
  27. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  28. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
  29. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
  30. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
  31. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
  32. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
  33. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
  34. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
  35. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
  36. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
  37. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
  38. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
  39. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
  40. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
  41. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
  42. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
  43. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
  44. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
  45. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
  46. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
  47. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
  48. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
  49. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
  50. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
  51. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  52. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
  53. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
  54. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
  55. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
  56. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
  57. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
  58. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
  59. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
  60. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
  61. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
  62. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
  63. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
  64. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
  65. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
  66. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
  67. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
  68. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +0 -0
  69. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
  70. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
  71. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
  72. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
  73. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
  74. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  75. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
  76. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
  77. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
  78. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
  79. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
  80. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
  81. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
  82. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
  83. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
  84. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
  85. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -0
  86. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
  87. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
  88. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
  89. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
  90. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
  91. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
  92. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
  93. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
  94. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
  95. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
  96. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
  97. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
  98. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
  99. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
  100. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -0
  101. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
  102. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -0
  103. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
  104. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/store/__init__.py +0 -0
  105. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
  106. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
  107. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
  108. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
  109. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/transform/embed_text.py +0 -0
  110. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
  111. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/__init__.py +0 -0
  112. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
  113. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/control_message/validators.py +0 -0
  114. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/converters/__init__.py +0 -0
  115. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
  116. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/converters/containers.py +0 -0
  117. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/converters/datetools.py +0 -0
  118. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/converters/dftools.py +0 -0
  119. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/converters/formats.py +0 -0
  120. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
  121. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
  122. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/detectors/language.py +0 -0
  123. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
  124. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
  125. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
  126. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
  127. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
  128. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
  129. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
  130. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
  131. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
  132. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
  133. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
  134. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/logging/__init__.py +0 -0
  135. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/logging/configuration.py +0 -0
  136. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
  137. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
  138. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
  139. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
  140. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
  141. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
  142. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
  143. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
  144. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
  145. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/nim/__init__.py +0 -0
  146. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
  147. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
  148. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/schema/__init__.py +0 -0
  149. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
  150. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
  151. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
  152. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
  153. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
  154. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
  155. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  156. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
  157. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
  158. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/system/__init__.py +0 -0
  159. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
  160. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api.egg-info/SOURCES.txt +0 -0
  161. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
  162. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api.egg-info/requires.txt +0 -0
  163. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
  164. {nv_ingest_api-2025.6.3.dev20250603 → nv_ingest_api-2025.6.5.dev20250605}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.6.3.dev20250603
3
+ Version: 2025.6.5.dev20250605
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -5,6 +5,8 @@
5
5
  import logging
6
6
 
7
7
  import pandas as pd
8
+ import functools
9
+ import uuid
8
10
  from typing import Any
9
11
  from typing import Dict
10
12
  from typing import Optional
@@ -21,7 +23,7 @@ logger = logging.getLogger(__name__)
21
23
 
22
24
 
23
25
  @unified_exception_handler
24
- def _update_audio_metadata(row: pd.Series, audio_client: Any, trace_info: Dict) -> Dict:
26
+ def _extract_from_audio(row: pd.Series, audio_client: Any, trace_info: Dict, segment_audio: bool = False) -> Dict:
25
27
  """
26
28
  Modifies the metadata of a row if the conditions for table extraction are met.
27
29
 
@@ -56,24 +58,42 @@ def _update_audio_metadata(row: pd.Series, audio_client: Any, trace_info: Dict)
56
58
  base64_audio = metadata.pop("content")
57
59
  content_metadata = metadata.get("content_metadata", {})
58
60
 
59
- # Only modify if content type is audio
61
+ # Only extract transcript if content type is audio
60
62
  if (content_metadata.get("type") != ContentTypeEnum.AUDIO) or (base64_audio in (None, "")):
61
- return metadata
63
+ return [row.to_list()]
62
64
 
63
- # Modify audio metadata with the result from the inference model
64
- audio_result = audio_client.infer(
65
+ # Get the result from the inference model
66
+ segments, transcript = audio_client.infer(
65
67
  base64_audio,
66
68
  model_name="parakeet",
67
69
  trace_info=trace_info, # traceable_func arg
68
70
  stage_name="audio_extraction",
69
71
  )
70
72
 
71
- row["document_type"] = ContentTypeEnum.AUDIO
72
- audio_metadata = {"audio_transcript": audio_result}
73
- metadata["audio_metadata"] = validate_schema(audio_metadata, AudioMetadataSchema).model_dump()
74
- row["metadata"] = validate_schema(metadata, MetadataSchema).model_dump()
73
+ extracted_data = []
74
+ if segment_audio:
75
+ for segment in segments:
76
+ segment_metadata = metadata.copy()
77
+ audio_metadata = {"audio_transcript": segment["text"]}
78
+ segment_metadata["audio_metadata"] = validate_schema(audio_metadata, AudioMetadataSchema).model_dump()
79
+ segment_metadata["content_metadata"]["start_time"] = segment["start"]
80
+ segment_metadata["content_metadata"]["end_time"] = segment["end"]
81
+
82
+ extracted_data.append(
83
+ [
84
+ ContentTypeEnum.AUDIO,
85
+ validate_schema(segment_metadata, MetadataSchema).model_dump(),
86
+ str(uuid.uuid4()),
87
+ ]
88
+ )
89
+ else:
90
+ audio_metadata = {"audio_transcript": transcript}
91
+ metadata["audio_metadata"] = validate_schema(audio_metadata, AudioMetadataSchema).model_dump()
92
+ extracted_data.append(
93
+ [ContentTypeEnum.AUDIO, validate_schema(metadata, MetadataSchema).model_dump(), str(uuid.uuid4())]
94
+ )
75
95
 
76
- return metadata
96
+ return extracted_data
77
97
 
78
98
 
79
99
  def extract_text_from_audio_internal(
@@ -121,6 +141,7 @@ def extract_text_from_audio_internal(
121
141
  function_id = extract_params.get("function_id") or audio_extraction_config.function_id
122
142
  use_ssl = extract_params.get("use_ssl") or audio_extraction_config.use_ssl
123
143
  ssl_cert = extract_params.get("ssl_cert") or audio_extraction_config.ssl_cert
144
+ segment_audio = extract_params.get("segment_audio") or audio_extraction_config.segment_audio
124
145
 
125
146
  parakeet_client = create_audio_inference_client(
126
147
  (grpc_endpoint, http_endpoint),
@@ -136,12 +157,27 @@ def extract_text_from_audio_internal(
136
157
  logger.debug("No trace_info provided. Initialized empty trace_info dictionary.")
137
158
 
138
159
  try:
139
- # Apply the _update_metadata function to each row in the DataFrame
140
- df_extraction_ledger["metadata"] = df_extraction_ledger.apply(
141
- _update_audio_metadata, axis=1, args=(parakeet_client, execution_trace_log)
160
+ # Create a partial function to extract using the provided configurations.
161
+ _extract_from_audio_partial = functools.partial(
162
+ _extract_from_audio,
163
+ audio_client=parakeet_client,
164
+ trace_info=execution_trace_log,
165
+ segment_audio=segment_audio,
142
166
  )
143
167
 
144
- return df_extraction_ledger, execution_trace_log
168
+ # Apply the _extract_from_audio_partial function to each row in the DataFrame
169
+ extraction_series = df_extraction_ledger.apply(_extract_from_audio_partial, axis=1)
170
+
171
+ # Explode the results if the extraction returns lists.
172
+ extraction_series = extraction_series.explode().dropna()
173
+
174
+ # Convert the extracted results into a DataFrame.
175
+ if not extraction_series.empty:
176
+ extracted_df = pd.DataFrame(extraction_series.to_list(), columns=["document_type", "metadata", "uuid"])
177
+ else:
178
+ extracted_df = pd.DataFrame({"document_type": [], "metadata": [], "uuid": []})
179
+
180
+ return extracted_df, execution_trace_log
145
181
 
146
182
  except Exception as e:
147
183
  logger.exception(f"Error occurred while extracting audio data: {e}", exc_info=True)
@@ -101,7 +101,7 @@ class ParakeetClient:
101
101
  segments, transcript = process_transcription_response(response)
102
102
  logger.debug("Processing Parakeet inference results (pass-through).")
103
103
 
104
- return transcript
104
+ return segments, transcript
105
105
 
106
106
  def transcribe(
107
107
  self,
@@ -48,6 +48,7 @@ class AudioConfigSchema(BaseModel):
48
48
  function_id: Optional[str] = None
49
49
  use_ssl: Optional[bool] = None
50
50
  ssl_cert: Optional[str] = None
51
+ segment_audio: Optional[bool] = None
51
52
 
52
53
  @root_validator(pre=True)
53
54
  def validate_endpoints(cls, values):
@@ -124,6 +124,7 @@ class IngestTaskAudioExtraction(BaseModelNoExt):
124
124
  function_id: Optional[str] = None
125
125
  use_ssl: Optional[bool] = None
126
126
  ssl_cert: Optional[str] = None
127
+ segment_audio: Optional[bool] = None
127
128
 
128
129
 
129
130
  class IngestTaskTableExtraction(BaseModelNoExt):
@@ -97,6 +97,8 @@ class ContentMetadataSchema(BaseModelNoExt):
97
97
  page_number: int = -1
98
98
  hierarchy: ContentHierarchySchema = ContentHierarchySchema()
99
99
  subtype: Union[ContentTypeEnum, str] = ""
100
+ start_time: int = -1
101
+ end_time: int = -1
100
102
 
101
103
 
102
104
  class TextMetadataSchema(BaseModelNoExt):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.6.3.dev20250603
3
+ Version: 2025.6.5.dev20250605
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License