nv-ingest-api 2025.5.14.dev20250514__tar.gz → 2025.5.15.dev20250515__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (166)
  1. {nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api.egg-info → nv_ingest_api-2025.5.15.dev20250515}/PKG-INFO +1 -1
  2. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +5 -8
  3. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +1 -1
  4. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +1 -1
  5. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +1 -1
  6. nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +37 -0
  7. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +34 -23
  8. nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +26 -0
  9. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +9 -7
  10. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/store/image_upload.py +1 -0
  11. nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/util/schema/__init__.py +3 -0
  12. nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/util/service_clients/__init__.py +3 -0
  13. nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/util/service_clients/redis/__init__.py +3 -0
  14. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/system/hardware_info.py +4 -0
  15. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
  16. nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -23
  17. nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -25
  18. nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
  19. nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  20. nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/util/system/__init__.py +0 -0
  21. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/LICENSE +0 -0
  22. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/MANIFEST.in +0 -0
  23. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/README.md +0 -0
  24. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/pyproject.toml +0 -0
  25. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/setup.cfg +0 -0
  26. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/__init__.py +0 -0
  27. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/interface/__init__.py +0 -0
  28. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/interface/extract.py +0 -0
  29. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/interface/mutate.py +0 -0
  30. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/interface/store.py +0 -0
  31. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/interface/transform.py +0 -0
  32. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/interface/utility.py +0 -0
  33. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/__init__.py +0 -0
  34. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
  35. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/enums/common.py +0 -0
  36. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
  37. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
  38. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
  39. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
  40. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
  41. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  42. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
  43. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
  44. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
  45. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
  46. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
  47. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
  48. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
  49. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
  50. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
  51. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
  52. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
  53. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
  54. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
  55. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
  56. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
  57. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
  58. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
  59. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
  60. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
  61. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
  62. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
  63. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  64. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
  65. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
  66. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
  67. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
  68. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
  69. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
  70. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
  71. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
  72. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
  73. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
  74. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
  75. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
  76. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
  77. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
  78. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
  79. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/model_interface/paddle.py +0 -0
  80. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
  81. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
  82. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
  83. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
  84. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
  85. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
  86. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  87. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
  88. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
  89. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
  90. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
  91. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
  92. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -0
  93. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
  94. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
  95. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -0
  96. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
  97. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
  98. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
  99. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
  100. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
  101. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
  102. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -0
  103. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
  104. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
  105. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
  106. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
  107. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
  108. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
  109. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -0
  110. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
  111. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/store/__init__.py +0 -0
  112. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
  113. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
  114. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
  115. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/transform/embed_text.py +0 -0
  116. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
  117. {nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/util/message_brokers → nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/util}/__init__.py +0 -0
  118. {nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/util → nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/util/control_message}/__init__.py +0 -0
  119. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/control_message/validators.py +0 -0
  120. {nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/util/control_message → nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/util/converters}/__init__.py +0 -0
  121. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
  122. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/converters/containers.py +0 -0
  123. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/converters/datetools.py +0 -0
  124. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/converters/dftools.py +0 -0
  125. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/converters/formats.py +0 -0
  126. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
  127. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
  128. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/detectors/language.py +0 -0
  129. {nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/util/converters → nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/util/exception_handlers}/__init__.py +0 -0
  130. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
  131. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
  132. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
  133. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
  134. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
  135. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
  136. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
  137. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
  138. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
  139. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
  140. {nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/util/exception_handlers → nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/util/logging}/__init__.py +0 -0
  141. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/logging/configuration.py +0 -0
  142. {nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/util/service_clients → nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/util/message_brokers}/__init__.py +0 -0
  143. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
  144. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
  145. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
  146. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
  147. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
  148. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
  149. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
  150. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
  151. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/nim/__init__.py +0 -0
  152. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
  153. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
  154. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
  155. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
  156. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
  157. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
  158. {nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/util/logging → nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/util/service_clients/rest}/__init__.py +0 -0
  159. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
  160. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
  161. {nv_ingest_api-2025.5.14.dev20250514/src/nv_ingest_api/util/schema → nv_ingest_api-2025.5.15.dev20250515/src/nv_ingest_api/util/system}/__init__.py +0 -0
  162. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api.egg-info/SOURCES.txt +0 -0
  163. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
  164. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api.egg-info/requires.txt +0 -0
  165. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
  166. {nv_ingest_api-2025.5.14.dev20250514 → nv_ingest_api-2025.5.15.dev20250515}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.5.14.dev20250514
3
+ Version: 2025.5.15.dev20250515
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -99,14 +99,11 @@ def _decode_and_extract_from_pptx(
99
99
 
100
100
  # Retrieve extraction parameters (and remove boolean flags as they are consumed).
101
101
  extract_params: Dict[str, Any] = prepared_task_props.get("params", {})
102
- try:
103
- extract_text: bool = extract_params.pop("extract_text", False)
104
- extract_images: bool = extract_params.pop("extract_images", False)
105
- extract_tables: bool = extract_params.pop("extract_tables", False)
106
- extract_charts: bool = extract_params.pop("extract_charts", False)
107
- extract_infographics: bool = extract_params.pop("extract_infographics", False)
108
- except KeyError as e:
109
- raise ValueError(f"Missing required extraction flag: {e}")
102
+ extract_text: bool = extract_params.pop("extract_text", False)
103
+ extract_images: bool = extract_params.pop("extract_images", False)
104
+ extract_tables: bool = extract_params.pop("extract_tables", False)
105
+ extract_charts: bool = extract_params.pop("extract_charts", False)
106
+ extract_infographics: bool = extract_params.pop("extract_infographics", False)
110
107
 
111
108
  # Inject additional configuration and trace information.
112
109
  if getattr(extraction_config, "pptx_extraction_config", None) is not None:
@@ -129,7 +129,7 @@ class ChartExtractorSchema(BaseModel):
129
129
  @field_validator("max_queue_size", "n_workers")
130
130
  def check_positive(cls, v, field):
131
131
  if v <= 0:
132
- raise ValueError(f"{field.field_name} must be greater than 10.")
132
+ raise ValueError(f"{field.field_name} must be greater than 0.")
133
133
  return v
134
134
 
135
135
  model_config = ConfigDict(extra="forbid")
@@ -122,7 +122,7 @@ class InfographicExtractorSchema(BaseModel):
122
122
  @field_validator("max_queue_size", "n_workers")
123
123
  def check_positive(cls, v, field):
124
124
  if v <= 0:
125
- raise ValueError(f"{field.field_name} must be greater than 10.")
125
+ raise ValueError(f"{field.field_name} must be greater than 0.")
126
126
  return v
127
127
 
128
128
  model_config = ConfigDict(extra="forbid")
@@ -122,7 +122,7 @@ class TableExtractorSchema(BaseModel):
122
122
  @field_validator("max_queue_size", "n_workers")
123
123
  def check_positive(cls, v, field):
124
124
  if v <= 0:
125
- raise ValueError(f"{field.field_name} must be greater than 10.")
125
+ raise ValueError(f"{field.field_name} must be greater than 0.")
126
126
  return v
127
127
 
128
128
  endpoint_config: Optional[TableExtractorConfigSchema] = None
@@ -0,0 +1,37 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from pydantic import BaseModel, Field
6
+ from typing import Optional, Literal, Annotated
7
+
8
+
9
+ class MessageBrokerClientSchema(BaseModel):
10
+ """
11
+ Configuration schema for message broker client connections.
12
+ Supports Redis or simple in-memory clients.
13
+ """
14
+
15
+ host: str = Field(default="redis", description="Hostname of the broker service.")
16
+
17
+ port: Annotated[int, Field(gt=0, lt=65536)] = Field(
18
+ default=6379, description="Port to connect to. Must be between 1 and 65535."
19
+ )
20
+
21
+ client_type: Literal["redis", "simple"] = Field(
22
+ default="redis", description="Type of broker client. Supported values: 'redis', 'simple'."
23
+ )
24
+
25
+ broker_params: Optional[dict] = Field(
26
+ default_factory=dict, description="Optional parameters passed to the broker client."
27
+ )
28
+
29
+ connection_timeout: Annotated[int, Field(ge=0)] = Field(
30
+ default=300, description="Connection timeout in seconds. Must be >= 0."
31
+ )
32
+
33
+ max_backoff: Annotated[int, Field(ge=0)] = Field(
34
+ default=300, description="Maximum backoff time in seconds. Must be >= 0."
35
+ )
36
+
37
+ max_retries: Annotated[int, Field(ge=0)] = Field(default=0, description="Maximum number of retries. Must be >= 0.")
@@ -160,29 +160,40 @@ class IngestTaskSchema(BaseModelNoExt):
160
160
  @model_validator(mode="before")
161
161
  @classmethod
162
162
  def check_task_properties_type(cls, values):
163
- task_type, task_properties = values.get("type"), values.get("task_properties", {})
164
- if task_type and task_properties:
165
- expected_type = {
166
- TaskTypeEnum.CAPTION: IngestTaskCaptionSchema,
167
- TaskTypeEnum.DEDUP: IngestTaskDedupSchema,
168
- TaskTypeEnum.EMBED: IngestTaskEmbedSchema,
169
- TaskTypeEnum.EXTRACT: IngestTaskExtractSchema,
170
- TaskTypeEnum.FILTER: IngestTaskFilterSchema, # Extend mapping as necessary
171
- TaskTypeEnum.SPLIT: IngestTaskSplitSchema,
172
- TaskTypeEnum.STORE_EMBEDDING: IngestTaskStoreEmbedSchema,
173
- TaskTypeEnum.STORE: IngestTaskStoreSchema,
174
- TaskTypeEnum.VDB_UPLOAD: IngestTaskVdbUploadSchema,
175
- TaskTypeEnum.AUDIO_DATA_EXTRACT: IngestTaskAudioExtraction,
176
- TaskTypeEnum.TABLE_DATA_EXTRACT: IngestTaskTableExtraction,
177
- TaskTypeEnum.CHART_DATA_EXTRACT: IngestTaskChartExtraction,
178
- TaskTypeEnum.INFOGRAPHIC_DATA_EXTRACT: IngestTaskInfographicExtraction,
179
- }.get(
180
- task_type
181
- ) # Removed .upper()
182
-
183
- # Validate task_properties against the expected schema.
184
- validated_task_properties = expected_type(**task_properties)
185
- values["task_properties"] = validated_task_properties
163
+ task_type = values.get("type")
164
+ task_properties = values.get("task_properties", {})
165
+
166
+ # Ensure task_type is lowercased and converted to enum early
167
+ if isinstance(task_type, str):
168
+ task_type = task_type.lower()
169
+ try:
170
+ task_type = TaskTypeEnum(task_type)
171
+ except ValueError:
172
+ raise ValueError(f"{task_type} is not a valid TaskTypeEnum value")
173
+
174
+ task_type_to_schema = {
175
+ TaskTypeEnum.CAPTION: IngestTaskCaptionSchema,
176
+ TaskTypeEnum.DEDUP: IngestTaskDedupSchema,
177
+ TaskTypeEnum.EMBED: IngestTaskEmbedSchema,
178
+ TaskTypeEnum.EXTRACT: IngestTaskExtractSchema,
179
+ TaskTypeEnum.FILTER: IngestTaskFilterSchema,
180
+ TaskTypeEnum.SPLIT: IngestTaskSplitSchema,
181
+ TaskTypeEnum.STORE_EMBEDDING: IngestTaskStoreEmbedSchema,
182
+ TaskTypeEnum.STORE: IngestTaskStoreSchema,
183
+ TaskTypeEnum.VDB_UPLOAD: IngestTaskVdbUploadSchema,
184
+ TaskTypeEnum.AUDIO_DATA_EXTRACT: IngestTaskAudioExtraction,
185
+ TaskTypeEnum.TABLE_DATA_EXTRACT: IngestTaskTableExtraction,
186
+ TaskTypeEnum.CHART_DATA_EXTRACT: IngestTaskChartExtraction,
187
+ TaskTypeEnum.INFOGRAPHIC_DATA_EXTRACT: IngestTaskInfographicExtraction,
188
+ }
189
+
190
+ expected_schema_cls = task_type_to_schema.get(task_type)
191
+ if expected_schema_cls is None:
192
+ raise ValueError(f"Unsupported or missing task_type '{task_type}'")
193
+
194
+ validated_task_properties = expected_schema_cls(**task_properties)
195
+ values["type"] = task_type # ensure type is now always the enum
196
+ values["task_properties"] = validated_task_properties
186
197
  return values
187
198
 
188
199
  @field_validator("type", mode="before")
@@ -0,0 +1,26 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+
6
+ import logging
7
+
8
+ from pydantic import ConfigDict, BaseModel, Field
9
+
10
+ from nv_ingest_api.util.logging.configuration import LogLevel
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class TextEmbeddingSchema(BaseModel):
16
+ api_key: str = Field(default="api_key")
17
+ batch_size: int = Field(default=4)
18
+ embedding_model: str = Field(default="nvidia/nv-embedqa-e5-v5")
19
+ embedding_nim_endpoint: str = Field(default="http://embedding:8000/v1")
20
+ encoding_format: str = Field(default="float")
21
+ httpx_log_level: LogLevel = Field(default=LogLevel.WARNING)
22
+ input_type: str = Field(default="passage")
23
+ raise_on_failure: bool = Field(default=False)
24
+ truncate: str = Field(default="END")
25
+
26
+ model_config = ConfigDict(extra="forbid")
@@ -2,21 +2,23 @@
2
2
  # All rights reserved.
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
- from pydantic import Field, BaseModel, field_validator
5
+ from pydantic import Field, BaseModel, field_validator, ConfigDict
6
6
 
7
7
  from typing import Optional
8
8
 
9
- from typing_extensions import Annotated
10
-
11
9
 
12
10
  class TextSplitterSchema(BaseModel):
13
11
  tokenizer: Optional[str] = None
14
- chunk_size: Annotated[int, Field(gt=0)] = 1024
15
- chunk_overlap: Annotated[int, Field(ge=0)] = 150
12
+ chunk_size: int = Field(default=1024, gt=0)
13
+ chunk_overlap: int = Field(default=150, ge=0)
16
14
  raise_on_failure: bool = False
17
15
 
18
16
  @field_validator("chunk_overlap")
19
- def check_chunk_overlap(cls, v, values, **kwargs):
20
- if v is not None and "chunk_size" in values.data and v >= values.data["chunk_size"]:
17
+ @classmethod
18
+ def check_chunk_overlap(cls, v, values):
19
+ chunk_size = values.data.get("chunk_size")
20
+ if chunk_size is not None and v >= chunk_size:
21
21
  raise ValueError("chunk_overlap must be less than chunk_size")
22
22
  return v
23
+
24
+ model_config = ConfigDict(extra="forbid")
@@ -116,6 +116,7 @@ def _upload_images_to_minio(df: pd.DataFrame, params: Dict[str, Any]) -> pd.Data
116
116
  if "content" not in metadata:
117
117
  logger.error("Row %s: missing 'content' in metadata", idx)
118
118
  continue
119
+
119
120
  if "source_metadata" not in metadata or not isinstance(metadata["source_metadata"], dict):
120
121
  logger.error("Row %s: missing or invalid 'source_metadata' in metadata", idx)
121
122
  continue
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
@@ -45,6 +45,10 @@ class SystemResourceProbe:
45
45
  A value of 0.5 suggests a hyperthread adds 50% extra performance.
46
46
  Requires psutil to be installed and report physical cores.
47
47
  Defaults to 0.75.
48
+
49
+ Note: the default value of 0.75 is a heuristic and may not be optimal
50
+ for all situations. It is where parallel pdf decomposition efficiency
51
+ is observed to begin rolling off.
48
52
  """
49
53
  if not (0.0 <= hyperthread_weight <= 1.0):
50
54
  raise ValueError("hyperthread_weight must be between 0.0 and 1.0")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.5.14.dev20250514
3
+ Version: 2025.5.15.dev20250515
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -1,23 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
- # All rights reserved.
3
- # SPDX-License-Identifier: Apache-2.0
4
-
5
-
6
- from typing import Optional, Literal
7
-
8
- from pydantic import Field, BaseModel
9
- from typing_extensions import Annotated
10
-
11
-
12
- class MessageBrokerClientSchema(BaseModel):
13
- host: str = "redis"
14
- port: Annotated[int, Field(gt=0, lt=65536)] = 6379
15
-
16
- # Update this for new broker types
17
- client_type: Literal["redis", "simple"] = "redis" # Restrict to 'redis' or 'simple'
18
-
19
- broker_params: Optional[dict] = Field(default_factory=dict)
20
-
21
- connection_timeout: Optional[Annotated[int, Field(ge=0)]] = 300
22
- max_backoff: Optional[Annotated[int, Field(ge=0)]] = 300
23
- max_retries: Optional[Annotated[int, Field(ge=0)]] = 0
@@ -1,25 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
2
- # All rights reserved.
3
- # SPDX-License-Identifier: Apache-2.0
4
-
5
-
6
- import logging
7
-
8
- from pydantic import ConfigDict, BaseModel
9
-
10
- from nv_ingest_api.util.logging.configuration import LogLevel
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
-
15
- class TextEmbeddingSchema(BaseModel):
16
- api_key: str = "api_key"
17
- batch_size: int = 4
18
- embedding_model: str = "nvidia/nv-embedqa-e5-v5"
19
- embedding_nim_endpoint: str = "http://embedding:8000/v1"
20
- encoding_format: str = "float"
21
- httpx_log_level: LogLevel = LogLevel.WARNING
22
- input_type: str = "passage"
23
- raise_on_failure: bool = False
24
- truncate: str = "END"
25
- model_config = ConfigDict(extra="forbid")