nv-ingest-api 2025.8.26.dev20250826__tar.gz → 2025.8.27.dev20250827__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (178) hide show
  1. {nv_ingest_api-2025.8.26.dev20250826/src/nv_ingest_api.egg-info → nv_ingest_api-2025.8.27.dev20250827}/PKG-INFO +1 -1
  2. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
  3. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api.egg-info/SOURCES.txt +3 -1
  4. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api.egg-info/top_level.txt +1 -0
  5. nv_ingest_api-2025.8.27.dev20250827/src/udfs/__init__.py +5 -0
  6. nv_ingest_api-2025.8.27.dev20250827/src/udfs/llm_summarizer_udf.py +210 -0
  7. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/LICENSE +0 -0
  8. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/MANIFEST.in +0 -0
  9. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/README.md +0 -0
  10. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/pyproject.toml +0 -0
  11. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/setup.cfg +0 -0
  12. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/__init__.py +0 -0
  13. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/interface/__init__.py +0 -0
  14. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/interface/extract.py +0 -0
  15. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/interface/mutate.py +0 -0
  16. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/interface/store.py +0 -0
  17. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/interface/transform.py +0 -0
  18. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/interface/utility.py +0 -0
  19. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/__init__.py +0 -0
  20. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
  21. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/enums/common.py +0 -0
  22. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
  23. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
  24. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
  25. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
  26. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
  27. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  28. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
  29. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
  30. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
  31. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
  32. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
  33. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
  34. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
  35. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
  36. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
  37. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
  38. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
  39. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
  40. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
  41. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
  42. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
  43. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
  44. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
  45. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
  46. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
  47. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
  48. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
  49. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
  50. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
  51. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  52. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
  53. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
  54. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/meta/__init__.py +0 -0
  55. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/meta/udf.py +0 -0
  56. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
  57. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
  58. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
  59. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
  60. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
  61. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
  62. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
  63. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
  64. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
  65. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
  66. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
  67. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
  68. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
  69. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
  70. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +0 -0
  71. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
  72. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
  73. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
  74. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
  75. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
  76. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
  77. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  78. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
  79. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
  80. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
  81. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
  82. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
  83. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -0
  84. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
  85. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
  86. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
  87. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
  88. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
  89. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -0
  90. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
  91. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
  92. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
  93. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
  94. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
  95. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
  96. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
  97. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
  98. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +0 -0
  99. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -0
  100. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/meta/udf.py +0 -0
  101. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
  102. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
  103. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
  104. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
  105. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
  106. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
  107. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -0
  108. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
  109. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -0
  110. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
  111. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/store/__init__.py +0 -0
  112. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
  113. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
  114. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
  115. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
  116. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/transform/embed_text.py +0 -0
  117. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
  118. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/__init__.py +0 -0
  119. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
  120. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/control_message/validators.py +0 -0
  121. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/converters/__init__.py +0 -0
  122. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
  123. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/converters/containers.py +0 -0
  124. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/converters/datetools.py +0 -0
  125. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/converters/dftools.py +0 -0
  126. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/converters/formats.py +0 -0
  127. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
  128. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
  129. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/detectors/language.py +0 -0
  130. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
  131. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
  132. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
  133. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
  134. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
  135. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
  136. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
  137. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
  138. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
  139. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
  140. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
  141. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/imports/__init__.py +0 -0
  142. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/imports/callable_signatures.py +0 -0
  143. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/imports/dynamic_resolvers.py +0 -0
  144. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/introspection/__init__.py +0 -0
  145. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/introspection/class_inspect.py +0 -0
  146. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/introspection/function_inspect.py +0 -0
  147. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/logging/__init__.py +0 -0
  148. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/logging/configuration.py +0 -0
  149. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/logging/sanitize.py +0 -0
  150. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
  151. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
  152. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
  153. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
  154. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
  155. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
  156. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
  157. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
  158. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
  159. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/nim/__init__.py +0 -0
  160. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
  161. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
  162. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/schema/__init__.py +0 -0
  163. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
  164. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
  165. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
  166. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
  167. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
  168. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
  169. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  170. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +0 -0
  171. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
  172. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/string_processing/configuration.py +0 -0
  173. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/string_processing/yaml.py +0 -0
  174. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/system/__init__.py +0 -0
  175. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
  176. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
  177. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/nv_ingest_api.egg-info/requires.txt +0 -0
  178. {nv_ingest_api-2025.8.26.dev20250826 → nv_ingest_api-2025.8.27.dev20250827}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.8.26.dev20250826
3
+ Version: 2025.8.27.dev20250827
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.8.26.dev20250826
3
+ Version: 2025.8.27.dev20250827
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -171,4 +171,6 @@ src/nv_ingest_api/util/string_processing/__init__.py
171
171
  src/nv_ingest_api/util/string_processing/configuration.py
172
172
  src/nv_ingest_api/util/string_processing/yaml.py
173
173
  src/nv_ingest_api/util/system/__init__.py
174
- src/nv_ingest_api/util/system/hardware_info.py
174
+ src/nv_ingest_api/util/system/hardware_info.py
175
+ src/udfs/__init__.py
176
+ src/udfs/llm_summarizer_udf.py
@@ -0,0 +1,5 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ """UDF (User Defined Function) modules for NV-Ingest API."""
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ LLM Content Summarizer UDF for NV-Ingest Pipeline
4
+
5
+ This UDF uses an LLM API to generate concise summaries
6
+ of text content chunks, adding AI-generated summaries to the metadata for
7
+ enhanced downstream processing and search capabilities.
8
+
9
+ Environment Variables:
10
+ - NVIDIA_API_KEY: API key for NVIDIA NIM endpoints (required)
11
+ - LLM_SUMMARIZATION_MODEL: Model to use (default: nvidia/llama-3.1-nemotron-70b-instruct)
12
+ - LLM_SUMMARIZATION_BASE_URL: API base URL (default: https://integrate.api.nvidia.com/v1)
13
+ - LLM_SUMMARIZATION_TIMEOUT: API timeout in seconds (default: 60)
14
+ - LLM_MIN_CONTENT_LENGTH: Minimum content length to summarize (default: 50)
15
+ - LLM_MAX_CONTENT_LENGTH: Maximum content length to send to API (default: 12000)
16
+ """
17
+
18
+ import os
19
+ import logging
20
+ from typing import Optional
21
+
22
+
23
def _read_int_env(name: str, default: int, logger) -> int:
    """Return environment variable *name* parsed as an integer.

    *default* is returned when the variable is unset. When it is set but does
    not parse as an integer, a warning is logged and *default* is returned, so
    a typo in the deployment configuration cannot abort the whole UDF.
    """
    raw_value = os.getenv(name)
    if raw_value is None:
        return default
    try:
        return int(raw_value)
    except ValueError:
        logger.warning(f"Invalid integer value {raw_value!r} for {name}, using default {default}")
        return default


def content_summarizer(control_message: "IngestControlMessage") -> "IngestControlMessage":  # noqa: F821
    """
    UDF function that adds LLM-generated summaries to text content chunks.

    Each row of the control message's DataFrame payload is inspected for text
    content; rows with enough text are summarized through an OpenAI-compatible
    chat endpoint and the summary is stored under the row metadata's
    ``custom_content`` field.

    Configuration is read from the environment on every call:
    ``NVIDIA_API_KEY``, ``LLM_SUMMARIZATION_MODEL``, ``LLM_SUMMARIZATION_BASE_URL``,
    ``LLM_SUMMARIZATION_TIMEOUT``, ``LLM_MIN_CONTENT_LENGTH`` and
    ``LLM_MAX_CONTENT_LENGTH``. Malformed integer settings fall back to their
    defaults instead of raising.

    Parameters
    ----------
    control_message : IngestControlMessage
        The control message containing the DataFrame payload with text content

    Returns
    -------
    IngestControlMessage
        The same control message. It is returned untouched when no API key is
        configured, the payload is empty, or the client cannot be created;
        otherwise its payload is replaced with the updated DataFrame.
    """
    logger = logging.getLogger(__name__)
    logger.info("UDF: Starting LLM content summarization")

    # Get configuration from environment
    api_key = os.getenv("NVIDIA_API_KEY", "")
    model_name = os.getenv("LLM_SUMMARIZATION_MODEL", "nvidia/llama-3.1-nemotron-70b-instruct")
    base_url = os.getenv("LLM_SUMMARIZATION_BASE_URL", "https://integrate.api.nvidia.com/v1")
    timeout = _read_int_env("LLM_SUMMARIZATION_TIMEOUT", 60, logger)
    min_content_length = _read_int_env("LLM_MIN_CONTENT_LENGTH", 50, logger)
    max_content_length = _read_int_env("LLM_MAX_CONTENT_LENGTH", 12000, logger)

    if not api_key:
        logger.warning("NVIDIA_API_KEY not found, skipping summarization")
        return control_message

    # Get the DataFrame payload
    df = control_message.payload()
    if df is None or len(df) == 0:
        logger.warning("No payload found in control message")
        return control_message

    logger.info(f"Processing {len(df)} rows for LLM summarization")

    # Imported only once there is real work to do, so the skip paths above do
    # not require the optional ``openai`` dependency to be installed.
    from openai import OpenAI

    # Initialize OpenAI client with error handling
    try:
        client = OpenAI(base_url=base_url, api_key=api_key, timeout=timeout)
    except Exception as e:
        logger.error(f"Failed to initialize OpenAI client: {e}")
        return control_message

    # Stats for reporting
    stats = {"processed": 0, "summarized": 0, "skipped": 0, "failed": 0}

    # Process each row
    for idx, row in df.iterrows():
        stats["processed"] += 1

        # A failure on one row must not prevent the remaining rows from being
        # summarized, hence the deliberately broad handler below.
        try:
            content = _extract_content(row, logger)

            if not content:
                stats["skipped"] += 1
                continue

            content = content.strip()
            if len(content) < min_content_length:
                stats["skipped"] += 1
                continue

            # Truncate if needed
            if len(content) > max_content_length:
                content = content[:max_content_length]

            # Generate summary
            summary = _generate_summary(client, content, model_name, logger)

            if summary:
                # Add to metadata
                _add_summary(df, idx, row, summary, model_name, logger)
                stats["summarized"] += 1
            else:
                stats["failed"] += 1

        except Exception as e:
            stats["failed"] += 1
            logger.error(f"Row {idx}: Error processing content: {e}")

    # Update the control message with modified DataFrame
    control_message.payload(df)

    logger.info(
        f"LLM summarization complete: {stats['summarized']}/{stats['processed']} documents summarized, "
        f"{stats['skipped']} skipped, {stats['failed']} failed"
    )

    return control_message
126
+
127
+
128
def _extract_content(row, logger) -> Optional[str]:
    """Extract text content from row, trying multiple locations.

    Locations are tried in this order and the first non-empty string wins:

    1. ``row["metadata"]["content"]``
    2. ``row["metadata"]["text_metadata"]["text"]``
    3. ``row["metadata"]["text_metadata"]["content"]``
    4. ``row["content"]``

    Parameters
    ----------
    row
        Mapping-like row exposing ``.get`` (the caller passes the rows yielded
        by ``DataFrame.iterrows()``).
    logger
        Unused; kept so the signature stays parallel with the other helpers.

    Returns
    -------
    Optional[str]
        The first non-empty string found, or ``None`` when no location holds
        one. Non-string values (``None``, NaN, nested containers) are ignored
        so the caller can safely call ``str`` methods on the result.
    """
    candidates = []

    metadata = row.get("metadata")
    if isinstance(metadata, dict):
        # Primary location: metadata.content
        candidates.append(metadata.get("content"))

        # ``text_metadata`` may be present but null, so check the type rather
        # than relying on a ``{}`` default that only covers a missing key.
        text_metadata = metadata.get("text_metadata")
        if isinstance(text_metadata, dict):
            candidates.append(text_metadata.get("text"))
            candidates.append(text_metadata.get("content"))

    # Last resort: top-level content field
    candidates.append(row.get("content"))

    for candidate in candidates:
        if isinstance(candidate, str) and candidate:
            return candidate

    return None
153
+
154
+
155
def _generate_summary(client, content: str, model_name: str, logger) -> Optional[str]:
    """Request a 3-4 sentence summary of *content* from a chat-completions endpoint.

    Parameters
    ----------
    client
        Object exposing ``chat.completions.create`` (the caller passes an
        ``openai.OpenAI`` instance).
    content : str
        Document text embedded verbatim in the prompt.
    model_name : str
        Model identifier forwarded to the endpoint.
    logger
        Logger used to report request failures and empty responses.

    Returns
    -------
    Optional[str]
        The summary with surrounding whitespace removed, or ``None`` when the
        request raises, the response has no choices, or the first choice
        carries no text.
    """
    prompt = f"""Please provide a comprehensive 3-4 sentence summary of the following document:

{content}

Focus on the main purpose, key topics, and important details.
This summary will be used for document search and understanding.

Summary:"""

    # Best-effort boundary: any client or transport error is logged and turned
    # into ``None`` so the caller can count the row as failed and move on.
    try:
        completion = client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=400,  # Increased for more comprehensive summaries
            temperature=0.7,
        )

        if not completion.choices:
            return None

        text = completion.choices[0].message.content
    except Exception as e:
        logger.error(f"API call failed: {e}")
        return None

    # The message content can be null; report that as an empty response
    # instead of letting ``.strip()`` raise and be logged as an API failure.
    if not isinstance(text, str) or not text.strip():
        logger.warning("LLM response contained no summary text")
        return None

    return text.strip()
183
+
184
+
185
def _add_summary(df, idx: int, row, summary: str, model_name: str, logger):
    """Add summary to metadata with safe handling.

    Builds an updated copy of the row's metadata in which
    ``custom_content["llm_summary"]`` holds ``{"summary": ..., "model": ...}``
    and writes it to the ``metadata`` column of *df* at index label *idx*.

    Parameters
    ----------
    df
        DataFrame being updated in place.
    idx
        Index label of the row (as yielded by ``DataFrame.iterrows()``).
    row
        Mapping-like row exposing ``.get``; its ``metadata`` entry is used as
        the starting point when it is a dict.
    summary : str
        Generated summary text.
    model_name : str
        Name of the model that produced the summary.
    logger
        Logger used to report a failed update.

    Returns
    -------
    None
        Failures are logged rather than raised.
    """
    try:
        # Start from a copy so the dict referenced by ``row`` is left untouched.
        existing_metadata = row.get("metadata")
        metadata = dict(existing_metadata) if isinstance(existing_metadata, dict) else {}

        # ``dict(...)`` above is shallow: copy ``custom_content`` as well,
        # otherwise adding the summary would mutate the original nested dict.
        custom_content = metadata.get("custom_content")
        if custom_content is None:
            custom_content = {}
        elif isinstance(custom_content, dict):
            custom_content = dict(custom_content)
        else:
            # Refuse to overwrite data of an unexpected shape.
            raise TypeError(f"custom_content has unsupported type {type(custom_content).__name__}")

        # Add LLM summary
        custom_content["llm_summary"] = {"summary": summary, "model": model_name}
        metadata["custom_content"] = custom_content

        # Single label-based write. A chained ``df.iloc[idx]["metadata"] = ...``
        # assigns to a temporary object and treats the label as a position, so
        # it cannot serve as a reliable fallback.
        df.at[idx, "metadata"] = metadata

    except Exception as e:
        logger.error(f"Failed to add summary to row {idx}: {e}")