nv-ingest-api 2025.10.14.dev20251014__tar.gz → 2025.10.15.dev20251015__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

Files changed (181)
  1. {nv_ingest_api-2025.10.14.dev20251014/src/nv_ingest_api.egg-info → nv_ingest_api-2025.10.15.dev20251015}/PKG-INFO +1 -1
  2. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/meta/ingest_job_schema.py +12 -0
  3. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/service_clients/rest/rest_client.py +24 -3
  4. nv_ingest_api-2025.10.15.dev20251015/src/nv_ingest_api/util/string_processing/yaml.py +82 -0
  5. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015/src/nv_ingest_api.egg-info}/PKG-INFO +1 -1
  6. nv_ingest_api-2025.10.14.dev20251014/src/nv_ingest_api/util/string_processing/yaml.py +0 -45
  7. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/LICENSE +0 -0
  8. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/MANIFEST.in +0 -0
  9. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/README.md +0 -0
  10. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/pyproject.toml +0 -0
  11. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/setup.cfg +0 -0
  12. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/__init__.py +0 -0
  13. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/interface/__init__.py +0 -0
  14. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/interface/extract.py +0 -0
  15. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/interface/mutate.py +0 -0
  16. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/interface/store.py +0 -0
  17. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/interface/transform.py +0 -0
  18. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/interface/utility.py +0 -0
  19. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/__init__.py +0 -0
  20. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/enums/__init__.py +0 -0
  21. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/enums/common.py +0 -0
  22. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/__init__.py +0 -0
  23. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/audio/__init__.py +0 -0
  24. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/audio/audio_extraction.py +0 -0
  25. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/docx/__init__.py +0 -0
  26. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/docx/docx_extractor.py +0 -0
  27. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/docx/engines/__init__.py +0 -0
  28. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/__init__.py +0 -0
  29. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docx_helper.py +0 -0
  30. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/docx/engines/docxreader_helpers/docxreader.py +0 -0
  31. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/html/__init__.py +0 -0
  32. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/html/html_extractor.py +0 -0
  33. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/image/__init__.py +0 -0
  34. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/image/chart_extractor.py +0 -0
  35. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/image/image_extractor.py +0 -0
  36. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/image/image_helpers/__init__.py +0 -0
  37. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/image/image_helpers/common.py +0 -0
  38. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/image/infographic_extractor.py +0 -0
  39. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/image/table_extractor.py +0 -0
  40. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pdf/__init__.py +0 -0
  41. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pdf/engines/__init__.py +0 -0
  42. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pdf/engines/adobe.py +0 -0
  43. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pdf/engines/llama.py +0 -0
  44. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pdf/engines/nemoretriever.py +0 -0
  45. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pdf/engines/pdf_helpers/__init__.py +0 -0
  46. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pdf/engines/pdfium.py +0 -0
  47. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pdf/engines/tika.py +0 -0
  48. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pdf/engines/unstructured_io.py +0 -0
  49. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pdf/pdf_extractor.py +0 -0
  50. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pptx/__init__.py +0 -0
  51. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pptx/engines/__init__.py +0 -0
  52. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pptx/engines/pptx_helper.py +0 -0
  53. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/extract/pptx/pptx_extractor.py +0 -0
  54. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/meta/__init__.py +0 -0
  55. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/meta/udf.py +0 -0
  56. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/mutate/__init__.py +0 -0
  57. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/mutate/deduplicate.py +0 -0
  58. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/mutate/filter.py +0 -0
  59. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/__init__.py +0 -0
  60. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/control_message_task.py +0 -0
  61. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/ingest_control_message.py +0 -0
  62. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/__init__.py +0 -0
  63. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/default_values.py +0 -0
  64. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/model_interface/__init__.py +0 -0
  65. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/model_interface/cached.py +0 -0
  66. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/model_interface/decorators.py +0 -0
  67. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/model_interface/deplot.py +0 -0
  68. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/model_interface/helpers.py +0 -0
  69. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py +0 -0
  70. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/model_interface/ocr.py +0 -0
  71. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py +0 -0
  72. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +0 -0
  73. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/model_interface/vlm.py +0 -0
  74. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/model_interface/yolox.py +0 -0
  75. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/nim_client.py +0 -0
  76. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/nim/nim_model_interface.py +0 -0
  77. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/tracing/__init__.py +0 -0
  78. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/tracing/latency.py +0 -0
  79. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/tracing/logging.py +0 -0
  80. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/primitives/tracing/tagging.py +0 -0
  81. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/__init__.py +0 -0
  82. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/extract/__init__.py +0 -0
  83. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/extract/extract_audio_schema.py +0 -0
  84. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/extract/extract_chart_schema.py +0 -0
  85. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/extract/extract_docx_schema.py +0 -0
  86. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/extract/extract_html_schema.py +0 -0
  87. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/extract/extract_image_schema.py +0 -0
  88. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/extract/extract_infographic_schema.py +0 -0
  89. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/extract/extract_pdf_schema.py +0 -0
  90. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/extract/extract_pptx_schema.py +0 -0
  91. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/extract/extract_table_schema.py +0 -0
  92. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/message_brokers/__init__.py +0 -0
  93. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/message_brokers/message_broker_client_schema.py +0 -0
  94. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/message_brokers/request_schema.py +0 -0
  95. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/message_brokers/response_schema.py +0 -0
  96. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/meta/__init__.py +0 -0
  97. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/meta/base_model_noext.py +0 -0
  98. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/meta/metadata_schema.py +0 -0
  99. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/meta/udf.py +0 -0
  100. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/mutate/__init__.py +0 -0
  101. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/mutate/mutate_image_dedup_schema.py +0 -0
  102. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/store/__init__.py +0 -0
  103. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/store/store_embedding_schema.py +0 -0
  104. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/store/store_image_schema.py +0 -0
  105. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/transform/__init__.py +0 -0
  106. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/transform/transform_image_caption_schema.py +0 -0
  107. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/transform/transform_image_filter_schema.py +0 -0
  108. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/transform/transform_text_embedding_schema.py +0 -0
  109. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/schemas/transform/transform_text_splitter_schema.py +0 -0
  110. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/store/__init__.py +0 -0
  111. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/store/embed_text_upload.py +0 -0
  112. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/store/image_upload.py +0 -0
  113. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/transform/__init__.py +0 -0
  114. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/transform/caption_image.py +0 -0
  115. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/transform/embed_text.py +0 -0
  116. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/internal/transform/split_text.py +0 -0
  117. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/__init__.py +0 -0
  118. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/control_message/__init__.py +0 -0
  119. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/control_message/validators.py +0 -0
  120. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/converters/__init__.py +0 -0
  121. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/converters/bytetools.py +0 -0
  122. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/converters/containers.py +0 -0
  123. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/converters/datetools.py +0 -0
  124. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/converters/dftools.py +0 -0
  125. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/converters/formats.py +0 -0
  126. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/converters/type_mappings.py +0 -0
  127. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/dataloader/__init__.py +0 -0
  128. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/dataloader/dataloader.py +0 -0
  129. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/detectors/__init__.py +0 -0
  130. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/detectors/language.py +0 -0
  131. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/exception_handlers/__init__.py +0 -0
  132. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/exception_handlers/converters.py +0 -0
  133. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/exception_handlers/decorators.py +0 -0
  134. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/exception_handlers/detectors.py +0 -0
  135. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/exception_handlers/pdf.py +0 -0
  136. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/exception_handlers/schemas.py +0 -0
  137. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/image_processing/__init__.py +0 -0
  138. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/image_processing/clustering.py +0 -0
  139. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/image_processing/processing.py +0 -0
  140. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/image_processing/table_and_chart.py +0 -0
  141. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/image_processing/transforms.py +0 -0
  142. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/imports/__init__.py +0 -0
  143. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/imports/callable_signatures.py +0 -0
  144. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/imports/dynamic_resolvers.py +0 -0
  145. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/introspection/__init__.py +0 -0
  146. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/introspection/class_inspect.py +0 -0
  147. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/introspection/function_inspect.py +0 -0
  148. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/logging/__init__.py +0 -0
  149. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/logging/configuration.py +0 -0
  150. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/logging/sanitize.py +0 -0
  151. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/message_brokers/__init__.py +0 -0
  152. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/message_brokers/simple_message_broker/__init__.py +0 -0
  153. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/message_brokers/simple_message_broker/broker.py +0 -0
  154. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/message_brokers/simple_message_broker/ordered_message_queue.py +0 -0
  155. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/message_brokers/simple_message_broker/simple_client.py +0 -0
  156. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/metadata/__init__.py +0 -0
  157. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/metadata/aggregators.py +0 -0
  158. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/multi_processing/__init__.py +0 -0
  159. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/multi_processing/mp_pool_singleton.py +0 -0
  160. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/nim/__init__.py +0 -0
  161. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/pdf/__init__.py +0 -0
  162. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/pdf/pdfium.py +0 -0
  163. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/schema/__init__.py +0 -0
  164. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/schema/schema_validator.py +0 -0
  165. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/service_clients/__init__.py +0 -0
  166. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/service_clients/client_base.py +0 -0
  167. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/service_clients/kafka/__init__.py +0 -0
  168. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/service_clients/redis/__init__.py +0 -0
  169. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/service_clients/redis/redis_client.py +0 -0
  170. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/service_clients/rest/__init__.py +0 -0
  171. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/string_processing/__init__.py +0 -0
  172. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/string_processing/configuration.py +0 -0
  173. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/system/__init__.py +0 -0
  174. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api/util/system/hardware_info.py +0 -0
  175. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api.egg-info/SOURCES.txt +0 -0
  176. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api.egg-info/dependency_links.txt +0 -0
  177. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api.egg-info/requires.txt +0 -0
  178. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/nv_ingest_api.egg-info/top_level.txt +0 -0
  179. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/udfs/__init__.py +0 -0
  180. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/udfs/llm_summarizer_udf.py +0 -0
  181. {nv_ingest_api-2025.10.14.dev20251014 → nv_ingest_api-2025.10.15.dev20251015}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.10.14.dev20251014
3
+ Version: 2025.10.15.dev20251015
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -32,6 +32,17 @@ class TracingOptionsSchema(BaseModelNoExt):
32
32
  total_pages: Optional[int] = None
33
33
 
34
34
 
35
+ # PDF Configuration Schema
36
+ class PdfConfigSchema(BaseModelNoExt):
37
+ """PDF-specific configuration options for job submission.
38
+
39
+ Note: split_page_count accepts any positive integer but will be clamped
40
+ to [1, 128] range by the server at runtime.
41
+ """
42
+
43
+ split_page_count: Annotated[int, Field(ge=1)] = 32
44
+
45
+
35
46
  # Ingest Task Schemas
36
47
 
37
48
 
@@ -270,6 +281,7 @@ class IngestJobSchema(BaseModelNoExt):
270
281
  job_id: Union[str, int]
271
282
  tasks: List[IngestTaskSchema]
272
283
  tracing_options: Optional[TracingOptionsSchema] = None
284
+ pdf_config: Optional[PdfConfigSchema] = None
273
285
 
274
286
 
275
287
  # ------------------------------------------------------------------------------
@@ -3,7 +3,6 @@
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  import logging
6
- import os
7
6
  import re
8
7
  import time
9
8
  from typing import Any, Union, Tuple, Optional, Dict, Callable
@@ -104,6 +103,17 @@ class RestClient(MessageBrokerClientBase):
104
103
  Default timeout in seconds for waiting for data after connection. Default is None.
105
104
  http_allocator : Optional[Callable[[], Any]], optional
106
105
  A callable that returns an HTTP client instance. If None, `requests.Session()` is used.
106
+ **kwargs : dict
107
+ Additional keyword arguments. Supported keys:
108
+ - api_version : str, optional
109
+ API version to use ('v1' or 'v2'). Defaults to 'v1' if not specified.
110
+ Invalid versions will log a warning and fall back to 'v1'.
111
+ - base_url : str, optional
112
+ Override the generated base URL.
113
+ - headers : dict, optional
114
+ Additional headers to include in requests.
115
+ - auth : optional
116
+ Authentication configuration for requests.
107
117
 
108
118
  Returns
109
119
  -------
@@ -138,8 +148,19 @@ class RestClient(MessageBrokerClientBase):
138
148
  )
139
149
  self._client = requests.Session()
140
150
 
141
- # Allow API version override via environment variable or kwargs
142
- api_version = kwargs.get("api_version") or os.getenv("NV_INGEST_API_VERSION", "v1")
151
+ # Validate and normalize API version to prevent misconfiguration
152
+ # Default to v1 for backwards compatibility if not explicitly provided
153
+ VALID_API_VERSIONS = {"v1", "v2"}
154
+ raw_api_version = kwargs.get("api_version", "v1")
155
+ api_version = str(raw_api_version).strip().lower()
156
+
157
+ if api_version not in VALID_API_VERSIONS:
158
+ logger.warning(
159
+ f"Invalid API version '{raw_api_version}' specified. "
160
+ f"Valid versions are: {VALID_API_VERSIONS}. Falling back to 'v1'."
161
+ )
162
+ api_version = "v1"
163
+
143
164
  self._api_version = api_version
144
165
  self._submit_endpoint: str = f"/{api_version}/submit_job"
145
166
  self._fetch_endpoint: str = f"/{api_version}/fetch_job"
@@ -0,0 +1,82 @@
1
+ import os
2
+ import re
3
+ from typing import Optional
4
+
5
+ # This regex finds all forms of environment variables:
6
+ # $VAR, ${VAR}, $VAR|default, and ${VAR|default}
7
+ # It avoids matching escaped variables like $$.
8
+ # Default values can be quoted or unquoted.
9
+ _ENV_VAR_PATTERN = re.compile(
10
+ r"""(?<!\$)\$(?:
11
+ {(?P<braced>\w+)(?:\|(?P<braced_default>[^}]+))?}
12
+ |
13
+ (?P<named>\w+)(?:\|(?P<named_default>"[^"\\]*(?:\\.[^"\\]*)*"|'[^'\\]*(?:\\.[^'\\]*)*'|\S+))?
14
+ )""",
15
+ re.VERBOSE,
16
+ )
17
+
18
+
19
+ def _replacer(match: re.Match) -> str:
20
+ """Replaces a regex match with the corresponding environment variable."""
21
+ var_name = match.group("braced") or match.group("named")
22
+ default_val = match.group("braced_default") or match.group("named_default")
23
+
24
+ # First try the primary env var
25
+ value = os.environ.get(var_name)
26
+ if value is not None:
27
+ return value
28
+
29
+ # If primary is missing, try the default.
30
+ resolved_default = _resolve_default_with_single_fallback(default_val)
31
+
32
+ if resolved_default is None:
33
+ return ""
34
+
35
+ return resolved_default
36
+
37
+
38
+ def _is_var_ref(token: str) -> Optional[str]:
39
+ """If token is a $VAR or ${VAR} reference, return VAR name; else None."""
40
+ if not token:
41
+ return None
42
+ if token.startswith("${") and token.endswith("}"):
43
+ inner = token[2:-1]
44
+ return inner if re.fullmatch(r"\w+", inner) else None
45
+ if token.startswith("$"):
46
+ inner = token[1:]
47
+ return inner if re.fullmatch(r"\w+", inner) else None
48
+ return None
49
+
50
+
51
+ def _resolve_default_with_single_fallback(default_val: Optional[str]) -> Optional[str]:
52
+ """
53
+ Support a single-level fallback where the default itself can be another env var.
54
+ For example, in $A|$B or ${A|$B}, we try B if A missing.
55
+ """
56
+ if default_val is None:
57
+ return None
58
+
59
+ var = _is_var_ref(default_val)
60
+ if var is not None:
61
+ return os.environ.get(var, None)
62
+
63
+ return default_val
64
+
65
+
66
+ def substitute_env_vars_in_yaml_content(raw_content: str) -> str:
67
+ """
68
+ Substitutes environment variables in a YAML string.
69
+
70
+ This function finds all occurrences of environment variable placeholders
71
+ ($VAR, ${VAR}, $VAR|default, ${VAR|default}) in the input string
72
+ and replaces them with their corresponding environment variable values.
73
+ Also supports a single fallback to another env var: $VAR|$OTHER, ${VAR|$OTHER}
74
+ Quoted defaults are preserved EXACTLY as written (e.g., 'a,b' keeps quotes).
75
+
76
+ Args:
77
+ raw_content: The raw string content of a YAML file.
78
+
79
+ Returns:
80
+ The YAML string with environment variables substituted.
81
+ """
82
+ return _ENV_VAR_PATTERN.sub(_replacer, raw_content)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.10.14.dev20251014
3
+ Version: 2025.10.15.dev20251015
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -1,45 +0,0 @@
1
- import os
2
- import re
3
-
4
- # This regex finds all forms of environment variables:
5
- # $VAR, ${VAR}, $VAR|default, and ${VAR|default}
6
- # It avoids matching escaped variables like $$.
7
- # Default values can be quoted or unquoted.
8
- _ENV_VAR_PATTERN = re.compile(
9
- r"""(?<!\$)\$(?:
10
- {(?P<braced>\w+)(?:\|(?P<braced_default>[^}]+))?}
11
- |
12
- (?P<named>\w+)(?:\|(?P<named_default>"[^"\\]*(?:\\.[^"\\]*)*"|'[^'\\]*(?:\\.[^'\\]*)*'|\S+))?
13
- )""",
14
- re.VERBOSE,
15
- )
16
-
17
-
18
- def _replacer(match: re.Match) -> str:
19
- """Replaces a regex match with the corresponding environment variable."""
20
- var_name = match.group("braced") or match.group("named")
21
- default_val = match.group("braced_default") or match.group("named_default")
22
-
23
- # Get value from environment, or use default.
24
- value = os.environ.get(var_name, default_val)
25
-
26
- if value is None:
27
- return ""
28
- return value
29
-
30
-
31
- def substitute_env_vars_in_yaml_content(raw_content: str) -> str:
32
- """
33
- Substitutes environment variables in a YAML string.
34
-
35
- This function finds all occurrences of environment variable placeholders
36
- ($VAR, ${VAR}, $VAR|default, ${VAR|default}) in the input string
37
- and replaces them with their corresponding environment variable values.
38
-
39
- Args:
40
- raw_content: The raw string content of a YAML file.
41
-
42
- Returns:
43
- The YAML string with environment variables substituted.
44
- """
45
- return _ENV_VAR_PATTERN.sub(_replacer, raw_content)