nv-ingest 2025.7.15.dev20250715__tar.gz → 2025.7.17.dev20250717__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest might be problematic. Click here for more details.

Files changed (108)
  1. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/PKG-INFO +1 -1
  2. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/api/v1/health.py +1 -1
  3. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +8 -7
  4. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +5 -2
  5. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +3 -2
  6. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +10 -10
  7. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest.egg-info/PKG-INFO +1 -1
  8. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/LICENSE +0 -0
  9. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/MANIFEST.in +0 -0
  10. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/__init__.py +0 -0
  11. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/api/__init__.py +0 -0
  12. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/api/main.py +0 -0
  13. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/api/v1/__init__.py +0 -0
  14. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/api/v1/ingest.py +0 -0
  15. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/api/v1/metrics.py +0 -0
  16. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/__init__.py +0 -0
  17. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/__init__.py +0 -0
  18. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  19. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  20. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  21. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  22. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  23. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  24. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  25. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  26. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  27. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  28. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  29. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +0 -0
  30. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +0 -0
  31. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +0 -0
  32. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  33. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  34. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  35. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  36. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  37. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  38. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  39. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  40. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  41. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  42. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  43. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  44. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -0
  45. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  46. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  47. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  48. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +0 -0
  49. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
  50. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  51. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  52. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  53. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  54. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  55. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  56. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  57. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  58. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  59. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  60. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  61. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  62. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  63. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  64. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  65. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  66. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +0 -0
  67. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +0 -0
  68. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  69. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  70. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  71. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  72. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  73. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +0 -0
  74. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +0 -0
  75. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  76. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  77. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  78. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/__init__.py +0 -0
  79. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  80. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  81. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  82. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  83. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  84. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  85. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  86. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  87. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  88. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  89. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  90. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/__init__.py +0 -0
  91. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  92. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  93. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/service/__init__.py +0 -0
  94. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  95. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  96. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
  97. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  98. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  99. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  100. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  101. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  102. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest/version.py +0 -0
  103. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest.egg-info/SOURCES.txt +0 -0
  104. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest.egg-info/dependency_links.txt +0 -0
  105. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest.egg-info/requires.txt +0 -0
  106. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/nv_ingest.egg-info/top_level.txt +0 -0
  107. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/pyproject.toml +0 -0
  108. {nv_ingest-2025.7.15.dev20250715 → nv_ingest-2025.7.17.dev20250717}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.7.15.dev20250715
3
+ Version: 2025.7.17.dev20250717
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -18,7 +18,7 @@ router = APIRouter()
18
18
 
19
19
  # List of ALL of the HTTP environment variable endpoints that should be checked
20
20
  READY_CHECK_ENV_VAR_MAP = {
21
- "paddle": "PADDLE_HTTP_ENDPOINT",
21
+ "ocr": "OCR_HTTP_ENDPOINT",
22
22
  "yolox_graphic_elements": "YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT",
23
23
  "yolox_page_elements": "YOLOX_HTTP_ENDPOINT",
24
24
  "yolox_table_structure": "YOLOX_TABLE_STRUCTURE_HTTP_ENDPOINT",
@@ -147,8 +147,9 @@ if __name__ == "__main__":
147
147
  os.environ["YOLOX_GRAPHIC_ELEMENTS_GRPC_ENDPOINT"] = "127.0.0.1:8004"
148
148
  os.environ["YOLOX_GRAPHIC_ELEMENTS_HTTP_ENDPOINT"] = "http://localhost:8003/v1/infer"
149
149
  os.environ["YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL"] = "http"
150
- os.environ["PADDLE_GRPC_ENDPOINT"] = "localhost:8010"
151
- os.environ["PADDLE_INFER_PROTOCOL"] = "grpc"
150
+ os.environ["OCR_GRPC_ENDPOINT"] = "localhost:8010"
151
+ os.environ["OCR_INFER_PROTOCOL"] = "grpc"
152
+ os.environ["OCR_MODEL_NAME"] = "paddle"
152
153
  os.environ["NEMORETRIEVER_PARSE_HTTP_ENDPOINT"] = "https://integrate.api.nvidia.com/v1/chat/completions"
153
154
  os.environ["VLM_CAPTION_ENDPOINT"] = "https://integrate.api.nvidia.com/v1/chat/completions"
154
155
  os.environ["VLM_CAPTION_MODEL_NAME"] = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
@@ -172,7 +173,7 @@ if __name__ == "__main__":
172
173
  nemoretriever_parse_grpc, nemoretriever_parse_http, nemoretriever_parse_auth, nemoretriever_parse_protocol = (
173
174
  get_nim_service("nemoretriever_parse")
174
175
  )
175
- paddle_grpc, paddle_http, paddle_auth, paddle_protocol = get_nim_service("paddle")
176
+ ocr_grpc, ocr_http, ocr_auth, ocr_protocol = get_nim_service("ocr")
176
177
 
177
178
  model_name = os.environ.get("NEMORETRIEVER_PARSE_MODEL_NAME", "nvidia/nemoretriever-parse")
178
179
  pdf_extractor_config = {
@@ -201,8 +202,8 @@ if __name__ == "__main__":
201
202
  "endpoint_config": {
202
203
  "yolox_endpoints": (yolox_graphic_elements_grpc, yolox_graphic_elements_http),
203
204
  "yolox_infer_protocol": yolox_graphic_elements_protocol,
204
- "paddle_endpoints": (paddle_grpc, paddle_http),
205
- "paddle_infer_protocol": paddle_protocol,
205
+ "ocr_endpoints": (ocr_grpc, ocr_http),
206
+ "ocr_infer_protocol": ocr_protocol,
206
207
  "auth_token": yolox_auth,
207
208
  }
208
209
  }
@@ -210,8 +211,8 @@ if __name__ == "__main__":
210
211
  "endpoint_config": {
211
212
  "yolox_endpoints": (yolox_table_structure_grpc, yolox_table_structure_http),
212
213
  "yolox_infer_protocol": yolox_table_structure_protocol,
213
- "paddle_endpoints": (paddle_grpc, paddle_http),
214
- "paddle_infer_protocol": paddle_protocol,
214
+ "ocr_endpoints": (ocr_grpc, ocr_http),
215
+ "ocr_infer_protocol": ocr_protocol,
215
216
  "auth_token": yolox_auth,
216
217
  }
217
218
  }
@@ -269,8 +269,11 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
269
269
  self._logger.debug("Received message type: %s", type(job))
270
270
  if isinstance(job, BaseModel):
271
271
  self._logger.debug("Message is a BaseModel with response_code: %s", job.response_code)
272
- if job.response_code != 0:
273
- self._logger.debug("Message response_code != 0, returning None")
272
+ if job.response_code not in (0, 2):
273
+ self._logger.debug("Message received with unhandled response_code, returning None")
274
+ return None
275
+ if job.response_code == 2:
276
+ self._logger.debug("Message response_code == 2, returning None")
274
277
  return None
275
278
  job = json.loads(job.response)
276
279
  self._logger.debug("Successfully fetched message with job_id: %s", job.get("job_id", "unknown"))
@@ -78,8 +78,9 @@ class PipelineCreationSchema(BaseModel):
78
78
  otel_exporter_otlp_endpoint: str = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "localhost:4317")
79
79
 
80
80
  # OCR settings
81
- paddle_http_endpoint: str = os.getenv("PADDLE_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/baidu/paddleocr")
82
- paddle_infer_protocol: str = os.getenv("PADDLE_INFER_PROTOCOL", "http")
81
+ ocr_http_endpoint: str = os.getenv("OCR_HTTP_ENDPOINT", "https://ai.api.nvidia.com/v1/cv/baidu/paddleocr")
82
+ ocr_infer_protocol: str = os.getenv("OCR_INFER_PROTOCOL", "http")
83
+ ocr_model_name: str = os.getenv("OCR_MODEL_NAME", "paddle")
83
84
 
84
85
  # Task queue settings
85
86
  REDIS_INGEST_TASK_QUEUE: str = "ingest_task_queue"
@@ -223,15 +223,15 @@ def add_table_extractor_stage(pipeline, default_cpu_count, stage_name="table_ext
223
223
  yolox_table_structure_grpc, yolox_table_structure_http, yolox_auth, yolox_table_structure_protocol = (
224
224
  get_nim_service("yolox_table_structure")
225
225
  )
226
- paddle_grpc, paddle_http, paddle_auth, paddle_protocol = get_nim_service("paddle")
226
+ ocr_grpc, ocr_http, ocr_auth, ocr_protocol = get_nim_service("ocr")
227
227
 
228
228
  table_extractor_config = TableExtractorSchema(
229
229
  **{
230
230
  "endpoint_config": {
231
231
  "yolox_endpoints": (yolox_table_structure_grpc, yolox_table_structure_http),
232
232
  "yolox_infer_protocol": yolox_table_structure_protocol,
233
- "paddle_endpoints": (paddle_grpc, paddle_http),
234
- "paddle_infer_protocol": paddle_protocol,
233
+ "ocr_endpoints": (ocr_grpc, ocr_http),
234
+ "ocr_infer_protocol": ocr_protocol,
235
235
  "auth_token": yolox_auth,
236
236
  }
237
237
  }
@@ -252,15 +252,15 @@ def add_chart_extractor_stage(pipeline, default_cpu_count, stage_name="chart_ext
252
252
  yolox_graphic_elements_grpc, yolox_graphic_elements_http, yolox_auth, yolox_graphic_elements_protocol = (
253
253
  get_nim_service("yolox_graphic_elements")
254
254
  )
255
- paddle_grpc, paddle_http, paddle_auth, paddle_protocol = get_nim_service("paddle")
255
+ ocr_grpc, ocr_http, ocr_auth, ocr_protocol = get_nim_service("ocr")
256
256
 
257
257
  chart_extractor_config = ChartExtractorSchema(
258
258
  **{
259
259
  "endpoint_config": {
260
260
  "yolox_endpoints": (yolox_graphic_elements_grpc, yolox_graphic_elements_http),
261
261
  "yolox_infer_protocol": yolox_graphic_elements_protocol,
262
- "paddle_endpoints": (paddle_grpc, paddle_http),
263
- "paddle_infer_protocol": paddle_protocol,
262
+ "ocr_endpoints": (ocr_grpc, ocr_http),
263
+ "ocr_infer_protocol": ocr_protocol,
264
264
  "auth_token": yolox_auth,
265
265
  }
266
266
  }
@@ -278,14 +278,14 @@ def add_chart_extractor_stage(pipeline, default_cpu_count, stage_name="chart_ext
278
278
 
279
279
 
280
280
  def add_infographic_extractor_stage(pipeline, default_cpu_count, stage_name="infographic_extractor"):
281
- paddle_grpc, paddle_http, paddle_auth, paddle_protocol = get_nim_service("paddle")
281
+ ocr_grpc, ocr_http, ocr_auth, ocr_protocol = get_nim_service("ocr")
282
282
 
283
283
  infographic_content_extractor_config = InfographicExtractorSchema(
284
284
  **{
285
285
  "endpoint_config": {
286
- "paddle_endpoints": (paddle_grpc, paddle_http),
287
- "paddle_infer_protocol": paddle_protocol,
288
- "auth_token": paddle_auth,
286
+ "ocr_endpoints": (ocr_grpc, ocr_http),
287
+ "ocr_infer_protocol": ocr_protocol,
288
+ "auth_token": ocr_auth,
289
289
  }
290
290
  }
291
291
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.7.15.dev20250715
3
+ Version: 2025.7.17.dev20250717
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License