nv-ingest 2025.6.2.dev20250602__tar.gz → 2025.7.7.dev20250707__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest has been flagged as potentially problematic. (The original page linked to further details here; the link target was not preserved in this text.)

Files changed (109)
  1. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/PKG-INFO +4 -4
  2. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +4 -4
  3. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/primitives/pipeline_topology.py +2 -2
  4. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/primitives/ray_pipeline.py +33 -5
  5. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/primitives/ray_stat_collector.py +1 -1
  6. nv_ingest-2025.7.7.dev20250707/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +161 -0
  7. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_source_stage_base.py +3 -6
  8. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +1 -10
  9. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/transforms/text_embed.py +1 -1
  10. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/transforms/text_splitter.py +47 -2
  11. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_builders.py +3 -3
  12. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/util/pipeline/pipeline_runners.py +7 -3
  13. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/util/pipeline/stage_builders.py +7 -9
  14. nv_ingest-2025.7.7.dev20250707/nv_ingest/framework/orchestration/ray/util/pipeline/tools.py +203 -0
  15. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/version.py +0 -8
  16. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest.egg-info/PKG-INFO +4 -4
  17. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest.egg-info/SOURCES.txt +1 -0
  18. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest.egg-info/requires.txt +3 -3
  19. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/pyproject.toml +3 -3
  20. nv_ingest-2025.6.2.dev20250602/nv_ingest/framework/orchestration/ray/stages/injectors/metadata_injector.py +0 -97
  21. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/LICENSE +0 -0
  22. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/MANIFEST.in +0 -0
  23. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/__init__.py +0 -0
  24. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/api/__init__.py +0 -0
  25. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/api/main.py +0 -0
  26. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/api/v1/__init__.py +0 -0
  27. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/api/v1/health.py +0 -0
  28. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/api/v1/ingest.py +0 -0
  29. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/api/v1/metrics.py +0 -0
  30. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/__init__.py +0 -0
  31. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/__init__.py +0 -0
  32. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/__init__.py +0 -0
  33. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/edges/__init__.py +0 -0
  34. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/edges/async_queue_edge.py +0 -0
  35. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/edges/ray_queue_edge.py +0 -0
  36. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/edges/threaded_queue_edge.py +0 -0
  37. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/examples/__init__.py +0 -0
  38. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/examples/task_source_harness.py +0 -0
  39. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/examples/task_source_sink_harness.py +0 -0
  40. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/primitives/__init__.py +0 -0
  41. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/primitives/dataclasses.py +0 -0
  42. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/primitives/pipeline_monitor.py +0 -0
  43. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/__init__.py +0 -0
  44. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/extractors/__init__.py +0 -0
  45. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -0
  46. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/extractors/chart_extractor.py +0 -0
  47. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/extractors/docx_extractor.py +0 -0
  48. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/extractors/html_extractor.py +0 -0
  49. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/extractors/image_extractor.py +0 -0
  50. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/extractors/infographic_extractor.py +0 -0
  51. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/extractors/pdf_extractor.py +0 -0
  52. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/extractors/pptx_extractor.py +0 -0
  53. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/extractors/table_extractor.py +0 -0
  54. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/injectors/__init__.py +0 -0
  55. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/meta/__init__.py +0 -0
  56. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_edge_base.py +0 -0
  57. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_sink_stage_base.py +0 -0
  58. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/meta/ray_actor_stage_base.py +0 -0
  59. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/mutate/__init__.py +0 -0
  60. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/mutate/image_dedup.py +0 -0
  61. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/mutate/image_filter.py +0 -0
  62. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/sinks/__init__.py +0 -0
  63. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/sinks/default_drain.py +0 -0
  64. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/sinks/message_broker_task_sink.py +0 -0
  65. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/sources/__init__.py +0 -0
  66. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/storage/__init__.py +0 -0
  67. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +0 -0
  68. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/storage/store_embeddings.py +0 -0
  69. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/telemetry/__init__.py +0 -0
  70. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/telemetry/job_counter.py +0 -0
  71. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_meter.py +0 -0
  72. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/telemetry/otel_tracer.py +0 -0
  73. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/transforms/__init__.py +0 -0
  74. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/transforms/image_caption.py +0 -0
  75. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/utility/__init__.py +0 -0
  76. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/stages/utility/throughput_monitor.py +0 -0
  77. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/util/__init__.py +0 -0
  78. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/util/pipeline/__init__.py +0 -0
  79. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/util/pipeline/pid_controller.py +0 -0
  80. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/util/system_tools/__init__.py +0 -0
  81. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/util/system_tools/memory.py +0 -0
  82. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/orchestration/ray/util/system_tools/visualizers.py +0 -0
  83. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/__init__.py +0 -0
  84. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/framework_ingest_config_schema.py +0 -0
  85. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/framework_job_counter_schema.py +0 -0
  86. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/framework_message_broker_sink_schema.py +0 -0
  87. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/framework_message_broker_source_schema.py +0 -0
  88. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/framework_message_wrapper_schema.py +0 -0
  89. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/framework_metadata_injector_schema.py +0 -0
  90. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/framework_otel_meter_schema.py +0 -0
  91. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/framework_otel_tracer_schema.py +0 -0
  92. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/framework_processing_job_schema.py +0 -0
  93. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/framework_task_injection_schema.py +0 -0
  94. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/schemas/framework_vdb_task_sink_schema.py +0 -0
  95. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/__init__.py +0 -0
  96. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/flow_control/__init__.py +0 -0
  97. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/flow_control/filter_by_task.py +0 -0
  98. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/service/__init__.py +0 -0
  99. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/service/impl/__init__.py +0 -0
  100. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/service/impl/ingest/__init__.py +0 -0
  101. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +0 -0
  102. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/service/meta/__init__.py +0 -0
  103. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/service/meta/ingest/__init__.py +0 -0
  104. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/service/meta/ingest/ingest_service_meta.py +0 -0
  105. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/telemetry/__init__.py +0 -0
  106. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest/framework/util/telemetry/global_stats.py +0 -0
  107. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest.egg-info/dependency_links.txt +0 -0
  108. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/nv_ingest.egg-info/top_level.txt +0 -0
  109. {nv_ingest-2025.6.2.dev20250602 → nv_ingest-2025.7.7.dev20250707}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest
3
- Version: 2025.6.2.dev20250602
3
+ Version: 2025.7.7.dev20250707
4
4
  Summary: Python module for multimodal document ingestion
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -240,13 +240,13 @@ Requires-Dist: python-docx>=1.1.2
240
240
  Requires-Dist: python-dotenv>=1.0.1
241
241
  Requires-Dist: python-pptx>=1.0.2
242
242
  Requires-Dist: prometheus-client
243
- Requires-Dist: torch==2.4.1
243
+ Requires-Dist: torch>=2.4.1
244
244
  Requires-Dist: ray[all]>=2.37.0
245
245
  Requires-Dist: redis>=5.2.1
246
246
  Requires-Dist: requests>=2.28.2
247
247
  Requires-Dist: scikit-learn>=1.6.0
248
248
  Requires-Dist: scipy>=1.15.1
249
- Requires-Dist: setuptools>=58.2.0
249
+ Requires-Dist: setuptools>=78.1.1
250
250
  Requires-Dist: tabulate>=0.9.0
251
251
  Requires-Dist: torchvision
252
252
  Requires-Dist: torchaudio
@@ -259,7 +259,7 @@ Requires-Dist: opencv-python
259
259
  Requires-Dist: pymilvus>=2.5.10
260
260
  Requires-Dist: pymilvus[bulk_writer,model]
261
261
  Requires-Dist: tritonclient
262
- Requires-Dist: nvidia-riva-client>=2.18.0
262
+ Requires-Dist: nvidia-riva-client==2.20.0
263
263
  Requires-Dist: unstructured-client
264
264
  Requires-Dist: markitdown
265
265
  Dynamic: license-file
@@ -63,7 +63,7 @@ def get_nim_service(env_var_prefix):
63
63
  "",
64
64
  )
65
65
  auth_token = os.environ.get(
66
- "NVIDIA_BUILD_API_KEY",
66
+ "NVIDIA_API_KEY",
67
67
  "",
68
68
  ) or os.environ.get(
69
69
  "NGC_API_KEY",
@@ -151,11 +151,11 @@ if __name__ == "__main__":
151
151
  os.environ["PADDLE_INFER_PROTOCOL"] = "grpc"
152
152
  os.environ["NEMORETRIEVER_PARSE_HTTP_ENDPOINT"] = "https://integrate.api.nvidia.com/v1/chat/completions"
153
153
  os.environ["VLM_CAPTION_ENDPOINT"] = "https://integrate.api.nvidia.com/v1/chat/completions"
154
- os.environ["VLM_CAPTION_MODEL_NAME"] = "meta/llama-3.2-11b-vision-instruct"
154
+ os.environ["VLM_CAPTION_MODEL_NAME"] = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
155
155
  logger.info("Environment variables set.")
156
156
 
157
157
  image_caption_endpoint_url = "https://integrate.api.nvidia.com/v1/chat/completions"
158
- image_caption_model_name = "meta/llama-3.2-11b-vision-instruct"
158
+ model_name = "nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
159
159
  yolox_grpc, yolox_http, yolox_auth, yolox_protocol = get_nim_service("yolox")
160
160
  (
161
161
  yolox_table_structure_grpc,
@@ -228,7 +228,7 @@ if __name__ == "__main__":
228
228
  image_caption_config = {
229
229
  "api_key": yolox_auth,
230
230
  "endpoint_url": image_caption_endpoint_url,
231
- "image_caption_model_name": image_caption_model_name,
231
+ "model_name": model_name,
232
232
  "prompt": "Caption the content of this image:",
233
233
  }
234
234
  logger.info("Service configuration retrieved from get_nim_service and environment variables.")
@@ -555,7 +555,7 @@ class PipelineTopology:
555
555
  return None
556
556
 
557
557
  def get_connections(self) -> Dict[str, List[Tuple[str, int]]]:
558
- """Returns a shallow copy of the connections dictionary."""
558
+ """Returns a shallow copy of the connection dictionary."""
559
559
  with self._lock:
560
560
  # Shallow copy is usually sufficient here as tuples are immutable
561
561
  return self._connections.copy()
@@ -571,7 +571,7 @@ class PipelineTopology:
571
571
  return len(self._stage_actors.get(stage_name, []))
572
572
 
573
573
  def get_edge_queues(self) -> Dict[str, Tuple[Any, int]]:
574
- """Returns a shallow copy of the edge queues dictionary."""
574
+ """Returns a shallow copy of the edge queues' dictionary."""
575
575
  with self._lock:
576
576
  return self._edge_queues.copy()
577
577
 
@@ -9,6 +9,7 @@ import threading
9
9
  from abc import ABC, abstractmethod
10
10
  from collections import defaultdict
11
11
  from dataclasses import dataclass
12
+ from types import FunctionType
12
13
 
13
14
  import psutil
14
15
  import uuid
@@ -24,6 +25,9 @@ import time
24
25
  from nv_ingest.framework.orchestration.ray.primitives.pipeline_topology import PipelineTopology, StageInfo
25
26
  from nv_ingest.framework.orchestration.ray.primitives.ray_stat_collector import RayStatsCollector
26
27
  from nv_ingest.framework.orchestration.ray.util.pipeline.pid_controller import PIDController, ResourceConstraintManager
28
+ from nv_ingest.framework.orchestration.ray.util.pipeline.tools import wrap_callable_as_stage
29
+ from nv_ingest_api.util.imports.callable_signatures import ingest_stage_callable_signature
30
+ from nv_ingest_api.util.imports.dynamic_resolvers import resolve_callable_from_path
27
31
 
28
32
  logger = logging.getLogger(__name__)
29
33
 
@@ -43,7 +47,7 @@ class PipelineInterface(ABC):
43
47
  Parameters
44
48
  ----------
45
49
  monitor_poll_interval : float
46
- Interval in seconds for monitoring poll (default: 5.0).
50
+ Interval in seconds for the monitoring poll (default: 5.0).
47
51
  scaling_poll_interval : float
48
52
  Interval in seconds for scaling decisions (default: 30.0).
49
53
  """
@@ -270,7 +274,7 @@ class RayPipeline(PipelineInterface):
270
274
 
271
275
  logger.info("RayStatsCollector initialized using StatsConfig.")
272
276
 
273
- # --- Accessor Methods for Stats Collector (and internal use) ---
277
+ # --- Accessor Methods for Stat Collector (and internal use) ---
274
278
 
275
279
  def __del__(self):
276
280
  try:
@@ -428,15 +432,39 @@ class RayPipeline(PipelineInterface):
428
432
  return self
429
433
 
430
434
  def add_stage(
431
- self, *, name: str, stage_actor: Any, config: BaseModel, min_replicas: int = 0, max_replicas: int = 1
435
+ self,
436
+ *,
437
+ name: str,
438
+ stage_actor: Any,
439
+ config: BaseModel,
440
+ min_replicas: int = 0,
441
+ max_replicas: int = 1,
432
442
  ) -> "RayPipeline":
433
443
  if min_replicas < 0:
434
444
  logger.warning(f"Stage '{name}': min_replicas cannot be negative. Overriding to 0.")
435
445
  min_replicas = 0
446
+
447
+ resolved_actor = stage_actor
448
+
449
+ # Support module path (e.g., "mypkg.mymodule:my_lambda")
450
+ if isinstance(stage_actor, str):
451
+ resolved_actor = resolve_callable_from_path(
452
+ callable_path=stage_actor, signature_schema=ingest_stage_callable_signature
453
+ )
454
+
455
+ # Wrap callables
456
+ if isinstance(resolved_actor, FunctionType):
457
+ schema_type = type(config)
458
+ resolved_actor = wrap_callable_as_stage(resolved_actor, schema_type)
459
+
436
460
  stage_info = StageInfo(
437
- name=name, callable=stage_actor, config=config, min_replicas=min_replicas, max_replicas=max_replicas
461
+ name=name,
462
+ callable=resolved_actor,
463
+ config=config,
464
+ min_replicas=min_replicas,
465
+ max_replicas=max_replicas,
438
466
  )
439
- self.topology.add_stage(stage_info) # Delegate
467
+ self.topology.add_stage(stage_info)
440
468
 
441
469
  return self
442
470
 
@@ -40,7 +40,7 @@ class RayStatsCollector:
40
40
  - `get_edge_queues() -> Dict[str, Tuple[Any, int]]`
41
41
  These methods should return snapshots suitable for iteration.
42
42
  interval : float, optional
43
- The interval in seconds between stats collection attempts, by default 5.0.
43
+ The interval in seconds between stat collection attempts, by default 5.0.
44
44
  actor_timeout : float, optional
45
45
  Timeout in seconds for waiting for stats from a single actor, by default 5.0.
46
46
  queue_timeout : float, optional
@@ -0,0 +1,161 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from datetime import datetime
6
+ import logging
7
+ import pandas as pd
8
+ from typing import Any
9
+ from pydantic import BaseModel
10
+ import ray
11
+
12
+ from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
13
+ from nv_ingest_api.internal.enums.common import (
14
+ DocumentTypeEnum,
15
+ ContentTypeEnum,
16
+ AccessLevelEnum,
17
+ TextTypeEnum,
18
+ LanguageEnum,
19
+ )
20
+ from nv_ingest_api.internal.primitives.tracing.tagging import traceable
21
+ from nv_ingest_api.internal.schemas.meta.metadata_schema import ContentHierarchySchema
22
+ from nv_ingest_api.util.converters.type_mappings import doc_type_to_content_type
23
+ from nv_ingest_api.util.exception_handlers.decorators import (
24
+ nv_ingest_node_failure_try_except,
25
+ )
26
+
27
+ # logging.basicConfig(level=logging.DEBUG)
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
+ @ray.remote
32
+ class MetadataInjectionStage(RayActorStage):
33
+ """
34
+ A Ray actor stage that performs metadata injection on IngestControlMessages.
35
+
36
+ This stage iterates over the rows of the DataFrame payload, checks if metadata
37
+ injection is required, and if so, injects the appropriate metadata.
38
+ """
39
+
40
+ def __init__(self, config: BaseModel) -> None:
41
+ # Call the base initializer to set attributes like self._running.
42
+ super().__init__(config)
43
+ # Additional initialization can be added here if necessary.
44
+ logger.info("MetadataInjectionStage initialized with config: %s", config)
45
+
46
+ @traceable("metadata_injector")
47
+ @nv_ingest_node_failure_try_except(annotation_id="metadata_injector", raise_on_failure=False)
48
+ def on_data(self, message: Any) -> Any:
49
+ """
50
+ Process an incoming IngestControlMessage by injecting metadata into its DataFrame payload.
51
+
52
+ Parameters
53
+ ----------
54
+ message : IngestControlMessage
55
+ The incoming message containing the payload DataFrame.
56
+
57
+ Returns
58
+ -------
59
+ IngestControlMessage
60
+ The message with updated metadata if injection was required.
61
+ """
62
+ df = message.payload()
63
+ update_required = False
64
+ rows = []
65
+ logger.info("Starting metadata injection on DataFrame with %d rows", len(df))
66
+
67
+ for _, row in df.iterrows():
68
+ try:
69
+ # Convert document type to content type using enums.
70
+ content_type = doc_type_to_content_type(DocumentTypeEnum(row["document_type"]))
71
+ # Check if metadata is missing or doesn't contain 'content'
72
+ if (
73
+ "metadata" not in row
74
+ or not isinstance(row["metadata"], dict)
75
+ or "content" not in row["metadata"].keys()
76
+ ):
77
+ update_required = True
78
+
79
+ # Initialize default structures based on MetaDataSchema
80
+ default_source_metadata = {
81
+ "source_id": row.get("source_id"),
82
+ "source_name": row.get("source_name"),
83
+ "source_type": row["document_type"],
84
+ "source_location": "",
85
+ "collection_id": "",
86
+ "date_created": datetime.now().isoformat(),
87
+ "last_modified": datetime.now().isoformat(),
88
+ "summary": "",
89
+ "partition_id": -1,
90
+ "access_level": AccessLevelEnum.UNKNOWN.value,
91
+ }
92
+
93
+ default_content_metadata = {
94
+ "type": content_type.name.lower(),
95
+ "page_number": -1,
96
+ "description": "",
97
+ "hierarchy": ContentHierarchySchema().model_dump(),
98
+ "subtype": "",
99
+ "start_time": -1,
100
+ "end_time": -1,
101
+ }
102
+
103
+ default_audio_metadata = None
104
+ if content_type == ContentTypeEnum.AUDIO:
105
+ default_audio_metadata = {
106
+ "audio_type": row["document_type"],
107
+ "audio_transcript": "",
108
+ }
109
+
110
+ default_image_metadata = None
111
+ if content_type == ContentTypeEnum.IMAGE:
112
+ default_image_metadata = {
113
+ "image_type": row["document_type"],
114
+ "structured_image_type": ContentTypeEnum.NONE.value,
115
+ "caption": "",
116
+ "text": "",
117
+ "image_location": (0, 0, 0, 0),
118
+ "image_location_max_dimensions": (0, 0),
119
+ "uploaded_image_url": "",
120
+ "width": 0,
121
+ "height": 0,
122
+ }
123
+
124
+ default_text_metadata = None
125
+ if content_type == ContentTypeEnum.TEXT:
126
+ default_text_metadata = {
127
+ "text_type": TextTypeEnum.DOCUMENT.value,
128
+ "summary": "",
129
+ "keywords": "",
130
+ "language": LanguageEnum.UNKNOWN.value,
131
+ "text_location": (0, 0, 0, 0),
132
+ "text_location_max_dimensions": (0, 0, 0, 0),
133
+ }
134
+
135
+ row["metadata"] = {
136
+ "content": row["content"],
137
+ "content_metadata": default_content_metadata,
138
+ "error_metadata": None,
139
+ "audio_metadata": default_audio_metadata,
140
+ "image_metadata": default_image_metadata,
141
+ "source_metadata": default_source_metadata,
142
+ "text_metadata": default_text_metadata,
143
+ }
144
+ logger.info(
145
+ f"METADATA_INJECTOR_DEBUG: Rebuilt metadata for source_id='{row.get('source_id', 'N/A')}'. "
146
+ f"Metadata keys: {list(row['metadata'].keys())}."
147
+ f"'content' present: {'content' in row['metadata']}"
148
+ )
149
+ except Exception as inner_e:
150
+ logger.exception("Failed to process row during metadata injection")
151
+ raise inner_e
152
+ rows.append(row)
153
+
154
+ if update_required:
155
+ docs = pd.DataFrame(rows)
156
+ message.payload(docs)
157
+ logger.info("Metadata injection updated payload with %d rows", len(docs))
158
+ else:
159
+ logger.info("No metadata update was necessary during metadata injection")
160
+
161
+ return message
@@ -23,16 +23,13 @@ class RayActorSourceStage(RayActorStage, ABC):
23
23
  super().__init__(config, log_to_stdout=log_to_stdout)
24
24
  self.paused = False
25
25
 
26
+ def on_data(self, IngestControlMessage):
27
+ return NotImplemented("Source stages do not implement on_data().")
28
+
26
29
  @ray.method(num_returns=1)
27
30
  def set_input_queue(self, queue_handle: Any) -> bool:
28
31
  raise NotImplementedError("Source stages do not support an input queue.")
29
32
 
30
- def get_input(self) -> Any:
31
- """
32
- Source stages must implement get_input() to fetch control messages from an external source.
33
- """
34
- pass
35
-
36
33
  @abstractmethod
37
34
  def _read_input(self) -> Any:
38
35
  """
@@ -304,14 +304,6 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
304
304
 
305
305
  return control_message
306
306
 
307
- def on_data(self, control_message: any) -> any:
308
- """
309
- Process the control message.
310
- For this source stage, no additional processing is done, so simply return it.
311
- """
312
- self._logger.debug("on_data: Received control message for processing")
313
- return control_message
314
-
315
307
  # In the processing loop, instead of checking a boolean, we wait on the event.
316
308
  def _processing_loop(self) -> None:
317
309
  """
@@ -336,7 +328,6 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
336
328
  self._active_processing = True
337
329
 
338
330
  self._logger.debug("Control message received; processing data")
339
- updated_cm = self.on_data(control_message)
340
331
 
341
332
  # Block until not paused using the pause event.
342
333
  if self.output_queue is not None:
@@ -349,7 +340,7 @@ class MessageBrokerTaskSourceStage(RayActorSourceStage):
349
340
 
350
341
  while True:
351
342
  try:
352
- self.output_queue.put(updated_cm)
343
+ self.output_queue.put(control_message)
353
344
  self.stats["successful_queue_writes"] += 1
354
345
  break
355
346
  except Exception:
@@ -32,7 +32,7 @@ class TextEmbeddingTransformStage(RayActorStage):
32
32
  """
33
33
 
34
34
  def __init__(self, config: TextEmbeddingSchema) -> None:
35
- super().__init__(config)
35
+ super().__init__(config, log_to_stdout=False)
36
36
  try:
37
37
  self.validated_config = config
38
38
  logger.info("TextEmbeddingTransformStage configuration validated successfully.")
@@ -1,11 +1,14 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
2
+ # All rights reserved.
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
1
5
  import logging
2
6
  from typing import Any
3
7
  import ray
4
8
 
5
- # Assume these imports come from your project:
6
9
  from nv_ingest.framework.orchestration.ray.stages.meta.ray_actor_stage_base import RayActorStage
7
10
  from nv_ingest.framework.util.flow_control import filter_by_task
8
- from nv_ingest_api.internal.primitives.ingest_control_message import remove_task_by_type
11
+ from nv_ingest_api.internal.primitives.ingest_control_message import remove_task_by_type, IngestControlMessage
9
12
  from nv_ingest_api.internal.primitives.tracing.tagging import traceable
10
13
  from nv_ingest_api.internal.schemas.transform.transform_text_splitter_schema import TextSplitterSchema
11
14
  from nv_ingest_api.internal.transform.split_text import transform_text_split_and_tokenize_internal
@@ -72,3 +75,45 @@ class TextSplitterStage(RayActorStage):
72
75
  logger.info("TextSplitterStage.on_data: Finished processing, returning updated message.")
73
76
 
74
77
  return message
78
+
79
+
80
def text_splitter_fn(control_message: IngestControlMessage, stage_config: TextSplitterSchema) -> IngestControlMessage:
    """
    Split and tokenize the text carried by an IngestControlMessage.

    Parameters
    ----------
    control_message : IngestControlMessage
        The incoming message containing the payload DataFrame. Its "split"
        task is removed and used as the task-specific configuration.

    stage_config : TextSplitterSchema
        The stage-level configuration object, forwarded to the transform as
        ``transform_config``.

    Returns
    -------
    IngestControlMessage
        The same message object, with its payload replaced by the
        transformed DataFrame.
    """

    # Extract the DataFrame payload.
    df_payload = control_message.payload()
    logger.debug("Extracted payload with %d rows.", len(df_payload))

    # Remove the "split" task to obtain task-specific configuration.
    task_config = remove_task_by_type(control_message, "split")
    logger.debug("Extracted task config: %s", task_config)

    # Transform the DataFrame (split text and tokenize).
    df_updated = transform_text_split_and_tokenize_internal(
        df_transform_ledger=df_payload,
        task_config=task_config,
        transform_config=stage_config,
        execution_trace_log=None,
    )
    # Log under this function's own name: the messages previously claimed to
    # come from TextSplitterStage.on_data, which misattributes their origin.
    logger.info("text_splitter_fn: Transformation complete. Updated payload has %d rows.", len(df_updated))

    # Update the message payload in place and hand the same message back.
    control_message.payload(df_updated)
    logger.info("text_splitter_fn: Finished processing, returning updated message.")

    return control_message
@@ -174,9 +174,9 @@ def setup_ingestion_pipeline(pipeline: RayPipeline, ingest_config: Dict[str, Any
174
174
  pipeline.make_edge(image_dedup_stage_id, text_splitter_stage_id, queue_size=ingest_edge_buffer_size)
175
175
 
176
176
  ###### Primitive Transforms ########
177
- pipeline.make_edge(text_splitter_stage_id, embed_extractions_stage_id, queue_size=ingest_edge_buffer_size)
178
- pipeline.make_edge(embed_extractions_stage_id, image_caption_stage_id, queue_size=ingest_edge_buffer_size)
179
- pipeline.make_edge(image_caption_stage_id, image_storage_stage_id, queue_size=ingest_edge_buffer_size)
177
+ pipeline.make_edge(text_splitter_stage_id, image_caption_stage_id, queue_size=ingest_edge_buffer_size)
178
+ pipeline.make_edge(image_caption_stage_id, embed_extractions_stage_id, queue_size=ingest_edge_buffer_size)
179
+ pipeline.make_edge(embed_extractions_stage_id, image_storage_stage_id, queue_size=ingest_edge_buffer_size)
180
180
 
181
181
  ###### Primitive Storage ########
182
182
  pipeline.make_edge(image_storage_stage_id, embedding_storage_stage_id, queue_size=ingest_edge_buffer_size)
@@ -72,7 +72,7 @@ class PipelineCreationSchema(BaseModel):
72
72
 
73
73
  # API keys
74
74
  ngc_api_key: str = os.getenv("NGC_API_KEY", "")
75
- nvidia_build_api_key: str = os.getenv("NVIDIA_BUILD_API_KEY", "")
75
+ nvidia_api_key: str = os.getenv("NVIDIA_API_KEY", "")
76
76
 
77
77
  # Observability settings
78
78
  otel_exporter_otlp_endpoint: str = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "localhost:4317")
@@ -86,9 +86,9 @@ class PipelineCreationSchema(BaseModel):
86
86
 
87
87
  # Vision language model settings
88
88
  vlm_caption_endpoint: str = os.getenv(
89
- "VLM_CAPTION_ENDPOINT", "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct/chat/completions"
89
+ "VLM_CAPTION_ENDPOINT", "https://ai.api.nvidia.com/v1/gr/nvidia/llama-3.1-nemotron-nano-vl-8b-v1/chat/completions"
90
90
  )
91
- vlm_caption_model_name: str = os.getenv("VLM_CAPTION_MODEL_NAME", "meta/llama-3.2-11b-vision-instruct")
91
+ vlm_caption_model_name: str = os.getenv("VLM_CAPTION_MODEL_NAME", "nvidia/llama-3.1-nemotron-nano-vl-8b-v1")
92
92
 
93
93
  # YOLOX image processing settings
94
94
  yolox_graphic_elements_http_endpoint: str = os.getenv(
@@ -331,6 +331,10 @@ def run_pipeline(
331
331
  """
332
332
  if run_in_subprocess:
333
333
  logger.info("Launching pipeline in Python subprocess using multiprocessing.")
334
+ if (ingest_config.ngc_api_key is None or ingest_config.ngc_api_key == "") and (
335
+ ingest_config.nvidia_api_key is None or ingest_config.nvidia_api_key == ""
336
+ ):
337
+ logger.warning("NGC_API_KEY or NVIDIA_API_KEY are not set. NIM Related functions will not work.")
334
338
 
335
339
  ctx = multiprocessing.get_context("fork")
336
340
  process = ctx.Process(
@@ -2,8 +2,6 @@
2
2
  # All rights reserved.
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
- # TODO(Devin)
6
- # flake8: noqa
7
5
  import os
8
6
 
9
7
  import click
@@ -11,6 +9,7 @@ import logging
11
9
 
12
10
  from nv_ingest.framework.orchestration.ray.stages.sinks.default_drain import DefaultDrainSink
13
11
  from nv_ingest.framework.orchestration.ray.stages.telemetry.otel_tracer import OpenTelemetryTracerStage
12
+ from nv_ingest.framework.orchestration.ray.stages.transforms.text_splitter import TextSplitterStage
14
13
  from nv_ingest.framework.schemas.framework_otel_tracer_schema import OpenTelemetryTracerSchema
15
14
  from nv_ingest_api.internal.schemas.extract.extract_infographic_schema import InfographicExtractorSchema
16
15
 
@@ -41,7 +40,6 @@ from nv_ingest.framework.orchestration.ray.stages.storage.image_storage import I
41
40
  from nv_ingest.framework.orchestration.ray.stages.storage.store_embeddings import EmbeddingStorageStage
42
41
  from nv_ingest.framework.orchestration.ray.stages.transforms.image_caption import ImageCaptionTransformStage
43
42
  from nv_ingest.framework.orchestration.ray.stages.transforms.text_embed import TextEmbeddingTransformStage
44
- from nv_ingest.framework.orchestration.ray.stages.transforms.text_splitter import TextSplitterStage
45
43
  from nv_ingest.framework.schemas.framework_metadata_injector_schema import MetadataInjectorSchema
46
44
  from nv_ingest_api.internal.schemas.extract.extract_audio_schema import AudioExtractorSchema
47
45
  from nv_ingest_api.internal.schemas.extract.extract_chart_schema import ChartExtractorSchema
@@ -107,7 +105,7 @@ def get_nim_service(env_var_prefix):
107
105
  "",
108
106
  )
109
107
  auth_token = os.environ.get(
110
- "NVIDIA_BUILD_API_KEY",
108
+ "NVIDIA_API_KEY",
111
109
  "",
112
110
  ) or os.environ.get(
113
111
  "NGC_API_KEY",
@@ -137,7 +135,7 @@ def get_audio_retrieval_service(env_var_prefix):
137
135
  "",
138
136
  )
139
137
  auth_token = os.environ.get(
140
- "NVIDIA_BUILD_API_KEY",
138
+ "NVIDIA_API_KEY",
141
139
  "",
142
140
  ) or os.environ.get(
143
141
  "NGC_API_KEY",
@@ -465,7 +463,7 @@ def add_text_splitter_stage(pipeline, default_cpu_count, stage_name="text_splitt
465
463
 
466
464
  def add_image_caption_stage(pipeline, default_cpu_count, stage_name="image_caption"):
467
465
  auth_token = os.environ.get(
468
- "NVIDIA_BUILD_API_KEY",
466
+ "NVIDIA_API_KEY",
469
467
  "",
470
468
  ) or os.environ.get(
471
469
  "NGC_API_KEY",
@@ -473,13 +471,13 @@ def add_image_caption_stage(pipeline, default_cpu_count, stage_name="image_capti
473
471
  )
474
472
 
475
473
  endpoint_url = os.environ.get("VLM_CAPTION_ENDPOINT", "localhost:5000")
476
- model_name = os.environ.get("VLM_CAPTION_MODEL_NAME", "meta/llama-3.2-11b-vision-instruct")
474
+ model_name = os.environ.get("VLM_CAPTION_MODEL_NAME", "nvidia/llama-3.1-nemotron-nano-vl-8b-v1")
477
475
 
478
476
  config = ImageCaptionExtractionSchema(
479
477
  **{
480
478
  "api_key": auth_token,
481
479
  "endpoint_url": endpoint_url,
482
- "image_caption_model_name": model_name,
480
+ "model_name": model_name,
483
481
  "prompt": "Caption the content of this image:",
484
482
  }
485
483
  )
@@ -497,7 +495,7 @@ def add_image_caption_stage(pipeline, default_cpu_count, stage_name="image_capti
497
495
 
498
496
  def add_text_embedding_stage(pipeline, default_cpu_count, stage_name="text_embedding"):
499
497
  api_key = os.environ.get(
500
- "NVIDIA_BUILD_API_KEY",
498
+ "NVIDIA_API_KEY",
501
499
  "",
502
500
  ) or os.environ.get(
503
501
  "NGC_API_KEY",