nv-ingest 2025.10.4.dev20251004__py3-none-any.whl → 2025.12.10.dev20251210__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. nv_ingest/api/__init__.py +6 -0
  2. nv_ingest/api/main.py +2 -0
  3. nv_ingest/api/tracing.py +82 -0
  4. nv_ingest/api/v2/README.md +203 -0
  5. nv_ingest/api/v2/__init__.py +3 -0
  6. nv_ingest/api/v2/ingest.py +1300 -0
  7. nv_ingest/framework/orchestration/process/dependent_services.py +17 -10
  8. nv_ingest/framework/orchestration/process/strategies.py +6 -2
  9. nv_ingest/framework/orchestration/process/termination.py +49 -9
  10. nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +2 -2
  11. nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -2
  12. nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py +71 -0
  13. nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +41 -8
  14. nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +72 -6
  15. nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +215 -11
  16. nv_ingest/pipeline/config/replica_resolver.py +12 -2
  17. nv_ingest/pipeline/default_libmode_pipeline_impl.py +32 -18
  18. nv_ingest/pipeline/default_pipeline_impl.py +75 -33
  19. {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/METADATA +4 -2
  20. {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/RECORD +23 -18
  21. {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/WHEEL +0 -0
  22. {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/licenses/LICENSE +0 -0
  23. {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/top_level.txt +0 -0
@@ -7,9 +7,7 @@ import json
  import logging
  import os
  from json import JSONDecodeError
- from typing import Optional, Dict, Any
-
- from typing import List
+ from typing import Optional, Dict, Any, List

  import redis

@@ -133,6 +131,8 @@ class RedisIngestService(IngestServiceMeta):
  self._bulk_vdb_cache_prefix: str = "vdb_bulk_upload_cache:"
  self._cache_prefix: str = "processing_cache:"
  self._state_prefix: str = "job_state:"
+ # Bound async-to-thread concurrency slightly below Redis connection pool
+ self._async_operation_semaphore: Optional[asyncio.Semaphore] = None

  self._ingest_client = RedisClient(
  host=self._redis_hostname,
@@ -151,6 +151,16 @@ class RedisIngestService(IngestServiceMeta):
  f"FetchMode: {fetch_mode.name}, ResultTTL: {result_data_ttl_seconds}, StateTTL: {state_ttl_seconds}"
  )

+ def _get_async_semaphore(self) -> asyncio.Semaphore:
+ if self._async_operation_semaphore is None:
+ semaphore_limit = max(1, self._concurrency_level - 2)
+ self._async_operation_semaphore = asyncio.Semaphore(semaphore_limit)
+ return self._async_operation_semaphore
+
+ async def _run_bounded_to_thread(self, func, *args, **kwargs):
+ async with self._get_async_semaphore():
+ return await asyncio.to_thread(func, *args, **kwargs)
+
  async def submit_job(self, job_spec_wrapper: "MessageWrapper", trace_id: str) -> str:
  """
  Validates, prepares, and submits a job specification to the Redis task queue.
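The hunk above introduces a semaphore-bounded wrapper around asyncio.to_thread so that blocking Redis calls offloaded to threads never exceed the client's connection pool headroom. A minimal, self-contained sketch of that pattern follows; the names concurrency_level and blocking_redis_call are illustrative and not taken from nv-ingest:

import asyncio

concurrency_level = 10
semaphore = asyncio.Semaphore(max(1, concurrency_level - 2))

def blocking_redis_call(i: int) -> str:
    # Stand-in for a synchronous client call such as submit_message or fetch_message.
    return f"result-{i}"

async def run_bounded(func, *args):
    # At most (concurrency_level - 2) threads touch the client at once,
    # leaving slack in the underlying connection pool.
    async with semaphore:
        return await asyncio.to_thread(func, *args)

async def main():
    results = await asyncio.gather(*(run_bounded(blocking_redis_call, i) for i in range(25)))
    print(len(results))  # 25

asyncio.run(main())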
@@ -208,12 +218,33 @@ class RedisIngestService(IngestServiceMeta):
  ttl_for_result: Optional[int] = (
  self._result_data_ttl_seconds if self._fetch_mode == FetchMode.NON_DESTRUCTIVE else None
  )
+ # Determine target queue based on optional QoS hint
+ queue_hint = None
+ try:
+ routing_opts = job_spec.get("routing_options") or {}
+ tracing_opts = job_spec.get("tracing_options") or {}
+ queue_hint = routing_opts.get("queue_hint") or tracing_opts.get("queue_hint")
+ except Exception:
+ queue_hint = None
+ allowed = {"default", "immediate", "micro", "small", "medium", "large"}
+ if isinstance(queue_hint, str) and queue_hint in allowed:
+ if queue_hint == "default":
+ channel_name = self._redis_task_queue
+ else:
+ channel_name = f"{self._redis_task_queue}_{queue_hint}"
+ else:
+ channel_name = self._redis_task_queue
+ logger.debug(
+ f"Submitting job {trace_id} to queue '{channel_name}' (hint={queue_hint}) "
+ f"with result TTL: {ttl_for_result}"
+ )
+
  logger.debug(
  f"Submitting job {trace_id} to queue '{self._redis_task_queue}' with result TTL: {ttl_for_result}"
  )
- await asyncio.to_thread(
+ await self._run_bounded_to_thread(
  self._ingest_client.submit_message,
- channel_name=self._redis_task_queue,
+ channel_name=channel_name,
  message=job_spec_json,
  ttl_seconds=ttl_for_result,
  )
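With this change, a job spec that carries a recognized queue_hint under routing_options or tracing_options is routed to a per-hint queue derived from the base task queue; "default", unknown, or missing hints fall back to the base queue. A small sketch of that selection logic follows; "ingest_task_queue" is an illustrative base name, not necessarily nv-ingest's actual default:

base_queue = "ingest_task_queue"
allowed = {"default", "immediate", "micro", "small", "medium", "large"}

def resolve_channel(job_spec: dict) -> str:
    routing_opts = job_spec.get("routing_options") or {}
    tracing_opts = job_spec.get("tracing_options") or {}
    hint = routing_opts.get("queue_hint") or tracing_opts.get("queue_hint")
    if isinstance(hint, str) and hint in allowed and hint != "default":
        return f"{base_queue}_{hint}"
    return base_queue

print(resolve_channel({"routing_options": {"queue_hint": "small"}}))  # ingest_task_queue_small
print(resolve_channel({"tracing_options": {"queue_hint": "bogus"}}))  # ingest_task_queue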
@@ -252,7 +283,7 @@ class RedisIngestService(IngestServiceMeta):
  try:
  result_channel: str = f"{job_id}"
  logger.debug(f"Attempting to fetch job result for {job_id} using mode {self._fetch_mode.name}")
- message = await asyncio.to_thread(
+ message = await self._run_bounded_to_thread(
  self._ingest_client.fetch_message,
  channel_name=result_channel,
  timeout=10,
@@ -264,7 +295,7 @@ class RedisIngestService(IngestServiceMeta):
  logger.warning(f"fetch_message for {job_id} returned None unexpectedly.")
  raise TimeoutError("No data found (unexpected None response).")
  except (TimeoutError, redis.RedisError, ConnectionError, ValueError, RuntimeError) as e:
- logger.info(f"Fetch operation for job {job_id} did not complete: ({type(e).__name__}) {e}")
+ logger.debug(f"Fetch operation for job {job_id} did not complete: ({type(e).__name__}) {e}")
  raise e
  except Exception as e:
  logger.exception(f"Unexpected error during async fetch_job for {job_id}: {e}")
@@ -289,7 +320,7 @@ class RedisIngestService(IngestServiceMeta):
  ttl_to_set: Optional[int] = self._state_ttl_seconds
  try:
  logger.debug(f"Setting state for {job_id} to {state} with TTL {ttl_to_set}")
- await asyncio.to_thread(
+ await self._run_bounded_to_thread(
  self._ingest_client.get_client().set,
  state_key,
  state,
@@ -317,7 +348,10 @@ class RedisIngestService(IngestServiceMeta):
  """
  state_key: str = f"{self._state_prefix}{job_id}"
  try:
- data_bytes: Optional[bytes] = await asyncio.to_thread(self._ingest_client.get_client().get, state_key)
+ data_bytes: Optional[bytes] = await self._run_bounded_to_thread(
+ self._ingest_client.get_client().get,
+ state_key,
+ )
  if data_bytes:
  state: str = data_bytes.decode("utf-8")
  logger.debug(f"Retrieved state for {job_id}: {state}")
@@ -350,7 +384,7 @@ class RedisIngestService(IngestServiceMeta):
  cache_key: str = f"{self._cache_prefix}{job_id}"
  try:
  data_to_store: str = json.dumps([job.model_dump(mode="json") for job in jobs_data])
- await asyncio.to_thread(
+ await self._run_bounded_to_thread(
  self._ingest_client.get_client().set,
  cache_key,
  data_to_store,
@@ -375,7 +409,10 @@ class RedisIngestService(IngestServiceMeta):
  """
  cache_key: str = f"{self._cache_prefix}{job_id}"
  try:
- data_bytes: Optional[bytes] = await asyncio.to_thread(self._ingest_client.get_client().get, cache_key)
+ data_bytes: Optional[bytes] = await self._run_bounded_to_thread(
+ self._ingest_client.get_client().get,
+ cache_key,
+ )
  if data_bytes is None:
  return []
  return [ProcessingJob(**job) for job in json.loads(data_bytes)]
@@ -393,3 +430,170 @@ class RedisIngestService(IngestServiceMeta):
  The current fetch mode.
  """
  return self._fetch_mode
+
+ async def set_parent_job_mapping(
+ self,
+ parent_job_id: str,
+ subjob_ids: List[str],
+ metadata: Dict[str, Any],
+ *,
+ subjob_descriptors: Optional[List[Dict[str, Any]]] = None,
+ ) -> None:
+ """
+ Store parent-subjob mapping in Redis for V2 PDF splitting.
+
+ Parameters
+ ----------
+ parent_job_id : str
+ The parent job identifier
+ subjob_ids : List[str]
+ List of subjob identifiers
+ metadata : Dict[str, Any]
+ Metadata about the parent job (total_pages, original_source_id, etc.)
+ subjob_descriptors : List[Dict[str, Any]], optional
+ Detailed descriptors (job_id, chunk_index, start/end pages) for subjobs
+ """
+ parent_key = f"parent:{parent_job_id}:subjobs"
+ metadata_key = f"parent:{parent_job_id}:metadata"
+
+ try:
+ # Store subjob IDs as a set (only if there are subjobs)
+ if subjob_ids:
+ await self._run_bounded_to_thread(
+ self._ingest_client.get_client().sadd,
+ parent_key,
+ *subjob_ids,
+ )
+
+ # Store metadata as hash (including original subjob ordering for deterministic fetches)
+ metadata_to_store = dict(metadata)
+ try:
+ metadata_to_store["subjob_order"] = json.dumps(subjob_ids)
+ except (TypeError, ValueError):
+ logger.warning(
+ "Unable to serialize subjob ordering for parent %s; falling back to Redis set ordering",
+ parent_job_id,
+ )
+ metadata_to_store.pop("subjob_order", None)
+
+ if subjob_descriptors:
+ metadata_to_store["subjob_descriptors"] = json.dumps(subjob_descriptors)
+
+ await self._run_bounded_to_thread(
+ self._ingest_client.get_client().hset,
+ metadata_key,
+ mapping=metadata_to_store,
+ )
+
+ # Set TTL on both keys to match state TTL
+ if self._state_ttl_seconds:
+ await self._run_bounded_to_thread(
+ self._ingest_client.get_client().expire,
+ parent_key,
+ self._state_ttl_seconds,
+ )
+ await self._run_bounded_to_thread(
+ self._ingest_client.get_client().expire,
+ metadata_key,
+ self._state_ttl_seconds,
+ )
+
+ logger.debug(f"Stored parent job mapping for {parent_job_id} with {len(subjob_ids)} subjobs")
+
+ except Exception as err:
+ logger.exception(f"Error storing parent job mapping for {parent_job_id}: {err}")
+ raise
+
+ async def get_parent_job_info(self, parent_job_id: str) -> Optional[Dict[str, Any]]:
+ """
+ Retrieve parent job information including subjob IDs and metadata.
+
+ Parameters
+ ----------
+ parent_job_id : str
+ The parent job identifier
+
+ Returns
+ -------
+ Dict[str, Any] or None
+ Dictionary with 'subjob_ids' and 'metadata' keys, or None if not a parent job
+ """
+ parent_key = f"parent:{parent_job_id}:subjobs"
+ metadata_key = f"parent:{parent_job_id}:metadata"
+
+ try:
+ # Check if this is a parent job (check metadata_key since non-split PDFs may not have parent_key)
+ exists = await self._run_bounded_to_thread(
+ self._ingest_client.get_client().exists,
+ metadata_key, # Check metadata instead of parent_key for non-split PDF support
+ )
+
+ if not exists:
+ return None
+
+ # Get subjob IDs (may be empty for non-split PDFs)
+ subjob_ids_bytes = await self._run_bounded_to_thread(
+ self._ingest_client.get_client().smembers,
+ parent_key,
+ )
+ subjob_id_set = {id.decode("utf-8") for id in subjob_ids_bytes} if subjob_ids_bytes else set()
+
+ # Get metadata
+ metadata_dict = await self._run_bounded_to_thread(
+ self._ingest_client.get_client().hgetall,
+ metadata_key,
+ )
+ metadata = {k.decode("utf-8"): v.decode("utf-8") for k, v in metadata_dict.items()}
+
+ # Convert numeric strings back to numbers
+ if "total_pages" in metadata:
+ metadata["total_pages"] = int(metadata["total_pages"])
+ if "pages_per_chunk" in metadata:
+ try:
+ metadata["pages_per_chunk"] = int(metadata["pages_per_chunk"])
+ except ValueError:
+ metadata.pop("pages_per_chunk", None)
+
+ ordered_ids: Optional[List[str]] = None
+ stored_order = metadata.pop("subjob_order", None)
+ if stored_order:
+ try:
+ candidate_order = json.loads(stored_order)
+ if isinstance(candidate_order, list):
+ ordered_ids = [sid for sid in candidate_order if sid in subjob_id_set]
+ except (ValueError, TypeError) as exc:
+ logger.warning(
+ "Failed to parse stored subjob order for parent %s: %s",
+ parent_job_id,
+ exc,
+ )
+
+ if ordered_ids is None:
+ ordered_ids = sorted(subjob_id_set)
+ else:
+ remaining_ids = sorted(subjob_id_set - set(ordered_ids))
+ ordered_ids.extend(remaining_ids)
+
+ subjob_descriptors: Optional[List[Dict[str, Any]]] = None
+ stored_descriptors = metadata.pop("subjob_descriptors", None)
+ if stored_descriptors:
+ try:
+ decoded = json.loads(stored_descriptors)
+ if isinstance(decoded, list):
+ subjob_descriptors = decoded
+ except (ValueError, TypeError) as exc:
+ logger.warning(
+ "Failed to parse stored subjob descriptors for parent %s: %s",
+ parent_job_id,
+ exc,
+ )
+
+ return {
+ "subjob_ids": ordered_ids,
+ "metadata": metadata,
+ "subjob_descriptors": subjob_descriptors or [],
+ }
+
+ except Exception as err:
+ logger.error(f"Error retrieving parent job info for {parent_job_id}: {err}")
+ return None
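These new methods keep a Redis set of subjob IDs at parent:<parent_job_id>:subjobs and a hash at parent:<parent_job_id>:metadata, with subjob_order and subjob_descriptors JSON-encoded inside the hash and both keys expiring with the state TTL. The sketch below writes that layout directly with redis-py just to show the shape of the data; the job IDs and metadata values are invented for illustration:

import json
import redis

r = redis.Redis(host="localhost", port=6379)
parent_id = "example-parent-job"
subjobs = ["example-parent-job_chunk_0", "example-parent-job_chunk_1"]

# Set of subjob IDs, as written by set_parent_job_mapping()
r.sadd(f"parent:{parent_id}:subjobs", *subjobs)

# Metadata hash, including the JSON-encoded ordering used for deterministic fetches
r.hset(
    f"parent:{parent_id}:metadata",
    mapping={
        "total_pages": "12",
        "pages_per_chunk": "6",
        "subjob_order": json.dumps(subjobs),
    },
)

# get_parent_job_info() reads these two keys back, decodes the hash values,
# and replays "subjob_order" to rebuild the ordered subjob list.
print(r.smembers(f"parent:{parent_id}:subjobs"))
print(r.hgetall(f"parent:{parent_id}:metadata"))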
@@ -11,6 +11,7 @@ consumption stays within the static_memory_threshold.
  """

  import logging
+ import os
  from typing import List
  from copy import deepcopy

@@ -102,8 +103,17 @@ def resolve_static_replicas(pipeline_config: PipelineConfigSchema) -> PipelineCo

  logger.info(f"Total baseline memory demand: {total_memory_demand_mb}MB from {len(non_static_stages)} stages")

- # Check if we need to scale down
- if total_memory_demand_mb <= available_memory_mb:
+ # Optional bypass of global memory-based scale down via environment variable
+ bypass_env = os.getenv("NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN", "").strip().lower()
+ bypass_scale_down = bypass_env in ("1", "true", "yes", "on")
+
+ # Check if we need to scale down (unless bypassed)
+ if bypass_scale_down:
+ logger.warning(
+ "Bypassing static memory-based replica scale-down due to NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN"
+ )
+ scaling_factor = 1.0
+ elif total_memory_demand_mb <= available_memory_mb:
  logger.info("Memory demand within threshold, applying baseline replica counts")
  scaling_factor = 1.0
  else:
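The resolver now skips the memory-based scale-down entirely when NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN is set to a truthy value; the accepted spellings are the ones checked in the hunk above. A minimal sketch of that check:

import os

# Any of "1", "true", "yes", "on" (case-insensitive) enables the bypass.
os.environ["NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN"] = "true"

value = os.getenv("NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN", "").strip().lower()
bypass = value in ("1", "true", "yes", "on")
print(bypass)  # True -> baseline replica counts are applied unchanged (scaling_factor = 1.0)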
@@ -65,14 +65,14 @@ stages:
  actor: "nv_ingest.framework.orchestration.ray.stages.extractors.pdf_extractor:PDFExtractorStage"
  config:
  pdfium_config:
- auth_token: $NGC_API_KEY|""
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
  yolox_endpoints: [
  $YOLOX_GRPC_ENDPOINT|"",
  $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
  ]
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
  nemoretriever_parse_config:
- auth_token: $NGC_API_KEY|""
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
  nemoretriever_parse_endpoints: [
  $NEMORETRIEVER_PARSE_GRPC_ENDPOINT|"",
  $NEMORETRIEVER_PARSE_HTTP_ENDPOINT|"https://integrate.api.nvidia.com/v1/chat/completions"
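Throughout these pipeline YAML hunks, the auth_token default changes from an empty-string literal to a second environment variable, so each line now reads "use $NGC_API_KEY, otherwise fall back to $NVIDIA_API_KEY". The actual $VAR|fallback resolution lives in nv-ingest's config loader, which is not part of this diff, so the following is only a rough sketch of that behavior under that assumption; the resolve() helper is hypothetical:

import os

def resolve(value: str) -> str:
    # Hypothetical reading of the '$PRIMARY|$FALLBACK' / '$VAR|"literal"' pattern in the YAML.
    primary, _, fallback = value.partition("|")
    resolved = os.getenv(primary.lstrip("$"), "")
    if resolved:
        return resolved
    if fallback.startswith("$"):
        return os.getenv(fallback.lstrip("$"), "")
    return fallback.strip('"')

os.environ.pop("NGC_API_KEY", None)
os.environ["NVIDIA_API_KEY"] = "nvapi-example"
print(resolve('$NGC_API_KEY|$NVIDIA_API_KEY'))  # nvapi-example
print(resolve('$YOLOX_INFER_PROTOCOL|http'))    # http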
@@ -106,7 +106,7 @@ stages:
  ]
  function_id: $AUDIO_FUNCTION_ID|"1598d209-5e27-4d3c-8079-4751568b1081"
  audio_infer_protocol: $AUDIO_INFER_PROTOCOL|grpc
- auth_token: $NGC_API_KEY|""
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
  replicas:
  min_replicas: 0
  max_replicas:
@@ -127,7 +127,14 @@ stages:
  $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
  ]
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
- auth_token: $NGC_API_KEY|""
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
+ pdfium_config:
+ yolox_endpoints: [
+ $YOLOX_GRPC_ENDPOINT|"",
+ $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
+ ]
+ yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
  replicas:
  min_replicas: 0
  max_replicas:
@@ -148,7 +155,14 @@ stages:
  $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
  ]
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
- auth_token: $NGC_API_KEY|""
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
+ pdfium_config:
+ yolox_endpoints: [
+ $YOLOX_GRPC_ENDPOINT|"",
+ $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
+ ]
+ yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
  replicas:
  min_replicas: 0
  max_replicas:
@@ -169,7 +183,7 @@ stages:
  $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
  ]
  yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
- auth_token: $NGC_API_KEY|""
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
  replicas:
  min_replicas: 0
  max_replicas:
@@ -200,11 +214,11 @@ stages:
  config:
  endpoint_config:
  ocr_endpoints: [
- $OCR_GRPC_ENDPOINT|"grpc.nvcf.nvidia.com:443",
- $OCR_HTTP_ENDPOINT|""
+ $OCR_GRPC_ENDPOINT|"",
+ $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
  ]
- ocr_infer_protocol: $OCR_INFER_PROTOCOL|grpc
- auth_token: $NGC_API_KEY|""
+ ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
  replicas:
  min_replicas: 0
  max_replicas:
@@ -227,10 +241,10 @@ stages:
  yolox_infer_protocol: $YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL|"http"
  ocr_endpoints: [
  $OCR_GRPC_ENDPOINT|"",
- $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/baidu/paddleocr"
+ $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
  ]
  ocr_infer_protocol: $PADDLE_INFER_PROTOCOL|"http"
- auth_token: $NGC_API_KEY|""
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
  replicas:
  min_replicas: 0
  max_replicas:
@@ -254,10 +268,10 @@ stages:
  yolox_infer_protocol: $YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL|"http"
  ocr_endpoints: [
  $OCR_GRPC_ENDPOINT|"",
- $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/baidu/paddleocr"
+ $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
  ]
  ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
- auth_token: $NGC_API_KEY|""
+ auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
  replicas:
  min_replicas: 0
  max_replicas:
@@ -317,9 +331,9 @@ stages:
  phase: 4 # TRANSFORM
  actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
  config:
- api_key: $NGC_API_KEY|""
- endpoint_url: $VLM_CAPTION_ENDPOINT|"https://integrate.api.nvidia.com/v1/chat/completions"
- model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/llama-3.1-nemotron-nano-vl-8b-v1"
+ api_key: $NGC_API_KEY|$NVIDIA_API_KEY
+ endpoint_url: $VLM_CAPTION_ENDPOINT|"http://vlm:8000/v1/chat/completions"
+ model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/nemotron-nano-12b-v2-vl"
  prompt: "Caption the content of this image:"
  replicas:
  min_replicas: 0
@@ -335,7 +349,7 @@ stages:
  phase: 4 # TRANSFORM
  actor: "nv_ingest.framework.orchestration.ray.stages.transforms.text_embed:TextEmbeddingTransformStage"
  config:
- api_key: $NGC_API_KEY|""
+ api_key: $NGC_API_KEY|$NVIDIA_API_KEY
  embedding_model: $EMBEDDING_NIM_MODEL_NAME|"nvidia/llama-3.2-nv-embedqa-1b-v2"
  embedding_nim_endpoint: $EMBEDDING_NIM_ENDPOINT|"https://integrate.api.nvidia.com/v1"
  replicas: