nv-ingest 2025.10.4.dev20251004__py3-none-any.whl → 2025.12.10.dev20251210__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nv_ingest/api/__init__.py +6 -0
- nv_ingest/api/main.py +2 -0
- nv_ingest/api/tracing.py +82 -0
- nv_ingest/api/v2/README.md +203 -0
- nv_ingest/api/v2/__init__.py +3 -0
- nv_ingest/api/v2/ingest.py +1300 -0
- nv_ingest/framework/orchestration/process/dependent_services.py +17 -10
- nv_ingest/framework/orchestration/process/strategies.py +6 -2
- nv_ingest/framework/orchestration/process/termination.py +49 -9
- nv_ingest/framework/orchestration/ray/examples/pipeline_test_harness.py +2 -2
- nv_ingest/framework/orchestration/ray/stages/extractors/audio_extractor.py +0 -2
- nv_ingest/framework/orchestration/ray/stages/extractors/ocr_extractor.py +71 -0
- nv_ingest/framework/orchestration/ray/stages/sources/message_broker_task_source.py +41 -8
- nv_ingest/framework/orchestration/ray/stages/storage/image_storage.py +72 -6
- nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py +215 -11
- nv_ingest/pipeline/config/replica_resolver.py +12 -2
- nv_ingest/pipeline/default_libmode_pipeline_impl.py +32 -18
- nv_ingest/pipeline/default_pipeline_impl.py +75 -33
- {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/METADATA +4 -2
- {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/RECORD +23 -18
- {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/WHEEL +0 -0
- {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest-2025.10.4.dev20251004.dist-info → nv_ingest-2025.12.10.dev20251210.dist-info}/top_level.txt +0 -0

nv_ingest/framework/util/service/impl/ingest/redis_ingest_service.py

@@ -7,9 +7,7 @@ import json
 import logging
 import os
 from json import JSONDecodeError
-from typing import Optional, Dict, Any
-
-from typing import List
+from typing import Optional, Dict, Any, List
 
 import redis
 
@@ -133,6 +131,8 @@ class RedisIngestService(IngestServiceMeta):
         self._bulk_vdb_cache_prefix: str = "vdb_bulk_upload_cache:"
         self._cache_prefix: str = "processing_cache:"
         self._state_prefix: str = "job_state:"
+        # Bound async-to-thread concurrency slightly below Redis connection pool
+        self._async_operation_semaphore: Optional[asyncio.Semaphore] = None
 
         self._ingest_client = RedisClient(
             host=self._redis_hostname,
@@ -151,6 +151,16 @@ class RedisIngestService(IngestServiceMeta):
             f"FetchMode: {fetch_mode.name}, ResultTTL: {result_data_ttl_seconds}, StateTTL: {state_ttl_seconds}"
         )
 
+    def _get_async_semaphore(self) -> asyncio.Semaphore:
+        if self._async_operation_semaphore is None:
+            semaphore_limit = max(1, self._concurrency_level - 2)
+            self._async_operation_semaphore = asyncio.Semaphore(semaphore_limit)
+        return self._async_operation_semaphore
+
+    async def _run_bounded_to_thread(self, func, *args, **kwargs):
+        async with self._get_async_semaphore():
+            return await asyncio.to_thread(func, *args, **kwargs)
+
     async def submit_job(self, job_spec_wrapper: "MessageWrapper", trace_id: str) -> str:
         """
         Validates, prepares, and submits a job specification to the Redis task queue.
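
Note: the new `_get_async_semaphore` / `_run_bounded_to_thread` helpers above cap how many blocking Redis calls are offloaded to worker threads at once. A minimal standalone sketch of that pattern follows; `BoundedOffloader`, `POOL_SIZE`, and `main` are illustrative names, not part of the package.

import asyncio
import time
from typing import Optional

POOL_SIZE = 10  # assumed Redis connection-pool size, for illustration only


class BoundedOffloader:
    """Offload blocking calls to threads, bounded below the pool size."""

    def __init__(self, concurrency_level: int = POOL_SIZE) -> None:
        self._concurrency_level = concurrency_level
        self._semaphore: Optional[asyncio.Semaphore] = None

    def _get_semaphore(self) -> asyncio.Semaphore:
        # Created lazily so the semaphore binds to the running event loop.
        if self._semaphore is None:
            self._semaphore = asyncio.Semaphore(max(1, self._concurrency_level - 2))
        return self._semaphore

    async def run(self, func, *args, **kwargs):
        # Hold a permit while the blocking call runs in a worker thread.
        async with self._get_semaphore():
            return await asyncio.to_thread(func, *args, **kwargs)


async def main() -> None:
    offloader = BoundedOffloader()
    # 50 blocking sleeps, but at most POOL_SIZE - 2 run concurrently.
    await asyncio.gather(*(offloader.run(time.sleep, 0.01) for _ in range(50)))


if __name__ == "__main__":
    asyncio.run(main())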

@@ -208,12 +218,33 @@ class RedisIngestService(IngestServiceMeta):
         ttl_for_result: Optional[int] = (
             self._result_data_ttl_seconds if self._fetch_mode == FetchMode.NON_DESTRUCTIVE else None
         )
+        # Determine target queue based on optional QoS hint
+        queue_hint = None
+        try:
+            routing_opts = job_spec.get("routing_options") or {}
+            tracing_opts = job_spec.get("tracing_options") or {}
+            queue_hint = routing_opts.get("queue_hint") or tracing_opts.get("queue_hint")
+        except Exception:
+            queue_hint = None
+        allowed = {"default", "immediate", "micro", "small", "medium", "large"}
+        if isinstance(queue_hint, str) and queue_hint in allowed:
+            if queue_hint == "default":
+                channel_name = self._redis_task_queue
+            else:
+                channel_name = f"{self._redis_task_queue}_{queue_hint}"
+        else:
+            channel_name = self._redis_task_queue
+        logger.debug(
+            f"Submitting job {trace_id} to queue '{channel_name}' (hint={queue_hint}) "
+            f"with result TTL: {ttl_for_result}"
+        )
+
         logger.debug(
             f"Submitting job {trace_id} to queue '{self._redis_task_queue}' with result TTL: {ttl_for_result}"
         )
-        await
+        await self._run_bounded_to_thread(
             self._ingest_client.submit_message,
-            channel_name=
+            channel_name=channel_name,
             message=job_spec_json,
             ttl_seconds=ttl_for_result,
         )
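
Note: the submit path above derives the target channel from an optional QoS queue hint. A minimal sketch of that routing rule follows, assuming that suffixing the base queue name is all the broker needs; `resolve_task_queue`, `ALLOWED_HINTS`, and the `ingest_tasks` queue name are illustrative.

from typing import Any, Dict, Optional

ALLOWED_HINTS = {"default", "immediate", "micro", "small", "medium", "large"}


def resolve_task_queue(job_spec: Dict[str, Any], base_queue: str) -> str:
    """Map an optional QoS queue hint in the job spec to a concrete channel name."""
    routing_opts = job_spec.get("routing_options") or {}
    tracing_opts = job_spec.get("tracing_options") or {}
    hint: Optional[str] = routing_opts.get("queue_hint") or tracing_opts.get("queue_hint")
    if isinstance(hint, str) and hint in ALLOWED_HINTS and hint != "default":
        return f"{base_queue}_{hint}"
    # Unknown, missing, or "default" hints fall back to the base queue.
    return base_queue


print(resolve_task_queue({"routing_options": {"queue_hint": "micro"}}, "ingest_tasks"))  # ingest_tasks_micro
print(resolve_task_queue({}, "ingest_tasks"))                                            # ingest_tasks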

@@ -252,7 +283,7 @@ class RedisIngestService(IngestServiceMeta):
         try:
             result_channel: str = f"{job_id}"
             logger.debug(f"Attempting to fetch job result for {job_id} using mode {self._fetch_mode.name}")
-            message = await
+            message = await self._run_bounded_to_thread(
                 self._ingest_client.fetch_message,
                 channel_name=result_channel,
                 timeout=10,
@@ -264,7 +295,7 @@ class RedisIngestService(IngestServiceMeta):
                 logger.warning(f"fetch_message for {job_id} returned None unexpectedly.")
                 raise TimeoutError("No data found (unexpected None response).")
         except (TimeoutError, redis.RedisError, ConnectionError, ValueError, RuntimeError) as e:
-            logger.
+            logger.debug(f"Fetch operation for job {job_id} did not complete: ({type(e).__name__}) {e}")
             raise e
         except Exception as e:
             logger.exception(f"Unexpected error during async fetch_job for {job_id}: {e}")
@@ -289,7 +320,7 @@ class RedisIngestService(IngestServiceMeta):
         ttl_to_set: Optional[int] = self._state_ttl_seconds
         try:
             logger.debug(f"Setting state for {job_id} to {state} with TTL {ttl_to_set}")
-            await
+            await self._run_bounded_to_thread(
                 self._ingest_client.get_client().set,
                 state_key,
                 state,
@@ -317,7 +348,10 @@ class RedisIngestService(IngestServiceMeta):
         """
         state_key: str = f"{self._state_prefix}{job_id}"
         try:
-            data_bytes: Optional[bytes] = await
+            data_bytes: Optional[bytes] = await self._run_bounded_to_thread(
+                self._ingest_client.get_client().get,
+                state_key,
+            )
             if data_bytes:
                 state: str = data_bytes.decode("utf-8")
                 logger.debug(f"Retrieved state for {job_id}: {state}")
@@ -350,7 +384,7 @@ class RedisIngestService(IngestServiceMeta):
         cache_key: str = f"{self._cache_prefix}{job_id}"
         try:
             data_to_store: str = json.dumps([job.model_dump(mode="json") for job in jobs_data])
-            await
+            await self._run_bounded_to_thread(
                 self._ingest_client.get_client().set,
                 cache_key,
                 data_to_store,
@@ -375,7 +409,10 @@ class RedisIngestService(IngestServiceMeta):
         """
         cache_key: str = f"{self._cache_prefix}{job_id}"
         try:
-            data_bytes: Optional[bytes] = await
+            data_bytes: Optional[bytes] = await self._run_bounded_to_thread(
+                self._ingest_client.get_client().get,
+                cache_key,
+            )
             if data_bytes is None:
                 return []
             return [ProcessingJob(**job) for job in json.loads(data_bytes)]
@@ -393,3 +430,170 @@ class RedisIngestService(IngestServiceMeta):
         The current fetch mode.
         """
         return self._fetch_mode
+
+    async def set_parent_job_mapping(
+        self,
+        parent_job_id: str,
+        subjob_ids: List[str],
+        metadata: Dict[str, Any],
+        *,
+        subjob_descriptors: Optional[List[Dict[str, Any]]] = None,
+    ) -> None:
+        """
+        Store parent-subjob mapping in Redis for V2 PDF splitting.
+
+        Parameters
+        ----------
+        parent_job_id : str
+            The parent job identifier
+        subjob_ids : List[str]
+            List of subjob identifiers
+        metadata : Dict[str, Any]
+            Metadata about the parent job (total_pages, original_source_id, etc.)
+        subjob_descriptors : List[Dict[str, Any]], optional
+            Detailed descriptors (job_id, chunk_index, start/end pages) for subjobs
+        """
+        parent_key = f"parent:{parent_job_id}:subjobs"
+        metadata_key = f"parent:{parent_job_id}:metadata"
+
+        try:
+            # Store subjob IDs as a set (only if there are subjobs)
+            if subjob_ids:
+                await self._run_bounded_to_thread(
+                    self._ingest_client.get_client().sadd,
+                    parent_key,
+                    *subjob_ids,
+                )
+
+            # Store metadata as hash (including original subjob ordering for deterministic fetches)
+            metadata_to_store = dict(metadata)
+            try:
+                metadata_to_store["subjob_order"] = json.dumps(subjob_ids)
+            except (TypeError, ValueError):
+                logger.warning(
+                    "Unable to serialize subjob ordering for parent %s; falling back to Redis set ordering",
+                    parent_job_id,
+                )
+                metadata_to_store.pop("subjob_order", None)
+
+            if subjob_descriptors:
+                metadata_to_store["subjob_descriptors"] = json.dumps(subjob_descriptors)
+
+            await self._run_bounded_to_thread(
+                self._ingest_client.get_client().hset,
+                metadata_key,
+                mapping=metadata_to_store,
+            )
+
+            # Set TTL on both keys to match state TTL
+            if self._state_ttl_seconds:
+                await self._run_bounded_to_thread(
+                    self._ingest_client.get_client().expire,
+                    parent_key,
+                    self._state_ttl_seconds,
+                )
+                await self._run_bounded_to_thread(
+                    self._ingest_client.get_client().expire,
+                    metadata_key,
+                    self._state_ttl_seconds,
+                )
+
+            logger.debug(f"Stored parent job mapping for {parent_job_id} with {len(subjob_ids)} subjobs")
+
+        except Exception as err:
+            logger.exception(f"Error storing parent job mapping for {parent_job_id}: {err}")
+            raise
+
+    async def get_parent_job_info(self, parent_job_id: str) -> Optional[Dict[str, Any]]:
+        """
+        Retrieve parent job information including subjob IDs and metadata.
+
+        Parameters
+        ----------
+        parent_job_id : str
+            The parent job identifier
+
+        Returns
+        -------
+        Dict[str, Any] or None
+            Dictionary with 'subjob_ids' and 'metadata' keys, or None if not a parent job
+        """
+        parent_key = f"parent:{parent_job_id}:subjobs"
+        metadata_key = f"parent:{parent_job_id}:metadata"
+
+        try:
+            # Check if this is a parent job (check metadata_key since non-split PDFs may not have parent_key)
+            exists = await self._run_bounded_to_thread(
+                self._ingest_client.get_client().exists,
+                metadata_key,  # Check metadata instead of parent_key for non-split PDF support
+            )
+
+            if not exists:
+                return None
+
+            # Get subjob IDs (may be empty for non-split PDFs)
+            subjob_ids_bytes = await self._run_bounded_to_thread(
+                self._ingest_client.get_client().smembers,
+                parent_key,
+            )
+            subjob_id_set = {id.decode("utf-8") for id in subjob_ids_bytes} if subjob_ids_bytes else set()
+
+            # Get metadata
+            metadata_dict = await self._run_bounded_to_thread(
+                self._ingest_client.get_client().hgetall,
+                metadata_key,
+            )
+            metadata = {k.decode("utf-8"): v.decode("utf-8") for k, v in metadata_dict.items()}
+
+            # Convert numeric strings back to numbers
+            if "total_pages" in metadata:
+                metadata["total_pages"] = int(metadata["total_pages"])
+            if "pages_per_chunk" in metadata:
+                try:
+                    metadata["pages_per_chunk"] = int(metadata["pages_per_chunk"])
+                except ValueError:
+                    metadata.pop("pages_per_chunk", None)
+
+            ordered_ids: Optional[List[str]] = None
+            stored_order = metadata.pop("subjob_order", None)
+            if stored_order:
+                try:
+                    candidate_order = json.loads(stored_order)
+                    if isinstance(candidate_order, list):
+                        ordered_ids = [sid for sid in candidate_order if sid in subjob_id_set]
+                except (ValueError, TypeError) as exc:
+                    logger.warning(
+                        "Failed to parse stored subjob order for parent %s: %s",
+                        parent_job_id,
+                        exc,
+                    )
+
+            if ordered_ids is None:
+                ordered_ids = sorted(subjob_id_set)
+            else:
+                remaining_ids = sorted(subjob_id_set - set(ordered_ids))
+                ordered_ids.extend(remaining_ids)
+
+            subjob_descriptors: Optional[List[Dict[str, Any]]] = None
+            stored_descriptors = metadata.pop("subjob_descriptors", None)
+            if stored_descriptors:
+                try:
+                    decoded = json.loads(stored_descriptors)
+                    if isinstance(decoded, list):
+                        subjob_descriptors = decoded
+                except (ValueError, TypeError) as exc:
+                    logger.warning(
+                        "Failed to parse stored subjob descriptors for parent %s: %s",
+                        parent_job_id,
+                        exc,
+                    )
+
+            return {
+                "subjob_ids": ordered_ids,
+                "metadata": metadata,
+                "subjob_descriptors": subjob_descriptors or [],
+            }
+
+        except Exception as err:
+            logger.error(f"Error retrieving parent job info for {parent_job_id}: {err}")
+            return None
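
Note: the new parent/subjob helpers above persist the mapping as a Redis set plus a hash, with the original submission order carried in the hash as JSON. A simplified synchronous sketch of that key layout using plain redis-py follows; `store_parent_mapping`, `load_parent_mapping`, and the TTL default are illustrative and assume flat string/number metadata values.

import json
from typing import Any, Dict, Iterable, Optional

import redis  # redis-py, already a dependency of this module


def store_parent_mapping(
    r: redis.Redis,
    parent_id: str,
    subjob_ids: Iterable[str],
    metadata: Dict[str, Any],
    ttl_seconds: int = 3600,
) -> None:
    parent_key = f"parent:{parent_id}:subjobs"
    metadata_key = f"parent:{parent_id}:metadata"
    subjob_ids = list(subjob_ids)
    if subjob_ids:
        r.sadd(parent_key, *subjob_ids)
    fields = dict(metadata)
    # Redis sets are unordered, so the submission order rides along as JSON.
    fields["subjob_order"] = json.dumps(subjob_ids)
    r.hset(metadata_key, mapping=fields)
    for key in (parent_key, metadata_key):
        r.expire(key, ttl_seconds)


def load_parent_mapping(r: redis.Redis, parent_id: str) -> Optional[Dict[str, Any]]:
    parent_key = f"parent:{parent_id}:subjobs"
    metadata_key = f"parent:{parent_id}:metadata"
    if not r.exists(metadata_key):
        return None
    members = {m.decode() for m in r.smembers(parent_key)}
    meta = {k.decode(): v.decode() for k, v in r.hgetall(metadata_key).items()}
    order = json.loads(meta.pop("subjob_order", "[]"))
    ordered = [sid for sid in order if sid in members] + sorted(members - set(order))
    return {"subjob_ids": ordered, "metadata": meta}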

nv_ingest/pipeline/config/replica_resolver.py

@@ -11,6 +11,7 @@ consumption stays within the static_memory_threshold.
 """
 
 import logging
+import os
 from typing import List
 from copy import deepcopy
 
@@ -102,8 +103,17 @@ def resolve_static_replicas(pipeline_config: PipelineConfigSchema) -> PipelineCo
 
     logger.info(f"Total baseline memory demand: {total_memory_demand_mb}MB from {len(non_static_stages)} stages")
 
-    #
-
+    # Optional bypass of global memory-based scale down via environment variable
+    bypass_env = os.getenv("NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN", "").strip().lower()
+    bypass_scale_down = bypass_env in ("1", "true", "yes", "on")
+
+    # Check if we need to scale down (unless bypassed)
+    if bypass_scale_down:
+        logger.warning(
+            "Bypassing static memory-based replica scale-down due to NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN"
+        )
+        scaling_factor = 1.0
+    elif total_memory_demand_mb <= available_memory_mb:
         logger.info("Memory demand within threshold, applying baseline replica counts")
         scaling_factor = 1.0
     else:
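
Note: the bypass above is driven by a truthy environment variable. A small sketch of that flag parsing follows; `env_flag` is an illustrative helper, not part of the module.

import os


def env_flag(name: str, default: bool = False) -> bool:
    """Treat '1', 'true', 'yes', 'on' (any case) as True; empty/unset as the default."""
    raw = os.getenv(name, "").strip().lower()
    if not raw:
        return default
    return raw in ("1", "true", "yes", "on")


# With NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN=true the resolver keeps the
# baseline replica counts (scaling_factor = 1.0) instead of scaling them down.
if env_flag("NV_INGEST_BYPASS_STATIC_MEMORY_SCALE_DOWN"):
    print("bypassing memory-based scale-down")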

nv_ingest/pipeline/default_libmode_pipeline_impl.py

@@ -65,14 +65,14 @@ stages:
     actor: "nv_ingest.framework.orchestration.ray.stages.extractors.pdf_extractor:PDFExtractorStage"
     config:
       pdfium_config:
-        auth_token: $NGC_API_KEY
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
         yolox_endpoints: [
           $YOLOX_GRPC_ENDPOINT|"",
           $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
         ]
         yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
       nemoretriever_parse_config:
-        auth_token: $NGC_API_KEY
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
         nemoretriever_parse_endpoints: [
           $NEMORETRIEVER_PARSE_GRPC_ENDPOINT|"",
           $NEMORETRIEVER_PARSE_HTTP_ENDPOINT|"https://integrate.api.nvidia.com/v1/chat/completions"
@@ -106,7 +106,7 @@ stages:
         ]
         function_id: $AUDIO_FUNCTION_ID|"1598d209-5e27-4d3c-8079-4751568b1081"
         audio_infer_protocol: $AUDIO_INFER_PROTOCOL|grpc
-        auth_token: $NGC_API_KEY
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
     replicas:
       min_replicas: 0
       max_replicas:
@@ -127,7 +127,14 @@ stages:
           $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
         ]
         yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
-        auth_token: $NGC_API_KEY
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
+      pdfium_config:
+        yolox_endpoints: [
+          $YOLOX_GRPC_ENDPOINT|"",
+          $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
+        ]
+        yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
     replicas:
       min_replicas: 0
       max_replicas:
@@ -148,7 +155,14 @@ stages:
          $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
         ]
         yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
-        auth_token: $NGC_API_KEY
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
+      pdfium_config:
+        yolox_endpoints: [
+          $YOLOX_GRPC_ENDPOINT|"",
+          $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
+        ]
+        yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
     replicas:
       min_replicas: 0
       max_replicas:
@@ -169,7 +183,7 @@ stages:
          $YOLOX_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-page-elements-v2"
         ]
         yolox_infer_protocol: $YOLOX_INFER_PROTOCOL|http
-        auth_token: $NGC_API_KEY
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
     replicas:
       min_replicas: 0
       max_replicas:
@@ -200,11 +214,11 @@ stages:
     config:
       endpoint_config:
        ocr_endpoints: [
-          $OCR_GRPC_ENDPOINT|"
-          $OCR_HTTP_ENDPOINT|""
+          $OCR_GRPC_ENDPOINT|"",
+          $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
        ]
-        ocr_infer_protocol: $OCR_INFER_PROTOCOL|
-        auth_token: $NGC_API_KEY
+        ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
     replicas:
       min_replicas: 0
       max_replicas:
@@ -227,10 +241,10 @@ stages:
        yolox_infer_protocol: $YOLOX_TABLE_STRUCTURE_INFER_PROTOCOL|"http"
        ocr_endpoints: [
          $OCR_GRPC_ENDPOINT|"",
-          $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/
+          $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
        ]
        ocr_infer_protocol: $PADDLE_INFER_PROTOCOL|"http"
-        auth_token: $NGC_API_KEY
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
     replicas:
       min_replicas: 0
       max_replicas:
@@ -254,10 +268,10 @@ stages:
        yolox_infer_protocol: $YOLOX_GRAPHIC_ELEMENTS_INFER_PROTOCOL|"http"
        ocr_endpoints: [
          $OCR_GRPC_ENDPOINT|"",
-          $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/
+          $OCR_HTTP_ENDPOINT|"https://ai.api.nvidia.com/v1/cv/nvidia/nemoretriever-ocr-v1"
        ]
        ocr_infer_protocol: $OCR_INFER_PROTOCOL|"http"
-        auth_token: $NGC_API_KEY
+        auth_token: $NGC_API_KEY|$NVIDIA_API_KEY
     replicas:
       min_replicas: 0
       max_replicas:
@@ -317,9 +331,9 @@ stages:
     phase: 4 # TRANSFORM
     actor: "nv_ingest.framework.orchestration.ray.stages.transforms.image_caption:ImageCaptionTransformStage"
     config:
-      api_key: $NGC_API_KEY
-      endpoint_url: $VLM_CAPTION_ENDPOINT|"
-      model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/
+      api_key: $NGC_API_KEY|$NVIDIA_API_KEY
+      endpoint_url: $VLM_CAPTION_ENDPOINT|"http://vlm:8000/v1/chat/completions"
+      model_name: $VLM_CAPTION_MODEL_NAME|"nvidia/nemotron-nano-12b-v2-vl"
      prompt: "Caption the content of this image:"
     replicas:
       min_replicas: 0
@@ -335,7 +349,7 @@ stages:
     phase: 4 # TRANSFORM
     actor: "nv_ingest.framework.orchestration.ray.stages.transforms.text_embed:TextEmbeddingTransformStage"
     config:
-      api_key: $NGC_API_KEY
+      api_key: $NGC_API_KEY|$NVIDIA_API_KEY
      embedding_model: $EMBEDDING_NIM_MODEL_NAME|"nvidia/llama-3.2-nv-embedqa-1b-v2"
      embedding_nim_endpoint: $EMBEDDING_NIM_ENDPOINT|"https://integrate.api.nvidia.com/v1"
     replicas:
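
Note: throughout this config, values of the form `$A|$B` or `$VAR|"literal"` appear to mean "use the first environment variable that is set, otherwise fall through to the next alternative or the quoted literal" — that is how the `$NGC_API_KEY|$NVIDIA_API_KEY` substitutions read. The sketch below is a guess at that resolution logic under that assumption; `resolve` is illustrative, not the package's actual config loader.

import os


def resolve(value: str) -> str:
    """Return the first non-empty alternative, scanning left to right."""
    for part in value.split("|"):
        part = part.strip()
        if part.startswith("$"):
            candidate = os.getenv(part[1:], "")
            if candidate:
                return candidate
        else:
            # A quoted (or bare) literal terminates the chain.
            return part.strip('"')
    return ""


print(resolve('$NGC_API_KEY|$NVIDIA_API_KEY'))  # first of the two keys set in the environment, else ""
print(resolve('$OCR_INFER_PROTOCOL|"http"'))    # env override if present, otherwise "http"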