nv-ingest-client 2025.10.15.dev20251015__py3-none-any.whl → 2025.10.16.dev20251016__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of nv-ingest-client might be problematic.
- nv_ingest_client/client/client.py +141 -216
- nv_ingest_client/util/vdb/milvus.py +1 -1
- {nv_ingest_client-2025.10.15.dev20251015.dist-info → nv_ingest_client-2025.10.16.dev20251016.dist-info}/METADATA +1 -1
- {nv_ingest_client-2025.10.15.dev20251015.dist-info → nv_ingest_client-2025.10.16.dev20251016.dist-info}/RECORD +8 -8
- {nv_ingest_client-2025.10.15.dev20251015.dist-info → nv_ingest_client-2025.10.16.dev20251016.dist-info}/WHEEL +0 -0
- {nv_ingest_client-2025.10.15.dev20251015.dist-info → nv_ingest_client-2025.10.16.dev20251016.dist-info}/entry_points.txt +0 -0
- {nv_ingest_client-2025.10.15.dev20251015.dist-info → nv_ingest_client-2025.10.16.dev20251016.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_client-2025.10.15.dev20251015.dist-info → nv_ingest_client-2025.10.16.dev20251016.dist-info}/top_level.txt +0 -0

nv_ingest_client/client/client.py

@@ -9,7 +9,6 @@ import json
 import logging
 import math
 import os
-import random
 import time
 import threading
 import copy
@@ -65,15 +64,12 @@ class DataDecodeException(Exception):

 class _ConcurrentProcessor:
     """
-    Manages
-
-
-
-
-
-    they become available within the batch using `as_completed`. Retries due
-    to job readiness timeouts are handled by adding the job index to the next
-    processing batch.
+    Manages asynchronous submission and result fetching while keeping a steady
+    pool of up to `batch_size` in-flight jobs:
+    - Retries (202/TimeoutError) are re-queued immediately.
+    - New jobs are submitted as capacity frees up.
+    - Fetches are started for jobs added each cycle.
+    - We always attempt to keep the executor saturated up to `batch_size`.
     """

     def __init__(
@@ -150,8 +146,6 @@ class _ConcurrentProcessor:
         # State variables managed across batch cycles
         self.retry_job_ids: List[str] = []
         self.retry_counts: Dict[str, int] = defaultdict(int)
-        self.next_allowed_fetch_time: Dict[str, float] = {}
-        self._retry_backoff_cap: float = 5.0
         self.results: List[Dict[str, Any]] = []  # Stores successful results (full dicts)
         self.failures: List[Tuple[str, str]] = []  # (job_index, error_message)

@@ -199,8 +193,6 @@ class _ConcurrentProcessor:
         # Cleanup retry count if it exists for this job
         if job_index in self.retry_counts:
             del self.retry_counts[job_index]
-        if job_index in self.next_allowed_fetch_time:
-            del self.next_allowed_fetch_time[job_index]

         # Attempt to mark state as FAILED locally in the client (best effort)
         try:
@@ -258,8 +250,6 @@ class _ConcurrentProcessor:
         # Cleanup retry count if it exists
         if job_index in self.retry_counts:
             del self.retry_counts[job_index]
-        if job_index in self.next_allowed_fetch_time:
-            del self.next_allowed_fetch_time[job_index]

         # Execute completion callback if provided
         if self.completion_callback:
@@ -305,7 +295,7 @@ class _ConcurrentProcessor:

     def _collect_retry_jobs_for_batch(self) -> List[str]:
         """
-        Collect
+        Collect retry jobs for this batch, mirroring handler behavior (no pacing filter).

         Returns
         -------
@@ -315,34 +305,17 @@ class _ConcurrentProcessor:
         if not self.retry_job_ids:
             return []

-
-        eligible: List[str] =
-
-        for job_id in self.retry_job_ids:
-            allowed_at = self.next_allowed_fetch_time.get(job_id, 0.0)
-            if allowed_at <= now:
-                eligible.append(job_id)
-            else:
-                remaining.append(job_id)
-
+        # Take all retries this cycle and clear the list (handler resets per-iteration)
+        eligible: List[str] = list(self.retry_job_ids)
+        self.retry_job_ids = []
         if eligible and self.verbose:
             logger.debug(f"Adding {len(eligible)} retry jobs to current batch.")
-
-        # Keep non-eligible retries for a later batch
-        self.retry_job_ids = remaining
         return eligible

     def _schedule_retry(self, job_index: str) -> None:
         """
-        Schedule
+        Schedule an immediate retry for a job (no pacing), mirroring handler behavior.
         """
-        now = time.time()
-        attempt = max(1, self.retry_counts.get(job_index, 1))
-        base = max(0.01, float(self.retry_delay) if self.retry_delay is not None else 1.0)
-        delay = min(base * (2 ** (attempt - 1)), self._retry_backoff_cap)
-        jitter = random.uniform(0.8, 1.2)
-        wait_s = delay * jitter
-        self.next_allowed_fetch_time[job_index] = now + wait_s
         if job_index not in self.retry_job_ids:
             self.retry_job_ids.append(job_index)

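
For reference, the pacing that was removed above computed a capped exponential backoff with jitter before a job became eligible for another fetch; the new code simply re-queues the job index for the next cycle. A minimal standalone sketch of the removed calculation, with the retry delay, attempt number, and 5.0 s cap passed as plain parameters instead of the class attributes used above:

import random

def next_fetch_delay(attempt: int, retry_delay: float = 1.0, cap: float = 5.0) -> float:
    """Capped exponential backoff with +/-20% jitter, as in the removed pacing code."""
    base = max(0.01, retry_delay)
    delay = min(base * (2 ** (max(1, attempt) - 1)), cap)
    return delay * random.uniform(0.8, 1.2)

# Example: nominal delays grow 1 s, 2 s, 4 s, then hold near the 5 s cap (before jitter).
print([round(next_fetch_delay(a), 2) for a in range(1, 6)])
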
@@ -405,11 +378,6 @@ class _ConcurrentProcessor:
             _ = self.client.submit_job_async(current_batch_new_job_indices, self.job_queue_id)
             # Add successfully initiated jobs to the overall batch list
             current_batch_job_indices.extend(current_batch_new_job_indices)
-            # Stagger the first fetch attempt slightly to avoid immediate 202s
-            now = time.time()
-            for job_index in current_batch_new_job_indices:
-                allowed_at = self.next_allowed_fetch_time.get(job_index, 0.0)
-                self.next_allowed_fetch_time[job_index] = max(allowed_at, now + float(self.initial_fetch_delay))
             # Update count of total initiated jobs
             submitted_new_indices_count += len(current_batch_new_job_indices)
         return current_batch_job_indices, submitted_new_indices_count
@@ -440,35 +408,18 @@ class _ConcurrentProcessor:
         normalized_job_indices : List[str]
             The job indices normalized to those actually returned by the client if a discrepancy occurs.
         """
-        # Filter indices by next_allowed_fetch_time to respect pacing for new jobs
-        now = time.time()
-        eligible_indices: List[str] = []
-        deferred_indices: List[str] = []
-        for idx in current_batch_job_indices:
-            if self.next_allowed_fetch_time.get(idx, 0.0) <= now:
-                eligible_indices.append(idx)
-            else:
-                deferred_indices.append(idx)
-
-        # Defer ineligible jobs for later retry window
-        for idx in deferred_indices:
-            if idx not in self.retry_job_ids:
-                self.retry_job_ids.append(idx)
-
         if self.verbose:
-            logger.debug(
-
-
-
-
-        batch_futures_dict = (
-            self.client.fetch_job_result_async(eligible_indices, data_only=False) if eligible_indices else {}
+            logger.debug(f"Calling fetch_job_result_async for {len(current_batch_job_indices)} jobs.")
+        batch_futures_dict: Dict[Future, str] = (
+            self.client.fetch_job_result_async(current_batch_job_indices, data_only=False, timeout=None)
+            if current_batch_job_indices
+            else {}
         )

         # Check for discrepancies where client might not return all futures
-        if
+        if current_batch_job_indices and (len(batch_futures_dict) != len(current_batch_job_indices)):
             returned_indices = set(batch_futures_dict.values())
-            missing_indices = [idx for idx in
+            missing_indices = [idx for idx in current_batch_job_indices if idx not in returned_indices]
             logger.error(
                 f"fetch_job_result_async discrepancy: Expected {len(current_batch_job_indices)}, got "
                 f"{len(batch_futures_dict)}. Missing: {missing_indices}"
@@ -483,82 +434,10 @@ class _ConcurrentProcessor:
             # Continue processing only the futures we received
             normalized_job_indices = list(returned_indices)
         else:
-            normalized_job_indices = list(
+            normalized_job_indices = list(current_batch_job_indices)

         return batch_futures_dict, normalized_job_indices

-    def _process_batch_futures(self, batch_futures_dict: Dict[Future, str], batch_timeout: float) -> None:
-        """
-        Process the batch futures as they complete, handling success, 202-timeout retries,
-        and failures according to existing logic.
-        """
-        if not batch_futures_dict:
-            if self.verbose:
-                logger.debug("No futures returned/available for processing in this batch.")
-            return
-
-        try:
-            for future in as_completed(batch_futures_dict.keys(), timeout=batch_timeout):
-                job_index = batch_futures_dict[future]
-                try:
-                    # Expect list with one tuple: [(data, index, trace)]
-                    result_list = future.result()
-                    if not isinstance(result_list, list) or len(result_list) != 1:
-                        raise ValueError(f"Expected list length 1, got {len(result_list)}")
-
-                    result_tuple = result_list[0]
-                    if not isinstance(result_tuple, (tuple, list)) or len(result_tuple) != 3:
-                        raise ValueError(f"Expected tuple/list length 3, got {len(result_tuple)}")
-
-                    full_response_dict, fetched_job_index, trace_id = result_tuple
-
-                    if fetched_job_index != job_index:
-                        logger.warning(f"Mismatch: Future for {job_index} returned {fetched_job_index}")
-
-                    self._handle_processing_success(job_index, full_response_dict, trace_id)
-
-                except TimeoutError:
-                    # Handle job not ready - check retry policy and schedule paced retry
-                    self.retry_counts[job_index] += 1
-                    if self.max_job_retries is None or self.retry_counts[job_index] <= self.max_job_retries:
-                        if self.verbose:
-                            logger.info(
-                                f"Job {job_index} not ready, scheduling paced retry (Attempt "
-                                f"{self.retry_counts[job_index]}/{self.max_job_retries or 'inf'})."
-                            )
-                        self._schedule_retry(job_index)
-                    else:
-                        error_msg = f"Exceeded max fetch retries ({self.max_job_retries}) for job {job_index}."
-                        logger.error(error_msg)
-                        self._handle_processing_failure(job_index, error_msg)
-
-                except (ValueError, RuntimeError) as e:
-                    logger.error(f"Job {job_index} failed processing result: {e}", exc_info=self.verbose)
-                    self._handle_processing_failure(job_index, f"Error processing result: {e}")
-                except Exception as e:
-                    logger.exception(f"Unhandled error processing future for job {job_index}: {e}")
-                    self._handle_processing_failure(job_index, f"Unhandled error processing future: {e}")
-
-        except TimeoutError:
-            self._handle_batch_timeout(batch_futures_dict, batch_timeout)
-
-    def _handle_batch_timeout(self, batch_futures_dict: Dict[Future, str], batch_timeout: float) -> None:
-        """
-        Handle a timeout while waiting for batch futures, mirroring the original behavior.
-        """
-        logger.error(
-            f"Batch processing timed out after {batch_timeout}s waiting for futures. "
-            "Some jobs in batch may be lost or incomplete."
-        )
-        remaining_indices_in_batch = []
-        for f, idx in batch_futures_dict.items():
-            if not f.done():
-                remaining_indices_in_batch.append(idx)
-                f.cancel()  # Attempt to cancel underlying task
-        logger.warning(f"Jobs potentially lost/cancelled due to batch timeout: {remaining_indices_in_batch}")
-        for idx in remaining_indices_in_batch:
-            self._handle_processing_failure(idx, f"Batch processing timed out after {batch_timeout}s")
-
     def run(self) -> Tuple[List[Dict[str, Any]], List[Tuple[str, str]]]:
         """
         Executes the main processing loop in batches.
@@ -587,78 +466,117 @@ class _ConcurrentProcessor:
         total_jobs = len(self.all_job_indices_list)
         submitted_new_indices_count = 0  # Tracks indices for which submission has been initiated at least once

-        logger.
-
-
-
-
-
-
-
-
-
-
-
-
+        logger.debug(f"Starting batch processing for {total_jobs} jobs with batch size {self.batch_size}.")
+
+        # Keep up to batch_size jobs in-flight at all times
+        inflight_futures: Dict[Future, str] = {}
+
+        while (submitted_new_indices_count < total_jobs) or self.retry_job_ids or inflight_futures:
+            # 1) Top up from retries first
+            capacity = max(0, self.batch_size - len(inflight_futures))
+            to_fetch: List[str] = []
+            if capacity > 0 and self.retry_job_ids:
+                take = min(capacity, len(self.retry_job_ids))
+                retry_now = self.retry_job_ids[:take]
+                self.retry_job_ids = self.retry_job_ids[take:]
+                to_fetch.extend(retry_now)
+                capacity -= len(retry_now)
+
+            # 2) Then add new jobs up to capacity
+            if capacity > 0 and (submitted_new_indices_count < total_jobs):
+                new_count = min(capacity, total_jobs - submitted_new_indices_count)
+                new_job_indices = self.all_job_indices_list[
+                    submitted_new_indices_count : submitted_new_indices_count + new_count
+                ]
+
+                if not self.job_queue_id:
+                    error_msg = "Cannot submit new jobs: job_queue_id is not set."
+                    logger.error(error_msg)
+                    for job_index in new_job_indices:
+                        self._handle_processing_failure(job_index, error_msg, is_submission_failure=True)
+                    submitted_new_indices_count += len(new_job_indices)
+                    if self.fail_on_submit_error:
+                        raise ValueError(error_msg)
+                else:
+                    try:
+                        _ = self.client.submit_job_async(new_job_indices, self.job_queue_id)
+                        submitted_new_indices_count += len(new_job_indices)
+                        to_fetch.extend(new_job_indices)
+                    except Exception as e:
+                        error_msg = f"Batch async submission initiation failed for {len(new_job_indices)} new jobs: {e}"
+                        logger.error(error_msg, exc_info=True)
+                        for job_index in new_job_indices:
+                            self._handle_processing_failure(
+                                job_index, f"Batch submission initiation error: {e}", is_submission_failure=True
+                            )
+                        submitted_new_indices_count += len(new_job_indices)
+                        if self.fail_on_submit_error:
+                            raise RuntimeError(error_msg) from e

-            # 3)
-
-
-
-
-
-
-
-            # (errors already logged and failures recorded inside helper)
-            if self.fail_on_submit_error:
-                raise
-
-            # 4) If no jobs to fetch this cycle, decide whether to exit or continue
-            if not current_batch_job_indices:
-                if self.verbose:
-                    logger.debug("No jobs identified for fetching in this batch iteration.")
-                if not self.retry_job_ids and submitted_new_indices_count >= total_jobs:
-                    logger.debug("Exiting loop: No jobs to fetch and no retries pending.")
-                    break
-                # If retries remain but are not yet eligible, sleep until earliest allowed
-                if self.retry_job_ids:
-                    now = time.time()
-                    future_times = [self.next_allowed_fetch_time.get(j, now) for j in self.retry_job_ids]
-                    # Consider only times in the future
-                    future_times = [t for t in future_times if t > now]
-                    if future_times:
-                        sleep_for = min(max(min(future_times) - now, 0.05), 1.0)
-                        if self.verbose:
-                            logger.debug(f"Pacing retries: sleeping {sleep_for:.2f}s waiting for next allowed fetch.")
-                        time.sleep(sleep_for)
-                continue
-
-            # 5) Initiate fetching for the current batch
-            try:
-                batch_futures_dict, _ = self._initiate_fetch_for_batch(current_batch_job_indices)
-            except Exception as fetch_init_err:
-                error_msg = (
-                    f"fetch_job_result_async failed for batch ({len(current_batch_job_indices)} jobs): {fetch_init_err}"
-                )
-                logger.error(error_msg, exc_info=True)
-                logger.warning(
-                    f"Marking all {len(current_batch_job_indices)} jobs in failed fetch initiation batch as failed."
-                )
-                for job_index in current_batch_job_indices:
-                    self._handle_processing_failure(
-                        job_index, f"Fetch initiation failed for batch: {fetch_init_err}", is_submission_failure=True
+            # 3) Launch fetches for the jobs we added to this cycle
+            if to_fetch:
+                try:
+                    new_futures = self.client.fetch_job_result_async(to_fetch, data_only=False, timeout=None)
+                    inflight_futures.update(new_futures)
+                except Exception as fetch_init_err:
+                    logger.error(
+                        f"fetch_job_result_async failed to start for {len(to_fetch)} jobs: {fetch_init_err}",
+                        exc_info=True,
                    )
-
-
-
-
-
-
-
-
-
-            #
+                    for job_index in to_fetch:
+                        self._handle_processing_failure(
+                            job_index, f"Fetch initiation error: {fetch_init_err}", is_submission_failure=True
+                        )
+                    if self.fail_on_submit_error:
+                        raise RuntimeError(
+                            f"Stopping due to fetch initiation failure: {fetch_init_err}"
+                        ) from fetch_init_err
+
+            # 4) If nothing left anywhere, exit
+            if not inflight_futures and not self.retry_job_ids and submitted_new_indices_count >= total_jobs:
+                logger.debug("Exiting loop: No in-flight jobs, no retries, and all jobs submitted.")
+                break
+
+            # 5) Wait for at least one in-flight future to complete, then process done ones
+            if inflight_futures:
+                done, _ = concurrent.futures.wait(
+                    set(inflight_futures.keys()), return_when=concurrent.futures.FIRST_COMPLETED
+                )
+                for future in done:
+                    job_index = inflight_futures.pop(future, None)
+                    if job_index is None:
+                        continue
+                    try:
+                        result_list = future.result()
+                        if not isinstance(result_list, list) or len(result_list) != 1:
+                            raise ValueError(f"Expected list length 1, got {len(result_list)}")
+                        result_tuple = result_list[0]
+                        if not isinstance(result_tuple, (tuple, list)) or len(result_tuple) != 3:
+                            raise ValueError(f"Expected tuple/list length 3, got {len(result_tuple)}")
+                        full_response_dict, fetched_job_index, trace_id = result_tuple
+                        if fetched_job_index != job_index:
+                            logger.warning(f"Mismatch: Future for {job_index} returned {fetched_job_index}")
+                        self._handle_processing_success(job_index, full_response_dict, trace_id)
+                    except TimeoutError:
+                        # Not ready -> immediate retry
+                        self.retry_counts[job_index] += 1
+                        if self.max_job_retries is None or self.retry_counts[job_index] <= self.max_job_retries:
+                            if self.verbose:
+                                logger.info(
+                                    f"Job {job_index} not ready, scheduling retry "
+                                    f"(Attempt {self.retry_counts[job_index]}/{self.max_job_retries or 'inf'})."
+                                )
+                            self._schedule_retry(job_index)
+                        else:
+                            error_msg = f"Exceeded max fetch retries ({self.max_job_retries}) for job {job_index}."
+                            logger.error(error_msg)
+                            self._handle_processing_failure(job_index, error_msg)
+                    except (ValueError, RuntimeError) as e:
+                        logger.error(f"Job {job_index} failed processing result: {e}", exc_info=self.verbose)
+                        self._handle_processing_failure(job_index, f"Error processing result: {e}")
+                    except Exception as e:
+                        logger.exception(f"Unhandled error processing future for job {job_index}: {e}")
+                        self._handle_processing_failure(job_index, f"Unhandled error processing future: {e}")

         # --- Final Logging ---
         self._log_final_status(total_jobs)
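
The rewritten run loop above keeps the executor saturated: it tops the in-flight pool up to `batch_size`, waits with `FIRST_COMPLETED`, and immediately refills capacity instead of draining whole batches. A self-contained sketch of that pattern, using a stand-in `fetch` function and plain lists rather than the client API (names here are illustrative only):

import concurrent.futures
import random
import time
from typing import Dict, List

def fetch(job_id: str) -> str:
    """Stand-in for a blocking result fetch; sleeps to simulate server latency."""
    time.sleep(random.uniform(0.05, 0.2))
    return f"result-for-{job_id}"

def run_saturated(job_ids: List[str], batch_size: int = 4) -> Dict[str, str]:
    results: Dict[str, str] = {}
    pending = list(job_ids)
    inflight: Dict[concurrent.futures.Future, str] = {}
    with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as pool:
        while pending or inflight:
            # Top up: keep at most batch_size fetches in flight.
            while pending and len(inflight) < batch_size:
                job_id = pending.pop(0)
                inflight[pool.submit(fetch, job_id)] = job_id
            # Wait for at least one future, then drain everything that finished.
            done, _ = concurrent.futures.wait(
                set(inflight.keys()), return_when=concurrent.futures.FIRST_COMPLETED
            )
            for future in done:
                results[inflight.pop(future)] = future.result()
    return results

print(run_saturated([f"job-{i}" for i in range(10)]))
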
@@ -719,7 +637,7 @@ class NvIngestClient:
             **self._message_client_kwargs,
         )

-        # Initialize the worker pool with the specified size
+        # Initialize the worker pool with the specified size (used for both submit and fetch)
         self._worker_pool = ThreadPoolExecutor(max_workers=worker_pool_size)

         # Telemetry state and controls
@@ -1215,6 +1133,7 @@ class NvIngestClient:
         self,
         job_ids: Union[str, List[str]],
         data_only: bool = False,
+        timeout: Optional[Tuple[int, Optional[float]]] = None,
     ) -> List[Tuple[Any, str, Optional[str]]]:
         """
         Fetch job results via CLI semantics (synchronous list return).
@@ -1234,7 +1153,8 @@ class NvIngestClient:
         if isinstance(job_ids, str):
             job_ids = [job_ids]

-
+        eff_timeout: Tuple[int, Optional[float]] = timeout if timeout is not None else (100, None)
+        return [self._fetch_job_result(job_id, timeout=eff_timeout, data_only=data_only) for job_id in job_ids]

     def _validate_batch_size(self, batch_size: Optional[int]) -> int:
         """
@@ -1351,8 +1271,8 @@ class NvIngestClient:
         # Validate and set batch_size
         validated_batch_size = self._validate_batch_size(batch_size)

-        # Prepare timeout tuple
-        effective_timeout: Tuple[int,
+        # Prepare timeout tuple to mirror handler behavior: finite connect, unbounded read (long-poll)
+        effective_timeout: Tuple[int, Optional[float]] = (int(timeout), None)

         # Delegate to the concurrent processor
         processor = _ConcurrentProcessor(
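
The `(int(timeout), None)` pair above appears to follow the requests-style convention of separate connect and read timeouts, where `None` leaves the read (long-poll) side unbounded on the client. A small illustrative sketch of how such a tuple is interpreted under that assumption (the helper below is not part of the client):

from typing import Optional, Tuple

def describe_timeout(timeout: Tuple[int, Optional[float]]) -> str:
    """Interpret a (connect, read) timeout pair, where None disables the read timeout."""
    connect_s, read_s = timeout
    read_desc = "unbounded (long-poll)" if read_s is None else f"{read_s}s"
    return f"connect timeout: {connect_s}s, read timeout: {read_desc}"

print(describe_timeout((100, None)))  # default used by fetch_job_result_cli
print(describe_timeout((5, 30.0)))    # a finite read timeout
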
@@ -1407,7 +1327,12 @@ class NvIngestClient:
             job_state.trace_id = future.result()[0]  # Trace_id from `submit_job` endpoint submission
             job_state.future = None

-    def fetch_job_result_async(
+    def fetch_job_result_async(
+        self,
+        job_ids: Union[str, List[str]],
+        data_only: bool = True,
+        timeout: Optional[Tuple[int, Optional[float]]] = None,
+    ) -> Dict[Future, str]:
         """
         Fetches job results for a list or a single job ID asynchronously and returns a mapping of futures to job IDs.

@@ -1428,7 +1353,7 @@ class NvIngestClient:
         future_to_job_id = {}
         for job_id in job_ids:
             job_state = self._get_and_check_job_state(job_id)
-            future = self._worker_pool.submit(self.fetch_job_result_cli, job_id, data_only)
+            future = self._worker_pool.submit(self.fetch_job_result_cli, job_id, data_only, timeout)
             job_state.future = future
             future_to_job_id[future] = job_id

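
With the new signature, callers of `fetch_job_result_async` get back a mapping of futures to job IDs and can drain it with `as_completed`. A hypothetical usage sketch, assuming an already-connected client and previously submitted job indices (the `drain_results` helper is illustrative, not part of the library):

from concurrent.futures import as_completed
from typing import Iterable

def drain_results(client, job_ids: Iterable[str]) -> None:
    """Illustrative only: consume the Future -> job_id mapping returned by fetch_job_result_async."""
    futures_to_ids = client.fetch_job_result_async(list(job_ids), data_only=False, timeout=(100, None))
    for future in as_completed(futures_to_ids):
        job_id = futures_to_ids[future]
        try:
            (response, returned_id, trace_id), = future.result()  # [(data, index, trace)]
            print(f"{job_id}: ok (trace={trace_id})")
        except TimeoutError:
            print(f"{job_id}: not ready yet (caller would retry)")
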
nv_ingest_client/util/vdb/milvus.py

@@ -1352,7 +1352,7 @@ def nvingest_retrieval(
         nvidia_api_key=nvidia_api_key,
         input_type="query",
         output_names=["embeddings"],
-        grpc=not (urlparse(embedding_endpoint).scheme
+        grpc=not ("http" in urlparse(embedding_endpoint).scheme),
     )
     client = client or MilvusClient(milvus_uri, token=f"{username}:{password}")
     final_top_k = top_k
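
The one-line change above selects the embedding transport from the URL scheme: any scheme containing "http" (so both http and https) is treated as HTTP, and everything else falls back to gRPC. A standalone sketch of that check:

from urllib.parse import urlparse

def use_grpc(embedding_endpoint: str) -> bool:
    """True when the endpoint scheme is not HTTP-like, mirroring the milvus.py change."""
    return not ("http" in urlparse(embedding_endpoint).scheme)

print(use_grpc("https://example.com/v1/embeddings"))  # False -> HTTPS
print(use_grpc("http://localhost:8000/v1"))           # False -> HTTP
print(use_grpc("grpc://localhost:8001"))              # True  -> gRPC
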
{nv_ingest_client-2025.10.15.dev20251015.dist-info → nv_ingest_client-2025.10.16.dev20251016.dist-info}/RECORD

@@ -6,7 +6,7 @@ nv_ingest_client/cli/util/click.py,sha256=YjQU1uF148FU5D3ozC2m1kkfOOJxO1U8U552-T
 nv_ingest_client/cli/util/processing.py,sha256=ULGCYQF1RTDQV_b35YM1WQRqIjR2wQRMJWu41DogagE,6259
 nv_ingest_client/cli/util/system.py,sha256=AQLq0DD2Ns8jRanrKu1tmVBKPA9rl-F3-ZsGI6FXLqE,1105
 nv_ingest_client/client/__init__.py,sha256=eEX9l1qmkLH2lAAZU3eP17SCV06ZjjrshHAB_xbboHA,375
-nv_ingest_client/client/client.py,sha256=
+nv_ingest_client/client/client.py,sha256=Ic7FPXGN4o-qk0atcbVcofE0ytgW16-B-KqJtXjY8ws,74461
 nv_ingest_client/client/ingest_job_handler.py,sha256=lMk-yQ0b0aK5ucxfNPVhxofzORIIK0jDzCYTmfdMZFw,17059
 nv_ingest_client/client/interface.py,sha256=7G2M59FayYyxvTwP6YCSeB42l6bMfpBNmd4kadKK6iU,50890
 nv_ingest_client/client/util/processing.py,sha256=Ky7x7QbLn3BlgYwmrmoIc-o1VwmlmrcP9tn7GVTi0t0,2502
@@ -45,11 +45,11 @@ nv_ingest_client/util/file_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
 nv_ingest_client/util/file_processing/extract.py,sha256=uXEATBYZXjxdymGTNQvvzDD2eHgpuq4PdU6HsMl0Lp0,4662
 nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIrSfSKzbAk,513
 nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
-nv_ingest_client/util/vdb/milvus.py,sha256=
+nv_ingest_client/util/vdb/milvus.py,sha256=uJUnH9gv8JYKvmI3BbljEsyRhV3l9-jP4F4sKOcfsWE,78702
 nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
-nv_ingest_client-2025.10.
-nv_ingest_client-2025.10.
-nv_ingest_client-2025.10.
-nv_ingest_client-2025.10.
-nv_ingest_client-2025.10.
-nv_ingest_client-2025.10.
+nv_ingest_client-2025.10.16.dev20251016.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+nv_ingest_client-2025.10.16.dev20251016.dist-info/METADATA,sha256=cx9dmXi1Hrit1c0sGa4lyc-2HRHu5Gyd-UlK5qa9jkQ,30627
+nv_ingest_client-2025.10.16.dev20251016.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nv_ingest_client-2025.10.16.dev20251016.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
+nv_ingest_client-2025.10.16.dev20251016.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
+nv_ingest_client-2025.10.16.dev20251016.dist-info/RECORD,,