nv-ingest-client 2025.10.14.dev20251014__py3-none-any.whl → 2025.10.16.dev20251016__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of nv-ingest-client has been flagged as potentially problematic.

@@ -9,7 +9,6 @@ import json
  import logging
  import math
  import os
- import random
  import time
  import threading
  import copy
@@ -36,7 +35,11 @@ from nv_ingest_client.primitives.tasks import TaskType
  from nv_ingest_client.primitives.tasks import is_valid_task_type
  from nv_ingest_client.primitives.tasks import task_factory
  from nv_ingest_client.util.processing import handle_future_result, IngestJobFailure
- from nv_ingest_client.util.util import create_job_specs_for_batch, check_ingest_result
+ from nv_ingest_client.util.util import (
+ create_job_specs_for_batch,
+ check_ingest_result,
+ apply_pdf_split_config_to_job_specs,
+ )

  logger = logging.getLogger(__name__)

@@ -61,15 +64,12 @@ class DataDecodeException(Exception):

  class _ConcurrentProcessor:
  """
- Manages the asynchronous submission and result fetching of jobs using a
- client's public methods, mirroring the batching structure of the CLI path.
-
- This processor takes a list of pre-created job indices, submits them in
- batches via the client's `submit_job_async`, and then fetches results
- for each batch using `fetch_job_result_async`. It processes results as
- they become available within the batch using `as_completed`. Retries due
- to job readiness timeouts are handled by adding the job index to the next
- processing batch.
+ Manages asynchronous submission and result fetching while keeping a steady
+ pool of up to `batch_size` in-flight jobs:
+ - Retries (202/TimeoutError) are re-queued immediately.
+ - New jobs are submitted as capacity frees up.
+ - Fetches are started for jobs added each cycle.
+ - We always attempt to keep the executor saturated up to `batch_size`.
  """

  def __init__(
@@ -146,8 +146,6 @@ class _ConcurrentProcessor:
  # State variables managed across batch cycles
  self.retry_job_ids: List[str] = []
  self.retry_counts: Dict[str, int] = defaultdict(int)
- self.next_allowed_fetch_time: Dict[str, float] = {}
- self._retry_backoff_cap: float = 5.0
  self.results: List[Dict[str, Any]] = [] # Stores successful results (full dicts)
  self.failures: List[Tuple[str, str]] = [] # (job_index, error_message)

@@ -195,8 +193,6 @@
  # Cleanup retry count if it exists for this job
  if job_index in self.retry_counts:
  del self.retry_counts[job_index]
- if job_index in self.next_allowed_fetch_time:
- del self.next_allowed_fetch_time[job_index]

  # Attempt to mark state as FAILED locally in the client (best effort)
  try:
@@ -254,8 +250,6 @@
  # Cleanup retry count if it exists
  if job_index in self.retry_counts:
  del self.retry_counts[job_index]
- if job_index in self.next_allowed_fetch_time:
- del self.next_allowed_fetch_time[job_index]

  # Execute completion callback if provided
  if self.completion_callback:
@@ -301,7 +295,7 @@

  def _collect_retry_jobs_for_batch(self) -> List[str]:
  """
- Collect eligible retry jobs for this batch based on per-job next-allowed time.
+ Collect retry jobs for this batch, mirroring handler behavior (no pacing filter).

  Returns
  -------
@@ -311,34 +305,17 @@
  if not self.retry_job_ids:
  return []

- now = time.time()
- eligible: List[str] = []
- remaining: List[str] = []
- for job_id in self.retry_job_ids:
- allowed_at = self.next_allowed_fetch_time.get(job_id, 0.0)
- if allowed_at <= now:
- eligible.append(job_id)
- else:
- remaining.append(job_id)
-
+ # Take all retries this cycle and clear the list (handler resets per-iteration)
+ eligible: List[str] = list(self.retry_job_ids)
+ self.retry_job_ids = []
  if eligible and self.verbose:
  logger.debug(f"Adding {len(eligible)} retry jobs to current batch.")
-
- # Keep non-eligible retries for a later batch
- self.retry_job_ids = remaining
  return eligible

  def _schedule_retry(self, job_index: str) -> None:
  """
- Schedule a paced retry for a job using exponential backoff with jitter.
+ Schedule an immediate retry for a job (no pacing), mirroring handler behavior.
  """
- now = time.time()
- attempt = max(1, self.retry_counts.get(job_index, 1))
- base = max(0.01, float(self.retry_delay) if self.retry_delay is not None else 1.0)
- delay = min(base * (2 ** (attempt - 1)), self._retry_backoff_cap)
- jitter = random.uniform(0.8, 1.2)
- wait_s = delay * jitter
- self.next_allowed_fetch_time[job_index] = now + wait_s
  if job_index not in self.retry_job_ids:
  self.retry_job_ids.append(job_index)

@@ -401,11 +378,6 @@
  _ = self.client.submit_job_async(current_batch_new_job_indices, self.job_queue_id)
  # Add successfully initiated jobs to the overall batch list
  current_batch_job_indices.extend(current_batch_new_job_indices)
- # Stagger the first fetch attempt slightly to avoid immediate 202s
- now = time.time()
- for job_index in current_batch_new_job_indices:
- allowed_at = self.next_allowed_fetch_time.get(job_index, 0.0)
- self.next_allowed_fetch_time[job_index] = max(allowed_at, now + float(self.initial_fetch_delay))
  # Update count of total initiated jobs
  submitted_new_indices_count += len(current_batch_new_job_indices)
  return current_batch_job_indices, submitted_new_indices_count
@@ -436,35 +408,18 @@
  normalized_job_indices : List[str]
  The job indices normalized to those actually returned by the client if a discrepancy occurs.
  """
- # Filter indices by next_allowed_fetch_time to respect pacing for new jobs
- now = time.time()
- eligible_indices: List[str] = []
- deferred_indices: List[str] = []
- for idx in current_batch_job_indices:
- if self.next_allowed_fetch_time.get(idx, 0.0) <= now:
- eligible_indices.append(idx)
- else:
- deferred_indices.append(idx)
-
- # Defer ineligible jobs for later retry window
- for idx in deferred_indices:
- if idx not in self.retry_job_ids:
- self.retry_job_ids.append(idx)
-
  if self.verbose:
- logger.debug(
- f"Calling fetch_job_result_async for {len(eligible_indices)} eligible jobs "
- f"(deferred {len(deferred_indices)})."
- )
- # Use data_only=False to get full response for callback/results
- batch_futures_dict = (
- self.client.fetch_job_result_async(eligible_indices, data_only=False) if eligible_indices else {}
+ logger.debug(f"Calling fetch_job_result_async for {len(current_batch_job_indices)} jobs.")
+ batch_futures_dict: Dict[Future, str] = (
+ self.client.fetch_job_result_async(current_batch_job_indices, data_only=False, timeout=None)
+ if current_batch_job_indices
+ else {}
  )

  # Check for discrepancies where client might not return all futures
- if eligible_indices and (len(batch_futures_dict) != len(eligible_indices)):
+ if current_batch_job_indices and (len(batch_futures_dict) != len(current_batch_job_indices)):
  returned_indices = set(batch_futures_dict.values())
- missing_indices = [idx for idx in eligible_indices if idx not in returned_indices]
+ missing_indices = [idx for idx in current_batch_job_indices if idx not in returned_indices]
  logger.error(
  f"fetch_job_result_async discrepancy: Expected {len(current_batch_job_indices)}, got "
  f"{len(batch_futures_dict)}. Missing: {missing_indices}"
@@ -479,82 +434,10 @@
  # Continue processing only the futures we received
  normalized_job_indices = list(returned_indices)
  else:
- normalized_job_indices = list(eligible_indices)
+ normalized_job_indices = list(current_batch_job_indices)

  return batch_futures_dict, normalized_job_indices

- def _process_batch_futures(self, batch_futures_dict: Dict[Future, str], batch_timeout: float) -> None:
- """
- Process the batch futures as they complete, handling success, 202-timeout retries,
- and failures according to existing logic.
- """
- if not batch_futures_dict:
- if self.verbose:
- logger.debug("No futures returned/available for processing in this batch.")
- return
-
- try:
- for future in as_completed(batch_futures_dict.keys(), timeout=batch_timeout):
- job_index = batch_futures_dict[future]
- try:
- # Expect list with one tuple: [(data, index, trace)]
- result_list = future.result()
- if not isinstance(result_list, list) or len(result_list) != 1:
- raise ValueError(f"Expected list length 1, got {len(result_list)}")
-
- result_tuple = result_list[0]
- if not isinstance(result_tuple, (tuple, list)) or len(result_tuple) != 3:
- raise ValueError(f"Expected tuple/list length 3, got {len(result_tuple)}")
-
- full_response_dict, fetched_job_index, trace_id = result_tuple
-
- if fetched_job_index != job_index:
- logger.warning(f"Mismatch: Future for {job_index} returned {fetched_job_index}")
-
- self._handle_processing_success(job_index, full_response_dict, trace_id)
-
- except TimeoutError:
- # Handle job not ready - check retry policy and schedule paced retry
- self.retry_counts[job_index] += 1
- if self.max_job_retries is None or self.retry_counts[job_index] <= self.max_job_retries:
- if self.verbose:
- logger.info(
- f"Job {job_index} not ready, scheduling paced retry (Attempt "
- f"{self.retry_counts[job_index]}/{self.max_job_retries or 'inf'})."
- )
- self._schedule_retry(job_index)
- else:
- error_msg = f"Exceeded max fetch retries ({self.max_job_retries}) for job {job_index}."
- logger.error(error_msg)
- self._handle_processing_failure(job_index, error_msg)
-
- except (ValueError, RuntimeError) as e:
- logger.error(f"Job {job_index} failed processing result: {e}", exc_info=self.verbose)
- self._handle_processing_failure(job_index, f"Error processing result: {e}")
- except Exception as e:
- logger.exception(f"Unhandled error processing future for job {job_index}: {e}")
- self._handle_processing_failure(job_index, f"Unhandled error processing future: {e}")
-
- except TimeoutError:
- self._handle_batch_timeout(batch_futures_dict, batch_timeout)
-
- def _handle_batch_timeout(self, batch_futures_dict: Dict[Future, str], batch_timeout: float) -> None:
- """
- Handle a timeout while waiting for batch futures, mirroring the original behavior.
- """
- logger.error(
- f"Batch processing timed out after {batch_timeout}s waiting for futures. "
- "Some jobs in batch may be lost or incomplete."
- )
- remaining_indices_in_batch = []
- for f, idx in batch_futures_dict.items():
- if not f.done():
- remaining_indices_in_batch.append(idx)
- f.cancel() # Attempt to cancel underlying task
- logger.warning(f"Jobs potentially lost/cancelled due to batch timeout: {remaining_indices_in_batch}")
- for idx in remaining_indices_in_batch:
- self._handle_processing_failure(idx, f"Batch processing timed out after {batch_timeout}s")
-
  def run(self) -> Tuple[List[Dict[str, Any]], List[Tuple[str, str]]]:
  """
  Executes the main processing loop in batches.
@@ -583,78 +466,117 @@
  total_jobs = len(self.all_job_indices_list)
  submitted_new_indices_count = 0 # Tracks indices for which submission has been initiated at least once

- logger.info(f"Starting batch processing for {total_jobs} jobs with batch size {self.batch_size}.")
-
- while (submitted_new_indices_count < total_jobs) or self.retry_job_ids:
-
- # 1) Collect retries intended for this batch
- current_batch_job_indices: List[str] = self._collect_retry_jobs_for_batch()
-
- # 2) Select new jobs to fill the batch capacity
- current_batch_new_job_indices, submitted_new_indices_count = self._select_new_jobs_for_batch(
- submitted_new_indices_count=submitted_new_indices_count,
- total_jobs=total_jobs,
- already_in_batch=len(current_batch_job_indices),
- )
+ logger.debug(f"Starting batch processing for {total_jobs} jobs with batch size {self.batch_size}.")
+
+ # Keep up to batch_size jobs in-flight at all times
+ inflight_futures: Dict[Future, str] = {}
+
+ while (submitted_new_indices_count < total_jobs) or self.retry_job_ids or inflight_futures:
+ # 1) Top up from retries first
+ capacity = max(0, self.batch_size - len(inflight_futures))
+ to_fetch: List[str] = []
+ if capacity > 0 and self.retry_job_ids:
+ take = min(capacity, len(self.retry_job_ids))
+ retry_now = self.retry_job_ids[:take]
+ self.retry_job_ids = self.retry_job_ids[take:]
+ to_fetch.extend(retry_now)
+ capacity -= len(retry_now)
+
+ # 2) Then add new jobs up to capacity
+ if capacity > 0 and (submitted_new_indices_count < total_jobs):
+ new_count = min(capacity, total_jobs - submitted_new_indices_count)
+ new_job_indices = self.all_job_indices_list[
+ submitted_new_indices_count : submitted_new_indices_count + new_count
+ ]
+
+ if not self.job_queue_id:
+ error_msg = "Cannot submit new jobs: job_queue_id is not set."
+ logger.error(error_msg)
+ for job_index in new_job_indices:
+ self._handle_processing_failure(job_index, error_msg, is_submission_failure=True)
+ submitted_new_indices_count += len(new_job_indices)
+ if self.fail_on_submit_error:
+ raise ValueError(error_msg)
+ else:
+ try:
+ _ = self.client.submit_job_async(new_job_indices, self.job_queue_id)
+ submitted_new_indices_count += len(new_job_indices)
+ to_fetch.extend(new_job_indices)
+ except Exception as e:
+ error_msg = f"Batch async submission initiation failed for {len(new_job_indices)} new jobs: {e}"
+ logger.error(error_msg, exc_info=True)
+ for job_index in new_job_indices:
+ self._handle_processing_failure(
+ job_index, f"Batch submission initiation error: {e}", is_submission_failure=True
+ )
+ submitted_new_indices_count += len(new_job_indices)
+ if self.fail_on_submit_error:
+ raise RuntimeError(error_msg) from e

- # 3) Initiate async submission for the selected new jobs
- try:
- current_batch_job_indices, submitted_new_indices_count = self._submit_new_jobs_async(
- current_batch_new_job_indices,
- current_batch_job_indices,
- submitted_new_indices_count,
- )
- except Exception as e: # noqa: F841
- # Preserve original fail-on-submit behavior
- # (errors already logged and failures recorded inside helper)
- if self.fail_on_submit_error:
- raise
-
- # 4) If no jobs to fetch this cycle, decide whether to exit or continue
- if not current_batch_job_indices:
- if self.verbose:
- logger.debug("No jobs identified for fetching in this batch iteration.")
- if not self.retry_job_ids and submitted_new_indices_count >= total_jobs:
- logger.debug("Exiting loop: No jobs to fetch and no retries pending.")
- break
- # If retries remain but are not yet eligible, sleep until earliest allowed
- if self.retry_job_ids:
- now = time.time()
- future_times = [self.next_allowed_fetch_time.get(j, now) for j in self.retry_job_ids]
- # Consider only times in the future
- future_times = [t for t in future_times if t > now]
- if future_times:
- sleep_for = min(max(min(future_times) - now, 0.05), 1.0)
- if self.verbose:
- logger.debug(f"Pacing retries: sleeping {sleep_for:.2f}s waiting for next allowed fetch.")
- time.sleep(sleep_for)
- continue
-
- # 5) Initiate fetching for the current batch
- try:
- batch_futures_dict, _ = self._initiate_fetch_for_batch(current_batch_job_indices)
- except Exception as fetch_init_err:
- error_msg = (
- f"fetch_job_result_async failed for batch ({len(current_batch_job_indices)} jobs): {fetch_init_err}"
- )
- logger.error(error_msg, exc_info=True)
- logger.warning(
- f"Marking all {len(current_batch_job_indices)} jobs in failed fetch initiation batch as failed."
- )
- for job_index in current_batch_job_indices:
- self._handle_processing_failure(
- job_index, f"Fetch initiation failed for batch: {fetch_init_err}", is_submission_failure=True
+ # 3) Launch fetches for the jobs we added to this cycle
+ if to_fetch:
+ try:
+ new_futures = self.client.fetch_job_result_async(to_fetch, data_only=False, timeout=None)
+ inflight_futures.update(new_futures)
+ except Exception as fetch_init_err:
+ logger.error(
+ f"fetch_job_result_async failed to start for {len(to_fetch)} jobs: {fetch_init_err}",
+ exc_info=True,
  )
- if self.fail_on_submit_error:
- raise RuntimeError(
- f"Stopping due to fetch initiation failure: {fetch_init_err}"
- ) from fetch_init_err
- continue
-
- # 6) Process results for the current batch
- batch_timeout = 600.0
- self._process_batch_futures(batch_futures_dict, batch_timeout)
- # End of processing for this batch cycle
+ for job_index in to_fetch:
+ self._handle_processing_failure(
+ job_index, f"Fetch initiation error: {fetch_init_err}", is_submission_failure=True
+ )
+ if self.fail_on_submit_error:
+ raise RuntimeError(
+ f"Stopping due to fetch initiation failure: {fetch_init_err}"
+ ) from fetch_init_err
+
+ # 4) If nothing left anywhere, exit
+ if not inflight_futures and not self.retry_job_ids and submitted_new_indices_count >= total_jobs:
+ logger.debug("Exiting loop: No in-flight jobs, no retries, and all jobs submitted.")
+ break
+
+ # 5) Wait for at least one in-flight future to complete, then process done ones
+ if inflight_futures:
+ done, _ = concurrent.futures.wait(
+ set(inflight_futures.keys()), return_when=concurrent.futures.FIRST_COMPLETED
+ )
+ for future in done:
+ job_index = inflight_futures.pop(future, None)
+ if job_index is None:
+ continue
+ try:
+ result_list = future.result()
+ if not isinstance(result_list, list) or len(result_list) != 1:
+ raise ValueError(f"Expected list length 1, got {len(result_list)}")
+ result_tuple = result_list[0]
+ if not isinstance(result_tuple, (tuple, list)) or len(result_tuple) != 3:
+ raise ValueError(f"Expected tuple/list length 3, got {len(result_tuple)}")
+ full_response_dict, fetched_job_index, trace_id = result_tuple
+ if fetched_job_index != job_index:
+ logger.warning(f"Mismatch: Future for {job_index} returned {fetched_job_index}")
+ self._handle_processing_success(job_index, full_response_dict, trace_id)
+ except TimeoutError:
+ # Not ready -> immediate retry
+ self.retry_counts[job_index] += 1
+ if self.max_job_retries is None or self.retry_counts[job_index] <= self.max_job_retries:
+ if self.verbose:
+ logger.info(
+ f"Job {job_index} not ready, scheduling retry "
+ f"(Attempt {self.retry_counts[job_index]}/{self.max_job_retries or 'inf'})."
+ )
+ self._schedule_retry(job_index)
+ else:
+ error_msg = f"Exceeded max fetch retries ({self.max_job_retries}) for job {job_index}."
+ logger.error(error_msg)
+ self._handle_processing_failure(job_index, error_msg)
+ except (ValueError, RuntimeError) as e:
+ logger.error(f"Job {job_index} failed processing result: {e}", exc_info=self.verbose)
+ self._handle_processing_failure(job_index, f"Error processing result: {e}")
+ except Exception as e:
+ logger.exception(f"Unhandled error processing future for job {job_index}: {e}")
+ self._handle_processing_failure(job_index, f"Unhandled error processing future: {e}")

  # --- Final Logging ---
  self._log_final_status(total_jobs)
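The rewritten run() loop above replaces lock-step batches with a continuously saturated pool of in-flight fetches. The sketch below illustrates the same pattern in isolation; the poll_result callable and the NotReady exception are illustrative stand-ins for the service's 202/"not ready" responses and are not part of the nv-ingest-client API.

    # Minimal sketch of a saturated in-flight pool with immediate re-queue on "not ready".
    # `poll_result` and `NotReady` are hypothetical stand-ins, not nv-ingest APIs.
    import concurrent.futures
    from collections import deque


    class NotReady(Exception):
        """Mimics a 202 'result not ready yet' response in this sketch."""


    def process_all(job_ids, poll_result, batch_size=8):
        pending = deque(job_ids)   # jobs waiting for a fetch slot
        inflight = {}              # Future -> job_id
        results, failures = {}, []
        with concurrent.futures.ThreadPoolExecutor(max_workers=batch_size) as pool:
            while pending or inflight:
                # Top up to batch_size in-flight fetches.
                while pending and len(inflight) < batch_size:
                    job_id = pending.popleft()
                    inflight[pool.submit(poll_result, job_id)] = job_id
                # Block until at least one fetch finishes, then drain the done set.
                done, _ = concurrent.futures.wait(
                    inflight, return_when=concurrent.futures.FIRST_COMPLETED
                )
                for fut in done:
                    job_id = inflight.pop(fut)
                    try:
                        results[job_id] = fut.result()
                    except NotReady:
                        pending.append(job_id)        # immediate re-queue, no backoff
                    except Exception as exc:
                        failures.append((job_id, str(exc)))
        return results, failures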
@@ -688,11 +610,12 @@ class NvIngestClient:
  message_client_port : int, optional
  Port of the REST/message service. Defaults to 7670.
  message_client_kwargs : dict, optional
- Extra keyword arguments passed to the client allocator.
+ Extra keyword arguments passed to the client allocator. For RestClient,
+ can include 'api_version' (e.g., 'v1' or 'v2'). Defaults to 'v1'.
  msg_counter_id : str, optional
  Identifier for message counting. Defaults to "nv-ingest-message-id".
  worker_pool_size : int, optional
- Number of workers in the thread pool. Defaults to 1.
+ Number of workers in the thread pool. Defaults to 8.

  Returns
  -------
@@ -714,7 +637,7 @@
  **self._message_client_kwargs,
  )

- # Initialize the worker pool with the specified size
+ # Initialize the worker pool with the specified size (used for both submit and fetch)
  self._worker_pool = ThreadPoolExecutor(max_workers=worker_pool_size)

  # Telemetry state and controls
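Per the updated docstring above, the REST client's API version is selected through message_client_kwargs. A minimal construction sketch, assuming the commonly documented import path and a default localhost endpoint:

    # Sketch: select the v2 REST API via message_client_kwargs (defaults to "v1").
    # Host, port, and the import path are assumptions for illustration.
    from nv_ingest_client.client import NvIngestClient

    client = NvIngestClient(
        message_client_hostname="localhost",
        message_client_port=7670,
        message_client_kwargs={"api_version": "v2"},
        worker_pool_size=8,  # shared thread pool for both submit and fetch
    )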
@@ -1210,6 +1133,7 @@
  self,
  job_ids: Union[str, List[str]],
  data_only: bool = False,
+ timeout: Optional[Tuple[int, Optional[float]]] = None,
  ) -> List[Tuple[Any, str, Optional[str]]]:
  """
  Fetch job results via CLI semantics (synchronous list return).
@@ -1229,7 +1153,8 @@
  if isinstance(job_ids, str):
  job_ids = [job_ids]

- return [self._fetch_job_result(job_id, data_only=data_only) for job_id in job_ids]
+ eff_timeout: Tuple[int, Optional[float]] = timeout if timeout is not None else (100, None)
+ return [self._fetch_job_result(job_id, timeout=eff_timeout, data_only=data_only) for job_id in job_ids]

  def _validate_batch_size(self, batch_size: Optional[int]) -> int:
  """
@@ -1346,8 +1271,8 @@
  # Validate and set batch_size
  validated_batch_size = self._validate_batch_size(batch_size)

- # Prepare timeout tuple for fetch calls (enable long-poll): (connect<=5s, read=timeout)
- effective_timeout: Tuple[int, int] = (min(5, int(timeout)), int(timeout))
+ # Prepare timeout tuple to mirror handler behavior: finite connect, unbounded read (long-poll)
+ effective_timeout: Tuple[int, Optional[float]] = (int(timeout), None)

  # Delegate to the concurrent processor
  processor = _ConcurrentProcessor(
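The timeout tuples introduced above follow the (connect, read) convention used by the requests library, where a read timeout of None waits indefinitely on the open connection (the long-poll behavior the new comment describes). A brief sketch of that convention with requests directly; the URL is a placeholder, not an nv-ingest endpoint:

    # Sketch of the (connect, read) timeout convention the fetch path relies on.
    import requests

    connect_timeout = 100   # give up quickly if the TCP/TLS handshake stalls
    read_timeout = None     # long-poll: wait indefinitely for the response body

    response = requests.get(
        "http://localhost:7670/some-long-poll-endpoint",  # illustrative URL only
        timeout=(connect_timeout, read_timeout),
    )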
@@ -1402,7 +1327,12 @@
  job_state.trace_id = future.result()[0] # Trace_id from `submit_job` endpoint submission
  job_state.future = None

- def fetch_job_result_async(self, job_ids: Union[str, List[str]], data_only: bool = True) -> Dict[Future, str]:
+ def fetch_job_result_async(
+ self,
+ job_ids: Union[str, List[str]],
+ data_only: bool = True,
+ timeout: Optional[Tuple[int, Optional[float]]] = None,
+ ) -> Dict[Future, str]:
  """
  Fetches job results for a list or a single job ID asynchronously and returns a mapping of futures to job IDs.

@@ -1423,7 +1353,7 @@
  future_to_job_id = {}
  for job_id in job_ids:
  job_state = self._get_and_check_job_state(job_id)
- future = self._worker_pool.submit(self.fetch_job_result_cli, job_id, data_only)
+ future = self._worker_pool.submit(self.fetch_job_result_cli, job_id, data_only, timeout)
  job_state.future = future
  future_to_job_id[future] = job_id

@@ -1707,7 +1637,9 @@

  return results

- def create_jobs_for_batch(self, files_batch: List[str], tasks: Dict[str, Any]) -> List[str]:
+ def create_jobs_for_batch(
+ self, files_batch: List[str], tasks: Dict[str, Any], pdf_split_page_count: int = None
+ ) -> List[str]:
  """
  Create and submit job specifications (JobSpecs) for a batch of files, returning the job IDs.
  This function takes a batch of files, processes each file to extract its content and type,
@@ -1723,6 +1655,9 @@
  A dictionary of tasks to be added to each job. The keys represent task names, and the
  values represent task specifications or configurations. Standard tasks include "split",
  "extract", "store", "caption", "dedup", "filter", "embed".
+ pdf_split_page_count : int, optional
+ Number of pages per PDF chunk for splitting (1-128). If provided, this will be added
+ to the job spec's extended_options for PDF files.

  Returns
  -------
@@ -1769,6 +1704,10 @@

  job_specs = create_job_specs_for_batch(files_batch)

+ # Apply PDF split config if provided
+ if pdf_split_page_count is not None:
+ apply_pdf_split_config_to_job_specs(job_specs, pdf_split_page_count)
+
  job_ids = []
  for job_spec in job_specs:
  logger.debug(f"Tasks: {tasks.keys()}")
@@ -45,6 +45,7 @@ class IngestJobHandler:
  show_progress: bool = True,
  show_telemetry: bool = False,
  job_queue_id: str = "ingest_task_queue",
+ pdf_split_page_count: int = None,
  ) -> None:
  self.client = client
  self.files = files
@@ -56,6 +57,7 @@
  self.show_progress = show_progress
  self.show_telemetry = show_telemetry
  self.job_queue_id = job_queue_id
+ self.pdf_split_page_count = pdf_split_page_count
  self._pbar = None
  # Internal state used across iterations
  self._retry_job_ids: List[str] = []
@@ -144,7 +146,9 @@
  new_job_count: int = min(self.batch_size - cur_job_count, len(self.files) - self._processed)
  batch_files: List[str] = self.files[self._processed : self._processed + new_job_count]

- new_job_indices: List[str] = self.client.create_jobs_for_batch(batch_files, self.tasks)
+ new_job_indices: List[str] = self.client.create_jobs_for_batch(
+ batch_files, self.tasks, pdf_split_page_count=self.pdf_split_page_count
+ )
  if len(new_job_indices) != new_job_count:
  missing_jobs: int = new_job_count - len(new_job_indices)
  error_msg: str = (
@@ -304,6 +308,7 @@
  trace_ids: Dict[str, str] = defaultdict(list) # type: ignore
  failed_jobs: List[str] = []
  retry_counts: Dict[str, int] = defaultdict(int)
+ pages_per_sec: float = None

  start_time_ns: int = time.time_ns()
  self._init_progress_bar(total_files)
@@ -54,7 +54,7 @@ from nv_ingest_client.primitives.tasks import StoreEmbedTask
  from nv_ingest_client.primitives.tasks import UDFTask
  from nv_ingest_client.util.processing import check_schema
  from nv_ingest_client.util.system import ensure_directory_with_permissions
- from nv_ingest_client.util.util import filter_function_kwargs
+ from nv_ingest_client.util.util import filter_function_kwargs, apply_pdf_split_config_to_job_specs
  from nv_ingest_client.util.vdb import VDB, get_vdb_op_cls
  from tqdm import tqdm

@@ -1237,6 +1237,44 @@ class Ingestor:

  return self

+ @ensure_job_specs
+ def pdf_split_config(self, pages_per_chunk: int = 32) -> "Ingestor":
+ """
+ Configure PDF splitting behavior for V2 API.
+
+ Parameters
+ ----------
+ pages_per_chunk : int, optional
+ Number of pages per PDF chunk (default: 32)
+ Server enforces boundaries: min=1, max=128
+
+ Returns
+ -------
+ Ingestor
+ Self for method chaining
+
+ Notes
+ -----
+ - Only affects V2 API endpoints with PDF splitting support
+ - Server will clamp values outside [1, 128] range
+ - Smaller chunks = more parallelism but more overhead
+ - Larger chunks = less overhead but reduced concurrency
+ """
+ MIN_PAGES = 1
+ MAX_PAGES = 128
+
+ # Warn if value will be clamped by server
+ if pages_per_chunk < MIN_PAGES:
+ logger.warning(f"pages_per_chunk={pages_per_chunk} is below minimum. Server will clamp to {MIN_PAGES}.")
+ elif pages_per_chunk > MAX_PAGES:
+ logger.warning(f"pages_per_chunk={pages_per_chunk} exceeds maximum. Server will clamp to {MAX_PAGES}.")
+
+ # Flatten all job specs and apply PDF config using shared utility
+ all_job_specs = [spec for job_specs in self._job_specs._file_type_to_job_spec.values() for spec in job_specs]
+ apply_pdf_split_config_to_job_specs(all_job_specs, pages_per_chunk)
+
+ return self
+
  def _count_job_states(self, job_states: set[JobStateEnum]) -> int:
  """
  Counts the jobs in specified states.
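A hedged chaining example for the new pdf_split_config() step; the surrounding files()/extract()/ingest() calls, the import path, and the sample path reflect the Ingestor's usual fluent style and are assumptions, not taken from this diff.

    # Sketch: fluent Ingestor usage with the new pdf_split_config() step.
    # Connection kwargs are omitted; the file path and other steps are illustrative.
    from nv_ingest_client.client import Ingestor

    results = (
        Ingestor()
        .files("data/manual.pdf")
        .extract()
        .pdf_split_config(pages_per_chunk=64)  # out-of-range values are clamped server-side to [1, 128]
        .ingest()
    )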
@@ -74,6 +74,12 @@ logger = logging.getLogger(__name__)
  @click.option("--client_host", default="localhost", help="DNS name or URL for the endpoint.")
  @click.option("--client_port", default=7670, type=int, help="Port for the client endpoint.")
  @click.option("--client_kwargs", help="Additional arguments to pass to the client.", default="{}")
+ @click.option(
+ "--api_version",
+ default="v1",
+ type=click.Choice(["v1", "v2"], case_sensitive=False),
+ help="API version to use (v1 or v2). V2 required for PDF split page count feature.",
+ )
  @click.option(
  "--client_type",
  default="rest",
@@ -119,6 +125,8 @@ Example:
  --task 'extract:{"document_type":"docx", "extract_text":true, "extract_images":true}'
  --task 'embed'
  --task 'caption:{}'
+ --pdf_split_page_count 64 # Configure PDF splitting (requires --api_version v2)
+ --api_version v2 # Use V2 API for PDF splitting support

  \b
  Tasks and Options:
@@ -207,6 +215,12 @@ for locating portions of the system that might be bottlenecks for the overall ru
  )
  @click.option("--zipkin_host", default="localhost", help="DNS name or Zipkin API.")
  @click.option("--zipkin_port", default=9411, type=int, help="Port for the Zipkin trace API")
+ @click.option(
+ "--pdf_split_page_count",
+ default=None,
+ type=int,
+ help="Number of pages per PDF chunk for splitting. Allows per-request tuning of PDF split size in v2 api.",
+ )
  @click.option("--version", is_flag=True, help="Show version.")
  @click.pass_context
  def main(
@@ -215,6 +229,7 @@
  client_host: str,
  client_kwargs: str,
  client_port: int,
+ api_version: str,
  client_type: str,
  concurrency_n: int,
  dataset: str,
@@ -228,6 +243,7 @@
  collect_profiling_traces: bool,
  zipkin_host: str,
  zipkin_port: int,
+ pdf_split_page_count: int,
  task: [str],
  version: [bool],
  ):
@@ -268,6 +284,10 @@
  _client_kwargs_obj = json.loads(client_kwargs)
  except Exception:
  _client_kwargs_obj = {"raw": client_kwargs}
+
+ # Merge api_version into client_kwargs
+ _client_kwargs_obj["api_version"] = api_version
+
  _sanitized_client_kwargs = sanitize_for_logging(_client_kwargs_obj)
  logging.debug(
  f"Creating message client: {client_host} and port: {client_port} -> "
@@ -285,7 +305,7 @@
  message_client_allocator=client_allocator,
  message_client_hostname=client_host,
  message_client_port=client_port,
- message_client_kwargs=json.loads(client_kwargs),
+ message_client_kwargs=_client_kwargs_obj,
  worker_pool_size=concurrency_n,
  )

@@ -300,6 +320,7 @@
  save_images_separately=save_images_separately,
  show_progress=True,
  show_telemetry=True,
+ pdf_split_page_count=pdf_split_page_count,
  )
  (total_files, trace_times, pages_processed, trace_ids) = handler.run()

@@ -110,6 +110,7 @@ class JobSpec:
  "job_id": str(self._job_id),
  "tasks": [task.to_dict() for task in self._tasks],
  "tracing_options": self._extended_options.get("tracing_options", {}),
+ "pdf_config": self._extended_options.get("pdf_config", {}),
  }

  @property
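Combined with the split utility added later in this diff, the to_dict() change above surfaces the split setting in the serialized job. A rough sketch of the affected keys only (other to_dict() fields are omitted and the values shown are placeholders):

    # Shape of the keys shown in the hunk above after a 64-page split is applied.
    serialized_subset = {
        "job_id": "0",                              # placeholder id
        "tasks": [],                                # task.to_dict() entries
        "tracing_options": {},
        "pdf_config": {"split_page_count": 64},     # new: populated for PDF jobs, {} otherwise
    }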
@@ -20,7 +20,7 @@ logger = logging.getLogger(__name__)


  def analyze_document_chunks(
- results: Union[List[List[Dict[str, Any]]], List[Dict[str, Any]]]
+ results: Union[List[List[Dict[str, Any]]], List[Dict[str, Any]]],
  ) -> Dict[str, Dict[str, Dict[str, int]]]:
  """
  Analyze ingestor results to count elements by type and page for each document.
@@ -350,6 +350,32 @@ def create_job_specs_for_batch(files_batch: List[str]) -> List[JobSpec]:
  return job_specs


+ def apply_pdf_split_config_to_job_specs(job_specs: List[JobSpec], pages_per_chunk: int) -> None:
+ """
+ Apply PDF split configuration to a list of JobSpec objects.
+
+ Modifies job specs in-place by adding pdf_config to extended_options for PDF files only.
+
+ Parameters
+ ----------
+ job_specs : List[JobSpec]
+ List of job specifications to potentially modify
+ pages_per_chunk : int
+ Number of pages per PDF chunk (will be stored as-is; server performs clamping)
+
+ Notes
+ -----
+ - Only modifies job specs with document_type == "pdf" (case-insensitive)
+ - Modifies job specs in-place
+ - Safe to call on mixed document types (only PDFs are affected)
+ """
+ for job_spec in job_specs:
+ if job_spec.document_type.lower() == "pdf":
+ if "pdf_config" not in job_spec._extended_options:
+ job_spec._extended_options["pdf_config"] = {}
+ job_spec._extended_options["pdf_config"]["split_page_count"] = pages_per_chunk
+
+
  def filter_function_kwargs(func, **kwargs):
  """
  Filters and returns keyword arguments that match the parameters of a given function.
@@ -917,7 +917,9 @@ def wait_for_index(collection_name: str, num_elements: int, client: MilvusClient
  break
  # check if indexed_rows is staying the same, too many times means something is wrong
  if new_indexed_rows == indexed_rows:
- pos_movement = -1
+ pos_movement -= 1
+ else:
+ pos_movement = 10
  # if pos_movement is 0, raise an error, means the rows are not getting indexed as expected
  if pos_movement == 0:
  raise ValueError("Rows are not getting indexed as expected")
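The wait_for_index() change above turns pos_movement into a stall counter: it now decrements only while indexed_rows stops advancing and resets once progress resumes, whereas the old assignment jumped straight to -1 and could never hit the == 0 check. A standalone sketch of the corrected pattern, with probe_indexed_rows standing in for the Milvus index-progress query:

    # Standalone sketch of the corrected stall-counter logic in wait_for_index().
    # `probe_indexed_rows` is a hypothetical stand-in for querying Milvus index progress.
    import time


    def wait_until_indexed(probe_indexed_rows, target_rows, patience=10, poll_s=1.0):
        pos_movement = patience
        indexed_rows = -1
        while True:
            new_indexed_rows = probe_indexed_rows()
            if new_indexed_rows >= target_rows:
                break
            if new_indexed_rows == indexed_rows:
                pos_movement -= 1        # no progress observed this poll
            else:
                pos_movement = patience  # progress resumed, reset patience
            if pos_movement == 0:
                raise ValueError("Rows are not getting indexed as expected")
            indexed_rows = new_indexed_rows
            time.sleep(poll_s)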
@@ -1046,9 +1048,10 @@ def write_to_nvingest_collection(
  client,
  collection_name,
  )
- # Make sure all rows are indexed, decided not to wrap in a timeout because we dont
- # know how long this should take, it is num_elements dependent.
- wait_for_index(collection_name, num_elements, client)
+ if not local_index:
+ # Make sure all rows are indexed, decided not to wrap in a timeout because we dont
+ # know how long this should take, it is num_elements dependent.
+ wait_for_index(collection_name, num_elements, client)
  else:
  minio_client = Minio(minio_endpoint, access_key=access_key, secret_key=secret_key, secure=False)
  bucket_name = bucket_name if bucket_name else ClientConfigSchema().minio_bucket_name
@@ -1349,7 +1352,7 @@ def nvingest_retrieval(
  nvidia_api_key=nvidia_api_key,
  input_type="query",
  output_names=["embeddings"],
- grpc=not (urlparse(embedding_endpoint).scheme == "http"),
+ grpc=not ("http" in urlparse(embedding_endpoint).scheme),
  )
  client = client or MilvusClient(milvus_uri, token=f"{username}:{password}")
  final_top_k = top_k
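The nvingest_retrieval() change above affects how the embedding endpoint's scheme selects gRPC versus HTTP: urlparse("https://...").scheme is "https", so the old exact comparison to "http" misclassified TLS endpoints as gRPC, while the substring check treats both http and https as HTTP. A small illustration with placeholder endpoints:

    # Illustration of the scheme check change for the embedding endpoint.
    from urllib.parse import urlparse

    for endpoint in ("http://embedder:8000/v1", "https://embedder:8000/v1"):
        scheme = urlparse(endpoint).scheme
        old_grpc = not (scheme == "http")    # https endpoints were treated as gRPC
        new_grpc = not ("http" in scheme)    # http and https both take the HTTP path
        print(f"{endpoint}: old grpc={old_grpc}, new grpc={new_grpc}")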
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nv-ingest-client
- Version: 2025.10.14.dev20251014
+ Version: 2025.10.16.dev20251016
  Summary: Python client for the nv-ingest service
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
  License: Apache License
@@ -1,18 +1,18 @@
  nv_ingest_client/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
- nv_ingest_client/nv_ingest_cli.py,sha256=8HpbU6l0i19M9kjZdZKbf03z-CZIZoikigZuo9wD77g,13693
+ nv_ingest_client/nv_ingest_cli.py,sha256=84fc0-6TUe-0BMasRIiRH4okfjno4AKCaKvUwJEZ45k,14457
  nv_ingest_client/cli/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_client/cli/util/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
  nv_ingest_client/cli/util/click.py,sha256=YjQU1uF148FU5D3ozC2m1kkfOOJxO1U8U552-T8PjU4,20029
  nv_ingest_client/cli/util/processing.py,sha256=ULGCYQF1RTDQV_b35YM1WQRqIjR2wQRMJWu41DogagE,6259
  nv_ingest_client/cli/util/system.py,sha256=AQLq0DD2Ns8jRanrKu1tmVBKPA9rl-F3-ZsGI6FXLqE,1105
  nv_ingest_client/client/__init__.py,sha256=eEX9l1qmkLH2lAAZU3eP17SCV06ZjjrshHAB_xbboHA,375
- nv_ingest_client/client/client.py,sha256=gCbOSDoNnt7AYFvyCIjfH36gVxFHNDl2vtpKPKFKSzs,76802
- nv_ingest_client/client/ingest_job_handler.py,sha256=ww_0hE9-GkWT6eygL4EOkwwX-DeD6mMQrF8LXhNIpdc,16846
- nv_ingest_client/client/interface.py,sha256=1WKdCynIqdSJGpcLIim85hzDj8vKPtKuPjGkKYCMp4I,49421
+ nv_ingest_client/client/client.py,sha256=Ic7FPXGN4o-qk0atcbVcofE0ytgW16-B-KqJtXjY8ws,74461
+ nv_ingest_client/client/ingest_job_handler.py,sha256=lMk-yQ0b0aK5ucxfNPVhxofzORIIK0jDzCYTmfdMZFw,17059
+ nv_ingest_client/client/interface.py,sha256=7G2M59FayYyxvTwP6YCSeB42l6bMfpBNmd4kadKK6iU,50890
  nv_ingest_client/client/util/processing.py,sha256=Ky7x7QbLn3BlgYwmrmoIc-o1VwmlmrcP9tn7GVTi0t0,2502
  nv_ingest_client/primitives/__init__.py,sha256=3rbpLCI7Bl0pntGatAxXD_V01y6dcLhHFheI3wqet-I,269
  nv_ingest_client/primitives/jobs/__init__.py,sha256=-yohgHv3LcCtSleHSaxjv1oO7nNcMCjN3ZYoOkIypIk,469
- nv_ingest_client/primitives/jobs/job_spec.py,sha256=NYT8K31b6p2v0zbIYugcARqJ8DTHpSNf_D1-V6M8YXA,15609
+ nv_ingest_client/primitives/jobs/job_spec.py,sha256=teAZbpvxn25jIEUP5YJsAX_E_z9iWhejS-uy5opshFM,15681
  nv_ingest_client/primitives/jobs/job_state.py,sha256=CEe_oZr4p_MobauWIyhuNrP8y7AUwxhIGBuO7dN-VOQ,5277
  nv_ingest_client/primitives/tasks/__init__.py,sha256=D8X4XuwCxk4g_sMSpNRL1XsjVE1eACYaUdEjSanSEfU,1130
  nv_ingest_client/primitives/tasks/audio_extraction.py,sha256=KD5VvaRm6PYelfofZq_-83CbOmupgosokZzFERI5wDA,3559
@@ -32,24 +32,24 @@ nv_ingest_client/primitives/tasks/udf.py,sha256=5e_WJVgocnK-z0EGCEwPO_zG8WJEhuIs
  nv_ingest_client/primitives/tasks/vdb_upload.py,sha256=mXOyQJfQfaoN96nntzevd0sKUs60-AHi8lc1jxG3DAw,1765
  nv_ingest_client/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nv_ingest_client/util/dataset.py,sha256=2yDPs47HNj8AOdOAfJL4XVji0BMRJq_NH8CG4s4xT-Q,3701
- nv_ingest_client/util/document_analysis.py,sha256=8m_qQhQW7ykgHwg7YdRsNuV_MIP6ige4gwCbkyEoKqA,10568
+ nv_ingest_client/util/document_analysis.py,sha256=T4olsfjwm4BZmT9xXT8M8RWKhdCPSASsDpzQmJDflts,10569
  nv_ingest_client/util/image_disk_utils.py,sha256=M-lSRBvNlOMm20uiYygQ0Oh4GMKspih7G03rKNRzOSE,11507
  nv_ingest_client/util/milvus.py,sha256=MwBix_UBg54i7xONBIwjcqeKSBkqunxBJBK2f0bPMoo,61
  nv_ingest_client/util/process_json_files.py,sha256=YKR-fGT4kM8zO2p8r5tpo5-vvFywkcLuNieozvPWvo0,3785
  nv_ingest_client/util/processing.py,sha256=bAy8it-OUgGFO3pcy6D3ezpyZ6p2DfmoQUGhx3QmVf8,8989
  nv_ingest_client/util/system.py,sha256=DVIRLlEWkpqftqxazCuPNdaFSjQiHGMYcHzBufJSRUM,2216
  nv_ingest_client/util/transport.py,sha256=Kwi3r-EUD5yOInW2rH7tYm2DXnzP3aU9l95V-BbXO90,1836
- nv_ingest_client/util/util.py,sha256=0hmru3s3J-lgqTDK88x3ZWZGmumAYudjT6vlMpeHVnw,14825
+ nv_ingest_client/util/util.py,sha256=qwJ4MqF8w4-lws76z8iz1V0Hz_ebDYN8yAKyJPGuHuU,15828
  nv_ingest_client/util/zipkin.py,sha256=p2tMtTVAqrZGxmAxWKE42wkx7U5KywiX5munI7rJt_k,4473
  nv_ingest_client/util/file_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  nv_ingest_client/util/file_processing/extract.py,sha256=uXEATBYZXjxdymGTNQvvzDD2eHgpuq4PdU6HsMl0Lp0,4662
  nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIrSfSKzbAk,513
  nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
- nv_ingest_client/util/vdb/milvus.py,sha256=dYXszrWdwYYASBW6t8lMI6QK9-BzhV6HAUYjt3cIDsE,78602
+ nv_ingest_client/util/vdb/milvus.py,sha256=uJUnH9gv8JYKvmI3BbljEsyRhV3l9-jP4F4sKOcfsWE,78702
  nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
- nv_ingest_client-2025.10.14.dev20251014.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- nv_ingest_client-2025.10.14.dev20251014.dist-info/METADATA,sha256=Jr4vwEkmEJeHg_Mmtq_SU6JU2emsij1qvAKOD5eECwI,30627
- nv_ingest_client-2025.10.14.dev20251014.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- nv_ingest_client-2025.10.14.dev20251014.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
- nv_ingest_client-2025.10.14.dev20251014.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
- nv_ingest_client-2025.10.14.dev20251014.dist-info/RECORD,,
+ nv_ingest_client-2025.10.16.dev20251016.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ nv_ingest_client-2025.10.16.dev20251016.dist-info/METADATA,sha256=cx9dmXi1Hrit1c0sGa4lyc-2HRHu5Gyd-UlK5qa9jkQ,30627
+ nv_ingest_client-2025.10.16.dev20251016.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ nv_ingest_client-2025.10.16.dev20251016.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
+ nv_ingest_client-2025.10.16.dev20251016.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
+ nv_ingest_client-2025.10.16.dev20251016.dist-info/RECORD,,