nv-ingest-client 2025.8.6.dev20250806__py3-none-any.whl → 2025.8.8.dev20250808__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. Click here for more details.

@@ -535,10 +535,6 @@ class Ingestor:
535
535
 
536
536
  proc_kwargs = filter_function_kwargs(self._client.process_jobs_concurrently, **kwargs)
537
537
 
538
- _return_failures = return_failures
539
- if self._vdb_bulk_upload:
540
- return_failures = True
541
-
542
538
  results, failures = self._client.process_jobs_concurrently(
543
539
  job_indices=self._job_ids,
544
540
  job_queue_id=self._job_queue_id,
@@ -567,15 +563,39 @@ class Ingestor:
567
563
 
568
564
  if self._vdb_bulk_upload:
569
565
  if len(failures) > 0:
570
- raise RuntimeError(f"Failed to ingest documents, unable to complete vdb bulk upload: {failures}")
566
+ # Calculate success metrics
567
+ total_jobs = len(results) + len(failures)
568
+ successful_jobs = len(results)
569
+
570
+ if return_failures:
571
+ # Emit message about partial success
572
+ logger.warning(
573
+ f"Job was not completely successful. "
574
+ f"{successful_jobs} out of {total_jobs} records completed successfully. "
575
+ f"Uploading successful results to vector database."
576
+ )
577
+
578
+ # Upload only the successful results
579
+ if successful_jobs > 0:
580
+ self._vdb_bulk_upload.run(results)
571
581
 
572
- self._vdb_bulk_upload.run(results)
582
+ if self._purge_results_after_vdb_upload:
583
+ logger.info("Purging saved results from disk after successful VDB upload.")
584
+ self._purge_saved_results(results)
573
585
 
574
- if self._purge_results_after_vdb_upload:
575
- logger.info("Purging saved results from disk after successful VDB upload.")
576
- self._purge_saved_results(results)
586
+ else:
587
+ # Original behavior: raise RuntimeError
588
+ raise RuntimeError(
589
+ "Failed to ingest documents, unable to complete vdb bulk upload due to "
590
+ f"no successful results. {len(failures)} out of {total_jobs} records failed "
591
+ )
592
+ else:
593
+ # No failures - proceed with normal upload
594
+ self._vdb_bulk_upload.run(results)
577
595
 
578
- return_failures = _return_failures
596
+ if self._purge_results_after_vdb_upload:
597
+ logger.info("Purging saved results from disk after successful VDB upload.")
598
+ self._purge_saved_results(results)
579
599
 
580
600
  return (results, failures) if return_failures else results
581
601
 
@@ -729,7 +729,7 @@ def write_records_minio(records, writer: RemoteBulkWriter) -> RemoteBulkWriter:
729
729
  for element in records:
730
730
  writer.append_row(element)
731
731
  writer.commit()
732
- print(f"Wrote data to: {writer.batch_files}")
732
+ logger.debug(f"Wrote data to: {writer.batch_files}")
733
733
  return writer
734
734
 
735
735
 
@@ -757,9 +757,10 @@ def bulk_insert_milvus(
757
757
 
758
758
  connections.connect(uri=milvus_uri)
759
759
  t_bulk_start = time.time()
760
+ files_to_upload = [_file for file_set in writer.batch_files for _file in file_set]
760
761
  task_id = utility.do_bulk_insert(
761
762
  collection_name=collection_name,
762
- files=writer.batch_files[0],
763
+ files=files_to_upload,
763
764
  consistency_level=CONSISTENCY,
764
765
  )
765
766
  # list_bulk_insert_tasks = utility.list_bulk_insert_tasks(collection_name=collection_name)
@@ -769,11 +770,11 @@ def bulk_insert_milvus(
769
770
  state = task.state_name
770
771
  if state == "Completed":
771
772
  t_bulk_end = time.time()
772
- print("Start time:", task.create_time_str)
773
- print("Imported row count:", task.row_count)
774
- print(f"Bulk {collection_name} upload took {t_bulk_end - t_bulk_start} s")
773
+ logger.info("Start time:", task.create_time_str)
774
+ logger.info("Imported row count:", task.row_count)
775
+ logger.info(f"Bulk {collection_name} upload took {t_bulk_end - t_bulk_start} s")
775
776
  if task.state == BulkInsertState.ImportFailed:
776
- print("Failed reason:", task.failed_reason)
777
+ logger.error("Failed reason:", task.failed_reason)
777
778
  time.sleep(1)
778
779
 
779
780
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.8.6.dev20250806
3
+ Version: 2025.8.8.dev20250808
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -8,7 +8,7 @@ nv_ingest_client/cli/util/system.py,sha256=AQLq0DD2Ns8jRanrKu1tmVBKPA9rl-F3-ZsGI
8
8
  nv_ingest_client/cli/util/tasks.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
9
9
  nv_ingest_client/client/__init__.py,sha256=eEX9l1qmkLH2lAAZU3eP17SCV06ZjjrshHAB_xbboHA,375
10
10
  nv_ingest_client/client/client.py,sha256=wgPeLUByBNcQRkl1FXe7neHNNC5eY2sVve99g5sW41k,65068
11
- nv_ingest_client/client/interface.py,sha256=4OfPCk3631YP74Ajt5RlbmfYlvyHTJU33WOXFGSfmms,37300
11
+ nv_ingest_client/client/interface.py,sha256=3irfSXfOQ2m7iQenHv8eTrMT54akGEyAk0YV6Hm17SE,38397
12
12
  nv_ingest_client/client/util/processing.py,sha256=MtVRtGnRB8unwTa5b6-LYODx-7kg-RYP3wLmjdqymXw,2195
13
13
  nv_ingest_client/primitives/__init__.py,sha256=3rbpLCI7Bl0pntGatAxXD_V01y6dcLhHFheI3wqet-I,269
14
14
  nv_ingest_client/primitives/exceptions.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -44,11 +44,11 @@ nv_ingest_client/util/file_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
44
44
  nv_ingest_client/util/file_processing/extract.py,sha256=uXEATBYZXjxdymGTNQvvzDD2eHgpuq4PdU6HsMl0Lp0,4662
45
45
  nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIrSfSKzbAk,513
46
46
  nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
47
- nv_ingest_client/util/vdb/milvus.py,sha256=sC9Rb-Du4QI8PXlVI3sMWaSMEwOtzy0_YdjiEAKl5sg,73398
47
+ nv_ingest_client/util/vdb/milvus.py,sha256=uVgCTjg-Njz9Lq_sURNlZEky4KtdapxPr6a5Ug6vCmo,73511
48
48
  nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
49
- nv_ingest_client-2025.8.6.dev20250806.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
50
- nv_ingest_client-2025.8.6.dev20250806.dist-info/METADATA,sha256=k_c5cSZaxna7nC2_WC4fp7E57iR-CXiRgnZcOslp6Pw,30736
51
- nv_ingest_client-2025.8.6.dev20250806.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
52
- nv_ingest_client-2025.8.6.dev20250806.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
53
- nv_ingest_client-2025.8.6.dev20250806.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
54
- nv_ingest_client-2025.8.6.dev20250806.dist-info/RECORD,,
49
+ nv_ingest_client-2025.8.8.dev20250808.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
50
+ nv_ingest_client-2025.8.8.dev20250808.dist-info/METADATA,sha256=R26r2mvtfpC_WIubq7y8CBV-jt_lnlOGfSg5a4e6LU4,30736
51
+ nv_ingest_client-2025.8.8.dev20250808.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
52
+ nv_ingest_client-2025.8.8.dev20250808.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
53
+ nv_ingest_client-2025.8.8.dev20250808.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
54
+ nv_ingest_client-2025.8.8.dev20250808.dist-info/RECORD,,