nv-ingest-client 2025.8.6.dev20250806__tar.gz → 2025.8.8.dev20250808__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-client might be problematic. Click here for more details.
- {nv_ingest_client-2025.8.6.dev20250806/src/nv_ingest_client.egg-info → nv_ingest_client-2025.8.8.dev20250808}/PKG-INFO +1 -1
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/client/interface.py +30 -10
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/vdb/milvus.py +7 -6
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808/src/nv_ingest_client.egg-info}/PKG-INFO +1 -1
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/LICENSE +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/MANIFEST.in +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/README.md +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/pyproject.toml +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/setup.cfg +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/__init__.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/cli/__init__.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/cli/util/__init__.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/cli/util/click.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/cli/util/processing.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/cli/util/system.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/cli/util/tasks.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/client/__init__.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/client/client.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/client/util/processing.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/nv_ingest_cli.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/__init__.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/exceptions.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/jobs/__init__.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/jobs/job_spec.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/jobs/job_state.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/__init__.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/audio_extraction.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/caption.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/chart_extraction.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/dedup.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/embed.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/extract.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/filter.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/infographic_extraction.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/split.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/store.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/table_extraction.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/task_base.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/task_factory.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/transform.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/primitives/tasks/vdb_upload.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/__init__.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/dataset.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/file_processing/__init__.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/file_processing/extract.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/milvus.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/process_json_files.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/processing.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/system.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/transport.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/util.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/vdb/__init__.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/vdb/adt_vdb.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/vdb/opensearch.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client/util/zipkin.py +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client.egg-info/SOURCES.txt +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client.egg-info/dependency_links.txt +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client.egg-info/entry_points.txt +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client.egg-info/requires.txt +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/nv_ingest_client.egg-info/top_level.txt +0 -0
- {nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/version.py +0 -0
|
@@ -535,10 +535,6 @@ class Ingestor:
|
|
|
535
535
|
|
|
536
536
|
proc_kwargs = filter_function_kwargs(self._client.process_jobs_concurrently, **kwargs)
|
|
537
537
|
|
|
538
|
-
_return_failures = return_failures
|
|
539
|
-
if self._vdb_bulk_upload:
|
|
540
|
-
return_failures = True
|
|
541
|
-
|
|
542
538
|
results, failures = self._client.process_jobs_concurrently(
|
|
543
539
|
job_indices=self._job_ids,
|
|
544
540
|
job_queue_id=self._job_queue_id,
|
|
@@ -567,15 +563,39 @@ class Ingestor:
|
|
|
567
563
|
|
|
568
564
|
if self._vdb_bulk_upload:
|
|
569
565
|
if len(failures) > 0:
|
|
570
|
-
|
|
566
|
+
# Calculate success metrics
|
|
567
|
+
total_jobs = len(results) + len(failures)
|
|
568
|
+
successful_jobs = len(results)
|
|
569
|
+
|
|
570
|
+
if return_failures:
|
|
571
|
+
# Emit message about partial success
|
|
572
|
+
logger.warning(
|
|
573
|
+
f"Job was not completely successful. "
|
|
574
|
+
f"{successful_jobs} out of {total_jobs} records completed successfully. "
|
|
575
|
+
f"Uploading successful results to vector database."
|
|
576
|
+
)
|
|
577
|
+
|
|
578
|
+
# Upload only the successful results
|
|
579
|
+
if successful_jobs > 0:
|
|
580
|
+
self._vdb_bulk_upload.run(results)
|
|
571
581
|
|
|
572
|
-
|
|
582
|
+
if self._purge_results_after_vdb_upload:
|
|
583
|
+
logger.info("Purging saved results from disk after successful VDB upload.")
|
|
584
|
+
self._purge_saved_results(results)
|
|
573
585
|
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
586
|
+
else:
|
|
587
|
+
# Original behavior: raise RuntimeError
|
|
588
|
+
raise RuntimeError(
|
|
589
|
+
"Failed to ingest documents, unable to complete vdb bulk upload due to "
|
|
590
|
+
f"no successful results. {len(failures)} out of {total_jobs} records failed "
|
|
591
|
+
)
|
|
592
|
+
else:
|
|
593
|
+
# No failures - proceed with normal upload
|
|
594
|
+
self._vdb_bulk_upload.run(results)
|
|
577
595
|
|
|
578
|
-
|
|
596
|
+
if self._purge_results_after_vdb_upload:
|
|
597
|
+
logger.info("Purging saved results from disk after successful VDB upload.")
|
|
598
|
+
self._purge_saved_results(results)
|
|
579
599
|
|
|
580
600
|
return (results, failures) if return_failures else results
|
|
581
601
|
|
|
@@ -729,7 +729,7 @@ def write_records_minio(records, writer: RemoteBulkWriter) -> RemoteBulkWriter:
|
|
|
729
729
|
for element in records:
|
|
730
730
|
writer.append_row(element)
|
|
731
731
|
writer.commit()
|
|
732
|
-
|
|
732
|
+
logger.debug(f"Wrote data to: {writer.batch_files}")
|
|
733
733
|
return writer
|
|
734
734
|
|
|
735
735
|
|
|
@@ -757,9 +757,10 @@ def bulk_insert_milvus(
|
|
|
757
757
|
|
|
758
758
|
connections.connect(uri=milvus_uri)
|
|
759
759
|
t_bulk_start = time.time()
|
|
760
|
+
files_to_upload = [_file for file_set in writer.batch_files for _file in file_set]
|
|
760
761
|
task_id = utility.do_bulk_insert(
|
|
761
762
|
collection_name=collection_name,
|
|
762
|
-
files=
|
|
763
|
+
files=files_to_upload,
|
|
763
764
|
consistency_level=CONSISTENCY,
|
|
764
765
|
)
|
|
765
766
|
# list_bulk_insert_tasks = utility.list_bulk_insert_tasks(collection_name=collection_name)
|
|
@@ -769,11 +770,11 @@ def bulk_insert_milvus(
|
|
|
769
770
|
state = task.state_name
|
|
770
771
|
if state == "Completed":
|
|
771
772
|
t_bulk_end = time.time()
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
773
|
+
logger.info("Start time:", task.create_time_str)
|
|
774
|
+
logger.info("Imported row count:", task.row_count)
|
|
775
|
+
logger.info(f"Bulk {collection_name} upload took {t_bulk_end - t_bulk_start} s")
|
|
775
776
|
if task.state == BulkInsertState.ImportFailed:
|
|
776
|
-
|
|
777
|
+
logger.error("Failed reason:", task.failed_reason)
|
|
777
778
|
time.sleep(1)
|
|
778
779
|
|
|
779
780
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/pyproject.toml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest_client-2025.8.6.dev20250806 → nv_ingest_client-2025.8.8.dev20250808}/src/version.py
RENAMED
|
File without changes
|