nv-ingest-client 2025.8.19.dev20250819__tar.gz → 2025.8.20.dev20250820__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-client might be problematic. Click here for more details.
- {nv_ingest_client-2025.8.19.dev20250819/src/nv_ingest_client.egg-info → nv_ingest_client-2025.8.20.dev20250820}/PKG-INFO +1 -1
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/dataset.py +8 -2
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/vdb/milvus.py +6 -1
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820/src/nv_ingest_client.egg-info}/PKG-INFO +1 -1
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/LICENSE +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/MANIFEST.in +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/README.md +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/pyproject.toml +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/setup.cfg +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/__init__.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/cli/__init__.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/cli/util/__init__.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/cli/util/click.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/cli/util/processing.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/cli/util/system.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/client/__init__.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/client/client.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/client/interface.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/client/util/processing.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/nv_ingest_cli.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/__init__.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/jobs/__init__.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/jobs/job_spec.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/jobs/job_state.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/__init__.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/audio_extraction.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/caption.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/chart_extraction.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/dedup.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/embed.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/extract.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/filter.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/infographic_extraction.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/split.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/store.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/table_extraction.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/task_base.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/task_factory.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/udf.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/primitives/tasks/vdb_upload.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/__init__.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/file_processing/__init__.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/file_processing/extract.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/milvus.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/process_json_files.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/processing.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/system.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/transport.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/util.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/vdb/__init__.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/vdb/adt_vdb.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/vdb/opensearch.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client/util/zipkin.py +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client.egg-info/SOURCES.txt +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client.egg-info/dependency_links.txt +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client.egg-info/entry_points.txt +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client.egg-info/requires.txt +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/nv_ingest_client.egg-info/top_level.txt +0 -0
- {nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/version.py +0 -0
|
@@ -94,8 +94,14 @@ def get_dataset_files(dataset_bytes: BytesIO, shuffle: bool = False) -> list:
|
|
|
94
94
|
dataset_bytes.seek(0)
|
|
95
95
|
dataset = json.load(dataset_bytes)
|
|
96
96
|
sampled_files = dataset.get("sampled_files", [])
|
|
97
|
-
if shuffle:
|
|
98
|
-
|
|
97
|
+
if shuffle and len(sampled_files) > 1:
|
|
98
|
+
original = list(sampled_files)
|
|
99
|
+
# Create a shuffled copy without mutating the original list
|
|
100
|
+
shuffled = random.sample(sampled_files, k=len(sampled_files))
|
|
101
|
+
# Guard against seeded RNG or accidental identity by forcing a different order
|
|
102
|
+
if shuffled == original:
|
|
103
|
+
shuffled = shuffled[1:] + shuffled[:1]
|
|
104
|
+
return shuffled
|
|
99
105
|
return sampled_files
|
|
100
106
|
except json.JSONDecodeError as err:
|
|
101
107
|
raise ValueError(f"{err}")
|
|
@@ -170,10 +170,15 @@ def grab_meta_collection_info(
|
|
|
170
170
|
embedding_model: str = None,
|
|
171
171
|
embedding_dim: int = None,
|
|
172
172
|
client: MilvusClient = None,
|
|
173
|
+
milvus_uri: str = None,
|
|
174
|
+
username: str = None,
|
|
175
|
+
password: str = None,
|
|
173
176
|
):
|
|
174
177
|
timestamp = timestamp or ""
|
|
175
178
|
embedding_model = embedding_model or ""
|
|
176
179
|
embedding_dim = embedding_dim or ""
|
|
180
|
+
if milvus_uri:
|
|
181
|
+
client = MilvusClient(milvus_uri, token=f"{username}:{password}")
|
|
177
182
|
results = client.query_iterator(
|
|
178
183
|
collection_name=meta_collection_name,
|
|
179
184
|
output_fields=[
|
|
@@ -771,7 +776,7 @@ def bulk_insert_milvus(
|
|
|
771
776
|
"""
|
|
772
777
|
minio_client = Minio(minio_endpoint, access_key=access_key, secret_key=secret_key, secure=False)
|
|
773
778
|
|
|
774
|
-
connections.connect(uri=milvus_uri,
|
|
779
|
+
connections.connect(uri=milvus_uri, token=f"{username}:{password}")
|
|
775
780
|
t_bulk_start = time.time()
|
|
776
781
|
task_ids = []
|
|
777
782
|
uploaded_files = []
|
|
File without changes
|
{nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/MANIFEST.in
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/pyproject.toml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest_client-2025.8.19.dev20250819 → nv_ingest_client-2025.8.20.dev20250820}/src/version.py
RENAMED
|
File without changes
|