nv-ingest-client 2025.8.19.dev20250819__py3-none-any.whl → 2025.8.20.dev20250820__py3-none-any.whl

This diff shows the differences in content between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. Click here for more details.

@@ -94,8 +94,14 @@ def get_dataset_files(dataset_bytes: BytesIO, shuffle: bool = False) -> list:
94
94
  dataset_bytes.seek(0)
95
95
  dataset = json.load(dataset_bytes)
96
96
  sampled_files = dataset.get("sampled_files", [])
97
- if shuffle:
98
- random.shuffle(sampled_files)
97
+ if shuffle and len(sampled_files) > 1:
98
+ original = list(sampled_files)
99
+ # Create a shuffled copy without mutating the original list
100
+ shuffled = random.sample(sampled_files, k=len(sampled_files))
101
+ # Guard against seeded RNG or accidental identity by forcing a different order
102
+ if shuffled == original:
103
+ shuffled = shuffled[1:] + shuffled[:1]
104
+ return shuffled
99
105
  return sampled_files
100
106
  except json.JSONDecodeError as err:
101
107
  raise ValueError(f"{err}")
@@ -170,10 +170,15 @@ def grab_meta_collection_info(
170
170
  embedding_model: str = None,
171
171
  embedding_dim: int = None,
172
172
  client: MilvusClient = None,
173
+ milvus_uri: str = None,
174
+ username: str = None,
175
+ password: str = None,
173
176
  ):
174
177
  timestamp = timestamp or ""
175
178
  embedding_model = embedding_model or ""
176
179
  embedding_dim = embedding_dim or ""
180
+ if milvus_uri:
181
+ client = MilvusClient(milvus_uri, token=f"{username}:{password}")
177
182
  results = client.query_iterator(
178
183
  collection_name=meta_collection_name,
179
184
  output_fields=[
@@ -771,7 +776,7 @@ def bulk_insert_milvus(
771
776
  """
772
777
  minio_client = Minio(minio_endpoint, access_key=access_key, secret_key=secret_key, secure=False)
773
778
 
774
- connections.connect(uri=milvus_uri, username=username, password=password)
779
+ connections.connect(uri=milvus_uri, token=f"{username}:{password}")
775
780
  t_bulk_start = time.time()
776
781
  task_ids = []
777
782
  uploaded_files = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.8.19.dev20250819
3
+ Version: 2025.8.20.dev20250820
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -30,7 +30,7 @@ nv_ingest_client/primitives/tasks/task_factory.py,sha256=x8FXrhlgRYTxM0rLvsUvM8w
30
30
  nv_ingest_client/primitives/tasks/udf.py,sha256=5e_WJVgocnK-z0EGCEwPO_zG8WJEhuIsOUTjPmr8REY,12833
31
31
  nv_ingest_client/primitives/tasks/vdb_upload.py,sha256=mXOyQJfQfaoN96nntzevd0sKUs60-AHi8lc1jxG3DAw,1765
32
32
  nv_ingest_client/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
- nv_ingest_client/util/dataset.py,sha256=b6if_hM15iUJC4rvSHS0cmGBsSuZ3W-NoKDMTulx4b8,3316
33
+ nv_ingest_client/util/dataset.py,sha256=2yDPs47HNj8AOdOAfJL4XVji0BMRJq_NH8CG4s4xT-Q,3701
34
34
  nv_ingest_client/util/milvus.py,sha256=MwBix_UBg54i7xONBIwjcqeKSBkqunxBJBK2f0bPMoo,61
35
35
  nv_ingest_client/util/process_json_files.py,sha256=YKR-fGT4kM8zO2p8r5tpo5-vvFywkcLuNieozvPWvo0,3785
36
36
  nv_ingest_client/util/processing.py,sha256=bAy8it-OUgGFO3pcy6D3ezpyZ6p2DfmoQUGhx3QmVf8,8989
@@ -42,11 +42,11 @@ nv_ingest_client/util/file_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
42
42
  nv_ingest_client/util/file_processing/extract.py,sha256=uXEATBYZXjxdymGTNQvvzDD2eHgpuq4PdU6HsMl0Lp0,4662
43
43
  nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIrSfSKzbAk,513
44
44
  nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
45
- nv_ingest_client/util/vdb/milvus.py,sha256=PC3qXjrdTab2xVS3FZkhj_28T5R9DNaHZ8a7D721Pik,77269
45
+ nv_ingest_client/util/vdb/milvus.py,sha256=5yjn9uZ0fB10RrJml0WdImsfvfcowDtwrPrl_oYnnF0,77436
46
46
  nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
47
- nv_ingest_client-2025.8.19.dev20250819.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
48
- nv_ingest_client-2025.8.19.dev20250819.dist-info/METADATA,sha256=QYd4COuKD4YRil1snCzCeQpUBEuSzpnb5wGVGW__VMk,30737
49
- nv_ingest_client-2025.8.19.dev20250819.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
50
- nv_ingest_client-2025.8.19.dev20250819.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
51
- nv_ingest_client-2025.8.19.dev20250819.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
52
- nv_ingest_client-2025.8.19.dev20250819.dist-info/RECORD,,
47
+ nv_ingest_client-2025.8.20.dev20250820.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
48
+ nv_ingest_client-2025.8.20.dev20250820.dist-info/METADATA,sha256=54Czy3ATSEasGQ0SwUpgxKw1wErWaryTKkHw9-LYpcE,30737
49
+ nv_ingest_client-2025.8.20.dev20250820.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
50
+ nv_ingest_client-2025.8.20.dev20250820.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
51
+ nv_ingest_client-2025.8.20.dev20250820.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
52
+ nv_ingest_client-2025.8.20.dev20250820.dist-info/RECORD,,