nv-ingest-client 2025.8.18.dev20250818__py3-none-any.whl → 2025.8.19.dev20250819__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. Click here for more details.

@@ -84,11 +84,10 @@ def create_nvingest_meta_schema():
84
84
 
85
85
  def create_meta_collection(
86
86
  schema: CollectionSchema,
87
- milvus_uri: str = "http://localhost:19530",
88
87
  collection_name: str = "meta",
89
88
  recreate=False,
89
+ client: MilvusClient = None,
90
90
  ):
91
- client = MilvusClient(milvus_uri)
92
91
  if client.has_collection(collection_name) and not recreate:
93
92
  # already exists, dont erase and recreate
94
93
  return
@@ -106,7 +105,6 @@ def create_meta_collection(
106
105
  def write_meta_collection(
107
106
  collection_name: str,
108
107
  fields: List[str],
109
- milvus_uri: str = "http://localhost:19530",
110
108
  creation_timestamp: str = None,
111
109
  dense_index: str = None,
112
110
  dense_dim: int = None,
@@ -114,6 +112,7 @@ def write_meta_collection(
114
112
  embedding_model: str = None,
115
113
  sparse_model: str = None,
116
114
  meta_collection_name: str = "meta",
115
+ client: MilvusClient = None,
117
116
  ):
118
117
  client_config = ClientConfigSchema()
119
118
  data = {
@@ -132,14 +131,12 @@ def write_meta_collection(
132
131
  },
133
132
  "user_fields": [field.name for field in fields],
134
133
  }
135
- client = MilvusClient(milvus_uri)
136
134
  client.insert(collection_name=meta_collection_name, data=data)
137
135
 
138
136
 
139
137
  def log_new_meta_collection(
140
138
  collection_name: str,
141
139
  fields: List[str],
142
- milvus_uri: str = "http://localhost:19530",
143
140
  creation_timestamp: str = None,
144
141
  dense_index: str = None,
145
142
  dense_dim: int = None,
@@ -148,13 +145,13 @@ def log_new_meta_collection(
148
145
  sparse_model: str = None,
149
146
  meta_collection_name: str = "meta",
150
147
  recreate: bool = False,
148
+ client: MilvusClient = None,
151
149
  ):
152
150
  schema = create_nvingest_meta_schema()
153
- create_meta_collection(schema, milvus_uri, recreate=recreate)
151
+ create_meta_collection(schema, client=client, recreate=recreate)
154
152
  write_meta_collection(
155
153
  collection_name,
156
154
  fields=fields,
157
- milvus_uri=milvus_uri,
158
155
  creation_timestamp=creation_timestamp,
159
156
  dense_index=dense_index,
160
157
  dense_dim=dense_dim,
@@ -162,6 +159,7 @@ def log_new_meta_collection(
162
159
  embedding_model=embedding_model,
163
160
  sparse_model=sparse_model,
164
161
  meta_collection_name=meta_collection_name,
162
+ client=client,
165
163
  )
166
164
 
167
165
 
@@ -171,12 +169,11 @@ def grab_meta_collection_info(
171
169
  timestamp: str = None,
172
170
  embedding_model: str = None,
173
171
  embedding_dim: int = None,
174
- milvus_uri: str = "http://localhost:19530",
172
+ client: MilvusClient = None,
175
173
  ):
176
174
  timestamp = timestamp or ""
177
175
  embedding_model = embedding_model or ""
178
176
  embedding_dim = embedding_dim or ""
179
- client = MilvusClient(milvus_uri)
180
177
  results = client.query_iterator(
181
178
  collection_name=meta_collection_name,
182
179
  output_fields=[
@@ -404,6 +401,8 @@ def create_nvingest_collection(
404
401
  gpu_search: bool = False,
405
402
  dense_dim: int = 2048,
406
403
  recreate_meta: bool = False,
404
+ username: str = None,
405
+ password: str = None,
407
406
  ) -> CollectionSchema:
408
407
  """
409
408
  Creates a milvus collection with an nv-ingest compatible schema under
@@ -413,9 +412,7 @@ def create_nvingest_collection(
413
412
  ----------
414
413
  collection_name : str
415
414
  Name of the collection to be created.
416
- milvus_uri : str,
417
- Milvus address with http(s) preffix and port. Can also be a file path, to activate
418
- milvus-lite.
415
+
419
416
  sparse : bool, optional
420
417
  When set to true, this adds a Sparse index to the IndexParams, usually activated for
421
418
  hybrid search.
@@ -426,6 +423,11 @@ def create_nvingest_collection(
426
423
  If true, creates a GPU_CAGRA index for dense embeddings.
427
424
  dense_dim : int, optional
428
425
  Sets the dimension size for the dense embedding in the milvus schema.
426
+ username : str, optional
427
+ Milvus username.
428
+ password : str, optional
429
+ Milvus password.
430
+
429
431
 
430
432
  Returns
431
433
  -------
@@ -435,7 +437,7 @@ def create_nvingest_collection(
435
437
  """
436
438
  local_index = False
437
439
  if urlparse(milvus_uri).scheme:
438
- connections.connect(uri=milvus_uri)
440
+ connections.connect(uri=milvus_uri, token=f"{username}:{password}")
439
441
  server_version = utility.get_server_version()
440
442
  if "lite" in server_version:
441
443
  gpu_index = False
@@ -444,7 +446,7 @@ def create_nvingest_collection(
444
446
  if milvus_uri.endswith(".db"):
445
447
  local_index = True
446
448
 
447
- client = MilvusClient(milvus_uri)
449
+ client = MilvusClient(milvus_uri, token=f"{username}:{password}")
448
450
  schema = create_nvingest_schema(dense_dim=dense_dim, sparse=sparse, local_index=local_index)
449
451
  index_params = create_nvingest_index_params(
450
452
  sparse=sparse,
@@ -457,11 +459,11 @@ def create_nvingest_collection(
457
459
  log_new_meta_collection(
458
460
  collection_name,
459
461
  fields=schema.fields,
460
- milvus_uri=milvus_uri,
461
462
  dense_index=str(d_idx),
462
463
  dense_dim=dense_dim,
463
464
  sparse_index=str(s_idx),
464
465
  recreate=recreate_meta,
466
+ client=client,
465
467
  )
466
468
  return schema
467
469
 
@@ -744,6 +746,8 @@ def bulk_insert_milvus(
744
746
  access_key: str = "minioadmin",
745
747
  secret_key: str = "minioadmin",
746
748
  bucket_name: str = "nv-ingest",
749
+ username: str = None,
750
+ password: str = None,
747
751
  ):
748
752
  """
749
753
  This function initialize the bulk ingest of all minio uploaded records, and checks for
@@ -760,10 +764,14 @@ def bulk_insert_milvus(
760
764
  milvus_uri : str,
761
765
  Milvus address with http(s) preffix and port. Can also be a file path, to activate
762
766
  milvus-lite.
767
+ username : str, optional
768
+ Milvus username.
769
+ password : str, optional
770
+ Milvus password.
763
771
  """
764
772
  minio_client = Minio(minio_endpoint, access_key=access_key, secret_key=secret_key, secure=False)
765
773
 
766
- connections.connect(uri=milvus_uri)
774
+ connections.connect(uri=milvus_uri, username=username, password=password)
767
775
  t_bulk_start = time.time()
768
776
  task_ids = []
769
777
  uploaded_files = []
@@ -913,6 +921,8 @@ def write_to_nvingest_collection(
913
921
  meta_source_field=None,
914
922
  meta_fields=None,
915
923
  stream: bool = False,
924
+ username: str = None,
925
+ password: str = None,
916
926
  **kwargs,
917
927
  ):
918
928
  """
@@ -953,9 +963,13 @@ def write_to_nvingest_collection(
953
963
  Minio bucket name.
954
964
  stream : bool, optional
955
965
  When true, the records will be inserted into milvus using the stream insert method.
966
+ username : str, optional
967
+ Milvus username.
968
+ password : str, optional
969
+ Milvus password.
956
970
  """
957
971
  local_index = False
958
- connections.connect(uri=milvus_uri)
972
+ connections.connect(uri=milvus_uri, token=f"{username}:{password}")
959
973
  if urlparse(milvus_uri).scheme:
960
974
  server_version = utility.get_server_version()
961
975
  if "lite" in server_version:
@@ -978,7 +992,7 @@ def write_to_nvingest_collection(
978
992
  elif local_index and sparse:
979
993
  bm25_ef = BM25EmbeddingFunction(build_default_analyzer(language="en"))
980
994
  bm25_ef.load(bm25_save_path)
981
- client = MilvusClient(milvus_uri)
995
+ client = MilvusClient(milvus_uri, token=f"{username}:{password}")
982
996
  schema = Collection(collection_name).schema
983
997
  if isinstance(meta_dataframe, str):
984
998
  meta_dataframe = pandas_file_reader(meta_dataframe)
@@ -1038,6 +1052,8 @@ def write_to_nvingest_collection(
1038
1052
  access_key,
1039
1053
  secret_key,
1040
1054
  bucket_name,
1055
+ username=username,
1056
+ password=password,
1041
1057
  )
1042
1058
  # fixes bulk insert lag time https://github.com/milvus-io/milvus/issues/21746
1043
1059
  client.refresh_load(collection_name)
@@ -1222,6 +1238,9 @@ def nvingest_retrieval(
1222
1238
  nv_ranker_max_batch_size: int = 64,
1223
1239
  _filter: str = "",
1224
1240
  ef_param: int = 200,
1241
+ client: MilvusClient = None,
1242
+ username: str = None,
1243
+ password: str = None,
1225
1244
  **kwargs,
1226
1245
  ):
1227
1246
  """
@@ -1268,6 +1287,12 @@ def nvingest_retrieval(
1268
1287
  Max size for the number of candidates to rerank.
1269
1288
  nv_ranker_top_k : int,
1270
1289
  The number of candidates to return after reranking.
1290
+ client : MilvusClient, optional
1291
+ Milvus client instance.
1292
+ username : str, optional
1293
+ Milvus username.
1294
+ password : str, optional
1295
+ Milvus password.
1271
1296
  Returns
1272
1297
  -------
1273
1298
  List
@@ -1289,7 +1314,7 @@ def nvingest_retrieval(
1289
1314
  model_name = model_name if model_name else client_config.embedding_nim_model_name
1290
1315
  local_index = False
1291
1316
  embed_model = NVIDIAEmbedding(base_url=embedding_endpoint, model=model_name, nvidia_api_key=nvidia_api_key)
1292
- client = MilvusClient(milvus_uri)
1317
+ client = client or MilvusClient(milvus_uri, token=f"{username}:{password}")
1293
1318
  final_top_k = top_k
1294
1319
  if nv_ranker:
1295
1320
  top_k = nv_ranker_top_k
@@ -1345,7 +1370,14 @@ def nvingest_retrieval(
1345
1370
  return results
1346
1371
 
1347
1372
 
1348
- def remove_records(source_name: str, collection_name: str, milvus_uri: str = "http://localhost:19530"):
1373
+ def remove_records(
1374
+ source_name: str,
1375
+ collection_name: str,
1376
+ milvus_uri: str = "http://localhost:19530",
1377
+ username: str = None,
1378
+ password: str = None,
1379
+ client: MilvusClient = None,
1380
+ ):
1349
1381
  """
1350
1382
  This function allows a user to remove chunks associated with an ingested file.
1351
1383
  Supply the full path of the file you would like to remove and this function will
@@ -1360,6 +1392,12 @@ def remove_records(source_name: str, collection_name: str, milvus_uri: str = "ht
1360
1392
  milvus_uri : str,
1361
1393
  Milvus address with http(s) preffix and port. Can also be a file path, to activate
1362
1394
  milvus-lite.
1395
+ client : MilvusClient, optional
1396
+ Milvus client instance.
1397
+ username : str, optional
1398
+ Milvus username.
1399
+ password : str, optional
1400
+ Milvus password.
1363
1401
 
1364
1402
  Returns
1365
1403
  -------
@@ -1367,7 +1405,7 @@ def remove_records(source_name: str, collection_name: str, milvus_uri: str = "ht
1367
1405
  Dictionary with one key, `delete_cnt`. The value represents the number of entities
1368
1406
  removed.
1369
1407
  """
1370
- client = MilvusClient(milvus_uri)
1408
+ client = client or MilvusClient(milvus_uri, token=f"{username}:{password}")
1371
1409
  result_ids = client.delete(
1372
1410
  collection_name=collection_name,
1373
1411
  filter=f'(source["source_name"] == "{source_name}")',
@@ -1474,6 +1512,9 @@ def pull_all_milvus(
1474
1512
  write_dir: str = None,
1475
1513
  batch_size: int = 1000,
1476
1514
  include_embeddings: bool = False,
1515
+ username: str = None,
1516
+ password: str = None,
1517
+ client: MilvusClient = None,
1477
1518
  ):
1478
1519
  """
1479
1520
  This function takes the input collection name and pulls all the records
@@ -1492,12 +1533,18 @@ def pull_all_milvus(
1492
1533
  The number of records to pull in each batch. Defaults to 1000.
1493
1534
  include_embeddings : bool, optional
1494
1535
  Whether to include the embeddings in the output. Defaults to False.
1536
+ username : str, optional
1537
+ Milvus username.
1538
+ password : str, optional
1539
+ Milvus password.
1540
+ client : MilvusClient, optional
1541
+ Milvus client instance.
1495
1542
  Returns
1496
1543
  -------
1497
1544
  List
1498
1545
  List of records/files with records from the collection.
1499
1546
  """
1500
- client = MilvusClient(milvus_uri)
1547
+ client = client or MilvusClient(milvus_uri, token=f"{username}:{password}")
1501
1548
  output_fields = ["source", "content_metadata", "text"]
1502
1549
  if include_embeddings:
1503
1550
  output_fields.append("vector")
@@ -1572,6 +1619,9 @@ def embed_index_collection(
1572
1619
  meta_fields: list[str] = None,
1573
1620
  intput_type: str = "passage",
1574
1621
  truncate: str = "END",
1622
+ client: MilvusClient = None,
1623
+ username: str = None,
1624
+ password: str = None,
1575
1625
  **kwargs,
1576
1626
  ):
1577
1627
  """
@@ -1609,6 +1659,12 @@ def embed_index_collection(
1609
1659
  meta_source_field (str, optional): The field in the metadata that serves as the source identifier.
1610
1660
  Defaults to None.
1611
1661
  meta_fields (list[str], optional): A list of metadata fields to include. Defaults to None.
1662
+ client : MilvusClient, optional
1663
+ Milvus client instance.
1664
+ username : str, optional
1665
+ Milvus username.
1666
+ password : str, optional
1667
+ Milvus password.
1612
1668
  **kwargs: Additional keyword arguments for customization.
1613
1669
  """
1614
1670
  client_config = ClientConfigSchema()
@@ -1642,6 +1698,8 @@ def embed_index_collection(
1642
1698
  meta_dataframe=meta_dataframe,
1643
1699
  meta_source_field=meta_source_field,
1644
1700
  meta_fields=meta_fields,
1701
+ username=username,
1702
+ password=password,
1645
1703
  **kwargs,
1646
1704
  )
1647
1705
  # running in parts
@@ -1866,6 +1924,8 @@ class Milvus(VDB):
1866
1924
  meta_fields: list[str] = None,
1867
1925
  stream: bool = False,
1868
1926
  threshold: int = 1000,
1927
+ username: str = None,
1928
+ password: str = None,
1869
1929
  **kwargs,
1870
1930
  ):
1871
1931
  """
@@ -1895,9 +1955,11 @@ class Milvus(VDB):
1895
1955
  meta_source_field (str, optional): The field in the metadata that serves as the source identifier.
1896
1956
  Defaults to None.
1897
1957
  meta_fields (list[str], optional): A list of metadata fields to include. Defaults to None.
1898
- **kwargs: Additional keyword arguments for customization.
1899
1958
  stream (bool, optional): When true, the records will be inserted into milvus using the stream
1900
1959
  insert method.
1960
+ username (str, optional): The username for Milvus authentication. Defaults to None.
1961
+ password (str, optional): The password for Milvus authentication. Defaults to None.
1962
+ **kwargs: Additional keyword arguments for customization.
1901
1963
  """
1902
1964
  kwargs = locals().copy()
1903
1965
  kwargs.pop("self", None)
@@ -1927,6 +1989,8 @@ class Milvus(VDB):
1927
1989
  "gpu_index": self.__dict__.get("gpu_index", True),
1928
1990
  "gpu_search": self.__dict__.get("gpu_search", True),
1929
1991
  "dense_dim": self.__dict__.get("dense_dim", 2048),
1992
+ "username": self.__dict__.get("username", None),
1993
+ "password": self.__dict__.get("password", None),
1930
1994
  }
1931
1995
  return (self.collection_name, conn_dict)
1932
1996
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.8.18.dev20250818
3
+ Version: 2025.8.19.dev20250819
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -42,11 +42,11 @@ nv_ingest_client/util/file_processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
42
42
  nv_ingest_client/util/file_processing/extract.py,sha256=uXEATBYZXjxdymGTNQvvzDD2eHgpuq4PdU6HsMl0Lp0,4662
43
43
  nv_ingest_client/util/vdb/__init__.py,sha256=ZmoEzeM9LzwwrVvu_DVUnjRNx-x8ahkNeIrSfSKzbAk,513
44
44
  nv_ingest_client/util/vdb/adt_vdb.py,sha256=UubzAMSfyrqqpD-OQErpBs25hC2Mw8zGZ4waenGXPOk,515
45
- nv_ingest_client/util/vdb/milvus.py,sha256=SIUiW285lDFUXwJjes_58Y3c4pK51SHFqbn0QEqOmm4,75243
45
+ nv_ingest_client/util/vdb/milvus.py,sha256=PC3qXjrdTab2xVS3FZkhj_28T5R9DNaHZ8a7D721Pik,77269
46
46
  nv_ingest_client/util/vdb/opensearch.py,sha256=I4FzF95VWCOkyzhfm-szdfK1Zd9ugUc8AxxpAdEMWGE,7538
47
- nv_ingest_client-2025.8.18.dev20250818.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
48
- nv_ingest_client-2025.8.18.dev20250818.dist-info/METADATA,sha256=7AoAmMB2B45WV5L1-nHJPTjMQDaO3fExvKWv-5xp6gg,30737
49
- nv_ingest_client-2025.8.18.dev20250818.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
50
- nv_ingest_client-2025.8.18.dev20250818.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
51
- nv_ingest_client-2025.8.18.dev20250818.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
52
- nv_ingest_client-2025.8.18.dev20250818.dist-info/RECORD,,
47
+ nv_ingest_client-2025.8.19.dev20250819.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
48
+ nv_ingest_client-2025.8.19.dev20250819.dist-info/METADATA,sha256=QYd4COuKD4YRil1snCzCeQpUBEuSzpnb5wGVGW__VMk,30737
49
+ nv_ingest_client-2025.8.19.dev20250819.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
50
+ nv_ingest_client-2025.8.19.dev20250819.dist-info/entry_points.txt,sha256=3uQVZkTZIjO08_bjTV-g0CDF5H1nrP1zWXU9gJOweuI,137
51
+ nv_ingest_client-2025.8.19.dev20250819.dist-info/top_level.txt,sha256=1eMhBFD3SiWmpXnod2LM66C1HrSLSk96ninZi5XX-cE,17
52
+ nv_ingest_client-2025.8.19.dev20250819.dist-info/RECORD,,