nv-ingest-client 2025.9.18.dev20250918__tar.gz → 2025.9.19.dev20250919__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-client might be problematic. Click here for more details.

Files changed (61)
  1. {nv_ingest_client-2025.9.18.dev20250918/src/nv_ingest_client.egg-info → nv_ingest_client-2025.9.19.dev20250919}/PKG-INFO +1 -1
  2. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/transport.py +10 -4
  3. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/vdb/milvus.py +28 -17
  4. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919/src/nv_ingest_client.egg-info}/PKG-INFO +1 -1
  5. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/LICENSE +0 -0
  6. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/MANIFEST.in +0 -0
  7. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/README.md +0 -0
  8. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/pyproject.toml +0 -0
  9. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/setup.cfg +0 -0
  10. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/__init__.py +0 -0
  11. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/cli/__init__.py +0 -0
  12. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/cli/util/__init__.py +0 -0
  13. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/cli/util/click.py +0 -0
  14. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/cli/util/processing.py +0 -0
  15. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/cli/util/system.py +0 -0
  16. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/client/__init__.py +0 -0
  17. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/client/client.py +0 -0
  18. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/client/interface.py +0 -0
  19. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/client/util/processing.py +0 -0
  20. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/nv_ingest_cli.py +0 -0
  21. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/__init__.py +0 -0
  22. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/jobs/__init__.py +0 -0
  23. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/jobs/job_spec.py +0 -0
  24. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/jobs/job_state.py +0 -0
  25. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/__init__.py +0 -0
  26. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/audio_extraction.py +0 -0
  27. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/caption.py +0 -0
  28. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/chart_extraction.py +0 -0
  29. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/dedup.py +0 -0
  30. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/embed.py +0 -0
  31. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/extract.py +0 -0
  32. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/filter.py +0 -0
  33. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/infographic_extraction.py +0 -0
  34. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/split.py +0 -0
  35. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/store.py +0 -0
  36. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/table_extraction.py +0 -0
  37. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/task_base.py +0 -0
  38. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/task_factory.py +0 -0
  39. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/udf.py +0 -0
  40. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/vdb_upload.py +0 -0
  41. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/__init__.py +0 -0
  42. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/dataset.py +0 -0
  43. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/document_analysis.py +0 -0
  44. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/file_processing/__init__.py +0 -0
  45. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/file_processing/extract.py +0 -0
  46. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/image_disk_utils.py +0 -0
  47. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/milvus.py +0 -0
  48. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/process_json_files.py +0 -0
  49. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/processing.py +0 -0
  50. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/system.py +0 -0
  51. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/util.py +0 -0
  52. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/vdb/__init__.py +0 -0
  53. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/vdb/adt_vdb.py +0 -0
  54. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/vdb/opensearch.py +0 -0
  55. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/zipkin.py +0 -0
  56. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client.egg-info/SOURCES.txt +0 -0
  57. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client.egg-info/dependency_links.txt +0 -0
  58. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client.egg-info/entry_points.txt +0 -0
  59. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client.egg-info/requires.txt +0 -0
  60. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client.egg-info/top_level.txt +0 -0
  61. {nv_ingest_client-2025.9.18.dev20250918 → nv_ingest_client-2025.9.19.dev20250919}/src/version.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.9.18.dev20250918
3
+ Version: 2025.9.19.dev20250919
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -15,12 +15,18 @@ def infer_microservice(
15
15
  truncate: str = "END",
16
16
  batch_size: int = 8191,
17
17
  grpc: bool = False,
18
+ input_names: list = ["text"],
19
+ output_names: list = ["embeddings"],
20
+ dtypes: list = ["BYTES"],
18
21
  ):
19
22
  """
20
23
  This function takes the input data and creates a list of embeddings
21
24
  using the NVIDIA embedding microservice.
22
25
  """
23
- data = {"prompts": [res["metadata"]["content"] for res in data]}
26
+ if isinstance(data[0], str):
27
+ data = {"prompts": data}
28
+ else:
29
+ data = {"prompts": [res["metadata"]["content"] for res in data]}
24
30
  if grpc:
25
31
  model_name = re.sub(r"[^a-zA-Z0-9]", "_", model_name)
26
32
  client = NimClient(
@@ -33,10 +39,10 @@ def infer_microservice(
33
39
  data,
34
40
  model_name,
35
41
  parameters={"input_type": input_type, "truncate": truncate},
36
- outputs=["embeddings"],
37
- dtype=["BYTES"],
38
- input_name=["text"],
42
+ dtypes=dtypes,
43
+ input_names=input_names,
39
44
  batch_size=batch_size,
45
+ output_names=output_names,
40
46
  )
41
47
  else:
42
48
  embedding_endpoint = f"{embedding_endpoint}/embeddings"
@@ -776,17 +776,21 @@ def bulk_insert_milvus(
776
776
  t_bulk_start = time.time()
777
777
  task_ids = []
778
778
 
779
- task_id = utility.do_bulk_insert(
780
- collection_name=collection_name,
781
- files=[file for files in writer.batch_files for file in files],
782
- consistency_level=CONSISTENCY,
779
+ task_ids.append(
780
+ utility.do_bulk_insert(
781
+ collection_name=collection_name,
782
+ files=[file for files in writer.batch_files for file in files],
783
+ consistency_level=CONSISTENCY,
784
+ )
783
785
  )
784
786
 
785
787
  while len(task_ids) > 0:
786
788
  time.sleep(1)
787
- for task_id in task_ids:
789
+ tasks = copy.copy(task_ids)
790
+ for task_id in tasks:
788
791
  task = utility.get_bulk_insert_state(task_id=task_id)
789
792
  state = task.state_name
793
+ logger.info(f"Checking task: {task_id} - imported rows: {task.row_count}")
790
794
  if state == "Completed":
791
795
  logger.info(f"Task: {task_id}")
792
796
  logger.info(f"Start time: {task.create_time_str}")
@@ -884,7 +888,6 @@ def stream_insert_milvus(records, client: MilvusClient, collection_name: str, ba
884
888
  for idx in range(0, len(records), batch_size):
885
889
  client.insert(collection_name=collection_name, data=records[idx : idx + batch_size])
886
890
  count += len(records[idx : idx + batch_size])
887
- client.flush(collection_name)
888
891
  logger.info(f"streamed {count} records")
889
892
 
890
893
 
@@ -896,6 +899,7 @@ def wait_for_index(collection_name: str, num_elements: int, client: MilvusClient
896
899
  bulk inserts are not supported by this function
897
900
  (refer to MilvusClient.refresh_load for bulk inserts).
898
901
  """
902
+ client.flush(collection_name)
899
903
  index_names = utility.list_indexes(collection_name)
900
904
  indexed_rows = 0
901
905
  for index_name in index_names:
@@ -1082,6 +1086,7 @@ def write_to_nvingest_collection(
1082
1086
  )
1083
1087
  # fixes bulk insert lag time https://github.com/milvus-io/milvus/issues/21746
1084
1088
  client.refresh_load(collection_name)
1089
+ logger.info(f"Refresh load response: {client.get_load_state(collection_name)}")
1085
1090
 
1086
1091
 
1087
1092
  def dense_retrieval(
@@ -1110,8 +1115,8 @@ def dense_retrieval(
1110
1115
  Milvus Collection to search against
1111
1116
  client : MilvusClient
1112
1117
  Client connected to Milvus instance.
1113
- dense_model : NVIDIAEmbedding
1114
- Dense model to generate dense embeddings for queries.
1118
+ dense_model : Partial Function
1119
+ Partial function to generate dense embeddings with queries.
1115
1120
  top_k : int
1116
1121
  Number of search results to return per query.
1117
1122
  dense_field : str
@@ -1125,7 +1130,8 @@ def dense_retrieval(
1125
1130
  """
1126
1131
  dense_embeddings = []
1127
1132
  for query in queries:
1128
- dense_embeddings.append(dense_model.get_query_embedding(query))
1133
+ # dense_embeddings.append(dense_model.get_query_embedding(query))
1134
+ dense_embeddings += dense_model([query])
1129
1135
 
1130
1136
  search_params = {}
1131
1137
  if not gpu_search and not local_index:
@@ -1194,7 +1200,7 @@ def hybrid_retrieval(
1194
1200
  dense_embeddings = []
1195
1201
  sparse_embeddings = []
1196
1202
  for query in queries:
1197
- dense_embeddings.append(dense_model.get_query_embedding(query))
1203
+ dense_embeddings += dense_model([query])
1198
1204
  if sparse_model:
1199
1205
  sparse_embeddings.append(_format_sparse_embedding(sparse_model.encode_queries([query])))
1200
1206
  else:
@@ -1330,15 +1336,21 @@ def nvingest_retrieval(
1330
1336
  kwargs.pop("vdb_op", None)
1331
1337
  queries = kwargs.pop("queries", [])
1332
1338
  return vdb_op.retrieval(queries, **kwargs)
1333
- from llama_index.embeddings.nvidia import NVIDIAEmbedding
1334
1339
 
1335
1340
  client_config = ClientConfigSchema()
1336
1341
  nvidia_api_key = client_config.nvidia_api_key
1337
- # required for NVIDIAEmbedding call if the endpoint is Nvidia build api.
1338
1342
  embedding_endpoint = embedding_endpoint if embedding_endpoint else client_config.embedding_nim_endpoint
1339
1343
  model_name = model_name if model_name else client_config.embedding_nim_model_name
1340
1344
  local_index = False
1341
- embed_model = NVIDIAEmbedding(base_url=embedding_endpoint, model=model_name, nvidia_api_key=nvidia_api_key)
1345
+ embed_model = partial(
1346
+ infer_microservice,
1347
+ model_name=model_name,
1348
+ embedding_endpoint=embedding_endpoint,
1349
+ nvidia_api_key=nvidia_api_key,
1350
+ input_type="query",
1351
+ output_names=["embeddings"],
1352
+ grpc=not (urlparse(embedding_endpoint).scheme == "http"),
1353
+ )
1342
1354
  client = client or MilvusClient(milvus_uri, token=f"{username}:{password}")
1343
1355
  final_top_k = top_k
1344
1356
  if nv_ranker:
@@ -1642,7 +1654,7 @@ def embed_index_collection(
1642
1654
  meta_dataframe: Union[str, pd.DataFrame] = None,
1643
1655
  meta_source_field: str = None,
1644
1656
  meta_fields: list[str] = None,
1645
- intput_type: str = "passage",
1657
+ input_type: str = "passage",
1646
1658
  truncate: str = "END",
1647
1659
  client: MilvusClient = None,
1648
1660
  username: str = None,
@@ -1694,7 +1706,6 @@ def embed_index_collection(
1694
1706
  """
1695
1707
  client_config = ClientConfigSchema()
1696
1708
  nvidia_api_key = nvidia_api_key if nvidia_api_key else client_config.nvidia_api_key
1697
- # required for NVIDIAEmbedding call if the endpoint is Nvidia build api.
1698
1709
  embedding_endpoint = embedding_endpoint if embedding_endpoint else client_config.embedding_nim_endpoint
1699
1710
  model_name = model_name if model_name else client_config.embedding_nim_model_name
1700
1711
  # if not scheme we assume we are using grpc
@@ -1738,7 +1749,7 @@ def embed_index_collection(
1738
1749
  model_name,
1739
1750
  embedding_endpoint,
1740
1751
  nvidia_api_key,
1741
- intput_type,
1752
+ input_type,
1742
1753
  truncate,
1743
1754
  batch_size,
1744
1755
  grpc,
@@ -1756,7 +1767,7 @@ def embed_index_collection(
1756
1767
  model_name,
1757
1768
  embedding_endpoint,
1758
1769
  nvidia_api_key,
1759
- intput_type,
1770
+ input_type,
1760
1771
  truncate,
1761
1772
  batch_size,
1762
1773
  grpc,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-client
3
- Version: 2025.9.18.dev20250918
3
+ Version: 2025.9.19.dev20250919
4
4
  Summary: Python client for the nv-ingest service
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License