nv-ingest-client 2025.9.17.dev20250917__tar.gz → 2025.9.19.dev20250919__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nv-ingest-client might be problematic. Click here for more details.
- {nv_ingest_client-2025.9.17.dev20250917/src/nv_ingest_client.egg-info → nv_ingest_client-2025.9.19.dev20250919}/PKG-INFO +1 -1
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/transport.py +10 -4
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/vdb/milvus.py +28 -17
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919/src/nv_ingest_client.egg-info}/PKG-INFO +1 -1
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/LICENSE +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/MANIFEST.in +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/README.md +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/pyproject.toml +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/setup.cfg +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/__init__.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/cli/__init__.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/cli/util/__init__.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/cli/util/click.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/cli/util/processing.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/cli/util/system.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/client/__init__.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/client/client.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/client/interface.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/client/util/processing.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/nv_ingest_cli.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/__init__.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/jobs/__init__.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/jobs/job_spec.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/jobs/job_state.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/__init__.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/audio_extraction.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/caption.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/chart_extraction.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/dedup.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/embed.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/extract.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/filter.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/infographic_extraction.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/split.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/store.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/table_extraction.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/task_base.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/task_factory.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/udf.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/primitives/tasks/vdb_upload.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/__init__.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/dataset.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/document_analysis.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/file_processing/__init__.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/file_processing/extract.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/image_disk_utils.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/milvus.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/process_json_files.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/processing.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/system.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/util.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/vdb/__init__.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/vdb/adt_vdb.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/vdb/opensearch.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client/util/zipkin.py +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client.egg-info/SOURCES.txt +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client.egg-info/dependency_links.txt +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client.egg-info/entry_points.txt +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client.egg-info/requires.txt +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/nv_ingest_client.egg-info/top_level.txt +0 -0
- {nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/version.py +0 -0
|
@@ -15,12 +15,18 @@ def infer_microservice(
|
|
|
15
15
|
truncate: str = "END",
|
|
16
16
|
batch_size: int = 8191,
|
|
17
17
|
grpc: bool = False,
|
|
18
|
+
input_names: list = ["text"],
|
|
19
|
+
output_names: list = ["embeddings"],
|
|
20
|
+
dtypes: list = ["BYTES"],
|
|
18
21
|
):
|
|
19
22
|
"""
|
|
20
23
|
This function takes the input data and creates a list of embeddings
|
|
21
24
|
using the NVIDIA embedding microservice.
|
|
22
25
|
"""
|
|
23
|
-
data
|
|
26
|
+
if isinstance(data[0], str):
|
|
27
|
+
data = {"prompts": data}
|
|
28
|
+
else:
|
|
29
|
+
data = {"prompts": [res["metadata"]["content"] for res in data]}
|
|
24
30
|
if grpc:
|
|
25
31
|
model_name = re.sub(r"[^a-zA-Z0-9]", "_", model_name)
|
|
26
32
|
client = NimClient(
|
|
@@ -33,10 +39,10 @@ def infer_microservice(
|
|
|
33
39
|
data,
|
|
34
40
|
model_name,
|
|
35
41
|
parameters={"input_type": input_type, "truncate": truncate},
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
input_name=["text"],
|
|
42
|
+
dtypes=dtypes,
|
|
43
|
+
input_names=input_names,
|
|
39
44
|
batch_size=batch_size,
|
|
45
|
+
output_names=output_names,
|
|
40
46
|
)
|
|
41
47
|
else:
|
|
42
48
|
embedding_endpoint = f"{embedding_endpoint}/embeddings"
|
|
@@ -776,17 +776,21 @@ def bulk_insert_milvus(
|
|
|
776
776
|
t_bulk_start = time.time()
|
|
777
777
|
task_ids = []
|
|
778
778
|
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
779
|
+
task_ids.append(
|
|
780
|
+
utility.do_bulk_insert(
|
|
781
|
+
collection_name=collection_name,
|
|
782
|
+
files=[file for files in writer.batch_files for file in files],
|
|
783
|
+
consistency_level=CONSISTENCY,
|
|
784
|
+
)
|
|
783
785
|
)
|
|
784
786
|
|
|
785
787
|
while len(task_ids) > 0:
|
|
786
788
|
time.sleep(1)
|
|
787
|
-
|
|
789
|
+
tasks = copy.copy(task_ids)
|
|
790
|
+
for task_id in tasks:
|
|
788
791
|
task = utility.get_bulk_insert_state(task_id=task_id)
|
|
789
792
|
state = task.state_name
|
|
793
|
+
logger.info(f"Checking task: {task_id} - imported rows: {task.row_count}")
|
|
790
794
|
if state == "Completed":
|
|
791
795
|
logger.info(f"Task: {task_id}")
|
|
792
796
|
logger.info(f"Start time: {task.create_time_str}")
|
|
@@ -884,7 +888,6 @@ def stream_insert_milvus(records, client: MilvusClient, collection_name: str, ba
|
|
|
884
888
|
for idx in range(0, len(records), batch_size):
|
|
885
889
|
client.insert(collection_name=collection_name, data=records[idx : idx + batch_size])
|
|
886
890
|
count += len(records[idx : idx + batch_size])
|
|
887
|
-
client.flush(collection_name)
|
|
888
891
|
logger.info(f"streamed {count} records")
|
|
889
892
|
|
|
890
893
|
|
|
@@ -896,6 +899,7 @@ def wait_for_index(collection_name: str, num_elements: int, client: MilvusClient
|
|
|
896
899
|
bulk inserts are not supported by this function
|
|
897
900
|
(refer to MilvusClient.refresh_load for bulk inserts).
|
|
898
901
|
"""
|
|
902
|
+
client.flush(collection_name)
|
|
899
903
|
index_names = utility.list_indexes(collection_name)
|
|
900
904
|
indexed_rows = 0
|
|
901
905
|
for index_name in index_names:
|
|
@@ -1082,6 +1086,7 @@ def write_to_nvingest_collection(
|
|
|
1082
1086
|
)
|
|
1083
1087
|
# fixes bulk insert lag time https://github.com/milvus-io/milvus/issues/21746
|
|
1084
1088
|
client.refresh_load(collection_name)
|
|
1089
|
+
logger.info(f"Refresh load response: {client.get_load_state(collection_name)}")
|
|
1085
1090
|
|
|
1086
1091
|
|
|
1087
1092
|
def dense_retrieval(
|
|
@@ -1110,8 +1115,8 @@ def dense_retrieval(
|
|
|
1110
1115
|
Milvus Collection to search against
|
|
1111
1116
|
client : MilvusClient
|
|
1112
1117
|
Client connected to Milvus instance.
|
|
1113
|
-
dense_model :
|
|
1114
|
-
|
|
1118
|
+
dense_model : Partial Function
|
|
1119
|
+
Partial function to generate dense embeddings with queries.
|
|
1115
1120
|
top_k : int
|
|
1116
1121
|
Number of search results to return per query.
|
|
1117
1122
|
dense_field : str
|
|
@@ -1125,7 +1130,8 @@ def dense_retrieval(
|
|
|
1125
1130
|
"""
|
|
1126
1131
|
dense_embeddings = []
|
|
1127
1132
|
for query in queries:
|
|
1128
|
-
dense_embeddings.append(dense_model.get_query_embedding(query))
|
|
1133
|
+
# dense_embeddings.append(dense_model.get_query_embedding(query))
|
|
1134
|
+
dense_embeddings += dense_model([query])
|
|
1129
1135
|
|
|
1130
1136
|
search_params = {}
|
|
1131
1137
|
if not gpu_search and not local_index:
|
|
@@ -1194,7 +1200,7 @@ def hybrid_retrieval(
|
|
|
1194
1200
|
dense_embeddings = []
|
|
1195
1201
|
sparse_embeddings = []
|
|
1196
1202
|
for query in queries:
|
|
1197
|
-
dense_embeddings
|
|
1203
|
+
dense_embeddings += dense_model([query])
|
|
1198
1204
|
if sparse_model:
|
|
1199
1205
|
sparse_embeddings.append(_format_sparse_embedding(sparse_model.encode_queries([query])))
|
|
1200
1206
|
else:
|
|
@@ -1330,15 +1336,21 @@ def nvingest_retrieval(
|
|
|
1330
1336
|
kwargs.pop("vdb_op", None)
|
|
1331
1337
|
queries = kwargs.pop("queries", [])
|
|
1332
1338
|
return vdb_op.retrieval(queries, **kwargs)
|
|
1333
|
-
from llama_index.embeddings.nvidia import NVIDIAEmbedding
|
|
1334
1339
|
|
|
1335
1340
|
client_config = ClientConfigSchema()
|
|
1336
1341
|
nvidia_api_key = client_config.nvidia_api_key
|
|
1337
|
-
# required for NVIDIAEmbedding call if the endpoint is Nvidia build api.
|
|
1338
1342
|
embedding_endpoint = embedding_endpoint if embedding_endpoint else client_config.embedding_nim_endpoint
|
|
1339
1343
|
model_name = model_name if model_name else client_config.embedding_nim_model_name
|
|
1340
1344
|
local_index = False
|
|
1341
|
-
embed_model =
|
|
1345
|
+
embed_model = partial(
|
|
1346
|
+
infer_microservice,
|
|
1347
|
+
model_name=model_name,
|
|
1348
|
+
embedding_endpoint=embedding_endpoint,
|
|
1349
|
+
nvidia_api_key=nvidia_api_key,
|
|
1350
|
+
input_type="query",
|
|
1351
|
+
output_names=["embeddings"],
|
|
1352
|
+
grpc=not (urlparse(embedding_endpoint).scheme == "http"),
|
|
1353
|
+
)
|
|
1342
1354
|
client = client or MilvusClient(milvus_uri, token=f"{username}:{password}")
|
|
1343
1355
|
final_top_k = top_k
|
|
1344
1356
|
if nv_ranker:
|
|
@@ -1642,7 +1654,7 @@ def embed_index_collection(
|
|
|
1642
1654
|
meta_dataframe: Union[str, pd.DataFrame] = None,
|
|
1643
1655
|
meta_source_field: str = None,
|
|
1644
1656
|
meta_fields: list[str] = None,
|
|
1645
|
-
|
|
1657
|
+
input_type: str = "passage",
|
|
1646
1658
|
truncate: str = "END",
|
|
1647
1659
|
client: MilvusClient = None,
|
|
1648
1660
|
username: str = None,
|
|
@@ -1694,7 +1706,6 @@ def embed_index_collection(
|
|
|
1694
1706
|
"""
|
|
1695
1707
|
client_config = ClientConfigSchema()
|
|
1696
1708
|
nvidia_api_key = nvidia_api_key if nvidia_api_key else client_config.nvidia_api_key
|
|
1697
|
-
# required for NVIDIAEmbedding call if the endpoint is Nvidia build api.
|
|
1698
1709
|
embedding_endpoint = embedding_endpoint if embedding_endpoint else client_config.embedding_nim_endpoint
|
|
1699
1710
|
model_name = model_name if model_name else client_config.embedding_nim_model_name
|
|
1700
1711
|
# if not scheme we assume we are using grpc
|
|
@@ -1738,7 +1749,7 @@ def embed_index_collection(
|
|
|
1738
1749
|
model_name,
|
|
1739
1750
|
embedding_endpoint,
|
|
1740
1751
|
nvidia_api_key,
|
|
1741
|
-
|
|
1752
|
+
input_type,
|
|
1742
1753
|
truncate,
|
|
1743
1754
|
batch_size,
|
|
1744
1755
|
grpc,
|
|
@@ -1756,7 +1767,7 @@ def embed_index_collection(
|
|
|
1756
1767
|
model_name,
|
|
1757
1768
|
embedding_endpoint,
|
|
1758
1769
|
nvidia_api_key,
|
|
1759
|
-
|
|
1770
|
+
input_type,
|
|
1760
1771
|
truncate,
|
|
1761
1772
|
batch_size,
|
|
1762
1773
|
grpc,
|
|
File without changes
|
{nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/MANIFEST.in
RENAMED
|
File without changes
|
|
File without changes
|
{nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/pyproject.toml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nv_ingest_client-2025.9.17.dev20250917 → nv_ingest_client-2025.9.19.dev20250919}/src/version.py
RENAMED
|
File without changes
|