nv-ingest-api 2025.5.10.dev20250510__py3-none-any.whl → 2025.5.12.dev20250512__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py +35 -37
- nv_ingest_api/internal/primitives/nim/nim_client.py +17 -9
- {nv_ingest_api-2025.5.10.dev20250510.dist-info → nv_ingest_api-2025.5.12.dev20250512.dist-info}/METADATA +1 -1
- {nv_ingest_api-2025.5.10.dev20250510.dist-info → nv_ingest_api-2025.5.12.dev20250512.dist-info}/RECORD +7 -7
- {nv_ingest_api-2025.5.10.dev20250510.dist-info → nv_ingest_api-2025.5.12.dev20250512.dist-info}/WHEEL +0 -0
- {nv_ingest_api-2025.5.10.dev20250510.dist-info → nv_ingest_api-2025.5.12.dev20250512.dist-info}/licenses/LICENSE +0 -0
- {nv_ingest_api-2025.5.10.dev20250510.dist-info → nv_ingest_api-2025.5.12.dev20250512.dist-info}/top_level.txt +0 -0

nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py

@@ -5,6 +5,7 @@
 from typing import Any, Dict, List, Optional, Tuple

 from nv_ingest_api.internal.primitives.nim import ModelInterface
+import numpy as np


 # Assume ModelInterface is defined elsewhere in the project.
@@ -22,20 +23,13 @@ class EmbeddingModelInterface(ModelInterface):

     def prepare_data_for_inference(self, data: Dict[str, Any]) -> Dict[str, Any]:
         """
-        Prepare input data for embedding inference. Ensures that a 'prompts' key is provided
-        and that its value is a list.
-
-        Raises
-        ------
-        KeyError
-            If the 'prompts' key is missing.
+        Prepare input data for embedding inference. Returns a list of strings representing the text to be embedded.
         """
         if "prompts" not in data:
             raise KeyError("Input data must include 'prompts'.")
-        # Ensure the prompts are in list format.
         if not isinstance(data["prompts"], list):
             data["prompts"] = [data["prompts"]]
-        return data
+        return {"prompts": data["prompts"]}

     def format_input(
         self, data: Dict[str, Any], protocol: str, max_batch_size: int, **kwargs
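The reworked prepare_data_for_inference wraps a lone string into a one-element list and now returns a fresh {"prompts": [...]} mapping rather than echoing the caller's dict back. A minimal usage sketch, assuming EmbeddingModelInterface can be instantiated directly (the prompt strings are illustrative):

    iface = EmbeddingModelInterface()

    # A bare string is wrapped into a list; only the "prompts" key survives.
    iface.prepare_data_for_inference({"prompts": "hello world"})
    # -> {"prompts": ["hello world"]}

    # Extra keys are dropped by the new return value.
    iface.prepare_data_for_inference({"prompts": ["a", "b"], "extra": 1})
    # -> {"prompts": ["a", "b"]}

    # A missing "prompts" key still raises KeyError.
    iface.prepare_data_for_inference({})  # KeyError: "Input data must include 'prompts'."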
@@ -63,29 +57,32 @@ class EmbeddingModelInterface(ModelInterface):
           - payloads is a list of JSON-serializable payload dictionaries.
           - batch_data_list is a list of dictionaries containing the key "prompts" corresponding to each batch.
         """
-        if protocol != "http":
-            raise ValueError("EmbeddingModelInterface only supports HTTP protocol.")
-
-        prompts = data.get("prompts", [])

         def chunk_list(lst, chunk_size):
+            lst = lst["prompts"]
             return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]

-        batches = chunk_list(
-        …
-                "input_type": kwargs.get("input_type", "
+        batches = chunk_list(data, max_batch_size)
+        if protocol == "http":
+            payloads = []
+            batch_data_list = []
+            for batch in batches:
+                payload = {
+                    "model": kwargs.get("model_name"),
+                    "input": batch,
+                    "encoding_format": kwargs.get("encoding_format", "float"),
+                    "input_type": kwargs.get("input_type", "passage"),
                     "truncate": kwargs.get("truncate", "NONE"),
-            }
-        …
+                }
+                payloads.append(payload)
+                batch_data_list.append({"prompts": batch})
+        elif protocol == "grpc":
+            payloads = []
+            batch_data_list = []
+            for batch in batches:
+                text_np = np.array([[text.encode("utf-8")] for text in batch], dtype=np.object_)
+                payloads.append(text_np)
+                batch_data_list.append({"prompts": batch})
         return payloads, batch_data_list

     def parse_output(self, response: Any, protocol: str, data: Optional[Dict[str, Any]] = None, **kwargs) -> Any:
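The notable addition is the grpc branch: each batch of prompts is packed into a NumPy object array of UTF-8 byte strings, the layout Triton's Python client expects for a BYTES input tensor, while the http branch keeps building OpenAI-style embedding payloads. A small sketch of the gRPC packing, with illustrative prompt text:

    import numpy as np

    batch = ["first passage", "second passage"]
    # One row per prompt, shape (batch, 1), dtype object holding UTF-8 bytes.
    text_np = np.array([[text.encode("utf-8")] for text in batch], dtype=np.object_)
    print(text_np.shape)  # (2, 1)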
@@ -108,16 +105,17 @@ class EmbeddingModelInterface(ModelInterface):
         list
             A list of generated embeddings extracted from the response.
         """
-        if protocol
-        …
+        if protocol == "http":
+            if isinstance(response, dict):
+                embeddings = response.get("data")
+                if not embeddings:
+                    raise RuntimeError("Unexpected response format: 'data' key is missing or empty.")
+                # Each item in embeddings is expected to have an 'embedding' field.
+                return [item.get("embedding", None) for item in embeddings]
+            else:
+                return [str(response)]
+        elif protocol == "grpc":
+            return [res.flatten() for res in response]

     def process_inference_results(self, output: Any, protocol: str, **kwargs) -> Any:
         """
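For HTTP, the rewritten parse_output walks an OpenAI-style embeddings response; for gRPC it flattens each returned array. A sketch of the HTTP shape it expects (values illustrative):

    response = {
        "data": [
            {"index": 0, "embedding": [0.1, 0.2, 0.3]},
            {"index": 1, "embedding": [0.4, 0.5, 0.6]},
        ]
    }
    # Mirrors the new parsing logic: one embedding list per input prompt.
    embeddings = [item.get("embedding", None) for item in response["data"]]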

nv_ingest_api/internal/primitives/nim/nim_client.py

@@ -129,7 +129,7 @@ class NimClient:
         """
         if self.protocol == "grpc":
             logger.debug("Performing gRPC inference for a batch...")
-            response = self._grpc_infer(batch_input, model_name)
+            response = self._grpc_infer(batch_input, model_name, **kwargs)
             logger.debug("gRPC inference received response for a batch")
         elif self.protocol == "http":
             logger.debug("Performing HTTP inference for a batch...")
@@ -221,7 +221,7 @@ class NimClient:

         return all_results

-    def _grpc_infer(self, formatted_input: np.ndarray, model_name: str) -> np.ndarray:
+    def _grpc_infer(self, formatted_input: np.ndarray, model_name: str, **kwargs) -> np.ndarray:
         """
         Perform inference using the gRPC protocol.

@@ -238,16 +238,24 @@ class NimClient:
             The output of the model as a numpy array.
         """

-        …
+        parameters = kwargs.get("parameters", {})
+        output_names = kwargs.get("outputs", ["output"])
+        dtype = kwargs.get("dtype", "FP32")
+        input_name = kwargs.get("input_name", "input")

-        …
-        logger.debug(f"gRPC inference response: {response}")
+        input_tensors = grpcclient.InferInput(input_name, formatted_input.shape, datatype=dtype)
+        input_tensors.set_data_from_numpy(formatted_input)

+        outputs = [grpcclient.InferRequestedOutput(output_name) for output_name in output_names]
+        response = self.client.infer(
+            model_name=model_name, parameters=parameters, inputs=[input_tensors], outputs=outputs
+        )
+        logger.debug(f"gRPC inference response: {response}")
         # TODO(self.client.has_error(response)) => raise error
-        …
+        if len(outputs) == 1:
+            return response.as_numpy(outputs[0].name())
+        else:
+            return [response.as_numpy(output.name()) for output in outputs]

     def _http_infer(self, formatted_input: dict) -> dict:
         """
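_grpc_infer now derives its tensor wiring from **kwargs (grpcclient here refers to Triton's tritonclient.grpc module) instead of hard-coding a single FP32 input. A hypothetical call-site sketch: the kwarg keys are the ones the new code reads, but the tensor names ("text", "embeddings") and model name are illustrative, not taken from the diff:

    result = nim_client._grpc_infer(
        text_np,                     # np.object_ batch from format_input's grpc branch
        model_name="embedding_model",
        input_name="text",           # -> grpcclient.InferInput("text", shape, datatype="BYTES")
        dtype="BYTES",               # overrides the "FP32" default
        outputs=["embeddings"],      # one InferRequestedOutput per requested name
        parameters={},               # forwarded verbatim to self.client.infer()
    )
    # With a single requested output, the flat numpy array is returned directly;
    # with several, a list of arrays in the requested order.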

{nv_ingest_api-2025.5.10.dev20250510.dist-info → nv_ingest_api-2025.5.12.dev20250512.dist-info}/RECORD

@@ -46,7 +46,7 @@ nv_ingest_api/internal/primitives/control_message_task.py,sha256=nWVB3QsP6p8BKwH
 nv_ingest_api/internal/primitives/ingest_control_message.py,sha256=rvipBiiUaHuRhupFCFDCG8rv0PylSJibCiJ7rDeb98A,8514
 nv_ingest_api/internal/primitives/nim/__init__.py,sha256=i_i_fBR2EcRCh2Y19DF6GM3s_Q0VPgo_thPnhEIJUyg,266
 nv_ingest_api/internal/primitives/nim/default_values.py,sha256=W92XjfyeC6uuVxut6J7p00x1kpNsnXIDb97gSVytZJk,380
-nv_ingest_api/internal/primitives/nim/nim_client.py,sha256=
+nv_ingest_api/internal/primitives/nim/nim_client.py,sha256=lEP-PBp921--pxQzeVxxafR2BhONpli2Ad8oa0XLR4Y,14920
 nv_ingest_api/internal/primitives/nim/nim_model_interface.py,sha256=wMEgoi79YQn_4338MVemkeZgM1J-vnz0aZWpvqDhib4,2392
 nv_ingest_api/internal/primitives/nim/model_interface/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
 nv_ingest_api/internal/primitives/nim/model_interface/cached.py,sha256=b1HX-PY1ExW5V6pXC1ZiHdobeG_BmbPr3rBbVJef13s,11003

@@ -56,7 +56,7 @@ nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=x35a9AyT
 nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py,sha256=MFWPqMTXs_MZG3ripRR21o7f_mVeoE46Q10yvJ8KNr0,7023
 nv_ingest_api/internal/primitives/nim/model_interface/paddle.py,sha256=rSUPwl5XOrqneoS6aKhatVjrNBg_LhP3nwUWS_aTwz0,17950
 nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py,sha256=OYg4AGki_wm--Np9VlSm0eZC-r54GbDOISbe9v0B9fw,12967
-nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py,sha256=
+nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py,sha256=8ld_if6N3pe3W7NA8Xwm-ndCq53s_v3LmmoyQHnxxEo,5071
 nv_ingest_api/internal/primitives/nim/model_interface/vlm.py,sha256=qJ382PU1ZrIM-SR3cqIhtY_W2rmHec2HIa2aUB2SvaU,6031
 nv_ingest_api/internal/primitives/nim/model_interface/yolox.py,sha256=exN0pKTBXd3pb5kKP96jinTYisgz1Y7EyWmWUuDNnCY,49312
 nv_ingest_api/internal/primitives/tracing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -145,8 +145,8 @@ nv_ingest_api/util/service_clients/redis/redis_client.py,sha256=Xa9eeI3kfDBDlLsG
 nv_ingest_api/util/service_clients/rest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=K8hzIV4EcV-97G0SboY6LHMhWLx87l9wCI2CdWw9W_E,21734
 nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
-nv_ingest_api-2025.5.
-nv_ingest_api-2025.5.
-nv_ingest_api-2025.5.
-nv_ingest_api-2025.5.
-nv_ingest_api-2025.5.
+nv_ingest_api-2025.5.12.dev20250512.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+nv_ingest_api-2025.5.12.dev20250512.dist-info/METADATA,sha256=rdw7szDF5gzTKuYwhI7F3ZdLRpw8RBXo7WuE7p1j6rc,13889
+nv_ingest_api-2025.5.12.dev20250512.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
+nv_ingest_api-2025.5.12.dev20250512.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
+nv_ingest_api-2025.5.12.dev20250512.dist-info/RECORD,,
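Only the hashes and the renamed dist-info paths change here. For reference, each RECORD line is path,sha256=<digest>,<size>, where the digest is the URL-safe base64 encoding of the file's raw SHA-256 with the trailing = padding stripped; a small sketch to recompute an entry (standard library only):

    import base64
    import hashlib

    def record_entry(path: str) -> str:
        # Rebuild one wheel RECORD line: "path,sha256=<digest>,<size>".
        with open(path, "rb") as f:
            data = f.read()
        digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
        return f"{path},sha256={digest.decode()},{len(data)}"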

The WHEEL, licenses/LICENSE, and top_level.txt files are unchanged; only their dist-info directory was renamed for the new version.