nv-ingest-api 2025.5.10.dev20250510__py3-none-any.whl → 2025.5.12.dev20250512__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nv-ingest-api might be problematic. Click here for more details.

@@ -5,6 +5,7 @@
5
5
  from typing import Any, Dict, List, Optional, Tuple
6
6
 
7
7
  from nv_ingest_api.internal.primitives.nim import ModelInterface
8
+ import numpy as np
8
9
 
9
10
 
10
11
  # Assume ModelInterface is defined elsewhere in the project.
@@ -22,20 +23,13 @@ class EmbeddingModelInterface(ModelInterface):
22
23
 
23
24
  def prepare_data_for_inference(self, data: Dict[str, Any]) -> Dict[str, Any]:
24
25
  """
25
- Prepare input data for embedding inference. Ensures that a 'prompts' key is provided
26
- and that its value is a list.
27
-
28
- Raises
29
- ------
30
- KeyError
31
- If the 'prompts' key is missing.
26
+ Prepare input data for embedding inference. Returns a list of strings representing the text to be embedded.
32
27
  """
33
28
  if "prompts" not in data:
34
29
  raise KeyError("Input data must include 'prompts'.")
35
- # Ensure the prompts are in list format.
36
30
  if not isinstance(data["prompts"], list):
37
31
  data["prompts"] = [data["prompts"]]
38
- return data
32
+ return {"prompts": data["prompts"]}
39
33
 
40
34
  def format_input(
41
35
  self, data: Dict[str, Any], protocol: str, max_batch_size: int, **kwargs
@@ -63,29 +57,32 @@ class EmbeddingModelInterface(ModelInterface):
63
57
  - payloads is a list of JSON-serializable payload dictionaries.
64
58
  - batch_data_list is a list of dictionaries containing the key "prompts" corresponding to each batch.
65
59
  """
66
- if protocol != "http":
67
- raise ValueError("EmbeddingModelInterface only supports HTTP protocol.")
68
-
69
- prompts = data.get("prompts", [])
70
60
 
71
61
  def chunk_list(lst, chunk_size):
62
+ lst = lst["prompts"]
72
63
  return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]
73
64
 
74
- batches = chunk_list(prompts, max_batch_size)
75
- payloads = []
76
- batch_data_list = []
77
- for batch in batches:
78
- payload = {
79
- "model": kwargs.get("model_name"),
80
- "input": batch,
81
- "encoding_format": kwargs.get("encoding_format", "float"),
82
- "extra_body": {
83
- "input_type": kwargs.get("input_type", "query"),
65
+ batches = chunk_list(data, max_batch_size)
66
+ if protocol == "http":
67
+ payloads = []
68
+ batch_data_list = []
69
+ for batch in batches:
70
+ payload = {
71
+ "model": kwargs.get("model_name"),
72
+ "input": batch,
73
+ "encoding_format": kwargs.get("encoding_format", "float"),
74
+ "input_type": kwargs.get("input_type", "passage"),
84
75
  "truncate": kwargs.get("truncate", "NONE"),
85
- },
86
- }
87
- payloads.append(payload)
88
- batch_data_list.append({"prompts": batch})
76
+ }
77
+ payloads.append(payload)
78
+ batch_data_list.append({"prompts": batch})
79
+ elif protocol == "grpc":
80
+ payloads = []
81
+ batch_data_list = []
82
+ for batch in batches:
83
+ text_np = np.array([[text.encode("utf-8")] for text in batch], dtype=np.object_)
84
+ payloads.append(text_np)
85
+ batch_data_list.append({"prompts": batch})
89
86
  return payloads, batch_data_list
90
87
 
91
88
  def parse_output(self, response: Any, protocol: str, data: Optional[Dict[str, Any]] = None, **kwargs) -> Any:
@@ -108,16 +105,17 @@ class EmbeddingModelInterface(ModelInterface):
108
105
  list
109
106
  A list of generated embeddings extracted from the response.
110
107
  """
111
- if protocol != "http":
112
- raise ValueError("EmbeddingModelInterface only supports HTTP protocol.")
113
- if isinstance(response, dict):
114
- embeddings = response.get("data")
115
- if not embeddings:
116
- raise RuntimeError("Unexpected response format: 'data' key is missing or empty.")
117
- # Each item in embeddings is expected to have an 'embedding' field.
118
- return [item.get("embedding", None) for item in embeddings]
119
- else:
120
- return [str(response)]
108
+ if protocol == "http":
109
+ if isinstance(response, dict):
110
+ embeddings = response.get("data")
111
+ if not embeddings:
112
+ raise RuntimeError("Unexpected response format: 'data' key is missing or empty.")
113
+ # Each item in embeddings is expected to have an 'embedding' field.
114
+ return [item.get("embedding", None) for item in embeddings]
115
+ else:
116
+ return [str(response)]
117
+ elif protocol == "grpc":
118
+ return [res.flatten() for res in response]
121
119
 
122
120
  def process_inference_results(self, output: Any, protocol: str, **kwargs) -> Any:
123
121
  """
@@ -129,7 +129,7 @@ class NimClient:
129
129
  """
130
130
  if self.protocol == "grpc":
131
131
  logger.debug("Performing gRPC inference for a batch...")
132
- response = self._grpc_infer(batch_input, model_name)
132
+ response = self._grpc_infer(batch_input, model_name, **kwargs)
133
133
  logger.debug("gRPC inference received response for a batch")
134
134
  elif self.protocol == "http":
135
135
  logger.debug("Performing HTTP inference for a batch...")
@@ -221,7 +221,7 @@ class NimClient:
221
221
 
222
222
  return all_results
223
223
 
224
- def _grpc_infer(self, formatted_input: np.ndarray, model_name: str) -> np.ndarray:
224
+ def _grpc_infer(self, formatted_input: np.ndarray, model_name: str, **kwargs) -> np.ndarray:
225
225
  """
226
226
  Perform inference using the gRPC protocol.
227
227
 
@@ -238,16 +238,24 @@ class NimClient:
238
238
  The output of the model as a numpy array.
239
239
  """
240
240
 
241
- input_tensors = [grpcclient.InferInput("input", formatted_input.shape, datatype="FP32")]
242
- input_tensors[0].set_data_from_numpy(formatted_input)
241
+ parameters = kwargs.get("parameters", {})
242
+ output_names = kwargs.get("outputs", ["output"])
243
+ dtype = kwargs.get("dtype", "FP32")
244
+ input_name = kwargs.get("input_name", "input")
243
245
 
244
- outputs = [grpcclient.InferRequestedOutput("output")]
245
- response = self.client.infer(model_name=model_name, inputs=input_tensors, outputs=outputs)
246
- logger.debug(f"gRPC inference response: {response}")
246
+ input_tensors = grpcclient.InferInput(input_name, formatted_input.shape, datatype=dtype)
247
+ input_tensors.set_data_from_numpy(formatted_input)
247
248
 
249
+ outputs = [grpcclient.InferRequestedOutput(output_name) for output_name in output_names]
250
+ response = self.client.infer(
251
+ model_name=model_name, parameters=parameters, inputs=[input_tensors], outputs=outputs
252
+ )
253
+ logger.debug(f"gRPC inference response: {response}")
248
254
  # TODO(self.client.has_error(response)) => raise error
249
-
250
- return response.as_numpy("output")
255
+ if len(outputs) == 1:
256
+ return response.as_numpy(outputs[0].name())
257
+ else:
258
+ return [response.as_numpy(output.name()) for output in outputs]
251
259
 
252
260
  def _http_infer(self, formatted_input: dict) -> dict:
253
261
  """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nv-ingest-api
3
- Version: 2025.5.10.dev20250510
3
+ Version: 2025.5.12.dev20250512
4
4
  Summary: Python module with core document ingestion functions.
5
5
  Author-email: Jeremy Dyer <jdyer@nvidia.com>
6
6
  License: Apache License
@@ -46,7 +46,7 @@ nv_ingest_api/internal/primitives/control_message_task.py,sha256=nWVB3QsP6p8BKwH
46
46
  nv_ingest_api/internal/primitives/ingest_control_message.py,sha256=rvipBiiUaHuRhupFCFDCG8rv0PylSJibCiJ7rDeb98A,8514
47
47
  nv_ingest_api/internal/primitives/nim/__init__.py,sha256=i_i_fBR2EcRCh2Y19DF6GM3s_Q0VPgo_thPnhEIJUyg,266
48
48
  nv_ingest_api/internal/primitives/nim/default_values.py,sha256=W92XjfyeC6uuVxut6J7p00x1kpNsnXIDb97gSVytZJk,380
49
- nv_ingest_api/internal/primitives/nim/nim_client.py,sha256=7C_t3BnYz_hL2H8RmvOShLCKlfYmwIREC6vnOnzOHWA,14483
49
+ nv_ingest_api/internal/primitives/nim/nim_client.py,sha256=lEP-PBp921--pxQzeVxxafR2BhONpli2Ad8oa0XLR4Y,14920
50
50
  nv_ingest_api/internal/primitives/nim/nim_model_interface.py,sha256=wMEgoi79YQn_4338MVemkeZgM1J-vnz0aZWpvqDhib4,2392
51
51
  nv_ingest_api/internal/primitives/nim/model_interface/__init__.py,sha256=wQSlVx3T14ZgQAt-EPzEczQusXVW0W8yynnUaFFGE3s,143
52
52
  nv_ingest_api/internal/primitives/nim/model_interface/cached.py,sha256=b1HX-PY1ExW5V6pXC1ZiHdobeG_BmbPr3rBbVJef13s,11003
@@ -56,7 +56,7 @@ nv_ingest_api/internal/primitives/nim/model_interface/helpers.py,sha256=x35a9AyT
56
56
  nv_ingest_api/internal/primitives/nim/model_interface/nemoretriever_parse.py,sha256=MFWPqMTXs_MZG3ripRR21o7f_mVeoE46Q10yvJ8KNr0,7023
57
57
  nv_ingest_api/internal/primitives/nim/model_interface/paddle.py,sha256=rSUPwl5XOrqneoS6aKhatVjrNBg_LhP3nwUWS_aTwz0,17950
58
58
  nv_ingest_api/internal/primitives/nim/model_interface/parakeet.py,sha256=OYg4AGki_wm--Np9VlSm0eZC-r54GbDOISbe9v0B9fw,12967
59
- nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py,sha256=oai0-moKDJOnOMfTaGQf-vo6qMRD6pbcf7_XRIt-oJ8,4934
59
+ nv_ingest_api/internal/primitives/nim/model_interface/text_embedding.py,sha256=8ld_if6N3pe3W7NA8Xwm-ndCq53s_v3LmmoyQHnxxEo,5071
60
60
  nv_ingest_api/internal/primitives/nim/model_interface/vlm.py,sha256=qJ382PU1ZrIM-SR3cqIhtY_W2rmHec2HIa2aUB2SvaU,6031
61
61
  nv_ingest_api/internal/primitives/nim/model_interface/yolox.py,sha256=exN0pKTBXd3pb5kKP96jinTYisgz1Y7EyWmWUuDNnCY,49312
62
62
  nv_ingest_api/internal/primitives/tracing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -145,8 +145,8 @@ nv_ingest_api/util/service_clients/redis/redis_client.py,sha256=Xa9eeI3kfDBDlLsG
145
145
  nv_ingest_api/util/service_clients/rest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
146
146
  nv_ingest_api/util/service_clients/rest/rest_client.py,sha256=K8hzIV4EcV-97G0SboY6LHMhWLx87l9wCI2CdWw9W_E,21734
147
147
  nv_ingest_api/util/string_processing/__init__.py,sha256=mkwHthyS-IILcLcL1tJYeF6mpqX3pxEw5aUzDGjTSeU,1411
148
- nv_ingest_api-2025.5.10.dev20250510.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
149
- nv_ingest_api-2025.5.10.dev20250510.dist-info/METADATA,sha256=PP9KnoMh2nnyGqZ5KXrfj7bUO5D3R1NE091c-bKKbOo,13889
150
- nv_ingest_api-2025.5.10.dev20250510.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
151
- nv_ingest_api-2025.5.10.dev20250510.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
152
- nv_ingest_api-2025.5.10.dev20250510.dist-info/RECORD,,
148
+ nv_ingest_api-2025.5.12.dev20250512.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
149
+ nv_ingest_api-2025.5.12.dev20250512.dist-info/METADATA,sha256=rdw7szDF5gzTKuYwhI7F3ZdLRpw8RBXo7WuE7p1j6rc,13889
150
+ nv_ingest_api-2025.5.12.dev20250512.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
151
+ nv_ingest_api-2025.5.12.dev20250512.dist-info/top_level.txt,sha256=abjYMlTJGoG5tOdfIB-IWvLyKclw6HLaRSc8MxX4X6I,14
152
+ nv_ingest_api-2025.5.12.dev20250512.dist-info/RECORD,,