mlrun 1.10.0rc24__py3-none-any.whl → 1.10.0rc25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (31)
  1. mlrun/common/schemas/hub.py +14 -0
  2. mlrun/common/schemas/model_monitoring/constants.py +1 -0
  3. mlrun/common/schemas/model_monitoring/model_endpoints.py +10 -1
  4. mlrun/datastore/azure_blob.py +66 -43
  5. mlrun/datastore/datastore_profile.py +8 -2
  6. mlrun/datastore/model_provider/huggingface_provider.py +118 -30
  7. mlrun/datastore/model_provider/model_provider.py +61 -3
  8. mlrun/datastore/model_provider/openai_provider.py +114 -43
  9. mlrun/db/base.py +1 -1
  10. mlrun/db/httpdb.py +6 -4
  11. mlrun/db/nopdb.py +1 -0
  12. mlrun/model_monitoring/api.py +2 -2
  13. mlrun/model_monitoring/applications/base.py +22 -10
  14. mlrun/model_monitoring/applications/context.py +1 -4
  15. mlrun/model_monitoring/controller.py +10 -2
  16. mlrun/model_monitoring/db/_schedules.py +2 -4
  17. mlrun/platforms/iguazio.py +7 -3
  18. mlrun/projects/project.py +28 -24
  19. mlrun/runtimes/nuclio/__init__.py +1 -0
  20. mlrun/runtimes/nuclio/application/application.py +11 -2
  21. mlrun/runtimes/nuclio/function.py +10 -0
  22. mlrun/runtimes/nuclio/serving.py +4 -0
  23. mlrun/runtimes/utils.py +22 -5
  24. mlrun/serving/server.py +25 -14
  25. mlrun/utils/version/version.json +2 -2
  26. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/METADATA +23 -22
  27. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/RECORD +31 -31
  28. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/WHEEL +0 -0
  29. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/entry_points.txt +0 -0
  30. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/licenses/LICENSE +0 -0
  31. {mlrun-1.10.0rc24.dist-info → mlrun-1.10.0rc25.dist-info}/top_level.txt +0 -0

mlrun/common/schemas/hub.py
@@ -15,6 +15,7 @@
 from datetime import datetime, timezone
 from typing import Optional
 
+import deepdiff
 from pydantic.v1 import BaseModel, Extra, Field
 
 import mlrun.common.types
@@ -83,6 +84,19 @@ class HubSource(BaseModel):
             status=ObjectStatus(state="created"),
         )
 
+    def diff(self, another_source: "HubSource") -> dict:
+        """
+        Compare this HubSource with another one.
+        Returns a dict of differences (metadata, spec, status).
+        """
+        exclude_paths = [
+            "root['metadata']['updated']",
+            "root['metadata']['created']",
+        ]
+        return deepdiff.DeepDiff(
+            self.dict(), another_source.dict(), exclude_paths=exclude_paths
+        )
+
 
 last_source_index = -1
 
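The added `diff()` helper compares two `HubSource` objects with deepdiff while ignoring the `metadata.created` / `metadata.updated` timestamps. A hedged usage sketch (only `diff()` itself comes from this release; `generate_default_source()` and the `spec.path` field are assumed from existing mlrun code):

```python
# Hedged sketch: comparing two hub sources with the new HubSource.diff() helper.
import copy

from mlrun.common.schemas.hub import HubSource

source = HubSource.generate_default_source()  # assumed existing helper; may return None if disabled
if source:
    modified = copy.deepcopy(source)
    modified.spec.path = "https://example.com/other-hub/"  # hypothetical change for illustration
    print(source.diff(modified))  # deepdiff report; created/updated timestamps are excluded
```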

mlrun/common/schemas/model_monitoring/constants.py
@@ -331,6 +331,7 @@ class EndpointType(IntEnum):
 class EndpointMode(IntEnum):
     REAL_TIME = 0
     BATCH = 1
+    BATCH_LEGACY = 2  # legacy batch mode, used for endpoints created through the batch inference job
 
 
 class MonitoringFunctionNames(MonitoringStrEnum):

mlrun/common/schemas/model_monitoring/model_endpoints.py
@@ -119,7 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
     project: constr(regex=PROJECT_PATTERN)
     endpoint_type: EndpointType = EndpointType.NODE_EP
     uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
-    mode: EndpointMode = EndpointMode.REAL_TIME
+    mode: Optional[EndpointMode] = None
 
     @classmethod
     def mutable_fields(cls):
@@ -131,6 +131,15 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
             return str(v)
         return v
 
+    @validator("mode", pre=True, always=True)
+    def _set_mode_based_on_endpoint_type(cls, v, values):  # noqa: N805
+        if v is None:
+            if values.get("endpoint_type") == EndpointType.BATCH_EP:
+                return EndpointMode.BATCH_LEGACY
+            else:
+                return EndpointMode.REAL_TIME
+        return v
+
 
 class ModelEndpointSpec(ObjectSpec, ModelEndpointParser):
     model_class: Optional[str] = ""
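Together, the new `BATCH_LEGACY` value and the `mode` validator mean that endpoints which do not set a mode now get one derived from their endpoint type: batch endpoints become `BATCH_LEGACY`, everything else stays `REAL_TIME`. A hedged sketch of that behavior (field names are taken from the hunks above; the exact constructor arguments are assumptions):

```python
# Hedged sketch: mode is now derived from endpoint_type when left unset.
from mlrun.common.schemas.model_monitoring.constants import EndpointMode, EndpointType
from mlrun.common.schemas.model_monitoring.model_endpoints import ModelEndpointMetadata

batch_meta = ModelEndpointMetadata(
    project="my-project", name="my-endpoint", endpoint_type=EndpointType.BATCH_EP
)
assert batch_meta.mode == EndpointMode.BATCH_LEGACY

online_meta = ModelEndpointMetadata(project="my-project", name="my-endpoint")
assert online_meta.mode == EndpointMode.REAL_TIME  # endpoint_type defaults to NODE_EP
```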

mlrun/datastore/azure_blob.py
@@ -229,18 +229,25 @@ class AzureBlobStore(DataStore):
         st = self.storage_options
         service = "blob"
         primary_url = None
-        if st.get("connection_string"):
+
+        # Parse connection string (fills account_name/account_key or SAS)
+        connection_string = st.get("connection_string")
+        if connection_string:
             primary_url, _, parsed_credential = parse_connection_str(
-                st.get("connection_string"), credential=None, service=service
+                connection_string, credential=None, service=service
             )
-            for key in ["account_name", "account_key"]:
-                parsed_value = parsed_credential.get(key)
-                if parsed_value:
+
+            if isinstance(parsed_credential, str):
+                # SharedAccessSignature as raw string
+                parsed_credential = {"sas_token": parsed_credential}
+
+            for key in ["account_name", "account_key", "sas_token"]:
+                if parsed_value := parsed_credential.get(key):
                     if key in st and st[key] != parsed_value:
                         if key == "account_name":
                             raise mlrun.errors.MLRunInvalidArgumentError(
-                                f"Storage option for '{key}' is '{st[key]}',\
-                                which does not match corresponding connection string '{parsed_value}'"
+                                f"Storage option for '{key}' is '{st[key]}', "
+                                f"which does not match corresponding connection string '{parsed_value}'"
                             )
                         else:
                             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -249,6 +256,7 @@ class AzureBlobStore(DataStore):
                     st[key] = parsed_value
 
         account_name = st.get("account_name")
+        # Derive host (prefer connection string primary URL)
         if primary_url:
            if primary_url.startswith("http://"):
                primary_url = primary_url[len("http://") :]
@@ -258,48 +266,63 @@ class AzureBlobStore(DataStore):
         elif account_name:
             host = f"{account_name}.{service}.core.windows.net"
         else:
+            # nothing to configure yet
             return res
 
-        if "account_key" in st:
+        host = host.rstrip("/")
+
+        # Account key (optional; WASB supports it)
+        if "account_key" in st and st["account_key"]:
             res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
 
-        if "client_secret" in st or "client_id" in st or "tenant_id" in st:
-            res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "OAuth"
-            res[f"spark.hadoop.fs.azure.account.oauth.provider.type.{host}"] = (
-                "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
-            )
-            if "client_id" in st:
-                res[f"spark.hadoop.fs.azure.account.oauth2.client.id.{host}"] = st[
-                    "client_id"
-                ]
-            if "client_secret" in st:
-                res[f"spark.hadoop.fs.azure.account.oauth2.client.secret.{host}"] = st[
-                    "client_secret"
-                ]
-            if "tenant_id" in st:
-                tenant_id = st["tenant_id"]
-                res[f"spark.hadoop.fs.azure.account.oauth2.client.endpoint.{host}"] = (
-                    f"https://login.microsoftonline.com/{tenant_id}/oauth2/token"
-                )
+        # --- WASB + SAS (container-scoped key; no provider classes needed) ---
+        if "sas_token" in st and st["sas_token"]:
+            sas = st["sas_token"].lstrip("?")
+            if container := getattr(self, "endpoint", None) or st.get("container"):
+                # fs.azure.sas.<container>.<account>.blob.core.windows.net = <sas>
+                res[f"spark.hadoop.fs.azure.sas.{container}.{host}"] = sas
 
-        if "sas_token" in st:
-            res[f"spark.hadoop.fs.azure.account.auth.type.{host}"] = "SAS"
-            res[f"spark.hadoop.fs.azure.sas.token.provider.type.{host}"] = (
-                "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider"
-            )
-            res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
+            else:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "Container name is required for WASB SAS. "
+                    "Set self.endpoint or storage_options['container']."
+                )
         return res
 
     @property
     def spark_url(self):
-        spark_options = self.get_spark_options()
-        url = f"wasbs://{self.endpoint}"
-        prefix = "spark.hadoop.fs.azure.account.key."
-        if spark_options:
-            for key in spark_options:
-                if key.startswith(prefix):
-                    account_key = key[len(prefix) :]
-                    if not url.endswith(account_key):
-                        url += f"@{account_key}"
-                        break
-        return url
+        # Build: wasbs://<container>@<host>
+        st = self.storage_options
+        service = "blob"
+
+        container = getattr(self, "endpoint", None) or st.get("container")
+        if not container:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Container is required to build the WASB URL "
+                "(self.endpoint or storage_options['container'])."
+            )
+
+        # Prefer host from connection string; else synthesize from account_name
+        host = None
+        account_name = st.get("account_name")
+        connection_string = st.get("connection_string")
+
+        if connection_string:
+            primary_url, _, _ = parse_connection_str(
+                connection_string, credential=None, service=service
+            )
+            if primary_url.startswith("http://"):
+                primary_url = primary_url[len("http://") :]
+            if primary_url.startswith("https://"):
+                primary_url = primary_url[len("https://") :]
+            host = primary_url.rstrip("/")
+
+        if not host and account_name:
+            host = f"{account_name}.{service}.core.windows.net"
+
+        if not host:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "account_name is required (or provide a connection_string) to build the WASB URL."
+            )
+
+        return f"wasbs://{container}@{host}"
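For a SAS-based setup the net effect is a single container-scoped Hadoop property plus a container-qualified wasbs URL. A small sketch of what the rewritten methods produce (values are illustrative; the key and URL construction mirror the added lines above):

```python
# Illustrative values only; mirrors the key/URL construction added above.
storage_options = {
    "account_name": "myaccount",
    "container": "mycontainer",
    "sas_token": "?sv=2024-01-01&ss=b&sig=abc123",
}

host = f"{storage_options['account_name']}.blob.core.windows.net"
sas = storage_options["sas_token"].lstrip("?")

# get_spark_options() now emits a container-scoped SAS key instead of the ABFS token-provider settings:
print(f"spark.hadoop.fs.azure.sas.{storage_options['container']}.{host} = {sas}")

# spark_url now always embeds the container:
print(f"wasbs://{storage_options['container']}@{host}")
```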

mlrun/datastore/datastore_profile.py
@@ -333,7 +333,9 @@ class DatastoreProfileGCS(DatastoreProfile):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
         if self.bucket:
-            return f"gcs://{self.bucket}/{subpath}"
+            return (
+                f"gcs://{self.bucket}/{subpath}" if subpath else f"gcs://{self.bucket}"
+            )
         else:
             return f"gcs://{subpath}"
 
@@ -370,7 +372,11 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
             # in azure the path after schema is starts with container, wherefore it should not start with "/".
             subpath = subpath[1:]
         if self.container:
-            return f"az://{self.container}/{subpath}"
+            return (
+                f"az://{self.container}/{subpath}"
+                if subpath
+                else f"az://{self.container}"
+            )
         else:
             return f"az://{subpath}"
 
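Both `url()` builders now drop the trailing slash when no subpath is given. A hedged sketch (the `url()` helper and constructor fields are assumed from existing mlrun code; only the empty-subpath handling is new):

```python
# Hedged sketch of the empty-subpath behavior (profile arguments are illustrative).
from mlrun.datastore.datastore_profile import (
    DatastoreProfileAzureBlob,
    DatastoreProfileGCS,
)

az = DatastoreProfileAzureBlob(name="blobs", container="mycontainer")
print(az.url(""))       # az://mycontainer       (no trailing slash anymore)
print(az.url("/data"))  # az://mycontainer/data

gcs = DatastoreProfileGCS(name="buckets", bucket="mybucket")
print(gcs.url(""))      # gcs://mybucket
```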

mlrun/datastore/model_provider/huggingface_provider.py
@@ -36,6 +36,9 @@ class HuggingFaceProvider(ModelProvider):
     This class extends the ModelProvider base class and implements Hugging Face-specific
     functionality, including pipeline initialization, default text generation operations,
     and custom operations tailored to the Hugging Face Transformers pipeline API.
+
+    Note: The pipeline object will download the model (if not already cached) and load it
+    into memory for inference. Ensure you have the required CPU/GPU and memory to use this operation.
     """
 
     def __init__(
@@ -62,13 +65,12 @@ class HuggingFaceProvider(ModelProvider):
         )
         self.options = self.get_client_options()
         self._expected_operation_type = None
-        self.load_client()
+        self._download_model()
 
     @staticmethod
     def _extract_string_output(response: list[dict]) -> str:
         """
-        Extracts the first generated string from Hugging Face pipeline output,
-        regardless of whether it's plain text-generation or chat-style output.
+        Extracts the first generated string from Hugging Face pipeline output
         """
         if not isinstance(response, list) or len(response) == 0:
             raise ValueError("Empty or invalid pipeline output")
@@ -86,6 +88,35 @@ class HuggingFaceProvider(ModelProvider):
             subpath = ""
         return endpoint, subpath
 
+    @property
+    def client(self) -> Any:
+        """
+        Lazily return the HuggingFace-pipeline client.
+
+        If the client has not been initialized yet, it will be created
+        by calling `load_client`.
+        """
+        self.load_client()
+        return self._client
+
+    def _download_model(self):
+        """
+        Pre-downloads model files locally to prevent race conditions in multiprocessing.
+
+        Uses snapshot_download with local_dir_use_symlinks=False to ensure proper
+        file copying for safe concurrent access across multiple processes.
+
+        :raises:
+            ImportError: If huggingface_hub package is not installed.
+        """
+        try:
+            from huggingface_hub import snapshot_download
+
+            # Download the model and tokenizer files directly to the cache.
+            snapshot_download(repo_id=self.model, local_dir_use_symlinks=False)
+        except ImportError as exc:
+            raise ImportError("huggingface_hub package is not installed") from exc
+
     def _response_handler(
         self,
         response: Union[str, list],
@@ -94,27 +125,46 @@ class HuggingFaceProvider(ModelProvider):
         **kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-        Same as `ModelProvider._response_handler`.
+        Processes and formats the raw response from the HuggingFace pipeline according to the specified format.
 
-        * Expected to receive the response with `return_full_text=False`.
+        The response should exclude the user’s input (no repetition in the output).
+        This can be accomplished by invoking the pipeline with `return_full_text=False`.
 
-        :param messages: Same as in `ModelProvider._response_handler`.
-        :param response: Same as in `ModelProvider._response_handler`.
-        :param invoke_response_format: Same as in `ModelProvider._response_handler`, in full and string modes.
+        :param response: The raw response from the HuggingFace pipeline, typically a list of dictionaries
+            containing generated text sequences.
+        :param invoke_response_format: Determines how the response should be processed and returned. Options:
 
-            For usage mode, generate 3 statistics:
-            prompt_tokens, completion_tokens and total_tokens.
+            - STRING: Return only the main generated content as a string,
+              for single-answer responses.
+            - USAGE: Return a dictionary combining the string response with
+              token usage statistics:
 
-            NOTE: Token counts are estimated after answer generation and
-            may differ from the actual tokens generated by the model due to
-            internal decoding behavior and implementation details.
+              .. code-block:: json
 
-        :param kwargs: Same as in `ModelProvider._response_handler`.
+                  {
+                    "answer": "<generated_text>",
+                    "usage": {
+                        "prompt_tokens": <int>,
+                        "completion_tokens": <int>,
+                        "total_tokens": <int>
+                    }
+                  }
 
-        :return: The result formatted according to the `invoke_response_format`.
+              Note: Token counts are estimated after answer generation and
+              may differ from the actual tokens generated by the model due to
+              internal decoding behavior and implementation details.
+
+            - FULL: Return the full raw response object.
+
+        :param messages: The original input messages used for token count estimation in USAGE mode.
+            Can be a string, list of strings, or chat format messages.
+        :param kwargs: Additional parameters for response processing.
+
+        :return: The processed response in the format specified by `invoke_response_format`.
+            Can be a string, dictionary, or the original response object.
 
         :raises MLRunInvalidArgumentError: If extracting the string response fails.
-        :raises MLRunRuntimeError: If applying the chat template to the model fails.
+        :raises MLRunRuntimeError: If applying the chat template to the model fails during token usage calculation.
         """
         if InvokeResponseFormat.is_str_response(invoke_response_format.value):
             str_response = self._extract_string_output(response)
@@ -161,11 +211,15 @@ class HuggingFaceProvider(ModelProvider):
        :raises:
            ImportError: If the `transformers` package is not installed.
        """
+        if self._client:
+            return
        try:
            from transformers import pipeline, AutoModelForCausalLM  # noqa
            from transformers import AutoTokenizer  # noqa
            from transformers.pipelines.base import Pipeline  # noqa
 
+            self.options["model_kwargs"] = self.options.get("model_kwargs", {})
+            self.options["model_kwargs"]["local_files_only"] = True
            self._client = pipeline(model=self.model, **self.options)
            self._expected_operation_type = Pipeline
        except ImportError as exc:
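Taken together, these hunks change the provider lifecycle: `__init__` now only pre-downloads the model files, the transformers pipeline is built lazily and idempotently on first access to `.client`, and the pipeline is created with `local_files_only=True` so it reuses the pre-downloaded cache. A generic, mlrun-independent sketch of the same lazy-initialization pattern:

```python
# Generic illustration of the lazy, idempotent client-loading pattern (not mlrun code).
class LazyPipelineHolder:
    def __init__(self, model: str):
        self.model = model
        self._client = None
        # construction would only pre-download artifacts here (cheap and multiprocessing-safe)

    def load_client(self):
        if self._client:  # idempotent: repeated calls are no-ops
            return
        # stand-in for pipeline(model=self.model, model_kwargs={"local_files_only": True})
        self._client = f"pipeline({self.model})"

    @property
    def client(self):
        self.load_client()  # built on first access, then reused
        return self._client


holder = LazyPipelineHolder("gpt2")
print(holder.client is holder.client)  # True: the pipeline is created once and cached
```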
@@ -186,23 +240,38 @@ class HuggingFaceProvider(ModelProvider):
         self, operation: Optional["Pipeline"] = None, **invoke_kwargs
     ) -> Union[list, dict, Any]:
         """
-        HuggingFace implementation of `ModelProvider.custom_invoke`.
-        Use the default config in provider client/ user defined client:
+        Invokes a HuggingFace pipeline operation with the given keyword arguments.
+
+        This method provides flexibility to use a custom pipeline object for specific tasks
+        (e.g., image classification, sentiment analysis).
+
+        The operation must be a Pipeline object from the transformers library that accepts keyword arguments.
 
         Example:
-            ```python
+        ```python
+        from transformers import pipeline
+        from PIL import Image
+
+        # Using custom pipeline for image classification
         image = Image.open(image_path)
-            pipeline_object = pipeline("image-classification", model="microsoft/resnet-50")
+        pipeline_object = pipeline("image-classification", model="microsoft/resnet-50")
         result = hf_provider.custom_invoke(
             pipeline_object,
             inputs=image,
         )
-            ```
+        ```
 
+        :param operation: A Pipeline object from the transformers library.
+            If not provided, defaults to the provider's configured pipeline.
+        :param invoke_kwargs: Keyword arguments to pass to the pipeline operation.
+            These are merged with `default_invoke_kwargs` and may include
+            parameters such as `inputs`, `max_length`, `temperature`, or task-specific options.
 
-        :param operation: A pipeline object
-        :param invoke_kwargs: Keyword arguments to pass to the operation.
-        :return: The full response returned by the operation.
+        :return: The full response returned by the pipeline operation.
+            Format depends on the pipeline task (list for text generation,
+            dict for classification, etc.).
+
+        :raises MLRunInvalidArgumentError: If the operation is not a valid Pipeline object.
 
         """
         invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
@@ -222,12 +291,24 @@ class HuggingFaceProvider(ModelProvider):
         **invoke_kwargs,
     ) -> Union[str, list, dict[str, Any]]:
         """
-        HuggingFace-specific implementation of `ModelProvider.invoke`.
-        Invokes a HuggingFace model operation using the synchronous client.
-        For full details, see `ModelProvider.invoke`.
+        HuggingFace-specific implementation of model invocation using the synchronous pipeline client.
+        Invokes a HuggingFace model operation for text generation tasks.
+
+        Note: Ensure your environment has sufficient computational resources (CPU/GPU and memory) to run the model.
 
         :param messages:
-            Same as `ModelProvider.invoke`.
+            Input for the text generation model. Can be provided in multiple formats:
+
+            - A single string: Direct text input for generation
+            - A list of strings: Multiple text inputs for batch processing
+            - Chat format: A list of dictionaries with "role" and "content" keys:
+
+              .. code-block:: json
+
+                  [
+                      {"role": "system", "content": "You are a helpful assistant."},
+                      {"role": "user", "content": "What is the capital of France?"}
+                  ]
 
         :param invoke_response_format: InvokeResponseFormat
             Specifies the format of the returned response. Options:
@@ -245,17 +326,24 @@ class HuggingFaceProvider(ModelProvider):
                     }
                 }
 
+            Note: For usage mode, the model tokenizer should support apply_chat_template.
+
            - "full": Returns the raw response object from the HuggingFace model,
              typically a list of generated sequences (dictionaries).
              This format does not include token usage statistics.
 
        :param invoke_kwargs:
-            Additional keyword arguments passed to the HuggingFace client. Same as in `ModelProvider.invoke`.
+            Additional keyword arguments passed to the HuggingFace pipeline.
 
        :return:
            A string, dictionary, or list of model outputs, depending on `invoke_response_format`.
-        """
 
+        :raises MLRunInvalidArgumentError:
+            If the pipeline task is not "text-generation" or if the response contains multiple outputs when extracting
+            string content.
+        :raises MLRunRuntimeError:
+            If using "usage" response mode and the model tokenizer does not support chat template formatting.
+        """
        if self.client.task != "text-generation":
            raise mlrun.errors.MLRunInvalidArgumentError(
                "HuggingFaceProvider.invoke supports text-generation task only"

mlrun/datastore/model_provider/model_provider.py
@@ -108,7 +108,7 @@ class ModelProvider(BaseRemoteClient):
              additional metadata or token usage statistics, in this format:
              {"answer": <string>, "usage": <dict>}
 
-            - FULL: Return the full raw response object unmodified.
+            - FULL: Return the full raw response object.
 
        :param kwargs: Additional parameters that may be required by specific implementations.
 
@@ -164,7 +164,9 @@ class ModelProvider(BaseRemoteClient):
             )
         return self._async_client
 
-    def custom_invoke(self, operation: Optional[Callable], **invoke_kwargs) -> Any:
+    def custom_invoke(
+        self, operation: Optional[Callable] = None, **invoke_kwargs
+    ) -> Any:
         """
         Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.
 
@@ -263,5 +265,61 @@ class ModelProvider(BaseRemoteClient):
         invoke_response_format=InvokeResponseFormat.FULL,
         **invoke_kwargs,
     ) -> Union[str, dict[str, Any], Any]:
-        """Async version of `invoke`. See `invoke` for full documentation."""
+        """
+        Asynchronously invokes a generative AI model with the provided messages and additional parameters.
+        This method is designed to be a flexible interface for interacting with various
+        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
+        a list of messages (following a standardized format) and receive a response.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {"role": "system"| "user" | "assistant" ..., "content":
+                "Message content as a string"}
+
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            This format is consistent across all backends. Defaults to None if no messages
+            are provided.
+
+        :param invoke_response_format: Determines how the model response is returned:
+
+            - string: Returns only the generated text content from the model output,
+              for single-answer responses only.
+
+            - usage: Combines the STRING response with additional metadata (token usage),
+              and returns the result in a dictionary.
+
+              Note: The usage dictionary may contain additional
+              keys depending on the model provider:
+
+              .. code-block:: json
+
+                  {
+                    "answer": "<generated_text>",
+                    "usage": {
+                        "prompt_tokens": <int>,
+                        "completion_tokens": <int>,
+                        "total_tokens": <int>
+                    }
+
+                  }
+
+            - full: Returns the full model output.
+
+        :param invoke_kwargs:
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
+
+        :return: The invoke result formatted according to the specified
+            invoke_response_format parameter.
+
+        """
         raise NotImplementedError("async_invoke is not implemented")
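The message format and response modes documented here are the common contract for all providers; the base class itself only raises `NotImplementedError`. A hedged calling sketch (provider construction is deliberately left out; a concrete subclass such as the OpenAI or Hugging Face provider is assumed):

```python
# Hedged sketch: calling async_invoke on a concrete ModelProvider subclass.
import asyncio

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]


async def ask(provider):
    # With invoke_response_format left at FULL the raw backend response is returned;
    # the "usage" mode would instead return {"answer": ..., "usage": {...}}.
    return await provider.async_invoke(messages=messages)


# asyncio.run(ask(provider))  # provider: a configured ModelProvider subclass instance
```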