mlrun 1.10.0rc18__py3-none-any.whl → 1.10.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)
  1. mlrun/__init__.py +21 -2
  2. mlrun/common/constants.py +1 -0
  3. mlrun/common/schemas/function.py +10 -0
  4. mlrun/common/schemas/model_monitoring/constants.py +4 -11
  5. mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
  6. mlrun/datastore/__init__.py +9 -1
  7. mlrun/datastore/model_provider/huggingface_provider.py +114 -26
  8. mlrun/datastore/model_provider/model_provider.py +144 -70
  9. mlrun/datastore/model_provider/openai_provider.py +95 -37
  10. mlrun/db/base.py +0 -19
  11. mlrun/db/httpdb.py +10 -46
  12. mlrun/db/nopdb.py +0 -10
  13. mlrun/launcher/base.py +13 -6
  14. mlrun/model_monitoring/api.py +43 -22
  15. mlrun/model_monitoring/applications/base.py +1 -1
  16. mlrun/model_monitoring/controller.py +112 -38
  17. mlrun/model_monitoring/db/_schedules.py +13 -9
  18. mlrun/model_monitoring/stream_processing.py +16 -12
  19. mlrun/platforms/__init__.py +3 -2
  20. mlrun/projects/project.py +2 -2
  21. mlrun/run.py +1 -1
  22. mlrun/runtimes/base.py +5 -2
  23. mlrun/runtimes/daskjob.py +1 -0
  24. mlrun/runtimes/nuclio/application/application.py +84 -5
  25. mlrun/runtimes/nuclio/function.py +3 -1
  26. mlrun/serving/server.py +24 -0
  27. mlrun/serving/states.py +80 -30
  28. mlrun/serving/system_steps.py +60 -36
  29. mlrun/utils/helpers.py +37 -13
  30. mlrun/utils/notifications/notification_pusher.py +1 -1
  31. mlrun/utils/version/version.json +2 -2
  32. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/METADATA +4 -4
  33. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/RECORD +37 -38
  34. mlrun/api/schemas/__init__.py +0 -259
  35. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/WHEEL +0 -0
  36. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/entry_points.txt +0 -0
  37. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/licenses/LICENSE +0 -0
  38. {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/top_level.txt +0 -0
mlrun/__init__.py CHANGED
@@ -31,6 +31,7 @@ from typing import Optional
 
 import dotenv
 
+from .common.constants import MLRUN_ACTIVE_PROJECT
 from .config import config as mlconf
 from .datastore import DataItem, ModelProvider, store_manager
 from .db import get_run_db
@@ -167,11 +168,29 @@ def set_environment(
 
 
 def get_current_project(silent: bool = False) -> Optional[MlrunProject]:
-    if not pipeline_context.project and not silent:
+    if pipeline_context.project:
+        return pipeline_context.project
+
+    project_name = environ.get(MLRUN_ACTIVE_PROJECT, None)
+    if not project_name:
+        if not silent:
+            raise MLRunInvalidArgumentError(
+                "No current project is initialized. Use new, get or load project functions first."
+            )
+        return None
+
+    project = load_project(
+        name=project_name,
+        url=project_name,
+        save=False,
+        sync_functions=False,
+    )
+
+    if not project and not silent:
         raise MLRunInvalidArgumentError(
             "No current project is initialized. Use new, get or load project functions first."
         )
-    return pipeline_context.project
+    return project
 
 
 def get_sample_path(subpath=""):
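In practice this means get_current_project no longer depends solely on the in-process pipeline context. A minimal sketch of the new fallback behavior (the project name is hypothetical; per the diff, load_project runs with save=False and sync_functions=False, so the lookup is read-only):

import os

import mlrun

# rc18 raised immediately when no pipeline context existed; rc20 first
# consults the new MLRUN_ACTIVE_PROJECT environment variable.
os.environ["MLRUN_ACTIVE_PROJECT"] = "my-project"  # hypothetical project name

# With silent=True, a missing env var yields None instead of an error.
project = mlrun.get_current_project(silent=True)
if project is not None:
    print(project.name)  # -> "my-project"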
mlrun/common/constants.py CHANGED
@@ -30,6 +30,7 @@ RESERVED_TAG_NAME_LATEST = "latest"
 JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
 JOB_TYPE_PROJECT_LOADER = "project-loader"
 JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
+MLRUN_ACTIVE_PROJECT = "MLRUN_ACTIVE_PROJECT"
 
 
 class MLRunInternalLabels:
mlrun/common/schemas/function.py CHANGED
@@ -114,11 +114,21 @@ class StateThresholds(pydantic.v1.BaseModel):
     default: typing.Optional[dict[str, str]]
 
 
+class Backoff(pydantic.v1.BaseModel):
+    default_base_delay: typing.Optional[str]
+    min_base_delay: typing.Optional[str]
+
+
+class RetrySpec(pydantic.v1.BaseModel):
+    backoff: Backoff
+
+
 class FunctionSpec(pydantic.v1.BaseModel):
     image_pull_secret: typing.Optional[ImagePullSecret]
     security_context: typing.Optional[SecurityContext]
     service_account: typing.Optional[ServiceAccount]
     state_thresholds: typing.Optional[StateThresholds]
+    retry: typing.Optional[RetrySpec]
 
     class Config:
         extra = pydantic.v1.Extra.allow
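For orientation, the new optional retry block nests under the function spec as sketched below; only the shape is taken from the schema above, the delay values are hypothetical, and the spec still allows extra keys (Extra.allow):

# Hypothetical function-spec fragment matching the new RetrySpec/Backoff models.
function_spec = {
    "retry": {
        "backoff": {
            "default_base_delay": "30s",  # hypothetical value
            "min_base_delay": "5s",       # hypothetical value
        }
    }
}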
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -34,6 +34,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
     UID = "uid"
     PROJECT = "project"
     ENDPOINT_TYPE = "endpoint_type"
+    MODE = "mode"
     NAME = "name"
     CREATED = "created"
     UPDATED = "updated"
@@ -326,18 +327,10 @@ class EndpointType(IntEnum):
     def top_level_list(cls):
         return [cls.NODE_EP, cls.ROUTER, cls.BATCH_EP]
 
-    @classmethod
-    def real_time_list(cls):
-        return [cls.NODE_EP, cls.ROUTER, cls.LEAF_EP]
-
-    @classmethod
-    def batch_list(cls):
-        return [cls.BATCH_EP]
 
-
-class EndpointMode(StrEnum):
-    REAL_TIME = "real_time"
-    BATCH = "batch"
+class EndpointMode(IntEnum):
+    REAL_TIME = 0
+    BATCH = 1
 
 
 class MonitoringFunctionNames(MonitoringStrEnum):
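Note the shape change here: EndpointMode moved from StrEnum to IntEnum, so its members now compare and serialize as integers rather than the rc18 strings. A standalone mirror of the definition, for illustration only:

from enum import IntEnum


class EndpointMode(IntEnum):  # mirror of the rc20 definition above
    REAL_TIME = 0
    BATCH = 1


# Members behave as ints, not the rc18 strings "real_time"/"batch":
assert EndpointMode.REAL_TIME == 0
assert EndpointMode(1) is EndpointMode.BATCH
print(int(EndpointMode.BATCH))  # -> 1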
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -28,6 +28,7 @@ from .constants import (
     FQN_REGEX,
     MODEL_ENDPOINT_ID_PATTERN,
     PROJECT_PATTERN,
+    EndpointMode,
    EndpointType,
     ModelEndpointMonitoringMetricType,
     ModelMonitoringMode,
@@ -118,6 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
     project: constr(regex=PROJECT_PATTERN)
     endpoint_type: EndpointType = EndpointType.NODE_EP
     uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
+    mode: EndpointMode = EndpointMode.REAL_TIME
 
     @classmethod
     def mutable_fields(cls):
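Because the new field carries a default, existing callers that never pass mode keep real-time behavior. A hedged sketch, assuming the remaining ObjectMetadata fields are optional or defaulted:

from mlrun.common.schemas.model_monitoring.constants import EndpointMode
from mlrun.common.schemas.model_monitoring.model_endpoints import (
    ModelEndpointMetadata,
)

# Field names other than `mode` are taken from the diff context above.
meta = ModelEndpointMetadata(project="my-project", name="my-endpoint")
assert meta.mode == EndpointMode.REAL_TIME  # new default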
mlrun/datastore/__init__.py CHANGED
@@ -39,6 +39,7 @@ __all__ = [
 from urllib.parse import urlparse
 
 import fsspec
+import storey
 
 import mlrun.datastore.wasbfs
 from mlrun.datastore.datastore_profile import (
@@ -168,11 +169,12 @@ def get_stream_pusher(stream_path: str, **kwargs):
     raise ValueError(f"unsupported stream path {stream_path}")
 
 
-class _DummyStream:
+class _DummyStream(storey.MapClass):
     """stream emulator for tests and debug"""
 
     def __init__(self, event_list=None, **kwargs):
         self.event_list = event_list or []
+        super().__init__(**kwargs)
 
     def push(self, data, **kwargs):
         if not isinstance(data, list):
@@ -180,3 +182,9 @@ class _DummyStream:
         for item in data:
             logger.info(f"dummy stream got event: {item}, kwargs={kwargs}")
             self.event_list.append(item)
+
+    def do(self, event):
+        if not isinstance(event, list):
+            event = [event]
+        for item in event:
+            self.event_list.append(item)
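The new do method is what lets the dummy stream sit inside a storey flow now that it subclasses storey.MapClass; the old push path stays for direct use. A small illustration, assuming the storey package is installed (the helper is private and intended for tests/debug):

from mlrun.datastore import _DummyStream  # private test/debug helper

stream = _DummyStream()
stream.do({"id": 1})                # a single event is wrapped in a list
stream.do([{"id": 2}, {"id": 3}])   # lists are appended item by item
print(stream.event_list)            # -> [{'id': 1}, {'id': 2}, {'id': 3}]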
mlrun/datastore/model_provider/huggingface_provider.py CHANGED
@@ -12,16 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import TYPE_CHECKING, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 import mlrun
-from mlrun.datastore.model_provider.model_provider import ModelProvider
+from mlrun.datastore.model_provider.model_provider import (
+    InvokeResponseFormat,
+    ModelProvider,
+    UsageResponseKeys,
+)
 
 if TYPE_CHECKING:
     from transformers.pipelines.base import Pipeline
-
-T = TypeVar("T")
-ChatType = list[dict[str, str]]  # according to transformers.pipelines.text_generation
+    from transformers.pipelines.text_generation import ChatType
 
 
 class HuggingFaceProvider(ModelProvider):
@@ -63,15 +65,18 @@ class HuggingFaceProvider(ModelProvider):
         self.load_client()
 
     @staticmethod
-    def _extract_string_output(result) -> str:
+    def _extract_string_output(response: list[dict]) -> str:
         """
         Extracts the first generated string from Hugging Face pipeline output,
         regardless of whether it's plain text-generation or chat-style output.
         """
-        if not isinstance(result, list) or len(result) == 0:
+        if not isinstance(response, list) or len(response) == 0:
             raise ValueError("Empty or invalid pipeline output")
-
-        return result[0].get("generated_text")
+        if len(response) != 1:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "HuggingFaceProvider: extracting string from response is only supported for single-response outputs"
+            )
+        return response[0].get("generated_text")
 
     @classmethod
     def parse_endpoint_and_path(cls, endpoint, subpath) -> (str, str):
@@ -81,6 +86,68 @@ class HuggingFaceProvider(ModelProvider):
             subpath = ""
         return endpoint, subpath
 
+    def _response_handler(
+        self,
+        response: Union[str, list],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        messages: Union[str, list[str], "ChatType", list["ChatType"]] = None,
+        **kwargs,
+    ) -> Union[str, list, dict[str, Any]]:
+        """
+        Same as `ModelProvider._response_handler`.
+
+        * Expected to receive the response with `return_full_text=False`.
+
+        :param messages: Same as in `ModelProvider._response_handler`.
+        :param response: Same as in `ModelProvider._response_handler`.
+        :param invoke_response_format: Same as in `ModelProvider._response_handler`, in full and string modes.
+
+            For usage mode, generate 3 statistics:
+            prompt_tokens, completion_tokens and total_tokens.
+
+            NOTE: Token counts are estimated after answer generation and
+            may differ from the actual tokens generated by the model due to
+            internal decoding behavior and implementation details.
+
+        :param kwargs: Same as in `ModelProvider._response_handler`.
+
+        :return: The result formatted according to the `invoke_response_format`.
+
+        :raises MLRunInvalidArgumentError: If extracting the string response fails.
+        :raises MLRunRuntimeError: If applying the chat template to the model fails.
+        """
+        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
+            str_response = self._extract_string_output(response)
+            if invoke_response_format == InvokeResponseFormat.STRING:
+                return str_response
+            if invoke_response_format == InvokeResponseFormat.USAGE:
+                tokenizer = self.client.tokenizer
+                if not isinstance(messages, str):
+                    try:
+                        messages = tokenizer.apply_chat_template(
+                            messages, tokenize=False, add_generation_prompt=True
+                        )
+                    except Exception as e:
+                        raise mlrun.errors.MLRunRuntimeError(
+                            f"Failed to apply chat template using the tokenizer for model '{self.model}'. "
+                            "This may indicate that the tokenizer does not support chat formatting, "
+                            "or that the input format is invalid. "
+                            f"Original error: {e}"
+                        )
+                prompt_tokens = len(tokenizer.encode(messages))
+                completion_tokens = len(tokenizer.encode(str_response))
+                total_tokens = prompt_tokens + completion_tokens
+                usage = {
+                    "prompt_tokens": prompt_tokens,
+                    "completion_tokens": completion_tokens,
+                    "total_tokens": total_tokens,
+                }
+                response = {
+                    UsageResponseKeys.ANSWER: str_response,
+                    UsageResponseKeys.USAGE: usage,
+                }
+        return response
+
     def load_client(self) -> None:
         """
         Initializes the Hugging Face pipeline using the provided options.
@@ -91,7 +158,7 @@
 
 
         Note: Hugging Face pipelines are synchronous and do not support async invocation.
-        Raises:
+        :raises:
             ImportError: If the `transformers` package is not installed.
         """
         try:
@@ -117,7 +184,7 @@
 
     def custom_invoke(
         self, operation: Optional["Pipeline"] = None, **invoke_kwargs
-    ) -> Optional[T]:
+    ) -> Union[list, dict, Any]:
         """
         HuggingFace implementation of `ModelProvider.custom_invoke`.
         Use the default config in provider client/ user defined client:
@@ -150,34 +217,55 @@
 
     def invoke(
         self,
-        messages: Union[str, list[str], ChatType, list[ChatType]] = None,
-        as_str: bool = False,
+        messages: Union[str, list[str], "ChatType", list["ChatType"]],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Optional[Union[str, list, T]]:
+    ) -> Union[str, list, dict[str, Any]]:
         """
         HuggingFace-specific implementation of `ModelProvider.invoke`.
         Invokes a HuggingFace model operation using the synchronous client.
-        For complete usage details, refer to `ModelProvider.invoke`.
+        For full details, see `ModelProvider.invoke`.
+
         :param messages:
-            Same as ModelProvider.invoke.
+            Same as `ModelProvider.invoke`.
+
+        :param invoke_response_format: InvokeResponseFormat
+            Specifies the format of the returned response. Options:
 
-        :param as_str:
-            If `True`, returns only the main content from a single response
-            (intended for single-response use cases).
-            If `False`, returns the full response object, whose type depends on
-            the client (e.g., `pipeline`).
+            - "string": Returns only the generated text content, extracted from a single response.
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary:
+
+              .. code-block:: json
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+                  }
+
+            - "full": Returns the raw response object from the HuggingFace model,
+              typically a list of generated sequences (dictionaries).
+              This format does not include token usage statistics.
 
         :param invoke_kwargs:
-            Same as ModelProvider.invoke.
-        :return: Same as ModelProvider.invoke.
+            Additional keyword arguments passed to the HuggingFace client. Same as in `ModelProvider.invoke`.
+
+        :return:
+            A string, dictionary, or list of model outputs, depending on `invoke_response_format`.
         """
+
         if self.client.task != "text-generation":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "HuggingFaceProvider.invoke supports text-generation task only"
             )
-        if as_str:
+        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
            invoke_kwargs["return_full_text"] = False
         response = self.custom_invoke(text_inputs=messages, **invoke_kwargs)
-        if as_str:
-            return self._extract_string_output(response)
+        response = self._response_handler(
+            messages=messages,
+            response=response,
+            invoke_response_format=invoke_response_format,
+        )
         return response
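A hedged sketch of the new call pattern; provider construction is elided, and `provider` stands in for an already-initialized HuggingFaceProvider wrapping a text-generation pipeline:

from typing import Any

from mlrun.datastore.model_provider.model_provider import InvokeResponseFormat


def ask(provider: Any, question: str) -> None:
    messages = [{"role": "user", "content": question}]

    # "string": just the generated text (return_full_text=False is set internally)
    text = provider.invoke(
        messages, invoke_response_format=InvokeResponseFormat.STRING
    )
    print(text)

    # "usage": {"answer": ..., "usage": {...}} with estimated token counts
    result = provider.invoke(
        messages, invoke_response_format=InvokeResponseFormat.USAGE
    )
    print(result["answer"], result["usage"]["total_tokens"])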
mlrun/datastore/model_provider/model_provider.py CHANGED
@@ -12,14 +12,38 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections.abc import Awaitable
-from typing import Any, Callable, Optional, TypeVar, Union
+from typing import Any, Callable, Optional, Union
 
 import mlrun.errors
+from mlrun.common.types import StrEnum
 from mlrun.datastore.remote_client import (
     BaseRemoteClient,
 )
 
-T = TypeVar("T")
+
+class InvokeResponseFormat(StrEnum):
+    STRING = "string"
+    USAGE = "usage"
+    FULL = "full"
+
+    @classmethod
+    def is_str_response(cls, invoke_response_format: str) -> bool:
+        """
+        Returns True if the response key corresponds to a string-based response (not a full generation object).
+        """
+        return invoke_response_format in {
+            cls.USAGE,
+            cls.STRING,
+        }
+
+
+class UsageResponseKeys(StrEnum):
+    ANSWER = "answer"
+    USAGE = "usage"
+
+    @classmethod
+    def fields(cls) -> list[str]:
+        return [cls.ANSWER, cls.USAGE]
 
 
 class ModelProvider(BaseRemoteClient):
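These two helper enums drive the branching seen in the HuggingFace hunks above: anything other than "full" requires extracting the plain-text answer first. A quick check of the advertised behavior:

from mlrun.datastore.model_provider.model_provider import (
    InvokeResponseFormat,
    UsageResponseKeys,
)

# "string" and "usage" both need the text answer; only "full" passes the
# raw client response through untouched.
assert InvokeResponseFormat.is_str_response(InvokeResponseFormat.STRING.value)
assert InvokeResponseFormat.is_str_response(InvokeResponseFormat.USAGE.value)
assert not InvokeResponseFormat.is_str_response(InvokeResponseFormat.FULL.value)

assert UsageResponseKeys.fields() == ["answer", "usage"]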
@@ -58,6 +82,41 @@ class ModelProvider(BaseRemoteClient):
         self._client = None
         self._async_client = None
 
+    @staticmethod
+    def _extract_string_output(response: Any) -> str:
+        """
+        Extracts string response from response object
+        """
+        pass
+
+    def _response_handler(
+        self,
+        response: Any,
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        **kwargs,
+    ) -> Union[str, dict, Any]:
+        """
+        Handles the model response according to the specified response format.
+
+        :param response: The raw response returned from the model invocation.
+        :param invoke_response_format: Determines how the response should be processed and returned.
+            Options include:
+
+            - STRING: Return only the main generated content as a string,
+              typically for single-answer responses.
+            - USAGE: Return a dictionary combining the string response with
+              additional metadata or token usage statistics, in this format:
+              {"answer": <string>, "usage": <dict>}
+
+            - FULL: Return the full raw response object unmodified.
+
+        :param kwargs: Additional parameters that may be required by specific implementations.
+
+        :return: The processed response in the format specified by `invoke_response_format`.
+            Can be a string, dictionary, or the original response object.
+        """
+        return None
+
     def get_client_options(self) -> dict:
         """
         Returns a dictionary containing credentials and configuration
@@ -79,69 +138,6 @@
 
 
         raise NotImplementedError("load_client method is not implemented")
-    def invoke(
-        self,
-        messages: Optional[list[dict]] = None,
-        as_str: bool = False,
-        **invoke_kwargs,
-    ) -> Optional[Union[str, T]]:
-        """
-        Invokes a generative AI model with the provided messages and additional parameters.
-        This method is designed to be a flexible interface for interacting with various
-        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
-        a list of messages (following a standardized format) and receive a response. The
-        response can be returned as plain text or in its full structured format, depending
-        on the `as_str` parameter.
-
-        :param messages: A list of dictionaries representing the conversation history or input messages.
-            Each dictionary should follow the format::
-                {"role": "system"| "user" | "assistant" ..., "content": "Message content as a string"}
-            Example:
-
-            .. code-block:: json
-
-                [
-                    {"role": "system", "content": "You are a helpful assistant."},
-                    {"role": "user", "content": "What is the capital of France?"}
-                ]
-
-            This format is consistent across all backends. Defaults to None if no messages
-            are provided.
-
-        :param as_str: A boolean flag indicating whether to return the response as a plain string.
-            - If True, the function extracts and returns the main content of the first
-              response.
-            - If False, the function returns the full response object,
-              which may include additional metadata or multiple response options.
-            Defaults to False.
-
-        :param invoke_kwargs:
-            Additional keyword arguments to be passed to the underlying model API call.
-            These can include parameters such as temperature, max tokens, etc.,
-            depending on the capabilities of the specific backend being used.
-
-        :return:
-            - If `as_str` is True: Returns the main content of the first response as a string.
-            - If `as_str` is False: Returns the full response object.
-
-        """
-        raise NotImplementedError("invoke method is not implemented")
-
-    def custom_invoke(
-        self, operation: Optional[Callable[..., T]] = None, **invoke_kwargs
-    ) -> Optional[T]:
-        """
-        Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.
-
-        Useful for dynamically calling model methods like text generation, chat completions, or image generation.
-        The operation must be a callable that accepts keyword arguments.
-
-        :param operation: A callable representing the model operation (e.g., a client method).
-        :param invoke_kwargs: Keyword arguments to pass to the operation.
-        :return: The full response returned by the operation.
-        """
-        raise NotImplementedError("custom_invoke method is not implemented")
-
 
     @property
     def client(self) -> Any:
@@ -168,9 +164,22 @@ class ModelProvider(BaseRemoteClient):
             )
         return self._async_client
 
+    def custom_invoke(self, operation: Optional[Callable], **invoke_kwargs) -> Any:
+        """
+        Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.
+
+        Useful for dynamically calling model methods like text generation, chat completions, or image generation.
+        The operation must be a callable that accepts keyword arguments.
+
+        :param operation: A callable representing the model operation (e.g., a client method).
+        :param invoke_kwargs: Keyword arguments to pass to the operation.
+        :return: The full response returned by the operation.
+        """
+        raise NotImplementedError("custom_invoke method is not implemented")
+
     async def async_custom_invoke(
-        self, operation: Optional[Callable[..., Awaitable[T]]], **invoke_kwargs
-    ) -> Optional[T]:
+        self, operation: Optional[Callable[..., Awaitable[Any]]], **invoke_kwargs
+    ) -> Any:
         """
         Asynchronously invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.)
         with the given keyword arguments.
@@ -183,11 +192,76 @@
         """
         raise NotImplementedError("async_custom_invoke is not implemented")
 
+    def invoke(
+        self,
+        messages: Union[list[dict], Any],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        **invoke_kwargs,
+    ) -> Union[str, dict[str, Any], Any]:
+        """
+        Invokes a generative AI model with the provided messages and additional parameters.
+        This method is designed to be a flexible interface for interacting with various
+        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
+        a list of messages (following a standardized format) and receive a response.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {"role": "system"| "user" | "assistant" ..., "content":
+                "Message content as a string"}
+
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            This format is consistent across all backends. Defaults to None if no messages
+            are provided.
+
+        :param invoke_response_format: Determines how the model response is returned:
+
+            - string: Returns only the generated text content from the model output,
+              for single-answer responses only.
+
+            - usage: Combines the STRING response with additional metadata (token usage),
+              and returns the result in a dictionary.
+
+              Note: The usage dictionary may contain additional
+              keys depending on the model provider:
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+
+                  }
+
+            - full: Returns the full model output.
+
+        :param invoke_kwargs:
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
+
+        :return: The invoke result formatted according to the specified
+            invoke_response_format parameter.
+
+        """
+        raise NotImplementedError("invoke method is not implemented")
+
     async def async_invoke(
         self,
-        messages: Optional[list[dict]] = None,
-        as_str: bool = False,
+        messages: list[dict],
+        invoke_response_format=InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Optional[str]:
+    ) -> Union[str, dict[str, Any], Any]:
         """Async version of `invoke`. See `invoke` for full documentation."""
         raise NotImplementedError("async_invoke is not implemented")
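Taken together, the rc20 contract is: custom_invoke performs the raw client call, and invoke shapes the result through _response_handler. A standalone toy provider sketching that flow (it deliberately does not subclass ModelProvider, whose constructor arguments are outside this diff; the echo logic and token count are placeholders):

from typing import Any, Union

from mlrun.datastore.model_provider.model_provider import (
    InvokeResponseFormat,
    UsageResponseKeys,
)


class EchoProvider:
    """Toy provider mirroring the rc20 invoke contract, for illustration."""

    def custom_invoke(self, **invoke_kwargs) -> list[dict]:
        # Raw "model" call: echo the last user message back.
        messages = invoke_kwargs["text_inputs"]
        return [{"generated_text": messages[-1]["content"]}]

    def _response_handler(
        self,
        response: Any,
        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
        **kwargs,
    ) -> Union[str, dict, Any]:
        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
            answer = response[0]["generated_text"]
            if invoke_response_format == InvokeResponseFormat.STRING:
                return answer
            return {
                UsageResponseKeys.ANSWER: answer,
                UsageResponseKeys.USAGE: {"total_tokens": 0},  # placeholder
            }
        return response

    def invoke(
        self,
        messages: list[dict],
        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
        **invoke_kwargs,
    ) -> Union[str, dict, Any]:
        response = self.custom_invoke(text_inputs=messages, **invoke_kwargs)
        return self._response_handler(response, invoke_response_format)


provider = EchoProvider()
out = provider.invoke(
    [{"role": "user", "content": "ping"}],
    invoke_response_format=InvokeResponseFormat.USAGE,
)
print(out["answer"])  # -> "ping"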