mlrun 1.10.0rc19__py3-none-any.whl → 1.10.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (30)
  1. mlrun/common/schemas/function.py +10 -0
  2. mlrun/common/schemas/model_monitoring/constants.py +4 -11
  3. mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
  4. mlrun/datastore/model_provider/huggingface_provider.py +109 -20
  5. mlrun/datastore/model_provider/model_provider.py +110 -32
  6. mlrun/datastore/model_provider/openai_provider.py +87 -31
  7. mlrun/db/base.py +0 -19
  8. mlrun/db/httpdb.py +10 -46
  9. mlrun/db/nopdb.py +0 -10
  10. mlrun/launcher/base.py +0 -6
  11. mlrun/model_monitoring/api.py +43 -22
  12. mlrun/model_monitoring/applications/base.py +1 -1
  13. mlrun/model_monitoring/controller.py +112 -38
  14. mlrun/model_monitoring/db/_schedules.py +13 -9
  15. mlrun/model_monitoring/stream_processing.py +16 -12
  16. mlrun/platforms/__init__.py +3 -2
  17. mlrun/projects/project.py +2 -2
  18. mlrun/serving/server.py +23 -0
  19. mlrun/serving/states.py +76 -29
  20. mlrun/serving/system_steps.py +60 -36
  21. mlrun/utils/helpers.py +27 -13
  22. mlrun/utils/notifications/notification_pusher.py +1 -1
  23. mlrun/utils/version/version.json +2 -2
  24. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc20.dist-info}/METADATA +4 -4
  25. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc20.dist-info}/RECORD +29 -30
  26. mlrun/api/schemas/__init__.py +0 -259
  27. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc20.dist-info}/WHEEL +0 -0
  28. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc20.dist-info}/entry_points.txt +0 -0
  29. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc20.dist-info}/licenses/LICENSE +0 -0
  30. {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc20.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/function.py CHANGED
@@ -114,11 +114,21 @@ class StateThresholds(pydantic.v1.BaseModel):
     default: typing.Optional[dict[str, str]]
 
 
+class Backoff(pydantic.v1.BaseModel):
+    default_base_delay: typing.Optional[str]
+    min_base_delay: typing.Optional[str]
+
+
+class RetrySpec(pydantic.v1.BaseModel):
+    backoff: Backoff
+
+
 class FunctionSpec(pydantic.v1.BaseModel):
     image_pull_secret: typing.Optional[ImagePullSecret]
     security_context: typing.Optional[SecurityContext]
     service_account: typing.Optional[ServiceAccount]
     state_thresholds: typing.Optional[StateThresholds]
+    retry: typing.Optional[RetrySpec]
 
     class Config:
         extra = pydantic.v1.Extra.allow
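The new retry block nests a Backoff model inside RetrySpec. A minimal standalone sketch of how the nested spec validates, assuming pydantic v2's v1 compatibility layer (which the schema above uses) and duration strings such as "10s" for the delays (the value format is an assumption, not confirmed by the diff):

```python
import typing

import pydantic.v1  # requires pydantic >= 2, which ships the v1 compat layer


class Backoff(pydantic.v1.BaseModel):
    default_base_delay: typing.Optional[str]
    min_base_delay: typing.Optional[str]


class RetrySpec(pydantic.v1.BaseModel):
    backoff: Backoff


# Nested dicts are coerced into the models on validation:
spec = RetrySpec(backoff={"default_base_delay": "10s", "min_base_delay": "1s"})
print(spec.dict())
# {'backoff': {'default_base_delay': '10s', 'min_base_delay': '1s'}}
```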
mlrun/common/schemas/model_monitoring/constants.py CHANGED
@@ -34,6 +34,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
     UID = "uid"
     PROJECT = "project"
     ENDPOINT_TYPE = "endpoint_type"
+    MODE = "mode"
     NAME = "name"
     CREATED = "created"
     UPDATED = "updated"
@@ -326,18 +327,10 @@ class EndpointType(IntEnum):
     def top_level_list(cls):
         return [cls.NODE_EP, cls.ROUTER, cls.BATCH_EP]
 
-    @classmethod
-    def real_time_list(cls):
-        return [cls.NODE_EP, cls.ROUTER, cls.LEAF_EP]
-
-    @classmethod
-    def batch_list(cls):
-        return [cls.BATCH_EP]
 
-
-class EndpointMode(StrEnum):
-    REAL_TIME = "real_time"
-    BATCH = "batch"
+class EndpointMode(IntEnum):
+    REAL_TIME = 0
+    BATCH = 1
 
 
 class MonitoringFunctionNames(MonitoringStrEnum):
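EndpointMode switches from a StrEnum to an IntEnum, so its values now compare and serialize as plain integers rather than the strings "real_time"/"batch". A small sketch of the behavioral difference:

```python
import json
from enum import IntEnum


class EndpointMode(IntEnum):
    REAL_TIME = 0
    BATCH = 1


# IntEnum members behave as ints, so they compare and serialize numerically:
assert EndpointMode.BATCH == 1
print(json.dumps({"mode": EndpointMode.BATCH}))  # {"mode": 1} (was "batch")
```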
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -28,6 +28,7 @@ from .constants import (
     FQN_REGEX,
     MODEL_ENDPOINT_ID_PATTERN,
     PROJECT_PATTERN,
+    EndpointMode,
     EndpointType,
     ModelEndpointMonitoringMetricType,
     ModelMonitoringMode,
@@ -118,6 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
     project: constr(regex=PROJECT_PATTERN)
     endpoint_type: EndpointType = EndpointType.NODE_EP
     uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
+    mode: EndpointMode = EndpointMode.REAL_TIME
 
     @classmethod
     def mutable_fields(cls):
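Because the new mode field carries a default, payloads that predate it still validate. A plain-pydantic sketch of that compatibility behavior (Metadata here is illustrative, not the mlrun class):

```python
from enum import IntEnum

import pydantic.v1


class EndpointMode(IntEnum):
    REAL_TIME = 0
    BATCH = 1


class Metadata(pydantic.v1.BaseModel):
    name: str
    mode: EndpointMode = EndpointMode.REAL_TIME


print(Metadata(name="ep").mode)          # EndpointMode.REAL_TIME (default)
print(Metadata(name="ep", mode=1).mode)  # EndpointMode.BATCH (coerced from int)
```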
mlrun/datastore/model_provider/huggingface_provider.py CHANGED
@@ -15,7 +15,11 @@
 from typing import TYPE_CHECKING, Any, Optional, Union
 
 import mlrun
-from mlrun.datastore.model_provider.model_provider import ModelProvider
+from mlrun.datastore.model_provider.model_provider import (
+    InvokeResponseFormat,
+    ModelProvider,
+    UsageResponseKeys,
+)
 
 if TYPE_CHECKING:
     from transformers.pipelines.base import Pipeline
@@ -61,15 +65,18 @@ class HuggingFaceProvider(ModelProvider):
         self.load_client()
 
     @staticmethod
-    def _extract_string_output(result) -> str:
+    def _extract_string_output(response: list[dict]) -> str:
         """
         Extracts the first generated string from Hugging Face pipeline output,
         regardless of whether it's plain text-generation or chat-style output.
         """
-        if not isinstance(result, list) or len(result) == 0:
+        if not isinstance(response, list) or len(response) == 0:
             raise ValueError("Empty or invalid pipeline output")
-
-        return result[0].get("generated_text")
+        if len(response) != 1:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "HuggingFaceProvider: extracting string from response is only supported for single-response outputs"
+            )
+        return response[0].get("generated_text")
 
     @classmethod
     def parse_endpoint_and_path(cls, endpoint, subpath) -> (str, str):
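For reference, a text-generation pipeline returns a list of dicts such as [{"generated_text": ...}], one per returned sequence; the stricter check above rejects multi-sequence outputs (e.g., num_return_sequences > 1). A standalone sketch of the extraction logic, with plain ValueError standing in for the mlrun error types:

```python
def extract_string_output(response) -> str:
    # Shape produced by transformers text-generation pipelines:
    # [{"generated_text": "..."}], one dict per returned sequence.
    if not isinstance(response, list) or len(response) == 0:
        raise ValueError("Empty or invalid pipeline output")
    if len(response) != 1:
        raise ValueError("only single-response outputs are supported")
    return response[0].get("generated_text")


print(extract_string_output([{"generated_text": "Paris is the capital of France."}]))
```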
@@ -79,6 +86,68 @@ class HuggingFaceProvider(ModelProvider):
             subpath = ""
         return endpoint, subpath
 
+    def _response_handler(
+        self,
+        response: Union[str, list],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        messages: Union[str, list[str], "ChatType", list["ChatType"]] = None,
+        **kwargs,
+    ) -> Union[str, list, dict[str, Any]]:
+        """
+        Same as `ModelProvider._response_handler`.
+
+        * Expected to receive the response with `return_full_text=False`.
+
+        :param messages: Same as in `ModelProvider._response_handler`.
+        :param response: Same as in `ModelProvider._response_handler`.
+        :param invoke_response_format: Same as in `ModelProvider._response_handler`, in full and string modes.
+
+            For usage mode, generates three statistics:
+            prompt_tokens, completion_tokens, and total_tokens.
+
+            NOTE: Token counts are estimated after answer generation and
+            may differ from the actual tokens generated by the model due to
+            internal decoding behavior and implementation details.
+
+        :param kwargs: Same as in `ModelProvider._response_handler`.
+
+        :return: The result formatted according to `invoke_response_format`.
+
+        :raises MLRunInvalidArgumentError: If extracting the string response fails.
+        :raises MLRunRuntimeError: If applying the chat template to the model fails.
+        """
+        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
+            str_response = self._extract_string_output(response)
+            if invoke_response_format == InvokeResponseFormat.STRING:
+                return str_response
+            if invoke_response_format == InvokeResponseFormat.USAGE:
+                tokenizer = self.client.tokenizer
+                if not isinstance(messages, str):
+                    try:
+                        messages = tokenizer.apply_chat_template(
+                            messages, tokenize=False, add_generation_prompt=True
+                        )
+                    except Exception as e:
+                        raise mlrun.errors.MLRunRuntimeError(
+                            f"Failed to apply chat template using the tokenizer for model '{self.model}'. "
+                            "This may indicate that the tokenizer does not support chat formatting, "
+                            "or that the input format is invalid. "
+                            f"Original error: {e}"
+                        )
+                prompt_tokens = len(tokenizer.encode(messages))
+                completion_tokens = len(tokenizer.encode(str_response))
+                total_tokens = prompt_tokens + completion_tokens
+                usage = {
+                    "prompt_tokens": prompt_tokens,
+                    "completion_tokens": completion_tokens,
+                    "total_tokens": total_tokens,
+                }
+                response = {
+                    UsageResponseKeys.ANSWER: str_response,
+                    UsageResponseKeys.USAGE: usage,
+                }
+        return response
+
     def load_client(self) -> None:
         """
         Initializes the Hugging Face pipeline using the provided options.
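The token accounting above re-encodes the prompt and the answer after generation, which is why the counts are documented as estimates. A toy sketch of the same arithmetic, with a whitespace split standing in for the real tokenizer.encode (an assumption for illustration only):

```python
def estimate_usage(prompt: str, answer: str) -> dict:
    encode = str.split  # stand-in for tokenizer.encode (illustrative only)
    prompt_tokens = len(encode(prompt))
    completion_tokens = len(encode(answer))
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }


print(estimate_usage("What is the capital of France?", "Paris."))
# {'prompt_tokens': 6, 'completion_tokens': 1, 'total_tokens': 7}
```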
@@ -89,7 +158,7 @@ class HuggingFaceProvider(ModelProvider):
 
         Note: Hugging Face pipelines are synchronous and do not support async invocation.
 
-        Raises:
+        :raises:
             ImportError: If the `transformers` package is not installed.
         """
         try:
@@ -148,35 +217,55 @@ class HuggingFaceProvider(ModelProvider):
 
     def invoke(
         self,
-        messages: Union[str, list[str], "ChatType", list["ChatType"]] = None,
-        as_str: bool = False,
+        messages: Union[str, list[str], "ChatType", list["ChatType"]],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Union[str, list]:
+    ) -> Union[str, list, dict[str, Any]]:
         """
         HuggingFace-specific implementation of `ModelProvider.invoke`.
         Invokes a HuggingFace model operation using the synchronous client.
-        For complete usage details, refer to `ModelProvider.invoke`.
+        For full details, see `ModelProvider.invoke`.
 
         :param messages:
-            Same as ModelProvider.invoke.
+            Same as `ModelProvider.invoke`.
 
-        :param as_str:
-            If `True`, return only the main content (e.g., generated text) from a
-            **single-response output** — intended for use cases where you expect exactly one result.
-
-            If `False`, return the **full raw response object**, which is a list of dictionaries.
+        :param invoke_response_format: InvokeResponseFormat
+            Specifies the format of the returned response. Options:
+
+            - "string": Returns only the generated text content, extracted from a single response.
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary:
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+                  }
+
+            - "full": Returns the raw response object from the HuggingFace model,
+              typically a list of generated sequences (dictionaries).
+              This format does not include token usage statistics.
 
         :param invoke_kwargs:
-            Same as ModelProvider.invoke.
-        :return: Same as ModelProvider.invoke.
+            Additional keyword arguments passed to the HuggingFace client. Same as in `ModelProvider.invoke`.
+
+        :return:
+            A string, dictionary, or list of model outputs, depending on `invoke_response_format`.
         """
+
         if self.client.task != "text-generation":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "HuggingFaceProvider.invoke supports text-generation task only"
             )
-        if as_str:
+        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
             invoke_kwargs["return_full_text"] = False
         response = self.custom_invoke(text_inputs=messages, **invoke_kwargs)
-        if as_str:
-            return self._extract_string_output(response)
+        response = self._response_handler(
+            messages=messages,
+            response=response,
+            invoke_response_format=invoke_response_format,
+        )
         return response
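The string and usage modes force return_full_text=False so the pipeline yields only the completion, leaving _extract_string_output with the answer alone. A hedged sketch against the transformers API (left commented out because it downloads a model; the model name is an arbitrary choice):

```python
# from transformers import pipeline
#
# pipe = pipeline("text-generation", model="distilgpt2")
# out = pipe("Hello", return_full_text=False, max_new_tokens=8)
# print(out[0]["generated_text"])  # completion only, prompt stripped
```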
mlrun/datastore/model_provider/model_provider.py CHANGED
@@ -15,11 +15,37 @@ from collections.abc import Awaitable
 from typing import Any, Callable, Optional, Union
 
 import mlrun.errors
+from mlrun.common.types import StrEnum
 from mlrun.datastore.remote_client import (
     BaseRemoteClient,
 )
 
 
+class InvokeResponseFormat(StrEnum):
+    STRING = "string"
+    USAGE = "usage"
+    FULL = "full"
+
+    @classmethod
+    def is_str_response(cls, invoke_response_format: str) -> bool:
+        """
+        Returns True if the response format corresponds to a string-based
+        response (not a full generation object).
+        """
+        return invoke_response_format in {
+            cls.USAGE,
+            cls.STRING,
+        }
+
+
+class UsageResponseKeys(StrEnum):
+    ANSWER = "answer"
+    USAGE = "usage"
+
+    @classmethod
+    def fields(cls) -> list[str]:
+        return [cls.ANSWER, cls.USAGE]
+
+
 class ModelProvider(BaseRemoteClient):
     """
     The ModelProvider class is an abstract base for integrating with external
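A runnable sketch of the two new enums, assuming mlrun's StrEnum behaves like a str-backed Enum (so plain strings compare and hash equal to members):

```python
from enum import Enum


class InvokeResponseFormat(str, Enum):
    STRING = "string"
    USAGE = "usage"
    FULL = "full"

    @classmethod
    def is_str_response(cls, fmt: str) -> bool:
        return fmt in {cls.USAGE, cls.STRING}


# str-backed members compare equal to their raw values:
assert InvokeResponseFormat.is_str_response("usage")
assert not InvokeResponseFormat.is_str_response("full")
```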
@@ -56,6 +82,41 @@ class ModelProvider(BaseRemoteClient):
         self._client = None
         self._async_client = None
 
+    @staticmethod
+    def _extract_string_output(response: Any) -> str:
+        """
+        Extracts the string response from the response object.
+        """
+        pass
+
+    def _response_handler(
+        self,
+        response: Any,
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        **kwargs,
+    ) -> Union[str, dict, Any]:
+        """
+        Handles the model response according to the specified response format.
+
+        :param response: The raw response returned from the model invocation.
+        :param invoke_response_format: Determines how the response should be processed and returned.
+            Options include:
+
+            - STRING: Return only the main generated content as a string,
+              typically for single-answer responses.
+            - USAGE: Return a dictionary combining the string response with
+              additional metadata or token usage statistics, in this format:
+              {"answer": <string>, "usage": <dict>}
+            - FULL: Return the full raw response object unmodified.
+
+        :param kwargs: Additional parameters that may be required by specific implementations.
+
+        :return: The processed response in the format specified by `invoke_response_format`.
+            Can be a string, dictionary, or the original response object.
+        """
+        return None
+
     def get_client_options(self) -> dict:
         """
         Returns a dictionary containing credentials and configuration
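The base class only defines the contract; each provider supplies its own extraction. A minimal concrete sketch of the three modes as plain functions (illustrative, not a real mlrun provider):

```python
from typing import Any, Union


def handle_response(response: dict, fmt: str = "full") -> Union[str, dict, Any]:
    if fmt in ("string", "usage"):
        answer = response["text"]  # provider-specific string extraction
        if fmt == "string":
            return answer
        return {"answer": answer, "usage": response.get("usage", {})}
    return response  # "full": raw object, unmodified


raw = {"text": "Paris.", "usage": {"total_tokens": 16}}
assert handle_response(raw, "string") == "Paris."
assert handle_response(raw, "usage")["usage"]["total_tokens"] == 16
assert handle_response(raw) is raw
```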
@@ -133,57 +194,74 @@ class ModelProvider(BaseRemoteClient):
 
     def invoke(
         self,
-        messages: Optional[list[dict]] = None,
-        as_str: bool = False,
+        messages: Union[list[dict], Any],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Union[str, Any]:
+    ) -> Union[str, dict[str, Any], Any]:
         """
         Invokes a generative AI model with the provided messages and additional parameters.
         This method is designed to be a flexible interface for interacting with various
         generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
-        a list of messages (following a standardized format) and receive a response. The
-        response can be returned as plain text or in its full structured format, depending
-        on the `as_str` parameter.
+        a list of messages (following a standardized format) and receive a response.
 
         :param messages: A list of dictionaries representing the conversation history or input messages.
             Each dictionary should follow the format::
 
                 {"role": "system" | "user" | "assistant" ..., "content": "Message content as a string"}
 
             Example:
 
             .. code-block:: json
 
                 [
                     {"role": "system", "content": "You are a helpful assistant."},
                     {"role": "user", "content": "What is the capital of France?"}
                 ]
 
             This format is consistent across all backends.
 
-        :param as_str: A boolean flag indicating whether to return the response as a plain string.
-            - If True, the function extracts and returns the main content of the first
-              response.
-            - If False, the function returns the full response object,
-              which may include additional metadata or multiple response options.
-            Defaults to False.
+        :param invoke_response_format: Determines how the model response is returned:
+
+            - string: Returns only the generated text content from the model output,
+              for single-answer responses only.
+            - usage: Combines the string response with additional metadata (token usage)
+              and returns the result in a dictionary. Note: the usage dictionary may
+              contain additional keys, depending on the model provider:
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+                  }
+
+            - full: Returns the full model output.
 
         :param invoke_kwargs:
             Additional keyword arguments to be passed to the underlying model API call.
             These can include parameters such as temperature, max tokens, etc.,
             depending on the capabilities of the specific backend being used.
 
-        :return:
-            - If `as_str` is True: Returns the main content of the first response as a string.
-            - If `as_str` is False: Returns the full response object.
+        :return: The invoke result, formatted according to the specified
+            `invoke_response_format` parameter.
 
         """
         raise NotImplementedError("invoke method is not implemented")
 
     async def async_invoke(
         self,
-        messages: Optional[list[dict]] = None,
-        as_str: bool = False,
+        messages: list[dict],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Union[str, Any]:
+    ) -> Union[str, dict[str, Any], Any]:
         """Async version of `invoke`. See `invoke` for full documentation."""
         raise NotImplementedError("async_invoke is not implemented")
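The message format referenced throughout is the familiar role/content list. A hypothetical call against the new signature (the provider construction is omitted, so the invoke lines are illustrative only):

```python
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# Hypothetical call, given a concrete provider instance:
# result = provider.invoke(messages, invoke_response_format=InvokeResponseFormat.USAGE)
# result["answer"], result["usage"]["total_tokens"]
```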
mlrun/datastore/model_provider/openai_provider.py CHANGED
@@ -16,7 +16,11 @@ from collections.abc import Awaitable
 from typing import TYPE_CHECKING, Any, Callable, Optional, Union
 
 import mlrun
-from mlrun.datastore.model_provider.model_provider import ModelProvider
+from mlrun.datastore.model_provider.model_provider import (
+    InvokeResponseFormat,
+    ModelProvider,
+    UsageResponseKeys,
+)
 from mlrun.datastore.utils import accepts_param
 
 if TYPE_CHECKING:
@@ -38,6 +42,7 @@ class OpenAIProvider(ModelProvider):
     """
 
     support_async = True
+    response_class = None
 
     def __init__(
         self,
@@ -64,6 +69,27 @@ class OpenAIProvider(ModelProvider):
         self.options = self.get_client_options()
         self.load_client()
 
+    @classmethod
+    def _import_response_class(cls) -> None:
+        if not cls.response_class:
+            try:
+                from openai.types.chat.chat_completion import ChatCompletion
+            except ImportError as exc:
+                raise ImportError("openai package is not installed") from exc
+            cls.response_class = ChatCompletion
+
+    @staticmethod
+    def _extract_string_output(response: "ChatCompletion") -> str:
+        """
+        Extracts the generated string from an OpenAI `ChatCompletion`
+        response; only single-choice responses are supported.
+        """
+        if len(response.choices) != 1:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "OpenAIProvider: extracting string from response is only supported for single-response outputs"
+            )
+        return response.choices[0].message.content
+
     @classmethod
     def parse_endpoint_and_path(cls, endpoint, subpath) -> (str, str):
         if endpoint and subpath:
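The _import_response_class helper caches the optional openai class on the provider class the first time it is needed, instead of importing at module load. A generic sketch of the same pattern, with a standard-library class standing in for ChatCompletion:

```python
class LazyProvider:
    response_class = None

    @classmethod
    def _import_response_class(cls) -> None:
        if cls.response_class is None:
            try:
                from fractions import Fraction  # stand-in for the optional dependency
            except ImportError as exc:
                raise ImportError("optional dependency is not installed") from exc
            cls.response_class = Fraction


LazyProvider._import_response_class()
assert LazyProvider.response_class is not None
```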
@@ -180,60 +206,90 @@ class OpenAIProvider(ModelProvider):
             **invoke_kwargs, **model_kwargs
         )
 
+    def _response_handler(
+        self,
+        response: "ChatCompletion",
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        **kwargs,
+    ) -> Union["ChatCompletion", str, dict[str, Any]]:
+        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
+            str_response = self._extract_string_output(response)
+            if invoke_response_format == InvokeResponseFormat.STRING:
+                return str_response
+            if invoke_response_format == InvokeResponseFormat.USAGE:
+                stats = response.to_dict()["usage"]
+                response = {
+                    UsageResponseKeys.ANSWER: str_response,
+                    UsageResponseKeys.USAGE: stats,
+                }
+        return response
+
     def invoke(
         self,
-        messages: Optional[list[dict]] = None,
-        as_str: bool = False,
+        messages: list[dict],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Union[str, "ChatCompletion"]:
+    ) -> Union[dict[str, Any], str, "ChatCompletion"]:
         """
         OpenAI-specific implementation of `ModelProvider.invoke`.
-        Invokes an OpenAI model operation using the sync client.
+        Invokes an OpenAI model operation using the synchronous client.
         For full details, see `ModelProvider.invoke`.
 
-        :param messages: Same as ModelProvider.invoke.
+        :param messages:
+            Same as `ModelProvider.invoke`.
 
-        :param as_str: bool
-            If `True`, returns only the main content of the first response
-            (`response.choices[0].message.content`).
-            If `False`, returns the full response object, whose type depends on
-            the specific OpenAI SDK operation used (e.g., chat completion, completion, etc.).
+        :param invoke_response_format: InvokeResponseFormat
+            Specifies the format of the returned response. Options:
+
+            - "string": Returns only the generated text content, taken from a single response.
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary:
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": <ChatCompletion>.to_dict()["usage"]
+                  }
+
+            - "full": Returns the full OpenAI `ChatCompletion` object.
 
         :param invoke_kwargs:
-            Same as ModelProvider.invoke.
-        :return: Same as ModelProvider.invoke.
+            Additional keyword arguments passed to the OpenAI client. Same as in `ModelProvider.invoke`.
+
+        :return:
+            A string, dictionary, or `ChatCompletion` object, depending on `invoke_response_format`.
         """
+
         response = self.custom_invoke(messages=messages, **invoke_kwargs)
-        if as_str:
-            return response.choices[0].message.content
-        return response
+        return self._response_handler(
+            messages=messages,
+            invoke_response_format=invoke_response_format,
+            response=response,
+        )
 
     async def async_invoke(
        self,
-        messages: Optional[list[dict]] = None,
-        as_str: bool = False,
+        messages: list[dict],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Union[str, "ChatCompletion"]:
+    ) -> Union[str, "ChatCompletion", dict]:
         """
         OpenAI-specific implementation of `ModelProvider.async_invoke`.
         Invokes an OpenAI model operation using the async client.
-        For full details, see `ModelProvider.async_invoke`.
+        For full details, see `ModelProvider.async_invoke` and `OpenAIProvider.invoke`.
 
-        :param messages: Same as ModelProvider.async_invoke.
+        :param messages: Same as `OpenAIProvider.invoke`.
 
-        :param as_str: bool
-            If `True`, returns only the main content of the first response
-            (`response.choices[0].message.content`).
-            If `False`, returns the full awaited response object, whose type depends on
-            the specific OpenAI SDK operation used (e.g., chat completion, completion, etc.).
+        :param invoke_response_format: InvokeResponseFormat
+            Same as `OpenAIProvider.invoke`.
 
         :param invoke_kwargs:
-            Same as ModelProvider.async_invoke.
-        :returns Same as ModelProvider.async_invoke.
+            Same as `OpenAIProvider.invoke`.
+        :return: Same as `ModelProvider.async_invoke`.
 
         """
         response = await self.async_custom_invoke(messages=messages, **invoke_kwargs)
-        if as_str:
-            return response.choices[0].message.content
-        return response
+        return self._response_handler(
+            messages=messages,
+            invoke_response_format=invoke_response_format,
+            response=response,
+        )
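An offline sketch of the OpenAI handling above, using a tiny stand-in for ChatCompletion (an assumption for illustration; the real class comes from the openai SDK):

```python
from dataclasses import dataclass


@dataclass
class _Message:
    content: str


@dataclass
class _Choice:
    message: _Message


@dataclass
class _FakeChatCompletion:
    choices: list
    usage: dict

    def to_dict(self) -> dict:
        return {"usage": self.usage}


resp = _FakeChatCompletion(
    choices=[_Choice(_Message("Paris."))],
    usage={"prompt_tokens": 14, "completion_tokens": 2, "total_tokens": 16},
)

# "usage" mode pairs the single answer with the SDK-reported token stats:
answer = resp.choices[0].message.content
print({"answer": answer, "usage": resp.to_dict()["usage"]})
```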
mlrun/db/base.py CHANGED
@@ -16,8 +16,6 @@ import datetime
 from abc import ABC, abstractmethod
 from typing import Literal, Optional, Union
 
-from deprecated import deprecated
-
 import mlrun.alerts
 import mlrun.common
 import mlrun.common.formatters
@@ -445,23 +443,6 @@ class RunDBInterface(ABC):
     ) -> dict:
         pass
 
-    # TODO: remove in 1.10.0
-    @deprecated(
-        version="1.7.0",
-        reason="'list_features' will be removed in 1.10.0, use 'list_features_v2' instead",
-        category=FutureWarning,
-    )
-    @abstractmethod
-    def list_features(
-        self,
-        project: str,
-        name: Optional[str] = None,
-        tag: Optional[str] = None,
-        entities: Optional[list[str]] = None,
-        labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
-    ) -> mlrun.common.schemas.FeaturesOutput:
-        pass
-
     @abstractmethod
     def list_features_v2(
         self,
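For callers, this removal completes the deprecation announced in 1.7.0: switch to list_features_v2. A hedged migration sketch, assuming a configured mlrun client and that list_features_v2 accepts the same filter arguments as the removed method (which the deprecation notice implies):

```python
import mlrun

db = mlrun.get_run_db()  # assumes a reachable MLRun API server
# Before (removed in this release):
# features = db.list_features("my-project", name="my-feature")
features = db.list_features_v2("my-project", name="my-feature")
```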