mlrun 1.10.0rc19__py3-none-any.whl → 1.10.0rc21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/model_monitoring/constants.py +4 -11
- mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
- mlrun/datastore/model_provider/huggingface_provider.py +109 -20
- mlrun/datastore/model_provider/model_provider.py +110 -32
- mlrun/datastore/model_provider/openai_provider.py +87 -31
- mlrun/db/base.py +0 -19
- mlrun/db/httpdb.py +10 -46
- mlrun/db/nopdb.py +0 -10
- mlrun/launcher/base.py +0 -6
- mlrun/model_monitoring/api.py +43 -22
- mlrun/model_monitoring/applications/base.py +1 -1
- mlrun/model_monitoring/controller.py +112 -38
- mlrun/model_monitoring/db/_schedules.py +13 -9
- mlrun/model_monitoring/stream_processing.py +16 -12
- mlrun/platforms/__init__.py +3 -2
- mlrun/projects/project.py +2 -2
- mlrun/run.py +38 -5
- mlrun/serving/server.py +23 -0
- mlrun/serving/states.py +76 -29
- mlrun/serving/system_steps.py +60 -36
- mlrun/utils/helpers.py +27 -13
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/METADATA +6 -5
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/RECORD +30 -31
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc19.dist-info → mlrun-1.10.0rc21.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/function.py
CHANGED
@@ -114,11 +114,21 @@ class StateThresholds(pydantic.v1.BaseModel):
     default: typing.Optional[dict[str, str]]
 
 
+class Backoff(pydantic.v1.BaseModel):
+    default_base_delay: typing.Optional[str]
+    min_base_delay: typing.Optional[str]
+
+
+class RetrySpec(pydantic.v1.BaseModel):
+    backoff: Backoff
+
+
 class FunctionSpec(pydantic.v1.BaseModel):
     image_pull_secret: typing.Optional[ImagePullSecret]
     security_context: typing.Optional[SecurityContext]
     service_account: typing.Optional[ServiceAccount]
     state_thresholds: typing.Optional[StateThresholds]
+    retry: typing.Optional[RetrySpec]
 
     class Config:
         extra = pydantic.v1.Extra.allow
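The new `Backoff` and `RetrySpec` models are plain pydantic v1 schemas, so a retry block can be validated on its own. A minimal sketch, assuming the delay fields take duration strings such as "10s" (an assumption based on similar mlrun spec fields; the diff shows no defaults or validation):

```python
import typing

import pydantic.v1


class Backoff(pydantic.v1.BaseModel):
    default_base_delay: typing.Optional[str]
    min_base_delay: typing.Optional[str]


class RetrySpec(pydantic.v1.BaseModel):
    backoff: Backoff


# "10s" / "1s" are illustrative duration strings, not documented defaults.
spec = RetrySpec.parse_obj(
    {"backoff": {"default_base_delay": "10s", "min_base_delay": "1s"}}
)
print(spec.backoff.default_base_delay)  # -> 10s
```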
mlrun/common/schemas/model_monitoring/constants.py
CHANGED
@@ -34,6 +34,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
     UID = "uid"
     PROJECT = "project"
     ENDPOINT_TYPE = "endpoint_type"
+    MODE = "mode"
     NAME = "name"
     CREATED = "created"
     UPDATED = "updated"
@@ -326,18 +327,10 @@ class EndpointType(IntEnum):
     def top_level_list(cls):
         return [cls.NODE_EP, cls.ROUTER, cls.BATCH_EP]
 
-    @classmethod
-    def real_time_list(cls):
-        return [cls.NODE_EP, cls.ROUTER, cls.LEAF_EP]
-
-    @classmethod
-    def batch_list(cls):
-        return [cls.BATCH_EP]
 
-
-
-
-    BATCH = "batch"
+class EndpointMode(IntEnum):
+    REAL_TIME = 0
+    BATCH = 1
 
 
 class MonitoringFunctionNames(MonitoringStrEnum):
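The removed `real_time_list`/`batch_list` helpers appear to be superseded by the explicit `EndpointMode` flag. A minimal sketch of how a call site might branch on the new enum; the `is_batch` helper is illustrative, not part of mlrun:

```python
from enum import IntEnum


class EndpointMode(IntEnum):
    REAL_TIME = 0
    BATCH = 1


# Hypothetical call-site migration: instead of checking membership in
# EndpointType.real_time_list() / batch_list(), branch on the mode field.
def is_batch(mode: EndpointMode) -> bool:
    return mode == EndpointMode.BATCH


assert not is_batch(EndpointMode.REAL_TIME)
assert is_batch(EndpointMode.BATCH)
```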
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED
@@ -28,6 +28,7 @@ from .constants import (
     FQN_REGEX,
     MODEL_ENDPOINT_ID_PATTERN,
     PROJECT_PATTERN,
+    EndpointMode,
     EndpointType,
     ModelEndpointMonitoringMetricType,
     ModelMonitoringMode,
@@ -118,6 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
     project: constr(regex=PROJECT_PATTERN)
     endpoint_type: EndpointType = EndpointType.NODE_EP
     uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
+    mode: EndpointMode = EndpointMode.REAL_TIME
 
     @classmethod
     def mutable_fields(cls):
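Since `mode` defaults to `EndpointMode.REAL_TIME`, metadata created by older callers keeps real-time semantics. A self-contained sketch with plain pydantic v1 models; `EndpointMetadataSketch` is hypothetical and omits the real `ModelEndpointMetadata` fields and validators:

```python
from enum import IntEnum

import pydantic.v1


class EndpointMode(IntEnum):
    REAL_TIME = 0
    BATCH = 1


class EndpointMetadataSketch(pydantic.v1.BaseModel):
    project: str
    mode: EndpointMode = EndpointMode.REAL_TIME  # new field, defaults to real-time


meta = EndpointMetadataSketch(project="demo")
assert meta.mode is EndpointMode.REAL_TIME
```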
mlrun/datastore/model_provider/huggingface_provider.py
CHANGED
@@ -15,7 +15,11 @@
 from typing import TYPE_CHECKING, Any, Optional, Union
 
 import mlrun
-from mlrun.datastore.model_provider.model_provider import ModelProvider
+from mlrun.datastore.model_provider.model_provider import (
+    InvokeResponseFormat,
+    ModelProvider,
+    UsageResponseKeys,
+)
 
 if TYPE_CHECKING:
     from transformers.pipelines.base import Pipeline
@@ -61,15 +65,18 @@ class HuggingFaceProvider(ModelProvider):
         self.load_client()
 
     @staticmethod
-    def _extract_string_output(
+    def _extract_string_output(response: list[dict]) -> str:
         """
         Extracts the first generated string from Hugging Face pipeline output,
         regardless of whether it's plain text-generation or chat-style output.
         """
-        if not isinstance(
+        if not isinstance(response, list) or len(response) == 0:
             raise ValueError("Empty or invalid pipeline output")
-
-
+        if len(response) != 1:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "HuggingFaceProvider: extracting string from response is only supported for single-response outputs"
+            )
+        return response[0].get("generated_text")
 
     @classmethod
     def parse_endpoint_and_path(cls, endpoint, subpath) -> (str, str):
@@ -79,6 +86,68 @@ class HuggingFaceProvider(ModelProvider):
             subpath = ""
         return endpoint, subpath
 
+    def _response_handler(
+        self,
+        response: Union[str, list],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        messages: Union[str, list[str], "ChatType", list["ChatType"]] = None,
+        **kwargs,
+    ) -> Union[str, list, dict[str, Any]]:
+        """
+        Same as `ModelProvider._response_handler`.
+
+        * Expected to receive the response with `return_full_text=False`.
+
+        :param messages: Same as in `ModelProvider._response_handler`.
+        :param response: Same as in `ModelProvider._response_handler`.
+        :param invoke_response_format: Same as in `ModelProvider._response_handler`, in full and string modes.
+
+            For usage mode, generate 3 statistics:
+            prompt_tokens, completion_tokens and total_tokens.
+
+            NOTE: Token counts are estimated after answer generation and
+            may differ from the actual tokens generated by the model due to
+            internal decoding behavior and implementation details.
+
+        :param kwargs: Same as in `ModelProvider._response_handler`.
+
+        :return: The result formatted according to the `invoke_response_format`.
+
+        :raises MLRunInvalidArgumentError: If extracting the string response fails.
+        :raises MLRunRuntimeError: If applying the chat template to the model fails.
+        """
+        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
+            str_response = self._extract_string_output(response)
+            if invoke_response_format == InvokeResponseFormat.STRING:
+                return str_response
+            if invoke_response_format == InvokeResponseFormat.USAGE:
+                tokenizer = self.client.tokenizer
+                if not isinstance(messages, str):
+                    try:
+                        messages = tokenizer.apply_chat_template(
+                            messages, tokenize=False, add_generation_prompt=True
+                        )
+                    except Exception as e:
+                        raise mlrun.errors.MLRunRuntimeError(
+                            f"Failed to apply chat template using the tokenizer for model '{self.model}'. "
+                            "This may indicate that the tokenizer does not support chat formatting, "
+                            "or that the input format is invalid. "
+                            f"Original error: {e}"
+                        )
+                prompt_tokens = len(tokenizer.encode(messages))
+                completion_tokens = len(tokenizer.encode(str_response))
+                total_tokens = prompt_tokens + completion_tokens
+                usage = {
+                    "prompt_tokens": prompt_tokens,
+                    "completion_tokens": completion_tokens,
+                    "total_tokens": total_tokens,
+                }
+                response = {
+                    UsageResponseKeys.ANSWER: str_response,
+                    UsageResponseKeys.USAGE: usage,
+                }
+        return response
+
     def load_client(self) -> None:
         """
         Initializes the Hugging Face pipeline using the provided options.
@@ -89,7 +158,7 @@ class HuggingFaceProvider(ModelProvider):
 
         Note: Hugging Face pipelines are synchronous and do not support async invocation.
 
-
+        :raises:
             ImportError: If the `transformers` package is not installed.
         """
         try:
@@ -148,35 +217,55 @@ class HuggingFaceProvider(ModelProvider):
 
     def invoke(
         self,
-        messages: Union[str, list[str], "ChatType", list["ChatType"]]
-
+        messages: Union[str, list[str], "ChatType", list["ChatType"]],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Union[str, list]:
+    ) -> Union[str, list, dict[str, Any]]:
         """
         HuggingFace-specific implementation of `ModelProvider.invoke`.
         Invokes a HuggingFace model operation using the synchronous client.
-        For
+        For full details, see `ModelProvider.invoke`.
 
         :param messages:
-
+            Same as `ModelProvider.invoke`.
 
-        :param
-
-            **single-response output** — intended for use cases where you expect exactly one result.
-
-
+        :param invoke_response_format: InvokeResponseFormat
+            Specifies the format of the returned response. Options:
+
+            - "string": Returns only the generated text content, extracted from a single response.
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary:
+
+                .. code-block:: json
+                    {
+                        "answer": "<generated_text>",
+                        "usage": {
+                            "prompt_tokens": <int>,
+                            "completion_tokens": <int>,
+                            "total_tokens": <int>
+                        }
+                    }
+
+            - "full": Returns the raw response object from the HuggingFace model,
+              typically a list of generated sequences (dictionaries).
+              This format does not include token usage statistics.
 
         :param invoke_kwargs:
-
-
+            Additional keyword arguments passed to the HuggingFace client. Same as in `ModelProvider.invoke`.
+
+        :return:
+            A string, dictionary, or list of model outputs, depending on `invoke_response_format`.
         """
+
         if self.client.task != "text-generation":
             raise mlrun.errors.MLRunInvalidArgumentError(
                "HuggingFaceProvider.invoke supports text-generation task only"
            )
-        if
+        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
             invoke_kwargs["return_full_text"] = False
         response = self.custom_invoke(text_inputs=messages, **invoke_kwargs)
-
-
+        response = self._response_handler(
+            messages=messages,
+            response=response,
+            invoke_response_format=invoke_response_format,
+        )
         return response
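The usage numbers the handler above returns are estimated by re-encoding the prompt and the generated answer with the pipeline's tokenizer, as the docstring warns. A runnable sketch of that estimation outside mlrun, using a plain transformers pipeline (`gpt2` is an illustrative model with no chat template, so the string-prompt path is exercised):

```python
from transformers import pipeline

pipe = pipeline("text-generation", model="gpt2")
prompt = "What is the capital of France?"
# return_full_text=False mirrors what the provider sets for string/usage modes.
answer = pipe(prompt, return_full_text=False, max_new_tokens=20)[0]["generated_text"]

# Token usage is estimated after generation by re-encoding both sides.
tokenizer = pipe.tokenizer
prompt_tokens = len(tokenizer.encode(prompt))
completion_tokens = len(tokenizer.encode(answer))
print({
    "answer": answer,
    "usage": {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    },
})
```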
mlrun/datastore/model_provider/model_provider.py
CHANGED
@@ -15,11 +15,37 @@ from collections.abc import Awaitable
 from typing import Any, Callable, Optional, Union
 
 import mlrun.errors
+from mlrun.common.types import StrEnum
 from mlrun.datastore.remote_client import (
     BaseRemoteClient,
 )
 
 
+class InvokeResponseFormat(StrEnum):
+    STRING = "string"
+    USAGE = "usage"
+    FULL = "full"
+
+    @classmethod
+    def is_str_response(cls, invoke_response_format: str) -> bool:
+        """
+        Returns True if the response key corresponds to a string-based response (not a full generation object).
+        """
+        return invoke_response_format in {
+            cls.USAGE,
+            cls.STRING,
+        }
+
+
+class UsageResponseKeys(StrEnum):
+    ANSWER = "answer"
+    USAGE = "usage"
+
+    @classmethod
+    def fields(cls) -> list[str]:
+        return [cls.ANSWER, cls.USAGE]
+
+
 class ModelProvider(BaseRemoteClient):
     """
     The ModelProvider class is an abstract base for integrating with external
@@ -56,6 +82,41 @@ class ModelProvider(BaseRemoteClient):
         self._client = None
         self._async_client = None
 
+    @staticmethod
+    def _extract_string_output(response: Any) -> str:
+        """
+        Extracts string response from response object
+        """
+        pass
+
+    def _response_handler(
+        self,
+        response: Any,
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        **kwargs,
+    ) -> Union[str, dict, Any]:
+        """
+        Handles the model response according to the specified response format.
+
+        :param response: The raw response returned from the model invocation.
+        :param invoke_response_format: Determines how the response should be processed and returned.
+            Options include:
+
+            - STRING: Return only the main generated content as a string,
+              typically for single-answer responses.
+            - USAGE: Return a dictionary combining the string response with
+              additional metadata or token usage statistics, in this format:
+              {"answer": <string>, "usage": <dict>}
+
+            - FULL: Return the full raw response object unmodified.
+
+        :param kwargs: Additional parameters that may be required by specific implementations.
+
+        :return: The processed response in the format specified by `invoke_response_format`.
+            Can be a string, dictionary, or the original response object.
+        """
+        return None
+
     def get_client_options(self) -> dict:
         """
         Returns a dictionary containing credentials and configuration
@@ -133,57 +194,74 @@ class ModelProvider(BaseRemoteClient):
 
     def invoke(
         self,
-        messages:
-
+        messages: Union[list[dict], Any],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Union[str, Any]:
+    ) -> Union[str, dict[str, Any], Any]:
         """
         Invokes a generative AI model with the provided messages and additional parameters.
         This method is designed to be a flexible interface for interacting with various
         generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
-        a list of messages (following a standardized format) and receive a response.
-
-
-
-            {"role": "system"| "user" | "assistant" ..., "content": "Message content as a string"}
-            Example:
-
-
-
-
-
-
-            are provided.
-
-            - If True, the function extracts and returns the main content of the first
-              response.
-            - If False, the function returns the full response object,
-              which may include additional metadata or multiple response options.
-              Defaults to False.
+        a list of messages (following a standardized format) and receive a response.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {"role": "system"| "user" | "assistant" ..., "content":
+                "Message content as a string"}
+
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            This format is consistent across all backends. Defaults to None if no messages
+            are provided.
+
+        :param invoke_response_format: Determines how the model response is returned:
+
+            - string: Returns only the generated text content from the model output,
+              for single-answer responses only.
+
+            - usage: Combines the STRING response with additional metadata (token usage),
+              and returns the result in a dictionary.
+
+              Note: The usage dictionary may contain additional
+              keys depending on the model provider:
+
+            .. code-block:: json
+
+                {
+                    "answer": "<generated_text>",
+                    "usage": {
+                        "prompt_tokens": <int>,
+                        "completion_tokens": <int>,
+                        "total_tokens": <int>
+                    }
+                }
+
+            - full: Returns the full model output.
 
         :param invoke_kwargs:
-
-
-
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
 
-        :return:
-
-            - If `as_str` is False: Returns the full response object.
+        :return: The invoke result formatted according to the specified
+            invoke_response_format parameter.
 
         """
         raise NotImplementedError("invoke method is not implemented")
 
     async def async_invoke(
         self,
-        messages:
-
+        messages: list[dict],
+        invoke_response_format=InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Union[str, Any]:
+    ) -> Union[str, dict[str, Any], Any]:
         """Async version of `invoke`. See `invoke` for full documentation."""
         raise NotImplementedError("async_invoke is not implemented")
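A self-contained sketch of the dispatch the new enum enables, with the stdlib standing in for mlrun's `StrEnum` so it runs without mlrun installed:

```python
from enum import Enum


class InvokeResponseFormat(str, Enum):
    STRING = "string"
    USAGE = "usage"
    FULL = "full"

    @classmethod
    def is_str_response(cls, fmt: str) -> bool:
        # Both "string" and "usage" require extracting the text answer first;
        # only "full" passes the raw provider response through.
        return fmt in {cls.STRING, cls.USAGE}


assert InvokeResponseFormat.is_str_response("usage")
assert InvokeResponseFormat.is_str_response("string")
assert not InvokeResponseFormat.is_str_response("full")
```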
mlrun/datastore/model_provider/openai_provider.py
CHANGED
@@ -16,7 +16,11 @@ from collections.abc import Awaitable
 from typing import TYPE_CHECKING, Any, Callable, Optional, Union
 
 import mlrun
-from mlrun.datastore.model_provider.model_provider import ModelProvider
+from mlrun.datastore.model_provider.model_provider import (
+    InvokeResponseFormat,
+    ModelProvider,
+    UsageResponseKeys,
+)
 from mlrun.datastore.utils import accepts_param
 
 if TYPE_CHECKING:
@@ -38,6 +42,7 @@ class OpenAIProvider(ModelProvider):
     """
 
     support_async = True
+    response_class = None
 
     def __init__(
         self,
@@ -64,6 +69,27 @@ class OpenAIProvider(ModelProvider):
         self.options = self.get_client_options()
         self.load_client()
 
+    @classmethod
+    def _import_response_class(cls) -> None:
+        if not cls.response_class:
+            try:
+                from openai.types.chat.chat_completion import ChatCompletion
+            except ImportError as exc:
+                raise ImportError("openai package is not installed") from exc
+            cls.response_class = ChatCompletion
+
+    @staticmethod
+    def _extract_string_output(response: "ChatCompletion") -> str:
+        """
+        Extracts the generated string from an OpenAI chat-completion response;
+        supported for single-choice outputs only.
+        """
+        if len(response.choices) != 1:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "OpenAIProvider: extracting string from response is only supported for single-response outputs"
+            )
+        return response.choices[0].message.content
+
     @classmethod
     def parse_endpoint_and_path(cls, endpoint, subpath) -> (str, str):
         if endpoint and subpath:
@@ -180,60 +206,90 @@ class OpenAIProvider(ModelProvider):
             **invoke_kwargs, **model_kwargs
         )
 
+    def _response_handler(
+        self,
+        response: "ChatCompletion",
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        **kwargs,
+    ) -> Union["ChatCompletion", str, dict[str, Any]]:
+        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
+            str_response = self._extract_string_output(response)
+            if invoke_response_format == InvokeResponseFormat.STRING:
+                return str_response
+            if invoke_response_format == InvokeResponseFormat.USAGE:
+                stats = response.to_dict()["usage"]
+                response = {
+                    UsageResponseKeys.ANSWER: str_response,
+                    UsageResponseKeys.USAGE: stats,
+                }
+        return response
+
     def invoke(
         self,
-        messages:
-
+        messages: list[dict],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Union[str, "ChatCompletion"]:
+    ) -> Union[dict[str, Any], str, "ChatCompletion"]:
         """
         OpenAI-specific implementation of `ModelProvider.invoke`.
-        Invokes an OpenAI model operation using the
+        Invokes an OpenAI model operation using the synchronous client.
         For full details, see `ModelProvider.invoke`.
 
-        :param messages:
+        :param messages:
+            Same as `ModelProvider.invoke`.
 
-        :param
-
-
-
-
+        :param invoke_response_format: InvokeResponseFormat
+            Specifies the format of the returned response. Options:
+
+            - "string": Returns only the generated text content, taken from a single response.
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary:
+
+                .. code-block:: json
+                    {
+                        "answer": "<generated_text>",
+                        "usage": <ChatCompletion>.to_dict()["usage"]
+                    }
+
+            - "full": Returns the full OpenAI `ChatCompletion` object.
 
         :param invoke_kwargs:
-
-        :return: Same as ModelProvider.invoke.
+            Additional keyword arguments passed to the OpenAI client. Same as in `ModelProvider.invoke`.
 
+        :return:
+            A string, dictionary, or `ChatCompletion` object, depending on `invoke_response_format`.
         """
+
         response = self.custom_invoke(messages=messages, **invoke_kwargs)
-
-
-
+        return self._response_handler(
+            messages=messages,
+            invoke_response_format=invoke_response_format,
+            response=response,
+        )
 
     async def async_invoke(
         self,
-        messages:
-
+        messages: list[dict],
+        invoke_response_format=InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> Union[str, "ChatCompletion"]:
+    ) -> Union[str, "ChatCompletion", dict]:
         """
         OpenAI-specific implementation of `ModelProvider.async_invoke`.
         Invokes an OpenAI model operation using the async client.
-        For full details, see `ModelProvider.async_invoke`.
+        For full details, see `ModelProvider.async_invoke` and `OpenAIProvider.invoke`.
 
-        :param messages: Same as
+        :param messages: Same as `OpenAIProvider.invoke`.
 
-        :param
-
-            (`response.choices[0].message.content`).
-            If `False`, returns the full awaited response object, whose type depends on
-            the specific OpenAI SDK operation used (e.g., chat completion, completion, etc.).
+        :param invoke_response_format: InvokeResponseFormat
+            Same as `OpenAIProvider.invoke`.
 
         :param invoke_kwargs:
-            Same as
-        :returns Same as ModelProvider.async_invoke
+            Same as `OpenAIProvider.invoke`.
+        :returns: Same as `ModelProvider.async_invoke`.
 
         """
         response = await self.async_custom_invoke(messages=messages, **invoke_kwargs)
-
-
-
+        return self._response_handler(
+            messages=messages,
+            invoke_response_format=invoke_response_format,
+            response=response,
+        )
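For comparison, a sketch of the same answer/usage shape built directly against the OpenAI SDK; the model name is illustrative and `OPENAI_API_KEY` is assumed to be set in the environment:

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
completion = client.chat.completions.create(
    model="gpt-4o-mini",  # illustrative model name
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)
# Unlike the HuggingFace provider, OpenAI reports exact token counts itself,
# so "usage" comes straight from the ChatCompletion object.
print({
    "answer": completion.choices[0].message.content,
    "usage": completion.to_dict()["usage"],
})
```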
mlrun/db/base.py
CHANGED
|
@@ -16,8 +16,6 @@ import datetime
|
|
|
16
16
|
from abc import ABC, abstractmethod
|
|
17
17
|
from typing import Literal, Optional, Union
|
|
18
18
|
|
|
19
|
-
from deprecated import deprecated
|
|
20
|
-
|
|
21
19
|
import mlrun.alerts
|
|
22
20
|
import mlrun.common
|
|
23
21
|
import mlrun.common.formatters
|
|
@@ -445,23 +443,6 @@ class RunDBInterface(ABC):
|
|
|
445
443
|
) -> dict:
|
|
446
444
|
pass
|
|
447
445
|
|
|
448
|
-
# TODO: remove in 1.10.0
|
|
449
|
-
@deprecated(
|
|
450
|
-
version="1.7.0",
|
|
451
|
-
reason="'list_features' will be removed in 1.10.0, use 'list_features_v2' instead",
|
|
452
|
-
category=FutureWarning,
|
|
453
|
-
)
|
|
454
|
-
@abstractmethod
|
|
455
|
-
def list_features(
|
|
456
|
-
self,
|
|
457
|
-
project: str,
|
|
458
|
-
name: Optional[str] = None,
|
|
459
|
-
tag: Optional[str] = None,
|
|
460
|
-
entities: Optional[list[str]] = None,
|
|
461
|
-
labels: Optional[Union[str, dict[str, Optional[str]], list[str]]] = None,
|
|
462
|
-
) -> mlrun.common.schemas.FeaturesOutput:
|
|
463
|
-
pass
|
|
464
|
-
|
|
465
446
|
@abstractmethod
|
|
466
447
|
def list_features_v2(
|
|
467
448
|
self,
|