mlrun 1.10.0rc18__py3-none-any.whl → 1.10.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +21 -2
- mlrun/common/constants.py +1 -0
- mlrun/common/schemas/function.py +10 -0
- mlrun/common/schemas/model_monitoring/constants.py +4 -11
- mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
- mlrun/datastore/__init__.py +9 -1
- mlrun/datastore/model_provider/huggingface_provider.py +114 -26
- mlrun/datastore/model_provider/model_provider.py +144 -70
- mlrun/datastore/model_provider/openai_provider.py +95 -37
- mlrun/db/base.py +0 -19
- mlrun/db/httpdb.py +10 -46
- mlrun/db/nopdb.py +0 -10
- mlrun/launcher/base.py +13 -6
- mlrun/model_monitoring/api.py +43 -22
- mlrun/model_monitoring/applications/base.py +1 -1
- mlrun/model_monitoring/controller.py +112 -38
- mlrun/model_monitoring/db/_schedules.py +13 -9
- mlrun/model_monitoring/stream_processing.py +16 -12
- mlrun/platforms/__init__.py +3 -2
- mlrun/projects/project.py +2 -2
- mlrun/run.py +1 -1
- mlrun/runtimes/base.py +5 -2
- mlrun/runtimes/daskjob.py +1 -0
- mlrun/runtimes/nuclio/application/application.py +84 -5
- mlrun/runtimes/nuclio/function.py +3 -1
- mlrun/serving/server.py +24 -0
- mlrun/serving/states.py +80 -30
- mlrun/serving/system_steps.py +60 -36
- mlrun/utils/helpers.py +37 -13
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/METADATA +4 -4
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/RECORD +37 -38
- mlrun/api/schemas/__init__.py +0 -259
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc18.dist-info → mlrun-1.10.0rc20.dist-info}/top_level.txt +0 -0
mlrun/__init__.py
CHANGED
@@ -31,6 +31,7 @@ from typing import Optional
 
 import dotenv
 
+from .common.constants import MLRUN_ACTIVE_PROJECT
 from .config import config as mlconf
 from .datastore import DataItem, ModelProvider, store_manager
 from .db import get_run_db
@@ -167,11 +168,29 @@ def set_environment(
 
 
 def get_current_project(silent: bool = False) -> Optional[MlrunProject]:
-    if not pipeline_context.project and not silent:
+    if pipeline_context.project:
+        return pipeline_context.project
+
+    project_name = environ.get(MLRUN_ACTIVE_PROJECT, None)
+    if not project_name:
+        if not silent:
+            raise MLRunInvalidArgumentError(
+                "No current project is initialized. Use new, get or load project functions first."
+            )
+        return None
+
+    project = load_project(
+        name=project_name,
+        url=project_name,
+        save=False,
+        sync_functions=False,
+    )
+
+    if not project and not silent:
         raise MLRunInvalidArgumentError(
             "No current project is initialized. Use new, get or load project functions first."
         )
-    return pipeline_context.project
+    return project
 
 
 def get_sample_path(subpath=""):
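The practical effect of this change: when no project is registered in the pipeline context, get_current_project now falls back to the project named by the new MLRUN_ACTIVE_PROJECT environment variable and loads it without saving or syncing functions. A minimal sketch of relying on that fallback (the project name "my-project" is only an illustrative placeholder):

    import os

    import mlrun

    # Illustrative placeholder name; MLRUN_ACTIVE_PROJECT is the constant added
    # in mlrun/common/constants.py in this release.
    os.environ["MLRUN_ACTIVE_PROJECT"] = "my-project"

    # With no pipeline-context project set, this now loads "my-project"
    # (save=False, sync_functions=False) instead of raising immediately.
    project = mlrun.get_current_project(silent=True)
    print(project.name if project else "no active project")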
mlrun/common/constants.py
CHANGED
@@ -30,6 +30,7 @@ RESERVED_TAG_NAME_LATEST = "latest"
 JOB_TYPE_WORKFLOW_RUNNER = "workflow-runner"
 JOB_TYPE_PROJECT_LOADER = "project-loader"
 JOB_TYPE_RERUN_WORKFLOW_RUNNER = "rerun-workflow-runner"
+MLRUN_ACTIVE_PROJECT = "MLRUN_ACTIVE_PROJECT"
 
 
 class MLRunInternalLabels:
mlrun/common/schemas/function.py
CHANGED
@@ -114,11 +114,21 @@ class StateThresholds(pydantic.v1.BaseModel):
     default: typing.Optional[dict[str, str]]
 
 
+class Backoff(pydantic.v1.BaseModel):
+    default_base_delay: typing.Optional[str]
+    min_base_delay: typing.Optional[str]
+
+
+class RetrySpec(pydantic.v1.BaseModel):
+    backoff: Backoff
+
+
 class FunctionSpec(pydantic.v1.BaseModel):
     image_pull_secret: typing.Optional[ImagePullSecret]
     security_context: typing.Optional[SecurityContext]
     service_account: typing.Optional[ServiceAccount]
     state_thresholds: typing.Optional[StateThresholds]
+    retry: typing.Optional[RetrySpec]
 
     class Config:
         extra = pydantic.v1.Extra.allow
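The new Backoff and RetrySpec models hang off FunctionSpec.retry. A hedged sketch of building such a spec; the duration strings are assumed example values, since the schema only declares both delays as optional strings:

    from mlrun.common.schemas.function import Backoff, FunctionSpec, RetrySpec

    # "10s" / "1s" are illustrative values, not taken from the diff.
    retry = RetrySpec(backoff=Backoff(default_base_delay="10s", min_base_delay="1s"))
    spec = FunctionSpec(retry=retry)
    print(spec.retry.backoff.default_base_delay)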
mlrun/common/schemas/model_monitoring/constants.py
CHANGED

@@ -34,6 +34,7 @@ class ModelEndpointSchema(MonitoringStrEnum):
     UID = "uid"
     PROJECT = "project"
     ENDPOINT_TYPE = "endpoint_type"
+    MODE = "mode"
     NAME = "name"
     CREATED = "created"
     UPDATED = "updated"
@@ -326,18 +327,10 @@ class EndpointType(IntEnum):
     def top_level_list(cls):
         return [cls.NODE_EP, cls.ROUTER, cls.BATCH_EP]
 
-    @classmethod
-    def real_time_list(cls):
-        return [cls.NODE_EP, cls.ROUTER, cls.LEAF_EP]
-
-    @classmethod
-    def batch_list(cls):
-        return [cls.BATCH_EP]
 
-
-
-
-    BATCH = "batch"
+class EndpointMode(IntEnum):
+    REAL_TIME = 0
+    BATCH = 1
 
 
 class MonitoringFunctionNames(MonitoringStrEnum):
mlrun/common/schemas/model_monitoring/model_endpoints.py
CHANGED

@@ -28,6 +28,7 @@ from .constants import (
     FQN_REGEX,
     MODEL_ENDPOINT_ID_PATTERN,
     PROJECT_PATTERN,
+    EndpointMode,
     EndpointType,
     ModelEndpointMonitoringMetricType,
     ModelMonitoringMode,
@@ -118,6 +119,7 @@ class ModelEndpointMetadata(ObjectMetadata, ModelEndpointParser):
     project: constr(regex=PROJECT_PATTERN)
     endpoint_type: EndpointType = EndpointType.NODE_EP
     uid: Optional[constr(regex=MODEL_ENDPOINT_ID_PATTERN)]
+    mode: EndpointMode = EndpointMode.REAL_TIME
 
     @classmethod
     def mutable_fields(cls):
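With the new mode field, a model endpoint's metadata records whether it serves in real-time or batch mode, defaulting to real-time. A hedged sketch; the project and endpoint names are placeholders and the constructor details beyond the fields shown above are assumed:

    from mlrun.common.schemas.model_monitoring.constants import EndpointMode
    from mlrun.common.schemas.model_monitoring.model_endpoints import ModelEndpointMetadata

    # Placeholder names; only the new `mode` field is the point of interest here.
    metadata = ModelEndpointMetadata(project="my-project", name="my-endpoint", mode=EndpointMode.BATCH)
    print(metadata.mode)  # EndpointMode.BATCH; omitting `mode` yields EndpointMode.REAL_TIME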
mlrun/datastore/__init__.py
CHANGED
@@ -39,6 +39,7 @@ __all__ = [
 from urllib.parse import urlparse
 
 import fsspec
+import storey
 
 import mlrun.datastore.wasbfs
 from mlrun.datastore.datastore_profile import (
@@ -168,11 +169,12 @@ def get_stream_pusher(stream_path: str, **kwargs):
     raise ValueError(f"unsupported stream path {stream_path}")
 
 
-class _DummyStream:
+class _DummyStream(storey.MapClass):
     """stream emulator for tests and debug"""
 
     def __init__(self, event_list=None, **kwargs):
         self.event_list = event_list or []
+        super().__init__(**kwargs)
 
     def push(self, data, **kwargs):
         if not isinstance(data, list):
@@ -180,3 +182,9 @@ class _DummyStream:
         for item in data:
             logger.info(f"dummy stream got event: {item}, kwargs={kwargs}")
             self.event_list.append(item)
+
+    def do(self, event):
+        if not isinstance(event, list):
+            event = [event]
+        for item in event:
+            self.event_list.append(item)
mlrun/datastore/model_provider/huggingface_provider.py
CHANGED

@@ -12,16 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import TYPE_CHECKING, ...
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 import mlrun
-from mlrun.datastore.model_provider.model_provider import ModelProvider
+from mlrun.datastore.model_provider.model_provider import (
+    InvokeResponseFormat,
+    ModelProvider,
+    UsageResponseKeys,
+)
 
 if TYPE_CHECKING:
     from transformers.pipelines.base import Pipeline
-
-T = TypeVar("T")
-ChatType = list[dict[str, str]]  # according to transformers.pipelines.text_generation
+    from transformers.pipelines.text_generation import ChatType
 
 
 class HuggingFaceProvider(ModelProvider):
@@ -63,15 +65,18 @@ class HuggingFaceProvider(ModelProvider):
         self.load_client()
 
     @staticmethod
-    def _extract_string_output(...
+    def _extract_string_output(response: list[dict]) -> str:
         """
         Extracts the first generated string from Hugging Face pipeline output,
         regardless of whether it's plain text-generation or chat-style output.
         """
-        if not isinstance(...
+        if not isinstance(response, list) or len(response) == 0:
             raise ValueError("Empty or invalid pipeline output")
-
-
+        if len(response) != 1:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "HuggingFaceProvider: extracting string from response is only supported for single-response outputs"
+            )
+        return response[0].get("generated_text")
 
     @classmethod
     def parse_endpoint_and_path(cls, endpoint, subpath) -> (str, str):
@@ -81,6 +86,68 @@ class HuggingFaceProvider(ModelProvider):
             subpath = ""
         return endpoint, subpath
 
+    def _response_handler(
+        self,
+        response: Union[str, list],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        messages: Union[str, list[str], "ChatType", list["ChatType"]] = None,
+        **kwargs,
+    ) -> Union[str, list, dict[str, Any]]:
+        """
+        Same as `ModelProvider._response_handler`.
+
+        * Expected to receive the response with `return_full_text=False`.
+
+        :param messages: Same as in `ModelProvider._response_handler`.
+        :param response: Same as in `ModelProvider._response_handler`.
+        :param invoke_response_format: Same as in `ModelProvider._response_handler`, in full and string modes.
+
+            For usage mode, generate 3 statistics:
+            prompt_tokens, completion_tokens and total_tokens.
+
+            NOTE: Token counts are estimated after answer generation and
+            may differ from the actual tokens generated by the model due to
+            internal decoding behavior and implementation details.
+
+        :param kwargs: Same as in `ModelProvider._response_handler`.
+
+        :return: The result formatted according to the `invoke_response_format`.
+
+        :raises MLRunInvalidArgumentError: If extracting the string response fails.
+        :raises MLRunRuntimeError: If applying the chat template to the model fails.
+        """
+        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
+            str_response = self._extract_string_output(response)
+            if invoke_response_format == InvokeResponseFormat.STRING:
+                return str_response
+            if invoke_response_format == InvokeResponseFormat.USAGE:
+                tokenizer = self.client.tokenizer
+                if not isinstance(messages, str):
+                    try:
+                        messages = tokenizer.apply_chat_template(
+                            messages, tokenize=False, add_generation_prompt=True
+                        )
+                    except Exception as e:
+                        raise mlrun.errors.MLRunRuntimeError(
+                            f"Failed to apply chat template using the tokenizer for model '{self.model}'. "
+                            "This may indicate that the tokenizer does not support chat formatting, "
+                            "or that the input format is invalid. "
+                            f"Original error: {e}"
+                        )
+                prompt_tokens = len(tokenizer.encode(messages))
+                completion_tokens = len(tokenizer.encode(str_response))
+                total_tokens = prompt_tokens + completion_tokens
+                usage = {
+                    "prompt_tokens": prompt_tokens,
+                    "completion_tokens": completion_tokens,
+                    "total_tokens": total_tokens,
+                }
+                response = {
+                    UsageResponseKeys.ANSWER: str_response,
+                    UsageResponseKeys.USAGE: usage,
+                }
+        return response
+
     def load_client(self) -> None:
         """
         Initializes the Hugging Face pipeline using the provided options.
@@ -91,7 +158,7 @@ class HuggingFaceProvider(ModelProvider):
 
         Note: Hugging Face pipelines are synchronous and do not support async invocation.
 
-
+        :raises:
             ImportError: If the `transformers` package is not installed.
         """
         try:
@@ -117,7 +184,7 @@ class HuggingFaceProvider(ModelProvider):
 
     def custom_invoke(
         self, operation: Optional["Pipeline"] = None, **invoke_kwargs
-    ) -> ...
+    ) -> Union[list, dict, Any]:
         """
         HuggingFace implementation of `ModelProvider.custom_invoke`.
         Use the default config in provider client/ user defined client:
@@ -150,34 +217,55 @@ class HuggingFaceProvider(ModelProvider):
 
     def invoke(
         self,
-        messages: Union[str, list[str], ChatType, list[ChatType]]
-
+        messages: Union[str, list[str], "ChatType", list["ChatType"]],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> ...
+    ) -> Union[str, list, dict[str, Any]]:
         """
         HuggingFace-specific implementation of `ModelProvider.invoke`.
         Invokes a HuggingFace model operation using the synchronous client.
-        For ...
+        For full details, see `ModelProvider.invoke`.
+
         :param messages:
-
+            Same as `ModelProvider.invoke`.
+
+        :param invoke_response_format: InvokeResponseFormat
+            Specifies the format of the returned response. Options:
 
-
-
-
-
-
+            - "string": Returns only the generated text content, extracted from a single response.
+            - "usage": Combines the generated text with metadata (e.g., token usage), returning a dictionary:
+
+              .. code-block:: json
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+                  }
+
+            - "full": Returns the raw response object from the HuggingFace model,
+              typically a list of generated sequences (dictionaries).
+              This format does not include token usage statistics.
 
         :param invoke_kwargs:
-
-
+            Additional keyword arguments passed to the HuggingFace client. Same as in `ModelProvider.invoke`.
+
+        :return:
+            A string, dictionary, or list of model outputs, depending on `invoke_response_format`.
         """
+
         if self.client.task != "text-generation":
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "HuggingFaceProvider.invoke supports text-generation task only"
            )
-        if ...
+        if InvokeResponseFormat.is_str_response(invoke_response_format.value):
             invoke_kwargs["return_full_text"] = False
         response = self.custom_invoke(text_inputs=messages, **invoke_kwargs)
-
-
+        response = self._response_handler(
+            messages=messages,
+            response=response,
+            invoke_response_format=invoke_response_format,
+        )
         return response
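Taken together, the HuggingFace changes replace the old as_str flag with the shared invoke_response_format contract. A hedged usage sketch; it assumes `provider` is an already-constructed HuggingFaceProvider wrapping a text-generation pipeline (construction details are not shown in this diff):

    from mlrun.datastore.model_provider.model_provider import InvokeResponseFormat

    messages = [{"role": "user", "content": "What is the capital of France?"}]

    # "string": only the generated text (return_full_text=False is set internally).
    text = provider.invoke(messages, invoke_response_format=InvokeResponseFormat.STRING)

    # "usage": {"answer": ..., "usage": {...}} with token counts estimated via the tokenizer.
    result = provider.invoke(messages, invoke_response_format=InvokeResponseFormat.USAGE)
    print(text)
    print(result["answer"], result["usage"]["total_tokens"])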
mlrun/datastore/model_provider/model_provider.py
CHANGED

@@ -12,14 +12,38 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections.abc import Awaitable
-from typing import Any, Callable, Optional, ...
+from typing import Any, Callable, Optional, Union
 
 import mlrun.errors
+from mlrun.common.types import StrEnum
 from mlrun.datastore.remote_client import (
     BaseRemoteClient,
 )
 
-
+
+class InvokeResponseFormat(StrEnum):
+    STRING = "string"
+    USAGE = "usage"
+    FULL = "full"
+
+    @classmethod
+    def is_str_response(cls, invoke_response_format: str) -> bool:
+        """
+        Returns True if the response key corresponds to a string-based response (not a full generation object).
+        """
+        return invoke_response_format in {
+            cls.USAGE,
+            cls.STRING,
+        }
+
+
+class UsageResponseKeys(StrEnum):
+    ANSWER = "answer"
+    USAGE = "usage"
+
+    @classmethod
+    def fields(cls) -> list[str]:
+        return [cls.ANSWER, cls.USAGE]
 
 
 class ModelProvider(BaseRemoteClient):
@@ -58,6 +82,41 @@ class ModelProvider(BaseRemoteClient):
         self._client = None
         self._async_client = None
 
+    @staticmethod
+    def _extract_string_output(response: Any) -> str:
+        """
+        Extracts string response from response object
+        """
+        pass
+
+    def _response_handler(
+        self,
+        response: Any,
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        **kwargs,
+    ) -> Union[str, dict, Any]:
+        """
+        Handles the model response according to the specified response format.
+
+        :param response: The raw response returned from the model invocation.
+        :param invoke_response_format: Determines how the response should be processed and returned.
+            Options include:
+
+            - STRING: Return only the main generated content as a string,
+              typically for single-answer responses.
+            - USAGE: Return a dictionary combining the string response with
+              additional metadata or token usage statistics, in this format:
+              {"answer": <string>, "usage": <dict>}
+
+            - FULL: Return the full raw response object unmodified.
+
+        :param kwargs: Additional parameters that may be required by specific implementations.
+
+        :return: The processed response in the format specified by `invoke_response_format`.
+            Can be a string, dictionary, or the original response object.
+        """
+        return None
+
     def get_client_options(self) -> dict:
         """
         Returns a dictionary containing credentials and configuration
@@ -79,69 +138,6 @@ class ModelProvider(BaseRemoteClient):
 
         raise NotImplementedError("load_client method is not implemented")
 
-    def invoke(
-        self,
-        messages: Optional[list[dict]] = None,
-        as_str: bool = False,
-        **invoke_kwargs,
-    ) -> Optional[Union[str, T]]:
-        """
-        Invokes a generative AI model with the provided messages and additional parameters.
-        This method is designed to be a flexible interface for interacting with various
-        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
-        a list of messages (following a standardized format) and receive a response. The
-        response can be returned as plain text or in its full structured format, depending
-        on the `as_str` parameter.
-
-        :param messages: A list of dictionaries representing the conversation history or input messages.
-            Each dictionary should follow the format::
-            {"role": "system"| "user" | "assistant" ..., "content": "Message content as a string"}
-            Example:
-
-            .. code-block:: json
-
-                [
-                    {"role": "system", "content": "You are a helpful assistant."},
-                    {"role": "user", "content": "What is the capital of France?"}
-                ]
-
-            This format is consistent across all backends. Defaults to None if no messages
-            are provided.
-
-        :param as_str: A boolean flag indicating whether to return the response as a plain string.
-            - If True, the function extracts and returns the main content of the first
-              response.
-            - If False, the function returns the full response object,
-              which may include additional metadata or multiple response options.
-            Defaults to False.
-
-        :param invoke_kwargs:
-            Additional keyword arguments to be passed to the underlying model API call.
-            These can include parameters such as temperature, max tokens, etc.,
-            depending on the capabilities of the specific backend being used.
-
-        :return:
-            - If `as_str` is True: Returns the main content of the first response as a string.
-            - If `as_str` is False: Returns the full response object.
-
-        """
-        raise NotImplementedError("invoke method is not implemented")
-
-    def custom_invoke(
-        self, operation: Optional[Callable[..., T]] = None, **invoke_kwargs
-    ) -> Optional[T]:
-        """
-        Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.
-
-        Useful for dynamically calling model methods like text generation, chat completions, or image generation.
-        The operation must be a callable that accepts keyword arguments.
-
-        :param operation: A callable representing the model operation (e.g., a client method).
-        :param invoke_kwargs: Keyword arguments to pass to the operation.
-        :return: The full response returned by the operation.
-        """
-        raise NotImplementedError("custom_invoke method is not implemented")
-
     @property
     def client(self) -> Any:
         return self._client
@@ -168,9 +164,22 @@ class ModelProvider(BaseRemoteClient):
         )
         return self._async_client
 
+    def custom_invoke(self, operation: Optional[Callable], **invoke_kwargs) -> Any:
+        """
+        Invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.) with the given keyword arguments.
+
+        Useful for dynamically calling model methods like text generation, chat completions, or image generation.
+        The operation must be a callable that accepts keyword arguments.
+
+        :param operation: A callable representing the model operation (e.g., a client method).
+        :param invoke_kwargs: Keyword arguments to pass to the operation.
+        :return: The full response returned by the operation.
+        """
+        raise NotImplementedError("custom_invoke method is not implemented")
+
     async def async_custom_invoke(
-        self, operation: Optional[Callable[..., Awaitable[...
-    ) -> ...
+        self, operation: Optional[Callable[..., Awaitable[Any]]], **invoke_kwargs
+    ) -> Any:
         """
         Asynchronously invokes a model operation from a provider (e.g., OpenAI, Hugging Face, etc.)
         with the given keyword arguments.
@@ -183,11 +192,76 @@ class ModelProvider(BaseRemoteClient):
         """
         raise NotImplementedError("async_custom_invoke is not implemented")
 
+    def invoke(
+        self,
+        messages: Union[list[dict], Any],
+        invoke_response_format: InvokeResponseFormat = InvokeResponseFormat.FULL,
+        **invoke_kwargs,
+    ) -> Union[str, dict[str, Any], Any]:
+        """
+        Invokes a generative AI model with the provided messages and additional parameters.
+        This method is designed to be a flexible interface for interacting with various
+        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
+        a list of messages (following a standardized format) and receive a response.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+            {"role": "system"| "user" | "assistant" ..., "content":
+            "Message content as a string"}
+
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            This format is consistent across all backends. Defaults to None if no messages
+            are provided.
+
+        :param invoke_response_format: Determines how the model response is returned:
+
+            - string: Returns only the generated text content from the model output,
+              for single-answer responses only.
+
+            - usage: Combines the STRING response with additional metadata (token usage),
+              and returns the result in a dictionary.
+
+              Note: The usage dictionary may contain additional
+              keys depending on the model provider:
+
+              .. code-block:: json
+
+                  {
+                      "answer": "<generated_text>",
+                      "usage": {
+                          "prompt_tokens": <int>,
+                          "completion_tokens": <int>,
+                          "total_tokens": <int>
+                      }
+
+                  }
+
+            - full: Returns the full model output.
+
+        :param invoke_kwargs:
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
+
+        :return: The invoke result formatted according to the specified
+            invoke_response_format parameter.
+
+        """
+        raise NotImplementedError("invoke method is not implemented")
+
     async def async_invoke(
         self,
-        messages: ...
-
+        messages: list[dict],
+        invoke_response_format=InvokeResponseFormat.FULL,
         **invoke_kwargs,
-    ) -> ...
+    ) -> Union[str, dict[str, Any], Any]:
         """Async version of `invoke`. See `invoke` for full documentation."""
         raise NotImplementedError("async_invoke is not implemented")